1 | /*************************************************************************/ |
---|
2 | /* */ |
---|
3 | /* Copyright (c) 1994 Stanford University */ |
---|
4 | /* */ |
---|
5 | /* All rights reserved. */ |
---|
6 | /* */ |
---|
7 | /* Permission is given to use, copy, and modify this software for any */ |
---|
8 | /* non-commercial purpose as long as this copyright notice is not */ |
---|
9 | /* removed. All other uses, including redistribution in whole or in */ |
---|
10 | /* part, are forbidden without prior written permission. */ |
---|
11 | /* */ |
---|
12 | /* This software is provided with absolutely no warranty and no */ |
---|
13 | /* support. */ |
---|
14 | /* */ |
---|
15 | /*************************************************************************/ |
---|
16 | |
---|
17 | /* Shared memory implementation of the multigrid method |
---|
18 | Implementation uses red-black gauss-seidel relaxation |
---|
19 | iterations, w cycles, and the method of half-injection for |
---|
20 | residual computation. */ |
---|
21 | |
---|
22 | |
---|
23 | #include <stdio.h> |
---|
24 | #include <stdlib.h> |
---|
25 | #include <math.h> |
---|
26 | |
---|
27 | #include "decs.h" |
---|
28 | |
---|
29 | //////////////////////// |
---|
30 | void multig(long procid) |
---|
31 | { |
---|
32 | long iter; |
---|
33 | double wu; |
---|
34 | double errp; |
---|
35 | long m; |
---|
36 | long flag; |
---|
37 | long k; |
---|
38 | double wmax; |
---|
39 | double local_err; |
---|
40 | double red_local_err; |
---|
41 | double black_local_err; |
---|
42 | double g_error; |
---|
43 | long barrier_start; |
---|
44 | |
---|
45 | flag = 0; |
---|
46 | iter = 0; |
---|
47 | m = numlev - 1; |
---|
48 | wmax = maxwork; |
---|
49 | wu = 0.0; |
---|
50 | |
---|
51 | k = m; |
---|
52 | g_error = 1.0e30; |
---|
53 | while (!flag) |
---|
54 | { |
---|
55 | errp = g_error; |
---|
56 | iter++; |
---|
57 | if (procid == MASTER) |
---|
58 | { |
---|
59 | multi->err_multi = 0.0; |
---|
60 | } |
---|
61 | |
---|
62 | /* barrier to make sure all procs have finished intadd or rescal */ |
---|
63 | /* before proceeding with relaxation */ |
---|
64 | |
---|
65 | // BARRIER |
---|
66 | barrier_start = giet_proctime(); |
---|
67 | sqt_barrier_wait( &barrier ); |
---|
68 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
69 | |
---|
70 | copy_black(k, procid); |
---|
71 | |
---|
72 | relax(k, &red_local_err, RED_ITER, procid); |
---|
73 | |
---|
74 | /* barrier to make sure all red computations have been performed */ |
---|
75 | |
---|
76 | // BARRIER |
---|
77 | barrier_start = giet_proctime(); |
---|
78 | sqt_barrier_wait( &barrier ); |
---|
79 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
80 | |
---|
81 | copy_red(k, procid); |
---|
82 | |
---|
83 | relax(k, &black_local_err, BLACK_ITER, procid); |
---|
84 | |
---|
85 | /* compute max local error from red_local_err and black_local_err */ |
---|
86 | |
---|
87 | if (red_local_err > black_local_err) local_err = red_local_err; |
---|
88 | else local_err = black_local_err; |
---|
89 | |
---|
90 | /* update the global error if necessary */ |
---|
91 | |
---|
92 | sqt_lock_acquire( &error_lock ); |
---|
93 | if (local_err > multi->err_multi) multi->err_multi = local_err; |
---|
94 | sqt_lock_release( &error_lock ); |
---|
95 | |
---|
96 | /* a single relaxation sweep at the finest level is one unit of */ |
---|
97 | /* work */ |
---|
98 | wu += pow((double) 4.0, (double) k - m); |
---|
99 | |
---|
100 | /* barrier to make sure all processors have checked local error */ |
---|
101 | |
---|
102 | // BARRIER |
---|
103 | barrier_start = giet_proctime(); |
---|
104 | sqt_barrier_wait( &barrier ); |
---|
105 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
106 | |
---|
107 | g_error = multi->err_multi; |
---|
108 | |
---|
109 | /* barrier to make sure master does not cycle back to top of loop */ |
---|
110 | /* and reset global->err before we read it and decide what to do */ |
---|
111 | |
---|
112 | // BARRIER |
---|
113 | barrier_start = giet_proctime(); |
---|
114 | sqt_barrier_wait( &barrier ); |
---|
115 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
116 | |
---|
117 | if (g_error >= lev_tol[k]) |
---|
118 | { |
---|
119 | if (wu > wmax) |
---|
120 | { |
---|
121 | /* max work exceeded */ |
---|
122 | giet_pthread_exit("[OCEAN ERROR] work overflow in multig()"); |
---|
123 | } |
---|
124 | else |
---|
125 | { |
---|
126 | /* if we have not converged */ |
---|
127 | if ((k != 0) && (g_error / errp >= 0.6) && (k > minlevel)) |
---|
128 | { |
---|
129 | /* if need to go to coarser grid */ |
---|
130 | |
---|
131 | copy_borders(k, procid); |
---|
132 | copy_rhs_borders(k, procid); |
---|
133 | |
---|
134 | /* This bar is needed because the routine rescal uses the neighbor's |
---|
135 | border points to compute s4. We must ensure that the neighbor's |
---|
136 | border points have been written before we try computing the new |
---|
137 | rescal values */ |
---|
138 | |
---|
139 | // BARRIER |
---|
140 | barrier_start = giet_proctime(); |
---|
141 | sqt_barrier_wait( &barrier ); |
---|
142 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
143 | |
---|
144 | rescal(k, procid); |
---|
145 | |
---|
146 | /* transfer residual to rhs of coarser grid */ |
---|
147 | lev_tol[k - 1] = 0.3 * g_error; |
---|
148 | k = k - 1; |
---|
149 | putz(k, procid); |
---|
150 | /* make initial guess on coarser grid zero */ |
---|
151 | g_error = 1.0e30; |
---|
152 | } |
---|
153 | } |
---|
154 | } |
---|
155 | else |
---|
156 | { |
---|
157 | /* if we have converged at this level */ |
---|
158 | if (k == m) |
---|
159 | { |
---|
160 | /* if finest grid, we are done */ |
---|
161 | flag = 1; |
---|
162 | } |
---|
163 | else |
---|
164 | { |
---|
165 | /* else go to next finest grid */ |
---|
166 | |
---|
167 | copy_borders(k, procid); |
---|
168 | |
---|
169 | intadd(k, procid); |
---|
170 | /* changes the grid values at the finer level. rhs at finer level */ |
---|
171 | /* remains what it already is */ |
---|
172 | k++; |
---|
173 | g_error = 1.0e30; |
---|
174 | } |
---|
175 | } |
---|
176 | } |
---|
177 | } // end multig() |
---|
178 | |
---|
179 | //////////////////////////////////////////////////////// |
---|
180 | // perform red or black iteration (not both) |
---|
181 | //////////////////////////////////////////////////////// |
---|
182 | void relax(long k, double *err, long color, long procid) |
---|
183 | { |
---|
184 | long i; |
---|
185 | long j; |
---|
186 | long iend; |
---|
187 | long jend; |
---|
188 | long oddistart; |
---|
189 | long oddjstart; |
---|
190 | long evenistart; |
---|
191 | long evenjstart; |
---|
192 | double a; |
---|
193 | double h; |
---|
194 | double factor; |
---|
195 | double maxerr; |
---|
196 | double newerr; |
---|
197 | double oldval; |
---|
198 | double newval; |
---|
199 | double **t2a; |
---|
200 | double **t2b; |
---|
201 | double *t1a; |
---|
202 | double *t1b; |
---|
203 | double *t1c; |
---|
204 | double *t1d; |
---|
205 | |
---|
206 | i = 0; |
---|
207 | j = 0; |
---|
208 | |
---|
209 | *err = 0.0; |
---|
210 | h = lev_res[k]; |
---|
211 | |
---|
212 | /* points whose sum of row and col index is even do a red iteration, */ |
---|
213 | /* others do a black */ |
---|
214 | |
---|
215 | evenistart = gps[procid]->eist[k]; |
---|
216 | evenjstart = gps[procid]->ejst[k]; |
---|
217 | oddistart = gps[procid]->oist[k]; |
---|
218 | oddjstart = gps[procid]->ojst[k]; |
---|
219 | |
---|
220 | iend = gps[procid]->rlien[k]; |
---|
221 | jend = gps[procid]->rljen[k]; |
---|
222 | |
---|
223 | factor = 4.0 - eig2 * h * h; |
---|
224 | maxerr = 0.0; |
---|
225 | t2a = (double **) q_multi[procid][k]; |
---|
226 | t2b = (double **) rhs_multi[procid][k]; |
---|
227 | if (color == RED_ITER) { |
---|
228 | for (i = evenistart; i < iend; i += 2) { |
---|
229 | t1a = (double *) t2a[i]; |
---|
230 | t1b = (double *) t2b[i]; |
---|
231 | t1c = (double *) t2a[i - 1]; |
---|
232 | t1d = (double *) t2a[i + 1]; |
---|
233 | for (j = evenjstart; j < jend; j += 2) { |
---|
234 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
235 | oldval = t1a[j]; |
---|
236 | newval = a / factor; |
---|
237 | newerr = oldval - newval; |
---|
238 | t1a[j] = newval; |
---|
239 | if (fabs(newerr) > maxerr) { |
---|
240 | maxerr = fabs(newerr); |
---|
241 | } |
---|
242 | } |
---|
243 | } |
---|
244 | for (i = oddistart; i < iend; i += 2) { |
---|
245 | t1a = (double *) t2a[i]; |
---|
246 | t1b = (double *) t2b[i]; |
---|
247 | t1c = (double *) t2a[i - 1]; |
---|
248 | t1d = (double *) t2a[i + 1]; |
---|
249 | for (j = oddjstart; j < jend; j += 2) { |
---|
250 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
251 | oldval = t1a[j]; |
---|
252 | newval = a / factor; |
---|
253 | newerr = oldval - newval; |
---|
254 | t1a[j] = newval; |
---|
255 | if (fabs(newerr) > maxerr) { |
---|
256 | maxerr = fabs(newerr); |
---|
257 | } |
---|
258 | } |
---|
259 | } |
---|
260 | } else if (color == BLACK_ITER) { |
---|
261 | for (i = evenistart; i < iend; i += 2) { |
---|
262 | t1a = (double *) t2a[i]; |
---|
263 | t1b = (double *) t2b[i]; |
---|
264 | t1c = (double *) t2a[i - 1]; |
---|
265 | t1d = (double *) t2a[i + 1]; |
---|
266 | for (j = oddjstart; j < jend; j += 2) { |
---|
267 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
268 | oldval = t1a[j]; |
---|
269 | newval = a / factor; |
---|
270 | newerr = oldval - newval; |
---|
271 | t1a[j] = newval; |
---|
272 | if (fabs(newerr) > maxerr) { |
---|
273 | maxerr = fabs(newerr); |
---|
274 | } |
---|
275 | } |
---|
276 | } |
---|
277 | for (i = oddistart; i < iend; i += 2) { |
---|
278 | t1a = (double *) t2a[i]; |
---|
279 | t1b = (double *) t2b[i]; |
---|
280 | t1c = (double *) t2a[i - 1]; |
---|
281 | t1d = (double *) t2a[i + 1]; |
---|
282 | for (j = evenjstart; j < jend; j += 2) { |
---|
283 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
284 | oldval = t1a[j]; |
---|
285 | newval = a / factor; |
---|
286 | newerr = oldval - newval; |
---|
287 | t1a[j] = newval; |
---|
288 | if (fabs(newerr) > maxerr) { |
---|
289 | maxerr = fabs(newerr); |
---|
290 | } |
---|
291 | } |
---|
292 | } |
---|
293 | } |
---|
294 | *err = maxerr; |
---|
295 | } // end relax() |
---|
296 | |
---|
297 | //////////////////////////////////////////////////// |
---|
298 | // perform half-injection to next coarsest level |
---|
299 | //////////////////////////////////////////////////// |
---|
300 | void rescal(long kf, long procid) |
---|
301 | { |
---|
302 | long ic; |
---|
303 | long if17; |
---|
304 | long jf; |
---|
305 | long jc; |
---|
306 | long krc; |
---|
307 | long istart; |
---|
308 | long iend; |
---|
309 | long jstart; |
---|
310 | long jend; |
---|
311 | double hf; |
---|
312 | double s; |
---|
313 | double s1; |
---|
314 | double s2; |
---|
315 | double s3; |
---|
316 | double s4; |
---|
317 | double factor; |
---|
318 | double int1; |
---|
319 | double int2; |
---|
320 | double i_int_factor; |
---|
321 | double j_int_factor; |
---|
322 | long i_off; |
---|
323 | long j_off; |
---|
324 | long up_proc; |
---|
325 | long left_proc; |
---|
326 | long im; |
---|
327 | long jm; |
---|
328 | double temp; |
---|
329 | double temp2; |
---|
330 | double **t2a; |
---|
331 | double **t2b; |
---|
332 | double **t2c; |
---|
333 | double *t1a; |
---|
334 | double *t1b; |
---|
335 | double *t1c; |
---|
336 | double *t1d; |
---|
337 | double *t1e; |
---|
338 | double *t1f; |
---|
339 | double *t1g; |
---|
340 | double *t1h; |
---|
341 | |
---|
342 | krc = kf - 1; |
---|
343 | //hc = lev_res[krc]; |
---|
344 | hf = lev_res[kf]; |
---|
345 | i_off = gps[procid]->rownum * ypts_per_proc[krc]; |
---|
346 | j_off = gps[procid]->colnum * xpts_per_proc[krc]; |
---|
347 | up_proc = gps[procid]->neighbors[UP]; |
---|
348 | left_proc = gps[procid]->neighbors[LEFT]; |
---|
349 | im = (imx[kf] - 2) / yprocs; |
---|
350 | jm = (jmx[kf] - 2) / xprocs; |
---|
351 | |
---|
352 | istart = gps[procid]->rlist[krc]; |
---|
353 | jstart = gps[procid]->rljst[krc]; |
---|
354 | iend = gps[procid]->rlien[krc] - 1; |
---|
355 | jend = gps[procid]->rljen[krc] - 1; |
---|
356 | |
---|
357 | factor = 4.0 - eig2 * hf * hf; |
---|
358 | |
---|
359 | t2a = (double **) q_multi[procid][kf]; |
---|
360 | t2b = (double **) rhs_multi[procid][kf]; |
---|
361 | t2c = (double **) rhs_multi[procid][krc]; |
---|
362 | if17 = 2 * (istart - 1); |
---|
363 | for (ic = istart; ic <= iend; ic++) { |
---|
364 | if17 += 2; |
---|
365 | i_int_factor = (ic + i_off) * i_int_coeff[krc] * 0.5; |
---|
366 | jf = 2 * (jstart - 1); |
---|
367 | t1a = (double *) t2a[if17]; |
---|
368 | t1b = (double *) t2b[if17]; |
---|
369 | t1c = (double *) t2c[ic]; |
---|
370 | t1d = (double *) t2a[if17 - 1]; |
---|
371 | t1e = (double *) t2a[if17 + 1]; |
---|
372 | t1f = (double *) t2a[if17 - 2]; |
---|
373 | t1g = (double *) t2a[if17 - 3]; |
---|
374 | t1h = (double *) t2b[if17 - 2]; |
---|
375 | for (jc = jstart; jc <= jend; jc++) { |
---|
376 | jf += 2; |
---|
377 | j_int_factor = (jc + j_off) * j_int_coeff[krc] * 0.5; |
---|
378 | |
---|
379 | /* method of half-injection uses 2.0 instead of 4.0 */ |
---|
380 | |
---|
381 | /* do bilinear interpolation */ |
---|
382 | s = t1a[jf + 1] + t1a[jf - 1] + t1d[jf] + t1e[jf]; |
---|
383 | s1 = 2.0 * (t1b[jf] - s + factor * t1a[jf]); |
---|
384 | if (((if17 == 2) && (gps[procid]->neighbors[UP] == -1)) || ((jf == 2) && (gps[procid]->neighbors[LEFT] == -1))) { |
---|
385 | s2 = 0; |
---|
386 | s3 = 0; |
---|
387 | s4 = 0; |
---|
388 | } else if ((if17 == 2) || (jf == 2)) { |
---|
389 | if (jf == 2) { |
---|
390 | temp = q_multi[left_proc][kf][if17][jm - 1]; |
---|
391 | } else { |
---|
392 | temp = t1a[jf - 3]; |
---|
393 | } |
---|
394 | s = t1a[jf - 1] + temp + t1d[jf - 2] + t1e[jf - 2]; |
---|
395 | s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]); |
---|
396 | if (if17 == 2) { |
---|
397 | temp = q_multi[up_proc][kf][im - 1][jf]; |
---|
398 | } else { |
---|
399 | temp = t1g[jf]; |
---|
400 | } |
---|
401 | s = t1f[jf + 1] + t1f[jf - 1] + temp + t1d[jf]; |
---|
402 | s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); |
---|
403 | if (jf == 2) { |
---|
404 | temp = q_multi[left_proc][kf][if17 - 2][jm - 1]; |
---|
405 | } else { |
---|
406 | temp = t1f[jf - 3]; |
---|
407 | } |
---|
408 | if (if17 == 2) { |
---|
409 | temp2 = q_multi[up_proc][kf][im - 1][jf - 2]; |
---|
410 | } else { |
---|
411 | temp2 = t1g[jf - 2]; |
---|
412 | } |
---|
413 | s = t1f[jf - 1] + temp + temp2 + t1d[jf - 2]; |
---|
414 | s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]); |
---|
415 | } else { |
---|
416 | s = t1a[jf - 1] + t1a[jf - 3] + t1d[jf - 2] + t1e[jf - 2]; |
---|
417 | s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]); |
---|
418 | s = t1f[jf + 1] + t1f[jf - 1] + t1g[jf] + t1d[jf]; |
---|
419 | s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); |
---|
420 | s = t1f[jf - 1] + t1f[jf - 3] + t1g[jf - 2] + t1d[jf - 2]; |
---|
421 | s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]); |
---|
422 | } |
---|
423 | int1 = j_int_factor * s4 + (1.0 - j_int_factor) * s3; |
---|
424 | int2 = j_int_factor * s2 + (1.0 - j_int_factor) * s1; |
---|
425 | //int_val = i_int_factor*int1+(1.0-i_int_factor)*int2; |
---|
426 | t1c[jc] = i_int_factor * int1 + (1.0 - i_int_factor) * int2; |
---|
427 | } |
---|
428 | } |
---|
429 | } |
---|
430 | |
---|
431 | /* perform interpolation and addition to next finest grid */ |
---|
432 | void intadd(long kc, long procid) |
---|
433 | { |
---|
434 | long ic; |
---|
435 | long if17; |
---|
436 | long jf; |
---|
437 | long jc; |
---|
438 | long kf; |
---|
439 | long istart; |
---|
440 | long jstart; |
---|
441 | long iend; |
---|
442 | long jend; |
---|
443 | double int1; |
---|
444 | double int2; |
---|
445 | double i_int_factor1; |
---|
446 | double j_int_factor1; |
---|
447 | double i_int_factor2; |
---|
448 | double j_int_factor2; |
---|
449 | long i_off; |
---|
450 | long j_off; |
---|
451 | double **t2a; |
---|
452 | double **t2b; |
---|
453 | double *t1a; |
---|
454 | double *t1b; |
---|
455 | double *t1c; |
---|
456 | double *t1d; |
---|
457 | double *t1e; |
---|
458 | |
---|
459 | kf = kc + 1; |
---|
460 | //hc = lev_res[kc]; |
---|
461 | //hf = lev_res[kf]; |
---|
462 | |
---|
463 | istart = gps[procid]->rlist[kc]; |
---|
464 | jstart = gps[procid]->rljst[kc]; |
---|
465 | iend = gps[procid]->rlien[kc] - 1; |
---|
466 | jend = gps[procid]->rljen[kc] - 1; |
---|
467 | i_off = gps[procid]->rownum * ypts_per_proc[kc]; |
---|
468 | j_off = gps[procid]->colnum * xpts_per_proc[kc]; |
---|
469 | |
---|
470 | t2a = (double **) q_multi[procid][kc]; |
---|
471 | t2b = (double **) q_multi[procid][kf]; |
---|
472 | if17 = 2 * (istart - 1); |
---|
473 | for (ic = istart; ic <= iend; ic++) { |
---|
474 | if17 += 2; |
---|
475 | i_int_factor1 = ((imx[kc] - 2) - (ic + i_off - 1)) * (i_int_coeff[kf]); |
---|
476 | i_int_factor2 = (ic + i_off) * i_int_coeff[kf]; |
---|
477 | jf = 2 * (jstart - 1); |
---|
478 | |
---|
479 | t1a = (double *) t2a[ic]; |
---|
480 | t1b = (double *) t2a[ic - 1]; |
---|
481 | t1c = (double *) t2a[ic + 1]; |
---|
482 | t1d = (double *) t2b[if17]; |
---|
483 | t1e = (double *) t2b[if17 - 1]; |
---|
484 | for (jc = jstart; jc <= jend; jc++) { |
---|
485 | jf += 2; |
---|
486 | j_int_factor1 = ((jmx[kc] - 2) - (jc + j_off - 1)) * (j_int_coeff[kf]); |
---|
487 | j_int_factor2 = (jc + j_off) * j_int_coeff[kf]; |
---|
488 | |
---|
489 | int1 = j_int_factor1 * t1a[jc - 1] + (1.0 - j_int_factor1) * t1a[jc]; |
---|
490 | int2 = j_int_factor1 * t1b[jc - 1] + (1.0 - j_int_factor1) * t1b[jc]; |
---|
491 | t1e[jf - 1] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1; |
---|
492 | int2 = j_int_factor1 * t1c[jc - 1] + (1.0 - j_int_factor1) * t1c[jc]; |
---|
493 | t1d[jf - 1] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1; |
---|
494 | int1 = j_int_factor2 * t1a[jc + 1] + (1.0 - j_int_factor2) * t1a[jc]; |
---|
495 | int2 = j_int_factor2 * t1b[jc + 1] + (1.0 - j_int_factor2) * t1b[jc]; |
---|
496 | t1e[jf] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1; |
---|
497 | int2 = j_int_factor2 * t1c[jc + 1] + (1.0 - j_int_factor2) * t1c[jc]; |
---|
498 | t1d[jf] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1; |
---|
499 | } |
---|
500 | } |
---|
501 | } |
---|
502 | |
---|
503 | //////////////////////////////////////////// |
---|
504 | // initialize a grid to zero in parallel |
---|
505 | //////////////////////////////////////////// |
---|
506 | void putz(long k, long procid) |
---|
507 | { |
---|
508 | long i; |
---|
509 | long j; |
---|
510 | long istart; |
---|
511 | long jstart; |
---|
512 | long iend; |
---|
513 | long jend; |
---|
514 | double **t2a; |
---|
515 | double *t1a; |
---|
516 | |
---|
517 | istart = gps[procid]->rlist[k]; |
---|
518 | jstart = gps[procid]->rljst[k]; |
---|
519 | iend = gps[procid]->rlien[k]; |
---|
520 | jend = gps[procid]->rljen[k]; |
---|
521 | |
---|
522 | t2a = (double **) q_multi[procid][k]; |
---|
523 | for (i = istart; i <= iend; i++) |
---|
524 | { |
---|
525 | t1a = (double *) t2a[i]; |
---|
526 | for (j = jstart; j <= jend; j++) |
---|
527 | { |
---|
528 | t1a[j] = 0.0; |
---|
529 | } |
---|
530 | } |
---|
531 | } |
---|
532 | |
---|
533 | ////////////////////////////////////// |
---|
534 | void copy_borders(long k, long procid) |
---|
535 | { |
---|
536 | long i; |
---|
537 | long j; |
---|
538 | long jj; |
---|
539 | long im; |
---|
540 | long jm; |
---|
541 | long lastrow; |
---|
542 | long lastcol; |
---|
543 | double **t2a; |
---|
544 | double **t2b; |
---|
545 | double *t1a; |
---|
546 | double *t1b; |
---|
547 | |
---|
548 | im = (imx[k] - 2) / yprocs + 2; |
---|
549 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
550 | lastrow = (imx[k] - 2) / yprocs; |
---|
551 | lastcol = (jmx[k] - 2) / xprocs; |
---|
552 | |
---|
553 | t2a = (double **) q_multi[procid][k]; |
---|
554 | jj = gps[procid]->neighbors[UPLEFT]; |
---|
555 | if (jj != -1) { |
---|
556 | t2a[0][0] = q_multi[jj][k][im - 2][jm - 2]; |
---|
557 | } |
---|
558 | jj = gps[procid]->neighbors[UPRIGHT]; |
---|
559 | if (jj != -1) { |
---|
560 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][1]; |
---|
561 | } |
---|
562 | jj = gps[procid]->neighbors[DOWNLEFT]; |
---|
563 | if (jj != -1) { |
---|
564 | t2a[im - 1][0] = q_multi[jj][k][1][jm - 2]; |
---|
565 | } |
---|
566 | jj = gps[procid]->neighbors[DOWNRIGHT]; |
---|
567 | if (jj != -1) { |
---|
568 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][1]; |
---|
569 | } |
---|
570 | |
---|
571 | if (gps[procid]->neighbors[UP] == -1) { |
---|
572 | jj = gps[procid]->neighbors[LEFT]; |
---|
573 | if (jj != -1) { |
---|
574 | t2a[0][0] = q_multi[jj][k][0][jm - 2]; |
---|
575 | } else { |
---|
576 | jj = gps[procid]->neighbors[DOWN]; |
---|
577 | if (jj != -1) { |
---|
578 | t2a[im - 1][0] = q_multi[jj][k][1][0]; |
---|
579 | } |
---|
580 | } |
---|
581 | jj = gps[procid]->neighbors[RIGHT]; |
---|
582 | if (jj != -1) { |
---|
583 | t2a[0][jm - 1] = q_multi[jj][k][0][1]; |
---|
584 | } else { |
---|
585 | jj = gps[procid]->neighbors[DOWN]; |
---|
586 | if (jj != -1) { |
---|
587 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1]; |
---|
588 | } |
---|
589 | } |
---|
590 | } else if (gps[procid]->neighbors[DOWN] == -1) { |
---|
591 | jj = gps[procid]->neighbors[LEFT]; |
---|
592 | if (jj != -1) { |
---|
593 | t2a[im - 1][0] = q_multi[jj][k][im - 1][jm - 2]; |
---|
594 | } else { |
---|
595 | jj = gps[procid]->neighbors[UP]; |
---|
596 | if (jj != -1) { |
---|
597 | t2a[0][0] = q_multi[jj][k][im - 2][0]; |
---|
598 | } |
---|
599 | } |
---|
600 | jj = gps[procid]->neighbors[RIGHT]; |
---|
601 | if (jj != -1) { |
---|
602 | t2a[im - 1][jm - 1] = q_multi[jj][k][im - 1][1]; |
---|
603 | } else { |
---|
604 | jj = gps[procid]->neighbors[UP]; |
---|
605 | if (jj != -1) { |
---|
606 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1]; |
---|
607 | } |
---|
608 | } |
---|
609 | } else if (gps[procid]->neighbors[LEFT] == -1) { |
---|
610 | jj = gps[procid]->neighbors[UP]; |
---|
611 | if (jj != -1) { |
---|
612 | t2a[0][0] = q_multi[jj][k][im - 2][0]; |
---|
613 | } |
---|
614 | jj = gps[procid]->neighbors[DOWN]; |
---|
615 | if (jj != -1) { |
---|
616 | t2a[im - 1][0] = q_multi[jj][k][1][0]; |
---|
617 | } |
---|
618 | } else if (gps[procid]->neighbors[RIGHT] == -1) { |
---|
619 | jj = gps[procid]->neighbors[UP]; |
---|
620 | if (jj != -1) { |
---|
621 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1]; |
---|
622 | } |
---|
623 | jj = gps[procid]->neighbors[DOWN]; |
---|
624 | if (jj != -1) { |
---|
625 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1]; |
---|
626 | } |
---|
627 | } |
---|
628 | |
---|
629 | j = gps[procid]->neighbors[UP]; |
---|
630 | if (j != -1) { |
---|
631 | t1a = (double *) t2a[0]; |
---|
632 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
633 | for (i = 1; i <= lastcol; i++) { |
---|
634 | t1a[i] = t1b[i]; |
---|
635 | } |
---|
636 | } |
---|
637 | j = gps[procid]->neighbors[DOWN]; |
---|
638 | if (j != -1) { |
---|
639 | t1a = (double *) t2a[im - 1]; |
---|
640 | t1b = (double *) q_multi[j][k][1]; |
---|
641 | for (i = 1; i <= lastcol; i++) { |
---|
642 | t1a[i] = t1b[i]; |
---|
643 | } |
---|
644 | } |
---|
645 | j = gps[procid]->neighbors[LEFT]; |
---|
646 | if (j != -1) { |
---|
647 | t2b = (double **) q_multi[j][k]; |
---|
648 | for (i = 1; i <= lastrow; i++) { |
---|
649 | t2a[i][0] = t2b[i][jm - 2]; |
---|
650 | } |
---|
651 | } |
---|
652 | j = gps[procid]->neighbors[RIGHT]; |
---|
653 | if (j != -1) { |
---|
654 | t2b = (double **) q_multi[j][k]; |
---|
655 | for (i = 1; i <= lastrow; i++) { |
---|
656 | t2a[i][jm - 1] = t2b[i][1]; |
---|
657 | } |
---|
658 | } |
---|
659 | } // end copy_border() |
---|
660 | |
---|
661 | ////////////////////////////////////////// |
---|
662 | void copy_rhs_borders(long k, long procid) |
---|
663 | { |
---|
664 | long i; |
---|
665 | long j; |
---|
666 | long im; |
---|
667 | long jm; |
---|
668 | long lastrow; |
---|
669 | long lastcol; |
---|
670 | double **t2a; |
---|
671 | double **t2b; |
---|
672 | double *t1a; |
---|
673 | double *t1b; |
---|
674 | |
---|
675 | im = (imx[k] - 2) / yprocs + 2; |
---|
676 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
677 | lastrow = (imx[k] - 2) / yprocs; |
---|
678 | lastcol = (jmx[k] - 2) / xprocs; |
---|
679 | |
---|
680 | t2a = (double **) rhs_multi[procid][k]; |
---|
681 | if (gps[procid]->neighbors[UPLEFT] != -1) { |
---|
682 | j = gps[procid]->neighbors[UPLEFT]; |
---|
683 | t2a[0][0] = rhs_multi[j][k][im - 2][jm - 2]; |
---|
684 | } |
---|
685 | |
---|
686 | if (gps[procid]->neighbors[UP] != -1) { |
---|
687 | j = gps[procid]->neighbors[UP]; |
---|
688 | if (j != -1) { |
---|
689 | t1a = (double *) t2a[0]; |
---|
690 | t1b = (double *) rhs_multi[j][k][im - 2]; |
---|
691 | for (i = 2; i <= lastcol; i += 2) { |
---|
692 | t1a[i] = t1b[i]; |
---|
693 | } |
---|
694 | } |
---|
695 | } |
---|
696 | if (gps[procid]->neighbors[LEFT] != -1) { |
---|
697 | j = gps[procid]->neighbors[LEFT]; |
---|
698 | if (j != -1) { |
---|
699 | t2b = (double **) rhs_multi[j][k]; |
---|
700 | for (i = 2; i <= lastrow; i += 2) { |
---|
701 | t2a[i][0] = t2b[i][jm - 2]; |
---|
702 | } |
---|
703 | } |
---|
704 | } |
---|
705 | } |
---|
706 | |
---|
707 | ////////////////////////////////// |
---|
708 | void copy_red(long k, long procid) |
---|
709 | { |
---|
710 | long i; |
---|
711 | long j; |
---|
712 | long im; |
---|
713 | long jm; |
---|
714 | long lastrow; |
---|
715 | long lastcol; |
---|
716 | double **t2a; |
---|
717 | double **t2b; |
---|
718 | double *t1a; |
---|
719 | double *t1b; |
---|
720 | |
---|
721 | im = (imx[k] - 2) / yprocs + 2; |
---|
722 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
723 | lastrow = (imx[k] - 2) / yprocs; |
---|
724 | lastcol = (jmx[k] - 2) / xprocs; |
---|
725 | |
---|
726 | t2a = (double **) q_multi[procid][k]; |
---|
727 | j = gps[procid]->neighbors[UP]; |
---|
728 | if (j != -1) { |
---|
729 | t1a = (double *) t2a[0]; |
---|
730 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
731 | for (i = 2; i <= lastcol; i += 2) { |
---|
732 | t1a[i] = t1b[i]; |
---|
733 | } |
---|
734 | } |
---|
735 | j = gps[procid]->neighbors[DOWN]; |
---|
736 | if (j != -1) { |
---|
737 | t1a = (double *) t2a[im - 1]; |
---|
738 | t1b = (double *) q_multi[j][k][1]; |
---|
739 | for (i = 1; i <= lastcol; i += 2) { |
---|
740 | t1a[i] = t1b[i]; |
---|
741 | } |
---|
742 | } |
---|
743 | j = gps[procid]->neighbors[LEFT]; |
---|
744 | if (j != -1) { |
---|
745 | t2b = (double **) q_multi[j][k]; |
---|
746 | for (i = 2; i <= lastrow; i += 2) { |
---|
747 | t2a[i][0] = t2b[i][jm - 2]; |
---|
748 | } |
---|
749 | } |
---|
750 | j = gps[procid]->neighbors[RIGHT]; |
---|
751 | if (j != -1) { |
---|
752 | t2b = (double **) q_multi[j][k]; |
---|
753 | for (i = 1; i <= lastrow; i += 2) { |
---|
754 | t2a[i][jm - 1] = t2b[i][1]; |
---|
755 | } |
---|
756 | } |
---|
757 | } |
---|
758 | |
---|
759 | //////////////////////////////////// |
---|
760 | void copy_black(long k, long procid) |
---|
761 | { |
---|
762 | long i; |
---|
763 | long j; |
---|
764 | long im; |
---|
765 | long jm; |
---|
766 | long lastrow; |
---|
767 | long lastcol; |
---|
768 | double **t2a; |
---|
769 | double **t2b; |
---|
770 | double *t1a; |
---|
771 | double *t1b; |
---|
772 | |
---|
773 | im = (imx[k] - 2) / yprocs + 2; |
---|
774 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
775 | lastrow = (imx[k] - 2) / yprocs; |
---|
776 | lastcol = (jmx[k] - 2) / xprocs; |
---|
777 | |
---|
778 | t2a = (double **) q_multi[procid][k]; |
---|
779 | j = gps[procid]->neighbors[UP]; |
---|
780 | if (j != -1) { |
---|
781 | t1a = (double *) t2a[0]; |
---|
782 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
783 | for (i = 1; i <= lastcol; i += 2) { |
---|
784 | t1a[i] = t1b[i]; |
---|
785 | } |
---|
786 | } |
---|
787 | j = gps[procid]->neighbors[DOWN]; |
---|
788 | if (j != -1) { |
---|
789 | t1a = (double *) t2a[im - 1]; |
---|
790 | t1b = (double *) q_multi[j][k][1]; |
---|
791 | for (i = 2; i <= lastcol; i += 2) { |
---|
792 | t1a[i] = t1b[i]; |
---|
793 | } |
---|
794 | } |
---|
795 | j = gps[procid]->neighbors[LEFT]; |
---|
796 | if (j != -1) { |
---|
797 | t2b = (double **) q_multi[j][k]; |
---|
798 | for (i = 1; i <= lastrow; i += 2) { |
---|
799 | t2a[i][0] = t2b[i][jm - 2]; |
---|
800 | } |
---|
801 | } |
---|
802 | j = gps[procid]->neighbors[RIGHT]; |
---|
803 | if (j != -1) { |
---|
804 | t2b = (double **) q_multi[j][k]; |
---|
805 | for (i = 2; i <= lastrow; i += 2) { |
---|
806 | t2a[i][jm - 1] = t2b[i][1]; |
---|
807 | } |
---|
808 | } |
---|
809 | } |
---|