1 | /*************************************************************************/ |
---|
2 | /* */ |
---|
3 | /* Copyright (c) 1994 Stanford University */ |
---|
4 | /* */ |
---|
5 | /* All rights reserved. */ |
---|
6 | /* */ |
---|
7 | /* Permission is given to use, copy, and modify this software for any */ |
---|
8 | /* non-commercial purpose as long as this copyright notice is not */ |
---|
9 | /* removed. All other uses, including redistribution in whole or in */ |
---|
10 | /* part, are forbidden without prior written permission. */ |
---|
11 | /* */ |
---|
12 | /* This software is provided with absolutely no warranty and no */ |
---|
13 | /* support. */ |
---|
14 | /* */ |
---|
15 | /*************************************************************************/ |
---|
16 | |
---|
17 | /* Shared memory implementation of the multigrid method |
---|
18 | Implementation uses red-black gauss-seidel relaxation |
---|
19 | iterations, w cycles, and the method of half-injection for |
---|
20 | residual computation. */ |
---|
21 | |
---|
22 | EXTERN_ENV |
---|
23 | |
---|
24 | #include <stdio.h> |
---|
25 | #include <math.h> |
---|
26 | #include <stdlib.h> |
---|
27 | |
---|
28 | #include "decs.h" |
---|
29 | |
---|
30 | /* perform multigrid (w cycles) */ |
---|
31 | void multig(long my_id) |
---|
32 | { |
---|
33 | long iter; |
---|
34 | double wu; |
---|
35 | double errp; |
---|
36 | long m; |
---|
37 | long flag; |
---|
38 | long k; |
---|
39 | long my_num; |
---|
40 | double wmax; |
---|
41 | double local_err; |
---|
42 | double red_local_err; |
---|
43 | double black_local_err; |
---|
44 | double g_error; |
---|
45 | |
---|
46 | flag = 0; |
---|
47 | iter = 0; |
---|
48 | m = numlev - 1; |
---|
49 | wmax = maxwork; |
---|
50 | my_num = my_id; |
---|
51 | wu = 0.0; |
---|
52 | |
---|
53 | k = m; |
---|
54 | g_error = 1.0e30; |
---|
55 | while (!flag) { |
---|
56 | errp = g_error; |
---|
57 | iter++; |
---|
58 | if (my_num == MASTER) { |
---|
59 | multi->err_multi = 0.0; |
---|
60 | } |
---|
61 | |
---|
62 | /* barrier to make sure all procs have finished intadd or rescal */ |
---|
63 | /* before proceeding with relaxation */ |
---|
64 | #if defined(MULTIPLE_BARRIERS) |
---|
65 | BARRIER(bars->error_barrier, nprocs) |
---|
66 | #else |
---|
67 | BARRIER(bars->barrier, nprocs) |
---|
68 | #endif |
---|
69 | copy_black(k, my_num); |
---|
70 | |
---|
71 | relax(k, &red_local_err, RED_ITER, my_num); |
---|
72 | |
---|
73 | /* barrier to make sure all red computations have been performed */ |
---|
74 | #if defined(MULTIPLE_BARRIERS) |
---|
75 | BARRIER(bars->error_barrier, nprocs) |
---|
76 | #else |
---|
77 | BARRIER(bars->barrier, nprocs) |
---|
78 | #endif |
---|
79 | copy_red(k, my_num); |
---|
80 | |
---|
81 | relax(k, &black_local_err, BLACK_ITER, my_num); |
---|
82 | |
---|
83 | /* compute max local error from red_local_err and black_local_err */ |
---|
84 | |
---|
85 | if (red_local_err > black_local_err) { |
---|
86 | local_err = red_local_err; |
---|
87 | } else { |
---|
88 | local_err = black_local_err; |
---|
89 | } |
---|
90 | |
---|
91 | /* update the global error if necessary */ |
---|
92 | |
---|
93 | LOCK(locks->error_lock) |
---|
94 | if (local_err > multi->err_multi) { |
---|
95 | multi->err_multi = local_err; |
---|
96 | } |
---|
97 | UNLOCK(locks->error_lock) |
---|
98 | |
---|
99 | /* a single relaxation sweep at the finest level is one unit of */ |
---|
100 | /* work */ |
---|
101 | wu += pow((double) 4.0, (double) k - m); |
---|
102 | |
---|
103 | /* barrier to make sure all processors have checked local error */ |
---|
104 | #if defined(MULTIPLE_BARRIERS) |
---|
105 | BARRIER(bars->error_barrier, nprocs) |
---|
106 | #else |
---|
107 | BARRIER(bars->barrier, nprocs) |
---|
108 | #endif |
---|
109 | g_error = multi->err_multi; |
---|
110 | |
---|
111 | /* barrier to make sure master does not cycle back to top of loop */ |
---|
112 | /* and reset global->err before we read it and decide what to do */ |
---|
113 | #if defined(MULTIPLE_BARRIERS) |
---|
114 | BARRIER(bars->error_barrier, nprocs) |
---|
115 | #else |
---|
116 | BARRIER(bars->barrier, nprocs) |
---|
117 | #endif |
---|
118 | if (g_error >= lev_tol[k]) { |
---|
119 | if (wu > wmax) { |
---|
120 | /* max work exceeded */ |
---|
121 | fprintf(stderr, "ERROR: Maximum work limit %0.5f exceeded\n", wmax); |
---|
122 | exit(-1); |
---|
123 | } else { |
---|
124 | /* if we have not converged */ |
---|
125 | if ((k != 0) && (g_error / errp >= 0.6) && (k > minlevel)) { |
---|
126 | /* if need to go to coarser grid */ |
---|
127 | |
---|
128 | copy_borders(k, my_num); |
---|
129 | copy_rhs_borders(k, my_num); |
---|
130 | |
---|
131 | /* This bar is needed because the routine rescal uses the neighbor's |
---|
132 | border points to compute s4. We must ensure that the neighbor's |
---|
133 | border points have been written before we try computing the new |
---|
134 | rescal values */ |
---|
135 | |
---|
136 | #if defined(MULTIPLE_BARRIERS) |
---|
137 | BARRIER(bars->error_barrier, nprocs) |
---|
138 | #else |
---|
139 | BARRIER(bars->barrier, nprocs) |
---|
140 | #endif |
---|
141 | rescal(k, my_num); |
---|
142 | |
---|
143 | /* transfer residual to rhs of coarser grid */ |
---|
144 | lev_tol[k - 1] = 0.3 * g_error; |
---|
145 | k = k - 1; |
---|
146 | putz(k, my_num); |
---|
147 | /* make initial guess on coarser grid zero */ |
---|
148 | g_error = 1.0e30; |
---|
149 | } |
---|
150 | } |
---|
151 | } else { |
---|
152 | /* if we have converged at this level */ |
---|
153 | if (k == m) { |
---|
154 | /* if finest grid, we are done */ |
---|
155 | flag = 1; |
---|
156 | } else { |
---|
157 | /* else go to next finest grid */ |
---|
158 | |
---|
159 | copy_borders(k, my_num); |
---|
160 | |
---|
161 | intadd(k, my_num); |
---|
162 | /* changes the grid values at the finer level. rhs at finer level */ |
---|
163 | /* remains what it already is */ |
---|
164 | k++; |
---|
165 | g_error = 1.0e30; |
---|
166 | } |
---|
167 | } |
---|
168 | } |
---|
169 | if (do_output) { |
---|
170 | if (my_num == MASTER) { |
---|
171 | printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter, k, multi->err_multi, wu); |
---|
172 | } |
---|
173 | } |
---|
174 | } |
---|
175 | |
---|
176 | /* perform red or black iteration (not both) */ |
---|
177 | void relax(long k, double *err, long color, long my_num) |
---|
178 | { |
---|
179 | long i; |
---|
180 | long j; |
---|
181 | long iend; |
---|
182 | long jend; |
---|
183 | long oddistart; |
---|
184 | long oddjstart; |
---|
185 | long evenistart; |
---|
186 | long evenjstart; |
---|
187 | double a; |
---|
188 | double h; |
---|
189 | double factor; |
---|
190 | double maxerr; |
---|
191 | double newerr; |
---|
192 | double oldval; |
---|
193 | double newval; |
---|
194 | double **t2a; |
---|
195 | double **t2b; |
---|
196 | double *t1a; |
---|
197 | double *t1b; |
---|
198 | double *t1c; |
---|
199 | double *t1d; |
---|
200 | |
---|
201 | i = 0; |
---|
202 | j = 0; |
---|
203 | |
---|
204 | *err = 0.0; |
---|
205 | h = lev_res[k]; |
---|
206 | |
---|
207 | /* points whose sum of row and col index is even do a red iteration, */ |
---|
208 | /* others do a black */ |
---|
209 | |
---|
210 | evenistart = gp[my_num].eist[k]; |
---|
211 | evenjstart = gp[my_num].ejst[k]; |
---|
212 | oddistart = gp[my_num].oist[k]; |
---|
213 | oddjstart = gp[my_num].ojst[k]; |
---|
214 | |
---|
215 | iend = gp[my_num].rlien[k]; |
---|
216 | jend = gp[my_num].rljen[k]; |
---|
217 | |
---|
218 | factor = 4.0 - eig2 * h * h; |
---|
219 | maxerr = 0.0; |
---|
220 | t2a = (double **) q_multi[my_num][k]; |
---|
221 | t2b = (double **) rhs_multi[my_num][k]; |
---|
222 | if (color == RED_ITER) { |
---|
223 | for (i = evenistart; i < iend; i += 2) { |
---|
224 | t1a = (double *) t2a[i]; |
---|
225 | t1b = (double *) t2b[i]; |
---|
226 | t1c = (double *) t2a[i - 1]; |
---|
227 | t1d = (double *) t2a[i + 1]; |
---|
228 | for (j = evenjstart; j < jend; j += 2) { |
---|
229 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
230 | oldval = t1a[j]; |
---|
231 | newval = a / factor; |
---|
232 | newerr = oldval - newval; |
---|
233 | t1a[j] = newval; |
---|
234 | if (fabs(newerr) > maxerr) { |
---|
235 | maxerr = fabs(newerr); |
---|
236 | } |
---|
237 | } |
---|
238 | } |
---|
239 | for (i = oddistart; i < iend; i += 2) { |
---|
240 | t1a = (double *) t2a[i]; |
---|
241 | t1b = (double *) t2b[i]; |
---|
242 | t1c = (double *) t2a[i - 1]; |
---|
243 | t1d = (double *) t2a[i + 1]; |
---|
244 | for (j = oddjstart; j < jend; j += 2) { |
---|
245 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
246 | oldval = t1a[j]; |
---|
247 | newval = a / factor; |
---|
248 | newerr = oldval - newval; |
---|
249 | t1a[j] = newval; |
---|
250 | if (fabs(newerr) > maxerr) { |
---|
251 | maxerr = fabs(newerr); |
---|
252 | } |
---|
253 | } |
---|
254 | } |
---|
255 | } else if (color == BLACK_ITER) { |
---|
256 | for (i = evenistart; i < iend; i += 2) { |
---|
257 | t1a = (double *) t2a[i]; |
---|
258 | t1b = (double *) t2b[i]; |
---|
259 | t1c = (double *) t2a[i - 1]; |
---|
260 | t1d = (double *) t2a[i + 1]; |
---|
261 | for (j = oddjstart; j < jend; j += 2) { |
---|
262 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
263 | oldval = t1a[j]; |
---|
264 | newval = a / factor; |
---|
265 | newerr = oldval - newval; |
---|
266 | t1a[j] = newval; |
---|
267 | if (fabs(newerr) > maxerr) { |
---|
268 | maxerr = fabs(newerr); |
---|
269 | } |
---|
270 | } |
---|
271 | } |
---|
272 | for (i = oddistart; i < iend; i += 2) { |
---|
273 | t1a = (double *) t2a[i]; |
---|
274 | t1b = (double *) t2b[i]; |
---|
275 | t1c = (double *) t2a[i - 1]; |
---|
276 | t1d = (double *) t2a[i + 1]; |
---|
277 | for (j = evenjstart; j < jend; j += 2) { |
---|
278 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
279 | oldval = t1a[j]; |
---|
280 | newval = a / factor; |
---|
281 | newerr = oldval - newval; |
---|
282 | t1a[j] = newval; |
---|
283 | if (fabs(newerr) > maxerr) { |
---|
284 | maxerr = fabs(newerr); |
---|
285 | } |
---|
286 | } |
---|
287 | } |
---|
288 | } |
---|
289 | *err = maxerr; |
---|
290 | } |
---|
291 | |
---|
292 | /* perform half-injection to next coarsest level */ |
---|
293 | void rescal(long kf, long my_num) |
---|
294 | { |
---|
295 | long ic; |
---|
296 | long if17; |
---|
297 | long jf; |
---|
298 | long jc; |
---|
299 | long krc; |
---|
300 | long istart; |
---|
301 | long iend; |
---|
302 | long jstart; |
---|
303 | long jend; |
---|
304 | double hf; |
---|
305 | double s; |
---|
306 | double s1; |
---|
307 | double s2; |
---|
308 | double s3; |
---|
309 | double s4; |
---|
310 | double factor; |
---|
311 | double int1; |
---|
312 | double int2; |
---|
313 | double i_int_factor; |
---|
314 | double j_int_factor; |
---|
315 | long i_off; |
---|
316 | long j_off; |
---|
317 | long up_proc; |
---|
318 | long left_proc; |
---|
319 | long im; |
---|
320 | long jm; |
---|
321 | double temp; |
---|
322 | double temp2; |
---|
323 | double **t2a; |
---|
324 | double **t2b; |
---|
325 | double **t2c; |
---|
326 | double *t1a; |
---|
327 | double *t1b; |
---|
328 | double *t1c; |
---|
329 | double *t1d; |
---|
330 | double *t1e; |
---|
331 | double *t1f; |
---|
332 | double *t1g; |
---|
333 | double *t1h; |
---|
334 | |
---|
335 | krc = kf - 1; |
---|
336 | //hc = lev_res[krc]; |
---|
337 | hf = lev_res[kf]; |
---|
338 | i_off = (*gp[my_num].rownum) * ypts_per_proc[krc]; |
---|
339 | j_off = (*gp[my_num].colnum) * xpts_per_proc[krc]; |
---|
340 | up_proc = gp[my_num].neighbors[UP]; |
---|
341 | left_proc = gp[my_num].neighbors[LEFT]; |
---|
342 | im = (imx[kf] - 2) / yprocs; |
---|
343 | jm = (jmx[kf] - 2) / xprocs; |
---|
344 | |
---|
345 | istart = gp[my_num].rlist[krc]; |
---|
346 | jstart = gp[my_num].rljst[krc]; |
---|
347 | iend = gp[my_num].rlien[krc] - 1; |
---|
348 | jend = gp[my_num].rljen[krc] - 1; |
---|
349 | |
---|
350 | factor = 4.0 - eig2 * hf * hf; |
---|
351 | |
---|
352 | t2a = (double **) q_multi[my_num][kf]; |
---|
353 | t2b = (double **) rhs_multi[my_num][kf]; |
---|
354 | t2c = (double **) rhs_multi[my_num][krc]; |
---|
355 | if17 = 2 * (istart - 1); |
---|
356 | for (ic = istart; ic <= iend; ic++) { |
---|
357 | if17 += 2; |
---|
358 | i_int_factor = (ic + i_off) * i_int_coeff[krc] * 0.5; |
---|
359 | jf = 2 * (jstart - 1); |
---|
360 | t1a = (double *) t2a[if17]; |
---|
361 | t1b = (double *) t2b[if17]; |
---|
362 | t1c = (double *) t2c[ic]; |
---|
363 | t1d = (double *) t2a[if17 - 1]; |
---|
364 | t1e = (double *) t2a[if17 + 1]; |
---|
365 | t1f = (double *) t2a[if17 - 2]; |
---|
366 | t1g = (double *) t2a[if17 - 3]; |
---|
367 | t1h = (double *) t2b[if17 - 2]; |
---|
368 | for (jc = jstart; jc <= jend; jc++) { |
---|
369 | jf += 2; |
---|
370 | j_int_factor = (jc + j_off) * j_int_coeff[krc] * 0.5; |
---|
371 | |
---|
372 | /* method of half-injection uses 2.0 instead of 4.0 */ |
---|
373 | |
---|
374 | /* do bilinear interpolation */ |
---|
375 | s = t1a[jf + 1] + t1a[jf - 1] + t1d[jf] + t1e[jf]; |
---|
376 | s1 = 2.0 * (t1b[jf] - s + factor * t1a[jf]); |
---|
377 | if (((if17 == 2) && (gp[my_num].neighbors[UP] == -1)) || ((jf == 2) && (gp[my_num].neighbors[LEFT] == -1))) { |
---|
378 | s2 = 0; |
---|
379 | s3 = 0; |
---|
380 | s4 = 0; |
---|
381 | } else if ((if17 == 2) || (jf == 2)) { |
---|
382 | if (jf == 2) { |
---|
383 | temp = q_multi[left_proc][kf][if17][jm - 1]; |
---|
384 | } else { |
---|
385 | temp = t1a[jf - 3]; |
---|
386 | } |
---|
387 | s = t1a[jf - 1] + temp + t1d[jf - 2] + t1e[jf - 2]; |
---|
388 | s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]); |
---|
389 | if (if17 == 2) { |
---|
390 | temp = q_multi[up_proc][kf][im - 1][jf]; |
---|
391 | } else { |
---|
392 | temp = t1g[jf]; |
---|
393 | } |
---|
394 | s = t1f[jf + 1] + t1f[jf - 1] + temp + t1d[jf]; |
---|
395 | s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); |
---|
396 | if (jf == 2) { |
---|
397 | temp = q_multi[left_proc][kf][if17 - 2][jm - 1]; |
---|
398 | } else { |
---|
399 | temp = t1f[jf - 3]; |
---|
400 | } |
---|
401 | if (if17 == 2) { |
---|
402 | temp2 = q_multi[up_proc][kf][im - 1][jf - 2]; |
---|
403 | } else { |
---|
404 | temp2 = t1g[jf - 2]; |
---|
405 | } |
---|
406 | s = t1f[jf - 1] + temp + temp2 + t1d[jf - 2]; |
---|
407 | s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]); |
---|
408 | } else { |
---|
409 | s = t1a[jf - 1] + t1a[jf - 3] + t1d[jf - 2] + t1e[jf - 2]; |
---|
410 | s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]); |
---|
411 | s = t1f[jf + 1] + t1f[jf - 1] + t1g[jf] + t1d[jf]; |
---|
412 | s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); |
---|
413 | s = t1f[jf - 1] + t1f[jf - 3] + t1g[jf - 2] + t1d[jf - 2]; |
---|
414 | s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]); |
---|
415 | } |
---|
416 | int1 = j_int_factor * s4 + (1.0 - j_int_factor) * s3; |
---|
417 | int2 = j_int_factor * s2 + (1.0 - j_int_factor) * s1; |
---|
418 | //int_val = i_int_factor*int1+(1.0-i_int_factor)*int2; |
---|
419 | t1c[jc] = i_int_factor * int1 + (1.0 - i_int_factor) * int2; |
---|
420 | } |
---|
421 | } |
---|
422 | } |
---|
423 | |
---|
424 | /* perform interpolation and addition to next finest grid */ |
---|
425 | void intadd(long kc, long my_num) |
---|
426 | { |
---|
427 | long ic; |
---|
428 | long if17; |
---|
429 | long jf; |
---|
430 | long jc; |
---|
431 | long kf; |
---|
432 | long istart; |
---|
433 | long jstart; |
---|
434 | long iend; |
---|
435 | long jend; |
---|
436 | double int1; |
---|
437 | double int2; |
---|
438 | double i_int_factor1; |
---|
439 | double j_int_factor1; |
---|
440 | double i_int_factor2; |
---|
441 | double j_int_factor2; |
---|
442 | long i_off; |
---|
443 | long j_off; |
---|
444 | double **t2a; |
---|
445 | double **t2b; |
---|
446 | double *t1a; |
---|
447 | double *t1b; |
---|
448 | double *t1c; |
---|
449 | double *t1d; |
---|
450 | double *t1e; |
---|
451 | |
---|
452 | kf = kc + 1; |
---|
453 | //hc = lev_res[kc]; |
---|
454 | //hf = lev_res[kf]; |
---|
455 | |
---|
456 | istart = gp[my_num].rlist[kc]; |
---|
457 | jstart = gp[my_num].rljst[kc]; |
---|
458 | iend = gp[my_num].rlien[kc] - 1; |
---|
459 | jend = gp[my_num].rljen[kc] - 1; |
---|
460 | i_off = (*gp[my_num].rownum) * ypts_per_proc[kc]; |
---|
461 | j_off = (*gp[my_num].colnum) * xpts_per_proc[kc]; |
---|
462 | |
---|
463 | t2a = (double **) q_multi[my_num][kc]; |
---|
464 | t2b = (double **) q_multi[my_num][kf]; |
---|
465 | if17 = 2 * (istart - 1); |
---|
466 | for (ic = istart; ic <= iend; ic++) { |
---|
467 | if17 += 2; |
---|
468 | i_int_factor1 = ((imx[kc] - 2) - (ic + i_off - 1)) * (i_int_coeff[kf]); |
---|
469 | i_int_factor2 = (ic + i_off) * i_int_coeff[kf]; |
---|
470 | jf = 2 * (jstart - 1); |
---|
471 | |
---|
472 | t1a = (double *) t2a[ic]; |
---|
473 | t1b = (double *) t2a[ic - 1]; |
---|
474 | t1c = (double *) t2a[ic + 1]; |
---|
475 | t1d = (double *) t2b[if17]; |
---|
476 | t1e = (double *) t2b[if17 - 1]; |
---|
477 | for (jc = jstart; jc <= jend; jc++) { |
---|
478 | jf += 2; |
---|
479 | j_int_factor1 = ((jmx[kc] - 2) - (jc + j_off - 1)) * (j_int_coeff[kf]); |
---|
480 | j_int_factor2 = (jc + j_off) * j_int_coeff[kf]; |
---|
481 | |
---|
482 | int1 = j_int_factor1 * t1a[jc - 1] + (1.0 - j_int_factor1) * t1a[jc]; |
---|
483 | int2 = j_int_factor1 * t1b[jc - 1] + (1.0 - j_int_factor1) * t1b[jc]; |
---|
484 | t1e[jf - 1] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1; |
---|
485 | int2 = j_int_factor1 * t1c[jc - 1] + (1.0 - j_int_factor1) * t1c[jc]; |
---|
486 | t1d[jf - 1] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1; |
---|
487 | int1 = j_int_factor2 * t1a[jc + 1] + (1.0 - j_int_factor2) * t1a[jc]; |
---|
488 | int2 = j_int_factor2 * t1b[jc + 1] + (1.0 - j_int_factor2) * t1b[jc]; |
---|
489 | t1e[jf] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1; |
---|
490 | int2 = j_int_factor2 * t1c[jc + 1] + (1.0 - j_int_factor2) * t1c[jc]; |
---|
491 | t1d[jf] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1; |
---|
492 | } |
---|
493 | } |
---|
494 | } |
---|
495 | |
---|
496 | /* initialize a grid to zero in parallel */ |
---|
497 | void putz(long k, long my_num) |
---|
498 | { |
---|
499 | long i; |
---|
500 | long j; |
---|
501 | long istart; |
---|
502 | long jstart; |
---|
503 | long iend; |
---|
504 | long jend; |
---|
505 | double **t2a; |
---|
506 | double *t1a; |
---|
507 | |
---|
508 | istart = gp[my_num].rlist[k]; |
---|
509 | jstart = gp[my_num].rljst[k]; |
---|
510 | iend = gp[my_num].rlien[k]; |
---|
511 | jend = gp[my_num].rljen[k]; |
---|
512 | |
---|
513 | t2a = (double **) q_multi[my_num][k]; |
---|
514 | for (i = istart; i <= iend; i++) { |
---|
515 | t1a = (double *) t2a[i]; |
---|
516 | for (j = jstart; j <= jend; j++) { |
---|
517 | t1a[j] = 0.0; |
---|
518 | } |
---|
519 | } |
---|
520 | } |
---|
521 | |
---|
522 | void copy_borders(long k, long pid) |
---|
523 | { |
---|
524 | long i; |
---|
525 | long j; |
---|
526 | long jj; |
---|
527 | long im; |
---|
528 | long jm; |
---|
529 | long lastrow; |
---|
530 | long lastcol; |
---|
531 | double **t2a; |
---|
532 | double **t2b; |
---|
533 | double *t1a; |
---|
534 | double *t1b; |
---|
535 | |
---|
536 | im = (imx[k] - 2) / yprocs + 2; |
---|
537 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
538 | lastrow = (imx[k] - 2) / yprocs; |
---|
539 | lastcol = (jmx[k] - 2) / xprocs; |
---|
540 | |
---|
541 | t2a = (double **) q_multi[pid][k]; |
---|
542 | jj = gp[pid].neighbors[UPLEFT]; |
---|
543 | if (jj != -1) { |
---|
544 | t2a[0][0] = q_multi[jj][k][im - 2][jm - 2]; |
---|
545 | } |
---|
546 | jj = gp[pid].neighbors[UPRIGHT]; |
---|
547 | if (jj != -1) { |
---|
548 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][1]; |
---|
549 | } |
---|
550 | jj = gp[pid].neighbors[DOWNLEFT]; |
---|
551 | if (jj != -1) { |
---|
552 | t2a[im - 1][0] = q_multi[jj][k][1][jm - 2]; |
---|
553 | } |
---|
554 | jj = gp[pid].neighbors[DOWNRIGHT]; |
---|
555 | if (jj != -1) { |
---|
556 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][1]; |
---|
557 | } |
---|
558 | |
---|
559 | if (gp[pid].neighbors[UP] == -1) { |
---|
560 | jj = gp[pid].neighbors[LEFT]; |
---|
561 | if (jj != -1) { |
---|
562 | t2a[0][0] = q_multi[jj][k][0][jm - 2]; |
---|
563 | } else { |
---|
564 | jj = gp[pid].neighbors[DOWN]; |
---|
565 | if (jj != -1) { |
---|
566 | t2a[im - 1][0] = q_multi[jj][k][1][0]; |
---|
567 | } |
---|
568 | } |
---|
569 | jj = gp[pid].neighbors[RIGHT]; |
---|
570 | if (jj != -1) { |
---|
571 | t2a[0][jm - 1] = q_multi[jj][k][0][1]; |
---|
572 | } else { |
---|
573 | jj = gp[pid].neighbors[DOWN]; |
---|
574 | if (jj != -1) { |
---|
575 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1]; |
---|
576 | } |
---|
577 | } |
---|
578 | } else if (gp[pid].neighbors[DOWN] == -1) { |
---|
579 | jj = gp[pid].neighbors[LEFT]; |
---|
580 | if (jj != -1) { |
---|
581 | t2a[im - 1][0] = q_multi[jj][k][im - 1][jm - 2]; |
---|
582 | } else { |
---|
583 | jj = gp[pid].neighbors[UP]; |
---|
584 | if (jj != -1) { |
---|
585 | t2a[0][0] = q_multi[jj][k][im - 2][0]; |
---|
586 | } |
---|
587 | } |
---|
588 | jj = gp[pid].neighbors[RIGHT]; |
---|
589 | if (jj != -1) { |
---|
590 | t2a[im - 1][jm - 1] = q_multi[jj][k][im - 1][1]; |
---|
591 | } else { |
---|
592 | jj = gp[pid].neighbors[UP]; |
---|
593 | if (jj != -1) { |
---|
594 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1]; |
---|
595 | } |
---|
596 | } |
---|
597 | } else if (gp[pid].neighbors[LEFT] == -1) { |
---|
598 | jj = gp[pid].neighbors[UP]; |
---|
599 | if (jj != -1) { |
---|
600 | t2a[0][0] = q_multi[jj][k][im - 2][0]; |
---|
601 | } |
---|
602 | jj = gp[pid].neighbors[DOWN]; |
---|
603 | if (jj != -1) { |
---|
604 | t2a[im - 1][0] = q_multi[jj][k][1][0]; |
---|
605 | } |
---|
606 | } else if (gp[pid].neighbors[RIGHT] == -1) { |
---|
607 | jj = gp[pid].neighbors[UP]; |
---|
608 | if (jj != -1) { |
---|
609 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1]; |
---|
610 | } |
---|
611 | jj = gp[pid].neighbors[DOWN]; |
---|
612 | if (jj != -1) { |
---|
613 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1]; |
---|
614 | } |
---|
615 | } |
---|
616 | |
---|
617 | j = gp[pid].neighbors[UP]; |
---|
618 | if (j != -1) { |
---|
619 | t1a = (double *) t2a[0]; |
---|
620 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
621 | for (i = 1; i <= lastcol; i++) { |
---|
622 | t1a[i] = t1b[i]; |
---|
623 | } |
---|
624 | } |
---|
625 | j = gp[pid].neighbors[DOWN]; |
---|
626 | if (j != -1) { |
---|
627 | t1a = (double *) t2a[im - 1]; |
---|
628 | t1b = (double *) q_multi[j][k][1]; |
---|
629 | for (i = 1; i <= lastcol; i++) { |
---|
630 | t1a[i] = t1b[i]; |
---|
631 | } |
---|
632 | } |
---|
633 | j = gp[pid].neighbors[LEFT]; |
---|
634 | if (j != -1) { |
---|
635 | t2b = (double **) q_multi[j][k]; |
---|
636 | for (i = 1; i <= lastrow; i++) { |
---|
637 | t2a[i][0] = t2b[i][jm - 2]; |
---|
638 | } |
---|
639 | } |
---|
640 | j = gp[pid].neighbors[RIGHT]; |
---|
641 | if (j != -1) { |
---|
642 | t2b = (double **) q_multi[j][k]; |
---|
643 | for (i = 1; i <= lastrow; i++) { |
---|
644 | t2a[i][jm - 1] = t2b[i][1]; |
---|
645 | } |
---|
646 | } |
---|
647 | |
---|
648 | } |
---|
649 | |
---|
650 | void copy_rhs_borders(long k, long procid) |
---|
651 | { |
---|
652 | long i; |
---|
653 | long j; |
---|
654 | long im; |
---|
655 | long jm; |
---|
656 | long lastrow; |
---|
657 | long lastcol; |
---|
658 | double **t2a; |
---|
659 | double **t2b; |
---|
660 | double *t1a; |
---|
661 | double *t1b; |
---|
662 | |
---|
663 | im = (imx[k] - 2) / yprocs + 2; |
---|
664 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
665 | lastrow = (imx[k] - 2) / yprocs; |
---|
666 | lastcol = (jmx[k] - 2) / xprocs; |
---|
667 | |
---|
668 | t2a = (double **) rhs_multi[procid][k]; |
---|
669 | if (gp[procid].neighbors[UPLEFT] != -1) { |
---|
670 | j = gp[procid].neighbors[UPLEFT]; |
---|
671 | t2a[0][0] = rhs_multi[j][k][im - 2][jm - 2]; |
---|
672 | } |
---|
673 | |
---|
674 | if (gp[procid].neighbors[UP] != -1) { |
---|
675 | j = gp[procid].neighbors[UP]; |
---|
676 | if (j != -1) { |
---|
677 | t1a = (double *) t2a[0]; |
---|
678 | t1b = (double *) rhs_multi[j][k][im - 2]; |
---|
679 | for (i = 2; i <= lastcol; i += 2) { |
---|
680 | t1a[i] = t1b[i]; |
---|
681 | } |
---|
682 | } |
---|
683 | } |
---|
684 | if (gp[procid].neighbors[LEFT] != -1) { |
---|
685 | j = gp[procid].neighbors[LEFT]; |
---|
686 | if (j != -1) { |
---|
687 | t2b = (double **) rhs_multi[j][k]; |
---|
688 | for (i = 2; i <= lastrow; i += 2) { |
---|
689 | t2a[i][0] = t2b[i][jm - 2]; |
---|
690 | } |
---|
691 | } |
---|
692 | } |
---|
693 | } |
---|
694 | |
---|
695 | void copy_red(long k, long procid) |
---|
696 | { |
---|
697 | long i; |
---|
698 | long j; |
---|
699 | long im; |
---|
700 | long jm; |
---|
701 | long lastrow; |
---|
702 | long lastcol; |
---|
703 | double **t2a; |
---|
704 | double **t2b; |
---|
705 | double *t1a; |
---|
706 | double *t1b; |
---|
707 | |
---|
708 | im = (imx[k] - 2) / yprocs + 2; |
---|
709 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
710 | lastrow = (imx[k] - 2) / yprocs; |
---|
711 | lastcol = (jmx[k] - 2) / xprocs; |
---|
712 | |
---|
713 | t2a = (double **) q_multi[procid][k]; |
---|
714 | j = gp[procid].neighbors[UP]; |
---|
715 | if (j != -1) { |
---|
716 | t1a = (double *) t2a[0]; |
---|
717 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
718 | for (i = 2; i <= lastcol; i += 2) { |
---|
719 | t1a[i] = t1b[i]; |
---|
720 | } |
---|
721 | } |
---|
722 | j = gp[procid].neighbors[DOWN]; |
---|
723 | if (j != -1) { |
---|
724 | t1a = (double *) t2a[im - 1]; |
---|
725 | t1b = (double *) q_multi[j][k][1]; |
---|
726 | for (i = 1; i <= lastcol; i += 2) { |
---|
727 | t1a[i] = t1b[i]; |
---|
728 | } |
---|
729 | } |
---|
730 | j = gp[procid].neighbors[LEFT]; |
---|
731 | if (j != -1) { |
---|
732 | t2b = (double **) q_multi[j][k]; |
---|
733 | for (i = 2; i <= lastrow; i += 2) { |
---|
734 | t2a[i][0] = t2b[i][jm - 2]; |
---|
735 | } |
---|
736 | } |
---|
737 | j = gp[procid].neighbors[RIGHT]; |
---|
738 | if (j != -1) { |
---|
739 | t2b = (double **) q_multi[j][k]; |
---|
740 | for (i = 1; i <= lastrow; i += 2) { |
---|
741 | t2a[i][jm - 1] = t2b[i][1]; |
---|
742 | } |
---|
743 | } |
---|
744 | } |
---|
745 | |
---|
746 | void copy_black(long k, long procid) |
---|
747 | { |
---|
748 | long i; |
---|
749 | long j; |
---|
750 | long im; |
---|
751 | long jm; |
---|
752 | long lastrow; |
---|
753 | long lastcol; |
---|
754 | double **t2a; |
---|
755 | double **t2b; |
---|
756 | double *t1a; |
---|
757 | double *t1b; |
---|
758 | |
---|
759 | im = (imx[k] - 2) / yprocs + 2; |
---|
760 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
761 | lastrow = (imx[k] - 2) / yprocs; |
---|
762 | lastcol = (jmx[k] - 2) / xprocs; |
---|
763 | |
---|
764 | t2a = (double **) q_multi[procid][k]; |
---|
765 | j = gp[procid].neighbors[UP]; |
---|
766 | if (j != -1) { |
---|
767 | t1a = (double *) t2a[0]; |
---|
768 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
769 | for (i = 1; i <= lastcol; i += 2) { |
---|
770 | t1a[i] = t1b[i]; |
---|
771 | } |
---|
772 | } |
---|
773 | j = gp[procid].neighbors[DOWN]; |
---|
774 | if (j != -1) { |
---|
775 | t1a = (double *) t2a[im - 1]; |
---|
776 | t1b = (double *) q_multi[j][k][1]; |
---|
777 | for (i = 2; i <= lastcol; i += 2) { |
---|
778 | t1a[i] = t1b[i]; |
---|
779 | } |
---|
780 | } |
---|
781 | j = gp[procid].neighbors[LEFT]; |
---|
782 | if (j != -1) { |
---|
783 | t2b = (double **) q_multi[j][k]; |
---|
784 | for (i = 1; i <= lastrow; i += 2) { |
---|
785 | t2a[i][0] = t2b[i][jm - 2]; |
---|
786 | } |
---|
787 | } |
---|
788 | j = gp[procid].neighbors[RIGHT]; |
---|
789 | if (j != -1) { |
---|
790 | t2b = (double **) q_multi[j][k]; |
---|
791 | for (i = 2; i <= lastrow; i += 2) { |
---|
792 | t2a[i][jm - 1] = t2b[i][1]; |
---|
793 | } |
---|
794 | } |
---|
795 | } |
---|