[581] | 1 | /*************************************************************************/ |
---|
| 2 | /* */ |
---|
| 3 | /* Copyright (c) 1994 Stanford University */ |
---|
| 4 | /* */ |
---|
| 5 | /* All rights reserved. */ |
---|
| 6 | /* */ |
---|
| 7 | /* Permission is given to use, copy, and modify this software for any */ |
---|
| 8 | /* non-commercial purpose as long as this copyright notice is not */ |
---|
| 9 | /* removed. All other uses, including redistribution in whole or in */ |
---|
| 10 | /* part, are forbidden without prior written permission. */ |
---|
| 11 | /* */ |
---|
| 12 | /* This software is provided with absolutely no warranty and no */ |
---|
| 13 | /* support. */ |
---|
| 14 | /* */ |
---|
| 15 | /*************************************************************************/ |
---|
| 16 | |
---|
| 17 | /* Shared memory implementation of the multigrid method |
---|
| 18 | Implementation uses red-black gauss-seidel relaxation |
---|
| 19 | iterations, w cycles, and the method of half-injection for |
---|
| 20 | residual computation. */ |
---|
| 21 | |
---|
| 22 | EXTERN_ENV |
---|
| 23 | |
---|
| 24 | #include <stdio.h> |
---|
| 25 | #include <math.h> |
---|
| 26 | #include <stdlib.h> |
---|
| 27 | |
---|
| 28 | #include "decs.h" |
---|
| 29 | |
---|
| 30 | /* perform multigrid (w cycles) */ |
---|
| 31 | void multig(long my_id) |
---|
| 32 | { |
---|
| 33 | long iter; |
---|
| 34 | double wu; |
---|
| 35 | double errp; |
---|
| 36 | long m; |
---|
| 37 | long flag; |
---|
| 38 | long k; |
---|
| 39 | long my_num; |
---|
| 40 | double wmax; |
---|
| 41 | double local_err; |
---|
| 42 | double red_local_err; |
---|
| 43 | double black_local_err; |
---|
| 44 | double g_error; |
---|
| 45 | |
---|
| 46 | flag = 0; |
---|
| 47 | iter = 0; |
---|
| 48 | m = numlev - 1; |
---|
| 49 | wmax = maxwork; |
---|
| 50 | my_num = my_id; |
---|
| 51 | wu = 0.0; |
---|
| 52 | |
---|
| 53 | k = m; |
---|
| 54 | g_error = 1.0e30; |
---|
| 55 | while (!flag) { |
---|
| 56 | errp = g_error; |
---|
| 57 | iter++; |
---|
| 58 | if (my_num == MASTER) { |
---|
| 59 | multi->err_multi = 0.0; |
---|
| 60 | } |
---|
| 61 | |
---|
| 62 | /* barrier to make sure all procs have finished intadd or rescal */ |
---|
| 63 | /* before proceeding with relaxation */ |
---|
| 64 | #if defined(MULTIPLE_BARRIERS) |
---|
| 65 | BARRIER(bars->error_barrier, nprocs) |
---|
| 66 | #else |
---|
| 67 | BARRIER(bars->barrier, nprocs) |
---|
| 68 | #endif |
---|
| 69 | copy_black(k, my_num); |
---|
| 70 | |
---|
| 71 | relax(k, &red_local_err, RED_ITER, my_num); |
---|
| 72 | |
---|
| 73 | /* barrier to make sure all red computations have been performed */ |
---|
| 74 | #if defined(MULTIPLE_BARRIERS) |
---|
| 75 | BARRIER(bars->error_barrier, nprocs) |
---|
| 76 | #else |
---|
| 77 | BARRIER(bars->barrier, nprocs) |
---|
| 78 | #endif |
---|
| 79 | copy_red(k, my_num); |
---|
| 80 | |
---|
| 81 | relax(k, &black_local_err, BLACK_ITER, my_num); |
---|
| 82 | |
---|
| 83 | /* compute max local error from red_local_err and black_local_err */ |
---|
| 84 | |
---|
| 85 | if (red_local_err > black_local_err) { |
---|
| 86 | local_err = red_local_err; |
---|
| 87 | } else { |
---|
| 88 | local_err = black_local_err; |
---|
| 89 | } |
---|
| 90 | |
---|
| 91 | /* update the global error if necessary */ |
---|
| 92 | |
---|
| 93 | LOCK(locks->error_lock) |
---|
| 94 | if (local_err > multi->err_multi) { |
---|
| 95 | multi->err_multi = local_err; |
---|
| 96 | } |
---|
| 97 | UNLOCK(locks->error_lock) |
---|
| 98 | |
---|
| 99 | /* a single relaxation sweep at the finest level is one unit of */ |
---|
| 100 | /* work */ |
---|
| 101 | wu += pow((double) 4.0, (double) k - m); |
---|
| 102 | |
---|
| 103 | /* barrier to make sure all processors have checked local error */ |
---|
| 104 | #if defined(MULTIPLE_BARRIERS) |
---|
| 105 | BARRIER(bars->error_barrier, nprocs) |
---|
| 106 | #else |
---|
| 107 | BARRIER(bars->barrier, nprocs) |
---|
| 108 | #endif |
---|
| 109 | g_error = multi->err_multi; |
---|
| 110 | |
---|
| 111 | /* barrier to make sure master does not cycle back to top of loop */ |
---|
| 112 | /* and reset global->err before we read it and decide what to do */ |
---|
| 113 | #if defined(MULTIPLE_BARRIERS) |
---|
| 114 | BARRIER(bars->error_barrier, nprocs) |
---|
| 115 | #else |
---|
| 116 | BARRIER(bars->barrier, nprocs) |
---|
| 117 | #endif |
---|
| 118 | if (g_error >= lev_tol[k]) { |
---|
| 119 | if (wu > wmax) { |
---|
| 120 | /* max work exceeded */ |
---|
| 121 | fprintf(stderr, "ERROR: Maximum work limit %0.5f exceeded\n", wmax); |
---|
| 122 | exit(-1); |
---|
| 123 | } else { |
---|
| 124 | /* if we have not converged */ |
---|
| 125 | if ((k != 0) && (g_error / errp >= 0.6) && (k > minlevel)) { |
---|
| 126 | /* if need to go to coarser grid */ |
---|
| 127 | |
---|
| 128 | copy_borders(k, my_num); |
---|
| 129 | copy_rhs_borders(k, my_num); |
---|
| 130 | |
---|
| 131 | /* This bar is needed because the routine rescal uses the neighbor's |
---|
| 132 | border points to compute s4. We must ensure that the neighbor's |
---|
| 133 | border points have been written before we try computing the new |
---|
| 134 | rescal values */ |
---|
| 135 | |
---|
| 136 | #if defined(MULTIPLE_BARRIERS) |
---|
| 137 | BARRIER(bars->error_barrier, nprocs) |
---|
| 138 | #else |
---|
| 139 | BARRIER(bars->barrier, nprocs) |
---|
| 140 | #endif |
---|
| 141 | rescal(k, my_num); |
---|
| 142 | |
---|
| 143 | /* transfer residual to rhs of coarser grid */ |
---|
| 144 | lev_tol[k - 1] = 0.3 * g_error; |
---|
| 145 | k = k - 1; |
---|
| 146 | putz(k, my_num); |
---|
| 147 | /* make initial guess on coarser grid zero */ |
---|
| 148 | g_error = 1.0e30; |
---|
| 149 | } |
---|
| 150 | } |
---|
| 151 | } else { |
---|
| 152 | /* if we have converged at this level */ |
---|
| 153 | if (k == m) { |
---|
| 154 | /* if finest grid, we are done */ |
---|
| 155 | flag = 1; |
---|
| 156 | } else { |
---|
| 157 | /* else go to next finest grid */ |
---|
| 158 | |
---|
| 159 | copy_borders(k, my_num); |
---|
| 160 | |
---|
| 161 | intadd(k, my_num); |
---|
| 162 | /* changes the grid values at the finer level. rhs at finer level */ |
---|
| 163 | /* remains what it already is */ |
---|
| 164 | k++; |
---|
| 165 | g_error = 1.0e30; |
---|
| 166 | } |
---|
| 167 | } |
---|
| 168 | } |
---|
| 169 | if (do_output) { |
---|
| 170 | if (my_num == MASTER) { |
---|
| 171 | printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter, k, multi->err_multi, wu); |
---|
| 172 | } |
---|
| 173 | } |
---|
| 174 | } |
---|
| 175 | |
---|
| 176 | /* perform red or black iteration (not both) */ |
---|
| 177 | void relax(long k, double *err, long color, long my_num) |
---|
| 178 | { |
---|
| 179 | long i; |
---|
| 180 | long j; |
---|
| 181 | long iend; |
---|
| 182 | long jend; |
---|
| 183 | long oddistart; |
---|
| 184 | long oddjstart; |
---|
| 185 | long evenistart; |
---|
| 186 | long evenjstart; |
---|
| 187 | double a; |
---|
| 188 | double h; |
---|
| 189 | double factor; |
---|
| 190 | double maxerr; |
---|
| 191 | double newerr; |
---|
| 192 | double oldval; |
---|
| 193 | double newval; |
---|
| 194 | double **t2a; |
---|
| 195 | double **t2b; |
---|
| 196 | double *t1a; |
---|
| 197 | double *t1b; |
---|
| 198 | double *t1c; |
---|
| 199 | double *t1d; |
---|
| 200 | |
---|
| 201 | i = 0; |
---|
| 202 | j = 0; |
---|
| 203 | |
---|
| 204 | *err = 0.0; |
---|
| 205 | h = lev_res[k]; |
---|
| 206 | |
---|
| 207 | /* points whose sum of row and col index is even do a red iteration, */ |
---|
| 208 | /* others do a black */ |
---|
| 209 | |
---|
| 210 | evenistart = gp[my_num].eist[k]; |
---|
| 211 | evenjstart = gp[my_num].ejst[k]; |
---|
| 212 | oddistart = gp[my_num].oist[k]; |
---|
| 213 | oddjstart = gp[my_num].ojst[k]; |
---|
| 214 | |
---|
| 215 | iend = gp[my_num].rlien[k]; |
---|
| 216 | jend = gp[my_num].rljen[k]; |
---|
| 217 | |
---|
| 218 | factor = 4.0 - eig2 * h * h; |
---|
| 219 | maxerr = 0.0; |
---|
| 220 | t2a = (double **) q_multi[my_num][k]; |
---|
| 221 | t2b = (double **) rhs_multi[my_num][k]; |
---|
| 222 | if (color == RED_ITER) { |
---|
| 223 | for (i = evenistart; i < iend; i += 2) { |
---|
| 224 | t1a = (double *) t2a[i]; |
---|
| 225 | t1b = (double *) t2b[i]; |
---|
| 226 | t1c = (double *) t2a[i - 1]; |
---|
| 227 | t1d = (double *) t2a[i + 1]; |
---|
| 228 | for (j = evenjstart; j < jend; j += 2) { |
---|
| 229 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
| 230 | oldval = t1a[j]; |
---|
| 231 | newval = a / factor; |
---|
| 232 | newerr = oldval - newval; |
---|
| 233 | t1a[j] = newval; |
---|
| 234 | if (fabs(newerr) > maxerr) { |
---|
| 235 | maxerr = fabs(newerr); |
---|
| 236 | } |
---|
| 237 | } |
---|
| 238 | } |
---|
| 239 | for (i = oddistart; i < iend; i += 2) { |
---|
| 240 | t1a = (double *) t2a[i]; |
---|
| 241 | t1b = (double *) t2b[i]; |
---|
| 242 | t1c = (double *) t2a[i - 1]; |
---|
| 243 | t1d = (double *) t2a[i + 1]; |
---|
| 244 | for (j = oddjstart; j < jend; j += 2) { |
---|
| 245 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
| 246 | oldval = t1a[j]; |
---|
| 247 | newval = a / factor; |
---|
| 248 | newerr = oldval - newval; |
---|
| 249 | t1a[j] = newval; |
---|
| 250 | if (fabs(newerr) > maxerr) { |
---|
| 251 | maxerr = fabs(newerr); |
---|
| 252 | } |
---|
| 253 | } |
---|
| 254 | } |
---|
| 255 | } else if (color == BLACK_ITER) { |
---|
| 256 | for (i = evenistart; i < iend; i += 2) { |
---|
| 257 | t1a = (double *) t2a[i]; |
---|
| 258 | t1b = (double *) t2b[i]; |
---|
| 259 | t1c = (double *) t2a[i - 1]; |
---|
| 260 | t1d = (double *) t2a[i + 1]; |
---|
| 261 | for (j = oddjstart; j < jend; j += 2) { |
---|
| 262 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
| 263 | oldval = t1a[j]; |
---|
| 264 | newval = a / factor; |
---|
| 265 | newerr = oldval - newval; |
---|
| 266 | t1a[j] = newval; |
---|
| 267 | if (fabs(newerr) > maxerr) { |
---|
| 268 | maxerr = fabs(newerr); |
---|
| 269 | } |
---|
| 270 | } |
---|
| 271 | } |
---|
| 272 | for (i = oddistart; i < iend; i += 2) { |
---|
| 273 | t1a = (double *) t2a[i]; |
---|
| 274 | t1b = (double *) t2b[i]; |
---|
| 275 | t1c = (double *) t2a[i - 1]; |
---|
| 276 | t1d = (double *) t2a[i + 1]; |
---|
| 277 | for (j = evenjstart; j < jend; j += 2) { |
---|
| 278 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
| 279 | oldval = t1a[j]; |
---|
| 280 | newval = a / factor; |
---|
| 281 | newerr = oldval - newval; |
---|
| 282 | t1a[j] = newval; |
---|
| 283 | if (fabs(newerr) > maxerr) { |
---|
| 284 | maxerr = fabs(newerr); |
---|
| 285 | } |
---|
| 286 | } |
---|
| 287 | } |
---|
| 288 | } |
---|
| 289 | *err = maxerr; |
---|
| 290 | } |
---|
| 291 | |
---|
| 292 | /* perform half-injection to next coarsest level */ |
---|
| 293 | void rescal(long kf, long my_num) |
---|
| 294 | { |
---|
| 295 | long ic; |
---|
| 296 | long if17; |
---|
| 297 | long jf; |
---|
| 298 | long jc; |
---|
| 299 | long krc; |
---|
| 300 | long istart; |
---|
| 301 | long iend; |
---|
| 302 | long jstart; |
---|
| 303 | long jend; |
---|
| 304 | double hf; |
---|
| 305 | double s; |
---|
| 306 | double s1; |
---|
| 307 | double s2; |
---|
| 308 | double s3; |
---|
| 309 | double s4; |
---|
| 310 | double factor; |
---|
| 311 | double int1; |
---|
| 312 | double int2; |
---|
| 313 | double i_int_factor; |
---|
| 314 | double j_int_factor; |
---|
| 315 | long i_off; |
---|
| 316 | long j_off; |
---|
| 317 | long up_proc; |
---|
| 318 | long left_proc; |
---|
| 319 | long im; |
---|
| 320 | long jm; |
---|
| 321 | double temp; |
---|
| 322 | double temp2; |
---|
| 323 | double **t2a; |
---|
| 324 | double **t2b; |
---|
| 325 | double **t2c; |
---|
| 326 | double *t1a; |
---|
| 327 | double *t1b; |
---|
| 328 | double *t1c; |
---|
| 329 | double *t1d; |
---|
| 330 | double *t1e; |
---|
| 331 | double *t1f; |
---|
| 332 | double *t1g; |
---|
| 333 | double *t1h; |
---|
| 334 | |
---|
| 335 | krc = kf - 1; |
---|
| 336 | //hc = lev_res[krc]; |
---|
| 337 | hf = lev_res[kf]; |
---|
| 338 | i_off = (*gp[my_num].rownum) * ypts_per_proc[krc]; |
---|
| 339 | j_off = (*gp[my_num].colnum) * xpts_per_proc[krc]; |
---|
| 340 | up_proc = gp[my_num].neighbors[UP]; |
---|
| 341 | left_proc = gp[my_num].neighbors[LEFT]; |
---|
| 342 | im = (imx[kf] - 2) / yprocs; |
---|
| 343 | jm = (jmx[kf] - 2) / xprocs; |
---|
| 344 | |
---|
| 345 | istart = gp[my_num].rlist[krc]; |
---|
| 346 | jstart = gp[my_num].rljst[krc]; |
---|
| 347 | iend = gp[my_num].rlien[krc] - 1; |
---|
| 348 | jend = gp[my_num].rljen[krc] - 1; |
---|
| 349 | |
---|
| 350 | factor = 4.0 - eig2 * hf * hf; |
---|
| 351 | |
---|
| 352 | t2a = (double **) q_multi[my_num][kf]; |
---|
| 353 | t2b = (double **) rhs_multi[my_num][kf]; |
---|
| 354 | t2c = (double **) rhs_multi[my_num][krc]; |
---|
| 355 | if17 = 2 * (istart - 1); |
---|
| 356 | for (ic = istart; ic <= iend; ic++) { |
---|
| 357 | if17 += 2; |
---|
| 358 | i_int_factor = (ic + i_off) * i_int_coeff[krc] * 0.5; |
---|
| 359 | jf = 2 * (jstart - 1); |
---|
| 360 | t1a = (double *) t2a[if17]; |
---|
| 361 | t1b = (double *) t2b[if17]; |
---|
| 362 | t1c = (double *) t2c[ic]; |
---|
| 363 | t1d = (double *) t2a[if17 - 1]; |
---|
| 364 | t1e = (double *) t2a[if17 + 1]; |
---|
| 365 | t1f = (double *) t2a[if17 - 2]; |
---|
| 366 | t1g = (double *) t2a[if17 - 3]; |
---|
| 367 | t1h = (double *) t2b[if17 - 2]; |
---|
| 368 | for (jc = jstart; jc <= jend; jc++) { |
---|
| 369 | jf += 2; |
---|
| 370 | j_int_factor = (jc + j_off) * j_int_coeff[krc] * 0.5; |
---|
| 371 | |
---|
| 372 | /* method of half-injection uses 2.0 instead of 4.0 */ |
---|
| 373 | |
---|
| 374 | /* do bilinear interpolation */ |
---|
| 375 | s = t1a[jf + 1] + t1a[jf - 1] + t1d[jf] + t1e[jf]; |
---|
| 376 | s1 = 2.0 * (t1b[jf] - s + factor * t1a[jf]); |
---|
| 377 | if (((if17 == 2) && (gp[my_num].neighbors[UP] == -1)) || ((jf == 2) && (gp[my_num].neighbors[LEFT] == -1))) { |
---|
| 378 | s2 = 0; |
---|
| 379 | s3 = 0; |
---|
| 380 | s4 = 0; |
---|
| 381 | } else if ((if17 == 2) || (jf == 2)) { |
---|
| 382 | if (jf == 2) { |
---|
| 383 | temp = q_multi[left_proc][kf][if17][jm - 1]; |
---|
| 384 | } else { |
---|
| 385 | temp = t1a[jf - 3]; |
---|
| 386 | } |
---|
| 387 | s = t1a[jf - 1] + temp + t1d[jf - 2] + t1e[jf - 2]; |
---|
| 388 | s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]); |
---|
| 389 | if (if17 == 2) { |
---|
| 390 | temp = q_multi[up_proc][kf][im - 1][jf]; |
---|
| 391 | } else { |
---|
| 392 | temp = t1g[jf]; |
---|
| 393 | } |
---|
| 394 | s = t1f[jf + 1] + t1f[jf - 1] + temp + t1d[jf]; |
---|
| 395 | s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); |
---|
| 396 | if (jf == 2) { |
---|
| 397 | temp = q_multi[left_proc][kf][if17 - 2][jm - 1]; |
---|
| 398 | } else { |
---|
| 399 | temp = t1f[jf - 3]; |
---|
| 400 | } |
---|
| 401 | if (if17 == 2) { |
---|
| 402 | temp2 = q_multi[up_proc][kf][im - 1][jf - 2]; |
---|
| 403 | } else { |
---|
| 404 | temp2 = t1g[jf - 2]; |
---|
| 405 | } |
---|
| 406 | s = t1f[jf - 1] + temp + temp2 + t1d[jf - 2]; |
---|
| 407 | s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]); |
---|
| 408 | } else { |
---|
| 409 | s = t1a[jf - 1] + t1a[jf - 3] + t1d[jf - 2] + t1e[jf - 2]; |
---|
| 410 | s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]); |
---|
| 411 | s = t1f[jf + 1] + t1f[jf - 1] + t1g[jf] + t1d[jf]; |
---|
| 412 | s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); |
---|
| 413 | s = t1f[jf - 1] + t1f[jf - 3] + t1g[jf - 2] + t1d[jf - 2]; |
---|
| 414 | s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]); |
---|
| 415 | } |
---|
| 416 | int1 = j_int_factor * s4 + (1.0 - j_int_factor) * s3; |
---|
| 417 | int2 = j_int_factor * s2 + (1.0 - j_int_factor) * s1; |
---|
| 418 | //int_val = i_int_factor*int1+(1.0-i_int_factor)*int2; |
---|
| 419 | t1c[jc] = i_int_factor * int1 + (1.0 - i_int_factor) * int2; |
---|
| 420 | } |
---|
| 421 | } |
---|
| 422 | } |
---|
| 423 | |
---|
| 424 | /* perform interpolation and addition to next finest grid */ |
---|
| 425 | void intadd(long kc, long my_num) |
---|
| 426 | { |
---|
| 427 | long ic; |
---|
| 428 | long if17; |
---|
| 429 | long jf; |
---|
| 430 | long jc; |
---|
| 431 | long kf; |
---|
| 432 | long istart; |
---|
| 433 | long jstart; |
---|
| 434 | long iend; |
---|
| 435 | long jend; |
---|
| 436 | double int1; |
---|
| 437 | double int2; |
---|
| 438 | double i_int_factor1; |
---|
| 439 | double j_int_factor1; |
---|
| 440 | double i_int_factor2; |
---|
| 441 | double j_int_factor2; |
---|
| 442 | long i_off; |
---|
| 443 | long j_off; |
---|
| 444 | double **t2a; |
---|
| 445 | double **t2b; |
---|
| 446 | double *t1a; |
---|
| 447 | double *t1b; |
---|
| 448 | double *t1c; |
---|
| 449 | double *t1d; |
---|
| 450 | double *t1e; |
---|
| 451 | |
---|
| 452 | kf = kc + 1; |
---|
| 453 | //hc = lev_res[kc]; |
---|
| 454 | //hf = lev_res[kf]; |
---|
| 455 | |
---|
| 456 | istart = gp[my_num].rlist[kc]; |
---|
| 457 | jstart = gp[my_num].rljst[kc]; |
---|
| 458 | iend = gp[my_num].rlien[kc] - 1; |
---|
| 459 | jend = gp[my_num].rljen[kc] - 1; |
---|
| 460 | i_off = (*gp[my_num].rownum) * ypts_per_proc[kc]; |
---|
| 461 | j_off = (*gp[my_num].colnum) * xpts_per_proc[kc]; |
---|
| 462 | |
---|
| 463 | t2a = (double **) q_multi[my_num][kc]; |
---|
| 464 | t2b = (double **) q_multi[my_num][kf]; |
---|
| 465 | if17 = 2 * (istart - 1); |
---|
| 466 | for (ic = istart; ic <= iend; ic++) { |
---|
| 467 | if17 += 2; |
---|
| 468 | i_int_factor1 = ((imx[kc] - 2) - (ic + i_off - 1)) * (i_int_coeff[kf]); |
---|
| 469 | i_int_factor2 = (ic + i_off) * i_int_coeff[kf]; |
---|
| 470 | jf = 2 * (jstart - 1); |
---|
| 471 | |
---|
| 472 | t1a = (double *) t2a[ic]; |
---|
| 473 | t1b = (double *) t2a[ic - 1]; |
---|
| 474 | t1c = (double *) t2a[ic + 1]; |
---|
| 475 | t1d = (double *) t2b[if17]; |
---|
| 476 | t1e = (double *) t2b[if17 - 1]; |
---|
| 477 | for (jc = jstart; jc <= jend; jc++) { |
---|
| 478 | jf += 2; |
---|
| 479 | j_int_factor1 = ((jmx[kc] - 2) - (jc + j_off - 1)) * (j_int_coeff[kf]); |
---|
| 480 | j_int_factor2 = (jc + j_off) * j_int_coeff[kf]; |
---|
| 481 | |
---|
| 482 | int1 = j_int_factor1 * t1a[jc - 1] + (1.0 - j_int_factor1) * t1a[jc]; |
---|
| 483 | int2 = j_int_factor1 * t1b[jc - 1] + (1.0 - j_int_factor1) * t1b[jc]; |
---|
| 484 | t1e[jf - 1] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1; |
---|
| 485 | int2 = j_int_factor1 * t1c[jc - 1] + (1.0 - j_int_factor1) * t1c[jc]; |
---|
| 486 | t1d[jf - 1] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1; |
---|
| 487 | int1 = j_int_factor2 * t1a[jc + 1] + (1.0 - j_int_factor2) * t1a[jc]; |
---|
| 488 | int2 = j_int_factor2 * t1b[jc + 1] + (1.0 - j_int_factor2) * t1b[jc]; |
---|
| 489 | t1e[jf] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1; |
---|
| 490 | int2 = j_int_factor2 * t1c[jc + 1] + (1.0 - j_int_factor2) * t1c[jc]; |
---|
| 491 | t1d[jf] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1; |
---|
| 492 | } |
---|
| 493 | } |
---|
| 494 | } |
---|
| 495 | |
---|
| 496 | /* initialize a grid to zero in parallel */ |
---|
| 497 | void putz(long k, long my_num) |
---|
| 498 | { |
---|
| 499 | long i; |
---|
| 500 | long j; |
---|
| 501 | long istart; |
---|
| 502 | long jstart; |
---|
| 503 | long iend; |
---|
| 504 | long jend; |
---|
| 505 | double **t2a; |
---|
| 506 | double *t1a; |
---|
| 507 | |
---|
| 508 | istart = gp[my_num].rlist[k]; |
---|
| 509 | jstart = gp[my_num].rljst[k]; |
---|
| 510 | iend = gp[my_num].rlien[k]; |
---|
| 511 | jend = gp[my_num].rljen[k]; |
---|
| 512 | |
---|
| 513 | t2a = (double **) q_multi[my_num][k]; |
---|
| 514 | for (i = istart; i <= iend; i++) { |
---|
| 515 | t1a = (double *) t2a[i]; |
---|
| 516 | for (j = jstart; j <= jend; j++) { |
---|
| 517 | t1a[j] = 0.0; |
---|
| 518 | } |
---|
| 519 | } |
---|
| 520 | } |
---|
| 521 | |
---|
| 522 | void copy_borders(long k, long pid) |
---|
| 523 | { |
---|
| 524 | long i; |
---|
| 525 | long j; |
---|
| 526 | long jj; |
---|
| 527 | long im; |
---|
| 528 | long jm; |
---|
| 529 | long lastrow; |
---|
| 530 | long lastcol; |
---|
| 531 | double **t2a; |
---|
| 532 | double **t2b; |
---|
| 533 | double *t1a; |
---|
| 534 | double *t1b; |
---|
| 535 | |
---|
| 536 | im = (imx[k] - 2) / yprocs + 2; |
---|
| 537 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
| 538 | lastrow = (imx[k] - 2) / yprocs; |
---|
| 539 | lastcol = (jmx[k] - 2) / xprocs; |
---|
| 540 | |
---|
| 541 | t2a = (double **) q_multi[pid][k]; |
---|
| 542 | jj = gp[pid].neighbors[UPLEFT]; |
---|
| 543 | if (jj != -1) { |
---|
| 544 | t2a[0][0] = q_multi[jj][k][im - 2][jm - 2]; |
---|
| 545 | } |
---|
| 546 | jj = gp[pid].neighbors[UPRIGHT]; |
---|
| 547 | if (jj != -1) { |
---|
| 548 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][1]; |
---|
| 549 | } |
---|
| 550 | jj = gp[pid].neighbors[DOWNLEFT]; |
---|
| 551 | if (jj != -1) { |
---|
| 552 | t2a[im - 1][0] = q_multi[jj][k][1][jm - 2]; |
---|
| 553 | } |
---|
| 554 | jj = gp[pid].neighbors[DOWNRIGHT]; |
---|
| 555 | if (jj != -1) { |
---|
| 556 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][1]; |
---|
| 557 | } |
---|
| 558 | |
---|
| 559 | if (gp[pid].neighbors[UP] == -1) { |
---|
| 560 | jj = gp[pid].neighbors[LEFT]; |
---|
| 561 | if (jj != -1) { |
---|
| 562 | t2a[0][0] = q_multi[jj][k][0][jm - 2]; |
---|
| 563 | } else { |
---|
| 564 | jj = gp[pid].neighbors[DOWN]; |
---|
| 565 | if (jj != -1) { |
---|
| 566 | t2a[im - 1][0] = q_multi[jj][k][1][0]; |
---|
| 567 | } |
---|
| 568 | } |
---|
| 569 | jj = gp[pid].neighbors[RIGHT]; |
---|
| 570 | if (jj != -1) { |
---|
| 571 | t2a[0][jm - 1] = q_multi[jj][k][0][1]; |
---|
| 572 | } else { |
---|
| 573 | jj = gp[pid].neighbors[DOWN]; |
---|
| 574 | if (jj != -1) { |
---|
| 575 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1]; |
---|
| 576 | } |
---|
| 577 | } |
---|
| 578 | } else if (gp[pid].neighbors[DOWN] == -1) { |
---|
| 579 | jj = gp[pid].neighbors[LEFT]; |
---|
| 580 | if (jj != -1) { |
---|
| 581 | t2a[im - 1][0] = q_multi[jj][k][im - 1][jm - 2]; |
---|
| 582 | } else { |
---|
| 583 | jj = gp[pid].neighbors[UP]; |
---|
| 584 | if (jj != -1) { |
---|
| 585 | t2a[0][0] = q_multi[jj][k][im - 2][0]; |
---|
| 586 | } |
---|
| 587 | } |
---|
| 588 | jj = gp[pid].neighbors[RIGHT]; |
---|
| 589 | if (jj != -1) { |
---|
| 590 | t2a[im - 1][jm - 1] = q_multi[jj][k][im - 1][1]; |
---|
| 591 | } else { |
---|
| 592 | jj = gp[pid].neighbors[UP]; |
---|
| 593 | if (jj != -1) { |
---|
| 594 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1]; |
---|
| 595 | } |
---|
| 596 | } |
---|
| 597 | } else if (gp[pid].neighbors[LEFT] == -1) { |
---|
| 598 | jj = gp[pid].neighbors[UP]; |
---|
| 599 | if (jj != -1) { |
---|
| 600 | t2a[0][0] = q_multi[jj][k][im - 2][0]; |
---|
| 601 | } |
---|
| 602 | jj = gp[pid].neighbors[DOWN]; |
---|
| 603 | if (jj != -1) { |
---|
| 604 | t2a[im - 1][0] = q_multi[jj][k][1][0]; |
---|
| 605 | } |
---|
| 606 | } else if (gp[pid].neighbors[RIGHT] == -1) { |
---|
| 607 | jj = gp[pid].neighbors[UP]; |
---|
| 608 | if (jj != -1) { |
---|
| 609 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1]; |
---|
| 610 | } |
---|
| 611 | jj = gp[pid].neighbors[DOWN]; |
---|
| 612 | if (jj != -1) { |
---|
| 613 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1]; |
---|
| 614 | } |
---|
| 615 | } |
---|
| 616 | |
---|
| 617 | j = gp[pid].neighbors[UP]; |
---|
| 618 | if (j != -1) { |
---|
| 619 | t1a = (double *) t2a[0]; |
---|
| 620 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
| 621 | for (i = 1; i <= lastcol; i++) { |
---|
| 622 | t1a[i] = t1b[i]; |
---|
| 623 | } |
---|
| 624 | } |
---|
| 625 | j = gp[pid].neighbors[DOWN]; |
---|
| 626 | if (j != -1) { |
---|
| 627 | t1a = (double *) t2a[im - 1]; |
---|
| 628 | t1b = (double *) q_multi[j][k][1]; |
---|
| 629 | for (i = 1; i <= lastcol; i++) { |
---|
| 630 | t1a[i] = t1b[i]; |
---|
| 631 | } |
---|
| 632 | } |
---|
| 633 | j = gp[pid].neighbors[LEFT]; |
---|
| 634 | if (j != -1) { |
---|
| 635 | t2b = (double **) q_multi[j][k]; |
---|
| 636 | for (i = 1; i <= lastrow; i++) { |
---|
| 637 | t2a[i][0] = t2b[i][jm - 2]; |
---|
| 638 | } |
---|
| 639 | } |
---|
| 640 | j = gp[pid].neighbors[RIGHT]; |
---|
| 641 | if (j != -1) { |
---|
| 642 | t2b = (double **) q_multi[j][k]; |
---|
| 643 | for (i = 1; i <= lastrow; i++) { |
---|
| 644 | t2a[i][jm - 1] = t2b[i][1]; |
---|
| 645 | } |
---|
| 646 | } |
---|
| 647 | |
---|
| 648 | } |
---|
| 649 | |
---|
| 650 | void copy_rhs_borders(long k, long procid) |
---|
| 651 | { |
---|
| 652 | long i; |
---|
| 653 | long j; |
---|
| 654 | long im; |
---|
| 655 | long jm; |
---|
| 656 | long lastrow; |
---|
| 657 | long lastcol; |
---|
| 658 | double **t2a; |
---|
| 659 | double **t2b; |
---|
| 660 | double *t1a; |
---|
| 661 | double *t1b; |
---|
| 662 | |
---|
| 663 | im = (imx[k] - 2) / yprocs + 2; |
---|
| 664 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
| 665 | lastrow = (imx[k] - 2) / yprocs; |
---|
| 666 | lastcol = (jmx[k] - 2) / xprocs; |
---|
| 667 | |
---|
| 668 | t2a = (double **) rhs_multi[procid][k]; |
---|
| 669 | if (gp[procid].neighbors[UPLEFT] != -1) { |
---|
| 670 | j = gp[procid].neighbors[UPLEFT]; |
---|
| 671 | t2a[0][0] = rhs_multi[j][k][im - 2][jm - 2]; |
---|
| 672 | } |
---|
| 673 | |
---|
| 674 | if (gp[procid].neighbors[UP] != -1) { |
---|
| 675 | j = gp[procid].neighbors[UP]; |
---|
| 676 | if (j != -1) { |
---|
| 677 | t1a = (double *) t2a[0]; |
---|
| 678 | t1b = (double *) rhs_multi[j][k][im - 2]; |
---|
| 679 | for (i = 2; i <= lastcol; i += 2) { |
---|
| 680 | t1a[i] = t1b[i]; |
---|
| 681 | } |
---|
| 682 | } |
---|
| 683 | } |
---|
| 684 | if (gp[procid].neighbors[LEFT] != -1) { |
---|
| 685 | j = gp[procid].neighbors[LEFT]; |
---|
| 686 | if (j != -1) { |
---|
| 687 | t2b = (double **) rhs_multi[j][k]; |
---|
| 688 | for (i = 2; i <= lastrow; i += 2) { |
---|
| 689 | t2a[i][0] = t2b[i][jm - 2]; |
---|
| 690 | } |
---|
| 691 | } |
---|
| 692 | } |
---|
| 693 | } |
---|
| 694 | |
---|
| 695 | void copy_red(long k, long procid) |
---|
| 696 | { |
---|
| 697 | long i; |
---|
| 698 | long j; |
---|
| 699 | long im; |
---|
| 700 | long jm; |
---|
| 701 | long lastrow; |
---|
| 702 | long lastcol; |
---|
| 703 | double **t2a; |
---|
| 704 | double **t2b; |
---|
| 705 | double *t1a; |
---|
| 706 | double *t1b; |
---|
| 707 | |
---|
| 708 | im = (imx[k] - 2) / yprocs + 2; |
---|
| 709 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
| 710 | lastrow = (imx[k] - 2) / yprocs; |
---|
| 711 | lastcol = (jmx[k] - 2) / xprocs; |
---|
| 712 | |
---|
| 713 | t2a = (double **) q_multi[procid][k]; |
---|
| 714 | j = gp[procid].neighbors[UP]; |
---|
| 715 | if (j != -1) { |
---|
| 716 | t1a = (double *) t2a[0]; |
---|
| 717 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
| 718 | for (i = 2; i <= lastcol; i += 2) { |
---|
| 719 | t1a[i] = t1b[i]; |
---|
| 720 | } |
---|
| 721 | } |
---|
| 722 | j = gp[procid].neighbors[DOWN]; |
---|
| 723 | if (j != -1) { |
---|
| 724 | t1a = (double *) t2a[im - 1]; |
---|
| 725 | t1b = (double *) q_multi[j][k][1]; |
---|
| 726 | for (i = 1; i <= lastcol; i += 2) { |
---|
| 727 | t1a[i] = t1b[i]; |
---|
| 728 | } |
---|
| 729 | } |
---|
| 730 | j = gp[procid].neighbors[LEFT]; |
---|
| 731 | if (j != -1) { |
---|
| 732 | t2b = (double **) q_multi[j][k]; |
---|
| 733 | for (i = 2; i <= lastrow; i += 2) { |
---|
| 734 | t2a[i][0] = t2b[i][jm - 2]; |
---|
| 735 | } |
---|
| 736 | } |
---|
| 737 | j = gp[procid].neighbors[RIGHT]; |
---|
| 738 | if (j != -1) { |
---|
| 739 | t2b = (double **) q_multi[j][k]; |
---|
| 740 | for (i = 1; i <= lastrow; i += 2) { |
---|
| 741 | t2a[i][jm - 1] = t2b[i][1]; |
---|
| 742 | } |
---|
| 743 | } |
---|
| 744 | } |
---|
| 745 | |
---|
| 746 | void copy_black(long k, long procid) |
---|
| 747 | { |
---|
| 748 | long i; |
---|
| 749 | long j; |
---|
| 750 | long im; |
---|
| 751 | long jm; |
---|
| 752 | long lastrow; |
---|
| 753 | long lastcol; |
---|
| 754 | double **t2a; |
---|
| 755 | double **t2b; |
---|
| 756 | double *t1a; |
---|
| 757 | double *t1b; |
---|
| 758 | |
---|
| 759 | im = (imx[k] - 2) / yprocs + 2; |
---|
| 760 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
| 761 | lastrow = (imx[k] - 2) / yprocs; |
---|
| 762 | lastcol = (jmx[k] - 2) / xprocs; |
---|
| 763 | |
---|
| 764 | t2a = (double **) q_multi[procid][k]; |
---|
| 765 | j = gp[procid].neighbors[UP]; |
---|
| 766 | if (j != -1) { |
---|
| 767 | t1a = (double *) t2a[0]; |
---|
| 768 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
| 769 | for (i = 1; i <= lastcol; i += 2) { |
---|
| 770 | t1a[i] = t1b[i]; |
---|
| 771 | } |
---|
| 772 | } |
---|
| 773 | j = gp[procid].neighbors[DOWN]; |
---|
| 774 | if (j != -1) { |
---|
| 775 | t1a = (double *) t2a[im - 1]; |
---|
| 776 | t1b = (double *) q_multi[j][k][1]; |
---|
| 777 | for (i = 2; i <= lastcol; i += 2) { |
---|
| 778 | t1a[i] = t1b[i]; |
---|
| 779 | } |
---|
| 780 | } |
---|
| 781 | j = gp[procid].neighbors[LEFT]; |
---|
| 782 | if (j != -1) { |
---|
| 783 | t2b = (double **) q_multi[j][k]; |
---|
| 784 | for (i = 1; i <= lastrow; i += 2) { |
---|
| 785 | t2a[i][0] = t2b[i][jm - 2]; |
---|
| 786 | } |
---|
| 787 | } |
---|
| 788 | j = gp[procid].neighbors[RIGHT]; |
---|
| 789 | if (j != -1) { |
---|
| 790 | t2b = (double **) q_multi[j][k]; |
---|
| 791 | for (i = 2; i <= lastrow; i += 2) { |
---|
| 792 | t2a[i][jm - 1] = t2b[i][1]; |
---|
| 793 | } |
---|
| 794 | } |
---|
| 795 | } |
---|