[799] | 1 | /*************************************************************************/ |
---|
| 2 | /* */ |
---|
| 3 | /* Copyright (c) 1994 Stanford University */ |
---|
| 4 | /* */ |
---|
| 5 | /* All rights reserved. */ |
---|
| 6 | /* */ |
---|
| 7 | /* Permission is given to use, copy, and modify this software for any */ |
---|
| 8 | /* non-commercial purpose as long as this copyright notice is not */ |
---|
| 9 | /* removed. All other uses, including redistribution in whole or in */ |
---|
| 10 | /* part, are forbidden without prior written permission. */ |
---|
| 11 | /* */ |
---|
| 12 | /* This software is provided with absolutely no warranty and no */ |
---|
| 13 | /* support. */ |
---|
| 14 | /* */ |
---|
| 15 | /*************************************************************************/ |
---|
| 16 | |
---|
| 17 | /* Shared memory implementation of the multigrid method |
---|
| 18 | Implementation uses red-black gauss-seidel relaxation |
---|
| 19 | iterations, w cycles, and the method of half-injection for |
---|
| 20 | residual computation. */ |
---|
| 21 | |
---|
| 22 | |
---|
| 23 | #include <stdio.h> |
---|
| 24 | #include <stdlib.h> |
---|
| 25 | #include <math.h> |
---|
| 26 | |
---|
| 27 | #include "decs.h" |
---|
| 28 | |
---|
| 29 | //////////////////////// |
---|
| 30 | void multig(long procid) |
---|
| 31 | { |
---|
| 32 | long iter; |
---|
| 33 | double wu; |
---|
| 34 | double errp; |
---|
| 35 | long m; |
---|
| 36 | long flag; |
---|
| 37 | long k; |
---|
| 38 | double wmax; |
---|
| 39 | double local_err; |
---|
| 40 | double red_local_err; |
---|
| 41 | double black_local_err; |
---|
| 42 | double g_error; |
---|
| 43 | long barrier_start; |
---|
| 44 | |
---|
| 45 | flag = 0; |
---|
| 46 | iter = 0; |
---|
| 47 | m = numlev - 1; |
---|
| 48 | wmax = maxwork; |
---|
| 49 | wu = 0.0; |
---|
| 50 | |
---|
| 51 | k = m; |
---|
| 52 | g_error = 1.0e30; |
---|
| 53 | while (!flag) |
---|
| 54 | { |
---|
| 55 | errp = g_error; |
---|
| 56 | iter++; |
---|
| 57 | if (procid == MASTER) |
---|
| 58 | { |
---|
| 59 | multi->err_multi = 0.0; |
---|
| 60 | } |
---|
| 61 | |
---|
| 62 | /* barrier to make sure all procs have finished intadd or rescal */ |
---|
| 63 | /* before proceeding with relaxation */ |
---|
| 64 | |
---|
| 65 | // BARRIER |
---|
| 66 | barrier_start = giet_proctime(); |
---|
| 67 | sqt_barrier_wait( &barrier ); |
---|
| 68 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
| 69 | |
---|
| 70 | copy_black(k, procid); |
---|
| 71 | |
---|
| 72 | relax(k, &red_local_err, RED_ITER, procid); |
---|
| 73 | |
---|
| 74 | /* barrier to make sure all red computations have been performed */ |
---|
| 75 | |
---|
| 76 | // BARRIER |
---|
| 77 | barrier_start = giet_proctime(); |
---|
| 78 | sqt_barrier_wait( &barrier ); |
---|
| 79 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
| 80 | |
---|
| 81 | copy_red(k, procid); |
---|
| 82 | |
---|
| 83 | relax(k, &black_local_err, BLACK_ITER, procid); |
---|
| 84 | |
---|
| 85 | /* compute max local error from red_local_err and black_local_err */ |
---|
| 86 | |
---|
| 87 | if (red_local_err > black_local_err) local_err = red_local_err; |
---|
| 88 | else local_err = black_local_err; |
---|
| 89 | |
---|
| 90 | /* update the global error if necessary */ |
---|
| 91 | |
---|
| 92 | sqt_lock_acquire( &error_lock ); |
---|
| 93 | if (local_err > multi->err_multi) multi->err_multi = local_err; |
---|
| 94 | sqt_lock_release( &error_lock ); |
---|
| 95 | |
---|
| 96 | /* a single relaxation sweep at the finest level is one unit of */ |
---|
| 97 | /* work */ |
---|
| 98 | wu += pow((double) 4.0, (double) k - m); |
---|
| 99 | |
---|
| 100 | /* barrier to make sure all processors have checked local error */ |
---|
| 101 | |
---|
| 102 | // BARRIER |
---|
| 103 | barrier_start = giet_proctime(); |
---|
| 104 | sqt_barrier_wait( &barrier ); |
---|
| 105 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
| 106 | |
---|
| 107 | g_error = multi->err_multi; |
---|
| 108 | |
---|
| 109 | /* barrier to make sure master does not cycle back to top of loop */ |
---|
| 110 | /* and reset global->err before we read it and decide what to do */ |
---|
| 111 | |
---|
| 112 | // BARRIER |
---|
| 113 | barrier_start = giet_proctime(); |
---|
| 114 | sqt_barrier_wait( &barrier ); |
---|
| 115 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
| 116 | |
---|
| 117 | if (g_error >= lev_tol[k]) |
---|
| 118 | { |
---|
| 119 | if (wu > wmax) |
---|
| 120 | { |
---|
| 121 | /* max work exceeded */ |
---|
| 122 | giet_pthread_exit("[OCEAN ERROR] work overflow in multig()"); |
---|
| 123 | } |
---|
| 124 | else |
---|
| 125 | { |
---|
| 126 | /* if we have not converged */ |
---|
| 127 | if ((k != 0) && (g_error / errp >= 0.6) && (k > minlevel)) |
---|
| 128 | { |
---|
| 129 | /* if need to go to coarser grid */ |
---|
| 130 | |
---|
| 131 | copy_borders(k, procid); |
---|
| 132 | copy_rhs_borders(k, procid); |
---|
| 133 | |
---|
| 134 | /* This bar is needed because the routine rescal uses the neighbor's |
---|
| 135 | border points to compute s4. We must ensure that the neighbor's |
---|
| 136 | border points have been written before we try computing the new |
---|
| 137 | rescal values */ |
---|
| 138 | |
---|
| 139 | // BARRIER |
---|
| 140 | barrier_start = giet_proctime(); |
---|
| 141 | sqt_barrier_wait( &barrier ); |
---|
| 142 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
| 143 | |
---|
| 144 | rescal(k, procid); |
---|
| 145 | |
---|
| 146 | /* transfer residual to rhs of coarser grid */ |
---|
| 147 | lev_tol[k - 1] = 0.3 * g_error; |
---|
| 148 | k = k - 1; |
---|
| 149 | putz(k, procid); |
---|
| 150 | /* make initial guess on coarser grid zero */ |
---|
| 151 | g_error = 1.0e30; |
---|
| 152 | } |
---|
| 153 | } |
---|
| 154 | } |
---|
| 155 | else |
---|
| 156 | { |
---|
| 157 | /* if we have converged at this level */ |
---|
| 158 | if (k == m) |
---|
| 159 | { |
---|
| 160 | /* if finest grid, we are done */ |
---|
| 161 | flag = 1; |
---|
| 162 | } |
---|
| 163 | else |
---|
| 164 | { |
---|
| 165 | /* else go to next finest grid */ |
---|
| 166 | |
---|
| 167 | copy_borders(k, procid); |
---|
| 168 | |
---|
| 169 | intadd(k, procid); |
---|
| 170 | /* changes the grid values at the finer level. rhs at finer level */ |
---|
| 171 | /* remains what it already is */ |
---|
| 172 | k++; |
---|
| 173 | g_error = 1.0e30; |
---|
| 174 | } |
---|
| 175 | } |
---|
| 176 | } |
---|
| 177 | } // end multig() |
---|
| 178 | |
---|
| 179 | //////////////////////////////////////////////////////// |
---|
| 180 | // perform red or black iteration (not both) |
---|
| 181 | //////////////////////////////////////////////////////// |
---|
| 182 | void relax(long k, double *err, long color, long procid) |
---|
| 183 | { |
---|
| 184 | long i; |
---|
| 185 | long j; |
---|
| 186 | long iend; |
---|
| 187 | long jend; |
---|
| 188 | long oddistart; |
---|
| 189 | long oddjstart; |
---|
| 190 | long evenistart; |
---|
| 191 | long evenjstart; |
---|
| 192 | double a; |
---|
| 193 | double h; |
---|
| 194 | double factor; |
---|
| 195 | double maxerr; |
---|
| 196 | double newerr; |
---|
| 197 | double oldval; |
---|
| 198 | double newval; |
---|
| 199 | double **t2a; |
---|
| 200 | double **t2b; |
---|
| 201 | double *t1a; |
---|
| 202 | double *t1b; |
---|
| 203 | double *t1c; |
---|
| 204 | double *t1d; |
---|
| 205 | |
---|
| 206 | i = 0; |
---|
| 207 | j = 0; |
---|
| 208 | |
---|
| 209 | *err = 0.0; |
---|
| 210 | h = lev_res[k]; |
---|
| 211 | |
---|
| 212 | /* points whose sum of row and col index is even do a red iteration, */ |
---|
| 213 | /* others do a black */ |
---|
| 214 | |
---|
| 215 | evenistart = gps[procid]->eist[k]; |
---|
| 216 | evenjstart = gps[procid]->ejst[k]; |
---|
| 217 | oddistart = gps[procid]->oist[k]; |
---|
| 218 | oddjstart = gps[procid]->ojst[k]; |
---|
| 219 | |
---|
| 220 | iend = gps[procid]->rlien[k]; |
---|
| 221 | jend = gps[procid]->rljen[k]; |
---|
| 222 | |
---|
| 223 | factor = 4.0 - eig2 * h * h; |
---|
| 224 | maxerr = 0.0; |
---|
| 225 | t2a = (double **) q_multi[procid][k]; |
---|
| 226 | t2b = (double **) rhs_multi[procid][k]; |
---|
| 227 | if (color == RED_ITER) { |
---|
| 228 | for (i = evenistart; i < iend; i += 2) { |
---|
| 229 | t1a = (double *) t2a[i]; |
---|
| 230 | t1b = (double *) t2b[i]; |
---|
| 231 | t1c = (double *) t2a[i - 1]; |
---|
| 232 | t1d = (double *) t2a[i + 1]; |
---|
| 233 | for (j = evenjstart; j < jend; j += 2) { |
---|
| 234 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
| 235 | oldval = t1a[j]; |
---|
| 236 | newval = a / factor; |
---|
| 237 | newerr = oldval - newval; |
---|
| 238 | t1a[j] = newval; |
---|
| 239 | if (fabs(newerr) > maxerr) { |
---|
| 240 | maxerr = fabs(newerr); |
---|
| 241 | } |
---|
| 242 | } |
---|
| 243 | } |
---|
| 244 | for (i = oddistart; i < iend; i += 2) { |
---|
| 245 | t1a = (double *) t2a[i]; |
---|
| 246 | t1b = (double *) t2b[i]; |
---|
| 247 | t1c = (double *) t2a[i - 1]; |
---|
| 248 | t1d = (double *) t2a[i + 1]; |
---|
| 249 | for (j = oddjstart; j < jend; j += 2) { |
---|
| 250 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
| 251 | oldval = t1a[j]; |
---|
| 252 | newval = a / factor; |
---|
| 253 | newerr = oldval - newval; |
---|
| 254 | t1a[j] = newval; |
---|
| 255 | if (fabs(newerr) > maxerr) { |
---|
| 256 | maxerr = fabs(newerr); |
---|
| 257 | } |
---|
| 258 | } |
---|
| 259 | } |
---|
| 260 | } else if (color == BLACK_ITER) { |
---|
| 261 | for (i = evenistart; i < iend; i += 2) { |
---|
| 262 | t1a = (double *) t2a[i]; |
---|
| 263 | t1b = (double *) t2b[i]; |
---|
| 264 | t1c = (double *) t2a[i - 1]; |
---|
| 265 | t1d = (double *) t2a[i + 1]; |
---|
| 266 | for (j = oddjstart; j < jend; j += 2) { |
---|
| 267 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
| 268 | oldval = t1a[j]; |
---|
| 269 | newval = a / factor; |
---|
| 270 | newerr = oldval - newval; |
---|
| 271 | t1a[j] = newval; |
---|
| 272 | if (fabs(newerr) > maxerr) { |
---|
| 273 | maxerr = fabs(newerr); |
---|
| 274 | } |
---|
| 275 | } |
---|
| 276 | } |
---|
| 277 | for (i = oddistart; i < iend; i += 2) { |
---|
| 278 | t1a = (double *) t2a[i]; |
---|
| 279 | t1b = (double *) t2b[i]; |
---|
| 280 | t1c = (double *) t2a[i - 1]; |
---|
| 281 | t1d = (double *) t2a[i + 1]; |
---|
| 282 | for (j = evenjstart; j < jend; j += 2) { |
---|
| 283 | a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j]; |
---|
| 284 | oldval = t1a[j]; |
---|
| 285 | newval = a / factor; |
---|
| 286 | newerr = oldval - newval; |
---|
| 287 | t1a[j] = newval; |
---|
| 288 | if (fabs(newerr) > maxerr) { |
---|
| 289 | maxerr = fabs(newerr); |
---|
| 290 | } |
---|
| 291 | } |
---|
| 292 | } |
---|
| 293 | } |
---|
| 294 | *err = maxerr; |
---|
| 295 | } // end relax() |
---|
| 296 | |
---|
| 297 | //////////////////////////////////////////////////// |
---|
| 298 | // perform half-injection to next coarsest level |
---|
| 299 | //////////////////////////////////////////////////// |
---|
| 300 | void rescal(long kf, long procid) |
---|
| 301 | { |
---|
| 302 | long ic; |
---|
| 303 | long if17; |
---|
| 304 | long jf; |
---|
| 305 | long jc; |
---|
| 306 | long krc; |
---|
| 307 | long istart; |
---|
| 308 | long iend; |
---|
| 309 | long jstart; |
---|
| 310 | long jend; |
---|
| 311 | double hf; |
---|
| 312 | double s; |
---|
| 313 | double s1; |
---|
| 314 | double s2; |
---|
| 315 | double s3; |
---|
| 316 | double s4; |
---|
| 317 | double factor; |
---|
| 318 | double int1; |
---|
| 319 | double int2; |
---|
| 320 | double i_int_factor; |
---|
| 321 | double j_int_factor; |
---|
| 322 | long i_off; |
---|
| 323 | long j_off; |
---|
| 324 | long up_proc; |
---|
| 325 | long left_proc; |
---|
| 326 | long im; |
---|
| 327 | long jm; |
---|
| 328 | double temp; |
---|
| 329 | double temp2; |
---|
| 330 | double **t2a; |
---|
| 331 | double **t2b; |
---|
| 332 | double **t2c; |
---|
| 333 | double *t1a; |
---|
| 334 | double *t1b; |
---|
| 335 | double *t1c; |
---|
| 336 | double *t1d; |
---|
| 337 | double *t1e; |
---|
| 338 | double *t1f; |
---|
| 339 | double *t1g; |
---|
| 340 | double *t1h; |
---|
| 341 | |
---|
| 342 | krc = kf - 1; |
---|
| 343 | //hc = lev_res[krc]; |
---|
| 344 | hf = lev_res[kf]; |
---|
| 345 | i_off = gps[procid]->rownum * ypts_per_proc[krc]; |
---|
| 346 | j_off = gps[procid]->colnum * xpts_per_proc[krc]; |
---|
| 347 | up_proc = gps[procid]->neighbors[UP]; |
---|
| 348 | left_proc = gps[procid]->neighbors[LEFT]; |
---|
| 349 | im = (imx[kf] - 2) / yprocs; |
---|
| 350 | jm = (jmx[kf] - 2) / xprocs; |
---|
| 351 | |
---|
| 352 | istart = gps[procid]->rlist[krc]; |
---|
| 353 | jstart = gps[procid]->rljst[krc]; |
---|
| 354 | iend = gps[procid]->rlien[krc] - 1; |
---|
| 355 | jend = gps[procid]->rljen[krc] - 1; |
---|
| 356 | |
---|
| 357 | factor = 4.0 - eig2 * hf * hf; |
---|
| 358 | |
---|
| 359 | t2a = (double **) q_multi[procid][kf]; |
---|
| 360 | t2b = (double **) rhs_multi[procid][kf]; |
---|
| 361 | t2c = (double **) rhs_multi[procid][krc]; |
---|
| 362 | if17 = 2 * (istart - 1); |
---|
| 363 | for (ic = istart; ic <= iend; ic++) { |
---|
| 364 | if17 += 2; |
---|
| 365 | i_int_factor = (ic + i_off) * i_int_coeff[krc] * 0.5; |
---|
| 366 | jf = 2 * (jstart - 1); |
---|
| 367 | t1a = (double *) t2a[if17]; |
---|
| 368 | t1b = (double *) t2b[if17]; |
---|
| 369 | t1c = (double *) t2c[ic]; |
---|
| 370 | t1d = (double *) t2a[if17 - 1]; |
---|
| 371 | t1e = (double *) t2a[if17 + 1]; |
---|
| 372 | t1f = (double *) t2a[if17 - 2]; |
---|
| 373 | t1g = (double *) t2a[if17 - 3]; |
---|
| 374 | t1h = (double *) t2b[if17 - 2]; |
---|
| 375 | for (jc = jstart; jc <= jend; jc++) { |
---|
| 376 | jf += 2; |
---|
| 377 | j_int_factor = (jc + j_off) * j_int_coeff[krc] * 0.5; |
---|
| 378 | |
---|
| 379 | /* method of half-injection uses 2.0 instead of 4.0 */ |
---|
| 380 | |
---|
| 381 | /* do bilinear interpolation */ |
---|
| 382 | s = t1a[jf + 1] + t1a[jf - 1] + t1d[jf] + t1e[jf]; |
---|
| 383 | s1 = 2.0 * (t1b[jf] - s + factor * t1a[jf]); |
---|
| 384 | if (((if17 == 2) && (gps[procid]->neighbors[UP] == -1)) || ((jf == 2) && (gps[procid]->neighbors[LEFT] == -1))) { |
---|
| 385 | s2 = 0; |
---|
| 386 | s3 = 0; |
---|
| 387 | s4 = 0; |
---|
| 388 | } else if ((if17 == 2) || (jf == 2)) { |
---|
| 389 | if (jf == 2) { |
---|
| 390 | temp = q_multi[left_proc][kf][if17][jm - 1]; |
---|
| 391 | } else { |
---|
| 392 | temp = t1a[jf - 3]; |
---|
| 393 | } |
---|
| 394 | s = t1a[jf - 1] + temp + t1d[jf - 2] + t1e[jf - 2]; |
---|
| 395 | s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]); |
---|
| 396 | if (if17 == 2) { |
---|
| 397 | temp = q_multi[up_proc][kf][im - 1][jf]; |
---|
| 398 | } else { |
---|
| 399 | temp = t1g[jf]; |
---|
| 400 | } |
---|
| 401 | s = t1f[jf + 1] + t1f[jf - 1] + temp + t1d[jf]; |
---|
| 402 | s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); |
---|
| 403 | if (jf == 2) { |
---|
| 404 | temp = q_multi[left_proc][kf][if17 - 2][jm - 1]; |
---|
| 405 | } else { |
---|
| 406 | temp = t1f[jf - 3]; |
---|
| 407 | } |
---|
| 408 | if (if17 == 2) { |
---|
| 409 | temp2 = q_multi[up_proc][kf][im - 1][jf - 2]; |
---|
| 410 | } else { |
---|
| 411 | temp2 = t1g[jf - 2]; |
---|
| 412 | } |
---|
| 413 | s = t1f[jf - 1] + temp + temp2 + t1d[jf - 2]; |
---|
| 414 | s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]); |
---|
| 415 | } else { |
---|
| 416 | s = t1a[jf - 1] + t1a[jf - 3] + t1d[jf - 2] + t1e[jf - 2]; |
---|
| 417 | s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]); |
---|
| 418 | s = t1f[jf + 1] + t1f[jf - 1] + t1g[jf] + t1d[jf]; |
---|
| 419 | s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]); |
---|
| 420 | s = t1f[jf - 1] + t1f[jf - 3] + t1g[jf - 2] + t1d[jf - 2]; |
---|
| 421 | s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]); |
---|
| 422 | } |
---|
| 423 | int1 = j_int_factor * s4 + (1.0 - j_int_factor) * s3; |
---|
| 424 | int2 = j_int_factor * s2 + (1.0 - j_int_factor) * s1; |
---|
| 425 | //int_val = i_int_factor*int1+(1.0-i_int_factor)*int2; |
---|
| 426 | t1c[jc] = i_int_factor * int1 + (1.0 - i_int_factor) * int2; |
---|
| 427 | } |
---|
| 428 | } |
---|
| 429 | } |
---|
| 430 | |
---|
| 431 | /* perform interpolation and addition to next finest grid */ |
---|
| 432 | void intadd(long kc, long procid) |
---|
| 433 | { |
---|
| 434 | long ic; |
---|
| 435 | long if17; |
---|
| 436 | long jf; |
---|
| 437 | long jc; |
---|
| 438 | long kf; |
---|
| 439 | long istart; |
---|
| 440 | long jstart; |
---|
| 441 | long iend; |
---|
| 442 | long jend; |
---|
| 443 | double int1; |
---|
| 444 | double int2; |
---|
| 445 | double i_int_factor1; |
---|
| 446 | double j_int_factor1; |
---|
| 447 | double i_int_factor2; |
---|
| 448 | double j_int_factor2; |
---|
| 449 | long i_off; |
---|
| 450 | long j_off; |
---|
| 451 | double **t2a; |
---|
| 452 | double **t2b; |
---|
| 453 | double *t1a; |
---|
| 454 | double *t1b; |
---|
| 455 | double *t1c; |
---|
| 456 | double *t1d; |
---|
| 457 | double *t1e; |
---|
| 458 | |
---|
| 459 | kf = kc + 1; |
---|
| 460 | //hc = lev_res[kc]; |
---|
| 461 | //hf = lev_res[kf]; |
---|
| 462 | |
---|
| 463 | istart = gps[procid]->rlist[kc]; |
---|
| 464 | jstart = gps[procid]->rljst[kc]; |
---|
| 465 | iend = gps[procid]->rlien[kc] - 1; |
---|
| 466 | jend = gps[procid]->rljen[kc] - 1; |
---|
| 467 | i_off = gps[procid]->rownum * ypts_per_proc[kc]; |
---|
| 468 | j_off = gps[procid]->colnum * xpts_per_proc[kc]; |
---|
| 469 | |
---|
| 470 | t2a = (double **) q_multi[procid][kc]; |
---|
| 471 | t2b = (double **) q_multi[procid][kf]; |
---|
| 472 | if17 = 2 * (istart - 1); |
---|
| 473 | for (ic = istart; ic <= iend; ic++) { |
---|
| 474 | if17 += 2; |
---|
| 475 | i_int_factor1 = ((imx[kc] - 2) - (ic + i_off - 1)) * (i_int_coeff[kf]); |
---|
| 476 | i_int_factor2 = (ic + i_off) * i_int_coeff[kf]; |
---|
| 477 | jf = 2 * (jstart - 1); |
---|
| 478 | |
---|
| 479 | t1a = (double *) t2a[ic]; |
---|
| 480 | t1b = (double *) t2a[ic - 1]; |
---|
| 481 | t1c = (double *) t2a[ic + 1]; |
---|
| 482 | t1d = (double *) t2b[if17]; |
---|
| 483 | t1e = (double *) t2b[if17 - 1]; |
---|
| 484 | for (jc = jstart; jc <= jend; jc++) { |
---|
| 485 | jf += 2; |
---|
| 486 | j_int_factor1 = ((jmx[kc] - 2) - (jc + j_off - 1)) * (j_int_coeff[kf]); |
---|
| 487 | j_int_factor2 = (jc + j_off) * j_int_coeff[kf]; |
---|
| 488 | |
---|
| 489 | int1 = j_int_factor1 * t1a[jc - 1] + (1.0 - j_int_factor1) * t1a[jc]; |
---|
| 490 | int2 = j_int_factor1 * t1b[jc - 1] + (1.0 - j_int_factor1) * t1b[jc]; |
---|
| 491 | t1e[jf - 1] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1; |
---|
| 492 | int2 = j_int_factor1 * t1c[jc - 1] + (1.0 - j_int_factor1) * t1c[jc]; |
---|
| 493 | t1d[jf - 1] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1; |
---|
| 494 | int1 = j_int_factor2 * t1a[jc + 1] + (1.0 - j_int_factor2) * t1a[jc]; |
---|
| 495 | int2 = j_int_factor2 * t1b[jc + 1] + (1.0 - j_int_factor2) * t1b[jc]; |
---|
| 496 | t1e[jf] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1; |
---|
| 497 | int2 = j_int_factor2 * t1c[jc + 1] + (1.0 - j_int_factor2) * t1c[jc]; |
---|
| 498 | t1d[jf] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1; |
---|
| 499 | } |
---|
| 500 | } |
---|
| 501 | } |
---|
| 502 | |
---|
| 503 | //////////////////////////////////////////// |
---|
| 504 | // initialize a grid to zero in parallel |
---|
| 505 | //////////////////////////////////////////// |
---|
| 506 | void putz(long k, long procid) |
---|
| 507 | { |
---|
| 508 | long i; |
---|
| 509 | long j; |
---|
| 510 | long istart; |
---|
| 511 | long jstart; |
---|
| 512 | long iend; |
---|
| 513 | long jend; |
---|
| 514 | double **t2a; |
---|
| 515 | double *t1a; |
---|
| 516 | |
---|
| 517 | istart = gps[procid]->rlist[k]; |
---|
| 518 | jstart = gps[procid]->rljst[k]; |
---|
| 519 | iend = gps[procid]->rlien[k]; |
---|
| 520 | jend = gps[procid]->rljen[k]; |
---|
| 521 | |
---|
| 522 | t2a = (double **) q_multi[procid][k]; |
---|
| 523 | for (i = istart; i <= iend; i++) |
---|
| 524 | { |
---|
| 525 | t1a = (double *) t2a[i]; |
---|
| 526 | for (j = jstart; j <= jend; j++) |
---|
| 527 | { |
---|
| 528 | t1a[j] = 0.0; |
---|
| 529 | } |
---|
| 530 | } |
---|
| 531 | } |
---|
| 532 | |
---|
| 533 | ////////////////////////////////////// |
---|
| 534 | void copy_borders(long k, long procid) |
---|
| 535 | { |
---|
| 536 | long i; |
---|
| 537 | long j; |
---|
| 538 | long jj; |
---|
| 539 | long im; |
---|
| 540 | long jm; |
---|
| 541 | long lastrow; |
---|
| 542 | long lastcol; |
---|
| 543 | double **t2a; |
---|
| 544 | double **t2b; |
---|
| 545 | double *t1a; |
---|
| 546 | double *t1b; |
---|
| 547 | |
---|
| 548 | im = (imx[k] - 2) / yprocs + 2; |
---|
| 549 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
| 550 | lastrow = (imx[k] - 2) / yprocs; |
---|
| 551 | lastcol = (jmx[k] - 2) / xprocs; |
---|
| 552 | |
---|
| 553 | t2a = (double **) q_multi[procid][k]; |
---|
| 554 | jj = gps[procid]->neighbors[UPLEFT]; |
---|
| 555 | if (jj != -1) { |
---|
| 556 | t2a[0][0] = q_multi[jj][k][im - 2][jm - 2]; |
---|
| 557 | } |
---|
| 558 | jj = gps[procid]->neighbors[UPRIGHT]; |
---|
| 559 | if (jj != -1) { |
---|
| 560 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][1]; |
---|
| 561 | } |
---|
| 562 | jj = gps[procid]->neighbors[DOWNLEFT]; |
---|
| 563 | if (jj != -1) { |
---|
| 564 | t2a[im - 1][0] = q_multi[jj][k][1][jm - 2]; |
---|
| 565 | } |
---|
| 566 | jj = gps[procid]->neighbors[DOWNRIGHT]; |
---|
| 567 | if (jj != -1) { |
---|
| 568 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][1]; |
---|
| 569 | } |
---|
| 570 | |
---|
| 571 | if (gps[procid]->neighbors[UP] == -1) { |
---|
| 572 | jj = gps[procid]->neighbors[LEFT]; |
---|
| 573 | if (jj != -1) { |
---|
| 574 | t2a[0][0] = q_multi[jj][k][0][jm - 2]; |
---|
| 575 | } else { |
---|
| 576 | jj = gps[procid]->neighbors[DOWN]; |
---|
| 577 | if (jj != -1) { |
---|
| 578 | t2a[im - 1][0] = q_multi[jj][k][1][0]; |
---|
| 579 | } |
---|
| 580 | } |
---|
| 581 | jj = gps[procid]->neighbors[RIGHT]; |
---|
| 582 | if (jj != -1) { |
---|
| 583 | t2a[0][jm - 1] = q_multi[jj][k][0][1]; |
---|
| 584 | } else { |
---|
| 585 | jj = gps[procid]->neighbors[DOWN]; |
---|
| 586 | if (jj != -1) { |
---|
| 587 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1]; |
---|
| 588 | } |
---|
| 589 | } |
---|
| 590 | } else if (gps[procid]->neighbors[DOWN] == -1) { |
---|
| 591 | jj = gps[procid]->neighbors[LEFT]; |
---|
| 592 | if (jj != -1) { |
---|
| 593 | t2a[im - 1][0] = q_multi[jj][k][im - 1][jm - 2]; |
---|
| 594 | } else { |
---|
| 595 | jj = gps[procid]->neighbors[UP]; |
---|
| 596 | if (jj != -1) { |
---|
| 597 | t2a[0][0] = q_multi[jj][k][im - 2][0]; |
---|
| 598 | } |
---|
| 599 | } |
---|
| 600 | jj = gps[procid]->neighbors[RIGHT]; |
---|
| 601 | if (jj != -1) { |
---|
| 602 | t2a[im - 1][jm - 1] = q_multi[jj][k][im - 1][1]; |
---|
| 603 | } else { |
---|
| 604 | jj = gps[procid]->neighbors[UP]; |
---|
| 605 | if (jj != -1) { |
---|
| 606 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1]; |
---|
| 607 | } |
---|
| 608 | } |
---|
| 609 | } else if (gps[procid]->neighbors[LEFT] == -1) { |
---|
| 610 | jj = gps[procid]->neighbors[UP]; |
---|
| 611 | if (jj != -1) { |
---|
| 612 | t2a[0][0] = q_multi[jj][k][im - 2][0]; |
---|
| 613 | } |
---|
| 614 | jj = gps[procid]->neighbors[DOWN]; |
---|
| 615 | if (jj != -1) { |
---|
| 616 | t2a[im - 1][0] = q_multi[jj][k][1][0]; |
---|
| 617 | } |
---|
| 618 | } else if (gps[procid]->neighbors[RIGHT] == -1) { |
---|
| 619 | jj = gps[procid]->neighbors[UP]; |
---|
| 620 | if (jj != -1) { |
---|
| 621 | t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1]; |
---|
| 622 | } |
---|
| 623 | jj = gps[procid]->neighbors[DOWN]; |
---|
| 624 | if (jj != -1) { |
---|
| 625 | t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1]; |
---|
| 626 | } |
---|
| 627 | } |
---|
| 628 | |
---|
| 629 | j = gps[procid]->neighbors[UP]; |
---|
| 630 | if (j != -1) { |
---|
| 631 | t1a = (double *) t2a[0]; |
---|
| 632 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
| 633 | for (i = 1; i <= lastcol; i++) { |
---|
| 634 | t1a[i] = t1b[i]; |
---|
| 635 | } |
---|
| 636 | } |
---|
| 637 | j = gps[procid]->neighbors[DOWN]; |
---|
| 638 | if (j != -1) { |
---|
| 639 | t1a = (double *) t2a[im - 1]; |
---|
| 640 | t1b = (double *) q_multi[j][k][1]; |
---|
| 641 | for (i = 1; i <= lastcol; i++) { |
---|
| 642 | t1a[i] = t1b[i]; |
---|
| 643 | } |
---|
| 644 | } |
---|
| 645 | j = gps[procid]->neighbors[LEFT]; |
---|
| 646 | if (j != -1) { |
---|
| 647 | t2b = (double **) q_multi[j][k]; |
---|
| 648 | for (i = 1; i <= lastrow; i++) { |
---|
| 649 | t2a[i][0] = t2b[i][jm - 2]; |
---|
| 650 | } |
---|
| 651 | } |
---|
| 652 | j = gps[procid]->neighbors[RIGHT]; |
---|
| 653 | if (j != -1) { |
---|
| 654 | t2b = (double **) q_multi[j][k]; |
---|
| 655 | for (i = 1; i <= lastrow; i++) { |
---|
| 656 | t2a[i][jm - 1] = t2b[i][1]; |
---|
| 657 | } |
---|
| 658 | } |
---|
| 659 | } // end copy_border() |
---|
| 660 | |
---|
| 661 | ////////////////////////////////////////// |
---|
| 662 | void copy_rhs_borders(long k, long procid) |
---|
| 663 | { |
---|
| 664 | long i; |
---|
| 665 | long j; |
---|
| 666 | long im; |
---|
| 667 | long jm; |
---|
| 668 | long lastrow; |
---|
| 669 | long lastcol; |
---|
| 670 | double **t2a; |
---|
| 671 | double **t2b; |
---|
| 672 | double *t1a; |
---|
| 673 | double *t1b; |
---|
| 674 | |
---|
| 675 | im = (imx[k] - 2) / yprocs + 2; |
---|
| 676 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
| 677 | lastrow = (imx[k] - 2) / yprocs; |
---|
| 678 | lastcol = (jmx[k] - 2) / xprocs; |
---|
| 679 | |
---|
| 680 | t2a = (double **) rhs_multi[procid][k]; |
---|
| 681 | if (gps[procid]->neighbors[UPLEFT] != -1) { |
---|
| 682 | j = gps[procid]->neighbors[UPLEFT]; |
---|
| 683 | t2a[0][0] = rhs_multi[j][k][im - 2][jm - 2]; |
---|
| 684 | } |
---|
| 685 | |
---|
| 686 | if (gps[procid]->neighbors[UP] != -1) { |
---|
| 687 | j = gps[procid]->neighbors[UP]; |
---|
| 688 | if (j != -1) { |
---|
| 689 | t1a = (double *) t2a[0]; |
---|
| 690 | t1b = (double *) rhs_multi[j][k][im - 2]; |
---|
| 691 | for (i = 2; i <= lastcol; i += 2) { |
---|
| 692 | t1a[i] = t1b[i]; |
---|
| 693 | } |
---|
| 694 | } |
---|
| 695 | } |
---|
| 696 | if (gps[procid]->neighbors[LEFT] != -1) { |
---|
| 697 | j = gps[procid]->neighbors[LEFT]; |
---|
| 698 | if (j != -1) { |
---|
| 699 | t2b = (double **) rhs_multi[j][k]; |
---|
| 700 | for (i = 2; i <= lastrow; i += 2) { |
---|
| 701 | t2a[i][0] = t2b[i][jm - 2]; |
---|
| 702 | } |
---|
| 703 | } |
---|
| 704 | } |
---|
| 705 | } |
---|
| 706 | |
---|
| 707 | ////////////////////////////////// |
---|
| 708 | void copy_red(long k, long procid) |
---|
| 709 | { |
---|
| 710 | long i; |
---|
| 711 | long j; |
---|
| 712 | long im; |
---|
| 713 | long jm; |
---|
| 714 | long lastrow; |
---|
| 715 | long lastcol; |
---|
| 716 | double **t2a; |
---|
| 717 | double **t2b; |
---|
| 718 | double *t1a; |
---|
| 719 | double *t1b; |
---|
| 720 | |
---|
| 721 | im = (imx[k] - 2) / yprocs + 2; |
---|
| 722 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
| 723 | lastrow = (imx[k] - 2) / yprocs; |
---|
| 724 | lastcol = (jmx[k] - 2) / xprocs; |
---|
| 725 | |
---|
| 726 | t2a = (double **) q_multi[procid][k]; |
---|
| 727 | j = gps[procid]->neighbors[UP]; |
---|
| 728 | if (j != -1) { |
---|
| 729 | t1a = (double *) t2a[0]; |
---|
| 730 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
| 731 | for (i = 2; i <= lastcol; i += 2) { |
---|
| 732 | t1a[i] = t1b[i]; |
---|
| 733 | } |
---|
| 734 | } |
---|
| 735 | j = gps[procid]->neighbors[DOWN]; |
---|
| 736 | if (j != -1) { |
---|
| 737 | t1a = (double *) t2a[im - 1]; |
---|
| 738 | t1b = (double *) q_multi[j][k][1]; |
---|
| 739 | for (i = 1; i <= lastcol; i += 2) { |
---|
| 740 | t1a[i] = t1b[i]; |
---|
| 741 | } |
---|
| 742 | } |
---|
| 743 | j = gps[procid]->neighbors[LEFT]; |
---|
| 744 | if (j != -1) { |
---|
| 745 | t2b = (double **) q_multi[j][k]; |
---|
| 746 | for (i = 2; i <= lastrow; i += 2) { |
---|
| 747 | t2a[i][0] = t2b[i][jm - 2]; |
---|
| 748 | } |
---|
| 749 | } |
---|
| 750 | j = gps[procid]->neighbors[RIGHT]; |
---|
| 751 | if (j != -1) { |
---|
| 752 | t2b = (double **) q_multi[j][k]; |
---|
| 753 | for (i = 1; i <= lastrow; i += 2) { |
---|
| 754 | t2a[i][jm - 1] = t2b[i][1]; |
---|
| 755 | } |
---|
| 756 | } |
---|
| 757 | } |
---|
| 758 | |
---|
| 759 | //////////////////////////////////// |
---|
| 760 | void copy_black(long k, long procid) |
---|
| 761 | { |
---|
| 762 | long i; |
---|
| 763 | long j; |
---|
| 764 | long im; |
---|
| 765 | long jm; |
---|
| 766 | long lastrow; |
---|
| 767 | long lastcol; |
---|
| 768 | double **t2a; |
---|
| 769 | double **t2b; |
---|
| 770 | double *t1a; |
---|
| 771 | double *t1b; |
---|
| 772 | |
---|
| 773 | im = (imx[k] - 2) / yprocs + 2; |
---|
| 774 | jm = (jmx[k] - 2) / xprocs + 2; |
---|
| 775 | lastrow = (imx[k] - 2) / yprocs; |
---|
| 776 | lastcol = (jmx[k] - 2) / xprocs; |
---|
| 777 | |
---|
| 778 | t2a = (double **) q_multi[procid][k]; |
---|
| 779 | j = gps[procid]->neighbors[UP]; |
---|
| 780 | if (j != -1) { |
---|
| 781 | t1a = (double *) t2a[0]; |
---|
| 782 | t1b = (double *) q_multi[j][k][im - 2]; |
---|
| 783 | for (i = 1; i <= lastcol; i += 2) { |
---|
| 784 | t1a[i] = t1b[i]; |
---|
| 785 | } |
---|
| 786 | } |
---|
| 787 | j = gps[procid]->neighbors[DOWN]; |
---|
| 788 | if (j != -1) { |
---|
| 789 | t1a = (double *) t2a[im - 1]; |
---|
| 790 | t1b = (double *) q_multi[j][k][1]; |
---|
| 791 | for (i = 2; i <= lastcol; i += 2) { |
---|
| 792 | t1a[i] = t1b[i]; |
---|
| 793 | } |
---|
| 794 | } |
---|
| 795 | j = gps[procid]->neighbors[LEFT]; |
---|
| 796 | if (j != -1) { |
---|
| 797 | t2b = (double **) q_multi[j][k]; |
---|
| 798 | for (i = 1; i <= lastrow; i += 2) { |
---|
| 799 | t2a[i][0] = t2b[i][jm - 2]; |
---|
| 800 | } |
---|
| 801 | } |
---|
| 802 | j = gps[procid]->neighbors[RIGHT]; |
---|
| 803 | if (j != -1) { |
---|
| 804 | t2b = (double **) q_multi[j][k]; |
---|
| 805 | for (i = 2; i <= lastrow; i += 2) { |
---|
| 806 | t2a[i][jm - 1] = t2b[i][1]; |
---|
| 807 | } |
---|
| 808 | } |
---|
| 809 | } |
---|