1 | /*************************************************************************/ |
---|
2 | /* */ |
---|
3 | /* Copyright (c) 1994 Stanford University */ |
---|
4 | /* */ |
---|
5 | /* All rights reserved. */ |
---|
6 | /* */ |
---|
7 | /* Permission is given to use, copy, and modify this software for any */ |
---|
8 | /* non-commercial purpose as long as this copyright notice is not */ |
---|
9 | /* removed. All other uses, including redistribution in whole or in */ |
---|
10 | /* part, are forbidden without prior written permission. */ |
---|
11 | /* */ |
---|
12 | /* This software is provided with absolutely no warranty and no */ |
---|
13 | /* support. */ |
---|
14 | /* */ |
---|
15 | /*************************************************************************/ |
---|
16 | |
---|
17 | /* **************** |
---|
18 | subroutine slave |
---|
19 | **************** */ |
---|
20 | |
---|
21 | #include <stdio.h> |
---|
22 | #include <math.h> |
---|
23 | |
---|
24 | #include "decs.h" |
---|
25 | |
---|
26 | #define VERBOSE 0 |
---|
27 | |
---|
28 | ///////////////////////////////////////////////////////////// |
---|
29 | __attribute__ ((constructor)) void slave( long *ptr_procid ) |
---|
30 | ///////////////////////////////////////////////////////////// |
---|
31 | { |
---|
32 | long i; |
---|
33 | long j; |
---|
34 | long nstep; |
---|
35 | long iindex; |
---|
36 | long iday = 0; |
---|
37 | double ysca1; |
---|
38 | double y; |
---|
39 | double factor; |
---|
40 | double sintemp; |
---|
41 | double curlt; |
---|
42 | double ressqr; |
---|
43 | long istart; |
---|
44 | long iend; |
---|
45 | long jstart; |
---|
46 | long jend; |
---|
47 | long ist; |
---|
48 | long ien; |
---|
49 | long jst; |
---|
50 | long jen; |
---|
51 | double fac; |
---|
52 | long dayflag = 0; |
---|
53 | long dhourflag = 0; |
---|
54 | long endflag = 0; |
---|
55 | long firstrow; |
---|
56 | long lastrow; |
---|
57 | long numrows; |
---|
58 | long firstcol; |
---|
59 | long lastcol; |
---|
60 | long numcols; |
---|
61 | long psiindex; |
---|
62 | double psibipriv; |
---|
63 | double ttime; |
---|
64 | double dhour; |
---|
65 | double day; |
---|
66 | long procid; |
---|
67 | long j_off = 0; |
---|
68 | double ** t2a; |
---|
69 | double ** t2b; |
---|
70 | double * t1a; |
---|
71 | double * t1b; |
---|
72 | double * t1c; |
---|
73 | double * t1d; |
---|
74 | long barrier_start; |
---|
75 | |
---|
76 | procid = *ptr_procid; |
---|
77 | |
---|
78 | // initialise total time in slave() |
---|
79 | gps[procid]->total_time = giet_proctime(); |
---|
80 | |
---|
81 | ressqr = lev_res[numlev - 1] * lev_res[numlev - 1]; |
---|
82 | |
---|
83 | // BARRIER |
---|
84 | barrier_start = giet_proctime(); |
---|
85 | sqt_barrier_wait( &barrier ); |
---|
86 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
87 | |
---|
88 | if ( VERBOSE ) { printf("\n@@@ Thread %d pass first barrier in slave()\n", procid ); } |
---|
89 | |
---|
90 | t2a = (double **) oldga[procid]; |
---|
91 | t2b = (double **) oldgb[procid]; |
---|
92 | for (i = 0; i < im; i++) |
---|
93 | { |
---|
94 | t1a = (double *) t2a[i]; |
---|
95 | t1b = (double *) t2b[i]; |
---|
96 | for (j = 0; j < jm; j++) |
---|
97 | { |
---|
98 | t1a[j] = 0.0; |
---|
99 | t1b[j] = 0.0; |
---|
100 | } |
---|
101 | } |
---|
102 | |
---|
103 | firstcol = 1; |
---|
104 | lastcol = firstcol + gps[procid]->rel_num_x[numlev - 1] - 1; |
---|
105 | firstrow = 1; |
---|
106 | lastrow = firstrow + gps[procid]->rel_num_y[numlev - 1] - 1; |
---|
107 | |
---|
108 | numcols = gps[procid]->rel_num_x[numlev - 1]; |
---|
109 | numrows = gps[procid]->rel_num_y[numlev - 1]; |
---|
110 | j_off = (gps[procid]->colnum) * numcols; |
---|
111 | |
---|
112 | /* every process gets its own copy of the timing variables to avoid |
---|
113 | contention at shared memory locations. here, these variables |
---|
114 | are initialized. */ |
---|
115 | |
---|
116 | ttime = 0.0; |
---|
117 | dhour = 0.0; |
---|
118 | nstep = 0; |
---|
119 | day = 0.0; |
---|
120 | ysca1 = 0.5 * ysca; |
---|
121 | |
---|
122 | if (procid == MASTER) |
---|
123 | { |
---|
124 | t1a = (double *) f; |
---|
125 | for (iindex = 0; iindex <= jmx[numlev - 1] - 1; iindex++) |
---|
126 | { |
---|
127 | y = ((double) iindex) * res; |
---|
128 | t1a[iindex] = f0 + beta * (y - ysca1); |
---|
129 | } |
---|
130 | } |
---|
131 | |
---|
132 | t2a = (double **) psium[procid]; |
---|
133 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
134 | { |
---|
135 | t2a[0][0] = 0.0; |
---|
136 | } |
---|
137 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
138 | { |
---|
139 | t2a[im - 1][0] = 0.0; |
---|
140 | } |
---|
141 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
142 | { |
---|
143 | t2a[0][jm - 1] = 0.0; |
---|
144 | } |
---|
145 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
146 | { |
---|
147 | t2a[im - 1][jm - 1] = 0.0; |
---|
148 | } |
---|
149 | if (gps[procid]->neighbors[UP] == -1) |
---|
150 | { |
---|
151 | t1a = (double *) t2a[0]; |
---|
152 | for (j = firstcol; j <= lastcol; j++) |
---|
153 | { |
---|
154 | t1a[j] = 0.0; |
---|
155 | } |
---|
156 | } |
---|
157 | if (gps[procid]->neighbors[DOWN] == -1) |
---|
158 | { |
---|
159 | t1a = (double *) t2a[im - 1]; |
---|
160 | for (j = firstcol; j <= lastcol; j++) |
---|
161 | { |
---|
162 | t1a[j] = 0.0; |
---|
163 | } |
---|
164 | } |
---|
165 | if (gps[procid]->neighbors[LEFT] == -1) |
---|
166 | { |
---|
167 | for (j = firstrow; j <= lastrow; j++) |
---|
168 | { |
---|
169 | t2a[j][0] = 0.0; |
---|
170 | } |
---|
171 | } |
---|
172 | if (gps[procid]->neighbors[RIGHT] == -1) |
---|
173 | { |
---|
174 | for (j = firstrow; j <= lastrow; j++) |
---|
175 | { |
---|
176 | t2a[j][jm - 1] = 0.0; |
---|
177 | } |
---|
178 | } |
---|
179 | |
---|
180 | for (i = firstrow; i <= lastrow; i++) |
---|
181 | { |
---|
182 | t1a = (double *) t2a[i]; |
---|
183 | for (iindex = firstcol; iindex <= lastcol; iindex++) |
---|
184 | { |
---|
185 | t1a[iindex] = 0.0; |
---|
186 | } |
---|
187 | } |
---|
188 | t2a = (double **) psilm[procid]; |
---|
189 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
190 | { |
---|
191 | t2a[0][0] = 0.0; |
---|
192 | } |
---|
193 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
194 | { |
---|
195 | t2a[im - 1][0] = 0.0; |
---|
196 | } |
---|
197 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
198 | { |
---|
199 | t2a[0][jm - 1] = 0.0; |
---|
200 | } |
---|
201 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
202 | { |
---|
203 | t2a[im - 1][jm - 1] = 0.0; |
---|
204 | } |
---|
205 | if (gps[procid]->neighbors[UP] == -1) |
---|
206 | { |
---|
207 | t1a = (double *) t2a[0]; |
---|
208 | for (j = firstcol; j <= lastcol; j++) |
---|
209 | { |
---|
210 | t1a[j] = 0.0; |
---|
211 | } |
---|
212 | } |
---|
213 | if (gps[procid]->neighbors[DOWN] == -1) |
---|
214 | { |
---|
215 | t1a = (double *) t2a[im - 1]; |
---|
216 | for (j = firstcol; j <= lastcol; j++) |
---|
217 | { |
---|
218 | t1a[j] = 0.0; |
---|
219 | } |
---|
220 | } |
---|
221 | if (gps[procid]->neighbors[LEFT] == -1) |
---|
222 | { |
---|
223 | for (j = firstrow; j <= lastrow; j++) |
---|
224 | { |
---|
225 | t2a[j][0] = 0.0; |
---|
226 | } |
---|
227 | } |
---|
228 | if (gps[procid]->neighbors[RIGHT] == -1) |
---|
229 | { |
---|
230 | for (j = firstrow; j <= lastrow; j++) |
---|
231 | { |
---|
232 | t2a[j][jm - 1] = 0.0; |
---|
233 | } |
---|
234 | } |
---|
235 | for (i = firstrow; i <= lastrow; i++) |
---|
236 | { |
---|
237 | t1a = (double *) t2a[i]; |
---|
238 | for (iindex = firstcol; iindex <= lastcol; iindex++) |
---|
239 | { |
---|
240 | t1a[iindex] = 0.0; |
---|
241 | } |
---|
242 | } |
---|
243 | |
---|
244 | t2a = (double **) psib[procid]; |
---|
245 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
246 | { |
---|
247 | t2a[0][0] = 1.0; |
---|
248 | } |
---|
249 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
250 | { |
---|
251 | t2a[0][jm - 1] = 1.0; |
---|
252 | } |
---|
253 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
254 | { |
---|
255 | t2a[im - 1][0] = 1.0; |
---|
256 | } |
---|
257 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
258 | { |
---|
259 | t2a[im - 1][jm - 1] = 1.0; |
---|
260 | } |
---|
261 | if (gps[procid]->neighbors[UP] == -1) |
---|
262 | { |
---|
263 | t1a = (double *) t2a[0]; |
---|
264 | for (j = firstcol; j <= lastcol; j++) |
---|
265 | { |
---|
266 | t1a[j] = 1.0; |
---|
267 | } |
---|
268 | } |
---|
269 | if (gps[procid]->neighbors[DOWN] == -1) |
---|
270 | { |
---|
271 | t1a = (double *) t2a[im - 1]; |
---|
272 | for (j = firstcol; j <= lastcol; j++) |
---|
273 | { |
---|
274 | t1a[j] = 1.0; |
---|
275 | } |
---|
276 | } |
---|
277 | if (gps[procid]->neighbors[LEFT] == -1) |
---|
278 | { |
---|
279 | for (j = firstrow; j <= lastrow; j++) |
---|
280 | { |
---|
281 | t2a[j][0] = 1.0; |
---|
282 | } |
---|
283 | } |
---|
284 | if (gps[procid]->neighbors[RIGHT] == -1) |
---|
285 | { |
---|
286 | for (j = firstrow; j <= lastrow; j++) |
---|
287 | { |
---|
288 | t2a[j][jm - 1] = 1.0; |
---|
289 | } |
---|
290 | } |
---|
291 | for (i = firstrow; i <= lastrow; i++) |
---|
292 | { |
---|
293 | t1a = (double *) t2a[i]; |
---|
294 | for (iindex = firstcol; iindex <= lastcol; iindex++) |
---|
295 | { |
---|
296 | t1a[iindex] = 0.0; |
---|
297 | } |
---|
298 | } |
---|
299 | |
---|
300 | // BARRIER |
---|
301 | barrier_start = giet_proctime(); |
---|
302 | sqt_barrier_wait( &barrier ); |
---|
303 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
304 | |
---|
305 | if ( VERBOSE ) { printf("\n@@@ Thread %d pass second barrier in slave()\n", procid ); } |
---|
306 | |
---|
307 | /* compute psib array (one-time computation) and integrate into psibi */ |
---|
308 | istart = 1; |
---|
309 | iend = istart + gps[procid]->rel_num_y[numlev - 1] - 1; |
---|
310 | jstart = 1; |
---|
311 | jend = jstart + gps[procid]->rel_num_x[numlev - 1] - 1; |
---|
312 | ist = istart; |
---|
313 | ien = iend; |
---|
314 | jst = jstart; |
---|
315 | jen = jend; |
---|
316 | |
---|
317 | if (gps[procid]->neighbors[UP] == -1) |
---|
318 | { |
---|
319 | istart = 0; |
---|
320 | } |
---|
321 | if (gps[procid]->neighbors[LEFT] == -1) |
---|
322 | { |
---|
323 | jstart = 0; |
---|
324 | } |
---|
325 | if (gps[procid]->neighbors[DOWN] == -1) |
---|
326 | { |
---|
327 | iend = im - 1; |
---|
328 | } |
---|
329 | if (gps[procid]->neighbors[RIGHT] == -1) |
---|
330 | { |
---|
331 | jend = jm - 1; |
---|
332 | } |
---|
333 | |
---|
334 | t2a = (double **) rhs_multi[procid][numlev - 1]; |
---|
335 | t2b = (double **) psib[procid]; |
---|
336 | for (i = istart; i <= iend; i++) |
---|
337 | { |
---|
338 | t1a = (double *) t2a[i]; |
---|
339 | t1b = (double *) t2b[i]; |
---|
340 | for (j = jstart; j <= jend; j++) |
---|
341 | { |
---|
342 | t1a[j] = t1b[j] * ressqr; |
---|
343 | } |
---|
344 | } |
---|
345 | t2a = (double **) q_multi[procid][numlev - 1]; |
---|
346 | if (gps[procid]->neighbors[UP] == -1) |
---|
347 | { |
---|
348 | t1a = (double *) t2a[0]; |
---|
349 | t1b = (double *) t2b[0]; |
---|
350 | for (j = jstart; j <= jend; j++) |
---|
351 | { |
---|
352 | t1a[j] = t1b[j]; |
---|
353 | } |
---|
354 | } |
---|
355 | if (gps[procid]->neighbors[DOWN] == -1) |
---|
356 | { |
---|
357 | t1a = (double *) t2a[im - 1]; |
---|
358 | t1b = (double *) t2b[im - 1]; |
---|
359 | for (j = jstart; j <= jend; j++) |
---|
360 | { |
---|
361 | t1a[j] = t1b[j]; |
---|
362 | } |
---|
363 | } |
---|
364 | if (gps[procid]->neighbors[LEFT] == -1) |
---|
365 | { |
---|
366 | for (i = istart; i <= iend; i++) |
---|
367 | { |
---|
368 | t2a[i][0] = t2b[i][0]; |
---|
369 | } |
---|
370 | } |
---|
371 | if (gps[procid]->neighbors[RIGHT] == -1) |
---|
372 | { |
---|
373 | for (i = istart; i <= iend; i++) |
---|
374 | { |
---|
375 | t2a[i][jm - 1] = t2b[i][jm - 1]; |
---|
376 | } |
---|
377 | } |
---|
378 | |
---|
379 | // BARRIER |
---|
380 | barrier_start = giet_proctime(); |
---|
381 | sqt_barrier_wait( &barrier ); |
---|
382 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
383 | |
---|
384 | if ( VERBOSE ) { printf("\n@@@ Thread %d pass third barrier in slave()\n", procid ); } |
---|
385 | |
---|
386 | t2a = (double **) psib[procid]; |
---|
387 | j = gps[procid]->neighbors[UP]; |
---|
388 | if (j != -1) |
---|
389 | { |
---|
390 | t1a = (double *) t2a[0]; |
---|
391 | t1b = (double *) psib[j][im - 2]; |
---|
392 | for (i = 1; i < jm - 1; i++) |
---|
393 | { |
---|
394 | t1a[i] = t1b[i]; |
---|
395 | } |
---|
396 | } |
---|
397 | j = gps[procid]->neighbors[DOWN]; |
---|
398 | if (j != -1) |
---|
399 | { |
---|
400 | t1a = (double *) t2a[im - 1]; |
---|
401 | t1b = (double *) psib[j][1]; |
---|
402 | for (i = 1; i < jm - 1; i++) { |
---|
403 | t1a[i] = t1b[i]; |
---|
404 | } |
---|
405 | } |
---|
406 | j = gps[procid]->neighbors[LEFT]; |
---|
407 | if (j != -1) |
---|
408 | { |
---|
409 | t2b = (double **) psib[j]; |
---|
410 | for (i = 1; i < im - 1; i++) |
---|
411 | { |
---|
412 | t2a[i][0] = t2b[i][jm - 2]; |
---|
413 | } |
---|
414 | } |
---|
415 | j = gps[procid]->neighbors[RIGHT]; |
---|
416 | if (j != -1) |
---|
417 | { |
---|
418 | t2b = (double **) psib[j]; |
---|
419 | for (i = 1; i < im - 1; i++) |
---|
420 | { |
---|
421 | t2a[i][jm - 1] = t2b[i][1]; |
---|
422 | } |
---|
423 | } |
---|
424 | |
---|
425 | t2a = (double **) q_multi[procid][numlev - 1]; |
---|
426 | t2b = (double **) psib[procid]; |
---|
427 | fac = 1.0 / (4.0 - ressqr * eig2); |
---|
428 | for (i = ist; i <= ien; i++) { |
---|
429 | t1a = (double *) t2a[i]; |
---|
430 | t1b = (double *) t2b[i]; |
---|
431 | t1c = (double *) t2b[i - 1]; |
---|
432 | t1d = (double *) t2b[i + 1]; |
---|
433 | for (j = jst; j <= jen; j++) { |
---|
434 | t1a[j] = fac * (t1d[j] + t1c[j] + t1b[j + 1] + t1b[j - 1] - ressqr * t1b[j]); |
---|
435 | } |
---|
436 | } |
---|
437 | |
---|
438 | if ( VERBOSE ) { printf("\n@@@ Thread %d in slave() call multi\n", procid ); } |
---|
439 | |
---|
440 | multig(procid); |
---|
441 | |
---|
442 | for (i = istart; i <= iend; i++) |
---|
443 | { |
---|
444 | t1a = (double *) t2a[i]; |
---|
445 | t1b = (double *) t2b[i]; |
---|
446 | for (j = jstart; j <= jend; j++) |
---|
447 | { |
---|
448 | t1b[j] = t1a[j]; |
---|
449 | } |
---|
450 | } |
---|
451 | |
---|
452 | // BARRIER |
---|
453 | barrier_start = giet_proctime(); |
---|
454 | sqt_barrier_wait( &barrier ); |
---|
455 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
456 | |
---|
457 | if ( VERBOSE ) { printf("\n@@@ Thread %d pass fourth barrier in slave()\n", procid ); } |
---|
458 | |
---|
459 | /* update the local running sum psibipriv by summing all the resulting |
---|
460 | values in that process's share of the psib matrix */ |
---|
461 | |
---|
462 | t2a = (double **) psib[procid]; |
---|
463 | psibipriv = 0.0; |
---|
464 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
465 | { |
---|
466 | psibipriv = psibipriv + 0.25 * (t2a[0][0]); |
---|
467 | } |
---|
468 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
469 | { |
---|
470 | psibipriv = psibipriv + 0.25 * (t2a[0][jm - 1]); |
---|
471 | } |
---|
472 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
473 | { |
---|
474 | psibipriv = psibipriv + 0.25 * (t2a[im - 1][0]); |
---|
475 | } |
---|
476 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
477 | { |
---|
478 | psibipriv = psibipriv + 0.25 * (t2a[im - 1][jm - 1]); |
---|
479 | } |
---|
480 | if (gps[procid]->neighbors[UP] == -1) |
---|
481 | { |
---|
482 | t1a = (double *) t2a[0]; |
---|
483 | for (j = firstcol; j <= lastcol; j++) |
---|
484 | { |
---|
485 | psibipriv = psibipriv + 0.5 * t1a[j]; |
---|
486 | } |
---|
487 | } |
---|
488 | if (gps[procid]->neighbors[DOWN] == -1) |
---|
489 | { |
---|
490 | t1a = (double *) t2a[im - 1]; |
---|
491 | for (j = firstcol; j <= lastcol; j++) |
---|
492 | { |
---|
493 | psibipriv = psibipriv + 0.5 * t1a[j]; |
---|
494 | } |
---|
495 | } |
---|
496 | if (gps[procid]->neighbors[LEFT] == -1) |
---|
497 | { |
---|
498 | for (j = firstrow; j <= lastrow; j++) |
---|
499 | { |
---|
500 | psibipriv = psibipriv + 0.5 * t2a[j][0]; |
---|
501 | } |
---|
502 | } |
---|
503 | if (gps[procid]->neighbors[RIGHT] == -1) |
---|
504 | { |
---|
505 | for (j = firstrow; j <= lastrow; j++) |
---|
506 | { |
---|
507 | psibipriv = psibipriv + 0.5 * t2a[j][jm - 1]; |
---|
508 | } |
---|
509 | } |
---|
510 | for (i = firstrow; i <= lastrow; i++) |
---|
511 | { |
---|
512 | t1a = (double *) t2a[i]; |
---|
513 | for (iindex = firstcol; iindex <= lastcol; iindex++) |
---|
514 | { |
---|
515 | psibipriv = psibipriv + t1a[iindex]; |
---|
516 | } |
---|
517 | } |
---|
518 | |
---|
519 | /* update the shared variable psibi by summing all the psibiprivs |
---|
520 | of the individual processes into it. note that this combined |
---|
521 | private and shared sum method avoids accessing the shared |
---|
522 | variable psibi once for every element of the matrix. */ |
---|
523 | |
---|
524 | sqt_lock_acquire( &psibi_lock ); |
---|
525 | global->psibi = global->psibi + psibipriv; |
---|
526 | sqt_lock_release( &psibi_lock ); |
---|
527 | |
---|
528 | /* initialize psim matrices |
---|
529 | |
---|
530 | if there is more than one process, then split the processes |
---|
531 | between the two psim matrices; otherwise, let the single process |
---|
532 | work on one first and then the other */ |
---|
533 | |
---|
534 | for (psiindex = 0; psiindex <= 1; psiindex++) |
---|
535 | { |
---|
536 | t2a = (double **) psim[procid][psiindex]; |
---|
537 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
538 | { |
---|
539 | t2a[0][0] = 0.0; |
---|
540 | } |
---|
541 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
542 | { |
---|
543 | t2a[im - 1][0] = 0.0; |
---|
544 | } |
---|
545 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
546 | { |
---|
547 | t2a[0][jm - 1] = 0.0; |
---|
548 | } |
---|
549 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
550 | { |
---|
551 | t2a[im - 1][jm - 1] = 0.0; |
---|
552 | } |
---|
553 | if (gps[procid]->neighbors[UP] == -1) |
---|
554 | { |
---|
555 | t1a = (double *) t2a[0]; |
---|
556 | for (j = firstcol; j <= lastcol; j++) |
---|
557 | { |
---|
558 | t1a[j] = 0.0; |
---|
559 | } |
---|
560 | } |
---|
561 | if (gps[procid]->neighbors[DOWN] == -1) |
---|
562 | { |
---|
563 | t1a = (double *) t2a[im - 1]; |
---|
564 | for (j = firstcol; j <= lastcol; j++) |
---|
565 | { |
---|
566 | t1a[j] = 0.0; |
---|
567 | } |
---|
568 | } |
---|
569 | if (gps[procid]->neighbors[LEFT] == -1) |
---|
570 | { |
---|
571 | for (j = firstrow; j <= lastrow; j++) |
---|
572 | { |
---|
573 | t2a[j][0] = 0.0; |
---|
574 | } |
---|
575 | } |
---|
576 | if (gps[procid]->neighbors[RIGHT] == -1) |
---|
577 | { |
---|
578 | for (j = firstrow; j <= lastrow; j++) |
---|
579 | { |
---|
580 | t2a[j][jm - 1] = 0.0; |
---|
581 | } |
---|
582 | } |
---|
583 | for (i = firstrow; i <= lastrow; i++) |
---|
584 | { |
---|
585 | t1a = (double *) t2a[i]; |
---|
586 | for (iindex = firstcol; iindex <= lastcol; iindex++) |
---|
587 | { |
---|
588 | t1a[iindex] = 0.0; |
---|
589 | } |
---|
590 | } |
---|
591 | } |
---|
592 | |
---|
593 | /* initialize psi matrices the same way */ |
---|
594 | |
---|
595 | for (psiindex = 0; psiindex <= 1; psiindex++) |
---|
596 | { |
---|
597 | t2a = (double **) psi[procid][psiindex]; |
---|
598 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
599 | { |
---|
600 | t2a[0][0] = 0.0; |
---|
601 | } |
---|
602 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
603 | { |
---|
604 | t2a[0][jm - 1] = 0.0; |
---|
605 | } |
---|
606 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
607 | { |
---|
608 | t2a[im - 1][0] = 0.0; |
---|
609 | } |
---|
610 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
611 | { |
---|
612 | t2a[im - 1][jm - 1] = 0.0; |
---|
613 | } |
---|
614 | if (gps[procid]->neighbors[UP] == -1) |
---|
615 | { |
---|
616 | t1a = (double *) t2a[0]; |
---|
617 | for (j = firstcol; j <= lastcol; j++) |
---|
618 | { |
---|
619 | t1a[j] = 0.0; |
---|
620 | } |
---|
621 | } |
---|
622 | if (gps[procid]->neighbors[DOWN] == -1) |
---|
623 | { |
---|
624 | t1a = (double *) t2a[im - 1]; |
---|
625 | for (j = firstcol; j <= lastcol; j++) |
---|
626 | { |
---|
627 | t1a[j] = 0.0; |
---|
628 | } |
---|
629 | } |
---|
630 | if (gps[procid]->neighbors[LEFT] == -1) |
---|
631 | { |
---|
632 | for (j = firstrow; j <= lastrow; j++) |
---|
633 | { |
---|
634 | t2a[j][0] = 0.0; |
---|
635 | } |
---|
636 | } |
---|
637 | if (gps[procid]->neighbors[RIGHT] == -1) |
---|
638 | { |
---|
639 | for (j = firstrow; j <= lastrow; j++) |
---|
640 | { |
---|
641 | t2a[j][jm - 1] = 0.0; |
---|
642 | } |
---|
643 | } |
---|
644 | for (i = firstrow; i <= lastrow; i++) |
---|
645 | { |
---|
646 | t1a = (double *) t2a[i]; |
---|
647 | for (iindex = firstcol; iindex <= lastcol; iindex++) |
---|
648 | { |
---|
649 | t1a[iindex] = 0.0; |
---|
650 | } |
---|
651 | } |
---|
652 | } |
---|
653 | |
---|
654 | /* compute input curl of wind stress */ |
---|
655 | |
---|
656 | t2a = (double **) tauz[procid]; |
---|
657 | ysca1 = .5 * ysca; |
---|
658 | factor = -dt0 * pi / ysca1; |
---|
659 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
660 | { |
---|
661 | t2a[0][0] = 0.0; |
---|
662 | } |
---|
663 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
664 | { |
---|
665 | t2a[im - 1][0] = 0.0; |
---|
666 | } |
---|
667 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
668 | { |
---|
669 | sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1; |
---|
670 | sintemp = sin(sintemp); |
---|
671 | t2a[0][jm - 1] = factor * sintemp; |
---|
672 | } |
---|
673 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
674 | { |
---|
675 | sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1; |
---|
676 | sintemp = sin(sintemp); |
---|
677 | t2a[im - 1][jm - 1] = factor * sintemp; |
---|
678 | } |
---|
679 | if (gps[procid]->neighbors[UP] == -1) |
---|
680 | { |
---|
681 | t1a = (double *) t2a[0]; |
---|
682 | for (j = firstcol; j <= lastcol; j++) |
---|
683 | { |
---|
684 | sintemp = pi * ((double) j + j_off) * res / ysca1; |
---|
685 | sintemp = sin(sintemp); |
---|
686 | curlt = factor * sintemp; |
---|
687 | t1a[j] = curlt; |
---|
688 | } |
---|
689 | } |
---|
690 | if (gps[procid]->neighbors[DOWN] == -1) |
---|
691 | { |
---|
692 | t1a = (double *) t2a[im - 1]; |
---|
693 | for (j = firstcol; j <= lastcol; j++) |
---|
694 | { |
---|
695 | sintemp = pi * ((double) j + j_off) * res / ysca1; |
---|
696 | sintemp = sin(sintemp); |
---|
697 | curlt = factor * sintemp; |
---|
698 | t1a[j] = curlt; |
---|
699 | } |
---|
700 | } |
---|
701 | if (gps[procid]->neighbors[LEFT] == -1) |
---|
702 | { |
---|
703 | for (j = firstrow; j <= lastrow; j++) |
---|
704 | { |
---|
705 | t2a[j][0] = 0.0; |
---|
706 | } |
---|
707 | } |
---|
708 | if (gps[procid]->neighbors[RIGHT] == -1) |
---|
709 | { |
---|
710 | sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1; |
---|
711 | sintemp = sin(sintemp); |
---|
712 | curlt = factor * sintemp; |
---|
713 | for (j = firstrow; j <= lastrow; j++) |
---|
714 | { |
---|
715 | t2a[j][jm - 1] = curlt; |
---|
716 | } |
---|
717 | } |
---|
718 | for (i = firstrow; i <= lastrow; i++) |
---|
719 | { |
---|
720 | t1a = (double *) t2a[i]; |
---|
721 | for (iindex = firstcol; iindex <= lastcol; iindex++) |
---|
722 | { |
---|
723 | sintemp = pi * ((double) iindex + j_off) * res / ysca1; |
---|
724 | sintemp = sin(sintemp); |
---|
725 | curlt = factor * sintemp; |
---|
726 | t1a[iindex] = curlt; |
---|
727 | } |
---|
728 | } |
---|
729 | |
---|
730 | // BARRIER |
---|
731 | barrier_start = giet_proctime(); |
---|
732 | sqt_barrier_wait( &barrier ); |
---|
733 | gps[procid]->sync_time += (giet_proctime() - barrier_start); |
---|
734 | |
---|
735 | if ( VERBOSE ) { printf("\n@@@ Thread %d pass fifth barrier in slave()\n", procid ); } |
---|
736 | |
---|
737 | /*************************************************************** |
---|
738 | one-time stuff over at this point |
---|
739 | ***************************************************************/ |
---|
740 | |
---|
741 | while (!endflag) |
---|
742 | { |
---|
743 | while ((!dayflag) || (!dhourflag)) |
---|
744 | { |
---|
745 | dayflag = 0; |
---|
746 | dhourflag = 0; |
---|
747 | |
---|
748 | if ( VERBOSE ) { printf("\n@@@ Thread %d call slave2() : step %d\n", procid, nstep ); } |
---|
749 | |
---|
750 | slave2(procid, firstrow, lastrow, numrows, firstcol, lastcol, numcols); |
---|
751 | |
---|
752 | /* update time and step number |
---|
753 | note that these time and step variables are private i.e. every |
---|
754 | process has its own copy and keeps track of its own time */ |
---|
755 | |
---|
756 | ttime = ttime + dtau; |
---|
757 | nstep = nstep + 1; |
---|
758 | day = ttime / 86400.0; |
---|
759 | |
---|
760 | if (day > ((double) outday0)) |
---|
761 | { |
---|
762 | dayflag = 1; |
---|
763 | iday = (long) day; |
---|
764 | dhour = dhour + dtau; |
---|
765 | if (dhour >= 86400.0) |
---|
766 | { |
---|
767 | dhourflag = 1; |
---|
768 | } |
---|
769 | } |
---|
770 | } // end while |
---|
771 | dhour = 0.0; |
---|
772 | |
---|
773 | t2a = (double **) psium[procid]; |
---|
774 | t2b = (double **) psim[procid][0]; |
---|
775 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
776 | { |
---|
777 | t2a[0][0] = t2a[0][0] + t2b[0][0]; |
---|
778 | } |
---|
779 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
780 | { |
---|
781 | t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0]; |
---|
782 | } |
---|
783 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
784 | { |
---|
785 | t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1]; |
---|
786 | } |
---|
787 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
788 | { |
---|
789 | t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1]; |
---|
790 | } |
---|
791 | if (gps[procid]->neighbors[UP] == -1) |
---|
792 | { |
---|
793 | t1a = (double *) t2a[0]; |
---|
794 | t1b = (double *) t2b[0]; |
---|
795 | for (j = firstcol; j <= lastcol; j++) |
---|
796 | { |
---|
797 | t1a[j] = t1a[j] + t1b[j]; |
---|
798 | } |
---|
799 | } |
---|
800 | if (gps[procid]->neighbors[DOWN] == -1) |
---|
801 | { |
---|
802 | t1a = (double *) t2a[im - 1]; |
---|
803 | t1b = (double *) t2b[im - 1]; |
---|
804 | for (j = firstcol; j <= lastcol; j++) |
---|
805 | { |
---|
806 | t1a[j] = t1a[j] + t1b[j]; |
---|
807 | } |
---|
808 | } |
---|
809 | if (gps[procid]->neighbors[LEFT] == -1) |
---|
810 | { |
---|
811 | for (j = firstrow; j <= lastrow; j++) |
---|
812 | { |
---|
813 | t2a[j][0] = t2a[j][0] + t2b[j][0]; |
---|
814 | } |
---|
815 | } |
---|
816 | if (gps[procid]->neighbors[RIGHT] == -1) |
---|
817 | { |
---|
818 | for (j = firstrow; j <= lastrow; j++) |
---|
819 | { |
---|
820 | t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1]; |
---|
821 | } |
---|
822 | } |
---|
823 | for (i = firstrow; i <= lastrow; i++) |
---|
824 | { |
---|
825 | t1a = (double *) t2a[i]; |
---|
826 | t1b = (double *) t2b[i]; |
---|
827 | for (iindex = firstcol; iindex <= lastcol; iindex++) |
---|
828 | { |
---|
829 | t1a[iindex] = t1a[iindex] + t1b[iindex]; |
---|
830 | } |
---|
831 | } |
---|
832 | |
---|
833 | /* update values of psilm array to psilm + psim[2] */ |
---|
834 | |
---|
835 | t2a = (double **) psilm[procid]; |
---|
836 | t2b = (double **) psim[procid][1]; |
---|
837 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
838 | { |
---|
839 | t2a[0][0] = t2a[0][0] + t2b[0][0]; |
---|
840 | } |
---|
841 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[LEFT] == -1)) |
---|
842 | { |
---|
843 | t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0]; |
---|
844 | } |
---|
845 | if ((gps[procid]->neighbors[UP] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
846 | { |
---|
847 | t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1]; |
---|
848 | } |
---|
849 | if ((gps[procid]->neighbors[DOWN] == -1) && (gps[procid]->neighbors[RIGHT] == -1)) |
---|
850 | { |
---|
851 | t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1]; |
---|
852 | } |
---|
853 | if (gps[procid]->neighbors[UP] == -1) |
---|
854 | { |
---|
855 | t1a = (double *) t2a[0]; |
---|
856 | t1b = (double *) t2b[0]; |
---|
857 | for (j = firstcol; j <= lastcol; j++) |
---|
858 | { |
---|
859 | t1a[j] = t1a[j] + t1b[j]; |
---|
860 | } |
---|
861 | } |
---|
862 | if (gps[procid]->neighbors[DOWN] == -1) |
---|
863 | { |
---|
864 | t1a = (double *) t2a[im - 1]; |
---|
865 | t1b = (double *) t2b[im - 1]; |
---|
866 | for (j = firstcol; j <= lastcol; j++) |
---|
867 | { |
---|
868 | t1a[j] = t1a[j] + t1b[j]; |
---|
869 | } |
---|
870 | } |
---|
871 | if (gps[procid]->neighbors[LEFT] == -1) |
---|
872 | { |
---|
873 | for (j = firstrow; j <= lastrow; j++) |
---|
874 | { |
---|
875 | t2a[j][0] = t2a[j][0] + t2b[j][0]; |
---|
876 | } |
---|
877 | } |
---|
878 | if (gps[procid]->neighbors[RIGHT] == -1) |
---|
879 | { |
---|
880 | for (j = firstrow; j <= lastrow; j++) |
---|
881 | { |
---|
882 | t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1]; |
---|
883 | } |
---|
884 | } |
---|
885 | for (i = firstrow; i <= lastrow; i++) |
---|
886 | { |
---|
887 | t1a = (double *) t2a[i]; |
---|
888 | t1b = (double *) t2b[i]; |
---|
889 | for (iindex = firstcol; iindex <= lastcol; iindex++) |
---|
890 | { |
---|
891 | t1a[iindex] = t1a[iindex] + t1b[iindex]; |
---|
892 | } |
---|
893 | } |
---|
894 | if (iday >= (long) outday3) |
---|
895 | { |
---|
896 | endflag = 1; |
---|
897 | } |
---|
898 | } // end while endflag |
---|
899 | |
---|
900 | gps[procid]->total_time = giet_proctime() - (gps[procid]->total_time); |
---|
901 | |
---|
902 | if ( procid != MASTER ) giet_pthread_exit("slave completed"); |
---|
903 | |
---|
904 | } // end slave() |
---|