Context Navigation

source: soft/giet_vm/applications/ocean/slave1.C @ 792

Last change on this file since 792 was 598, checked in by guerin, 10 years ago
ocean: fix app broken by r589
File size: 27.1 KB

Line
1	/*************************************************************************/
2	/* */
3	/* Copyright (c) 1994 Stanford University */
4	/* */
5	/* All rights reserved. */
6	/* */
7	/* Permission is given to use, copy, and modify this software for any */
8	/* non-commercial purpose as long as this copyright notice is not */
9	/* removed. All other uses, including redistribution in whole or in */
10	/* part, are forbidden without prior written permission. */
11	/* */
12	/* This software is provided with absolutely no warranty and no */
13	/* support. */
14	/* */
15	/*************************************************************************/
16
17	/* ****************
18	subroutine slave
19	**************** */
20
21	EXTERN_ENV
22
23	#include <stdio.h>
24	#include <math.h>
25	#include <stdlib.h>
26
27	#include "decs.h"
28
29	void slave(long *ptr_procid)
30	{
31	long i;
32	long j;
33	long nstep;
34	long iindex;
35	long iday;
36	double ysca1;
37	double y;
38	double factor;
39	double sintemp;
40	double curlt;
41	double ressqr;
42	long istart;
43	long iend;
44	long jstart;
45	long jend;
46	long ist;
47	long ien;
48	long jst;
49	long jen;
50	double fac;
51	long dayflag = 0;
52	long dhourflag = 0;
53	long endflag = 0;
54	long firstrow;
55	long lastrow;
56	long numrows;
57	long firstcol;
58	long lastcol;
59	long numcols;
60	long psiindex;
61	double psibipriv;
62	double ttime;
63	double dhour;
64	double day;
65	long procid;
66	long j_off = 0;
67	unsigned long t1;
68	double **t2a;
69	double **t2b;
70	double *t1a;
71	double *t1b;
72	double *t1c;
73	double *t1d;
74
75
76	/*
77	LOCK(locks->idlock)
78	procid = global->id;
79	global->id = global->id+1;
80	UNLOCK(locks->idlock)
81	*/
82
83	procid = *ptr_procid;
84	ressqr = lev_res[numlev - 1] * lev_res[numlev - 1];
85
86	#if defined(MULTIPLE_BARRIERS)
87	BARRIER(bars->sl_prini, nprocs)
88	#else
89	BARRIER(bars->barrier, nprocs)
90	#endif
91	/* POSSIBLE ENHANCEMENT: Here is where one might pin processes to
92	processors to avoid migration. */
93	/* POSSIBLE ENHANCEMENT: Here is where one might distribute
94	data structures across physically distributed memories as
95	desired.
96
97	One way to do this is as follows. The function allocate(START,SIZE,I)
98	is assumed to place all addresses x such that
99	(START <= x < START+SIZE) on node I.
100
101	long d_size;
102	unsigned long g_size;
103	unsigned long mg_size;
104
105	if (procid == MASTER) {
106	g_size = ((jmx[numlev-1]-2)/xprocs+2)((imx[numlev-1]-2)/yprocs+2)siz
107	eof(double) +
108	((imx[numlev-1]-2)/yprocs+2)sizeof(double );
109
110	mg_size = numlevsizeof(double *);
111	for (i=0;i<numlev;i++) {
112	mg_size+=((imx[i]-2)/yprocs+2)((jmx[i]-2)/xprocs+2)sizeof(double)+
113	((imx[i]-2)/yprocs+2)sizeof(double );
114	}
115	for (i= 0;i<nprocs;i++) {
116	d_size = 2sizeof(double *);
117	allocate((unsigned long) psi[i],d_size,i);
118	allocate((unsigned long) psim[i],d_size,i);
119	allocate((unsigned long) work1[i],d_size,i);
120	allocate((unsigned long) work4[i],d_size,i);
121	allocate((unsigned long) work5[i],d_size,i);
122	allocate((unsigned long) work7[i],d_size,i);
123	allocate((unsigned long) temparray[i],d_size,i);
124	allocate((unsigned long) psi[i][0],g_size,i);
125	allocate((unsigned long) psi[i][1],g_size,i);
126	allocate((unsigned long) psim[i][0],g_size,i);
127	allocate((unsigned long) psim[i][1],g_size,i);
128	allocate((unsigned long) psium[i],g_size,i);
129	allocate((unsigned long) psilm[i],g_size,i);
130	allocate((unsigned long) psib[i],g_size,i);
131	allocate((unsigned long) ga[i],g_size,i);
132	allocate((unsigned long) gb[i],g_size,i);
133	allocate((unsigned long) work1[i][0],g_size,i);
134	allocate((unsigned long) work1[i][1],g_size,i);
135	allocate((unsigned long) work2[i],g_size,i);
136	allocate((unsigned long) work3[i],g_size,i);
137	allocate((unsigned long) work4[i][0],g_size,i);
138	allocate((unsigned long) work4[i][1],g_size,i);
139	allocate((unsigned long) work5[i][0],g_size,i);
140	allocate((unsigned long) work5[i][1],g_size,i);
141	allocate((unsigned long) work6[i],g_size,i);
142	allocate((unsigned long) work7[i][0],g_size,i);
143	allocate((unsigned long) work7[i][1],g_size,i);
144	allocate((unsigned long) temparray[i][0],g_size,i);
145	allocate((unsigned long) temparray[i][1],g_size,i);
146	allocate((unsigned long) tauz[i],g_size,i);
147	allocate((unsigned long) oldga[i],g_size,i);
148	allocate((unsigned long) oldgb[i],g_size,i);
149	d_size = numlev * sizeof(long);
150	allocate((unsigned long) gp[i].rel_num_x,d_size,i);
151	allocate((unsigned long) gp[i].rel_num_y,d_size,i);
152	allocate((unsigned long) gp[i].eist,d_size,i);
153	allocate((unsigned long) gp[i].ejst,d_size,i);
154	allocate((unsigned long) gp[i].oist,d_size,i);
155	allocate((unsigned long) gp[i].ojst,d_size,i);
156	allocate((unsigned long) gp[i].rlist,d_size,i);
157	allocate((unsigned long) gp[i].rljst,d_size,i);
158	allocate((unsigned long) gp[i].rlien,d_size,i);
159	allocate((unsigned long) gp[i].rljen,d_size,i);
160
161	allocate((unsigned long) q_multi[i],mg_size,i);
162	allocate((unsigned long) rhs_multi[i],mg_size,i);
163	allocate((unsigned long) &(gp[i]),sizeof(struct Global_Private),i);
164	}
165	}
166
167	*/
168	t2a = (double **) oldga[procid];
169	t2b = (double **) oldgb[procid];
170	for (i = 0; i < im; i++) {
171	t1a = (double *) t2a[i];
172	t1b = (double *) t2b[i];
173	for (j = 0; j < jm; j++) {
174	t1a[j] = 0.0;
175	t1b[j] = 0.0;
176	}
177	}
178
179	firstcol = 1;
180	lastcol = firstcol + gp[procid].rel_num_x[numlev - 1] - 1;
181	firstrow = 1;
182	lastrow = firstrow + gp[procid].rel_num_y[numlev - 1] - 1;
183	numcols = gp[procid].rel_num_x[numlev - 1];
184	numrows = gp[procid].rel_num_y[numlev - 1];
185	j_off = (gp[procid].colnum) numcols;
186
187	/*
188	if (procid > nprocs/2) {
189	psinum = 2;
190	} else {
191	psinum = 1;
192	}
193	*/
194
195	/* every process gets its own copy of the timing variables to avoid
196	contention at shared memory locations. here, these variables
197	are initialized. */
198
199	ttime = 0.0;
200	dhour = 0.0;
201	nstep = 0;
202	day = 0.0;
203
204	ysca1 = 0.5 * ysca;
205
206	if (*gp[procid].lpid == MASTER) {
207
208	f = (double ) G_MALLOC(oim sizeof(double), procid);
209
210	t1a = (double *) f;
211	for (iindex = 0; iindex <= jmx[numlev - 1] - 1; iindex++) {
212	y = ((double) iindex) * res;
213	t1a[iindex] = f0 + beta * (y - ysca1);
214	}
215	}
216
217	t2a = (double **) psium[procid];
218	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
219	t2a[0][0] = 0.0;
220	}
221	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
222	t2a[im - 1][0] = 0.0;
223	}
224	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
225	t2a[0][jm - 1] = 0.0;
226	}
227	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
228	t2a[im - 1][jm - 1] = 0.0;
229	}
230	if (gp[procid].neighbors[UP] == -1) {
231	t1a = (double *) t2a[0];
232	for (j = firstcol; j <= lastcol; j++) {
233	t1a[j] = 0.0;
234	}
235	}
236	if (gp[procid].neighbors[DOWN] == -1) {
237	t1a = (double *) t2a[im - 1];
238	for (j = firstcol; j <= lastcol; j++) {
239	t1a[j] = 0.0;
240	}
241	}
242	if (gp[procid].neighbors[LEFT] == -1) {
243	for (j = firstrow; j <= lastrow; j++) {
244	t2a[j][0] = 0.0;
245	}
246	}
247	if (gp[procid].neighbors[RIGHT] == -1) {
248	for (j = firstrow; j <= lastrow; j++) {
249	t2a[j][jm - 1] = 0.0;
250	}
251	}
252
253	for (i = firstrow; i <= lastrow; i++) {
254	t1a = (double *) t2a[i];
255	for (iindex = firstcol; iindex <= lastcol; iindex++) {
256	t1a[iindex] = 0.0;
257	}
258	}
259	t2a = (double **) psilm[procid];
260	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
261	t2a[0][0] = 0.0;
262	}
263	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
264	t2a[im - 1][0] = 0.0;
265	}
266	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
267	t2a[0][jm - 1] = 0.0;
268	}
269	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
270	t2a[im - 1][jm - 1] = 0.0;
271	}
272	if (gp[procid].neighbors[UP] == -1) {
273	t1a = (double *) t2a[0];
274	for (j = firstcol; j <= lastcol; j++) {
275	t1a[j] = 0.0;
276	}
277	}
278	if (gp[procid].neighbors[DOWN] == -1) {
279	t1a = (double *) t2a[im - 1];
280	for (j = firstcol; j <= lastcol; j++) {
281	t1a[j] = 0.0;
282	}
283	}
284	if (gp[procid].neighbors[LEFT] == -1) {
285	for (j = firstrow; j <= lastrow; j++) {
286	t2a[j][0] = 0.0;
287	}
288	}
289	if (gp[procid].neighbors[RIGHT] == -1) {
290	for (j = firstrow; j <= lastrow; j++) {
291	t2a[j][jm - 1] = 0.0;
292	}
293	}
294	for (i = firstrow; i <= lastrow; i++) {
295	t1a = (double *) t2a[i];
296	for (iindex = firstcol; iindex <= lastcol; iindex++) {
297	t1a[iindex] = 0.0;
298	}
299	}
300
301	t2a = (double **) psib[procid];
302	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
303	t2a[0][0] = 1.0;
304	}
305	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
306	t2a[0][jm - 1] = 1.0;
307	}
308	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
309	t2a[im - 1][0] = 1.0;
310	}
311	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
312	t2a[im - 1][jm - 1] = 1.0;
313	}
314	if (gp[procid].neighbors[UP] == -1) {
315	t1a = (double *) t2a[0];
316	for (j = firstcol; j <= lastcol; j++) {
317	t1a[j] = 1.0;
318	}
319	}
320	if (gp[procid].neighbors[DOWN] == -1) {
321	t1a = (double *) t2a[im - 1];
322	for (j = firstcol; j <= lastcol; j++) {
323	t1a[j] = 1.0;
324	}
325	}
326	if (gp[procid].neighbors[LEFT] == -1) {
327	for (j = firstrow; j <= lastrow; j++) {
328	t2a[j][0] = 1.0;
329	}
330	}
331	if (gp[procid].neighbors[RIGHT] == -1) {
332	for (j = firstrow; j <= lastrow; j++) {
333	t2a[j][jm - 1] = 1.0;
334	}
335	}
336	for (i = firstrow; i <= lastrow; i++) {
337	t1a = (double *) t2a[i];
338	for (iindex = firstcol; iindex <= lastcol; iindex++) {
339	t1a[iindex] = 0.0;
340	}
341	}
342
343	/* wait until all processes have completed the above initialization */
344	#if defined(MULTIPLE_BARRIERS)
345	BARRIER(bars->sl_prini, nprocs)
346	#else
347	BARRIER(bars->barrier, nprocs)
348	#endif
349	/* compute psib array (one-time computation) and integrate into psibi */
350	istart = 1;
351	iend = istart + gp[procid].rel_num_y[numlev - 1] - 1;
352	jstart = 1;
353	jend = jstart + gp[procid].rel_num_x[numlev - 1] - 1;
354	ist = istart;
355	ien = iend;
356	jst = jstart;
357	jen = jend;
358
359	if (gp[procid].neighbors[UP] == -1) {
360	istart = 0;
361	}
362	if (gp[procid].neighbors[LEFT] == -1) {
363	jstart = 0;
364	}
365	if (gp[procid].neighbors[DOWN] == -1) {
366	iend = im - 1;
367	}
368	if (gp[procid].neighbors[RIGHT] == -1) {
369	jend = jm - 1;
370	}
371
372	t2a = (double **) rhs_multi[procid][numlev - 1];
373	t2b = (double **) psib[procid];
374	for (i = istart; i <= iend; i++) {
375	t1a = (double *) t2a[i];
376	t1b = (double *) t2b[i];
377	for (j = jstart; j <= jend; j++) {
378	t1a[j] = t1b[j] * ressqr;
379	}
380	}
381	t2a = (double **) q_multi[procid][numlev - 1];
382	if (gp[procid].neighbors[UP] == -1) {
383	t1a = (double *) t2a[0];
384	t1b = (double *) t2b[0];
385	for (j = jstart; j <= jend; j++) {
386	t1a[j] = t1b[j];
387	}
388	}
389	if (gp[procid].neighbors[DOWN] == -1) {
390	t1a = (double *) t2a[im - 1];
391	t1b = (double *) t2b[im - 1];
392	for (j = jstart; j <= jend; j++) {
393	t1a[j] = t1b[j];
394	}
395	}
396	if (gp[procid].neighbors[LEFT] == -1) {
397	for (i = istart; i <= iend; i++) {
398	t2a[i][0] = t2b[i][0];
399	}
400	}
401	if (gp[procid].neighbors[RIGHT] == -1) {
402	for (i = istart; i <= iend; i++) {
403	t2a[i][jm - 1] = t2b[i][jm - 1];
404	}
405	}
406
407	#if defined(MULTIPLE_BARRIERS)
408	BARRIER(bars->sl_psini, nprocs)
409	#else
410	BARRIER(bars->barrier, nprocs)
411	#endif
412
413	t2a = (double **) psib[procid];
414	j = gp[procid].neighbors[UP];
415	if (j != -1) {
416	t1a = (double *) t2a[0];
417	t1b = (double *) psib[j][im - 2];
418	for (i = 1; i < jm - 1; i++) {
419	t1a[i] = t1b[i];
420	}
421	}
422	j = gp[procid].neighbors[DOWN];
423	if (j != -1) {
424	t1a = (double *) t2a[im - 1];
425	t1b = (double *) psib[j][1];
426	for (i = 1; i < jm - 1; i++) {
427	t1a[i] = t1b[i];
428	}
429	}
430	j = gp[procid].neighbors[LEFT];
431	if (j != -1) {
432	t2b = (double **) psib[j];
433	for (i = 1; i < im - 1; i++) {
434	t2a[i][0] = t2b[i][jm - 2];
435	}
436	}
437	j = gp[procid].neighbors[RIGHT];
438	if (j != -1) {
439	t2b = (double **) psib[j];
440	for (i = 1; i < im - 1; i++) {
441	t2a[i][jm - 1] = t2b[i][1];
442	}
443	}
444
445	t2a = (double **) q_multi[procid][numlev - 1];
446	t2b = (double **) psib[procid];
447	fac = 1.0 / (4.0 - ressqr * eig2);
448	for (i = ist; i <= ien; i++) {
449	t1a = (double *) t2a[i];
450	t1b = (double *) t2b[i];
451	t1c = (double *) t2b[i - 1];
452	t1d = (double *) t2b[i + 1];
453	for (j = jst; j <= jen; j++) {
454	t1a[j] = fac * (t1d[j] + t1c[j] + t1b[j + 1] + t1b[j - 1] - ressqr * t1b[j]);
455	}
456	}
457
458	multig(procid);
459
460	for (i = istart; i <= iend; i++) {
461	t1a = (double *) t2a[i];
462	t1b = (double *) t2b[i];
463	for (j = jstart; j <= jend; j++) {
464	t1b[j] = t1a[j];
465	}
466	}
467
468	#if defined(MULTIPLE_BARRIERS)
469	BARRIER(bars->sl_prini, nprocs)
470	#else
471	BARRIER(bars->barrier, nprocs)
472	#endif
473
474	/* update the local running sum psibipriv by summing all the resulting
475	values in that process's share of the psib matrix */
476
477	t2a = (double **) psib[procid];
478	psibipriv = 0.0;
479	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
480	psibipriv = psibipriv + 0.25 * (t2a[0][0]);
481	}
482	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
483	psibipriv = psibipriv + 0.25 * (t2a[0][jm - 1]);
484	}
485	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
486	psibipriv = psibipriv + 0.25 * (t2a[im - 1][0]);
487	}
488	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
489	psibipriv = psibipriv + 0.25 * (t2a[im - 1][jm - 1]);
490	}
491	if (gp[procid].neighbors[UP] == -1) {
492	t1a = (double *) t2a[0];
493	for (j = firstcol; j <= lastcol; j++) {
494	psibipriv = psibipriv + 0.5 * t1a[j];
495	}
496	}
497	if (gp[procid].neighbors[DOWN] == -1) {
498	t1a = (double *) t2a[im - 1];
499	for (j = firstcol; j <= lastcol; j++) {
500	psibipriv = psibipriv + 0.5 * t1a[j];
501	}
502	}
503	if (gp[procid].neighbors[LEFT] == -1) {
504	for (j = firstrow; j <= lastrow; j++) {
505	psibipriv = psibipriv + 0.5 * t2a[j][0];
506	}
507	}
508	if (gp[procid].neighbors[RIGHT] == -1) {
509	for (j = firstrow; j <= lastrow; j++) {
510	psibipriv = psibipriv + 0.5 * t2a[j][jm - 1];
511	}
512	}
513	for (i = firstrow; i <= lastrow; i++) {
514	t1a = (double *) t2a[i];
515	for (iindex = firstcol; iindex <= lastcol; iindex++) {
516	psibipriv = psibipriv + t1a[iindex];
517	}
518	}
519
520	/* update the shared variable psibi by summing all the psibiprivs
521	of the individual processes into it. note that this combined
522	private and shared sum method avoids accessing the shared
523	variable psibi once for every element of the matrix. */
524
525	LOCK(locks->psibilock);
526	global->psibi = global->psibi + psibipriv;
527	UNLOCK(locks->psibilock);
528
529	/* initialize psim matrices
530
531	if there is more than one process, then split the processes
532	between the two psim matrices; otherwise, let the single process
533	work on one first and then the other */
534
535	for (psiindex = 0; psiindex <= 1; psiindex++) {
536	t2a = (double **) psim[procid][psiindex];
537	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
538	t2a[0][0] = 0.0;
539	}
540	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
541	t2a[im - 1][0] = 0.0;
542	}
543	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
544	t2a[0][jm - 1] = 0.0;
545	}
546	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
547	t2a[im - 1][jm - 1] = 0.0;
548	}
549	if (gp[procid].neighbors[UP] == -1) {
550	t1a = (double *) t2a[0];
551	for (j = firstcol; j <= lastcol; j++) {
552	t1a[j] = 0.0;
553	}
554	}
555	if (gp[procid].neighbors[DOWN] == -1) {
556	t1a = (double *) t2a[im - 1];
557	for (j = firstcol; j <= lastcol; j++) {
558	t1a[j] = 0.0;
559	}
560	}
561	if (gp[procid].neighbors[LEFT] == -1) {
562	for (j = firstrow; j <= lastrow; j++) {
563	t2a[j][0] = 0.0;
564	}
565	}
566	if (gp[procid].neighbors[RIGHT] == -1) {
567	for (j = firstrow; j <= lastrow; j++) {
568	t2a[j][jm - 1] = 0.0;
569	}
570	}
571	for (i = firstrow; i <= lastrow; i++) {
572	t1a = (double *) t2a[i];
573	for (iindex = firstcol; iindex <= lastcol; iindex++) {
574	t1a[iindex] = 0.0;
575	}
576	}
577	}
578
579	/* initialize psi matrices the same way */
580
581	for (psiindex = 0; psiindex <= 1; psiindex++) {
582	t2a = (double **) psi[procid][psiindex];
583	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
584	t2a[0][0] = 0.0;
585	}
586	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
587	t2a[0][jm - 1] = 0.0;
588	}
589	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
590	t2a[im - 1][0] = 0.0;
591	}
592	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
593	t2a[im - 1][jm - 1] = 0.0;
594	}
595	if (gp[procid].neighbors[UP] == -1) {
596	t1a = (double *) t2a[0];
597	for (j = firstcol; j <= lastcol; j++) {
598	t1a[j] = 0.0;
599	}
600	}
601	if (gp[procid].neighbors[DOWN] == -1) {
602	t1a = (double *) t2a[im - 1];
603	for (j = firstcol; j <= lastcol; j++) {
604	t1a[j] = 0.0;
605	}
606	}
607	if (gp[procid].neighbors[LEFT] == -1) {
608	for (j = firstrow; j <= lastrow; j++) {
609	t2a[j][0] = 0.0;
610	}
611	}
612	if (gp[procid].neighbors[RIGHT] == -1) {
613	for (j = firstrow; j <= lastrow; j++) {
614	t2a[j][jm - 1] = 0.0;
615	}
616	}
617	for (i = firstrow; i <= lastrow; i++) {
618	t1a = (double *) t2a[i];
619	for (iindex = firstcol; iindex <= lastcol; iindex++) {
620	t1a[iindex] = 0.0;
621	}
622	}
623	}
624
625	/* compute input curl of wind stress */
626
627
628	t2a = (double **) tauz[procid];
629	ysca1 = .5 * ysca;
630	factor = -t0 * pi / ysca1;
631	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
632	t2a[0][0] = 0.0;
633	}
634	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
635	t2a[im - 1][0] = 0.0;
636	}
637	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
638	sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;
639	sintemp = sin(sintemp);
640	t2a[0][jm - 1] = factor * sintemp;
641	}
642	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
643	sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;
644	sintemp = sin(sintemp);
645	t2a[im - 1][jm - 1] = factor * sintemp;
646	}
647	if (gp[procid].neighbors[UP] == -1) {
648	t1a = (double *) t2a[0];
649	for (j = firstcol; j <= lastcol; j++) {
650	sintemp = pi * ((double) j + j_off) * res / ysca1;
651	sintemp = sin(sintemp);
652	curlt = factor * sintemp;
653	t1a[j] = curlt;
654	}
655	}
656	if (gp[procid].neighbors[DOWN] == -1) {
657	t1a = (double *) t2a[im - 1];
658	for (j = firstcol; j <= lastcol; j++) {
659	sintemp = pi * ((double) j + j_off) * res / ysca1;
660	sintemp = sin(sintemp);
661	curlt = factor * sintemp;
662	t1a[j] = curlt;
663	}
664	}
665	if (gp[procid].neighbors[LEFT] == -1) {
666	for (j = firstrow; j <= lastrow; j++) {
667	t2a[j][0] = 0.0;
668	}
669	}
670	if (gp[procid].neighbors[RIGHT] == -1) {
671	sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;
672	sintemp = sin(sintemp);
673	curlt = factor * sintemp;
674	for (j = firstrow; j <= lastrow; j++) {
675	t2a[j][jm - 1] = curlt;
676	}
677	}
678	for (i = firstrow; i <= lastrow; i++) {
679	t1a = (double *) t2a[i];
680	for (iindex = firstcol; iindex <= lastcol; iindex++) {
681	sintemp = pi * ((double) iindex + j_off) * res / ysca1;
682	sintemp = sin(sintemp);
683	curlt = factor * sintemp;
684	t1a[iindex] = curlt;
685	}
686	}
687
688	#if defined(MULTIPLE_BARRIERS)
689	BARRIER(bars->sl_onetime, nprocs)
690	#else
691	BARRIER(bars->barrier, nprocs)
692	#endif
693
694	/***************************************************************
695	one-time stuff over at this point
696	***************************************************************/
697	while (!endflag) {
698	while ((!dayflag) \|\| (!dhourflag)) {
699	dayflag = 0;
700	dhourflag = 0;
701	if (nstep == 1) {
702	for (i = 0; i < 10; i++) {
703	gp[procid].steps_time[i] = 0;
704	}
705	if (procid == MASTER) {
706	CLOCK(global->trackstart)
707	}
708	if ((procid == MASTER) \|\| (do_stats)) {
709	CLOCK(t1);
710	(*gp[procid].total_time) = t1;
711	(*gp[procid].multi_time) = 0;
712	}
713	/* POSSIBLE ENHANCEMENT: Here is where one might reset the
714	statistics that one is measuring about the parallel execution */
715	}
716
717	slave2(procid, firstrow, lastrow, numrows, firstcol, lastcol, numcols);
718
719	/* update time and step number
720	note that these time and step variables are private i.e. every
721	process has its own copy and keeps track of its own time */
722
723	ttime = ttime + dtau;
724	nstep = nstep + 1;
725	day = ttime / 86400.0;
726
727	if (day > ((double) outday0)) {
728	dayflag = 1;
729	iday = (long) day;
730	dhour = dhour + dtau;
731	if (dhour >= 86400.0) {
732	dhourflag = 1;
733	}
734	}
735	}
736	dhour = 0.0;
737
738	t2a = (double **) psium[procid];
739	t2b = (double **) psim[procid][0];
740	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
741	t2a[0][0] = t2a[0][0] + t2b[0][0];
742	}
743	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
744	t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0];
745	}
746	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
747	t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1];
748	}
749	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
750	t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1];
751	}
752	if (gp[procid].neighbors[UP] == -1) {
753	t1a = (double *) t2a[0];
754	t1b = (double *) t2b[0];
755	for (j = firstcol; j <= lastcol; j++) {
756	t1a[j] = t1a[j] + t1b[j];
757	}
758	}
759	if (gp[procid].neighbors[DOWN] == -1) {
760	t1a = (double *) t2a[im - 1];
761	t1b = (double *) t2b[im - 1];
762	for (j = firstcol; j <= lastcol; j++) {
763	t1a[j] = t1a[j] + t1b[j];
764	}
765	}
766	if (gp[procid].neighbors[LEFT] == -1) {
767	for (j = firstrow; j <= lastrow; j++) {
768	t2a[j][0] = t2a[j][0] + t2b[j][0];
769	}
770	}
771	if (gp[procid].neighbors[RIGHT] == -1) {
772	for (j = firstrow; j <= lastrow; j++) {
773	t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1];
774	}
775	}
776	for (i = firstrow; i <= lastrow; i++) {
777	t1a = (double *) t2a[i];
778	t1b = (double *) t2b[i];
779	for (iindex = firstcol; iindex <= lastcol; iindex++) {
780	t1a[iindex] = t1a[iindex] + t1b[iindex];
781	}
782	}
783
784	/* update values of psilm array to psilm + psim[2] */
785
786	t2a = (double **) psilm[procid];
787	t2b = (double **) psim[procid][1];
788	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
789	t2a[0][0] = t2a[0][0] + t2b[0][0];
790	}
791	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {
792	t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0];
793	}
794	if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
795	t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1];
796	}
797	if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {
798	t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1];
799	}
800	if (gp[procid].neighbors[UP] == -1) {
801	t1a = (double *) t2a[0];
802	t1b = (double *) t2b[0];
803	for (j = firstcol; j <= lastcol; j++) {
804	t1a[j] = t1a[j] + t1b[j];
805	}
806	}
807	if (gp[procid].neighbors[DOWN] == -1) {
808	t1a = (double *) t2a[im - 1];
809	t1b = (double *) t2b[im - 1];
810	for (j = firstcol; j <= lastcol; j++) {
811	t1a[j] = t1a[j] + t1b[j];
812	}
813	}
814	if (gp[procid].neighbors[LEFT] == -1) {
815	for (j = firstrow; j <= lastrow; j++) {
816	t2a[j][0] = t2a[j][0] + t2b[j][0];
817	}
818	}
819	if (gp[procid].neighbors[RIGHT] == -1) {
820	for (j = firstrow; j <= lastrow; j++) {
821	t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1];
822	}
823	}
824	for (i = firstrow; i <= lastrow; i++) {
825	t1a = (double *) t2a[i];
826	t1b = (double *) t2b[i];
827	for (iindex = firstcol; iindex <= lastcol; iindex++) {
828	t1a[iindex] = t1a[iindex] + t1b[iindex];
829	}
830	}
831	if (iday >= (long) outday3) {
832	endflag = 1;
833	}
834	}
835	if ((procid == MASTER) \|\| (do_stats)) {
836	CLOCK(t1);
837	(gp[procid].total_time) = t1 - (gp[procid].total_time);
838	}
839	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: