Context Navigation

source: soft/giet_vm/applications/ocean/multi.C @ 743

Last change on this file since 743 was 598, checked in by guerin, 9 years ago
ocean: fix app broken by r589
File size: 23.4 KB

Line
1	/*************************************************************************/
2	/* */
3	/* Copyright (c) 1994 Stanford University */
4	/* */
5	/* All rights reserved. */
6	/* */
7	/* Permission is given to use, copy, and modify this software for any */
8	/* non-commercial purpose as long as this copyright notice is not */
9	/* removed. All other uses, including redistribution in whole or in */
10	/* part, are forbidden without prior written permission. */
11	/* */
12	/* This software is provided with absolutely no warranty and no */
13	/* support. */
14	/* */
15	/*************************************************************************/
16
17	/* Shared memory implementation of the multigrid method
18	Implementation uses red-black gauss-seidel relaxation
19	iterations, w cycles, and the method of half-injection for
20	residual computation. */
21
22	EXTERN_ENV
23
24	#include <stdio.h>
25	#include <math.h>
26	#include <stdlib.h>
27
28	#include "decs.h"
29
30	/* perform multigrid (w cycles) */
31	void multig(long my_id)
32	{
33	long iter;
34	double wu;
35	double errp;
36	long m;
37	long flag;
38	long k;
39	long my_num;
40	double wmax;
41	double local_err;
42	double red_local_err;
43	double black_local_err;
44	double g_error;
45
46	flag = 0;
47	iter = 0;
48	m = numlev - 1;
49	wmax = maxwork;
50	my_num = my_id;
51	wu = 0.0;
52
53	k = m;
54	g_error = 1.0e30;
55	while (!flag) {
56	errp = g_error;
57	iter++;
58	if (my_num == MASTER) {
59	multi->err_multi = 0.0;
60	}
61
62	/* barrier to make sure all procs have finished intadd or rescal */
63	/* before proceeding with relaxation */
64	#if defined(MULTIPLE_BARRIERS)
65	BARRIER(bars->error_barrier, nprocs)
66	#else
67	BARRIER(bars->barrier, nprocs)
68	#endif
69	copy_black(k, my_num);
70
71	relax(k, &red_local_err, RED_ITER, my_num);
72
73	/* barrier to make sure all red computations have been performed */
74	#if defined(MULTIPLE_BARRIERS)
75	BARRIER(bars->error_barrier, nprocs)
76	#else
77	BARRIER(bars->barrier, nprocs)
78	#endif
79	copy_red(k, my_num);
80
81	relax(k, &black_local_err, BLACK_ITER, my_num);
82
83	/* compute max local error from red_local_err and black_local_err */
84
85	if (red_local_err > black_local_err) {
86	local_err = red_local_err;
87	} else {
88	local_err = black_local_err;
89	}
90
91	/* update the global error if necessary */
92
93	LOCK(locks->error_lock)
94	if (local_err > multi->err_multi) {
95	multi->err_multi = local_err;
96	}
97	UNLOCK(locks->error_lock)
98
99	/* a single relaxation sweep at the finest level is one unit of */
100	/* work */
101	wu += pow((double) 4.0, (double) k - m);
102
103	/* barrier to make sure all processors have checked local error */
104	#if defined(MULTIPLE_BARRIERS)
105	BARRIER(bars->error_barrier, nprocs)
106	#else
107	BARRIER(bars->barrier, nprocs)
108	#endif
109	g_error = multi->err_multi;
110
111	/* barrier to make sure master does not cycle back to top of loop */
112	/* and reset global->err before we read it and decide what to do */
113	#if defined(MULTIPLE_BARRIERS)
114	BARRIER(bars->error_barrier, nprocs)
115	#else
116	BARRIER(bars->barrier, nprocs)
117	#endif
118	if (g_error >= lev_tol[k]) {
119	if (wu > wmax) {
120	/* max work exceeded */
121	fprintf(stderr, "ERROR: Maximum work limit %0.5f exceeded\n", wmax);
122	exit(-1);
123	} else {
124	/* if we have not converged */
125	if ((k != 0) && (g_error / errp >= 0.6) && (k > minlevel)) {
126	/* if need to go to coarser grid */
127
128	copy_borders(k, my_num);
129	copy_rhs_borders(k, my_num);
130
131	/* This bar is needed because the routine rescal uses the neighbor's
132	border points to compute s4. We must ensure that the neighbor's
133	border points have been written before we try computing the new
134	rescal values */
135
136	#if defined(MULTIPLE_BARRIERS)
137	BARRIER(bars->error_barrier, nprocs)
138	#else
139	BARRIER(bars->barrier, nprocs)
140	#endif
141	rescal(k, my_num);
142
143	/* transfer residual to rhs of coarser grid */
144	lev_tol[k - 1] = 0.3 * g_error;
145	k = k - 1;
146	putz(k, my_num);
147	/* make initial guess on coarser grid zero */
148	g_error = 1.0e30;
149	}
150	}
151	} else {
152	/* if we have converged at this level */
153	if (k == m) {
154	/* if finest grid, we are done */
155	flag = 1;
156	} else {
157	/* else go to next finest grid */
158
159	copy_borders(k, my_num);
160
161	intadd(k, my_num);
162	/* changes the grid values at the finer level. rhs at finer level */
163	/* remains what it already is */
164	k++;
165	g_error = 1.0e30;
166	}
167	}
168	}
169	if (do_output) {
170	if (my_num == MASTER) {
171	printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter, k, multi->err_multi, wu);
172	}
173	}
174	}
175
176	/* perform red or black iteration (not both) */
177	void relax(long k, double *err, long color, long my_num)
178	{
179	long i;
180	long j;
181	long iend;
182	long jend;
183	long oddistart;
184	long oddjstart;
185	long evenistart;
186	long evenjstart;
187	double a;
188	double h;
189	double factor;
190	double maxerr;
191	double newerr;
192	double oldval;
193	double newval;
194	double **t2a;
195	double **t2b;
196	double *t1a;
197	double *t1b;
198	double *t1c;
199	double *t1d;
200
201	i = 0;
202	j = 0;
203
204	*err = 0.0;
205	h = lev_res[k];
206
207	/* points whose sum of row and col index is even do a red iteration, */
208	/* others do a black */
209
210	evenistart = gp[my_num].eist[k];
211	evenjstart = gp[my_num].ejst[k];
212	oddistart = gp[my_num].oist[k];
213	oddjstart = gp[my_num].ojst[k];
214
215	iend = gp[my_num].rlien[k];
216	jend = gp[my_num].rljen[k];
217
218	factor = 4.0 - eig2 * h * h;
219	maxerr = 0.0;
220	t2a = (double **) q_multi[my_num][k];
221	t2b = (double **) rhs_multi[my_num][k];
222	if (color == RED_ITER) {
223	for (i = evenistart; i < iend; i += 2) {
224	t1a = (double *) t2a[i];
225	t1b = (double *) t2b[i];
226	t1c = (double *) t2a[i - 1];
227	t1d = (double *) t2a[i + 1];
228	for (j = evenjstart; j < jend; j += 2) {
229	a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];
230	oldval = t1a[j];
231	newval = a / factor;
232	newerr = oldval - newval;
233	t1a[j] = newval;
234	if (fabs(newerr) > maxerr) {
235	maxerr = fabs(newerr);
236	}
237	}
238	}
239	for (i = oddistart; i < iend; i += 2) {
240	t1a = (double *) t2a[i];
241	t1b = (double *) t2b[i];
242	t1c = (double *) t2a[i - 1];
243	t1d = (double *) t2a[i + 1];
244	for (j = oddjstart; j < jend; j += 2) {
245	a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];
246	oldval = t1a[j];
247	newval = a / factor;
248	newerr = oldval - newval;
249	t1a[j] = newval;
250	if (fabs(newerr) > maxerr) {
251	maxerr = fabs(newerr);
252	}
253	}
254	}
255	} else if (color == BLACK_ITER) {
256	for (i = evenistart; i < iend; i += 2) {
257	t1a = (double *) t2a[i];
258	t1b = (double *) t2b[i];
259	t1c = (double *) t2a[i - 1];
260	t1d = (double *) t2a[i + 1];
261	for (j = oddjstart; j < jend; j += 2) {
262	a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];
263	oldval = t1a[j];
264	newval = a / factor;
265	newerr = oldval - newval;
266	t1a[j] = newval;
267	if (fabs(newerr) > maxerr) {
268	maxerr = fabs(newerr);
269	}
270	}
271	}
272	for (i = oddistart; i < iend; i += 2) {
273	t1a = (double *) t2a[i];
274	t1b = (double *) t2b[i];
275	t1c = (double *) t2a[i - 1];
276	t1d = (double *) t2a[i + 1];
277	for (j = evenjstart; j < jend; j += 2) {
278	a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];
279	oldval = t1a[j];
280	newval = a / factor;
281	newerr = oldval - newval;
282	t1a[j] = newval;
283	if (fabs(newerr) > maxerr) {
284	maxerr = fabs(newerr);
285	}
286	}
287	}
288	}
289	*err = maxerr;
290	}
291
292	/* perform half-injection to next coarsest level */
293	void rescal(long kf, long my_num)
294	{
295	long ic;
296	long if17;
297	long jf;
298	long jc;
299	long krc;
300	long istart;
301	long iend;
302	long jstart;
303	long jend;
304	double hf;
305	double s;
306	double s1;
307	double s2;
308	double s3;
309	double s4;
310	double factor;
311	double int1;
312	double int2;
313	double i_int_factor;
314	double j_int_factor;
315	long i_off;
316	long j_off;
317	long up_proc;
318	long left_proc;
319	long im;
320	long jm;
321	double temp;
322	double temp2;
323	double **t2a;
324	double **t2b;
325	double **t2c;
326	double *t1a;
327	double *t1b;
328	double *t1c;
329	double *t1d;
330	double *t1e;
331	double *t1f;
332	double *t1g;
333	double *t1h;
334
335	krc = kf - 1;
336	//hc = lev_res[krc];
337	hf = lev_res[kf];
338	i_off = (gp[my_num].rownum) ypts_per_proc[krc];
339	j_off = (gp[my_num].colnum) xpts_per_proc[krc];
340	up_proc = gp[my_num].neighbors[UP];
341	left_proc = gp[my_num].neighbors[LEFT];
342	im = (imx[kf] - 2) / yprocs;
343	jm = (jmx[kf] - 2) / xprocs;
344
345	istart = gp[my_num].rlist[krc];
346	jstart = gp[my_num].rljst[krc];
347	iend = gp[my_num].rlien[krc] - 1;
348	jend = gp[my_num].rljen[krc] - 1;
349
350	factor = 4.0 - eig2 * hf * hf;
351
352	t2a = (double **) q_multi[my_num][kf];
353	t2b = (double **) rhs_multi[my_num][kf];
354	t2c = (double **) rhs_multi[my_num][krc];
355	if17 = 2 * (istart - 1);
356	for (ic = istart; ic <= iend; ic++) {
357	if17 += 2;
358	i_int_factor = (ic + i_off) * i_int_coeff[krc] * 0.5;
359	jf = 2 * (jstart - 1);
360	t1a = (double *) t2a[if17];
361	t1b = (double *) t2b[if17];
362	t1c = (double *) t2c[ic];
363	t1d = (double *) t2a[if17 - 1];
364	t1e = (double *) t2a[if17 + 1];
365	t1f = (double *) t2a[if17 - 2];
366	t1g = (double *) t2a[if17 - 3];
367	t1h = (double *) t2b[if17 - 2];
368	for (jc = jstart; jc <= jend; jc++) {
369	jf += 2;
370	j_int_factor = (jc + j_off) * j_int_coeff[krc] * 0.5;
371
372	/* method of half-injection uses 2.0 instead of 4.0 */
373
374	/* do bilinear interpolation */
375	s = t1a[jf + 1] + t1a[jf - 1] + t1d[jf] + t1e[jf];
376	s1 = 2.0 * (t1b[jf] - s + factor * t1a[jf]);
377	if (((if17 == 2) && (gp[my_num].neighbors[UP] == -1)) \|\| ((jf == 2) && (gp[my_num].neighbors[LEFT] == -1))) {
378	s2 = 0;
379	s3 = 0;
380	s4 = 0;
381	} else if ((if17 == 2) \|\| (jf == 2)) {
382	if (jf == 2) {
383	temp = q_multi[left_proc][kf][if17][jm - 1];
384	} else {
385	temp = t1a[jf - 3];
386	}
387	s = t1a[jf - 1] + temp + t1d[jf - 2] + t1e[jf - 2];
388	s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]);
389	if (if17 == 2) {
390	temp = q_multi[up_proc][kf][im - 1][jf];
391	} else {
392	temp = t1g[jf];
393	}
394	s = t1f[jf + 1] + t1f[jf - 1] + temp + t1d[jf];
395	s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]);
396	if (jf == 2) {
397	temp = q_multi[left_proc][kf][if17 - 2][jm - 1];
398	} else {
399	temp = t1f[jf - 3];
400	}
401	if (if17 == 2) {
402	temp2 = q_multi[up_proc][kf][im - 1][jf - 2];
403	} else {
404	temp2 = t1g[jf - 2];
405	}
406	s = t1f[jf - 1] + temp + temp2 + t1d[jf - 2];
407	s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]);
408	} else {
409	s = t1a[jf - 1] + t1a[jf - 3] + t1d[jf - 2] + t1e[jf - 2];
410	s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]);
411	s = t1f[jf + 1] + t1f[jf - 1] + t1g[jf] + t1d[jf];
412	s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]);
413	s = t1f[jf - 1] + t1f[jf - 3] + t1g[jf - 2] + t1d[jf - 2];
414	s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]);
415	}
416	int1 = j_int_factor * s4 + (1.0 - j_int_factor) * s3;
417	int2 = j_int_factor * s2 + (1.0 - j_int_factor) * s1;
418	//int_val = i_int_factorint1+(1.0-i_int_factor)int2;
419	t1c[jc] = i_int_factor * int1 + (1.0 - i_int_factor) * int2;
420	}
421	}
422	}
423
424	/* perform interpolation and addition to next finest grid */
425	void intadd(long kc, long my_num)
426	{
427	long ic;
428	long if17;
429	long jf;
430	long jc;
431	long kf;
432	long istart;
433	long jstart;
434	long iend;
435	long jend;
436	double int1;
437	double int2;
438	double i_int_factor1;
439	double j_int_factor1;
440	double i_int_factor2;
441	double j_int_factor2;
442	long i_off;
443	long j_off;
444	double **t2a;
445	double **t2b;
446	double *t1a;
447	double *t1b;
448	double *t1c;
449	double *t1d;
450	double *t1e;
451
452	kf = kc + 1;
453	//hc = lev_res[kc];
454	//hf = lev_res[kf];
455
456	istart = gp[my_num].rlist[kc];
457	jstart = gp[my_num].rljst[kc];
458	iend = gp[my_num].rlien[kc] - 1;
459	jend = gp[my_num].rljen[kc] - 1;
460	i_off = (gp[my_num].rownum) ypts_per_proc[kc];
461	j_off = (gp[my_num].colnum) xpts_per_proc[kc];
462
463	t2a = (double **) q_multi[my_num][kc];
464	t2b = (double **) q_multi[my_num][kf];
465	if17 = 2 * (istart - 1);
466	for (ic = istart; ic <= iend; ic++) {
467	if17 += 2;
468	i_int_factor1 = ((imx[kc] - 2) - (ic + i_off - 1)) * (i_int_coeff[kf]);
469	i_int_factor2 = (ic + i_off) * i_int_coeff[kf];
470	jf = 2 * (jstart - 1);
471
472	t1a = (double *) t2a[ic];
473	t1b = (double *) t2a[ic - 1];
474	t1c = (double *) t2a[ic + 1];
475	t1d = (double *) t2b[if17];
476	t1e = (double *) t2b[if17 - 1];
477	for (jc = jstart; jc <= jend; jc++) {
478	jf += 2;
479	j_int_factor1 = ((jmx[kc] - 2) - (jc + j_off - 1)) * (j_int_coeff[kf]);
480	j_int_factor2 = (jc + j_off) * j_int_coeff[kf];
481
482	int1 = j_int_factor1 * t1a[jc - 1] + (1.0 - j_int_factor1) * t1a[jc];
483	int2 = j_int_factor1 * t1b[jc - 1] + (1.0 - j_int_factor1) * t1b[jc];
484	t1e[jf - 1] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1;
485	int2 = j_int_factor1 * t1c[jc - 1] + (1.0 - j_int_factor1) * t1c[jc];
486	t1d[jf - 1] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1;
487	int1 = j_int_factor2 * t1a[jc + 1] + (1.0 - j_int_factor2) * t1a[jc];
488	int2 = j_int_factor2 * t1b[jc + 1] + (1.0 - j_int_factor2) * t1b[jc];
489	t1e[jf] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1;
490	int2 = j_int_factor2 * t1c[jc + 1] + (1.0 - j_int_factor2) * t1c[jc];
491	t1d[jf] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1;
492	}
493	}
494	}
495
496	/* initialize a grid to zero in parallel */
497	void putz(long k, long my_num)
498	{
499	long i;
500	long j;
501	long istart;
502	long jstart;
503	long iend;
504	long jend;
505	double **t2a;
506	double *t1a;
507
508	istart = gp[my_num].rlist[k];
509	jstart = gp[my_num].rljst[k];
510	iend = gp[my_num].rlien[k];
511	jend = gp[my_num].rljen[k];
512
513	t2a = (double **) q_multi[my_num][k];
514	for (i = istart; i <= iend; i++) {
515	t1a = (double *) t2a[i];
516	for (j = jstart; j <= jend; j++) {
517	t1a[j] = 0.0;
518	}
519	}
520	}
521
522	void copy_borders(long k, long pid)
523	{
524	long i;
525	long j;
526	long jj;
527	long im;
528	long jm;
529	long lastrow;
530	long lastcol;
531	double **t2a;
532	double **t2b;
533	double *t1a;
534	double *t1b;
535
536	im = (imx[k] - 2) / yprocs + 2;
537	jm = (jmx[k] - 2) / xprocs + 2;
538	lastrow = (imx[k] - 2) / yprocs;
539	lastcol = (jmx[k] - 2) / xprocs;
540
541	t2a = (double **) q_multi[pid][k];
542	jj = gp[pid].neighbors[UPLEFT];
543	if (jj != -1) {
544	t2a[0][0] = q_multi[jj][k][im - 2][jm - 2];
545	}
546	jj = gp[pid].neighbors[UPRIGHT];
547	if (jj != -1) {
548	t2a[0][jm - 1] = q_multi[jj][k][im - 2][1];
549	}
550	jj = gp[pid].neighbors[DOWNLEFT];
551	if (jj != -1) {
552	t2a[im - 1][0] = q_multi[jj][k][1][jm - 2];
553	}
554	jj = gp[pid].neighbors[DOWNRIGHT];
555	if (jj != -1) {
556	t2a[im - 1][jm - 1] = q_multi[jj][k][1][1];
557	}
558
559	if (gp[pid].neighbors[UP] == -1) {
560	jj = gp[pid].neighbors[LEFT];
561	if (jj != -1) {
562	t2a[0][0] = q_multi[jj][k][0][jm - 2];
563	} else {
564	jj = gp[pid].neighbors[DOWN];
565	if (jj != -1) {
566	t2a[im - 1][0] = q_multi[jj][k][1][0];
567	}
568	}
569	jj = gp[pid].neighbors[RIGHT];
570	if (jj != -1) {
571	t2a[0][jm - 1] = q_multi[jj][k][0][1];
572	} else {
573	jj = gp[pid].neighbors[DOWN];
574	if (jj != -1) {
575	t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1];
576	}
577	}
578	} else if (gp[pid].neighbors[DOWN] == -1) {
579	jj = gp[pid].neighbors[LEFT];
580	if (jj != -1) {
581	t2a[im - 1][0] = q_multi[jj][k][im - 1][jm - 2];
582	} else {
583	jj = gp[pid].neighbors[UP];
584	if (jj != -1) {
585	t2a[0][0] = q_multi[jj][k][im - 2][0];
586	}
587	}
588	jj = gp[pid].neighbors[RIGHT];
589	if (jj != -1) {
590	t2a[im - 1][jm - 1] = q_multi[jj][k][im - 1][1];
591	} else {
592	jj = gp[pid].neighbors[UP];
593	if (jj != -1) {
594	t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1];
595	}
596	}
597	} else if (gp[pid].neighbors[LEFT] == -1) {
598	jj = gp[pid].neighbors[UP];
599	if (jj != -1) {
600	t2a[0][0] = q_multi[jj][k][im - 2][0];
601	}
602	jj = gp[pid].neighbors[DOWN];
603	if (jj != -1) {
604	t2a[im - 1][0] = q_multi[jj][k][1][0];
605	}
606	} else if (gp[pid].neighbors[RIGHT] == -1) {
607	jj = gp[pid].neighbors[UP];
608	if (jj != -1) {
609	t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1];
610	}
611	jj = gp[pid].neighbors[DOWN];
612	if (jj != -1) {
613	t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1];
614	}
615	}
616
617	j = gp[pid].neighbors[UP];
618	if (j != -1) {
619	t1a = (double *) t2a[0];
620	t1b = (double *) q_multi[j][k][im - 2];
621	for (i = 1; i <= lastcol; i++) {
622	t1a[i] = t1b[i];
623	}
624	}
625	j = gp[pid].neighbors[DOWN];
626	if (j != -1) {
627	t1a = (double *) t2a[im - 1];
628	t1b = (double *) q_multi[j][k][1];
629	for (i = 1; i <= lastcol; i++) {
630	t1a[i] = t1b[i];
631	}
632	}
633	j = gp[pid].neighbors[LEFT];
634	if (j != -1) {
635	t2b = (double **) q_multi[j][k];
636	for (i = 1; i <= lastrow; i++) {
637	t2a[i][0] = t2b[i][jm - 2];
638	}
639	}
640	j = gp[pid].neighbors[RIGHT];
641	if (j != -1) {
642	t2b = (double **) q_multi[j][k];
643	for (i = 1; i <= lastrow; i++) {
644	t2a[i][jm - 1] = t2b[i][1];
645	}
646	}
647
648	}
649
650	void copy_rhs_borders(long k, long procid)
651	{
652	long i;
653	long j;
654	long im;
655	long jm;
656	long lastrow;
657	long lastcol;
658	double **t2a;
659	double **t2b;
660	double *t1a;
661	double *t1b;
662
663	im = (imx[k] - 2) / yprocs + 2;
664	jm = (jmx[k] - 2) / xprocs + 2;
665	lastrow = (imx[k] - 2) / yprocs;
666	lastcol = (jmx[k] - 2) / xprocs;
667
668	t2a = (double **) rhs_multi[procid][k];
669	if (gp[procid].neighbors[UPLEFT] != -1) {
670	j = gp[procid].neighbors[UPLEFT];
671	t2a[0][0] = rhs_multi[j][k][im - 2][jm - 2];
672	}
673
674	if (gp[procid].neighbors[UP] != -1) {
675	j = gp[procid].neighbors[UP];
676	if (j != -1) {
677	t1a = (double *) t2a[0];
678	t1b = (double *) rhs_multi[j][k][im - 2];
679	for (i = 2; i <= lastcol; i += 2) {
680	t1a[i] = t1b[i];
681	}
682	}
683	}
684	if (gp[procid].neighbors[LEFT] != -1) {
685	j = gp[procid].neighbors[LEFT];
686	if (j != -1) {
687	t2b = (double **) rhs_multi[j][k];
688	for (i = 2; i <= lastrow; i += 2) {
689	t2a[i][0] = t2b[i][jm - 2];
690	}
691	}
692	}
693	}
694
695	void copy_red(long k, long procid)
696	{
697	long i;
698	long j;
699	long im;
700	long jm;
701	long lastrow;
702	long lastcol;
703	double **t2a;
704	double **t2b;
705	double *t1a;
706	double *t1b;
707
708	im = (imx[k] - 2) / yprocs + 2;
709	jm = (jmx[k] - 2) / xprocs + 2;
710	lastrow = (imx[k] - 2) / yprocs;
711	lastcol = (jmx[k] - 2) / xprocs;
712
713	t2a = (double **) q_multi[procid][k];
714	j = gp[procid].neighbors[UP];
715	if (j != -1) {
716	t1a = (double *) t2a[0];
717	t1b = (double *) q_multi[j][k][im - 2];
718	for (i = 2; i <= lastcol; i += 2) {
719	t1a[i] = t1b[i];
720	}
721	}
722	j = gp[procid].neighbors[DOWN];
723	if (j != -1) {
724	t1a = (double *) t2a[im - 1];
725	t1b = (double *) q_multi[j][k][1];
726	for (i = 1; i <= lastcol; i += 2) {
727	t1a[i] = t1b[i];
728	}
729	}
730	j = gp[procid].neighbors[LEFT];
731	if (j != -1) {
732	t2b = (double **) q_multi[j][k];
733	for (i = 2; i <= lastrow; i += 2) {
734	t2a[i][0] = t2b[i][jm - 2];
735	}
736	}
737	j = gp[procid].neighbors[RIGHT];
738	if (j != -1) {
739	t2b = (double **) q_multi[j][k];
740	for (i = 1; i <= lastrow; i += 2) {
741	t2a[i][jm - 1] = t2b[i][1];
742	}
743	}
744	}
745
746	void copy_black(long k, long procid)
747	{
748	long i;
749	long j;
750	long im;
751	long jm;
752	long lastrow;
753	long lastcol;
754	double **t2a;
755	double **t2b;
756	double *t1a;
757	double *t1b;
758
759	im = (imx[k] - 2) / yprocs + 2;
760	jm = (jmx[k] - 2) / xprocs + 2;
761	lastrow = (imx[k] - 2) / yprocs;
762	lastcol = (jmx[k] - 2) / xprocs;
763
764	t2a = (double **) q_multi[procid][k];
765	j = gp[procid].neighbors[UP];
766	if (j != -1) {
767	t1a = (double *) t2a[0];
768	t1b = (double *) q_multi[j][k][im - 2];
769	for (i = 1; i <= lastcol; i += 2) {
770	t1a[i] = t1b[i];
771	}
772	}
773	j = gp[procid].neighbors[DOWN];
774	if (j != -1) {
775	t1a = (double *) t2a[im - 1];
776	t1b = (double *) q_multi[j][k][1];
777	for (i = 2; i <= lastcol; i += 2) {
778	t1a[i] = t1b[i];
779	}
780	}
781	j = gp[procid].neighbors[LEFT];
782	if (j != -1) {
783	t2b = (double **) q_multi[j][k];
784	for (i = 1; i <= lastrow; i += 2) {
785	t2a[i][0] = t2b[i][jm - 2];
786	}
787	}
788	j = gp[procid].neighbors[RIGHT];
789	if (j != -1) {
790	t2b = (double **) q_multi[j][k];
791	for (i = 2; i <= lastrow; i += 2) {
792	t2a[i][jm - 1] = t2b[i][1];
793	}
794	}
795	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: