Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

multi.c @ 812

Last change on this file since 812 was 799, checked in by alain, 9 years ago
Introducing a port of ocean application without macros.
File size: 24.1 KB

Rev	Line
[799]	1	/*************************************************************************/
	2	/* */
	3	/* Copyright (c) 1994 Stanford University */
	4	/* */
	5	/* All rights reserved. */
	6	/* */
	7	/* Permission is given to use, copy, and modify this software for any */
	8	/* non-commercial purpose as long as this copyright notice is not */
	9	/* removed. All other uses, including redistribution in whole or in */
	10	/* part, are forbidden without prior written permission. */
	11	/* */
	12	/* This software is provided with absolutely no warranty and no */
	13	/* support. */
	14	/* */
	15	/*************************************************************************/
	16
	17	/* Shared memory implementation of the multigrid method
	18	Implementation uses red-black gauss-seidel relaxation
	19	iterations, w cycles, and the method of half-injection for
	20	residual computation. */
	21
	22
	23	#include <stdio.h>
	24	#include <stdlib.h>
	25	#include <math.h>
	26
	27	#include "decs.h"
	28
	29	////////////////////////
	30	void multig(long procid)
	31	{
	32	long iter;
	33	double wu;
	34	double errp;
	35	long m;
	36	long flag;
	37	long k;
	38	double wmax;
	39	double local_err;
	40	double red_local_err;
	41	double black_local_err;
	42	double g_error;
	43	long barrier_start;
	44
	45	flag = 0;
	46	iter = 0;
	47	m = numlev - 1;
	48	wmax = maxwork;
	49	wu = 0.0;
	50
	51	k = m;
	52	g_error = 1.0e30;
	53	while (!flag)
	54	{
	55	errp = g_error;
	56	iter++;
	57	if (procid == MASTER)
	58	{
	59	multi->err_multi = 0.0;
	60	}
	61
	62	/* barrier to make sure all procs have finished intadd or rescal */
	63	/* before proceeding with relaxation */
	64
	65	// BARRIER
	66	barrier_start = giet_proctime();
	67	sqt_barrier_wait( &barrier );
	68	gps[procid]->sync_time += (giet_proctime() - barrier_start);
	69
	70	copy_black(k, procid);
	71
	72	relax(k, &red_local_err, RED_ITER, procid);
	73
	74	/* barrier to make sure all red computations have been performed */
	75
	76	// BARRIER
	77	barrier_start = giet_proctime();
	78	sqt_barrier_wait( &barrier );
	79	gps[procid]->sync_time += (giet_proctime() - barrier_start);
	80
	81	copy_red(k, procid);
	82
	83	relax(k, &black_local_err, BLACK_ITER, procid);
	84
	85	/* compute max local error from red_local_err and black_local_err */
	86
	87	if (red_local_err > black_local_err) local_err = red_local_err;
	88	else local_err = black_local_err;
	89
	90	/* update the global error if necessary */
	91
	92	sqt_lock_acquire( &error_lock );
	93	if (local_err > multi->err_multi) multi->err_multi = local_err;
	94	sqt_lock_release( &error_lock );
	95
	96	/* a single relaxation sweep at the finest level is one unit of */
	97	/* work */
	98	wu += pow((double) 4.0, (double) k - m);
	99
	100	/* barrier to make sure all processors have checked local error */
	101
	102	// BARRIER
	103	barrier_start = giet_proctime();
	104	sqt_barrier_wait( &barrier );
	105	gps[procid]->sync_time += (giet_proctime() - barrier_start);
	106
	107	g_error = multi->err_multi;
	108
	109	/* barrier to make sure master does not cycle back to top of loop */
	110	/* and reset global->err before we read it and decide what to do */
	111
	112	// BARRIER
	113	barrier_start = giet_proctime();
	114	sqt_barrier_wait( &barrier );
	115	gps[procid]->sync_time += (giet_proctime() - barrier_start);
	116
	117	if (g_error >= lev_tol[k])
	118	{
	119	if (wu > wmax)
	120	{
	121	/* max work exceeded */
	122	giet_pthread_exit("[OCEAN ERROR] work overflow in multig()");
	123	}
	124	else
	125	{
	126	/* if we have not converged */
	127	if ((k != 0) && (g_error / errp >= 0.6) && (k > minlevel))
	128	{
	129	/* if need to go to coarser grid */
	130
	131	copy_borders(k, procid);
	132	copy_rhs_borders(k, procid);
	133
	134	/* This bar is needed because the routine rescal uses the neighbor's
	135	border points to compute s4. We must ensure that the neighbor's
	136	border points have been written before we try computing the new
	137	rescal values */
	138
	139	// BARRIER
	140	barrier_start = giet_proctime();
	141	sqt_barrier_wait( &barrier );
	142	gps[procid]->sync_time += (giet_proctime() - barrier_start);
	143
	144	rescal(k, procid);
	145
	146	/* transfer residual to rhs of coarser grid */
	147	lev_tol[k - 1] = 0.3 * g_error;
	148	k = k - 1;
	149	putz(k, procid);
	150	/* make initial guess on coarser grid zero */
	151	g_error = 1.0e30;
	152	}
	153	}
	154	}
	155	else
	156	{
	157	/* if we have converged at this level */
	158	if (k == m)
	159	{
	160	/* if finest grid, we are done */
	161	flag = 1;
	162	}
	163	else
	164	{
	165	/* else go to next finest grid */
	166
	167	copy_borders(k, procid);
	168
	169	intadd(k, procid);
	170	/* changes the grid values at the finer level. rhs at finer level */
	171	/* remains what it already is */
	172	k++;
	173	g_error = 1.0e30;
	174	}
	175	}
	176	}
	177	} // end multig()
	178
	179	////////////////////////////////////////////////////////
	180	// perform red or black iteration (not both)
	181	////////////////////////////////////////////////////////
	182	void relax(long k, double *err, long color, long procid)
	183	{
	184	long i;
	185	long j;
	186	long iend;
	187	long jend;
	188	long oddistart;
	189	long oddjstart;
	190	long evenistart;
	191	long evenjstart;
	192	double a;
	193	double h;
	194	double factor;
	195	double maxerr;
	196	double newerr;
	197	double oldval;
	198	double newval;
	199	double **t2a;
	200	double **t2b;
	201	double *t1a;
	202	double *t1b;
	203	double *t1c;
	204	double *t1d;
	205
	206	i = 0;
	207	j = 0;
	208
	209	*err = 0.0;
	210	h = lev_res[k];
	211
	212	/* points whose sum of row and col index is even do a red iteration, */
	213	/* others do a black */
	214
	215	evenistart = gps[procid]->eist[k];
	216	evenjstart = gps[procid]->ejst[k];
	217	oddistart = gps[procid]->oist[k];
	218	oddjstart = gps[procid]->ojst[k];
	219
	220	iend = gps[procid]->rlien[k];
	221	jend = gps[procid]->rljen[k];
	222
	223	factor = 4.0 - eig2 * h * h;
	224	maxerr = 0.0;
	225	t2a = (double **) q_multi[procid][k];
	226	t2b = (double **) rhs_multi[procid][k];
	227	if (color == RED_ITER) {
	228	for (i = evenistart; i < iend; i += 2) {
	229	t1a = (double *) t2a[i];
	230	t1b = (double *) t2b[i];
	231	t1c = (double *) t2a[i - 1];
	232	t1d = (double *) t2a[i + 1];
	233	for (j = evenjstart; j < jend; j += 2) {
	234	a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];
	235	oldval = t1a[j];
	236	newval = a / factor;
	237	newerr = oldval - newval;
	238	t1a[j] = newval;
	239	if (fabs(newerr) > maxerr) {
	240	maxerr = fabs(newerr);
	241	}
	242	}
	243	}
	244	for (i = oddistart; i < iend; i += 2) {
	245	t1a = (double *) t2a[i];
	246	t1b = (double *) t2b[i];
	247	t1c = (double *) t2a[i - 1];
	248	t1d = (double *) t2a[i + 1];
	249	for (j = oddjstart; j < jend; j += 2) {
	250	a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];
	251	oldval = t1a[j];
	252	newval = a / factor;
	253	newerr = oldval - newval;
	254	t1a[j] = newval;
	255	if (fabs(newerr) > maxerr) {
	256	maxerr = fabs(newerr);
	257	}
	258	}
	259	}
	260	} else if (color == BLACK_ITER) {
	261	for (i = evenistart; i < iend; i += 2) {
	262	t1a = (double *) t2a[i];
	263	t1b = (double *) t2b[i];
	264	t1c = (double *) t2a[i - 1];
	265	t1d = (double *) t2a[i + 1];
	266	for (j = oddjstart; j < jend; j += 2) {
	267	a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];
	268	oldval = t1a[j];
	269	newval = a / factor;
	270	newerr = oldval - newval;
	271	t1a[j] = newval;
	272	if (fabs(newerr) > maxerr) {
	273	maxerr = fabs(newerr);
	274	}
	275	}
	276	}
	277	for (i = oddistart; i < iend; i += 2) {
	278	t1a = (double *) t2a[i];
	279	t1b = (double *) t2b[i];
	280	t1c = (double *) t2a[i - 1];
	281	t1d = (double *) t2a[i + 1];
	282	for (j = evenjstart; j < jend; j += 2) {
	283	a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];
	284	oldval = t1a[j];
	285	newval = a / factor;
	286	newerr = oldval - newval;
	287	t1a[j] = newval;
	288	if (fabs(newerr) > maxerr) {
	289	maxerr = fabs(newerr);
	290	}
	291	}
	292	}
	293	}
	294	*err = maxerr;
	295	} // end relax()
	296
	297	////////////////////////////////////////////////////
	298	// perform half-injection to next coarsest level
	299	////////////////////////////////////////////////////
	300	void rescal(long kf, long procid)
	301	{
	302	long ic;
	303	long if17;
	304	long jf;
	305	long jc;
	306	long krc;
	307	long istart;
	308	long iend;
	309	long jstart;
	310	long jend;
	311	double hf;
	312	double s;
	313	double s1;
	314	double s2;
	315	double s3;
	316	double s4;
	317	double factor;
	318	double int1;
	319	double int2;
	320	double i_int_factor;
	321	double j_int_factor;
	322	long i_off;
	323	long j_off;
	324	long up_proc;
	325	long left_proc;
	326	long im;
	327	long jm;
	328	double temp;
	329	double temp2;
	330	double **t2a;
	331	double **t2b;
	332	double **t2c;
	333	double *t1a;
	334	double *t1b;
	335	double *t1c;
	336	double *t1d;
	337	double *t1e;
	338	double *t1f;
	339	double *t1g;
	340	double *t1h;
	341
	342	krc = kf - 1;
	343	//hc = lev_res[krc];
	344	hf = lev_res[kf];
	345	i_off = gps[procid]->rownum * ypts_per_proc[krc];
	346	j_off = gps[procid]->colnum * xpts_per_proc[krc];
	347	up_proc = gps[procid]->neighbors[UP];
	348	left_proc = gps[procid]->neighbors[LEFT];
	349	im = (imx[kf] - 2) / yprocs;
	350	jm = (jmx[kf] - 2) / xprocs;
	351
	352	istart = gps[procid]->rlist[krc];
	353	jstart = gps[procid]->rljst[krc];
	354	iend = gps[procid]->rlien[krc] - 1;
	355	jend = gps[procid]->rljen[krc] - 1;
	356
	357	factor = 4.0 - eig2 * hf * hf;
	358
	359	t2a = (double **) q_multi[procid][kf];
	360	t2b = (double **) rhs_multi[procid][kf];
	361	t2c = (double **) rhs_multi[procid][krc];
	362	if17 = 2 * (istart - 1);
	363	for (ic = istart; ic <= iend; ic++) {
	364	if17 += 2;
	365	i_int_factor = (ic + i_off) * i_int_coeff[krc] * 0.5;
	366	jf = 2 * (jstart - 1);
	367	t1a = (double *) t2a[if17];
	368	t1b = (double *) t2b[if17];
	369	t1c = (double *) t2c[ic];
	370	t1d = (double *) t2a[if17 - 1];
	371	t1e = (double *) t2a[if17 + 1];
	372	t1f = (double *) t2a[if17 - 2];
	373	t1g = (double *) t2a[if17 - 3];
	374	t1h = (double *) t2b[if17 - 2];
	375	for (jc = jstart; jc <= jend; jc++) {
	376	jf += 2;
	377	j_int_factor = (jc + j_off) * j_int_coeff[krc] * 0.5;
	378
	379	/* method of half-injection uses 2.0 instead of 4.0 */
	380
	381	/* do bilinear interpolation */
	382	s = t1a[jf + 1] + t1a[jf - 1] + t1d[jf] + t1e[jf];
	383	s1 = 2.0 * (t1b[jf] - s + factor * t1a[jf]);
	384	if (((if17 == 2) && (gps[procid]->neighbors[UP] == -1)) \|\| ((jf == 2) && (gps[procid]->neighbors[LEFT] == -1))) {
	385	s2 = 0;
	386	s3 = 0;
	387	s4 = 0;
	388	} else if ((if17 == 2) \|\| (jf == 2)) {
	389	if (jf == 2) {
	390	temp = q_multi[left_proc][kf][if17][jm - 1];
	391	} else {
	392	temp = t1a[jf - 3];
	393	}
	394	s = t1a[jf - 1] + temp + t1d[jf - 2] + t1e[jf - 2];
	395	s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]);
	396	if (if17 == 2) {
	397	temp = q_multi[up_proc][kf][im - 1][jf];
	398	} else {
	399	temp = t1g[jf];
	400	}
	401	s = t1f[jf + 1] + t1f[jf - 1] + temp + t1d[jf];
	402	s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]);
	403	if (jf == 2) {
	404	temp = q_multi[left_proc][kf][if17 - 2][jm - 1];
	405	} else {
	406	temp = t1f[jf - 3];
	407	}
	408	if (if17 == 2) {
	409	temp2 = q_multi[up_proc][kf][im - 1][jf - 2];
	410	} else {
	411	temp2 = t1g[jf - 2];
	412	}
	413	s = t1f[jf - 1] + temp + temp2 + t1d[jf - 2];
	414	s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]);
	415	} else {
	416	s = t1a[jf - 1] + t1a[jf - 3] + t1d[jf - 2] + t1e[jf - 2];
	417	s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]);
	418	s = t1f[jf + 1] + t1f[jf - 1] + t1g[jf] + t1d[jf];
	419	s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]);
	420	s = t1f[jf - 1] + t1f[jf - 3] + t1g[jf - 2] + t1d[jf - 2];
	421	s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]);
	422	}
	423	int1 = j_int_factor * s4 + (1.0 - j_int_factor) * s3;
	424	int2 = j_int_factor * s2 + (1.0 - j_int_factor) * s1;
	425	//int_val = i_int_factorint1+(1.0-i_int_factor)int2;
	426	t1c[jc] = i_int_factor * int1 + (1.0 - i_int_factor) * int2;
	427	}
	428	}
	429	}
	430
	431	/* perform interpolation and addition to next finest grid */
	432	void intadd(long kc, long procid)
	433	{
	434	long ic;
	435	long if17;
	436	long jf;
	437	long jc;
	438	long kf;
	439	long istart;
	440	long jstart;
	441	long iend;
	442	long jend;
	443	double int1;
	444	double int2;
	445	double i_int_factor1;
	446	double j_int_factor1;
	447	double i_int_factor2;
	448	double j_int_factor2;
	449	long i_off;
	450	long j_off;
	451	double **t2a;
	452	double **t2b;
	453	double *t1a;
	454	double *t1b;
	455	double *t1c;
	456	double *t1d;
	457	double *t1e;
	458
	459	kf = kc + 1;
	460	//hc = lev_res[kc];
	461	//hf = lev_res[kf];
	462
	463	istart = gps[procid]->rlist[kc];
	464	jstart = gps[procid]->rljst[kc];
	465	iend = gps[procid]->rlien[kc] - 1;
	466	jend = gps[procid]->rljen[kc] - 1;
	467	i_off = gps[procid]->rownum * ypts_per_proc[kc];
	468	j_off = gps[procid]->colnum * xpts_per_proc[kc];
	469
	470	t2a = (double **) q_multi[procid][kc];
	471	t2b = (double **) q_multi[procid][kf];
	472	if17 = 2 * (istart - 1);
	473	for (ic = istart; ic <= iend; ic++) {
	474	if17 += 2;
	475	i_int_factor1 = ((imx[kc] - 2) - (ic + i_off - 1)) * (i_int_coeff[kf]);
	476	i_int_factor2 = (ic + i_off) * i_int_coeff[kf];
	477	jf = 2 * (jstart - 1);
	478
	479	t1a = (double *) t2a[ic];
	480	t1b = (double *) t2a[ic - 1];
	481	t1c = (double *) t2a[ic + 1];
	482	t1d = (double *) t2b[if17];
	483	t1e = (double *) t2b[if17 - 1];
	484	for (jc = jstart; jc <= jend; jc++) {
	485	jf += 2;
	486	j_int_factor1 = ((jmx[kc] - 2) - (jc + j_off - 1)) * (j_int_coeff[kf]);
	487	j_int_factor2 = (jc + j_off) * j_int_coeff[kf];
	488
	489	int1 = j_int_factor1 * t1a[jc - 1] + (1.0 - j_int_factor1) * t1a[jc];
	490	int2 = j_int_factor1 * t1b[jc - 1] + (1.0 - j_int_factor1) * t1b[jc];
	491	t1e[jf - 1] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1;
	492	int2 = j_int_factor1 * t1c[jc - 1] + (1.0 - j_int_factor1) * t1c[jc];
	493	t1d[jf - 1] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1;
	494	int1 = j_int_factor2 * t1a[jc + 1] + (1.0 - j_int_factor2) * t1a[jc];
	495	int2 = j_int_factor2 * t1b[jc + 1] + (1.0 - j_int_factor2) * t1b[jc];
	496	t1e[jf] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1;
	497	int2 = j_int_factor2 * t1c[jc + 1] + (1.0 - j_int_factor2) * t1c[jc];
	498	t1d[jf] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1;
	499	}
	500	}
	501	}
	502
	503	////////////////////////////////////////////
	504	// initialize a grid to zero in parallel
	505	////////////////////////////////////////////
	506	void putz(long k, long procid)
	507	{
	508	long i;
	509	long j;
	510	long istart;
	511	long jstart;
	512	long iend;
	513	long jend;
	514	double **t2a;
	515	double *t1a;
	516
	517	istart = gps[procid]->rlist[k];
	518	jstart = gps[procid]->rljst[k];
	519	iend = gps[procid]->rlien[k];
	520	jend = gps[procid]->rljen[k];
	521
	522	t2a = (double **) q_multi[procid][k];
	523	for (i = istart; i <= iend; i++)
	524	{
	525	t1a = (double *) t2a[i];
	526	for (j = jstart; j <= jend; j++)
	527	{
	528	t1a[j] = 0.0;
	529	}
	530	}
	531	}
	532
	533	//////////////////////////////////////
	534	void copy_borders(long k, long procid)
	535	{
	536	long i;
	537	long j;
	538	long jj;
	539	long im;
	540	long jm;
	541	long lastrow;
	542	long lastcol;
	543	double **t2a;
	544	double **t2b;
	545	double *t1a;
	546	double *t1b;
	547
	548	im = (imx[k] - 2) / yprocs + 2;
	549	jm = (jmx[k] - 2) / xprocs + 2;
	550	lastrow = (imx[k] - 2) / yprocs;
	551	lastcol = (jmx[k] - 2) / xprocs;
	552
	553	t2a = (double **) q_multi[procid][k];
	554	jj = gps[procid]->neighbors[UPLEFT];
	555	if (jj != -1) {
	556	t2a[0][0] = q_multi[jj][k][im - 2][jm - 2];
	557	}
	558	jj = gps[procid]->neighbors[UPRIGHT];
	559	if (jj != -1) {
	560	t2a[0][jm - 1] = q_multi[jj][k][im - 2][1];
	561	}
	562	jj = gps[procid]->neighbors[DOWNLEFT];
	563	if (jj != -1) {
	564	t2a[im - 1][0] = q_multi[jj][k][1][jm - 2];
	565	}
	566	jj = gps[procid]->neighbors[DOWNRIGHT];
	567	if (jj != -1) {
	568	t2a[im - 1][jm - 1] = q_multi[jj][k][1][1];
	569	}
	570
	571	if (gps[procid]->neighbors[UP] == -1) {
	572	jj = gps[procid]->neighbors[LEFT];
	573	if (jj != -1) {
	574	t2a[0][0] = q_multi[jj][k][0][jm - 2];
	575	} else {
	576	jj = gps[procid]->neighbors[DOWN];
	577	if (jj != -1) {
	578	t2a[im - 1][0] = q_multi[jj][k][1][0];
	579	}
	580	}
	581	jj = gps[procid]->neighbors[RIGHT];
	582	if (jj != -1) {
	583	t2a[0][jm - 1] = q_multi[jj][k][0][1];
	584	} else {
	585	jj = gps[procid]->neighbors[DOWN];
	586	if (jj != -1) {
	587	t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1];
	588	}
	589	}
	590	} else if (gps[procid]->neighbors[DOWN] == -1) {
	591	jj = gps[procid]->neighbors[LEFT];
	592	if (jj != -1) {
	593	t2a[im - 1][0] = q_multi[jj][k][im - 1][jm - 2];
	594	} else {
	595	jj = gps[procid]->neighbors[UP];
	596	if (jj != -1) {
	597	t2a[0][0] = q_multi[jj][k][im - 2][0];
	598	}
	599	}
	600	jj = gps[procid]->neighbors[RIGHT];
	601	if (jj != -1) {
	602	t2a[im - 1][jm - 1] = q_multi[jj][k][im - 1][1];
	603	} else {
	604	jj = gps[procid]->neighbors[UP];
	605	if (jj != -1) {
	606	t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1];
	607	}
	608	}
	609	} else if (gps[procid]->neighbors[LEFT] == -1) {
	610	jj = gps[procid]->neighbors[UP];
	611	if (jj != -1) {
	612	t2a[0][0] = q_multi[jj][k][im - 2][0];
	613	}
	614	jj = gps[procid]->neighbors[DOWN];
	615	if (jj != -1) {
	616	t2a[im - 1][0] = q_multi[jj][k][1][0];
	617	}
	618	} else if (gps[procid]->neighbors[RIGHT] == -1) {
	619	jj = gps[procid]->neighbors[UP];
	620	if (jj != -1) {
	621	t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1];
	622	}
	623	jj = gps[procid]->neighbors[DOWN];
	624	if (jj != -1) {
	625	t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1];
	626	}
	627	}
	628
	629	j = gps[procid]->neighbors[UP];
	630	if (j != -1) {
	631	t1a = (double *) t2a[0];
	632	t1b = (double *) q_multi[j][k][im - 2];
	633	for (i = 1; i <= lastcol; i++) {
	634	t1a[i] = t1b[i];
	635	}
	636	}
	637	j = gps[procid]->neighbors[DOWN];
	638	if (j != -1) {
	639	t1a = (double *) t2a[im - 1];
	640	t1b = (double *) q_multi[j][k][1];
	641	for (i = 1; i <= lastcol; i++) {
	642	t1a[i] = t1b[i];
	643	}
	644	}
	645	j = gps[procid]->neighbors[LEFT];
	646	if (j != -1) {
	647	t2b = (double **) q_multi[j][k];
	648	for (i = 1; i <= lastrow; i++) {
	649	t2a[i][0] = t2b[i][jm - 2];
	650	}
	651	}
	652	j = gps[procid]->neighbors[RIGHT];
	653	if (j != -1) {
	654	t2b = (double **) q_multi[j][k];
	655	for (i = 1; i <= lastrow; i++) {
	656	t2a[i][jm - 1] = t2b[i][1];
	657	}
	658	}
	659	} // end copy_border()
	660
	661	//////////////////////////////////////////
	662	void copy_rhs_borders(long k, long procid)
	663	{
	664	long i;
	665	long j;
	666	long im;
	667	long jm;
	668	long lastrow;
	669	long lastcol;
	670	double **t2a;
	671	double **t2b;
	672	double *t1a;
	673	double *t1b;
	674
	675	im = (imx[k] - 2) / yprocs + 2;
	676	jm = (jmx[k] - 2) / xprocs + 2;
	677	lastrow = (imx[k] - 2) / yprocs;
	678	lastcol = (jmx[k] - 2) / xprocs;
	679
	680	t2a = (double **) rhs_multi[procid][k];
	681	if (gps[procid]->neighbors[UPLEFT] != -1) {
	682	j = gps[procid]->neighbors[UPLEFT];
	683	t2a[0][0] = rhs_multi[j][k][im - 2][jm - 2];
	684	}
	685
	686	if (gps[procid]->neighbors[UP] != -1) {
	687	j = gps[procid]->neighbors[UP];
	688	if (j != -1) {
	689	t1a = (double *) t2a[0];
	690	t1b = (double *) rhs_multi[j][k][im - 2];
	691	for (i = 2; i <= lastcol; i += 2) {
	692	t1a[i] = t1b[i];
	693	}
	694	}
	695	}
	696	if (gps[procid]->neighbors[LEFT] != -1) {
	697	j = gps[procid]->neighbors[LEFT];
	698	if (j != -1) {
	699	t2b = (double **) rhs_multi[j][k];
	700	for (i = 2; i <= lastrow; i += 2) {
	701	t2a[i][0] = t2b[i][jm - 2];
	702	}
	703	}
	704	}
	705	}
	706
	707	//////////////////////////////////
	708	void copy_red(long k, long procid)
	709	{
	710	long i;
	711	long j;
	712	long im;
	713	long jm;
	714	long lastrow;
	715	long lastcol;
	716	double **t2a;
	717	double **t2b;
	718	double *t1a;
	719	double *t1b;
	720
	721	im = (imx[k] - 2) / yprocs + 2;
	722	jm = (jmx[k] - 2) / xprocs + 2;
	723	lastrow = (imx[k] - 2) / yprocs;
	724	lastcol = (jmx[k] - 2) / xprocs;
	725
	726	t2a = (double **) q_multi[procid][k];
	727	j = gps[procid]->neighbors[UP];
	728	if (j != -1) {
	729	t1a = (double *) t2a[0];
	730	t1b = (double *) q_multi[j][k][im - 2];
	731	for (i = 2; i <= lastcol; i += 2) {
	732	t1a[i] = t1b[i];
	733	}
	734	}
	735	j = gps[procid]->neighbors[DOWN];
	736	if (j != -1) {
	737	t1a = (double *) t2a[im - 1];
	738	t1b = (double *) q_multi[j][k][1];
	739	for (i = 1; i <= lastcol; i += 2) {
	740	t1a[i] = t1b[i];
	741	}
	742	}
	743	j = gps[procid]->neighbors[LEFT];
	744	if (j != -1) {
	745	t2b = (double **) q_multi[j][k];
	746	for (i = 2; i <= lastrow; i += 2) {
	747	t2a[i][0] = t2b[i][jm - 2];
	748	}
	749	}
	750	j = gps[procid]->neighbors[RIGHT];
	751	if (j != -1) {
	752	t2b = (double **) q_multi[j][k];
	753	for (i = 1; i <= lastrow; i += 2) {
	754	t2a[i][jm - 1] = t2b[i][1];
	755	}
	756	}
	757	}
	758
	759	////////////////////////////////////
	760	void copy_black(long k, long procid)
	761	{
	762	long i;
	763	long j;
	764	long im;
	765	long jm;
	766	long lastrow;
	767	long lastcol;
	768	double **t2a;
	769	double **t2b;
	770	double *t1a;
	771	double *t1b;
	772
	773	im = (imx[k] - 2) / yprocs + 2;
	774	jm = (jmx[k] - 2) / xprocs + 2;
	775	lastrow = (imx[k] - 2) / yprocs;
	776	lastcol = (jmx[k] - 2) / xprocs;
	777
	778	t2a = (double **) q_multi[procid][k];
	779	j = gps[procid]->neighbors[UP];
	780	if (j != -1) {
	781	t1a = (double *) t2a[0];
	782	t1b = (double *) q_multi[j][k][im - 2];
	783	for (i = 1; i <= lastcol; i += 2) {
	784	t1a[i] = t1b[i];
	785	}
	786	}
	787	j = gps[procid]->neighbors[DOWN];
	788	if (j != -1) {
	789	t1a = (double *) t2a[im - 1];
	790	t1b = (double *) q_multi[j][k][1];
	791	for (i = 2; i <= lastcol; i += 2) {
	792	t1a[i] = t1b[i];
	793	}
	794	}
	795	j = gps[procid]->neighbors[LEFT];
	796	if (j != -1) {
	797	t2b = (double **) q_multi[j][k];
	798	for (i = 1; i <= lastrow; i += 2) {
	799	t2a[i][0] = t2b[i][jm - 2];
	800	}
	801	}
	802	j = gps[procid]->neighbors[RIGHT];
	803	if (j != -1) {
	804	t2b = (double **) q_multi[j][k];
	805	for (i = 2; i <= lastrow; i += 2) {
	806	t2a[i][jm - 1] = t2b[i][1];
	807	}
	808	}
	809	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: soft/giet_vm/applications/ocean/multi.c @ 812

Download in other formats: