Changeset 589 for soft/giet_vm/applications
- Timestamp:
- Jul 8, 2015, 3:57:15 PM (9 years ago)
- Location:
- soft/giet_vm/applications
- Files:
-
- 11 added
- 6 deleted
- 29 edited
Legend:
- Unmodified
- Added
- Removed
-
soft/giet_vm/applications/classif/Makefile
r488 r589 1 APP_NAME=classif2 1 3 USE+= stdio.o 4 USE+= mwmr_channel.o 5 6 USES=$(patsubst %,$(BUILD_PATH)/$(LIB_NAME)/%,$(USE)) 2 APP_NAME = classif 7 3 8 4 OBJS= main.o 9 5 10 all: $(APP_NAME).elf 6 LIBS= -L../../build/libs -luser 11 7 12 BIN_NAME_PATH=$(BUILD_PATH)$(APP_NAME).elf 8 INCLUDES = -I. -I../.. -I../../giet_libs -I../../giet_xml 13 9 14 $(APP_NAME).elf: $(OBJS) $(APP_NAME).ld 15 $(LD) -o $(BIN_NAME_PATH) -T $(APP_NAME).ld $(OBJS) $(USES) 16 $(DU) -D $(BIN_NAME_PATH) > $@.txt 10 LIB_DEPS = ../../build/libs/libuser.a 11 12 appli.elf: $(OBJS) $(APP_NAME).ld $(LIBS_DEPS) 13 $(LD) -o $@ -T $(APP_NAME).ld $(OBJS) $(LIBS) 14 $(DU) -D $@ > $@.txt 17 15 18 16 %.o: %.c 19 $(CC) $(INCLUDE) $(CFLAGS) $($*.o_CFLAGS) -c -o $@ $< 20 $(DU) -D $@ > $@.txt 17 $(CC) $(INCLUDES) $(CFLAGS) -c -o $@ $< 21 18 22 19 clean: 23 rm -f *.o *.elf *.txt core *~ 2>$(TRASH) 24 rm $(BIN_NAME_PATH) 2>$(TRASH) 20 rm -f *.o *.elf *.txt *.pyc core *~ -
soft/giet_vm/applications/classif/classif.py
r533 r589 4 4 5 5 ################################################################################### 6 # file : classif.py 6 # file : classif.py 7 7 # date : november 2014 8 8 # author : Alain Greiner … … 30 30 ################################################################################## 31 31 32 ###################### ###33 def classif( mapping ):32 ###################### 33 def extend( mapping ): 34 34 35 35 x_size = mapping.x_size … … 60 60 mapping.addVseg( vspace, 'classif_data', data_base , data_size, 61 61 'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM', 62 binpath = 'build/classif/ classif.elf',62 binpath = 'build/classif/appli.elf', 63 63 local = False ) 64 64 … … 84 84 code_base , code_size, 85 85 'CXWU', vtype = 'ELF', x = x, y = y, pseg = 'RAM', 86 binpath = 'build/classif/ classif.elf',86 binpath = 'build/classif/appli.elf', 87 87 local = True ) 88 88 … … 133 133 if __name__ == '__main__': 134 134 135 vspace = classif( Mapping( 'test', 2, 2, 4 ) )135 vspace = extend( Mapping( 'test', 2, 2, 4 ) ) 136 136 print vspace.xml() 137 137 -
soft/giet_vm/applications/convol/convol.py
r502 r589 4 4 5 5 ###################################################################################### 6 # file : convol.py (for the convol application)6 # file : convol.py 7 7 # date : may 2014 8 8 # author : Alain Greiner … … 27 27 28 28 ###################### 29 def convol( mapping ):29 def extend( mapping ): 30 30 31 31 x_size = mapping.x_size … … 54 54 mapping.addVseg( vspace, 'conv_data', data_base , data_size, 55 55 'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM', 56 binpath = 'build/convol/ convol.elf',56 binpath = 'build/convol/appli.elf', 57 57 local = False ) 58 58 … … 67 67 mapping.addVseg( vspace, 'conv_code_%d_%d' % (x,y), base, size, 68 68 'CXWU', vtype = 'ELF', x = x , y = y , pseg = 'RAM', 69 binpath = 'build/convol/ convol.elf',69 binpath = 'build/convol/appli.elf', 70 70 local = True ) 71 71 … … 119 119 if __name__ == '__main__': 120 120 121 vspace = convol( Mapping( 'test', 2, 2, 4 ) )121 vspace = extend( Mapping( 'test', 2, 2, 4 ) ) 122 122 print vspace.xml() 123 123 -
soft/giet_vm/applications/convol/main.c
r502 r589 39 39 #define FRAME_SIZE (NB_PIXELS * PIXEL_SIZE) 40 40 41 #define SEEK_SET 0 42 41 43 #define TA(c,l,p) (A[c][((NP) * (l)) + (p)]) 42 44 #define TB(c,p,l) (B[c][((NL) * (p)) + (l)]) … … 125 127 unsigned int ntasks = nclusters * nprocs; // number of tasks 126 128 unsigned int frame_size = FRAME_SIZE; // total size (bytes) 127 unsigned int nblocks = frame_size / 512; // number of blocks/frame128 129 129 130 unsigned int lines_per_task = NL / ntasks; // lines per task … … 166 167 { 167 168 giet_shr_printf("\n[CONVOL] task[0,0,0] starts barrier init at cycle %d\n" 168 "- CLUSTERS = %d\n"169 "- PROCS = %d\n"170 "- TASKS = %d\n"171 "- BLOCKS= %d\n",172 giet_proctime(), nclusters, nprocs, ntasks, nblocks);169 "- CLUSTERS = %d\n" 170 "- PROCS = %d\n" 171 "- TASKS = %d\n" 172 "- LINES/TASK = %d\n", 173 giet_proctime(), nclusters, nprocs, ntasks, lines_per_task ); 173 174 #if USE_SQT_BARRIER 174 175 sqt_barrier_init( &barrier, x_size , y_size , nprocs ); … … 182 183 barrier_init_ok = 1; 183 184 } 184 else 185 else 185 186 { 186 187 while ( barrier_init_ok == 0 ); … … 250 251 /////////////////////////////////////////////////////////////////////////// 251 252 // task[0,0,0] open the file containing image, and load it from disk 252 // to all A[c] buffers ( nblocks/ nclusters loaded in each cluster).253 // to all A[c] buffers (frame_size / nclusters loaded in each cluster). 253 254 // Other tasks are waiting on the init_ok condition. 254 255 ////////////////////////////////////////////////////////////////////////// … … 256 257 { 257 258 // open file 258 file = giet_fat_open( "misc/philips_image.raw", 0 );259 file = giet_fat_open( "misc/philips_image.raw" , 0 ); 259 260 if ( file < 0 ) giet_exit( "[CONVOL ERROR] task[0,0,0] cannot open" 260 261 " file misc/philips_image.raw" ); … … 268 269 "for cluster %d at cycle %d\n", c, giet_proctime() ); 269 270 271 giet_fat_lseek( file, 272 (frame_size/nclusters)*c, 273 SEEK_SET ); 274 270 275 giet_fat_read( file, 271 276 A[c], 272 nblocks/nclusters, 273 (nblocks/nclusters)*c ); 277 frame_size/nclusters ); 274 278 275 279 giet_shr_printf( "\n[CONVOL] task[0,0,0] completes load " -
soft/giet_vm/applications/coproc/coproc.py
r555 r589 4 4 5 5 ################################################################################## 6 # file : coproc.py (for the coproc application)6 # file : coproc.py 7 7 # date : march 2015 8 8 # author : Alain Greiner … … 22 22 23 23 ###################### 24 def coproc( mapping ):24 def extend( mapping ): 25 25 26 26 x_size = mapping.x_size … … 55 55 mapping.addVseg( vspace, 'coproc_data', data_base , data_size, 56 56 'C_WU', vtype = 'ELF', x = x, y = y, pseg = 'RAM', 57 binpath = 'build/coproc/ coproc.elf',57 binpath = 'build/coproc/appli.elf', 58 58 local = False ) 59 59 … … 61 61 mapping.addVseg( vspace, 'coproc_code', code_base , code_size, 62 62 'CXWU', vtype = 'ELF', x = x, y = y, pseg = 'RAM', 63 binpath = 'build/coproc/ coproc.elf',63 binpath = 'build/coproc/appli.elf', 64 64 local = False ) 65 65 … … 82 82 if __name__ == '__main__': 83 83 84 vspace = coproc( Mapping( 'test', 2, 2, 4 ) )84 vspace = extend( Mapping( 'test', 2, 2, 4 ) ) 85 85 print vspace.xml() 86 86 -
soft/giet_vm/applications/coproc/main.c
r555 r589 12 12 13 13 #include "stdio.h" 14 #include "mapping_info.h" // for coprocessors types an modes14 #include "mapping_info.h" // for coprocessors types an modes 15 15 16 16 #define VECTOR_SIZE 128 -
soft/giet_vm/applications/dhrystone/Makefile
r241 r589 1 APP_NAME=dhrystone2 1 3 USE+= stdio.o 4 #USE+= mwmr_channel.o 2 APP_NAME = dhrystone 5 3 6 USES=$(patsubst %,$(BUILD_PATH)/$(LIB_NAME)/%,$(USE)) 4 OBJS = dhry_1.o \ 5 dhry_2.o 7 6 8 OBJS= dhry_1.o dhry_2.o 7 LIBS = -L../../build/libs -luser 9 8 10 all: $(APP_NAME).elf 9 INCLUDES = -I../../giet_libs -I. -I../.. 11 10 12 BIN_NAME_PATH=$(BUILD_PATH)$(APP_NAME).elf 11 LIB_DEPS = ../../build/libs/libuser.a 13 12 14 $(APP_NAME).elf: $(OBJS) $(APP_NAME).ld 15 $(LD) -o $ (BIN_NAME_PATH) -T $(APP_NAME).ld $(OBJS) $(USES)16 $(DU) -D $ (BIN_NAME_PATH)> $@.txt13 appli.elf: $(OBJS) $(APP_NAME).ld $(LIBS_DEPS) 14 $(LD) -o $@ -T $(APP_NAME).ld $(OBJS) $(LIBS) 15 $(DU) -D $@ > $@.txt 17 16 18 17 %.o: %.c 19 $(CC) $(INCLUDE) $(CFLAGS) $($*.o_CFLAGS) -c -o $@ $< 20 $(DU) -D $@ > $@.txt 18 $(CC) $(INCLUDES) $(CFLAGS) -c -o $@ $< 21 19 22 20 clean: 23 rm -f *.o *.elf *.txt core *~ 2>$(TRASH) 24 rm $(BIN_NAME_PATH) 2>$(TRASH) 21 rm -f *.o *.elf *.txt core *~ -
soft/giet_vm/applications/display/Makefile
r191 r589 1 APP_NAME=display2 1 3 USE+= stdio.o 4 5 USES=$(patsubst %,$(BUILD_PATH)/$(LIB_NAME)/%,$(USE)) 2 APP_NAME = display 6 3 7 4 OBJS= main.o 8 5 9 all: $(APP_NAME).elf 6 LIBS= -L../../build/libs -luser 10 7 11 BIN_NAME_PATH=$(BUILD_PATH)$(APP_NAME).elf 8 INCLUDES = -I../../giet_libs -I. -I../.. 12 9 13 $(APP_NAME).elf: $(OBJS) $(APP_NAME).ld 14 $(LD) -o $(BIN_NAME_PATH) -T $(APP_NAME).ld $(OBJS) $(USES) 15 $(DU) -D $(BIN_NAME_PATH) > $@.txt 10 LIB_DEPS = ../../build/libs/libuser.a 11 12 appli.elf: $(OBJS) $(APP_NAME).ld $(LIBS_DEPS) 13 $(LD) -o $@ -T $(APP_NAME).ld $(OBJS) $(LIBS) 14 $(DU) -D $@ > $@.txt 16 15 17 16 %.o: %.c 18 $(CC) $(INCLUDE) $(CFLAGS) $($*.o_CFLAGS) -c -o $@ $< 19 $(DU) -D $@ > $@.txt 20 17 $(CC) $(INCLUDES) $(CFLAGS) -c -o $@ $< 21 18 22 19 clean: 23 rm -f *.o *.elf *.txt core *~ 2>$(TRASH) 24 rm $(BIN_NAME_PATH) 2>$(TRASH) 20 rm -f *.o *.elf *.txt core *~ -
soft/giet_vm/applications/display/display.py
r574 r589 13 13 14 14 ###################### 15 def display( mapping ):15 def extend( mapping ): 16 16 17 17 nprocs = mapping.nprocs … … 38 38 mapping.addVseg( vspace, 'disp_data', data_base , data_size, 39 39 'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM', 40 binpath = 'build/display/ display.elf',40 binpath = 'build/display/appli.elf', 41 41 local = False ) 42 42 … … 44 44 mapping.addVseg( vspace, 'disp_code', code_base , code_size, 45 45 'CXWU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM', 46 binpath = 'build/display/ display.elf',46 binpath = 'build/display/appli.elf', 47 47 local = False ) 48 48 … … 69 69 if __name__ == '__main__': 70 70 71 vspace = display( Mapping( 'test', 2, 2, 4 ) )71 vspace = extend( Mapping( 'test', 2, 2, 4 ) ) 72 72 print vspace.xml() 73 73 -
soft/giet_vm/applications/display/main.c
r574 r589 16 16 #define NLINES 256 17 17 #define NIMAGES 1 18 #define NBLOCKS (NPIXELS*NLINES/512) // number of blocks per image19 18 20 19 #define INTERACTIVE 1 … … 69 68 { 70 69 // load buf0 71 giet_fat_read( fd, buf0, N BLOCKS, image*NBLOCKS );70 giet_fat_read( fd, buf0, NPIXELS*NLINES ); 72 71 73 72 giet_tty_printf("\n[DISPLAY] Proc[%d,%d,%d] load image %d at cycle %d\n", … … 85 84 86 85 // load buf1 87 giet_fat_read( fd, buf1, N BLOCKS, image*NBLOCKS );86 giet_fat_read( fd, buf1, NPIXELS*NLINES ); 88 87 89 88 giet_tty_printf("\n[DISPLAY] Proc[%d,%d,%d] load image %d at cycle %d\n", -
soft/giet_vm/applications/gameoflife/gameoflife.py
r504 r589 4 4 5 5 ################################################################################## 6 # file : gameoflife.py (for the gameoflife application)6 # file : gameoflife.py 7 7 # date : february 2015 8 8 # author : Alain Greiner … … 26 26 ################################################################################## 27 27 28 ###################### ####29 def gameoflife( mapping ):28 ###################### 29 def extend( mapping ): 30 30 31 31 x_size = mapping.x_size … … 54 54 mapping.addVseg( vspace, 'gol_data', data_base , data_size, 55 55 'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM', 56 binpath = 'build/gameoflife/ gameoflife.elf',56 binpath = 'build/gameoflife/appli.elf', 57 57 local = False , big = True ) 58 58 … … 78 78 code_base , code_size, 79 79 'CXWU', vtype = 'ELF', x = x, y = y, pseg = 'RAM', 80 binpath = 'build/gameoflife/ gameoflife.elf',80 binpath = 'build/gameoflife/appli.elf', 81 81 local = True ) 82 82 … … 118 118 if __name__ == '__main__': 119 119 120 vspace = gameoflife( Mapping( 'test', 2, 2, 4 ) )120 vspace = extend( Mapping( 'test', 2, 2, 4 ) ) 121 121 print vspace.xml() 122 122 -
soft/giet_vm/applications/ocean/Makefile.config.giet
r584 r589 3 3 # Default plateform architecture and default CPU 4 4 #------------------------------------------------------------------------------ 5 GIET_DISTRIB= $(PWD)/../..5 GIET_DISTRIB= ../.. 6 6 7 7 #------------------------------------------------------------------------------ … … 67 67 68 68 clean: 69 $(RM) -f *.c *.h *.o *.pyc $(TARGET)69 rm -f *.c *.h *.o *.pyc *.elf *.txt core *~ 70 70 71 71 -
soft/giet_vm/applications/ocean/Makefile.giet
r581 r589 1 TARGET = ocean.elf1 TARGET = appli.elf 2 2 OBJS = jacobcalc.o jacobcalc2.o laplacalc.o linkup.o main.o multi.o slave1.o slave2.o subblock.o giet_utils.o 3 3 -
soft/giet_vm/applications/ocean/giet_utils.C
r581 r589 1 /* Définitions des fonctions standard (simplifiées) utilisées par ocean pour GIET */ 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 2 2 3 #include <stdarg.h>4 #include <stdio.h>5 #include <malloc.h>6 #include <stdlib.h>7 8 EXTERN_ENV9 10 #include "decs.h"11 #include "giet_utils.h"12 13 FILE * stdout = "";14 FILE *stderr = "STDERR : ";15 16 extern double ****main_q_multi;17 extern double ****main_rhs_multi;18 extern double ****main_psi;19 extern double ****main_psim;20 extern double ***main_psium;21 extern double ***main_psilm;22 extern double ***main_psib;23 extern double ***main_ga;24 extern double ***main_gb;25 extern double ***main_oldga;26 extern double ***main_oldgb;27 extern double ****main_work1;28 extern double ***main_work2;29 extern double ***main_work3;30 extern double ****main_work4;31 extern double ****main_work5;32 extern double ***main_work6;33 extern double ****main_work7;34 extern long *main_imx;35 extern long *main_jmx;36 37 extern double *main_lev_res;38 extern double *main_lev_tol;39 extern double *main_i_int_coeff;40 extern double *main_j_int_coeff;41 extern long *main_xpts_per_proc;42 extern long *main_ypts_per_proc;43 extern long main_xprocs;44 extern long main_yprocs;45 extern long main_numlev;46 extern double main_eig2;47 extern long main_im;48 extern long main_jm;49 50 double ****work1 __attribute__ ((section("seg_ldata")));51 double ***work2 __attribute__ ((section("seg_ldata")));52 double ***work3 __attribute__ ((section("seg_ldata")));53 double ****work4 __attribute__ ((section("seg_ldata")));54 double ****work5 __attribute__ ((section("seg_ldata")));55 double ***work6 __attribute__ ((section("seg_ldata")));56 double ****work7 __attribute__ ((section("seg_ldata")));57 double ****psi __attribute__ ((section("seg_ldata")));58 double ****psim __attribute__ ((section("seg_ldata")));59 double ***psium __attribute__ ((section("seg_ldata")));60 double ***psilm __attribute__ ((section("seg_ldata")));61 double ***psib __attribute__ ((section("seg_ldata")));62 double ***ga __attribute__ ((section("seg_ldata")));63 double ***gb __attribute__ ((section("seg_ldata")));64 double ***oldga __attribute__ ((section("seg_ldata")));65 double ***oldgb __attribute__ ((section("seg_ldata")));66 double ****q_multi __attribute__ ((section("seg_ldata")));67 double ****rhs_multi __attribute__ ((section("seg_ldata")));68 long *imx __attribute__ ((section("seg_ldata")));69 long *jmx __attribute__ ((section("seg_ldata")));70 double *f __attribute__ ((section("seg_ldata")));71 struct Global_Private *gp;72 73 double *lev_res __attribute__ ((section("seg_ldata")));74 double *lev_tol __attribute__ ((section("seg_ldata")));75 double *i_int_coeff __attribute__ ((section("seg_ldata")));76 double *j_int_coeff __attribute__ ((section("seg_ldata")));77 long *xpts_per_proc __attribute__ ((section("seg_ldata")));78 long *ypts_per_proc __attribute__ ((section("seg_ldata")));79 long xprocs __attribute__ ((section("seg_ldata")));80 long yprocs __attribute__ ((section("seg_ldata")));81 long numlev __attribute__ ((section("seg_ldata")));82 double eig2 __attribute__ ((section("seg_ldata")));83 long im __attribute__ ((section("seg_ldata")));84 long jm __attribute__ ((section("seg_ldata")));85 86 unsigned int nclusters_x __attribute__ ((section("seg_ldata")));87 unsigned int nclusters_y __attribute__ ((section("seg_ldata")));88 unsigned int procs_per_cluster __attribute__ ((section("seg_ldata")));89 90 volatile long heap_inited = 0;91 volatile int run_threads = 0;92 93 //Entry point for all threads (except main)94 // waiting allocs and inits of main then copy read-only tabs in ldata segment (replicated)95 // some read-write tabs are also replicated, but not entirely : only pointers96 __attribute__ ((constructor)) void thread()97 {98 unsigned long size;99 long id = (long) giet_thread_id();100 101 unsigned int cx, cy, lp;102 103 giet_proc_xyp(&cx, &cy, &lp);104 giet_shr_printf("Thread %d (%d:%d.%d) waiting\n", id, cx, cy, lp);105 106 if (lp == 0) {107 108 giet_procs_number(&nclusters_x, &nclusters_y, &procs_per_cluster);109 heap_init(cx, cy);110 111 while (heap_inited != id) {112 asm volatile ("nop\r\n");113 }114 heap_inited += procs_per_cluster;115 116 117 size = nprocs * sizeof(double ***);118 rhs_multi = (double ****) G_MALLOC(size, id);119 q_multi = (double ****) G_MALLOC(size, id);120 psi = (double ****) G_MALLOC(size, id);121 psim = (double ****) G_MALLOC(size, id);122 work1 = (double ****) G_MALLOC(size, id);123 work4 = (double ****) G_MALLOC(size, id);124 work5 = (double ****) G_MALLOC(size, id);125 work7 = (double ****) G_MALLOC(size, id);126 127 size = nprocs * sizeof(double **);128 psium = (double ***) G_MALLOC(size, id);129 psilm = (double ***) G_MALLOC(size, id);130 psib = (double ***) G_MALLOC(size, id);131 ga = (double ***) G_MALLOC(size, id);132 gb = (double ***) G_MALLOC(size, id);133 oldga = (double ***) G_MALLOC(size, id);134 oldgb = (double ***) G_MALLOC(size, id);135 work2 = (double ***) G_MALLOC(size, id);136 work3 = (double ***) G_MALLOC(size, id);137 work6 = (double ***) G_MALLOC(size, id);138 }139 140 while (run_threads != 1) {141 asm volatile ("nop\r\n");142 }143 144 *gp[id].lpid = lp;145 146 if (lp == 0) {147 int i, j, k;148 149 xprocs = main_xprocs;150 yprocs = main_yprocs;151 numlev = main_numlev;152 eig2 = main_eig2;153 im = main_im;154 jm = main_jm;155 156 size = numlev * sizeof(long);157 imx = (long *) G_MALLOC(size, id);158 jmx = (long *) G_MALLOC(size, id);159 xpts_per_proc = (long *) G_MALLOC(size, id);160 ypts_per_proc = (long *) G_MALLOC(size, id);161 162 size = numlev * sizeof(double);163 lev_res = (double *) G_MALLOC(size, id);164 lev_tol = (double *) G_MALLOC(size, id);165 i_int_coeff = (double *) G_MALLOC(size, id);166 j_int_coeff = (double *) G_MALLOC(size, id);167 168 for(i=0;i<numlev;i++) {169 imx[i] = main_imx[i];170 jmx[i] = main_jmx[i];171 lev_res[i] = main_lev_res[i];172 lev_tol[i] = main_lev_tol[i];173 i_int_coeff[i] = main_i_int_coeff[i];174 j_int_coeff[i] = main_j_int_coeff[i];175 xpts_per_proc[i] = main_xpts_per_proc[i];176 ypts_per_proc[i] = main_ypts_per_proc[i];177 }178 179 size = numlev * sizeof(double **);180 for (i = 0; i < nprocs; i++) {181 182 q_multi[i] = (double ***) G_MALLOC(size, id);183 rhs_multi[i] = (double ***) G_MALLOC(size, id);184 185 for (j = 0; j < numlev; j++) {186 187 rhs_multi[i][j] = (double **) G_MALLOC(((imx[j] - 2) / yprocs + 2) * sizeof(double *), id);188 q_multi[i][j] = (double **) G_MALLOC(((imx[j] - 2) / yprocs + 2) * sizeof(double *), id);189 for (k = 0; k < ((imx[j] - 2) / yprocs + 2); k++) {190 q_multi[i][j][k] = main_q_multi[i][j][k];191 rhs_multi[i][j][k] = main_rhs_multi[i][j][k];192 }193 194 }195 196 work1[i] = main_work1[i];197 work2[i] = main_work2[i];198 work3[i] = main_work3[i];199 work4[i] = main_work4[i];200 work5[i] = main_work5[i];201 work6[i] = main_work6[i];202 work7[i] = main_work7[i];203 psi[i] = main_psi[i];204 psim[i] = main_psim[i];205 psium[i] = main_psium[i];206 psilm[i] = main_psilm[i];207 psib[i] = main_psib[i];208 ga[i] = main_ga[i];209 gb[i] = main_gb[i];210 oldga[i] = main_oldga[i];211 oldgb[i] = main_oldgb[i];212 }213 }214 giet_shr_printf("Thread %d launched\n", id);215 216 slave(&id);217 218 BARRIER(bars->barrier, nprocs)219 220 giet_exit("done.");221 }222 223 224 const char *optarg;225 226 int getopt(int argc, char *const *argv, const char *optstring)227 {228 return -1;229 }230 231 //give the cluster coordinate by thread number232 // if tid=-1, return the next cluster (round robin)233 void clusterXY(int tid, unsigned int *cx, unsigned int *cy)234 {235 unsigned int cid;236 static unsigned int x = 0, y = 0;237 238 cid = tid / procs_per_cluster;239 240 if (tid != -1) {241 *cx = (cid / nclusters_y);242 *cy = (cid % nclusters_y);243 return;244 }245 246 if (giet_thread_id() != 0) {247 giet_exit("pseudo-random mapped malloc : thread 0 only");248 }249 250 x++;251 if (x == nclusters_x) {252 x = 0;253 y++;254 if (y == nclusters_y) {255 y = 0;256 }257 }258 *cx = x;259 *cy = y;260 }261 262 void *ocean_malloc(unsigned long s, int tid)263 {264 void *ptr;265 unsigned int x, y;266 clusterXY(tid, &x, &y);267 ptr = remote_malloc(s, x, y);268 giet_assert (ptr != 0, "Malloc failed");269 return ptr;270 }271 272 void exit(int status)273 {274 if (status) {275 giet_exit("Done (status != 0)");276 } else {277 giet_exit("Done (ok)");278 }279 } -
soft/giet_vm/applications/ocean/jacobcalc.C
r581 r589 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 16 2 17 /* Does the arakawa jacobian calculation (of the x and y matrices,18 putting the results in the z matrix) for a subblock. */19 20 EXTERN_ENV21 22 #include <stdio.h>23 #include <math.h>24 25 #include "decs.h"26 27 void jacobcalc(double ***x, double ***y, double ***z, long pid, long firstrow, long lastrow, long firstcol, long lastcol)28 {29 double f1;30 double f2;31 double f3;32 double f4;33 double f5;34 double f6;35 double f7;36 double f8;37 long iindex;38 long indexp1;39 long indexm1;40 long im1;41 long ip1;42 long i;43 long j;44 long jj;45 double **t2a;46 double **t2b;47 double **t2c;48 double *t1a;49 double *t1b;50 double *t1c;51 double *t1d;52 double *t1e;53 double *t1f;54 double *t1g;55 56 t2a = (double **) z[pid];57 if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[LEFT] == -1)) {58 t2a[0][0] = 0.0;59 }60 if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[LEFT] == -1)) {61 t2a[im - 1][0] = 0.0;62 }63 if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {64 t2a[0][jm - 1] = 0.0;65 }66 if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {67 t2a[im - 1][jm - 1] = 0.0;68 }69 70 t2a = (double **) x[pid];71 jj = gp[pid].neighbors[UPLEFT];72 if (jj != -1) {73 t2a[0][0] = x[jj][im - 2][jm - 2];74 }75 jj = gp[pid].neighbors[UPRIGHT];76 if (jj != -1) {77 t2a[0][jm - 1] = x[jj][im - 2][1];78 }79 jj = gp[pid].neighbors[DOWNLEFT];80 if (jj != -1) {81 t2a[im - 1][0] = x[jj][1][jm - 2];82 }83 jj = gp[pid].neighbors[DOWNRIGHT];84 if (jj != -1) {85 t2a[im - 1][jm - 1] = x[jj][1][1];86 }87 88 t2a = (double **) y[pid];89 jj = gp[pid].neighbors[UPLEFT];90 if (jj != -1) {91 t2a[0][0] = y[jj][im - 2][jm - 2];92 }93 jj = gp[pid].neighbors[UPRIGHT];94 if (jj != -1) {95 t2a[0][jm - 1] = y[jj][im - 2][1];96 }97 jj = gp[pid].neighbors[DOWNLEFT];98 if (jj != -1) {99 t2a[im - 1][0] = y[jj][1][jm - 2];100 }101 jj = gp[pid].neighbors[DOWNRIGHT];102 if (jj != -1) {103 t2a[im - 1][jm - 1] = y[jj][1][1];104 }105 106 t2a = (double **) x[pid];107 if (gp[pid].neighbors[UP] == -1) {108 jj = gp[pid].neighbors[LEFT];109 if (jj != -1) {110 t2a[0][0] = x[jj][0][jm - 2];111 } else {112 jj = gp[pid].neighbors[DOWN];113 if (jj != -1) {114 t2a[im - 1][0] = x[jj][1][0];115 }116 }117 jj = gp[pid].neighbors[RIGHT];118 if (jj != -1) {119 t2a[0][jm - 1] = x[jj][0][1];120 } else {121 jj = gp[pid].neighbors[DOWN];122 if (jj != -1) {123 t2a[im - 1][jm - 1] = x[jj][1][jm - 1];124 }125 }126 } else if (gp[pid].neighbors[DOWN] == -1) {127 jj = gp[pid].neighbors[LEFT];128 if (jj != -1) {129 t2a[im - 1][0] = x[jj][im - 1][jm - 2];130 } else {131 jj = gp[pid].neighbors[UP];132 if (jj != -1) {133 t2a[0][0] = x[jj][im - 2][0];134 }135 }136 jj = gp[pid].neighbors[RIGHT];137 if (jj != -1) {138 t2a[im - 1][jm - 1] = x[jj][im - 1][1];139 } else {140 jj = gp[pid].neighbors[UP];141 if (jj != -1) {142 t2a[0][jm - 1] = x[jj][im - 2][jm - 1];143 }144 }145 } else if (gp[pid].neighbors[LEFT] == -1) {146 jj = gp[pid].neighbors[UP];147 if (jj != -1) {148 t2a[0][0] = x[jj][im - 2][0];149 }150 jj = gp[pid].neighbors[DOWN];151 if (jj != -1) {152 t2a[im - 1][0] = x[jj][1][0];153 }154 } else if (gp[pid].neighbors[RIGHT] == -1) {155 jj = gp[pid].neighbors[UP];156 if (jj != -1) {157 t2a[0][jm - 1] = x[jj][im - 2][jm - 1];158 }159 jj = gp[pid].neighbors[DOWN];160 if (jj != -1) {161 t2a[im - 1][jm - 1] = x[jj][1][jm - 1];162 }163 }164 165 t2a = (double **) y[pid];166 if (gp[pid].neighbors[UP] == -1) {167 jj = gp[pid].neighbors[LEFT];168 if (jj != -1) {169 t2a[0][0] = y[jj][0][jm - 2];170 } else {171 jj = gp[pid].neighbors[DOWN];172 if (jj != -1) {173 t2a[im - 1][0] = y[jj][1][0];174 }175 }176 jj = gp[pid].neighbors[RIGHT];177 if (jj != -1) {178 t2a[0][jm - 1] = y[jj][0][1];179 } else {180 jj = gp[pid].neighbors[DOWN];181 if (jj != -1) {182 t2a[im - 1][jm - 1] = y[jj][1][jm - 1];183 }184 }185 } else if (gp[pid].neighbors[DOWN] == -1) {186 jj = gp[pid].neighbors[LEFT];187 if (jj != -1) {188 t2a[im - 1][0] = y[jj][im - 1][jm - 2];189 } else {190 jj = gp[pid].neighbors[UP];191 if (jj != -1) {192 t2a[0][0] = y[jj][im - 2][0];193 }194 }195 jj = gp[pid].neighbors[RIGHT];196 if (jj != -1) {197 t2a[im - 1][jm - 1] = y[jj][im - 1][1];198 } else {199 jj = gp[pid].neighbors[UP];200 if (jj != -1) {201 t2a[0][jm - 1] = y[jj][im - 2][jm - 1];202 }203 }204 } else if (gp[pid].neighbors[LEFT] == -1) {205 jj = gp[pid].neighbors[UP];206 if (jj != -1) {207 t2a[0][0] = y[jj][im - 2][0];208 }209 jj = gp[pid].neighbors[DOWN];210 if (jj != -1) {211 t2a[im - 1][0] = y[jj][1][0];212 }213 } else if (gp[pid].neighbors[RIGHT] == -1) {214 jj = gp[pid].neighbors[UP];215 if (jj != -1) {216 t2a[0][jm - 1] = y[jj][im - 2][jm - 1];217 }218 jj = gp[pid].neighbors[DOWN];219 if (jj != -1) {220 t2a[im - 1][jm - 1] = y[jj][1][jm - 1];221 }222 }223 224 j = gp[pid].neighbors[UP];225 if (j != -1) {226 t1a = (double *) t2a[0];227 t1b = (double *) y[j][im - 2];228 for (i = 1; i <= lastcol; i++) {229 t1a[i] = t1b[i];230 }231 }232 j = gp[pid].neighbors[DOWN];233 if (j != -1) {234 t1a = (double *) t2a[im - 1];235 t1b = (double *) y[j][1];236 for (i = 1; i <= lastcol; i++) {237 t1a[i] = t1b[i];238 }239 }240 j = gp[pid].neighbors[LEFT];241 if (j != -1) {242 t2b = (double **) y[j];243 for (i = 1; i <= lastrow; i++) {244 t2a[i][0] = t2b[i][jm - 2];245 }246 }247 j = gp[pid].neighbors[RIGHT];248 if (j != -1) {249 t2b = (double **) y[j];250 for (i = 1; i <= lastrow; i++) {251 t2a[i][jm - 1] = t2b[i][1];252 }253 }254 255 t2a = (double **) x[pid];256 j = gp[pid].neighbors[UP];257 if (j != -1) {258 t1a = (double *) t2a[0];259 t1b = (double *) x[j][im - 2];260 for (i = 1; i <= lastcol; i++) {261 t1a[i] = t1b[i];262 }263 }264 j = gp[pid].neighbors[DOWN];265 if (j != -1) {266 t1a = (double *) t2a[im - 1];267 t1b = (double *) x[j][1];268 for (i = 1; i <= lastcol; i++) {269 t1a[i] = t1b[i];270 }271 }272 j = gp[pid].neighbors[LEFT];273 if (j != -1) {274 t2b = (double **) x[j];275 for (i = 1; i <= lastrow; i++) {276 t2a[i][0] = t2b[i][jm - 2];277 }278 }279 j = gp[pid].neighbors[RIGHT];280 if (j != -1) {281 t2b = (double **) x[j];282 for (i = 1; i <= lastrow; i++) {283 t2a[i][jm - 1] = t2b[i][1];284 }285 }286 287 t2a = (double **) x[pid];288 t2b = (double **) y[pid];289 t2c = (double **) z[pid];290 for (i = firstrow; i <= lastrow; i++) {291 ip1 = i + 1;292 im1 = i - 1;293 t1a = (double *) t2a[i];294 t1b = (double *) t2b[i];295 t1c = (double *) t2c[i];296 t1d = (double *) t2b[ip1];297 t1e = (double *) t2b[im1];298 t1f = (double *) t2a[ip1];299 t1g = (double *) t2a[im1];300 for (iindex = firstcol; iindex <= lastcol; iindex++) {301 indexp1 = iindex + 1;302 indexm1 = iindex - 1;303 f1 = (t1b[indexm1] + t1d[indexm1] - t1b[indexp1] - t1d[indexp1]) * (t1f[iindex] - t1a[iindex]);304 f2 = (t1e[indexm1] + t1b[indexm1] - t1e[indexp1] - t1b[indexp1]) * (t1a[iindex] - t1g[iindex]);305 f3 = (t1d[iindex] + t1d[indexp1] - t1e[iindex] - t1e[indexp1]) * (t1a[indexp1] - t1a[iindex]);306 f4 = (t1d[indexm1] + t1d[iindex] - t1e[indexm1] - t1e[iindex]) * (t1a[iindex] - t1a[indexm1]);307 f5 = (t1d[iindex] - t1b[indexp1]) * (t1f[indexp1] - t1a[iindex]);308 f6 = (t1b[indexm1] - t1e[iindex]) * (t1a[iindex] - t1g[indexm1]);309 f7 = (t1b[indexp1] - t1e[iindex]) * (t1g[indexp1] - t1a[iindex]);310 f8 = (t1d[iindex] - t1b[indexm1]) * (t1a[iindex] - t1f[indexm1]);311 312 t1c[iindex] = factjacob * (f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8);313 }314 }315 316 if (gp[pid].neighbors[UP] == -1) {317 t1c = (double *) t2c[0];318 for (j = firstcol; j <= lastcol; j++) {319 t1c[j] = 0.0;320 }321 }322 if (gp[pid].neighbors[DOWN] == -1) {323 t1c = (double *) t2c[im - 1];324 for (j = firstcol; j <= lastcol; j++) {325 t1c[j] = 0.0;326 }327 }328 if (gp[pid].neighbors[LEFT] == -1) {329 for (j = firstrow; j <= lastrow; j++) {330 t2c[j][0] = 0.0;331 }332 }333 if (gp[pid].neighbors[RIGHT] == -1) {334 for (j = firstrow; j <= lastrow; j++) {335 t2c[j][jm - 1] = 0.0;336 }337 }338 339 } -
soft/giet_vm/applications/ocean/jacobcalc2.C
r581 r589 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 16 2 17 /* Does the arakawa jacobian calculation (of the x and y matrices,18 putting the results in the z matrix) for a subblock. */19 20 EXTERN_ENV21 22 #include <stdio.h>23 #include <math.h>24 25 #include "decs.h"26 27 void jacobcalc2(double ****x, double ****y, double ****z, long psiindex, long pid, long firstrow, long lastrow, long firstcol, long lastcol)28 {29 double f1;30 double f2;31 double f3;32 double f4;33 double f5;34 double f6;35 double f7;36 double f8;37 long iindex;38 long indexp1;39 long indexm1;40 long im1;41 long ip1;42 long i;43 long j;44 long jj;45 double **t2a;46 double **t2b;47 double **t2c;48 double *t1a;49 double *t1b;50 double *t1c;51 double *t1d;52 double *t1e;53 double *t1f;54 double *t1g;55 56 t2a = z[pid][psiindex];57 if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[LEFT] == -1)) {58 t2a[0][0] = 0.0;59 }60 if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[LEFT] == -1)) {61 t2a[im - 1][0] = 0.0;62 }63 if ((gp[pid].neighbors[UP] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {64 t2a[0][jm - 1] = 0.0;65 }66 if ((gp[pid].neighbors[DOWN] == -1) && (gp[pid].neighbors[RIGHT] == -1)) {67 t2a[im - 1][jm - 1] = 0.0;68 }69 70 t2a = x[pid][psiindex];71 jj = gp[pid].neighbors[UPLEFT];72 if (jj != -1) {73 t2a[0][0] = x[jj][psiindex][im - 2][jm - 2];74 }75 jj = gp[pid].neighbors[UPRIGHT];76 if (jj != -1) {77 t2a[0][jm - 1] = x[jj][psiindex][im - 2][1];78 }79 jj = gp[pid].neighbors[DOWNLEFT];80 if (jj != -1) {81 t2a[im - 1][0] = x[jj][psiindex][1][jm - 2];82 }83 jj = gp[pid].neighbors[DOWNRIGHT];84 if (jj != -1) {85 t2a[im - 1][jm - 1] = x[jj][psiindex][1][1];86 }87 88 t2a = y[pid][psiindex];89 jj = gp[pid].neighbors[UPLEFT];90 if (jj != -1) {91 t2a[0][0] = y[jj][psiindex][im - 2][jm - 2];92 }93 jj = gp[pid].neighbors[UPRIGHT];94 if (jj != -1) {95 t2a[0][jm - 1] = y[jj][psiindex][im - 2][1];96 }97 jj = gp[pid].neighbors[DOWNLEFT];98 if (jj != -1) {99 t2a[im - 1][0] = y[jj][psiindex][1][jm - 2];100 }101 jj = gp[pid].neighbors[DOWNRIGHT];102 if (jj != -1) {103 t2a[im - 1][jm - 1] = y[jj][psiindex][1][1];104 }105 106 t2a = x[pid][psiindex];107 if (gp[pid].neighbors[UP] == -1) {108 jj = gp[pid].neighbors[LEFT];109 if (jj != -1) {110 t2a[0][0] = x[jj][psiindex][0][jm - 2];111 } else {112 jj = gp[pid].neighbors[DOWN];113 if (jj != -1) {114 t2a[im - 1][0] = x[jj][psiindex][1][0];115 }116 }117 jj = gp[pid].neighbors[RIGHT];118 if (jj != -1) {119 t2a[0][jm - 1] = x[jj][psiindex][0][1];120 } else {121 jj = gp[pid].neighbors[DOWN];122 if (jj != -1) {123 t2a[im - 1][jm - 1] = x[jj][psiindex][1][jm - 1];124 }125 }126 } else if (gp[pid].neighbors[DOWN] == -1) {127 jj = gp[pid].neighbors[LEFT];128 if (jj != -1) {129 t2a[im - 1][0] = x[jj][psiindex][im - 1][jm - 2];130 } else {131 jj = gp[pid].neighbors[UP];132 if (jj != -1) {133 t2a[0][0] = x[jj][psiindex][im - 2][0];134 }135 }136 jj = gp[pid].neighbors[RIGHT];137 if (jj != -1) {138 t2a[im - 1][jm - 1] = x[jj][psiindex][im - 1][1];139 } else {140 jj = gp[pid].neighbors[UP];141 if (jj != -1) {142 t2a[0][jm - 1] = x[jj][psiindex][im - 2][jm - 1];143 }144 }145 } else if (gp[pid].neighbors[LEFT] == -1) {146 jj = gp[pid].neighbors[UP];147 if (jj != -1) {148 t2a[0][0] = x[jj][psiindex][im - 2][0];149 }150 jj = gp[pid].neighbors[DOWN];151 if (jj != -1) {152 t2a[im - 1][0] = x[jj][psiindex][1][0];153 }154 } else if (gp[pid].neighbors[RIGHT] == -1) {155 jj = gp[pid].neighbors[UP];156 if (jj != -1) {157 t2a[0][jm - 1] = x[jj][psiindex][im - 2][jm - 1];158 }159 jj = gp[pid].neighbors[DOWN];160 if (jj != -1) {161 t2a[im - 1][jm - 1] = x[jj][psiindex][1][jm - 1];162 }163 }164 165 t2a = y[pid][psiindex];166 if (gp[pid].neighbors[UP] == -1) {167 jj = gp[pid].neighbors[LEFT];168 if (jj != -1) {169 t2a[0][0] = y[jj][psiindex][0][jm - 2];170 } else {171 jj = gp[pid].neighbors[DOWN];172 if (jj != -1) {173 t2a[im - 1][0] = y[jj][psiindex][1][0];174 }175 }176 jj = gp[pid].neighbors[RIGHT];177 if (jj != -1) {178 t2a[0][jm - 1] = y[jj][psiindex][0][1];179 } else {180 jj = gp[pid].neighbors[DOWN];181 if (jj != -1) {182 t2a[im - 1][jm - 1] = y[jj][psiindex][1][jm - 1];183 }184 }185 } else if (gp[pid].neighbors[DOWN] == -1) {186 jj = gp[pid].neighbors[LEFT];187 if (jj != -1) {188 t2a[im - 1][0] = y[jj][psiindex][im - 1][jm - 2];189 } else {190 jj = gp[pid].neighbors[UP];191 if (jj != -1) {192 t2a[0][0] = y[jj][psiindex][im - 2][0];193 }194 }195 jj = gp[pid].neighbors[RIGHT];196 if (jj != -1) {197 t2a[im - 1][jm - 1] = y[jj][psiindex][im - 1][1];198 } else {199 jj = gp[pid].neighbors[UP];200 if (jj != -1) {201 t2a[0][jm - 1] = y[jj][psiindex][im - 2][jm - 1];202 }203 }204 } else if (gp[pid].neighbors[LEFT] == -1) {205 jj = gp[pid].neighbors[UP];206 if (jj != -1) {207 t2a[0][0] = y[jj][psiindex][im - 2][0];208 }209 jj = gp[pid].neighbors[DOWN];210 if (jj != -1) {211 t2a[im - 1][0] = y[jj][psiindex][1][0];212 }213 } else if (gp[pid].neighbors[RIGHT] == -1) {214 jj = gp[pid].neighbors[UP];215 if (jj != -1) {216 t2a[0][jm - 1] = y[jj][psiindex][im - 2][jm - 1];217 }218 jj = gp[pid].neighbors[DOWN];219 if (jj != -1) {220 t2a[im - 1][jm - 1] = y[jj][psiindex][1][jm - 1];221 }222 }223 224 t2a = y[pid][psiindex];225 j = gp[pid].neighbors[UP];226 if (j != -1) {227 t1a = (double *) t2a[0];228 t1b = (double *) y[j][psiindex][im - 2];229 for (i = 1; i <= lastcol; i++) {230 t1a[i] = t1b[i];231 }232 }233 j = gp[pid].neighbors[DOWN];234 if (j != -1) {235 t1a = (double *) t2a[im - 1];236 t1b = (double *) y[j][psiindex][1];237 for (i = 1; i <= lastcol; i++) {238 t1a[i] = t1b[i];239 }240 }241 j = gp[pid].neighbors[LEFT];242 if (j != -1) {243 t2b = y[j][psiindex];244 for (i = 1; i <= lastrow; i++) {245 t2a[i][0] = t2b[i][jm - 2];246 }247 }248 j = gp[pid].neighbors[RIGHT];249 if (j != -1) {250 t2b = y[j][psiindex];251 for (i = 1; i <= lastrow; i++) {252 t2a[i][jm - 1] = t2b[i][1];253 }254 }255 256 t2a = x[pid][psiindex];257 j = gp[pid].neighbors[UP];258 if (j != -1) {259 t1a = (double *) t2a[0];260 t1b = (double *) x[j][psiindex][im - 2];261 for (i = 1; i <= lastcol; i++) {262 t1a[i] = t1b[i];263 }264 }265 j = gp[pid].neighbors[DOWN];266 if (j != -1) {267 t1a = (double *) t2a[im - 1];268 t1b = (double *) x[j][psiindex][1];269 for (i = 1; i <= lastcol; i++) {270 t1a[i] = t1b[i];271 }272 }273 j = gp[pid].neighbors[LEFT];274 if (j != -1) {275 t2b = x[j][psiindex];276 for (i = 1; i <= lastrow; i++) {277 t2a[i][0] = t2b[i][jm - 2];278 }279 }280 j = gp[pid].neighbors[RIGHT];281 if (j != -1) {282 t2b = x[j][psiindex];283 for (i = 1; i <= lastrow; i++) {284 t2a[i][jm - 1] = t2b[i][1];285 }286 }287 288 t2a = x[pid][psiindex];289 t2b = y[pid][psiindex];290 t2c = z[pid][psiindex];291 for (i = firstrow; i <= lastrow; i++) {292 ip1 = i + 1;293 im1 = i - 1;294 t1a = (double *) t2a[i];295 t1b = (double *) t2b[i];296 t1c = (double *) t2c[i];297 t1d = (double *) t2b[ip1];298 t1e = (double *) t2b[im1];299 t1f = (double *) t2a[ip1];300 t1g = (double *) t2a[im1];301 for (iindex = firstcol; iindex <= lastcol; iindex++) {302 indexp1 = iindex + 1;303 indexm1 = iindex - 1;304 f1 = (t1b[indexm1] + t1d[indexm1] - t1b[indexp1] - t1d[indexp1]) * (t1f[iindex] - t1a[iindex]);305 f2 = (t1e[indexm1] + t1b[indexm1] - t1e[indexp1] - t1b[indexp1]) * (t1a[iindex] - t1g[iindex]);306 f3 = (t1d[iindex] + t1d[indexp1] - t1e[iindex] - t1e[indexp1]) * (t1a[indexp1] - t1a[iindex]);307 f4 = (t1d[indexm1] + t1d[iindex] - t1e[indexm1] - t1e[iindex]) * (t1a[iindex] - t1a[indexm1]);308 f5 = (t1d[iindex] - t1b[indexp1]) * (t1f[indexp1] - t1a[iindex]);309 f6 = (t1b[indexm1] - t1e[iindex]) * (t1a[iindex] - t1g[indexm1]);310 f7 = (t1b[indexp1] - t1e[iindex]) * (t1g[indexp1] - t1a[iindex]);311 f8 = (t1d[iindex] - t1b[indexm1]) * (t1a[iindex] - t1f[indexm1]);312 313 t1c[iindex] = factjacob * (f1 + f2 + f3 + f4 + f5 + f6 + f7 + f8);314 315 }316 }317 318 if (gp[pid].neighbors[UP] == -1) {319 t1c = (double *) t2c[0];320 for (j = firstcol; j <= lastcol; j++) {321 t1c[j] = 0.0;322 }323 }324 if (gp[pid].neighbors[DOWN] == -1) {325 t1c = (double *) t2c[im - 1];326 for (j = firstcol; j <= lastcol; j++) {327 t1c[j] = 0.0;328 }329 }330 if (gp[pid].neighbors[LEFT] == -1) {331 for (j = firstrow; j <= lastrow; j++) {332 t2c[j][0] = 0.0;333 }334 }335 if (gp[pid].neighbors[RIGHT] == -1) {336 for (j = firstrow; j <= lastrow; j++) {337 t2c[j][jm - 1] = 0.0;338 }339 }340 341 } -
soft/giet_vm/applications/ocean/laplacalc.C
r581 r589 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 16 2 17 /* Performs the laplacian calculation for a subblock */18 19 EXTERN_ENV20 21 #include <stdio.h>22 #include <math.h>23 24 #include "decs.h"25 26 void laplacalc(long procid, double ****x, double ****z, long psiindex, long firstrow, long lastrow, long firstcol, long lastcol)27 {28 long iindex;29 long indexp1;30 long indexm1;31 long ip1;32 long im1;33 long i;34 long j;35 double **t2a;36 double **t2b;37 double *t1a;38 double *t1b;39 double *t1c;40 double *t1d;41 42 t2a = (double **) x[procid][psiindex];43 j = gp[procid].neighbors[UP];44 if (j != -1) {45 t1a = (double *) t2a[0];46 t1b = (double *) x[j][psiindex][im - 2];47 for (i = 1; i <= lastcol; i++) {48 t1a[i] = t1b[i];49 }50 }51 j = gp[procid].neighbors[DOWN];52 if (j != -1) {53 t1a = (double *) t2a[im - 1];54 t1b = (double *) x[j][psiindex][1];55 for (i = 1; i <= lastcol; i++) {56 t1a[i] = t1b[i];57 }58 }59 j = gp[procid].neighbors[LEFT];60 if (j != -1) {61 t2b = (double **) x[j][psiindex];62 for (i = 1; i <= lastrow; i++) {63 t2a[i][0] = t2b[i][jm - 2];64 }65 }66 j = gp[procid].neighbors[RIGHT];67 if (j != -1) {68 t2b = (double **) x[j][psiindex];69 for (i = 1; i <= lastrow; i++) {70 t2a[i][jm - 1] = t2b[i][1];71 }72 }73 74 t2a = (double **) x[procid][psiindex];75 t2b = (double **) z[procid][psiindex];76 for (i = firstrow; i <= lastrow; i++) {77 ip1 = i + 1;78 im1 = i - 1;79 t1a = (double *) t2a[i];80 t1b = (double *) t2b[i];81 t1c = (double *) t2a[ip1];82 t1d = (double *) t2a[im1];83 for (iindex = firstcol; iindex <= lastcol; iindex++) {84 indexp1 = iindex + 1;85 indexm1 = iindex - 1;86 t1b[iindex] = factlap * (t1c[iindex] + t1d[iindex] + t1a[indexp1] + t1a[indexm1] - 4. * t1a[iindex]);87 }88 }89 90 if (gp[procid].neighbors[UP] == -1) {91 t1b = (double *) t2b[0];92 for (j = firstcol; j <= lastcol; j++) {93 t1b[j] = 0.0;94 }95 }96 if (gp[procid].neighbors[DOWN] == -1) {97 t1b = (double *) t2b[im - 1];98 for (j = firstcol; j <= lastcol; j++) {99 t1b[j] = 0.0;100 }101 }102 if (gp[procid].neighbors[LEFT] == -1) {103 for (j = firstrow; j <= lastrow; j++) {104 t2b[j][0] = 0.0;105 }106 }107 if (gp[procid].neighbors[RIGHT] == -1) {108 for (j = firstrow; j <= lastrow; j++) {109 t2b[j][jm - 1] = 0.0;110 }111 }112 113 } -
soft/giet_vm/applications/ocean/linkup.C
r581 r589 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 16 2 17 /* Set all the pointers to the proper locations for the q_multi and18 rhs_multi data structures */19 20 EXTERN_ENV21 22 #include "decs.h"23 24 void link_all()25 {26 long i;27 long j;28 29 for (j = 0; j < nprocs; j++) {30 linkup(psium[j]);31 linkup(psilm[j]);32 linkup(psib[j]);33 linkup(ga[j]);34 linkup(gb[j]);35 linkup(work2[j]);36 linkup(work3[j]);37 linkup(work6[j]);38 linkup(tauz[j]);39 linkup(oldga[j]);40 linkup(oldgb[j]);41 for (i = 0; i <= 1; i++) {42 linkup(psi[j][i]);43 linkup(psim[j][i]);44 linkup(work1[j][i]);45 linkup(work4[j][i]);46 linkup(work5[j][i]);47 linkup(work7[j][i]);48 linkup(temparray[j][i]);49 }50 }51 link_multi();52 }53 54 void linkup(double **row_ptr)55 {56 long i;57 double *a;58 double **row;59 double **y;60 long x_part;61 long y_part;62 63 x_part = (jm - 2) / xprocs + 2;64 y_part = (im - 2) / yprocs + 2;65 row = row_ptr;66 y = row + y_part;67 a = (double *) y;68 for (i = 0; i < y_part; i++) {69 *row = (double *) a;70 row++;71 a += x_part;72 }73 }74 75 void link_multi()76 {77 long i;78 long j;79 long l;80 double *a;81 double **row;82 double **y;83 unsigned long z;84 unsigned long zz;85 long x_part;86 long y_part;87 unsigned long d_size;88 89 z = ((unsigned long) q_multi + nprocs * sizeof(double ***));90 91 if (nprocs % 2 == 1) { /* To make sure that the actual data92 starts double word aligned, add an extra93 pointer */94 z += sizeof(double ***);95 }96 97 d_size = numlev * sizeof(double **);98 if (numlev % 2 == 1) { /* To make sure that the actual data99 starts double word aligned, add an extra100 pointer */101 d_size += sizeof(double **);102 }103 for (i = 0; i < numlev; i++) {104 d_size += ((imx[i] - 2) / yprocs + 2) * ((jmx[i] - 2) / xprocs + 2) * sizeof(double) + ((imx[i] - 2) / yprocs + 2) * sizeof(double *);105 }106 for (i = 0; i < nprocs; i++) {107 q_multi[i] = (double ***) z;108 z += d_size;109 }110 for (j = 0; j < nprocs; j++) {111 zz = (unsigned long) q_multi[j];112 zz += numlev * sizeof(double **);113 if (numlev % 2 == 1) { /* To make sure that the actual data114 starts double word aligned, add an extra115 pointer */116 zz += sizeof(double **);117 }118 for (i = 0; i < numlev; i++) {119 d_size = ((imx[i] - 2) / yprocs + 2) * ((jmx[i] - 2) / xprocs + 2) * sizeof(double) + ((imx[i] - 2) / yprocs + 2) * sizeof(double *);120 q_multi[j][i] = (double **) zz;121 zz += d_size;122 }123 }124 125 for (l = 0; l < numlev; l++) {126 x_part = (jmx[l] - 2) / xprocs + 2;127 y_part = (imx[l] - 2) / yprocs + 2;128 for (j = 0; j < nprocs; j++) {129 row = q_multi[j][l];130 y = row + y_part;131 a = (double *) y;132 for (i = 0; i < y_part; i++) {133 *row = (double *) a;134 row++;135 a += x_part;136 }137 }138 }139 140 z = ((unsigned long) rhs_multi + nprocs * sizeof(double ***));141 if (nprocs % 2 == 1) { /* To make sure that the actual data142 starts double word aligned, add an extra143 pointer */144 z += sizeof(double ***);145 }146 147 d_size = numlev * sizeof(double **);148 if (numlev % 2 == 1) { /* To make sure that the actual data149 starts double word aligned, add an extra150 pointer */151 d_size += sizeof(double **);152 }153 for (i = 0; i < numlev; i++) {154 d_size += ((imx[i] - 2) / yprocs + 2) * ((jmx[i] - 2) / xprocs + 2) * sizeof(double) + ((imx[i] - 2) / yprocs + 2) * sizeof(double *);155 }156 for (i = 0; i < nprocs; i++) {157 rhs_multi[i] = (double ***) z;158 z += d_size;159 }160 for (j = 0; j < nprocs; j++) {161 zz = (unsigned long) rhs_multi[j];162 zz += numlev * sizeof(double **);163 if (numlev % 2 == 1) { /* To make sure that the actual data164 starts double word aligned, add an extra165 pointer */166 zz += sizeof(double **);167 }168 for (i = 0; i < numlev; i++) {169 d_size = ((imx[i] - 2) / yprocs + 2) * ((jmx[i] - 2) / xprocs + 2) * sizeof(double) + ((imx[i] - 2) / yprocs + 2) * sizeof(double *);170 rhs_multi[j][i] = (double **) zz;171 zz += d_size;172 }173 }174 175 for (l = 0; l < numlev; l++) {176 x_part = (jmx[l] - 2) / xprocs + 2;177 y_part = (imx[l] - 2) / yprocs + 2;178 for (j = 0; j < nprocs; j++) {179 row = rhs_multi[j][l];180 y = row + y_part;181 a = (double *) y;182 for (i = 0; i < y_part; i++) {183 *row = (double *) a;184 row++;185 a += x_part;186 }187 }188 }189 190 } -
soft/giet_vm/applications/ocean/main.C
r581 r589 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 16 2 17 /*************************************************************************/18 /* */19 /* SPLASH Ocean Code */20 /* */21 /* This application studies the role of eddy and boundary currents in */22 /* influencing large-scale ocean movements. This implementation uses */23 /* dynamically allocated four-dimensional arrays for grid data storage. */24 /* */25 /* Command line options: */26 /* */27 /* -mM : Simulate MxM ocean. M must be (power of 2) +2. */28 /* -nN : N = number of threads. N must be power of 2. */29 /* -eE : E = error tolerance for iterative relaxation. */30 /* -rR : R = distance between grid points in meters. */31 /* -tT : T = timestep in seconds. */32 /* -s : Print timing statistics. */33 /* -o : Print out relaxation residual values. */34 /* -h : Print out command line options. */35 /* */36 /* Default: OCEAN -m130 -n1 -e1e-7 -r20000.0 -t28800.0 */37 /* */38 /* NOTE: This code works under both the FORK and SPROC models. */39 /* */40 /*************************************************************************/41 42 MAIN_ENV43 44 #define DEFAULT_M 51445 #define DEFAULT_N 446 #define DEFAULT_E 1e-747 #define DEFAULT_T 28800.048 #define DEFAULT_R 20000.049 #define UP 050 #define DOWN 151 #define LEFT 252 #define RIGHT 353 #define UPLEFT 454 #define UPRIGHT 555 #define DOWNLEFT 656 #define DOWNRIGHT 757 #define PAGE_SIZE 409658 59 #include <stdio.h>60 #include <math.h>61 #include <stdlib.h>62 63 #include "decs.h"64 65 struct multi_struct *multi;66 struct global_struct *global;67 struct locks_struct *locks;68 struct bars_struct *bars;69 70 struct Global_Private *main_gp;71 double ****main_psi;72 double ****main_psim;73 double ***main_psium;74 double ***main_psilm;75 double ***main_psib;76 double ***main_ga;77 double ***main_gb;78 double ****main_work1;79 double ***main_work2;80 double ***main_work3;81 double ****main_work4;82 double ****main_work5;83 double ***main_work6;84 double ****main_work7;85 double ***main_oldga;86 double ***main_oldgb;87 double ****main_q_multi;88 double ****main_rhs_multi;89 double ****temparray;90 double ***tauz;91 long *main_imx;92 long *main_jmx;93 94 long nprocs = DEFAULT_N;95 const double h1 = 1000.0;96 const double h3 = 4000.0;97 const double h = 5000.0;98 const double lf = -5.12e11;99 double res = DEFAULT_R;100 double dtau = DEFAULT_T;101 const double f0 = 8.3e-5;102 const double beta = 2.0e-11;103 const double gpr = 0.02;104 double ysca;105 long oim;106 long jmm1;107 double tolerance = DEFAULT_E;108 const double pi = 3.141592653589793;109 const double t0 = 0.5e-4;110 const double outday0 = 1.0;111 const double outday1 = 2.0;112 const double outday2 = 2.0;113 const double outday3 = 2.0;114 const double maxwork = 10000.0;115 double factjacob;116 double factlap;117 118 //TODO : répliquer ça :119 double *main_lev_res;120 double *main_lev_tol;121 double *main_i_int_coeff;122 double *main_j_int_coeff;123 long *main_xpts_per_proc;124 long *main_ypts_per_proc;125 long main_xprocs;126 long main_yprocs;127 long main_numlev;128 double main_eig2;129 long main_im = DEFAULT_M;130 long main_jm;131 132 long minlevel;133 long do_stats = 1;134 long do_output = 0;135 long *ids_procs;136 137 138 __attribute__ ((constructor)) int main(int argc, char *argv[])139 {140 long i;141 long j;142 long k;143 long x_part;144 long y_part;145 long d_size;146 long itemp;147 long jtemp;148 double procsqrt;149 long temp = 0;150 double min_total;151 double max_total;152 double avg_total;153 double avg_wait;154 double max_wait;155 double min_wait;156 double min_multi;157 double max_multi;158 double avg_multi;159 double min_frac;160 double max_frac;161 double avg_frac;162 long imax_wait;163 long imin_wait;164 long ch;165 unsigned long long computeend;166 unsigned long long start;167 im = main_im;168 169 CLOCK(start);170 171 while ((ch = getopt(argc, argv, "m:n:e:r:t:soh")) != -1) {172 switch (ch) {173 case 'm':174 im = atoi(optarg);175 if (log_2(im - 2) == -1) {176 printerr("Grid must be ((power of 2)+2) in each dimension\n");177 exit(-1);178 }179 break;180 case 'n':181 nprocs = atoi(optarg);182 if (nprocs < 1) {183 printerr("N must be >= 1\n");184 exit(-1);185 }186 if (log_2(nprocs) == -1) {187 printerr("N must be a power of 2\n");188 exit(-1);189 }190 break;191 case 'e':192 tolerance = atof(optarg);193 break;194 case 'r':195 res = atof(optarg);196 break;197 case 't':198 dtau = atof(optarg);199 break;200 case 's':201 do_stats = !do_stats;202 break;203 case 'o':204 do_output = !do_output;205 break;206 case 'h':207 printf("Usage: ocean <options>\n\n");208 printf("options:\n");209 printf(" -mM : Simulate MxM ocean. M must be (power of 2) + 2 (default = %d).\n", DEFAULT_M);210 printf(" -nN : N = number of threads. N must be power of 2 (default = %d).\n", DEFAULT_N);211 printf(" -eE : E = error tolerance for iterative relaxation (default = %f).\n", DEFAULT_E);212 printf(" -rR : R = distance between grid points in meters (default = %f).\n", DEFAULT_R);213 printf(" -tT : T = timestep in seconds (default = %f).\n", DEFAULT_T);214 printf(" -s : Print timing statistics.\n");215 printf(" -o : Print out relaxation residual values.\n");216 printf(" -h : Print out command line options.\n\n");217 exit(0);218 break;219 }220 }221 222 MAIN_INITENV223 224 jm = im;225 226 printf("\n");227 printf("Ocean simulation with W-cycle multigrid solver\n");228 printf(" Processors : %1ld\n", nprocs);229 printf(" Grid size : %1ld x %1ld\n", im, jm);230 printf(" Grid resolution (meters) : %0.2f\n", res);231 printf(" Time between relaxations (seconds) : %0.0f\n", dtau);232 printf(" Error tolerance : %0.7g\n", tolerance);233 printf("\n");234 235 xprocs = 0;236 yprocs = 0;237 238 procsqrt = sqrt((double) nprocs);239 j = (long) procsqrt;240 241 while ((xprocs == 0) && (j > 0)) {242 k = nprocs / j;243 if (k * j == nprocs) {244 if (k > j) {245 xprocs = j;246 yprocs = k;247 } else {248 xprocs = k;249 yprocs = j;250 }251 }252 j--;253 }254 255 if (xprocs == 0) {256 printerr("Could not find factors for subblocking\n");257 exit(-1);258 }259 260 minlevel = 0;261 itemp = 1;262 jtemp = 1;263 numlev = 0;264 minlevel = 0;265 266 while (itemp < (im - 2)) {267 itemp = itemp * 2;268 jtemp = jtemp * 2;269 if ((itemp / yprocs > 1) && (jtemp / xprocs > 1)) {270 numlev++;271 }272 }273 274 if (numlev == 0) {275 printerr("Must have at least 2 grid points per processor in each dimension\n");276 exit(-1);277 }278 279 main_imx = (long *) G_MALLOC(numlev * sizeof(long), 0);280 main_jmx = (long *) G_MALLOC(numlev * sizeof(long), 0);281 main_lev_res = (double *) G_MALLOC(numlev * sizeof(double), 0);282 main_lev_tol = (double *) G_MALLOC(numlev * sizeof(double), 0);283 main_i_int_coeff = (double *) G_MALLOC(numlev * sizeof(double), 0);284 main_j_int_coeff = (double *) G_MALLOC(numlev * sizeof(double), 0);285 main_xpts_per_proc = (long *) G_MALLOC(numlev * sizeof(long), 0);286 main_ypts_per_proc = (long *) G_MALLOC(numlev * sizeof(long), 0);287 ids_procs = (long *) G_MALLOC(nprocs * sizeof(long), 0);288 289 imx = main_imx;290 jmx = main_jmx;291 lev_res = main_lev_res;292 lev_tol = main_lev_tol;293 i_int_coeff = main_i_int_coeff;294 j_int_coeff = main_j_int_coeff;295 xpts_per_proc = main_xpts_per_proc;296 ypts_per_proc = main_ypts_per_proc;297 298 for (i = 0; i < nprocs; i++) {299 ids_procs[i] = i;300 }301 302 imx[numlev - 1] = im;303 jmx[numlev - 1] = jm;304 lev_res[numlev - 1] = res;305 lev_tol[numlev - 1] = tolerance;306 307 for (i = numlev - 2; i >= 0; i--) {308 imx[i] = ((imx[i + 1] - 2) / 2) + 2;309 jmx[i] = ((jmx[i + 1] - 2) / 2) + 2;310 lev_res[i] = lev_res[i + 1] * 2;311 }312 313 for (i = 0; i < numlev; i++) {314 xpts_per_proc[i] = (jmx[i] - 2) / xprocs;315 ypts_per_proc[i] = (imx[i] - 2) / yprocs;316 }317 for (i = numlev - 1; i >= 0; i--) {318 if ((xpts_per_proc[i] < 2) || (ypts_per_proc[i] < 2)) {319 minlevel = i + 1;320 break;321 }322 }323 324 for (i = 0; i < numlev; i++) {325 temp += imx[i];326 }327 temp = 0;328 j = 0;329 for (k = 0; k < numlev; k++) {330 for (i = 0; i < imx[k]; i++) {331 j++;332 temp += jmx[k];333 }334 }335 336 d_size = nprocs * sizeof(double ***);337 main_psi = (double ****) G_MALLOC(d_size, 0);338 main_psim = (double ****) G_MALLOC(d_size, 0);339 main_work1 = (double ****) G_MALLOC(d_size, 0);340 main_work4 = (double ****) G_MALLOC(d_size, 0);341 main_work5 = (double ****) G_MALLOC(d_size, 0);342 main_work7 = (double ****) G_MALLOC(d_size, 0);343 temparray = (double ****) G_MALLOC(d_size, -1);344 345 psi = main_psi;346 psim = main_psim;347 work1 = main_work1;348 work4 = main_work4;349 work5 = main_work5;350 work7 = main_work7;351 352 d_size = 2 * sizeof(double **);353 for (i = 0; i < nprocs; i++) {354 psi[i] = (double ***) G_MALLOC(d_size, i);355 psim[i] = (double ***) G_MALLOC(d_size, i);356 work1[i] = (double ***) G_MALLOC(d_size, i);357 work4[i] = (double ***) G_MALLOC(d_size, i);358 work5[i] = (double ***) G_MALLOC(d_size, i);359 work7[i] = (double ***) G_MALLOC(d_size, i);360 temparray[i] = (double ***) G_MALLOC(d_size, i);361 }362 363 d_size = nprocs * sizeof(double **);364 main_psium = (double ***) G_MALLOC(d_size, 0);365 main_psilm = (double ***) G_MALLOC(d_size, 0);366 main_psib = (double ***) G_MALLOC(d_size, 0);367 main_ga = (double ***) G_MALLOC(d_size, 0);368 main_gb = (double ***) G_MALLOC(d_size, 0);369 main_work2 = (double ***) G_MALLOC(d_size, 0);370 main_work3 = (double ***) G_MALLOC(d_size, 0);371 main_work6 = (double ***) G_MALLOC(d_size, 0);372 tauz = (double ***) G_MALLOC(d_size, 0);373 main_oldga = (double ***) G_MALLOC(d_size, 0);374 main_oldgb = (double ***) G_MALLOC(d_size, 0);375 376 psium = main_psium;377 psilm = main_psilm;378 psib = main_psib;379 ga = main_ga;380 gb = main_gb;381 work2 = main_work2;382 work3 = main_work3;383 work6 = main_work6;384 oldga = main_oldga;385 oldgb = main_oldgb;386 387 main_gp = (struct Global_Private *) G_MALLOC((nprocs + 1) * sizeof(struct Global_Private), -1);388 gp = main_gp;389 390 for (i = 0; i < nprocs; i++) {391 gp[i].pad = (char *) G_MALLOC(PAGE_SIZE * sizeof(char), i);392 gp[i].rel_num_x = (long *) G_MALLOC(numlev * sizeof(long), i);393 gp[i].rel_num_y = (long *) G_MALLOC(numlev * sizeof(long), i);394 gp[i].eist = (long *) G_MALLOC(numlev * sizeof(long), i);395 gp[i].ejst = (long *) G_MALLOC(numlev * sizeof(long), i);396 gp[i].oist = (long *) G_MALLOC(numlev * sizeof(long), i);397 gp[i].ojst = (long *) G_MALLOC(numlev * sizeof(long), i);398 gp[i].rlist = (long *) G_MALLOC(numlev * sizeof(long), i);399 gp[i].rljst = (long *) G_MALLOC(numlev * sizeof(long), i);400 gp[i].rlien = (long *) G_MALLOC(numlev * sizeof(long), i);401 gp[i].rljen = (long *) G_MALLOC(numlev * sizeof(long), i);402 gp[i].neighbors = (long *) G_MALLOC(8 * sizeof(long), i);403 gp[i].rownum = (long *) G_MALLOC(sizeof(long), i);404 gp[i].colnum = (long *) G_MALLOC(sizeof(long), i);405 gp[i].lpid = (long *) G_MALLOC(sizeof(long), i);406 gp[i].multi_time = (double *) G_MALLOC(sizeof(double), i);407 gp[i].total_time = (double *) G_MALLOC(sizeof(double), i);408 gp[i].sync_time = (double *) G_MALLOC(sizeof(double), i);409 gp[i].process_time = (double *) G_MALLOC(sizeof(double), i);410 gp[i].step_start = (double *) G_MALLOC(sizeof(double), i);411 gp[i].steps_time = (double *) G_MALLOC(10 * sizeof(double), i);412 *gp[i].multi_time = 0;413 *gp[i].total_time = 0;414 *gp[i].sync_time = 0;415 *gp[i].process_time = 0;416 *gp[i].lpid = i;417 }418 419 subblock();420 421 x_part = (jm - 2) / xprocs + 2;422 y_part = (im - 2) / yprocs + 2;423 424 d_size = x_part * y_part * sizeof(double) + y_part * sizeof(double *);425 426 global = (struct global_struct *) G_MALLOC(sizeof(struct global_struct), -1);427 428 for (i = 0; i < nprocs; i++) {429 psi[i][0] = (double **) G_MALLOC(d_size, i);430 psi[i][1] = (double **) G_MALLOC(d_size, i);431 psim[i][0] = (double **) G_MALLOC(d_size, i);432 psim[i][1] = (double **) G_MALLOC(d_size, i);433 psium[i] = (double **) G_MALLOC(d_size, i);434 psilm[i] = (double **) G_MALLOC(d_size, i);435 psib[i] = (double **) G_MALLOC(d_size, i);436 ga[i] = (double **) G_MALLOC(d_size, i);437 gb[i] = (double **) G_MALLOC(d_size, i);438 work1[i][0] = (double **) G_MALLOC(d_size, i);439 work1[i][1] = (double **) G_MALLOC(d_size, i);440 work2[i] = (double **) G_MALLOC(d_size, i);441 work3[i] = (double **) G_MALLOC(d_size, i);442 work4[i][0] = (double **) G_MALLOC(d_size, i);443 work4[i][1] = (double **) G_MALLOC(d_size, i);444 work5[i][0] = (double **) G_MALLOC(d_size, i);445 work5[i][1] = (double **) G_MALLOC(d_size, i);446 work6[i] = (double **) G_MALLOC(d_size, i);447 work7[i][0] = (double **) G_MALLOC(d_size, i);448 work7[i][1] = (double **) G_MALLOC(d_size, i);449 temparray[i][0] = (double **) G_MALLOC(d_size, i);450 temparray[i][1] = (double **) G_MALLOC(d_size, i);451 tauz[i] = (double **) G_MALLOC(d_size, i);452 oldga[i] = (double **) G_MALLOC(d_size, i);453 oldgb[i] = (double **) G_MALLOC(d_size, i);454 }455 456 oim = im;457 //f = (double *) G_MALLOC(oim*sizeof(double), 0);458 multi = (struct multi_struct *) G_MALLOC(sizeof(struct multi_struct), -1);459 460 d_size = numlev * sizeof(double **);461 if (numlev % 2 == 1) { /* To make sure that the actual data462 starts double word aligned, add an extra463 pointer */464 d_size += sizeof(double **);465 }466 for (i = 0; i < numlev; i++) {467 d_size += ((imx[i] - 2) / yprocs + 2) * ((jmx[i] - 2) / xprocs + 2) * sizeof(double) + ((imx[i] - 2) / yprocs + 2) * sizeof(double *);468 }469 470 d_size *= nprocs;471 472 if (nprocs % 2 == 1) { /* To make sure that the actual data473 starts double word aligned, add an extra474 pointer */475 d_size += sizeof(double ***);476 }477 478 d_size += nprocs * sizeof(double ***);479 main_q_multi = (double ****) G_MALLOC(d_size, -1);480 main_rhs_multi = (double ****) G_MALLOC(d_size, -1);481 q_multi = main_q_multi;482 rhs_multi = main_rhs_multi;483 484 485 locks = (struct locks_struct *) G_MALLOC(sizeof(struct locks_struct), -1);486 bars = (struct bars_struct *) G_MALLOC(sizeof(struct bars_struct), -1);487 488 LOCKINIT(locks->idlock)489 LOCKINIT(locks->psiailock)490 LOCKINIT(locks->psibilock)491 LOCKINIT(locks->donelock)492 LOCKINIT(locks->error_lock)493 LOCKINIT(locks->bar_lock)494 #if defined(MULTIPLE_BARRIERS)495 BARINIT(bars->iteration, nprocs)496 BARINIT(bars->gsudn, nprocs)497 BARINIT(bars->p_setup, nprocs)498 BARINIT(bars->p_redph, nprocs)499 BARINIT(bars->p_soln, nprocs)500 BARINIT(bars->p_subph, nprocs)501 BARINIT(bars->sl_prini, nprocs)502 BARINIT(bars->sl_psini, nprocs)503 BARINIT(bars->sl_onetime, nprocs)504 BARINIT(bars->sl_phase_1, nprocs)505 BARINIT(bars->sl_phase_2, nprocs)506 BARINIT(bars->sl_phase_3, nprocs)507 BARINIT(bars->sl_phase_4, nprocs)508 BARINIT(bars->sl_phase_5, nprocs)509 BARINIT(bars->sl_phase_6, nprocs)510 BARINIT(bars->sl_phase_7, nprocs)511 BARINIT(bars->sl_phase_8, nprocs)512 BARINIT(bars->sl_phase_9, nprocs)513 BARINIT(bars->sl_phase_10, nprocs)514 BARINIT(bars->error_barrier, nprocs)515 #else516 BARINIT(bars->barrier, nprocs)517 #endif518 link_all();519 520 multi->err_multi = 0.0;521 i_int_coeff[0] = 0.0;522 j_int_coeff[0] = 0.0;523 524 for (i = 0; i < numlev; i++) {525 i_int_coeff[i] = 1.0 / (imx[i] - 1);526 j_int_coeff[i] = 1.0 / (jmx[i] - 1);527 }528 529 /*530 initialize constants and variables531 532 id is a global shared variable that has fetch-and-add operations533 performed on it by processes to obtain their pids.534 */535 536 //global->id = 0;537 global->trackstart = 0;538 global->psibi = 0.0;539 540 factjacob = -1. / (12. * res * res);541 factlap = 1. / (res * res);542 eig2 = -h * f0 * f0 / (h1 * h3 * gpr);543 544 jmm1 = jm - 1;545 ysca = ((double) jmm1) * res;546 im = (imx[numlev - 1] - 2) / yprocs + 2;547 jm = (jmx[numlev - 1] - 2) / xprocs + 2;548 549 main_im = im;550 main_jm = jm;551 main_numlev = numlev;552 main_xprocs = xprocs;553 main_yprocs = yprocs;554 main_eig2 = eig2;555 556 if (do_output) {557 printf(" MULTIGRID OUTPUTS\n");558 }559 560 CREATE(slave, nprocs);561 WAIT_FOR_END(nprocs);562 CLOCK(computeend);563 564 printf("\n");565 printf(" PROCESS STATISTICS\n");566 printf(" Total Multigrid Multigrid\n");567 printf(" Proc Time Time Fraction\n");568 printf(" 0 %15.0f %15.0f %10.3f\n", (*gp[0].total_time), (*gp[0].multi_time), (*gp[0].multi_time) / (*gp[0].total_time));569 570 if (do_stats) {571 double phase_time;572 min_total = max_total = avg_total = (*gp[0].total_time);573 min_multi = max_multi = avg_multi = (*gp[0].multi_time);574 min_frac = max_frac = avg_frac = (*gp[0].multi_time) / (*gp[0].total_time);575 avg_wait = *gp[0].sync_time;576 max_wait = *gp[0].sync_time;577 min_wait = *gp[0].sync_time;578 imax_wait = 0;579 imin_wait = 0;580 581 for (i = 1; i < nprocs; i++) {582 if ((*gp[i].total_time) > max_total) {583 max_total = (*gp[i].total_time);584 }585 if ((*gp[i].total_time) < min_total) {586 min_total = (*gp[i].total_time);587 }588 if ((*gp[i].multi_time) > max_multi) {589 max_multi = (*gp[i].multi_time);590 }591 if ((*gp[i].multi_time) < min_multi) {592 min_multi = (*gp[i].multi_time);593 }594 if ((*gp[i].multi_time) / (*gp[i].total_time) > max_frac) {595 max_frac = (*gp[i].multi_time) / (*gp[i].total_time);596 }597 if ((*gp[i].multi_time) / (*gp[i].total_time) < min_frac) {598 min_frac = (*gp[i].multi_time) / (*gp[i].total_time);599 }600 avg_total += (*gp[i].total_time);601 avg_multi += (*gp[i].multi_time);602 avg_frac += (*gp[i].multi_time) / (*gp[i].total_time);603 avg_wait += (*gp[i].sync_time);604 if (max_wait < (*gp[i].sync_time)) {605 max_wait = (*gp[i].sync_time);606 imax_wait = i;607 }608 if (min_wait > (*gp[i].sync_time)) {609 min_wait = (*gp[i].sync_time);610 imin_wait = i;611 }612 }613 avg_total = avg_total / nprocs;614 avg_multi = avg_multi / nprocs;615 avg_frac = avg_frac / nprocs;616 avg_wait = avg_wait / nprocs;617 for (i = 1; i < nprocs; i++) {618 printf(" %3ld %15.0f %15.0f %10.3f\n", i, (*gp[i].total_time), (*gp[i].multi_time), (*gp[i].multi_time) / (*gp[i].total_time));619 }620 printf(" Avg %15.0f %15.0f %10.3f\n", avg_total, avg_multi, avg_frac);621 printf(" Min %15.0f %15.0f %10.3f\n", min_total, min_multi, min_frac);622 printf(" Max %15.0f %15.0f %10.3f\n", max_total, max_multi, max_frac);623 624 printf("\n\n Sync\n");625 printf(" Proc Time Fraction\n");626 for (i = 0; i < nprocs; i++) {627 printf(" %ld %u %f\n", i, (unsigned int)*gp[i].sync_time, *gp[i].sync_time / ((long)(*gp[i].total_time)));628 }629 630 printf(" Avg %f %f\n", avg_wait, (double) avg_wait / (long) (computeend - global->trackstart));631 printf(" Min %f %f\n", min_wait, (double) min_wait / (long) (*gp[imin_wait].total_time));632 printf(" Max %f %f\n", max_wait, (double) max_wait / (long) (*gp[imax_wait].total_time));633 634 printf("\nPhases Avg :\n\n");635 for (i = 0; i < 10; i++) {636 phase_time = 0;637 for (j = 0; j < nprocs; j++) {638 phase_time += gp[j].steps_time[i];639 }640 phase_time /= (double) nprocs;641 printf(" %d = %f (fraction %f)\n", i + 1, phase_time, phase_time / (long) (computeend - global->trackstart));642 }643 }644 printf("\n");645 646 global->starttime = start;647 printf(" TIMING INFORMATION\n");648 printf("[NPROCS] : %16ld\n", nprocs);649 printf("[START1] : %16llu\n", global->starttime);650 printf("[START2] : %16llu\n", global->trackstart);651 printf("[END] : %16llu\n", computeend);652 printf("[TOTAL] : %16llu\n", computeend - global->starttime); // With init653 printf("[PARALLEL_COMPUTE] : %16llu\n", computeend - global->trackstart); // Without init654 printf("(excludes first timestep)\n");655 printf("\n");656 657 MAIN_END658 659 }660 661 long log_2(long number)662 {663 long cumulative = 1;664 long out = 0;665 long done = 0;666 667 while ((cumulative < number) && (!done) && (out < 50)) {668 if (cumulative == number) {669 done = 1;670 } else {671 cumulative = cumulative * 2;672 out++;673 }674 }675 676 if (cumulative == number) {677 return (out);678 } else {679 return (-1);680 }681 }682 683 void printerr(char *s)684 {685 fprintf(stderr, "ERROR: %s\n", s);686 }687 688 689 // Local Variables:690 // tab-width: 4691 // c-basic-offset: 4692 // c-file-offsets:((innamespace . 0)(inline-open . 0))693 // indent-tabs-mode: nil694 // End:695 696 // vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4 -
soft/giet_vm/applications/ocean/multi.C
r581 r589 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 16 2 17 /* Shared memory implementation of the multigrid method18 Implementation uses red-black gauss-seidel relaxation19 iterations, w cycles, and the method of half-injection for20 residual computation. */21 22 EXTERN_ENV23 24 #include <stdio.h>25 #include <math.h>26 #include <stdlib.h>27 28 #include "decs.h"29 30 /* perform multigrid (w cycles) */31 void multig(long my_id)32 {33 long iter;34 double wu;35 double errp;36 long m;37 long flag;38 long k;39 long my_num;40 double wmax;41 double local_err;42 double red_local_err;43 double black_local_err;44 double g_error;45 46 flag = 0;47 iter = 0;48 m = numlev - 1;49 wmax = maxwork;50 my_num = my_id;51 wu = 0.0;52 53 k = m;54 g_error = 1.0e30;55 while (!flag) {56 errp = g_error;57 iter++;58 if (my_num == MASTER) {59 multi->err_multi = 0.0;60 }61 62 /* barrier to make sure all procs have finished intadd or rescal */63 /* before proceeding with relaxation */64 #if defined(MULTIPLE_BARRIERS)65 BARRIER(bars->error_barrier, nprocs)66 #else67 BARRIER(bars->barrier, nprocs)68 #endif69 copy_black(k, my_num);70 71 relax(k, &red_local_err, RED_ITER, my_num);72 73 /* barrier to make sure all red computations have been performed */74 #if defined(MULTIPLE_BARRIERS)75 BARRIER(bars->error_barrier, nprocs)76 #else77 BARRIER(bars->barrier, nprocs)78 #endif79 copy_red(k, my_num);80 81 relax(k, &black_local_err, BLACK_ITER, my_num);82 83 /* compute max local error from red_local_err and black_local_err */84 85 if (red_local_err > black_local_err) {86 local_err = red_local_err;87 } else {88 local_err = black_local_err;89 }90 91 /* update the global error if necessary */92 93 LOCK(locks->error_lock)94 if (local_err > multi->err_multi) {95 multi->err_multi = local_err;96 }97 UNLOCK(locks->error_lock)98 99 /* a single relaxation sweep at the finest level is one unit of */100 /* work */101 wu += pow((double) 4.0, (double) k - m);102 103 /* barrier to make sure all processors have checked local error */104 #if defined(MULTIPLE_BARRIERS)105 BARRIER(bars->error_barrier, nprocs)106 #else107 BARRIER(bars->barrier, nprocs)108 #endif109 g_error = multi->err_multi;110 111 /* barrier to make sure master does not cycle back to top of loop */112 /* and reset global->err before we read it and decide what to do */113 #if defined(MULTIPLE_BARRIERS)114 BARRIER(bars->error_barrier, nprocs)115 #else116 BARRIER(bars->barrier, nprocs)117 #endif118 if (g_error >= lev_tol[k]) {119 if (wu > wmax) {120 /* max work exceeded */121 fprintf(stderr, "ERROR: Maximum work limit %0.5f exceeded\n", wmax);122 exit(-1);123 } else {124 /* if we have not converged */125 if ((k != 0) && (g_error / errp >= 0.6) && (k > minlevel)) {126 /* if need to go to coarser grid */127 128 copy_borders(k, my_num);129 copy_rhs_borders(k, my_num);130 131 /* This bar is needed because the routine rescal uses the neighbor's132 border points to compute s4. We must ensure that the neighbor's133 border points have been written before we try computing the new134 rescal values */135 136 #if defined(MULTIPLE_BARRIERS)137 BARRIER(bars->error_barrier, nprocs)138 #else139 BARRIER(bars->barrier, nprocs)140 #endif141 rescal(k, my_num);142 143 /* transfer residual to rhs of coarser grid */144 lev_tol[k - 1] = 0.3 * g_error;145 k = k - 1;146 putz(k, my_num);147 /* make initial guess on coarser grid zero */148 g_error = 1.0e30;149 }150 }151 } else {152 /* if we have converged at this level */153 if (k == m) {154 /* if finest grid, we are done */155 flag = 1;156 } else {157 /* else go to next finest grid */158 159 copy_borders(k, my_num);160 161 intadd(k, my_num);162 /* changes the grid values at the finer level. rhs at finer level */163 /* remains what it already is */164 k++;165 g_error = 1.0e30;166 }167 }168 }169 if (do_output) {170 if (my_num == MASTER) {171 printf("iter %ld, level %ld, residual norm %12.8e, work = %7.3f\n", iter, k, multi->err_multi, wu);172 }173 }174 }175 176 /* perform red or black iteration (not both) */177 void relax(long k, double *err, long color, long my_num)178 {179 long i;180 long j;181 long iend;182 long jend;183 long oddistart;184 long oddjstart;185 long evenistart;186 long evenjstart;187 double a;188 double h;189 double factor;190 double maxerr;191 double newerr;192 double oldval;193 double newval;194 double **t2a;195 double **t2b;196 double *t1a;197 double *t1b;198 double *t1c;199 double *t1d;200 201 i = 0;202 j = 0;203 204 *err = 0.0;205 h = lev_res[k];206 207 /* points whose sum of row and col index is even do a red iteration, */208 /* others do a black */209 210 evenistart = gp[my_num].eist[k];211 evenjstart = gp[my_num].ejst[k];212 oddistart = gp[my_num].oist[k];213 oddjstart = gp[my_num].ojst[k];214 215 iend = gp[my_num].rlien[k];216 jend = gp[my_num].rljen[k];217 218 factor = 4.0 - eig2 * h * h;219 maxerr = 0.0;220 t2a = (double **) q_multi[my_num][k];221 t2b = (double **) rhs_multi[my_num][k];222 if (color == RED_ITER) {223 for (i = evenistart; i < iend; i += 2) {224 t1a = (double *) t2a[i];225 t1b = (double *) t2b[i];226 t1c = (double *) t2a[i - 1];227 t1d = (double *) t2a[i + 1];228 for (j = evenjstart; j < jend; j += 2) {229 a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];230 oldval = t1a[j];231 newval = a / factor;232 newerr = oldval - newval;233 t1a[j] = newval;234 if (fabs(newerr) > maxerr) {235 maxerr = fabs(newerr);236 }237 }238 }239 for (i = oddistart; i < iend; i += 2) {240 t1a = (double *) t2a[i];241 t1b = (double *) t2b[i];242 t1c = (double *) t2a[i - 1];243 t1d = (double *) t2a[i + 1];244 for (j = oddjstart; j < jend; j += 2) {245 a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];246 oldval = t1a[j];247 newval = a / factor;248 newerr = oldval - newval;249 t1a[j] = newval;250 if (fabs(newerr) > maxerr) {251 maxerr = fabs(newerr);252 }253 }254 }255 } else if (color == BLACK_ITER) {256 for (i = evenistart; i < iend; i += 2) {257 t1a = (double *) t2a[i];258 t1b = (double *) t2b[i];259 t1c = (double *) t2a[i - 1];260 t1d = (double *) t2a[i + 1];261 for (j = oddjstart; j < jend; j += 2) {262 a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];263 oldval = t1a[j];264 newval = a / factor;265 newerr = oldval - newval;266 t1a[j] = newval;267 if (fabs(newerr) > maxerr) {268 maxerr = fabs(newerr);269 }270 }271 }272 for (i = oddistart; i < iend; i += 2) {273 t1a = (double *) t2a[i];274 t1b = (double *) t2b[i];275 t1c = (double *) t2a[i - 1];276 t1d = (double *) t2a[i + 1];277 for (j = evenjstart; j < jend; j += 2) {278 a = t1a[j + 1] + t1a[j - 1] + t1c[j] + t1d[j] - t1b[j];279 oldval = t1a[j];280 newval = a / factor;281 newerr = oldval - newval;282 t1a[j] = newval;283 if (fabs(newerr) > maxerr) {284 maxerr = fabs(newerr);285 }286 }287 }288 }289 *err = maxerr;290 }291 292 /* perform half-injection to next coarsest level */293 void rescal(long kf, long my_num)294 {295 long ic;296 long if17;297 long jf;298 long jc;299 long krc;300 long istart;301 long iend;302 long jstart;303 long jend;304 double hf;305 double s;306 double s1;307 double s2;308 double s3;309 double s4;310 double factor;311 double int1;312 double int2;313 double i_int_factor;314 double j_int_factor;315 long i_off;316 long j_off;317 long up_proc;318 long left_proc;319 long im;320 long jm;321 double temp;322 double temp2;323 double **t2a;324 double **t2b;325 double **t2c;326 double *t1a;327 double *t1b;328 double *t1c;329 double *t1d;330 double *t1e;331 double *t1f;332 double *t1g;333 double *t1h;334 335 krc = kf - 1;336 //hc = lev_res[krc];337 hf = lev_res[kf];338 i_off = (*gp[my_num].rownum) * ypts_per_proc[krc];339 j_off = (*gp[my_num].colnum) * xpts_per_proc[krc];340 up_proc = gp[my_num].neighbors[UP];341 left_proc = gp[my_num].neighbors[LEFT];342 im = (imx[kf] - 2) / yprocs;343 jm = (jmx[kf] - 2) / xprocs;344 345 istart = gp[my_num].rlist[krc];346 jstart = gp[my_num].rljst[krc];347 iend = gp[my_num].rlien[krc] - 1;348 jend = gp[my_num].rljen[krc] - 1;349 350 factor = 4.0 - eig2 * hf * hf;351 352 t2a = (double **) q_multi[my_num][kf];353 t2b = (double **) rhs_multi[my_num][kf];354 t2c = (double **) rhs_multi[my_num][krc];355 if17 = 2 * (istart - 1);356 for (ic = istart; ic <= iend; ic++) {357 if17 += 2;358 i_int_factor = (ic + i_off) * i_int_coeff[krc] * 0.5;359 jf = 2 * (jstart - 1);360 t1a = (double *) t2a[if17];361 t1b = (double *) t2b[if17];362 t1c = (double *) t2c[ic];363 t1d = (double *) t2a[if17 - 1];364 t1e = (double *) t2a[if17 + 1];365 t1f = (double *) t2a[if17 - 2];366 t1g = (double *) t2a[if17 - 3];367 t1h = (double *) t2b[if17 - 2];368 for (jc = jstart; jc <= jend; jc++) {369 jf += 2;370 j_int_factor = (jc + j_off) * j_int_coeff[krc] * 0.5;371 372 /* method of half-injection uses 2.0 instead of 4.0 */373 374 /* do bilinear interpolation */375 s = t1a[jf + 1] + t1a[jf - 1] + t1d[jf] + t1e[jf];376 s1 = 2.0 * (t1b[jf] - s + factor * t1a[jf]);377 if (((if17 == 2) && (gp[my_num].neighbors[UP] == -1)) || ((jf == 2) && (gp[my_num].neighbors[LEFT] == -1))) {378 s2 = 0;379 s3 = 0;380 s4 = 0;381 } else if ((if17 == 2) || (jf == 2)) {382 if (jf == 2) {383 temp = q_multi[left_proc][kf][if17][jm - 1];384 } else {385 temp = t1a[jf - 3];386 }387 s = t1a[jf - 1] + temp + t1d[jf - 2] + t1e[jf - 2];388 s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]);389 if (if17 == 2) {390 temp = q_multi[up_proc][kf][im - 1][jf];391 } else {392 temp = t1g[jf];393 }394 s = t1f[jf + 1] + t1f[jf - 1] + temp + t1d[jf];395 s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]);396 if (jf == 2) {397 temp = q_multi[left_proc][kf][if17 - 2][jm - 1];398 } else {399 temp = t1f[jf - 3];400 }401 if (if17 == 2) {402 temp2 = q_multi[up_proc][kf][im - 1][jf - 2];403 } else {404 temp2 = t1g[jf - 2];405 }406 s = t1f[jf - 1] + temp + temp2 + t1d[jf - 2];407 s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]);408 } else {409 s = t1a[jf - 1] + t1a[jf - 3] + t1d[jf - 2] + t1e[jf - 2];410 s2 = 2.0 * (t1b[jf - 2] - s + factor * t1a[jf - 2]);411 s = t1f[jf + 1] + t1f[jf - 1] + t1g[jf] + t1d[jf];412 s3 = 2.0 * (t1h[jf] - s + factor * t1f[jf]);413 s = t1f[jf - 1] + t1f[jf - 3] + t1g[jf - 2] + t1d[jf - 2];414 s4 = 2.0 * (t1h[jf - 2] - s + factor * t1f[jf - 2]);415 }416 int1 = j_int_factor * s4 + (1.0 - j_int_factor) * s3;417 int2 = j_int_factor * s2 + (1.0 - j_int_factor) * s1;418 //int_val = i_int_factor*int1+(1.0-i_int_factor)*int2;419 t1c[jc] = i_int_factor * int1 + (1.0 - i_int_factor) * int2;420 }421 }422 }423 424 /* perform interpolation and addition to next finest grid */425 void intadd(long kc, long my_num)426 {427 long ic;428 long if17;429 long jf;430 long jc;431 long kf;432 long istart;433 long jstart;434 long iend;435 long jend;436 double int1;437 double int2;438 double i_int_factor1;439 double j_int_factor1;440 double i_int_factor2;441 double j_int_factor2;442 long i_off;443 long j_off;444 double **t2a;445 double **t2b;446 double *t1a;447 double *t1b;448 double *t1c;449 double *t1d;450 double *t1e;451 452 kf = kc + 1;453 //hc = lev_res[kc];454 //hf = lev_res[kf];455 456 istart = gp[my_num].rlist[kc];457 jstart = gp[my_num].rljst[kc];458 iend = gp[my_num].rlien[kc] - 1;459 jend = gp[my_num].rljen[kc] - 1;460 i_off = (*gp[my_num].rownum) * ypts_per_proc[kc];461 j_off = (*gp[my_num].colnum) * xpts_per_proc[kc];462 463 t2a = (double **) q_multi[my_num][kc];464 t2b = (double **) q_multi[my_num][kf];465 if17 = 2 * (istart - 1);466 for (ic = istart; ic <= iend; ic++) {467 if17 += 2;468 i_int_factor1 = ((imx[kc] - 2) - (ic + i_off - 1)) * (i_int_coeff[kf]);469 i_int_factor2 = (ic + i_off) * i_int_coeff[kf];470 jf = 2 * (jstart - 1);471 472 t1a = (double *) t2a[ic];473 t1b = (double *) t2a[ic - 1];474 t1c = (double *) t2a[ic + 1];475 t1d = (double *) t2b[if17];476 t1e = (double *) t2b[if17 - 1];477 for (jc = jstart; jc <= jend; jc++) {478 jf += 2;479 j_int_factor1 = ((jmx[kc] - 2) - (jc + j_off - 1)) * (j_int_coeff[kf]);480 j_int_factor2 = (jc + j_off) * j_int_coeff[kf];481 482 int1 = j_int_factor1 * t1a[jc - 1] + (1.0 - j_int_factor1) * t1a[jc];483 int2 = j_int_factor1 * t1b[jc - 1] + (1.0 - j_int_factor1) * t1b[jc];484 t1e[jf - 1] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1;485 int2 = j_int_factor1 * t1c[jc - 1] + (1.0 - j_int_factor1) * t1c[jc];486 t1d[jf - 1] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1;487 int1 = j_int_factor2 * t1a[jc + 1] + (1.0 - j_int_factor2) * t1a[jc];488 int2 = j_int_factor2 * t1b[jc + 1] + (1.0 - j_int_factor2) * t1b[jc];489 t1e[jf] += i_int_factor1 * int2 + (1.0 - i_int_factor1) * int1;490 int2 = j_int_factor2 * t1c[jc + 1] + (1.0 - j_int_factor2) * t1c[jc];491 t1d[jf] += i_int_factor2 * int2 + (1.0 - i_int_factor2) * int1;492 }493 }494 }495 496 /* initialize a grid to zero in parallel */497 void putz(long k, long my_num)498 {499 long i;500 long j;501 long istart;502 long jstart;503 long iend;504 long jend;505 double **t2a;506 double *t1a;507 508 istart = gp[my_num].rlist[k];509 jstart = gp[my_num].rljst[k];510 iend = gp[my_num].rlien[k];511 jend = gp[my_num].rljen[k];512 513 t2a = (double **) q_multi[my_num][k];514 for (i = istart; i <= iend; i++) {515 t1a = (double *) t2a[i];516 for (j = jstart; j <= jend; j++) {517 t1a[j] = 0.0;518 }519 }520 }521 522 void copy_borders(long k, long pid)523 {524 long i;525 long j;526 long jj;527 long im;528 long jm;529 long lastrow;530 long lastcol;531 double **t2a;532 double **t2b;533 double *t1a;534 double *t1b;535 536 im = (imx[k] - 2) / yprocs + 2;537 jm = (jmx[k] - 2) / xprocs + 2;538 lastrow = (imx[k] - 2) / yprocs;539 lastcol = (jmx[k] - 2) / xprocs;540 541 t2a = (double **) q_multi[pid][k];542 jj = gp[pid].neighbors[UPLEFT];543 if (jj != -1) {544 t2a[0][0] = q_multi[jj][k][im - 2][jm - 2];545 }546 jj = gp[pid].neighbors[UPRIGHT];547 if (jj != -1) {548 t2a[0][jm - 1] = q_multi[jj][k][im - 2][1];549 }550 jj = gp[pid].neighbors[DOWNLEFT];551 if (jj != -1) {552 t2a[im - 1][0] = q_multi[jj][k][1][jm - 2];553 }554 jj = gp[pid].neighbors[DOWNRIGHT];555 if (jj != -1) {556 t2a[im - 1][jm - 1] = q_multi[jj][k][1][1];557 }558 559 if (gp[pid].neighbors[UP] == -1) {560 jj = gp[pid].neighbors[LEFT];561 if (jj != -1) {562 t2a[0][0] = q_multi[jj][k][0][jm - 2];563 } else {564 jj = gp[pid].neighbors[DOWN];565 if (jj != -1) {566 t2a[im - 1][0] = q_multi[jj][k][1][0];567 }568 }569 jj = gp[pid].neighbors[RIGHT];570 if (jj != -1) {571 t2a[0][jm - 1] = q_multi[jj][k][0][1];572 } else {573 jj = gp[pid].neighbors[DOWN];574 if (jj != -1) {575 t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1];576 }577 }578 } else if (gp[pid].neighbors[DOWN] == -1) {579 jj = gp[pid].neighbors[LEFT];580 if (jj != -1) {581 t2a[im - 1][0] = q_multi[jj][k][im - 1][jm - 2];582 } else {583 jj = gp[pid].neighbors[UP];584 if (jj != -1) {585 t2a[0][0] = q_multi[jj][k][im - 2][0];586 }587 }588 jj = gp[pid].neighbors[RIGHT];589 if (jj != -1) {590 t2a[im - 1][jm - 1] = q_multi[jj][k][im - 1][1];591 } else {592 jj = gp[pid].neighbors[UP];593 if (jj != -1) {594 t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1];595 }596 }597 } else if (gp[pid].neighbors[LEFT] == -1) {598 jj = gp[pid].neighbors[UP];599 if (jj != -1) {600 t2a[0][0] = q_multi[jj][k][im - 2][0];601 }602 jj = gp[pid].neighbors[DOWN];603 if (jj != -1) {604 t2a[im - 1][0] = q_multi[jj][k][1][0];605 }606 } else if (gp[pid].neighbors[RIGHT] == -1) {607 jj = gp[pid].neighbors[UP];608 if (jj != -1) {609 t2a[0][jm - 1] = q_multi[jj][k][im - 2][jm - 1];610 }611 jj = gp[pid].neighbors[DOWN];612 if (jj != -1) {613 t2a[im - 1][jm - 1] = q_multi[jj][k][1][jm - 1];614 }615 }616 617 j = gp[pid].neighbors[UP];618 if (j != -1) {619 t1a = (double *) t2a[0];620 t1b = (double *) q_multi[j][k][im - 2];621 for (i = 1; i <= lastcol; i++) {622 t1a[i] = t1b[i];623 }624 }625 j = gp[pid].neighbors[DOWN];626 if (j != -1) {627 t1a = (double *) t2a[im - 1];628 t1b = (double *) q_multi[j][k][1];629 for (i = 1; i <= lastcol; i++) {630 t1a[i] = t1b[i];631 }632 }633 j = gp[pid].neighbors[LEFT];634 if (j != -1) {635 t2b = (double **) q_multi[j][k];636 for (i = 1; i <= lastrow; i++) {637 t2a[i][0] = t2b[i][jm - 2];638 }639 }640 j = gp[pid].neighbors[RIGHT];641 if (j != -1) {642 t2b = (double **) q_multi[j][k];643 for (i = 1; i <= lastrow; i++) {644 t2a[i][jm - 1] = t2b[i][1];645 }646 }647 648 }649 650 void copy_rhs_borders(long k, long procid)651 {652 long i;653 long j;654 long im;655 long jm;656 long lastrow;657 long lastcol;658 double **t2a;659 double **t2b;660 double *t1a;661 double *t1b;662 663 im = (imx[k] - 2) / yprocs + 2;664 jm = (jmx[k] - 2) / xprocs + 2;665 lastrow = (imx[k] - 2) / yprocs;666 lastcol = (jmx[k] - 2) / xprocs;667 668 t2a = (double **) rhs_multi[procid][k];669 if (gp[procid].neighbors[UPLEFT] != -1) {670 j = gp[procid].neighbors[UPLEFT];671 t2a[0][0] = rhs_multi[j][k][im - 2][jm - 2];672 }673 674 if (gp[procid].neighbors[UP] != -1) {675 j = gp[procid].neighbors[UP];676 if (j != -1) {677 t1a = (double *) t2a[0];678 t1b = (double *) rhs_multi[j][k][im - 2];679 for (i = 2; i <= lastcol; i += 2) {680 t1a[i] = t1b[i];681 }682 }683 }684 if (gp[procid].neighbors[LEFT] != -1) {685 j = gp[procid].neighbors[LEFT];686 if (j != -1) {687 t2b = (double **) rhs_multi[j][k];688 for (i = 2; i <= lastrow; i += 2) {689 t2a[i][0] = t2b[i][jm - 2];690 }691 }692 }693 }694 695 void copy_red(long k, long procid)696 {697 long i;698 long j;699 long im;700 long jm;701 long lastrow;702 long lastcol;703 double **t2a;704 double **t2b;705 double *t1a;706 double *t1b;707 708 im = (imx[k] - 2) / yprocs + 2;709 jm = (jmx[k] - 2) / xprocs + 2;710 lastrow = (imx[k] - 2) / yprocs;711 lastcol = (jmx[k] - 2) / xprocs;712 713 t2a = (double **) q_multi[procid][k];714 j = gp[procid].neighbors[UP];715 if (j != -1) {716 t1a = (double *) t2a[0];717 t1b = (double *) q_multi[j][k][im - 2];718 for (i = 2; i <= lastcol; i += 2) {719 t1a[i] = t1b[i];720 }721 }722 j = gp[procid].neighbors[DOWN];723 if (j != -1) {724 t1a = (double *) t2a[im - 1];725 t1b = (double *) q_multi[j][k][1];726 for (i = 1; i <= lastcol; i += 2) {727 t1a[i] = t1b[i];728 }729 }730 j = gp[procid].neighbors[LEFT];731 if (j != -1) {732 t2b = (double **) q_multi[j][k];733 for (i = 2; i <= lastrow; i += 2) {734 t2a[i][0] = t2b[i][jm - 2];735 }736 }737 j = gp[procid].neighbors[RIGHT];738 if (j != -1) {739 t2b = (double **) q_multi[j][k];740 for (i = 1; i <= lastrow; i += 2) {741 t2a[i][jm - 1] = t2b[i][1];742 }743 }744 }745 746 void copy_black(long k, long procid)747 {748 long i;749 long j;750 long im;751 long jm;752 long lastrow;753 long lastcol;754 double **t2a;755 double **t2b;756 double *t1a;757 double *t1b;758 759 im = (imx[k] - 2) / yprocs + 2;760 jm = (jmx[k] - 2) / xprocs + 2;761 lastrow = (imx[k] - 2) / yprocs;762 lastcol = (jmx[k] - 2) / xprocs;763 764 t2a = (double **) q_multi[procid][k];765 j = gp[procid].neighbors[UP];766 if (j != -1) {767 t1a = (double *) t2a[0];768 t1b = (double *) q_multi[j][k][im - 2];769 for (i = 1; i <= lastcol; i += 2) {770 t1a[i] = t1b[i];771 }772 }773 j = gp[procid].neighbors[DOWN];774 if (j != -1) {775 t1a = (double *) t2a[im - 1];776 t1b = (double *) q_multi[j][k][1];777 for (i = 2; i <= lastcol; i += 2) {778 t1a[i] = t1b[i];779 }780 }781 j = gp[procid].neighbors[LEFT];782 if (j != -1) {783 t2b = (double **) q_multi[j][k];784 for (i = 1; i <= lastrow; i += 2) {785 t2a[i][0] = t2b[i][jm - 2];786 }787 }788 j = gp[procid].neighbors[RIGHT];789 if (j != -1) {790 t2b = (double **) q_multi[j][k];791 for (i = 2; i <= lastrow; i += 2) {792 t2a[i][jm - 1] = t2b[i][1];793 }794 }795 } -
soft/giet_vm/applications/ocean/ocean.py
r581 r589 27 27 28 28 ######################### 29 def ocean( mapping ):29 def extend( mapping ): 30 30 31 31 x_size = mapping.x_size … … 57 57 mapping.addVseg( vspace, 'ocean_data', data_base , data_size, 58 58 'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM', 59 binpath = 'build/ocean/ ocean.elf',59 binpath = 'build/ocean/appli.elf', 60 60 local = False ) 61 61 … … 69 69 code_base , code_size, 70 70 'CXWU', vtype = 'ELF', x = x, y = y, pseg = 'RAM', 71 binpath = 'build/ocean/ ocean.elf',71 binpath = 'build/ocean/appli.elf', 72 72 local = True ) 73 73 … … 135 135 if __name__ == '__main__': 136 136 137 vspace = ocean( Mapping( 'test', 2, 2, 4 ) )137 vspace = extend( Mapping( 'test', 2, 2, 4 ) ) 138 138 print vspace.xml() 139 139 -
soft/giet_vm/applications/ocean/slave1.C
r581 r589 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 16 2 17 /* ****************18 subroutine slave19 **************** */20 21 EXTERN_ENV22 23 #include <stdio.h>24 #include <math.h>25 #include <stdlib.h>26 27 #include "decs.h"28 29 void slave(long *ptr_procid)30 {31 long i;32 long j;33 long nstep;34 long iindex;35 long iday;36 double ysca1;37 double y;38 double factor;39 double sintemp;40 double curlt;41 double ressqr;42 long istart;43 long iend;44 long jstart;45 long jend;46 long ist;47 long ien;48 long jst;49 long jen;50 double fac;51 long dayflag = 0;52 long dhourflag = 0;53 long endflag = 0;54 long firstrow;55 long lastrow;56 long numrows;57 long firstcol;58 long lastcol;59 long numcols;60 long psiindex;61 double psibipriv;62 double ttime;63 double dhour;64 double day;65 long procid;66 long j_off = 0;67 unsigned long t1;68 double **t2a;69 double **t2b;70 double *t1a;71 double *t1b;72 double *t1c;73 double *t1d;74 75 76 /*77 LOCK(locks->idlock)78 procid = global->id;79 global->id = global->id+1;80 UNLOCK(locks->idlock)81 */82 83 procid = *ptr_procid;84 ressqr = lev_res[numlev - 1] * lev_res[numlev - 1];85 86 #if defined(MULTIPLE_BARRIERS)87 BARRIER(bars->sl_prini, nprocs)88 #else89 BARRIER(bars->barrier, nprocs)90 #endif91 /* POSSIBLE ENHANCEMENT: Here is where one might pin processes to92 processors to avoid migration. */93 /* POSSIBLE ENHANCEMENT: Here is where one might distribute94 data structures across physically distributed memories as95 desired.96 97 One way to do this is as follows. The function allocate(START,SIZE,I)98 is assumed to place all addresses x such that99 (START <= x < START+SIZE) on node I.100 101 long d_size;102 unsigned long g_size;103 unsigned long mg_size;104 105 if (procid == MASTER) {106 g_size = ((jmx[numlev-1]-2)/xprocs+2)*((imx[numlev-1]-2)/yprocs+2)*siz107 eof(double) +108 ((imx[numlev-1]-2)/yprocs+2)*sizeof(double *);109 110 mg_size = numlev*sizeof(double **);111 for (i=0;i<numlev;i++) {112 mg_size+=((imx[i]-2)/yprocs+2)*((jmx[i]-2)/xprocs+2)*sizeof(double)+113 ((imx[i]-2)/yprocs+2)*sizeof(double *);114 }115 for (i= 0;i<nprocs;i++) {116 d_size = 2*sizeof(double **);117 allocate((unsigned long) psi[i],d_size,i);118 allocate((unsigned long) psim[i],d_size,i);119 allocate((unsigned long) work1[i],d_size,i);120 allocate((unsigned long) work4[i],d_size,i);121 allocate((unsigned long) work5[i],d_size,i);122 allocate((unsigned long) work7[i],d_size,i);123 allocate((unsigned long) temparray[i],d_size,i);124 allocate((unsigned long) psi[i][0],g_size,i);125 allocate((unsigned long) psi[i][1],g_size,i);126 allocate((unsigned long) psim[i][0],g_size,i);127 allocate((unsigned long) psim[i][1],g_size,i);128 allocate((unsigned long) psium[i],g_size,i);129 allocate((unsigned long) psilm[i],g_size,i);130 allocate((unsigned long) psib[i],g_size,i);131 allocate((unsigned long) ga[i],g_size,i);132 allocate((unsigned long) gb[i],g_size,i);133 allocate((unsigned long) work1[i][0],g_size,i);134 allocate((unsigned long) work1[i][1],g_size,i);135 allocate((unsigned long) work2[i],g_size,i);136 allocate((unsigned long) work3[i],g_size,i);137 allocate((unsigned long) work4[i][0],g_size,i);138 allocate((unsigned long) work4[i][1],g_size,i);139 allocate((unsigned long) work5[i][0],g_size,i);140 allocate((unsigned long) work5[i][1],g_size,i);141 allocate((unsigned long) work6[i],g_size,i);142 allocate((unsigned long) work7[i][0],g_size,i);143 allocate((unsigned long) work7[i][1],g_size,i);144 allocate((unsigned long) temparray[i][0],g_size,i);145 allocate((unsigned long) temparray[i][1],g_size,i);146 allocate((unsigned long) tauz[i],g_size,i);147 allocate((unsigned long) oldga[i],g_size,i);148 allocate((unsigned long) oldgb[i],g_size,i);149 d_size = numlev * sizeof(long);150 allocate((unsigned long) gp[i].rel_num_x,d_size,i);151 allocate((unsigned long) gp[i].rel_num_y,d_size,i);152 allocate((unsigned long) gp[i].eist,d_size,i);153 allocate((unsigned long) gp[i].ejst,d_size,i);154 allocate((unsigned long) gp[i].oist,d_size,i);155 allocate((unsigned long) gp[i].ojst,d_size,i);156 allocate((unsigned long) gp[i].rlist,d_size,i);157 allocate((unsigned long) gp[i].rljst,d_size,i);158 allocate((unsigned long) gp[i].rlien,d_size,i);159 allocate((unsigned long) gp[i].rljen,d_size,i);160 161 allocate((unsigned long) q_multi[i],mg_size,i);162 allocate((unsigned long) rhs_multi[i],mg_size,i);163 allocate((unsigned long) &(gp[i]),sizeof(struct Global_Private),i);164 }165 }166 167 */168 t2a = (double **) oldga[procid];169 t2b = (double **) oldgb[procid];170 for (i = 0; i < im; i++) {171 t1a = (double *) t2a[i];172 t1b = (double *) t2b[i];173 for (j = 0; j < jm; j++) {174 t1a[j] = 0.0;175 t1b[j] = 0.0;176 }177 }178 179 firstcol = 1;180 lastcol = firstcol + gp[procid].rel_num_x[numlev - 1] - 1;181 firstrow = 1;182 lastrow = firstrow + gp[procid].rel_num_y[numlev - 1] - 1;183 numcols = gp[procid].rel_num_x[numlev - 1];184 numrows = gp[procid].rel_num_y[numlev - 1];185 j_off = (*gp[procid].colnum) * numcols;186 187 /*188 if (procid > nprocs/2) {189 psinum = 2;190 } else {191 psinum = 1;192 }193 */194 195 /* every process gets its own copy of the timing variables to avoid196 contention at shared memory locations. here, these variables197 are initialized. */198 199 ttime = 0.0;200 dhour = 0.0;201 nstep = 0;202 day = 0.0;203 204 ysca1 = 0.5 * ysca;205 206 if (*gp[procid].lpid == MASTER) {207 208 f = (double *) G_MALLOC(oim * sizeof(double), procid);209 210 t1a = (double *) f;211 for (iindex = 0; iindex <= jmx[numlev - 1] - 1; iindex++) {212 y = ((double) iindex) * res;213 t1a[iindex] = f0 + beta * (y - ysca1);214 }215 }216 217 t2a = (double **) psium[procid];218 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {219 t2a[0][0] = 0.0;220 }221 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {222 t2a[im - 1][0] = 0.0;223 }224 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {225 t2a[0][jm - 1] = 0.0;226 }227 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {228 t2a[im - 1][jm - 1] = 0.0;229 }230 if (gp[procid].neighbors[UP] == -1) {231 t1a = (double *) t2a[0];232 for (j = firstcol; j <= lastcol; j++) {233 t1a[j] = 0.0;234 }235 }236 if (gp[procid].neighbors[DOWN] == -1) {237 t1a = (double *) t2a[im - 1];238 for (j = firstcol; j <= lastcol; j++) {239 t1a[j] = 0.0;240 }241 }242 if (gp[procid].neighbors[LEFT] == -1) {243 for (j = firstrow; j <= lastrow; j++) {244 t2a[j][0] = 0.0;245 }246 }247 if (gp[procid].neighbors[RIGHT] == -1) {248 for (j = firstrow; j <= lastrow; j++) {249 t2a[j][jm - 1] = 0.0;250 }251 }252 253 for (i = firstrow; i <= lastrow; i++) {254 t1a = (double *) t2a[i];255 for (iindex = firstcol; iindex <= lastcol; iindex++) {256 t1a[iindex] = 0.0;257 }258 }259 t2a = (double **) psilm[procid];260 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {261 t2a[0][0] = 0.0;262 }263 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {264 t2a[im - 1][0] = 0.0;265 }266 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {267 t2a[0][jm - 1] = 0.0;268 }269 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {270 t2a[im - 1][jm - 1] = 0.0;271 }272 if (gp[procid].neighbors[UP] == -1) {273 t1a = (double *) t2a[0];274 for (j = firstcol; j <= lastcol; j++) {275 t1a[j] = 0.0;276 }277 }278 if (gp[procid].neighbors[DOWN] == -1) {279 t1a = (double *) t2a[im - 1];280 for (j = firstcol; j <= lastcol; j++) {281 t1a[j] = 0.0;282 }283 }284 if (gp[procid].neighbors[LEFT] == -1) {285 for (j = firstrow; j <= lastrow; j++) {286 t2a[j][0] = 0.0;287 }288 }289 if (gp[procid].neighbors[RIGHT] == -1) {290 for (j = firstrow; j <= lastrow; j++) {291 t2a[j][jm - 1] = 0.0;292 }293 }294 for (i = firstrow; i <= lastrow; i++) {295 t1a = (double *) t2a[i];296 for (iindex = firstcol; iindex <= lastcol; iindex++) {297 t1a[iindex] = 0.0;298 }299 }300 301 t2a = (double **) psib[procid];302 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {303 t2a[0][0] = 1.0;304 }305 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {306 t2a[0][jm - 1] = 1.0;307 }308 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {309 t2a[im - 1][0] = 1.0;310 }311 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {312 t2a[im - 1][jm - 1] = 1.0;313 }314 if (gp[procid].neighbors[UP] == -1) {315 t1a = (double *) t2a[0];316 for (j = firstcol; j <= lastcol; j++) {317 t1a[j] = 1.0;318 }319 }320 if (gp[procid].neighbors[DOWN] == -1) {321 t1a = (double *) t2a[im - 1];322 for (j = firstcol; j <= lastcol; j++) {323 t1a[j] = 1.0;324 }325 }326 if (gp[procid].neighbors[LEFT] == -1) {327 for (j = firstrow; j <= lastrow; j++) {328 t2a[j][0] = 1.0;329 }330 }331 if (gp[procid].neighbors[RIGHT] == -1) {332 for (j = firstrow; j <= lastrow; j++) {333 t2a[j][jm - 1] = 1.0;334 }335 }336 for (i = firstrow; i <= lastrow; i++) {337 t1a = (double *) t2a[i];338 for (iindex = firstcol; iindex <= lastcol; iindex++) {339 t1a[iindex] = 0.0;340 }341 }342 343 /* wait until all processes have completed the above initialization */344 #if defined(MULTIPLE_BARRIERS)345 BARRIER(bars->sl_prini, nprocs)346 #else347 BARRIER(bars->barrier, nprocs)348 #endif349 /* compute psib array (one-time computation) and integrate into psibi */350 istart = 1;351 iend = istart + gp[procid].rel_num_y[numlev - 1] - 1;352 jstart = 1;353 jend = jstart + gp[procid].rel_num_x[numlev - 1] - 1;354 ist = istart;355 ien = iend;356 jst = jstart;357 jen = jend;358 359 if (gp[procid].neighbors[UP] == -1) {360 istart = 0;361 }362 if (gp[procid].neighbors[LEFT] == -1) {363 jstart = 0;364 }365 if (gp[procid].neighbors[DOWN] == -1) {366 iend = im - 1;367 }368 if (gp[procid].neighbors[RIGHT] == -1) {369 jend = jm - 1;370 }371 372 t2a = (double **) rhs_multi[procid][numlev - 1];373 t2b = (double **) psib[procid];374 for (i = istart; i <= iend; i++) {375 t1a = (double *) t2a[i];376 t1b = (double *) t2b[i];377 for (j = jstart; j <= jend; j++) {378 t1a[j] = t1b[j] * ressqr;379 }380 }381 t2a = (double **) q_multi[procid][numlev - 1];382 if (gp[procid].neighbors[UP] == -1) {383 t1a = (double *) t2a[0];384 t1b = (double *) t2b[0];385 for (j = jstart; j <= jend; j++) {386 t1a[j] = t1b[j];387 }388 }389 if (gp[procid].neighbors[DOWN] == -1) {390 t1a = (double *) t2a[im - 1];391 t1b = (double *) t2b[im - 1];392 for (j = jstart; j <= jend; j++) {393 t1a[j] = t1b[j];394 }395 }396 if (gp[procid].neighbors[LEFT] == -1) {397 for (i = istart; i <= iend; i++) {398 t2a[i][0] = t2b[i][0];399 }400 }401 if (gp[procid].neighbors[RIGHT] == -1) {402 for (i = istart; i <= iend; i++) {403 t2a[i][jm - 1] = t2b[i][jm - 1];404 }405 }406 407 #if defined(MULTIPLE_BARRIERS)408 BARRIER(bars->sl_psini, nprocs)409 #else410 BARRIER(bars->barrier, nprocs)411 #endif412 413 t2a = (double **) psib[procid];414 j = gp[procid].neighbors[UP];415 if (j != -1) {416 t1a = (double *) t2a[0];417 t1b = (double *) psib[j][im - 2];418 for (i = 1; i < jm - 1; i++) {419 t1a[i] = t1b[i];420 }421 }422 j = gp[procid].neighbors[DOWN];423 if (j != -1) {424 t1a = (double *) t2a[im - 1];425 t1b = (double *) psib[j][1];426 for (i = 1; i < jm - 1; i++) {427 t1a[i] = t1b[i];428 }429 }430 j = gp[procid].neighbors[LEFT];431 if (j != -1) {432 t2b = (double **) psib[j];433 for (i = 1; i < im - 1; i++) {434 t2a[i][0] = t2b[i][jm - 2];435 }436 }437 j = gp[procid].neighbors[RIGHT];438 if (j != -1) {439 t2b = (double **) psib[j];440 for (i = 1; i < im - 1; i++) {441 t2a[i][jm - 1] = t2b[i][1];442 }443 }444 445 t2a = (double **) q_multi[procid][numlev - 1];446 t2b = (double **) psib[procid];447 fac = 1.0 / (4.0 - ressqr * eig2);448 for (i = ist; i <= ien; i++) {449 t1a = (double *) t2a[i];450 t1b = (double *) t2b[i];451 t1c = (double *) t2b[i - 1];452 t1d = (double *) t2b[i + 1];453 for (j = jst; j <= jen; j++) {454 t1a[j] = fac * (t1d[j] + t1c[j] + t1b[j + 1] + t1b[j - 1] - ressqr * t1b[j]);455 }456 }457 458 multig(procid);459 460 for (i = istart; i <= iend; i++) {461 t1a = (double *) t2a[i];462 t1b = (double *) t2b[i];463 for (j = jstart; j <= jend; j++) {464 t1b[j] = t1a[j];465 }466 }467 468 #if defined(MULTIPLE_BARRIERS)469 BARRIER(bars->sl_prini, nprocs)470 #else471 BARRIER(bars->barrier, nprocs)472 #endif473 474 /* update the local running sum psibipriv by summing all the resulting475 values in that process's share of the psib matrix */476 477 t2a = (double **) psib[procid];478 psibipriv = 0.0;479 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {480 psibipriv = psibipriv + 0.25 * (t2a[0][0]);481 }482 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {483 psibipriv = psibipriv + 0.25 * (t2a[0][jm - 1]);484 }485 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {486 psibipriv = psibipriv + 0.25 * (t2a[im - 1][0]);487 }488 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {489 psibipriv = psibipriv + 0.25 * (t2a[im - 1][jm - 1]);490 }491 if (gp[procid].neighbors[UP] == -1) {492 t1a = (double *) t2a[0];493 for (j = firstcol; j <= lastcol; j++) {494 psibipriv = psibipriv + 0.5 * t1a[j];495 }496 }497 if (gp[procid].neighbors[DOWN] == -1) {498 t1a = (double *) t2a[im - 1];499 for (j = firstcol; j <= lastcol; j++) {500 psibipriv = psibipriv + 0.5 * t1a[j];501 }502 }503 if (gp[procid].neighbors[LEFT] == -1) {504 for (j = firstrow; j <= lastrow; j++) {505 psibipriv = psibipriv + 0.5 * t2a[j][0];506 }507 }508 if (gp[procid].neighbors[RIGHT] == -1) {509 for (j = firstrow; j <= lastrow; j++) {510 psibipriv = psibipriv + 0.5 * t2a[j][jm - 1];511 }512 }513 for (i = firstrow; i <= lastrow; i++) {514 t1a = (double *) t2a[i];515 for (iindex = firstcol; iindex <= lastcol; iindex++) {516 psibipriv = psibipriv + t1a[iindex];517 }518 }519 520 /* update the shared variable psibi by summing all the psibiprivs521 of the individual processes into it. note that this combined522 private and shared sum method avoids accessing the shared523 variable psibi once for every element of the matrix. */524 525 LOCK(locks->psibilock);526 global->psibi = global->psibi + psibipriv;527 UNLOCK(locks->psibilock);528 529 /* initialize psim matrices530 531 if there is more than one process, then split the processes532 between the two psim matrices; otherwise, let the single process533 work on one first and then the other */534 535 for (psiindex = 0; psiindex <= 1; psiindex++) {536 t2a = (double **) psim[procid][psiindex];537 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {538 t2a[0][0] = 0.0;539 }540 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {541 t2a[im - 1][0] = 0.0;542 }543 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {544 t2a[0][jm - 1] = 0.0;545 }546 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {547 t2a[im - 1][jm - 1] = 0.0;548 }549 if (gp[procid].neighbors[UP] == -1) {550 t1a = (double *) t2a[0];551 for (j = firstcol; j <= lastcol; j++) {552 t1a[j] = 0.0;553 }554 }555 if (gp[procid].neighbors[DOWN] == -1) {556 t1a = (double *) t2a[im - 1];557 for (j = firstcol; j <= lastcol; j++) {558 t1a[j] = 0.0;559 }560 }561 if (gp[procid].neighbors[LEFT] == -1) {562 for (j = firstrow; j <= lastrow; j++) {563 t2a[j][0] = 0.0;564 }565 }566 if (gp[procid].neighbors[RIGHT] == -1) {567 for (j = firstrow; j <= lastrow; j++) {568 t2a[j][jm - 1] = 0.0;569 }570 }571 for (i = firstrow; i <= lastrow; i++) {572 t1a = (double *) t2a[i];573 for (iindex = firstcol; iindex <= lastcol; iindex++) {574 t1a[iindex] = 0.0;575 }576 }577 }578 579 /* initialize psi matrices the same way */580 581 for (psiindex = 0; psiindex <= 1; psiindex++) {582 t2a = (double **) psi[procid][psiindex];583 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {584 t2a[0][0] = 0.0;585 }586 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {587 t2a[0][jm - 1] = 0.0;588 }589 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {590 t2a[im - 1][0] = 0.0;591 }592 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {593 t2a[im - 1][jm - 1] = 0.0;594 }595 if (gp[procid].neighbors[UP] == -1) {596 t1a = (double *) t2a[0];597 for (j = firstcol; j <= lastcol; j++) {598 t1a[j] = 0.0;599 }600 }601 if (gp[procid].neighbors[DOWN] == -1) {602 t1a = (double *) t2a[im - 1];603 for (j = firstcol; j <= lastcol; j++) {604 t1a[j] = 0.0;605 }606 }607 if (gp[procid].neighbors[LEFT] == -1) {608 for (j = firstrow; j <= lastrow; j++) {609 t2a[j][0] = 0.0;610 }611 }612 if (gp[procid].neighbors[RIGHT] == -1) {613 for (j = firstrow; j <= lastrow; j++) {614 t2a[j][jm - 1] = 0.0;615 }616 }617 for (i = firstrow; i <= lastrow; i++) {618 t1a = (double *) t2a[i];619 for (iindex = firstcol; iindex <= lastcol; iindex++) {620 t1a[iindex] = 0.0;621 }622 }623 }624 625 /* compute input curl of wind stress */626 627 628 t2a = (double **) tauz[procid];629 ysca1 = .5 * ysca;630 factor = -t0 * pi / ysca1;631 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {632 t2a[0][0] = 0.0;633 }634 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {635 t2a[im - 1][0] = 0.0;636 }637 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {638 sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;639 sintemp = sin(sintemp);640 t2a[0][jm - 1] = factor * sintemp;641 }642 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {643 sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;644 sintemp = sin(sintemp);645 t2a[im - 1][jm - 1] = factor * sintemp;646 }647 if (gp[procid].neighbors[UP] == -1) {648 t1a = (double *) t2a[0];649 for (j = firstcol; j <= lastcol; j++) {650 sintemp = pi * ((double) j + j_off) * res / ysca1;651 sintemp = sin(sintemp);652 curlt = factor * sintemp;653 t1a[j] = curlt;654 }655 }656 if (gp[procid].neighbors[DOWN] == -1) {657 t1a = (double *) t2a[im - 1];658 for (j = firstcol; j <= lastcol; j++) {659 sintemp = pi * ((double) j + j_off) * res / ysca1;660 sintemp = sin(sintemp);661 curlt = factor * sintemp;662 t1a[j] = curlt;663 }664 }665 if (gp[procid].neighbors[LEFT] == -1) {666 for (j = firstrow; j <= lastrow; j++) {667 t2a[j][0] = 0.0;668 }669 }670 if (gp[procid].neighbors[RIGHT] == -1) {671 sintemp = pi * ((double) jm - 1 + j_off) * res / ysca1;672 sintemp = sin(sintemp);673 curlt = factor * sintemp;674 for (j = firstrow; j <= lastrow; j++) {675 t2a[j][jm - 1] = curlt;676 }677 }678 for (i = firstrow; i <= lastrow; i++) {679 t1a = (double *) t2a[i];680 for (iindex = firstcol; iindex <= lastcol; iindex++) {681 sintemp = pi * ((double) iindex + j_off) * res / ysca1;682 sintemp = sin(sintemp);683 curlt = factor * sintemp;684 t1a[iindex] = curlt;685 }686 }687 688 #if defined(MULTIPLE_BARRIERS)689 BARRIER(bars->sl_onetime, nprocs)690 #else691 BARRIER(bars->barrier, nprocs)692 #endif693 694 /***************************************************************695 one-time stuff over at this point696 ***************************************************************/697 while (!endflag) {698 while ((!dayflag) || (!dhourflag)) {699 dayflag = 0;700 dhourflag = 0;701 if (nstep == 1) {702 for (i = 0; i < 10; i++) {703 gp[procid].steps_time[i] = 0;704 }705 if (procid == MASTER) {706 CLOCK(global->trackstart)707 }708 if ((procid == MASTER) || (do_stats)) {709 CLOCK(t1);710 (*gp[procid].total_time) = t1;711 (*gp[procid].multi_time) = 0;712 }713 /* POSSIBLE ENHANCEMENT: Here is where one might reset the714 statistics that one is measuring about the parallel execution */715 }716 717 slave2(procid, firstrow, lastrow, numrows, firstcol, lastcol, numcols);718 719 /* update time and step number720 note that these time and step variables are private i.e. every721 process has its own copy and keeps track of its own time */722 723 ttime = ttime + dtau;724 nstep = nstep + 1;725 day = ttime / 86400.0;726 727 if (day > ((double) outday0)) {728 dayflag = 1;729 iday = (long) day;730 dhour = dhour + dtau;731 if (dhour >= 86400.0) {732 dhourflag = 1;733 }734 }735 }736 dhour = 0.0;737 738 t2a = (double **) psium[procid];739 t2b = (double **) psim[procid][0];740 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {741 t2a[0][0] = t2a[0][0] + t2b[0][0];742 }743 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {744 t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0];745 }746 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {747 t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1];748 }749 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {750 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1];751 }752 if (gp[procid].neighbors[UP] == -1) {753 t1a = (double *) t2a[0];754 t1b = (double *) t2b[0];755 for (j = firstcol; j <= lastcol; j++) {756 t1a[j] = t1a[j] + t1b[j];757 }758 }759 if (gp[procid].neighbors[DOWN] == -1) {760 t1a = (double *) t2a[im - 1];761 t1b = (double *) t2b[im - 1];762 for (j = firstcol; j <= lastcol; j++) {763 t1a[j] = t1a[j] + t1b[j];764 }765 }766 if (gp[procid].neighbors[LEFT] == -1) {767 for (j = firstrow; j <= lastrow; j++) {768 t2a[j][0] = t2a[j][0] + t2b[j][0];769 }770 }771 if (gp[procid].neighbors[RIGHT] == -1) {772 for (j = firstrow; j <= lastrow; j++) {773 t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1];774 }775 }776 for (i = firstrow; i <= lastrow; i++) {777 t1a = (double *) t2a[i];778 t1b = (double *) t2b[i];779 for (iindex = firstcol; iindex <= lastcol; iindex++) {780 t1a[iindex] = t1a[iindex] + t1b[iindex];781 }782 }783 784 /* update values of psilm array to psilm + psim[2] */785 786 t2a = (double **) psilm[procid];787 t2b = (double **) psim[procid][1];788 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {789 t2a[0][0] = t2a[0][0] + t2b[0][0];790 }791 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {792 t2a[im - 1][0] = t2a[im - 1][0] + t2b[im - 1][0];793 }794 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {795 t2a[0][jm - 1] = t2a[0][jm - 1] + t2b[0][jm - 1];796 }797 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {798 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + t2b[im - 1][jm - 1];799 }800 if (gp[procid].neighbors[UP] == -1) {801 t1a = (double *) t2a[0];802 t1b = (double *) t2b[0];803 for (j = firstcol; j <= lastcol; j++) {804 t1a[j] = t1a[j] + t1b[j];805 }806 }807 if (gp[procid].neighbors[DOWN] == -1) {808 t1a = (double *) t2a[im - 1];809 t1b = (double *) t2b[im - 1];810 for (j = firstcol; j <= lastcol; j++) {811 t1a[j] = t1a[j] + t1b[j];812 }813 }814 if (gp[procid].neighbors[LEFT] == -1) {815 for (j = firstrow; j <= lastrow; j++) {816 t2a[j][0] = t2a[j][0] + t2b[j][0];817 }818 }819 if (gp[procid].neighbors[RIGHT] == -1) {820 for (j = firstrow; j <= lastrow; j++) {821 t2a[j][jm - 1] = t2a[j][jm - 1] + t2b[j][jm - 1];822 }823 }824 for (i = firstrow; i <= lastrow; i++) {825 t1a = (double *) t2a[i];826 t1b = (double *) t2b[i];827 for (iindex = firstcol; iindex <= lastcol; iindex++) {828 t1a[iindex] = t1a[iindex] + t1b[iindex];829 }830 }831 if (iday >= (long) outday3) {832 endflag = 1;833 }834 }835 if ((procid == MASTER) || (do_stats)) {836 CLOCK(t1);837 (*gp[procid].total_time) = t1 - (*gp[procid].total_time);838 }839 } -
soft/giet_vm/applications/ocean/slave2.C
r581 r589 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 16 2 17 /* ****************18 subroutine slave219 **************** */20 21 EXTERN_ENV22 23 #include <stdio.h>24 #include <math.h>25 #include <stdlib.h>26 27 #include "decs.h"28 29 void slave2(long procid, long firstrow, long lastrow, long numrows, long firstcol, long lastcol, long numcols)30 {31 long i;32 long j;33 long iindex;34 double hh1;35 double hh3;36 double hinv;37 double h1inv;38 long istart;39 long iend;40 long jstart;41 long jend;42 long ist;43 long ien;44 long jst;45 long jen;46 double ressqr;47 double psiaipriv;48 double f4;49 double timst;50 long psiindex;51 long i_off;52 long j_off;53 long multi_start;54 long multi_end;55 double **t2a;56 double **t2b;57 double **t2c;58 double **t2d;59 double **t2e;60 double **t2f;61 double **t2g;62 double **t2h;63 double *t1a;64 double *t1b;65 double *t1c;66 double *t1d;67 double *t1e;68 double *t1f;69 double *t1g;70 double *t1h;71 72 ressqr = lev_res[numlev - 1] * lev_res[numlev - 1];73 i_off = (*gp[procid].rownum) * numrows;74 j_off = (*gp[procid].colnum) * numcols;75 76 START_PHASE(procid, 1);77 78 /* ***************************************************************79 80 f i r s t p h a s e (of timestep calculation)81 82 ***************************************************************/83 84 t2a = (double **) ga[procid];85 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {86 t2a[0][0] = 0.0;87 }88 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {89 t2a[im - 1][0] = 0.0;90 }91 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {92 t2a[0][jm - 1] = 0.0;93 }94 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {95 t2a[im - 1][jm - 1] = 0.0;96 }97 if (gp[procid].neighbors[UP] == -1) {98 t1a = (double *) t2a[0];99 for (j = firstcol; j <= lastcol; j++) {100 t1a[j] = 0.0;101 }102 }103 if (gp[procid].neighbors[DOWN] == -1) {104 t1a = (double *) t2a[im - 1];105 for (j = firstcol; j <= lastcol; j++) {106 t1a[j] = 0.0;107 }108 }109 if (gp[procid].neighbors[LEFT] == -1) {110 for (j = firstrow; j <= lastrow; j++) {111 t2a[j][0] = 0.0;112 }113 }114 if (gp[procid].neighbors[RIGHT] == -1) {115 for (j = firstrow; j <= lastrow; j++) {116 t2a[j][jm - 1] = 0.0;117 }118 }119 for (i = firstrow; i <= lastrow; i++) {120 t1a = (double *) t2a[i];121 for (iindex = firstcol; iindex <= lastcol; iindex++) {122 t1a[iindex] = 0.0;123 }124 }125 126 t2a = (double **) gb[procid];127 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {128 t2a[0][0] = 0.0;129 }130 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {131 t2a[im - 1][0] = 0.0;132 }133 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {134 t2a[0][jm - 1] = 0.0;135 }136 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {137 t2a[im - 1][jm - 1] = 0.0;138 }139 if (gp[procid].neighbors[UP] == -1) {140 t1a = (double *) t2a[0];141 for (j = firstcol; j <= lastcol; j++) {142 t1a[j] = 0.0;143 }144 }145 if (gp[procid].neighbors[DOWN] == -1) {146 t1a = (double *) t2a[im - 1];147 for (j = firstcol; j <= lastcol; j++) {148 t1a[j] = 0.0;149 }150 }151 if (gp[procid].neighbors[LEFT] == -1) {152 for (j = firstrow; j <= lastrow; j++) {153 t2a[j][0] = 0.0;154 }155 }156 if (gp[procid].neighbors[RIGHT] == -1) {157 for (j = firstrow; j <= lastrow; j++) {158 t2a[j][jm - 1] = 0.0;159 }160 }161 for (i = firstrow; i <= lastrow; i++) {162 t1a = (double *) t2a[i];163 for (iindex = firstcol; iindex <= lastcol; iindex++) {164 t1a[iindex] = 0.0;165 }166 }167 168 /* put the laplacian of psi{1,3} in work1{1,2}169 note that psi(i,j,2) represents the psi3 array in170 the original equations */171 172 for (psiindex = 0; psiindex <= 1; psiindex++) {173 t2a = (double **) work1[procid][psiindex];174 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {175 t2a[0][0] = 0;176 }177 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {178 t2a[im - 1][0] = 0;179 }180 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {181 t2a[0][jm - 1] = 0;182 }183 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {184 t2a[im - 1][jm - 1] = 0;185 }186 laplacalc(procid, psi, work1, psiindex, firstrow, lastrow, firstcol, lastcol);187 }188 189 /* set values of work2 array to psi1 - psi3 */190 191 t2a = (double **) work2[procid];192 t2b = (double **) psi[procid][0];193 t2c = (double **) psi[procid][1];194 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {195 t2a[0][0] = t2b[0][0] - t2c[0][0];196 }197 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {198 t2a[im - 1][0] = t2b[im - 1][0] - t2c[im - 1][0];199 }200 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {201 t2a[0][jm - 1] = t2b[0][jm - 1] - t2c[0][jm - 1];202 }203 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {204 t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1] - t2c[im - 1][jm - 1];205 }206 if (gp[procid].neighbors[UP] == -1) {207 t1a = (double *) t2a[0];208 t1b = (double *) t2b[0];209 t1c = (double *) t2c[0];210 for (j = firstcol; j <= lastcol; j++) {211 t1a[j] = t1b[j] - t1c[j];212 }213 }214 if (gp[procid].neighbors[DOWN] == -1) {215 t1a = (double *) t2a[im - 1];216 t1b = (double *) t2b[im - 1];217 t1c = (double *) t2c[im - 1];218 for (j = firstcol; j <= lastcol; j++) {219 t1a[j] = t1b[j] - t1c[j];220 }221 }222 if (gp[procid].neighbors[LEFT] == -1) {223 for (j = firstrow; j <= lastrow; j++) {224 t2a[j][0] = t2b[j][0] - t2c[j][0];225 }226 }227 if (gp[procid].neighbors[RIGHT] == -1) {228 for (j = firstrow; j <= lastrow; j++) {229 t2a[j][jm - 1] = t2b[j][jm - 1] - t2c[j][jm - 1];230 }231 }232 for (i = firstrow; i <= lastrow; i++) {233 t1a = (double *) t2a[i];234 t1b = (double *) t2b[i];235 t1c = (double *) t2c[i];236 for (iindex = firstcol; iindex <= lastcol; iindex++) {237 t1a[iindex] = t1b[iindex] - t1c[iindex];238 }239 }240 241 /* set values of work3 array to h3/h * psi1 + h1/h * psi3 */242 243 t2a = (double **) work3[procid];244 hh3 = h3 / h;245 hh1 = h1 / h;246 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {247 t2a[0][0] = hh3 * t2a[0][0] + hh1 * t2c[0][0];248 }249 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {250 t2a[im - 1][0] = hh3 * t2a[im - 1][0] + hh1 * t2c[im - 1][0];251 }252 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {253 t2a[0][jm - 1] = hh3 * t2a[0][jm - 1] + hh1 * t2c[0][jm - 1];254 }255 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {256 t2a[im - 1][jm - 1] = hh3 * t2a[im - 1][jm - 1] + hh1 * t2c[im - 1][jm - 1];257 }258 if (gp[procid].neighbors[UP] == -1) {259 for (j = firstcol; j <= lastcol; j++) {260 t2a[0][j] = hh3 * t2a[0][j] + hh1 * t2c[0][j];261 }262 }263 if (gp[procid].neighbors[DOWN] == -1) {264 for (j = firstcol; j <= lastcol; j++) {265 t2a[im - 1][j] = hh3 * t2a[im - 1][j] + hh1 * t2c[im - 1][j];266 }267 }268 if (gp[procid].neighbors[LEFT] == -1) {269 for (j = firstrow; j <= lastrow; j++) {270 t2a[j][0] = hh3 * t2a[j][0] + hh1 * t2c[j][0];271 }272 }273 if (gp[procid].neighbors[RIGHT] == -1) {274 for (j = firstrow; j <= lastrow; j++) {275 t2a[j][jm - 1] = hh3 * t2a[j][jm - 1] + hh1 * t2c[j][jm - 1];276 }277 }278 for (i = firstrow; i <= lastrow; i++) {279 t1a = (double *) t2a[i];280 t1c = (double *) t2c[i];281 for (iindex = firstcol; iindex <= lastcol; iindex++) {282 t1a[iindex] = hh3 * t1a[iindex] + hh1 * t1c[iindex];283 }284 }285 286 /* set values of temparray{1,3} to psi{1,3} */287 288 for (psiindex = 0; psiindex <= 1; psiindex++) {289 t2a = (double **) temparray[procid][psiindex];290 t2b = (double **) psi[procid][psiindex];291 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {292 t2a[0][0] = t2b[0][0];293 }294 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {295 t2a[im - 1][0] = t2b[im - 1][0];296 }297 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {298 t2a[0][jm - 1] = t2b[0][jm - 1];299 }300 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {301 t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1];302 }303 if (gp[procid].neighbors[UP] == -1) {304 for (j = firstcol; j <= lastcol; j++) {305 t2a[0][j] = t2b[0][j];306 }307 }308 if (gp[procid].neighbors[DOWN] == -1) {309 for (j = firstcol; j <= lastcol; j++) {310 t2a[im - 1][j] = t2b[im - 1][j];311 }312 }313 if (gp[procid].neighbors[LEFT] == -1) {314 for (j = firstrow; j <= lastrow; j++) {315 t2a[j][0] = t2b[j][0];316 }317 }318 if (gp[procid].neighbors[RIGHT] == -1) {319 for (j = firstrow; j <= lastrow; j++) {320 t2a[j][jm - 1] = t2b[j][jm - 1];321 }322 }323 324 for (i = firstrow; i <= lastrow; i++) {325 t1a = (double *) t2a[i];326 t1b = (double *) t2b[i];327 for (iindex = firstcol; iindex <= lastcol; iindex++) {328 t1a[iindex] = t1b[iindex];329 }330 }331 }332 333 END_PHASE(procid, 1);334 335 #if defined(MULTIPLE_BARRIERS)336 BARRIER(bars->sl_phase_1, nprocs)337 #else338 BARRIER(bars->barrier, nprocs)339 #endif340 341 /* *******************************************************342 343 s e c o n d p h a s e344 345 *******************************************************346 347 set values of psi{1,3} to psim{1,3} */348 349 START_PHASE(procid, 2);350 351 for (psiindex = 0; psiindex <= 1; psiindex++) {352 t2a = (double **) psi[procid][psiindex];353 t2b = (double **) psim[procid][psiindex];354 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {355 t2a[0][0] = t2b[0][0];356 }357 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {358 t2a[0][jm - 1] = t2b[0][jm - 1];359 }360 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {361 t2a[im - 1][0] = t2b[im - 1][0];362 }363 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {364 t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1];365 }366 if (gp[procid].neighbors[UP] == -1) {367 for (j = firstcol; j <= lastcol; j++) {368 t2a[0][j] = t2b[0][j];369 }370 }371 if (gp[procid].neighbors[DOWN] == -1) {372 for (j = firstcol; j <= lastcol; j++) {373 t2a[im - 1][j] = t2b[im - 1][j];374 }375 }376 if (gp[procid].neighbors[LEFT] == -1) {377 for (j = firstrow; j <= lastrow; j++) {378 t2a[j][0] = t2b[j][0];379 }380 }381 if (gp[procid].neighbors[RIGHT] == -1) {382 for (j = firstrow; j <= lastrow; j++) {383 t2a[j][jm - 1] = t2b[j][jm - 1];384 }385 }386 387 for (i = firstrow; i <= lastrow; i++) {388 t1a = (double *) t2a[i];389 t1b = (double *) t2b[i];390 for (iindex = firstcol; iindex <= lastcol; iindex++) {391 t1a[iindex] = t1b[iindex];392 }393 }394 }395 396 /* put the laplacian of the psim array397 into the work7 array; first part of a three-laplacian398 calculation to compute the friction terms */399 400 for (psiindex = 0; psiindex <= 1; psiindex++) {401 t2a = (double **) work7[procid][psiindex];402 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {403 t2a[0][0] = 0;404 }405 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {406 t2a[im - 1][0] = 0;407 }408 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {409 t2a[0][jm - 1] = 0;410 }411 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {412 t2a[im - 1][jm - 1] = 0;413 }414 laplacalc(procid, psim, work7, psiindex, firstrow, lastrow, firstcol, lastcol);415 }416 417 /* to the values of the work1{1,2} arrays obtained from the418 laplacians of psi{1,2} in the previous phase, add to the419 elements of every column the corresponding value in the420 one-dimenional f array */421 422 for (psiindex = 0; psiindex <= 1; psiindex++) {423 t2a = (double **) work1[procid][psiindex];424 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {425 t2a[0][0] = t2a[0][0] + f[0];426 }427 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {428 t2a[im - 1][0] = t2a[im - 1][0] + f[0];429 }430 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {431 t2a[0][jm - 1] = t2a[0][jm - 1] + f[jmx[numlev - 1] - 1];432 }433 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {434 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + f[jmx[numlev - 1] - 1];435 }436 if (gp[procid].neighbors[UP] == -1) {437 for (j = firstcol; j <= lastcol; j++) {438 t2a[0][j] = t2a[0][j] + f[j + j_off];439 }440 }441 if (gp[procid].neighbors[DOWN] == -1) {442 for (j = firstcol; j <= lastcol; j++) {443 t2a[im - 1][j] = t2a[im - 1][j] + f[j + j_off];444 }445 }446 if (gp[procid].neighbors[LEFT] == -1) {447 for (j = firstrow; j <= lastrow; j++) {448 t2a[j][0] = t2a[j][0] + f[j + i_off];449 }450 }451 if (gp[procid].neighbors[RIGHT] == -1) {452 for (j = firstrow; j <= lastrow; j++) {453 t2a[j][jm - 1] = t2a[j][jm - 1] + f[j + i_off];454 }455 }456 for (i = firstrow; i <= lastrow; i++) {457 t1a = (double *) t2a[i];458 for (iindex = firstcol; iindex <= lastcol; iindex++) {459 t1a[iindex] = t1a[iindex] + f[iindex + j_off];460 }461 }462 }463 464 END_PHASE(procid, 2);465 466 #if defined(MULTIPLE_BARRIERS)467 BARRIER(bars->sl_phase_2, nprocs)468 #else469 BARRIER(bars->barrier, nprocs)470 #endif471 /* *******************************************************472 473 t h i r d p h a s e474 475 *******************************************************476 477 put the jacobian of the work1{1,2} and psi{1,3} arrays478 (the latter currently in temparray) in the work5{1,2} arrays */479 480 START_PHASE(procid, 3);481 482 for (psiindex = 0; psiindex <= 1; psiindex++) {483 jacobcalc2(work1, temparray, work5, psiindex, procid, firstrow, lastrow, firstcol, lastcol);484 }485 486 /* set values of psim{1,3} to temparray{1,3} */487 488 for (psiindex = 0; psiindex <= 1; psiindex++) {489 t2a = (double **) psim[procid][psiindex];490 t2b = (double **) temparray[procid][psiindex];491 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {492 t2a[0][0] = t2b[0][0];493 }494 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {495 t2a[im - 1][0] = t2b[im - 1][0];496 }497 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {498 t2a[0][jm - 1] = t2b[0][jm - 1];499 }500 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {501 t2a[im - 1][jm - 1] = t2b[im - 1][jm - 1];502 }503 if (gp[procid].neighbors[UP] == -1) {504 t1a = (double *) t2a[0];505 t1b = (double *) t2b[0];506 for (j = firstcol; j <= lastcol; j++) {507 t1a[j] = t1b[j];508 }509 }510 if (gp[procid].neighbors[DOWN] == -1) {511 t1a = (double *) t2a[im - 1];512 t1b = (double *) t2b[im - 1];513 for (j = firstcol; j <= lastcol; j++) {514 t1a[j] = t1b[j];515 }516 }517 if (gp[procid].neighbors[LEFT] == -1) {518 for (j = firstrow; j <= lastrow; j++) {519 t2a[j][0] = t2b[j][0];520 }521 }522 if (gp[procid].neighbors[RIGHT] == -1) {523 for (j = firstrow; j <= lastrow; j++) {524 t2a[j][jm - 1] = t2b[j][jm - 1];525 }526 }527 for (i = firstrow; i <= lastrow; i++) {528 t1a = (double *) t2a[i];529 t1b = (double *) t2b[i];530 for (iindex = firstcol; iindex <= lastcol; iindex++) {531 t1a[iindex] = t1b[iindex];532 }533 }534 }535 536 /* put the laplacian of the work7{1,2} arrays in the work4{1,2}537 arrays; second step in the three-laplacian friction calculation */538 539 for (psiindex = 0; psiindex <= 1; psiindex++) {540 laplacalc(procid, work7, work4, psiindex, firstrow, lastrow, firstcol, lastcol);541 }542 543 END_PHASE(procid, 3);544 545 #if defined(MULTIPLE_BARRIERS)546 BARRIER(bars->sl_phase_3, nprocs)547 #else548 BARRIER(bars->barrier, nprocs)549 #endif550 551 /* *******************************************************552 553 f o u r t h p h a s e554 555 *******************************************************556 557 put the jacobian of the work2 and work3 arrays in the work6558 array */559 560 START_PHASE(procid, 4);561 562 jacobcalc(work2, work3, work6, procid, firstrow, lastrow, firstcol, lastcol);563 564 /* put the laplacian of the work4{1,2} arrays in the work7{1,2}565 arrays; third step in the three-laplacian friction calculation */566 567 for (psiindex = 0; psiindex <= 1; psiindex++) {568 laplacalc(procid, work4, work7, psiindex, firstrow, lastrow, firstcol, lastcol);569 }570 571 END_PHASE(procid, 4);572 573 #if defined(MULTIPLE_BARRIERS)574 BARRIER(bars->sl_phase_4, nprocs)575 #else576 BARRIER(bars->barrier, nprocs)577 #endif578 579 /* *******************************************************580 581 f i f t h p h a s e582 583 *******************************************************584 585 use the values of the work5, work6 and work7 arrays586 computed in the previous time-steps to compute the587 ga and gb arrays */588 589 START_PHASE(procid, 5);590 591 hinv = 1.0 / h;592 h1inv = 1.0 / h1;593 594 t2a = (double **) ga[procid];595 t2b = (double **) gb[procid];596 t2c = (double **) work5[procid][0];597 t2d = (double **) work5[procid][1];598 t2e = (double **) work7[procid][0];599 t2f = (double **) work7[procid][1];600 t2g = (double **) work6[procid];601 t2h = (double **) tauz[procid];602 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {603 t2a[0][0] = t2c[0][0] - t2d[0][0] + eig2 * t2g[0][0] + h1inv * t2h[0][0] + lf * t2e[0][0] - lf * t2f[0][0];604 t2b[0][0] = hh1 * t2c[0][0] + hh3 * t2d[0][0] + hinv * t2h[0][0] + lf * hh1 * t2e[0][0] + lf * hh3 * t2f[0][0];605 }606 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {607 t2a[im - 1][0] = t2c[im - 1][0] - t2d[im - 1][0] + eig2 * t2g[im - 1][0] + h1inv * t2h[im - 1][0] + lf * t2e[im - 1][0] - lf * t2f[im - 1][0];608 t2b[im - 1][0] = hh1 * t2c[im - 1][0] + hh3 * t2d[im - 1][0] + hinv * t2h[im - 1][0] + lf * hh1 * t2e[im - 1][0] + lf * hh3 * t2f[im - 1][0];609 }610 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {611 t2a[0][jm - 1] = t2c[0][jm - 1] - t2d[0][jm - 1] + eig2 * t2g[0][jm - 1] + h1inv * t2h[0][jm - 1] + lf * t2e[0][jm - 1] - lf * t2f[0][jm - 1];612 t2b[0][jm - 1] = hh1 * t2c[0][jm - 1] + hh3 * t2d[0][jm - 1] + hinv * t2h[0][jm - 1] + lf * hh1 * t2e[0][jm - 1] + lf * hh3 * t2f[0][jm - 1];613 }614 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {615 t2a[im - 1][jm - 1] = t2c[im - 1][jm - 1] - t2d[im - 1][jm - 1] + eig2 * t2g[im - 1][jm - 1] + h1inv * t2h[im - 1][jm - 1] + lf * t2e[im - 1][jm - 1] - lf * t2f[im - 1][jm - 1];616 t2b[im - 1][jm - 1] = hh1 * t2c[im - 1][jm - 1] + hh3 * t2d[im - 1][jm - 1] + hinv * t2h[im - 1][jm - 1] + lf * hh1 * t2e[im - 1][jm - 1] + lf * hh3 * t2f[im - 1][jm - 1];617 }618 if (gp[procid].neighbors[UP] == -1) {619 t1a = (double *) t2a[0];620 t1b = (double *) t2b[0];621 t1c = (double *) t2c[0];622 t1d = (double *) t2d[0];623 t1e = (double *) t2e[0];624 t1f = (double *) t2f[0];625 t1g = (double *) t2g[0];626 t1h = (double *) t2h[0];627 for (j = firstcol; j <= lastcol; j++) {628 t1a[j] = t1c[j] - t1d[j] + eig2 * t1g[j] + h1inv * t1h[j] + lf * t1e[j] - lf * t1f[j];629 t1b[j] = hh1 * t1c[j] + hh3 * t1d[j] + hinv * t1h[j] + lf * hh1 * t1e[j] + lf * hh3 * t1f[j];630 }631 }632 if (gp[procid].neighbors[DOWN] == -1) {633 t1a = (double *) t2a[im - 1];634 t1b = (double *) t2b[im - 1];635 t1c = (double *) t2c[im - 1];636 t1d = (double *) t2d[im - 1];637 t1e = (double *) t2e[im - 1];638 t1f = (double *) t2f[im - 1];639 t1g = (double *) t2g[im - 1];640 t1h = (double *) t2h[im - 1];641 for (j = firstcol; j <= lastcol; j++) {642 t1a[j] = t1c[j] - t1d[j] + eig2 * t1g[j] + h1inv * t1h[j] + lf * t1e[j] - lf * t1f[j];643 t1b[j] = hh1 * t1c[j] + hh3 * t1d[j] + hinv * t1h[j] + lf * hh1 * t1e[j] + lf * hh3 * t1f[j];644 }645 }646 if (gp[procid].neighbors[LEFT] == -1) {647 for (j = firstrow; j <= lastrow; j++) {648 t2a[j][0] = t2c[j][0] - t2d[j][0] + eig2 * t2g[j][0] + h1inv * t2h[j][0] + lf * t2e[j][0] - lf * t2f[j][0];649 t2b[j][0] = hh1 * t2c[j][0] + hh3 * t2d[j][0] + hinv * t2h[j][0] + lf * hh1 * t2e[j][0] + lf * hh3 * t2f[j][0];650 }651 }652 if (gp[procid].neighbors[RIGHT] == -1) {653 for (j = firstrow; j <= lastrow; j++) {654 t2a[j][jm - 1] = t2c[j][jm - 1] - t2d[j][jm - 1] + eig2 * t2g[j][jm - 1] + h1inv * t2h[j][jm - 1] + lf * t2e[j][jm - 1] - lf * t2f[j][jm - 1];655 t2b[j][jm - 1] = hh1 * t2c[j][jm - 1] + hh3 * t2d[j][jm - 1] + hinv * t2h[j][jm - 1] + lf * hh1 * t2e[j][jm - 1] + lf * hh3 * t2f[j][jm - 1];656 }657 }658 659 for (i = firstrow; i <= lastrow; i++) {660 t1a = (double *) t2a[i];661 t1b = (double *) t2b[i];662 t1c = (double *) t2c[i];663 t1d = (double *) t2d[i];664 t1e = (double *) t2e[i];665 t1f = (double *) t2f[i];666 t1g = (double *) t2g[i];667 t1h = (double *) t2h[i];668 for (iindex = firstcol; iindex <= lastcol; iindex++) {669 t1a[iindex] = t1c[iindex] - t1d[iindex] + eig2 * t1g[iindex] + h1inv * t1h[iindex] + lf * t1e[iindex] - lf * t1f[iindex];670 t1b[iindex] = hh1 * t1c[iindex] + hh3 * t1d[iindex] + hinv * t1h[iindex] + lf * hh1 * t1e[iindex] + lf * hh3 * t1f[iindex];671 }672 }673 674 END_PHASE(procid, 5);675 676 #if defined(MULTIPLE_BARRIERS)677 BARRIER(bars->sl_phase_5, nprocs)678 #else679 BARRIER(bars->barrier, nprocs)680 #endif681 682 /* *******************************************************683 684 s i x t h p h a s e685 686 ******************************************************* */687 688 START_PHASE(procid, 6);689 690 istart = 1;691 iend = istart + gp[procid].rel_num_y[numlev - 1] - 1;692 jstart = 1;693 jend = jstart + gp[procid].rel_num_x[numlev - 1] - 1;694 ist = istart;695 ien = iend;696 jst = jstart;697 jen = jend;698 699 if (gp[procid].neighbors[UP] == -1) {700 istart = 0;701 }702 if (gp[procid].neighbors[LEFT] == -1) {703 jstart = 0;704 }705 if (gp[procid].neighbors[DOWN] == -1) {706 iend = im - 1;707 }708 if (gp[procid].neighbors[RIGHT] == -1) {709 jend = jm - 1;710 }711 t2a = (double **) rhs_multi[procid][numlev - 1];712 t2b = (double **) ga[procid];713 t2c = (double **) oldga[procid];714 t2d = (double **) q_multi[procid][numlev - 1];715 for (i = istart; i <= iend; i++) {716 t1a = (double *) t2a[i];717 t1b = (double *) t2b[i];718 for (j = jstart; j <= jend; j++) {719 t1a[j] = t1b[j] * ressqr;720 }721 }722 723 if (gp[procid].neighbors[UP] == -1) {724 t1d = (double *) t2d[0];725 t1b = (double *) t2b[0];726 for (j = jstart; j <= jend; j++) {727 t1d[j] = t1b[j];728 }729 }730 if (gp[procid].neighbors[DOWN] == -1) {731 t1d = (double *) t2d[im - 1];732 t1b = (double *) t2b[im - 1];733 for (j = jstart; j <= jend; j++) {734 t1d[j] = t1b[j];735 }736 }737 if (gp[procid].neighbors[LEFT] == -1) {738 for (i = istart; i <= iend; i++) {739 t2d[i][0] = t2b[i][0];740 }741 }742 if (gp[procid].neighbors[RIGHT] == -1) {743 for (i = istart; i <= iend; i++) {744 t2d[i][jm - 1] = t2b[i][jm - 1];745 }746 }747 //fac = 1.0 / (4.0 - ressqr*eig2);748 for (i = ist; i <= ien; i++) {749 t1d = (double *) t2d[i];750 t1c = (double *) t2c[i];751 for (j = jst; j <= jen; j++) {752 t1d[j] = t1c[j];753 }754 }755 756 if ((procid == MASTER) || (do_stats)) {757 CLOCK(multi_start);758 }759 760 multig(procid);761 762 if ((procid == MASTER) || (do_stats)) {763 CLOCK(multi_end);764 (*gp[procid].multi_time) += (multi_end - multi_start);765 }766 767 /* the shared sum variable psiai is initialized to 0 at768 every time-step */769 770 if (procid == MASTER) {771 global->psiai = 0.0;772 }773 774 /* copy the solution for use as initial guess in next time-step */775 776 for (i = istart; i <= iend; i++) {777 t1b = (double *) t2b[i];778 t1c = (double *) t2c[i];779 t1d = (double *) t2d[i];780 for (j = jstart; j <= jend; j++) {781 t1b[j] = t1d[j];782 t1c[j] = t1d[j];783 }784 }785 786 END_PHASE(procid, 6);787 788 #if defined(MULTIPLE_BARRIERS)789 BARRIER(bars->sl_phase_6, nprocs)790 #else791 BARRIER(bars->barrier, nprocs)792 #endif793 794 /* *******************************************************795 796 s e v e n t h p h a s e797 798 *******************************************************799 800 every process computes the running sum for its assigned portion801 in a private variable psiaipriv */802 803 START_PHASE(procid, 7);804 805 psiaipriv = 0.0;806 t2a = (double **) ga[procid];807 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {808 psiaipriv = psiaipriv + 0.25 * (t2a[0][0]);809 }810 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {811 psiaipriv = psiaipriv + 0.25 * (t2a[0][jm - 1]);812 }813 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {814 psiaipriv = psiaipriv + 0.25 * (t2a[im - 1][0]);815 }816 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {817 psiaipriv = psiaipriv + 0.25 * (t2a[im - 1][jm - 1]);818 }819 if (gp[procid].neighbors[UP] == -1) {820 t1a = (double *) t2a[0];821 for (j = firstcol; j <= lastcol; j++) {822 psiaipriv = psiaipriv + 0.5 * t1a[j];823 }824 }825 if (gp[procid].neighbors[DOWN] == -1) {826 t1a = (double *) t2a[im - 1];827 for (j = firstcol; j <= lastcol; j++) {828 psiaipriv = psiaipriv + 0.5 * t1a[j];829 }830 }831 if (gp[procid].neighbors[LEFT] == -1) {832 for (j = firstrow; j <= lastrow; j++) {833 psiaipriv = psiaipriv + 0.5 * t2a[j][0];834 }835 }836 if (gp[procid].neighbors[RIGHT] == -1) {837 for (j = firstrow; j <= lastrow; j++) {838 psiaipriv = psiaipriv + 0.5 * t2a[j][jm - 1];839 }840 }841 for (i = firstrow; i <= lastrow; i++) {842 t1a = (double *) t2a[i];843 for (iindex = firstcol; iindex <= lastcol; iindex++) {844 psiaipriv = psiaipriv + t1a[iindex];845 }846 }847 848 /* after computing its private sum, every process adds that to the849 shared running sum psiai */850 851 LOCK(locks->psiailock)852 global->psiai = global->psiai + psiaipriv;853 UNLOCK(locks->psiailock)854 855 END_PHASE(procid, 7);856 857 #if defined(MULTIPLE_BARRIERS)858 BARRIER(bars->sl_phase_7, nprocs)859 #else860 BARRIER(bars->barrier, nprocs)861 #endif862 863 /* *******************************************************864 865 e i g h t h p h a s e866 867 *******************************************************868 869 augment ga(i,j) with [-psiai/psibi]*psib(i,j) */870 871 START_PHASE(procid, 8);872 873 f4 = (-global->psiai) /(global->psibi);874 875 t2a = (double **) ga[procid];876 t2b = (double **) psib[procid];877 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {878 t2a[0][0] = t2a[0][0] + f4 * t2b[0][0];879 }880 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {881 t2a[im - 1][0] = t2a[im - 1][0] + f4 * t2b[im - 1][0];882 }883 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {884 t2a[0][jm - 1] = t2a[0][jm - 1] + f4 * t2b[0][jm - 1];885 }886 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {887 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + f4 * t2b[im - 1][jm - 1];888 }889 if (gp[procid].neighbors[UP] == -1) {890 t1a = (double *) t2a[0];891 t1b = (double *) t2b[0];892 for (j = firstcol; j <= lastcol; j++) {893 t1a[j] = t1a[j] + f4 * t1b[j];894 }895 }896 if (gp[procid].neighbors[DOWN] == -1) {897 t1a = (double *) t2a[im - 1];898 t1b = (double *) t2b[im - 1];899 for (j = firstcol; j <= lastcol; j++) {900 t1a[j] = t1a[j] + f4 * t1b[j];901 }902 }903 if (gp[procid].neighbors[LEFT] == -1) {904 for (j = firstrow; j <= lastrow; j++) {905 t2a[j][0] = t2a[j][0] + f4 * t2b[j][0];906 }907 }908 if (gp[procid].neighbors[RIGHT] == -1) {909 for (j = firstrow; j <= lastrow; j++) {910 t2a[j][jm - 1] = t2a[j][jm - 1] + f4 * t2b[j][jm - 1];911 }912 }913 for (i = firstrow; i <= lastrow; i++) {914 t1a = (double *) t2a[i];915 t1b = (double *) t2b[i];916 for (iindex = firstcol; iindex <= lastcol; iindex++) {917 t1a[iindex] = t1a[iindex] + f4 * t1b[iindex];918 }919 }920 921 t2a = (double **) rhs_multi[procid][numlev - 1];922 t2b = (double **) gb[procid];923 t2c = (double **) oldgb[procid];924 t2d = (double **) q_multi[procid][numlev - 1];925 for (i = istart; i <= iend; i++) {926 t1a = (double *) t2a[i];927 t1b = (double *) t2b[i];928 for (j = jstart; j <= jend; j++) {929 t1a[j] = t1b[j] * ressqr;930 }931 }932 if (gp[procid].neighbors[UP] == -1) {933 t1d = (double *) t2d[0];934 t1b = (double *) t2b[0];935 for (j = jstart; j <= jend; j++) {936 t1d[j] = t1b[j];937 }938 }939 if (gp[procid].neighbors[DOWN] == -1) {940 t1d = (double *) t2d[im - 1];941 t1b = (double *) t2b[im - 1];942 for (j = jstart; j <= jend; j++) {943 t1d[j] = t1b[j];944 }945 }946 if (gp[procid].neighbors[LEFT] == -1) {947 for (i = istart; i <= iend; i++) {948 t2d[i][0] = t2b[i][0];949 }950 }951 if (gp[procid].neighbors[RIGHT] == -1) {952 for (i = istart; i <= iend; i++) {953 t2d[i][jm - 1] = t2b[i][jm - 1];954 }955 }956 //fac = 1.0 / (4.0 - ressqr*eig2);957 for (i = ist; i <= ien; i++) {958 t1d = (double *) t2d[i];959 t1c = (double *) t2c[i];960 for (j = jst; j <= jen; j++) {961 t1d[j] = t1c[j];962 }963 }964 965 if ((procid == MASTER) || (do_stats)) {966 CLOCK(multi_start);967 }968 969 multig(procid);970 971 if ((procid == MASTER) || (do_stats)) {972 CLOCK(multi_end);973 (*gp[procid].multi_time) += (multi_end - multi_start);974 }975 976 for (i = istart; i <= iend; i++) {977 t1b = (double *) t2b[i];978 t1c = (double *) t2c[i];979 t1d = (double *) t2d[i];980 for (j = jstart; j <= jend; j++) {981 t1b[j] = t1d[j];982 t1c[j] = t1d[j];983 }984 }985 986 END_PHASE(procid, 8);987 988 #if defined(MULTIPLE_BARRIERS)989 BARRIER(bars->sl_phase_8, nprocs)990 #else991 BARRIER(bars->barrier, nprocs)992 #endif993 994 /* *******************************************************995 996 n i n t h p h a s e997 998 *******************************************************999 1000 put appropriate linear combinations of ga and gb in work2 and work3;1001 note that here (as in most cases) the constant multipliers are made1002 private variables; the specific order in which things are done is1003 chosen in order to hopefully reuse things brought into the cache1004 1005 note that here again we choose to have all processes share the work1006 on both matrices despite the fact that the work done per element1007 is the same, because the operand matrices are the same in both cases */1008 1009 START_PHASE(procid, 9);1010 1011 t2a = (double **) ga[procid];1012 t2b = (double **) gb[procid];1013 t2c = (double **) work2[procid];1014 t2d = (double **) work3[procid];1015 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {1016 t2c[0][0] = t2b[0][0] - hh1 * t2a[0][0];1017 t2d[0][0] = t2b[0][0] + hh3 * t2a[0][0];1018 }1019 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {1020 t2c[im - 1][0] = t2b[im - 1][0] - hh1 * t2a[im - 1][0];1021 t2d[im - 1][0] = t2b[im - 1][0] + hh3 * t2a[im - 1][0];1022 }1023 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {1024 t2c[0][jm - 1] = t2b[0][jm - 1] - hh1 * t2a[0][jm - 1];1025 t2d[0][jm - 1] = t2b[0][jm - 1] + hh3 * t2a[0][jm - 1];1026 }1027 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {1028 t2c[im - 1][jm - 1] = t2b[im - 1][jm - 1] - hh1 * t2a[im - 1][jm - 1];1029 t2d[im - 1][jm - 1] = t2b[im - 1][jm - 1] + hh3 * t2a[im - 1][jm - 1];1030 }1031 if (gp[procid].neighbors[UP] == -1) {1032 t1a = (double *) t2a[0];1033 t1b = (double *) t2b[0];1034 t1c = (double *) t2c[0];1035 t1d = (double *) t2d[0];1036 for (j = firstcol; j <= lastcol; j++) {1037 t1d[j] = t1b[j] + hh3 * t1a[j];1038 t1c[j] = t1b[j] - hh1 * t1a[j];1039 }1040 }1041 if (gp[procid].neighbors[DOWN] == -1) {1042 t1a = (double *) t2a[im - 1];1043 t1b = (double *) t2b[im - 1];1044 t1c = (double *) t2c[im - 1];1045 t1d = (double *) t2d[im - 1];1046 for (j = firstcol; j <= lastcol; j++) {1047 t1d[j] = t1b[j] + hh3 * t1a[j];1048 t1c[j] = t1b[j] - hh1 * t1a[j];1049 }1050 }1051 if (gp[procid].neighbors[LEFT] == -1) {1052 for (j = firstrow; j <= lastrow; j++) {1053 t2d[j][0] = t2b[j][0] + hh3 * t2a[j][0];1054 t2c[j][0] = t2b[j][0] - hh1 * t2a[j][0];1055 }1056 }1057 if (gp[procid].neighbors[RIGHT] == -1) {1058 for (j = firstrow; j <= lastrow; j++) {1059 t2d[j][jm - 1] = t2b[j][jm - 1] + hh3 * t2a[j][jm - 1];1060 t2c[j][jm - 1] = t2b[j][jm - 1] - hh1 * t2a[j][jm - 1];1061 }1062 }1063 1064 for (i = firstrow; i <= lastrow; i++) {1065 t1a = (double *) t2a[i];1066 t1b = (double *) t2b[i];1067 t1c = (double *) t2c[i];1068 t1d = (double *) t2d[i];1069 for (iindex = firstcol; iindex <= lastcol; iindex++) {1070 t1d[iindex] = t1b[iindex] + hh3 * t1a[iindex];1071 t1c[iindex] = t1b[iindex] - hh1 * t1a[iindex];1072 }1073 }1074 1075 END_PHASE(procid, 9);1076 1077 #if defined(MULTIPLE_BARRIERS)1078 BARRIER(bars->sl_phase_9, nprocs)1079 #else1080 BARRIER(bars->barrier, nprocs)1081 #endif1082 1083 /* *******************************************************1084 1085 t e n t h p h a s e1086 1087 *******************************************************/1088 1089 START_PHASE(procid, 10);1090 timst = 2 * dtau;1091 1092 /* update the psi{1,3} matrices by adding 2*dtau*work3 to each */1093 1094 t2a = (double **) psi[procid][0];1095 t2b = (double **) work3[procid];1096 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {1097 t2a[0][0] = t2a[0][0] + timst * t2b[0][0];1098 }1099 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {1100 t2a[im - 1][0] = t2a[im - 1][0] + timst * t2b[im - 1][0];1101 }1102 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {1103 t2a[0][jm - 1] = t2a[0][jm - 1] + timst * t2b[0][jm - 1];1104 }1105 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {1106 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + timst * t2b[im - 1][jm - 1];1107 }1108 if (gp[procid].neighbors[UP] == -1) {1109 t1a = (double *) t2a[0];1110 t1b = (double *) t2b[0];1111 for (j = firstcol; j <= lastcol; j++) {1112 t1a[j] = t1a[j] + timst * t1b[j];1113 }1114 }1115 if (gp[procid].neighbors[DOWN] == -1) {1116 t1a = (double *) t2a[im - 1];1117 t1b = (double *) t2b[im - 1];1118 for (j = firstcol; j <= lastcol; j++) {1119 t1a[j] = t1a[j] + timst * t1b[j];1120 }1121 }1122 if (gp[procid].neighbors[LEFT] == -1) {1123 for (j = firstrow; j <= lastrow; j++) {1124 t2a[j][0] = t2a[j][0] + timst * t2b[j][0];1125 }1126 }1127 if (gp[procid].neighbors[RIGHT] == -1) {1128 for (j = firstrow; j <= lastrow; j++) {1129 t2a[j][jm - 1] = t2a[j][jm - 1] + timst * t2b[j][jm - 1];1130 }1131 }1132 for (i = firstrow; i <= lastrow; i++) {1133 t1a = (double *) t2a[i];1134 t1b = (double *) t2b[i];1135 for (iindex = firstcol; iindex <= lastcol; iindex++) {1136 t1a[iindex] = t1a[iindex] + timst * t1b[iindex];1137 }1138 }1139 1140 t2a = (double **) psi[procid][1];1141 t2b = (double **) work2[procid];1142 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[LEFT] == -1)) {1143 t2a[0][0] = t2a[0][0] + timst * t2b[0][0];1144 }1145 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[LEFT] == -1)) {1146 t2a[im - 1][0] = t2a[im - 1][0] + timst * t2b[im - 1][0];1147 }1148 if ((gp[procid].neighbors[UP] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {1149 t2a[0][jm - 1] = t2a[0][jm - 1] + timst * t2b[0][jm - 1];1150 }1151 if ((gp[procid].neighbors[DOWN] == -1) && (gp[procid].neighbors[RIGHT] == -1)) {1152 t2a[im - 1][jm - 1] = t2a[im - 1][jm - 1] + timst * t2b[im - 1][jm - 1];1153 }1154 if (gp[procid].neighbors[UP] == -1) {1155 t1a = (double *) t2a[0];1156 t1b = (double *) t2b[0];1157 for (j = firstcol; j <= lastcol; j++) {1158 t1a[j] = t1a[j] + timst * t1b[j];1159 }1160 }1161 if (gp[procid].neighbors[DOWN] == -1) {1162 t1a = (double *) t2a[im - 1];1163 t1b = (double *) t2b[im - 1];1164 for (j = firstcol; j <= lastcol; j++) {1165 t1a[j] = t1a[j] + timst * t1b[j];1166 }1167 }1168 if (gp[procid].neighbors[LEFT] == -1) {1169 for (j = firstrow; j <= lastrow; j++) {1170 t2a[j][0] = t2a[j][0] + timst * t2b[j][0];1171 }1172 }1173 if (gp[procid].neighbors[RIGHT] == -1) {1174 for (j = firstrow; j <= lastrow; j++) {1175 t2a[j][jm - 1] = t2a[j][jm - 1] + timst * t2b[j][jm - 1];1176 }1177 }1178 1179 for (i = firstrow; i <= lastrow; i++) {1180 t1a = (double *) t2a[i];1181 t1b = (double *) t2b[i];1182 for (iindex = firstcol; iindex <= lastcol; iindex++) {1183 t1a[iindex] = t1a[iindex] + timst * t1b[iindex];1184 }1185 }1186 1187 END_PHASE(procid, 10);1188 1189 #if defined(MULTIPLE_BARRIERS)1190 BARRIER(bars->sl_phase_10, nprocs)1191 #else1192 BARRIER(bars->barrier, nprocs)1193 #endif1194 } -
soft/giet_vm/applications/ocean/subblock.C
r581 r589 1 /*************************************************************************/ 2 /* */ 3 /* Copyright (c) 1994 Stanford University */ 4 /* */ 5 /* All rights reserved. */ 6 /* */ 7 /* Permission is given to use, copy, and modify this software for any */ 8 /* non-commercial purpose as long as this copyright notice is not */ 9 /* removed. All other uses, including redistribution in whole or in */ 10 /* part, are forbidden without prior written permission. */ 11 /* */ 12 /* This software is provided with absolutely no warranty and no */ 13 /* support. */ 14 /* */ 15 /*************************************************************************/ 1 #line 115 "/Users/alain/soc/giet_vm/applications/ocean/null_macros/c.m4.null.GIET" 16 2 17 EXTERN_ENV18 19 #include <stdio.h>20 #include <math.h>21 22 #include "decs.h"23 24 25 void subblock()26 {27 long i;28 long j;29 long k;30 long xportion;31 long yportion;32 long my_num;33 34 /* Determine starting coord and number of points to process in */35 /* each direction */36 37 for (i = 0; i < numlev; i++) {38 xportion = (jmx[i] - 2) / xprocs;39 //xextra = (jmx[i] - 2) % xprocs;40 for (j = 0; j < xprocs; j++) {41 for (k = 0; k < yprocs; k++) {42 gp[k * xprocs + j].rel_num_x[i] = xportion;43 }44 }45 yportion = (imx[i] - 2) / yprocs;46 //yextra = (imx[i] - 2) % yprocs;47 for (j = 0; j < yprocs; j++) {48 for (k = 0; k < xprocs; k++) {49 gp[j * xprocs + k].rel_num_y[i] = yportion;50 }51 }52 }53 54 for (my_num = 0; my_num < nprocs; my_num++) {55 for (i = 0; i < numlev; i++) {56 gp[my_num].rlist[i] = 1;57 gp[my_num].rljst[i] = 1;58 gp[my_num].rlien[i] = gp[my_num].rlist[i] + gp[my_num].rel_num_y[i];59 gp[my_num].rljen[i] = gp[my_num].rljst[i] + gp[my_num].rel_num_x[i];60 gp[my_num].eist[i] = gp[my_num].rlist[i] + 1;61 gp[my_num].oist[i] = gp[my_num].rlist[i];62 gp[my_num].ejst[i] = gp[my_num].rljst[i] + 1;63 gp[my_num].ojst[i] = gp[my_num].rljst[i];64 }65 }66 67 for (i = 0; i < nprocs; i++) {68 gp[i].neighbors[LEFT] = -1;69 gp[i].neighbors[RIGHT] = -1;70 gp[i].neighbors[UP] = -1;71 gp[i].neighbors[DOWN] = -1;72 gp[i].neighbors[UPLEFT] = -1;73 gp[i].neighbors[UPRIGHT] = -1;74 gp[i].neighbors[DOWNLEFT] = -1;75 gp[i].neighbors[DOWNRIGHT] = -1;76 77 if (i >= xprocs) {78 gp[i].neighbors[UP] = i - xprocs;79 }80 if (i < nprocs - xprocs) {81 gp[i].neighbors[DOWN] = i + xprocs;82 }83 if ((i % xprocs) > 0) {84 gp[i].neighbors[LEFT] = i - 1;85 }86 if ((i % xprocs) < (xprocs - 1)) {87 gp[i].neighbors[RIGHT] = i + 1;88 }89 90 j = gp[i].neighbors[UP];91 92 if (j != -1) {93 if ((j % xprocs) > 0) {94 gp[i].neighbors[UPLEFT] = j - 1;95 }96 if ((j % xprocs) < (xprocs - 1)) {97 gp[i].neighbors[UPRIGHT] = j + 1;98 }99 }100 101 j = gp[i].neighbors[DOWN];102 103 if (j != -1) {104 if ((j % xprocs) > 0) {105 gp[i].neighbors[DOWNLEFT] = j - 1;106 }107 if ((j % xprocs) < (xprocs - 1)) {108 gp[i].neighbors[DOWNRIGHT] = j + 1;109 }110 }111 }112 113 for (i = 0; i < nprocs; i++) {114 (*gp[i].rownum) = i / xprocs;115 (*gp[i].colnum) = i % xprocs;116 }117 } -
soft/giet_vm/applications/router/Makefile
r191 r589 1 APP_NAME=router2 1 3 USE+= stdio.o 4 USE+= mwmr_channel.o 5 6 USES=$(patsubst %,$(BUILD_PATH)/$(LIB_NAME)/%,$(USE)) 2 APP_NAME = router 7 3 8 4 OBJS= main.o 9 5 10 all: $(APP_NAME).elf 6 LIBS= -L../../build/libs -luser 11 7 12 BIN_NAME_PATH=$(BUILD_PATH)$(APP_NAME).elf 8 INCLUDES = -I. -I../.. -I../../giet_libs -I../../giet_xml 13 9 14 $(APP_NAME).elf: $(OBJS) $(APP_NAME).ld 15 $(LD) -o $(BIN_NAME_PATH) -T $(APP_NAME).ld $(OBJS) $(USES) 16 $(DU) -D $(BIN_NAME_PATH) > $@.txt 10 LIB_DEPS = ../../build/libs/libuser.a 11 12 appli.elf: $(OBJS) $(APP_NAME).ld $(LIBS_DEPS) 13 $(LD) -o $@ -T $(APP_NAME).ld $(OBJS) $(LIBS) 14 $(DU) -D $@ > $@.txt 17 15 18 16 %.o: %.c 19 $(CC) $(INCLUDE) $(CFLAGS) $($*.o_CFLAGS) -c -o $@ $< 20 $(DU) -D $@ > $@.txt 17 $(CC) $(INCLUDES) $(CFLAGS) -c -o $@ $< 21 18 22 19 clean: 23 rm -f *.o *.elf *.txt core *~ 2>$(TRASH) 24 rm $(BIN_NAME_PATH) 2>$(TRASH) 20 rm -f *.o *.elf *.txt core *~ -
soft/giet_vm/applications/router/router.py
r457 r589 26 26 #################################################################################### 27 27 28 ###################### ###29 def router( mapping ):28 ###################### 29 def extend( mapping ): 30 30 31 31 x_size = mapping.x_size … … 54 54 mapping.addVseg( vspace, 'router_data_0', data_0_base , data_0_size, 55 55 'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM', 56 binpath = 'build/router/ router.elf',56 binpath = 'build/router/appli.elf', 57 57 local = False ) 58 58 … … 60 60 mapping.addVseg( vspace, 'router_data_1', data_1_base , data_1_size, 61 61 'C_WU', vtype = 'ELF', x = x_size - 1, y = y_size - 1, pseg = 'RAM', 62 binpath = 'build/router/ router.elf',62 binpath = 'build/router/appli.elf', 63 63 local = False ) 64 64 … … 108 108 if __name__ == '__main__': 109 109 110 vspace = router( Mapping( 'test', 2, 2, 4 ) )110 vspace = extend( Mapping( 'test', 2, 2, 4 ) ) 111 111 print vspace.xml() 112 112 -
soft/giet_vm/applications/sort/sort.py
r502 r589 4 4 5 5 #################################################################################### 6 # file : sort.py (for the sort application)6 # file : sort.py 7 7 # date : may 2014 8 8 # author : Alain Greiner … … 20 20 #################################################################################### 21 21 22 #################### 23 def sort( mapping ):22 ###################### 23 def extend( mapping ): 24 24 25 25 x_size = mapping.x_size … … 50 50 mapping.addVseg( vspace, 'sort_data', data_base , data_size, 51 51 'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM', 52 binpath = 'build/sort/ sort.elf',52 binpath = 'build/sort/appli.elf', 53 53 local = False ) 54 54 … … 61 61 mapping.addVseg( vspace, 'sort_code', code_base , code_size, 62 62 'CXWU', vtype = 'ELF', x = x, y = y, pseg = 'RAM', 63 binpath = 'build/sort/ sort.elf',63 binpath = 'build/sort/appli.elf', 64 64 local = True ) 65 65 … … 113 113 if __name__ == '__main__': 114 114 115 vspace = sort( Mapping( 'test', 2, 2, 4 ) )115 vspace = extend( Mapping( 'test', 2, 2, 4 ) ) 116 116 print vspace.xml() 117 117 -
soft/giet_vm/applications/transpose/main.c
r574 r589 4 4 // author : Alain Greiner 5 5 /////////////////////////////////////////////////////////////////////////////////////// 6 // This multi-threaded application makes a transpose for a NN*NN pixels 7 // sequence of images. 6 // This multi-threaded application makes a transpose for a NN*NN pixels image. 8 7 // It can run on a multi-processors, multi-clusters architecture, with one thread 9 8 // per processor. 10 9 // 11 // The image sequence is read from a file (one byte per pixel). 10 // The image is read from a file (one byte per pixel), transposed and 11 // saved in a second file. Then the transposed image is read from the second file, 12 // transposed again and saved in a third file. 13 // 12 14 // The input and output buffers containing the image are distributed in all clusters. 13 15 // 14 // - The image size NN must fit the frame buffer size : 128 bytes16 // - The image size NN must fit the frame buffer size. 15 17 // - The block size in block device must be 512 bytes. 16 // - The number of clusters must be a power of 2 no larger than 3217 // - The number of processors per cluster must be a power of 2 no larger than 4 18 // - The number of clusters must be a power of 2 no larger than 64. 19 // - The number of processors per cluster must be a power of 2 no larger than 4. 18 20 // 19 21 // For each image the application makes a self test (checksum for each line). … … 25 27 #include "malloc.h" 26 28 27 #define BLOCK_SIZE 512 // block size on disk 28 #define CLUSTERS_MAX 32 // max number of clusters 29 #define PROCS_MAX 4 // max number of processors per cluster 30 #define NN 256 // image size : nlines = npixels 31 #define NB_IMAGES 1 // number of images to be handled 32 #define FILE_PATHNAME "misc/lena.raw" // pathname on virtual disk 33 #define INSTRUMENTATION_OK 0 // display statistics on TTY when non zero 29 #define BLOCK_SIZE 512 // block size on disk 30 #define X_MAX 8 // max number of clusters in row 31 #define Y_MAX 8 // max number of clusters in column 32 #define PROCS_MAX 4 // max number of procs per cluster 33 #define CLUSTER_MAX (X_MAX * Y_MAX) // max number of clusters 34 #define NN 256 // image size : nlines = npixels 35 #define INITIAL_FILE_PATH "misc/lena.raw" // pathname on virtual disk 36 #define TRANSPOSED_FILE_PATH "/home/lena_transposed.raw" // pathname on virtual disk 37 #define RESTORED_FILE_PATH "/home/lena_restored.raw" // pathname on virtual disk 38 #define INSTRUMENTATION_OK 1 // display statistics on TTY 34 39 35 40 /////////////////////////////////////////////////////// … … 38 43 39 44 // instrumentation counters for each processor in each cluster 40 unsigned int LOAD_START[CLUSTERS_MAX][PROCS_MAX]; 41 unsigned int LOAD_END [CLUSTERS_MAX][PROCS_MAX]; 42 unsigned int TRSP_START[CLUSTERS_MAX][PROCS_MAX]; 43 unsigned int TRSP_END [CLUSTERS_MAX][PROCS_MAX]; 44 unsigned int DISP_START[CLUSTERS_MAX][PROCS_MAX]; 45 unsigned int DISP_END [CLUSTERS_MAX][PROCS_MAX]; 45 unsigned int LOAD_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 46 unsigned int LOAD_END [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 47 unsigned int TRSP_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 48 unsigned int TRSP_END [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 49 unsigned int DISP_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 50 unsigned int DISP_END [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 51 unsigned int STOR_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 52 unsigned int STOR_END [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 46 53 47 54 // arrays of pointers on distributed buffers 48 55 // one input buffer & one output buffer per cluster 49 unsigned char* buf_in [CLUSTER S_MAX];50 unsigned char* buf_out[CLUSTER S_MAX];56 unsigned char* buf_in [CLUSTER_MAX]; 57 unsigned char* buf_out[CLUSTER_MAX]; 51 58 52 59 // checksum variables … … 57 64 giet_sqt_barrier_t barrier; 58 65 59 volatile unsigned int init_ok = 0; 66 volatile unsigned int global_init_ok = 0; 67 volatile unsigned int local_init_ok[X_MAX][Y_MAX] = {{ 0 }}; 60 68 61 69 ////////////////////////////////////////// … … 63 71 ////////////////////////////////////////// 64 72 { 65 66 73 unsigned int l; // line index for loops 67 74 unsigned int p; // pixel index for loops 68 unsigned int c; // cluster index for loops69 75 70 76 // processor identifiers … … 82 88 giet_procs_number( &x_size , &y_size , &nprocs ); 83 89 84 unsigned int nclusters = x_size * y_size; // number of clusters 85 unsigned int ntasks = x_size * y_size * nprocs; // number of tasks 86 unsigned int npixels = NN * NN; // pixels per image 87 unsigned int nblocks = npixels / BLOCK_SIZE; // blocks per image 88 unsigned int image = 0; // image counter 89 int file = 0; // file descriptor 90 unsigned int cluster_id = (x * y_size) + y; // "continuous" index 91 unsigned int task_id = (cluster_id * nprocs) + lpid; // "continuous" task index 92 93 // Processor [0,0,0] makes initialisation 94 // It includes parameters checking, barrier initialization, 95 // distributed buffers allocation, and file open 90 unsigned int nclusters = x_size * y_size; // number of clusters 91 unsigned int ntasks = x_size * y_size * nprocs; // number of tasks 92 unsigned int npixels = NN * NN; // pixels per image 93 unsigned int iteration = 0; // iiteration iter 94 int fd_initial = 0; // initial file descriptor 95 int fd_transposed = 0; // transposed file descriptor 96 int fd_restored = 0; // restored file descriptor 97 unsigned int cluster_id = (x * y_size) + y; // "continuous" index 98 unsigned int task_id = (cluster_id * nprocs) + lpid; // "continuous" task index 99 100 101 /////////////////////////////////////////////////////////////////////// 102 // Processor [0,0,0] makes global initialisation 103 // It includes parameters checking, heap and barrier initialization. 104 // Others processors wait initialisation completion 105 /////////////////////////////////////////////////////////////////////// 106 96 107 if ( (x==0) && (y==0) && (lpid==0) ) 97 108 { … … 101 112 } 102 113 if ((nclusters != 1) && (nclusters != 2) && (nclusters != 4) && 103 (nclusters != 8) && (nclusters != 16) && (nclusters != 32) )104 { 105 giet_exit("[TRANSPOSE ERROR] number of clusters must be 1,2,4,8,16,32 ");114 (nclusters != 8) && (nclusters != 16) && (nclusters != 32) && (nclusters != 64) ) 115 { 116 giet_exit("[TRANSPOSE ERROR] number of clusters must be 1,2,4,8,16,32,64"); 106 117 } 107 118 if ( ntasks > NN ) … … 110 121 } 111 122 112 // Distributed buffers allocation 113 // The buffers containing one image are distributed in the user 114 // heap (one buf_in and one buf_out per cluster). 115 // Each buffer contains (NN*NN / nclusters) bytes. 116 for ( c = 0 ; c < nclusters ; c++ ) 117 { 118 unsigned int rx = c / y_size; 119 unsigned int ry = c % y_size; 120 121 heap_init( rx, ry ); 122 buf_in[c] = remote_malloc( npixels/nclusters, rx, ry ); 123 buf_out[c] = remote_malloc( npixels/nclusters, rx, ry ); 124 125 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] completes buffer allocation" 126 " for cluster[%d,%d] at cycle %d\n" 127 " - buf_in = %x\n" 128 " - buf_out = %x\n", 129 rx, ry, giet_proctime(), 130 (unsigned int)buf_in[c], 131 (unsigned int)buf_out[c] ); 132 } 133 134 // Barrier initialisation 123 // distributed heap initialisation 124 unsigned int cx , cy; 125 for ( cx = 0 ; cx < x_size ; cx++ ) 126 { 127 for ( cy = 0 ; cy < y_size ; cy++ ) 128 { 129 heap_init( cx , cy ); 130 } 131 } 132 133 // barrier initialisation 135 134 sqt_barrier_init( &barrier, x_size , y_size , nprocs ); 136 135 137 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] completes barrier init at cycle %d\n",136 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] completes heap & barrier init at cycle %d\n", 138 137 giet_proctime() ); 139 138 140 // open file containing images 141 file = giet_fat_open( FILE_PATHNAME , 0 ); 142 143 if (file < 0) 144 { 145 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d]" 146 " cannot open file %s", 147 x , y , lpid , FILE_PATHNAME ); 148 giet_exit(" open() failure"); 149 } 150 else 151 { 152 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file misc/images.raw\n"); 153 } 154 init_ok = 1; 139 // diplay disk content 140 giet_fat_list( "/" ); 141 giet_fat_list( "/misc" ); 142 giet_fat_list( "/home" ); 143 giet_fat_list( "/build" ); 144 giet_fat_list( "/build/kernel" ); 145 giet_fat_list( "/build/transpose" ); 146 147 global_init_ok = 1; 155 148 } 156 else // others processors wait initialisation completion149 else 157 150 { 158 while ( init_ok == 0 );151 while ( global_init_ok == 0 ); 159 152 } 160 153 161 ///////////////////////// 162 // Main loop (on images) 163 while (image < NB_IMAGES) 154 /////////////////////////////////////////////////////////////////////// 155 // In each cluster, only task running on processor[x,y,0] allocates 156 // the local buffers containing the images in the distributed heap 157 // (one buf_in and one buf_out per cluster). 158 // Other processors in cluster wait completion. 159 /////////////////////////////////////////////////////////////////////// 160 161 if ( lpid == 0 ) 164 162 { 165 // pseudo parallel load from disk to buf_in buffer : nblocks/nclusters blocks 166 // only task running on processor with (lpid == 0) does it 167 168 LOAD_START[cluster_id][lpid] = giet_proctime(); 163 buf_in[cluster_id] = remote_malloc( npixels/nclusters, x, y ); 164 buf_out[cluster_id] = remote_malloc( npixels/nclusters, x, y ); 165 166 if ( (x==0) && (y==0) ) 167 giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes buffer allocation" 168 " for cluster[%d,%d] at cycle %d\n" 169 " - buf_in = %x\n" 170 " - buf_out = %x\n", 171 x, y, lpid, x, y, giet_proctime(), 172 (unsigned int)buf_in[cluster_id], (unsigned int)buf_out[cluster_id] ); 173 174 /////////////////////////////////////////////////////////////////////// 175 // In each cluster, only task running on procesor[x,y,0] open the 176 // three private file descriptors for the three files 177 /////////////////////////////////////////////////////////////////////// 178 179 // open initial file 180 fd_initial = giet_fat_open( INITIAL_FILE_PATH , O_RDONLY ); // read_only 181 if ( fd_initial < 0 ) 182 { 183 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot open file %s\n", 184 x , y , lpid , INITIAL_FILE_PATH ); 185 giet_exit(" open() failure"); 186 } 187 else if ( (x==0) && (y==0) && (lpid==0) ) 188 { 189 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file %s / fd = %d\n", 190 INITIAL_FILE_PATH , fd_initial ); 191 } 192 193 // open transposed file 194 fd_transposed = giet_fat_open( TRANSPOSED_FILE_PATH , O_CREATE ); // create if required 195 if ( fd_transposed < 0 ) 196 { 197 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot open file %s\n", 198 x , y , lpid , TRANSPOSED_FILE_PATH ); 199 giet_exit(" open() failure"); 200 } 201 else if ( (x==0) && (y==0) && (lpid==0) ) 202 { 203 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file %s / fd = %d\n", 204 TRANSPOSED_FILE_PATH , fd_transposed ); 205 } 206 207 // open restored file 208 fd_restored = giet_fat_open( RESTORED_FILE_PATH , O_CREATE ); // create if required 209 if ( fd_restored < 0 ) 210 { 211 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot open file %s\n", 212 x , y , lpid , RESTORED_FILE_PATH ); 213 giet_exit(" open() failure"); 214 } 215 else if ( (x==0) && (y==0) && (lpid==0) ) 216 { 217 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file %s / fd = %d\n", 218 RESTORED_FILE_PATH , fd_restored ); 219 } 220 221 local_init_ok[x][y] = 1; 222 } 223 else 224 { 225 while( local_init_ok[x][y] == 0 ); 226 } 227 228 /////////////////////////////////////////////////////////////////////// 229 // Main loop / two iterations: 230 // - first makes initial => transposed 231 // - second makes transposed => restored 232 // All processors execute this main loop. 233 /////////////////////////////////////////////////////////////////////// 234 235 unsigned int fd_in = fd_initial; 236 unsigned int fd_out = fd_transposed; 237 238 while (iteration < 2) 239 { 240 /////////////////////////////////////////////////////////////////////// 241 // pseudo parallel load from disk to buf_in buffers: npixels/nclusters 242 // only task running on processor(x,y,0) does it 243 /////////////////////////////////////////////////////////////////////// 244 245 LOAD_START[x][y][lpid] = giet_proctime(); 169 246 170 247 if (lpid == 0) 171 248 { 172 giet_fat_read( file, 173 buf_in[cluster_id], 174 (nblocks / nclusters), 175 ((image*nblocks) + ((nblocks*cluster_id)/nclusters)) ); 249 unsigned int offset = ((npixels*cluster_id)/nclusters); 250 if ( giet_fat_lseek( fd_in, 251 offset, 252 SEEK_SET ) != offset ) 253 { 254 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot seek fd = %d\n", 255 x , y , lpid , fd_in ); 256 giet_exit(" seek() failure"); 257 } 258 259 unsigned int pixels = npixels / nclusters; 260 if ( giet_fat_read( fd_in, 261 buf_in[cluster_id], 262 pixels ) != pixels ) 263 { 264 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot read fd = %d\n", 265 x , y , lpid , fd_in ); 266 giet_exit(" read() failure"); 267 } 176 268 177 269 if ( (x==0) && (y==0) ) 178 270 giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes load" 179 " for i mage%d at cycle %d\n",180 x, y, lpid, i mage, giet_proctime() );181 } 182 183 LOAD_END[ cluster_id][lpid] = giet_proctime();271 " for iteration %d at cycle %d\n", 272 x, y, lpid, iteration, giet_proctime() ); 273 } 274 275 LOAD_END[x][y][lpid] = giet_proctime(); 184 276 185 277 ///////////////////////////// 186 278 sqt_barrier_wait( &barrier ); 187 279 280 /////////////////////////////////////////////////////////////////////// 188 281 // parallel transpose from buf_in to buf_out 189 282 // each task makes the transposition for nlt lines (nlt = NN/ntasks) 190 283 // from line [task_id*nlt] to line [(task_id + 1)*nlt - 1] 191 284 // (p,l) are the absolute pixel coordinates in the source image 192 193 194 TRSP_START[ cluster_id][lpid] = giet_proctime();285 /////////////////////////////////////////////////////////////////////// 286 287 TRSP_START[x][y][lpid] = giet_proctime(); 195 288 196 289 unsigned int nlt = NN / ntasks; // number of lines per task … … 233 326 if ( (x==0) && (y==0) ) 234 327 giet_shr_printf("\n[TRANSPOSE] proc [%d,%d,0] completes transpose" 235 " for i mage%d at cycle %d\n",236 x, y, i mage, giet_proctime() );237 238 } 239 TRSP_END[ cluster_id][lpid] = giet_proctime();328 " for iteration %d at cycle %d\n", 329 x, y, iteration, giet_proctime() ); 330 331 } 332 TRSP_END[x][y][lpid] = giet_proctime(); 240 333 241 334 ///////////////////////////// 242 335 sqt_barrier_wait( &barrier ); 243 336 244 245 if ( USE_FBF ) // external frame buffer available 246 { 247 // parallel display from local buf_out to frame buffer 248 // all processors contribute to display using memcpy... 249 250 DISP_START[cluster_id][lpid] = giet_proctime(); 251 252 unsigned int npt = npixels / ntasks; // number of pixels per task 253 254 giet_fbf_sync_write( npt * task_id, 255 &buf_out[cluster_id][lpid*npt], 256 npt ); 257 258 if ( (x==0) && (y==0) && (lpid==0) ) 259 giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes display" 260 " for image %d at cycle %d\n", 261 x, y, lpid, image, giet_proctime() ); 262 263 DISP_END[cluster_id][lpid] = giet_proctime(); 264 265 ///////////////////////////// 266 sqt_barrier_wait( &barrier ); 267 } 268 else // checksum by processor(x,y,0) in each cluster 269 { 270 if ( lpid == 0 ) 271 { 272 unsigned int success = 1; 273 unsigned int start = cluster_id * nlc; 274 unsigned int stop = start + nlc; 275 276 for ( l = start ; l < stop ; l++ ) 277 { 278 check_line_after[l] = 0; 279 280 for ( p = 0 ; p < NN ; p++ ) 281 { 282 // read one byte in remote buffer 283 src_cluster = p / nlc; 284 src_index = (p % nlc)*NN + l; 285 286 unsigned char byte = buf_out[src_cluster][src_index]; 287 288 check_line_after[l] = check_line_after[l] + byte; 289 } 290 291 if ( check_line_before[l] != check_line_after[l] ) success = 0; 292 } 293 294 if ( success ) 295 { 296 giet_shr_printf("\n[TRANSPOSE] proc [%d,%d,0] checksum OK" 297 " for image %d at cycle %d\n", 298 x, y, image, giet_proctime() ); 299 } 300 else 301 { 302 giet_shr_printf("\n[TRANSPOSE] proc [%d,%d,0] checksum KO" 303 " for image %d at cycle %d\n", 304 x, y, image, giet_proctime() ); 305 } 306 } 307 } 337 /////////////////////////////////////////////////////////////////////// 338 // parallel display from local buf_out to frame buffer 339 // all tasks contribute to display using memcpy... 340 /////////////////////////////////////////////////////////////////////// 341 342 DISP_START[x][y][lpid] = giet_proctime(); 343 344 unsigned int npt = npixels / ntasks; // number of pixels per task 345 346 giet_fbf_sync_write( npt * task_id, 347 &buf_out[cluster_id][lpid*npt], 348 npt ); 349 350 if ( (x==0) && (y==0) && (lpid==0) ) 351 giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes display" 352 " for iteration %d at cycle %d\n", 353 x, y, lpid, iteration, giet_proctime() ); 354 355 DISP_END[x][y][lpid] = giet_proctime(); 308 356 309 357 ///////////////////////////// 310 358 sqt_barrier_wait( &barrier ); 311 359 360 /////////////////////////////////////////////////////////////////////// 361 // pseudo parallel store : buf_out buffers to disk : npixels/nclusters 362 // only task running on processor(x,y,0) does it 363 /////////////////////////////////////////////////////////////////////// 364 365 STOR_START[x][y][lpid] = giet_proctime(); 366 367 if ( lpid == 0 ) 368 { 369 unsigned int offset = ((npixels*cluster_id)/nclusters); 370 if ( giet_fat_lseek( fd_out, 371 offset, 372 SEEK_SET ) != offset ) 373 { 374 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot seek fr = %d\n", 375 x , y , lpid , fd_out ); 376 giet_exit(" seek() failure"); 377 } 378 379 unsigned int pixels = npixels / nclusters; 380 if ( giet_fat_write( fd_out, 381 buf_out[cluster_id], 382 pixels ) != pixels ) 383 { 384 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot write fd = %d\n", 385 x , y , lpid , fd_out ); 386 giet_exit(" write() failure"); 387 } 388 389 if ( (x==0) && (y==0) ) 390 giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes store" 391 " for iteration %d at cycle %d\n", 392 x, y, lpid, iteration, giet_proctime() ); 393 } 394 395 STOR_END[x][y][lpid] = giet_proctime(); 396 397 ///////////////////////////// 398 sqt_barrier_wait( &barrier ); 399 312 400 // instrumentation done by processor [0,0,0] 313 401 if ( (x==0) && (y==0) && (lpid==0) && INSTRUMENTATION_OK ) 314 402 { 315 int c c, pp;403 int cx , cy , pp ; 316 404 unsigned int min_load_start = 0xFFFFFFFF; 317 405 unsigned int max_load_start = 0; … … 326 414 unsigned int min_disp_ended = 0xFFFFFFFF; 327 415 unsigned int max_disp_ended = 0; 328 329 for (cc = 0; cc < nclusters; cc++) 330 { 331 for (pp = 0; pp < NB_PROCS_MAX; pp++) 332 { 333 if (LOAD_START[cc][pp] < min_load_start) min_load_start = LOAD_START[cc][pp]; 334 if (LOAD_START[cc][pp] > max_load_start) max_load_start = LOAD_START[cc][pp]; 335 if (LOAD_END[cc][pp] < min_load_ended) min_load_ended = LOAD_END[cc][pp]; 336 if (LOAD_END[cc][pp] > max_load_ended) max_load_ended = LOAD_END[cc][pp]; 337 if (TRSP_START[cc][pp] < min_trsp_start) min_trsp_start = TRSP_START[cc][pp]; 338 if (TRSP_START[cc][pp] > max_trsp_start) max_trsp_start = TRSP_START[cc][pp]; 339 if (TRSP_END[cc][pp] < min_trsp_ended) min_trsp_ended = TRSP_END[cc][pp]; 340 if (TRSP_END[cc][pp] > max_trsp_ended) max_trsp_ended = TRSP_END[cc][pp]; 341 if (DISP_START[cc][pp] < min_disp_start) min_disp_start = DISP_START[cc][pp]; 342 if (DISP_START[cc][pp] > max_disp_start) max_disp_start = DISP_START[cc][pp]; 343 if (DISP_END[cc][pp] < min_disp_ended) min_disp_ended = DISP_END[cc][pp]; 344 if (DISP_END[cc][pp] > max_disp_ended) max_disp_ended = DISP_END[cc][pp]; 345 } 346 } 416 unsigned int min_stor_start = 0xFFFFFFFF; 417 unsigned int max_stor_start = 0; 418 unsigned int min_stor_ended = 0xFFFFFFFF; 419 unsigned int max_stor_ended = 0; 420 421 for (cx = 0; cx < x_size; cx++) 422 { 423 for (cy = 0; cy < y_size; cy++) 424 { 425 for (pp = 0; pp < NB_PROCS_MAX; pp++) 426 { 427 if (LOAD_START[cx][cy][pp] < min_load_start) min_load_start = LOAD_START[cx][cy][pp]; 428 if (LOAD_START[cx][cy][pp] > max_load_start) max_load_start = LOAD_START[cx][cy][pp]; 429 if (LOAD_END[cx][cy][pp] < min_load_ended) min_load_ended = LOAD_END[cx][cy][pp]; 430 if (LOAD_END[cx][cy][pp] > max_load_ended) max_load_ended = LOAD_END[cx][cy][pp]; 431 if (TRSP_START[cx][cy][pp] < min_trsp_start) min_trsp_start = TRSP_START[cx][cy][pp]; 432 if (TRSP_START[cx][cy][pp] > max_trsp_start) max_trsp_start = TRSP_START[cx][cy][pp]; 433 if (TRSP_END[cx][cy][pp] < min_trsp_ended) min_trsp_ended = TRSP_END[cx][cy][pp]; 434 if (TRSP_END[cx][cy][pp] > max_trsp_ended) max_trsp_ended = TRSP_END[cx][cy][pp]; 435 if (DISP_START[cx][cy][pp] < min_disp_start) min_disp_start = DISP_START[cx][cy][pp]; 436 if (DISP_START[cx][cy][pp] > max_disp_start) max_disp_start = DISP_START[cx][cy][pp]; 437 if (DISP_END[cx][cy][pp] < min_disp_ended) min_disp_ended = DISP_END[cx][cy][pp]; 438 if (DISP_END[cx][cy][pp] > max_disp_ended) max_disp_ended = DISP_END[cx][cy][pp]; 439 if (STOR_START[cx][cy][pp] < min_stor_start) min_stor_start = STOR_START[cx][cy][pp]; 440 if (STOR_START[cx][cy][pp] > max_stor_start) max_stor_start = STOR_START[cx][cy][pp]; 441 if (STOR_END[cx][cy][pp] < min_stor_ended) min_stor_ended = STOR_END[cx][cy][pp]; 442 if (STOR_END[cx][cy][pp] > max_stor_ended) max_stor_ended = STOR_END[cx][cy][pp]; 443 } 444 } 445 } 446 447 giet_shr_printf("\n ---------------- Instrumentation Results ---------------------\n"); 347 448 348 449 giet_shr_printf(" - LOAD_START : min = %d / max = %d / med = %d / delta = %d\n", … … 369 470 min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2, 370 471 max_disp_ended-min_disp_ended); 371 } 372 373 image++; 472 473 giet_shr_printf(" - STOR_START : min = %d / max = %d / med = %d / delta = %d\n", 474 min_stor_start, max_stor_start, (min_stor_start+max_stor_start)/2, 475 max_stor_start-min_stor_start); 476 477 giet_shr_printf(" - STOR_END : min = %d / max = %d / med = %d / delta = %d\n", 478 min_stor_ended, max_stor_ended, (min_stor_ended+max_stor_ended)/2, 479 max_stor_ended-min_stor_ended); 480 } 374 481 375 482 ///////////////////////////// 376 483 sqt_barrier_wait( &barrier ); 377 484 378 } // end while image 379 380 // Processor[0,0,0] releases the Distributed buffers 381 if ( (x==0) && (y==0) && (lpid==0) ) 485 // update iteration variables 486 fd_in = fd_transposed; 487 fd_out = fd_restored; 488 iteration++; 489 490 } // end while 491 492 /////////////////////////////////////////////////////////////////////// 493 // In each cluster, only task running on Processor[x,y,0] releases 494 // the distributed buffers and close the file descriptors. 495 /////////////////////////////////////////////////////////////////////// 496 497 if ( lpid==0 ) 382 498 { 383 for ( c = 0 ; c < nclusters ; c++ ) 384 { 385 free( buf_in[c] ); 386 free( buf_in[c] ); 387 } 499 free( buf_in[cluster_id] ); 500 free( buf_out[cluster_id] ); 501 502 giet_fat_close( fd_initial ); 503 giet_fat_close( fd_transposed ); 504 giet_fat_close( fd_restored ); 388 505 } 389 506 507 // display disk content 508 if ( (x==0) && (y == 0) && (lpid == 0) ) 509 { 510 giet_fat_list( "/" ); 511 giet_fat_list( "/misc" ); 512 giet_fat_list( "/home" ); 513 giet_fat_list( "/build" ); 514 giet_fat_list( "/build/kernel" ); 515 giet_fat_list( "/build/transpose" ); 516 517 giet_fat_remove( "/home/lena_transposed" , 0 ); 518 giet_fat_remove( "/home/lena_restored" , 0 ); 519 520 giet_fat_remove( "/home" , 1 ); 521 522 giet_fat_list( "/" ); 523 giet_fat_list( "/misc" ); 524 giet_fat_list( "/home" ); 525 giet_fat_list( "/build" ); 526 giet_fat_list( "/build/kernel" ); 527 giet_fat_list( "/build/transpose" ); 528 } 529 390 530 giet_exit("Completed"); 391 531 -
soft/giet_vm/applications/transpose/transpose.py
r502 r589 4 4 5 5 ################################################################################## 6 # file : transpose.py (for the transpose application)6 # file : transpose.py 7 7 # date : may 2014 8 8 # author : Alain Greiner … … 26 26 ################################################################################## 27 27 28 ###################### ###29 def transpose( mapping ):28 ###################### 29 def extend( mapping ): 30 30 31 31 x_size = mapping.x_size … … 54 54 mapping.addVseg( vspace, 'trsp_data', data_base , data_size, 55 55 'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM', 56 binpath = 'build/transpose/ transpose.elf',56 binpath = 'build/transpose/appli.elf', 57 57 local = False ) 58 58 … … 66 66 code_base , code_size, 67 67 'CXWU', vtype = 'ELF', x = x, y = y, pseg = 'RAM', 68 binpath = 'build/transpose/ transpose.elf',68 binpath = 'build/transpose/appli.elf', 69 69 local = True ) 70 70 … … 118 118 if __name__ == '__main__': 119 119 120 vspace = transpose( Mapping( 'test', 2, 2, 4 ) )120 vspace = extend( Mapping( 'test', 2, 2, 4 ) ) 121 121 print vspace.xml() 122 122
Note: See TracChangeset
for help on using the changeset viewer.