Changeset 708 for soft/giet_vm/applications/gameoflife
- Timestamp: Oct 1, 2015, 4:09:25 PM (9 years ago)
- Location: soft/giet_vm/applications/gameoflife
- Files: 2 edited, 1 moved
Legend:
- Unmodified
- Added
- Removed
soft/giet_vm/applications/gameoflife/Makefile
r589 r708 1 2 CC = mipsel-unknown-elf-gcc 3 AS = mipsel-unknown-elf-as 4 LD = mipsel-unknown-elf-ld 5 DU = mipsel-unknown-elf-objdump 6 AR = mipsel-unknown-elf-ar 1 7 2 8 APP_NAME = gameoflife 3 9 4 OBJS= main.o10 OBJS= gameoflife.o 5 11 6 12 LIBS= -L../../build/libs -luser -
soft/giet_vm/applications/gameoflife/gameoflife.c
r707 r708 4 4 // Authors : Alexandre Joannou <alexandre.joannou@lip6.fr> november 2013 5 5 // Alain Greiner <alain.greiner@lip6.fr> february 2015 6 // 6 ////////////////////////////////////////////////////////////////////////////////// 7 7 // This multi-threaded application is an emulation of the Game of Life automaton. 8 8 // The world size is defined by the HEIGHT and WIDTH parameters. 9 // There is one task per processor.10 // Each task compute HEIGHT/nbprocs lines.11 // Task running on processor P(0,0,0) initialises the barrier, the TTY terminal,12 // and the chained buffer DMA controler.13 9 // 14 // The number of processors must be a power of 2 not larger than HEIGHT. 10 // There is at most one thread per processor in the platform. 11 // - If the number of processors is larger than the number of lines, 12 // the number of threads is equal to the number of lines, and 13 // each thread process one single line. 14 // - if the number of processors is not larger than the number of lines, 15 // the number of threads is equal to the number of processors, and 16 // each thread process HEIGHT/nthreads (or HEIGHT/nthreads + 1) lines. 17 // 18 // Thread running on processor P(0,0,0) execute the main() function, 19 // that initialises the barrier, the TTY terminal, the CMA controler, 20 // and launch the other threads, before calling the execute function. 21 // Other threads are just running the execute() function. 22 // 23 // The total number of processors cannot be larger than 1024 = 16 * 16 * 4 15 24 ////////////////////////////////////////////////////////////////////////////////// 16 25 … … 20 29 #include "mapping_info.h" 21 30 #include "hard_config.h" 22 23 #define WIDTH 128 24 #define HEIGHT 12825 #define NB_ITERATION 100000000026 27 #define PRINTF(...) 
({ if ( proc_id==0) { giet_tty_printf(__VA_ARGS__); } })31 #include "malloc.h" 32 33 #define WIDTH FBUF_X_SIZE 34 #define HEIGHT FBUF_Y_SIZE 35 36 #define VERBOSE 1 28 37 29 38 typedef unsigned char uint8_t; 30 39 31 uint8_t WORLD[2][HEIGHT][WIDTH] __attribute__((aligned(64))); 32 33 uint8_t DISPLAY[2][HEIGHT][WIDTH] __attribute__((aligned(64))); 40 typedef struct 41 { 42 unsigned int index; // index of first line to be processed 43 unsigned int lines; // number of lines to be processed 44 } arguments_t; 45 46 arguments_t args[1024]; // at most 1024 threads 47 48 uint8_t world[2][HEIGHT][WIDTH] __attribute__((aligned(64))); 49 50 uint8_t display[2][HEIGHT][WIDTH] __attribute__((aligned(64))); 34 51 35 52 unsigned int status0[16]; … … 37 54 38 55 giet_sqt_barrier_t barrier; 39 40 volatile unsigned int init_ok;41 56 42 57 //////////////////////////////////// … … 50 65 for(x = 0 ; x < WIDTH ; x++) 51 66 { 52 WORLD[phase][y][x] = (giet_rand() >> (x % 8)) & 0x1;67 world[phase][y][x] = (giet_rand() >> (x % 8)) & 0x1; 53 68 } 54 69 } … … 62 77 uint8_t nb = 0; 63 78 64 nb += WORLD[phase][(y - 1) % HEIGHT][(x - 1) % WIDTH];65 nb += WORLD[phase][ y ][(x - 1) % WIDTH];66 nb += WORLD[phase][(y + 1) % HEIGHT][(x - 1) % WIDTH];67 nb += WORLD[phase][(y - 1) % HEIGHT][ x ];68 nb += WORLD[phase][(y + 1) % HEIGHT][ x ];69 nb += WORLD[phase][(y - 1) % HEIGHT][(x + 1) % WIDTH];70 nb += WORLD[phase][ y ][(x + 1) % WIDTH];71 nb += WORLD[phase][(y + 1) % HEIGHT][(x + 1) % WIDTH];79 nb += world[phase][(y - 1) % HEIGHT][(x - 1) % WIDTH]; 80 nb += world[phase][ y ][(x - 1) % WIDTH]; 81 nb += world[phase][(y + 1) % HEIGHT][(x - 1) % WIDTH]; 82 nb += world[phase][(y - 1) % HEIGHT][ x ]; 83 nb += world[phase][(y + 1) % HEIGHT][ x ]; 84 nb += world[phase][(y - 1) % HEIGHT][(x + 1) % WIDTH]; 85 nb += world[phase][ y ][(x + 1) % WIDTH]; 86 nb += world[phase][(y + 1) % HEIGHT][(x + 1) % WIDTH]; 72 87 73 88 return nb; … … 81 96 uint8_t nb_neighbours_alive = number_of_alive_neighbour( phase, x , y 
); 82 97 83 if ( WORLD[phase][y][x] == 1)98 if (world[phase][y][x] == 1) 84 99 { 85 100 if (nb_neighbours_alive == 2 || nb_neighbours_alive == 3) return 1; … … 88 103 { 89 104 if (nb_neighbours_alive == 3) return 1; 90 else return WORLD[phase][y][x];105 else return world[phase][y][x]; 91 106 } 92 107 return 0; … … 103 118 for(x = 0; x < WIDTH ; x++) 104 119 { 105 WORLD[phase][y][x] = compute_cell( 1 - phase , x , y );120 world[phase][y][x] = compute_cell( 1 - phase , x , y ); 106 121 } 107 122 } … … 118 133 for(x = 0; x < WIDTH ; x++) 119 134 { 120 DISPLAY[phase][y][x] = WORLD[phase][y][x]*255; 121 } 122 } 123 } 135 display[phase][y][x] = world[phase][y][x]*255; 136 } 137 } 138 } 139 140 141 142 /////////////////////////////////////////////////////////////// 143 __attribute__((constructor)) void execute( arguments_t* pargs ) 144 /////////////////////////////////////////////////////////////// 145 { 146 unsigned int nb_lines = pargs->lines; 147 unsigned int base_line = pargs->index; 148 149 ///////////// parallel world initialization 150 151 // All processors initialize world[0] 152 init_world( 0 , base_line , nb_lines ); 153 154 // copy world[0] to display[0] 155 copy_world( 0 , base_line , nb_lines ); 156 157 // synchronise with other procs 158 sqt_barrier_wait( &barrier ); 159 160 // main() makes display[0] 161 if ( base_line == 0 ) giet_fbf_cma_display ( 0 ); 162 163 //////////// evolution : 2 steps per iteration 164 165 unsigned int i = 0; 166 while( 1 ) 167 { 168 // compute world[1] from world[0] 169 compute_new_gen( 1 , base_line , nb_lines ); 170 171 // copy world[1] to display[1] 172 copy_world( 1 , base_line , nb_lines ); 173 174 // synchronise with other procs 175 sqt_barrier_wait( &barrier ); 176 177 // main makes display[1] 178 if ( base_line == 0 ) giet_fbf_cma_display ( 1 ); 179 180 #if VERBOSE 181 if ( base_line == 0 ) giet_tty_printf(" - step %d\n", 2*i ); 182 #endif 183 184 // compute world[0] from world[1] 185 compute_new_gen( 0 , base_line , 
nb_lines ); 186 187 // copy world[0] to display[0] 188 copy_world( 0 , base_line , nb_lines ); 189 190 // synchronise with other procs 191 sqt_barrier_wait( &barrier ); 192 193 // main makes display[0] 194 if ( base_line == 0 ) giet_fbf_cma_display ( 0 ); 195 196 #if VERBOSE 197 if ( base_line == 0 ) giet_tty_printf(" - step %d\n", 2*i + 1 ); 198 #endif 199 200 i++; 201 202 } // end evolution loop 203 204 giet_pthread_exit("Completed"); 205 206 } // end main() 207 208 124 209 125 210 //////////////////////////////////////// … … 133 218 giet_proc_xyp( &x, &y, &p ); 134 219 135 // get p rocessors number220 // get platform parameters 136 221 unsigned int x_size; 137 222 unsigned int y_size; … … 139 224 giet_procs_number( &x_size, &y_size, &nprocs ); 140 225 141 // compute continuous processor index & number of procs 142 unsigned int proc_id = (((x * y_size) + y) * nprocs) + p; 143 unsigned int n_global_procs = x_size * y_size * nprocs; 144 145 unsigned int i; 146 147 unsigned int nb_line = HEIGHT / n_global_procs; 148 unsigned int base_line = nb_line * proc_id; 149 150 // parameters checking 151 giet_assert( (n_global_procs <= HEIGHT), 152 " Number or processors larger than world height" ); 153 154 giet_assert( ((WIDTH == FBUF_X_SIZE) && (HEIGHT == FBUF_Y_SIZE)), 155 "Frame Buffer size does not fit the world size" ); 156 157 giet_assert( ((x_size == 1) || (x_size == 2) || (x_size == 4) || 158 (x_size == 8) || (x_size == 16)), 159 "x_size must be a power of 2 no larger than 16" ); 160 161 giet_assert( ((y_size == 1) || (y_size == 2) || (y_size == 4) || 162 (y_size == 8) || (y_size == 16)), 163 "y_size must be a power of 2 no larger than 16" ); 164 165 giet_assert( ((nprocs == 1) || (nprocs == 2) || (nprocs == 4)), 166 "nprocs must be a power of 2 no larger than 4" ); 167 168 // P[0,0,0] makes initialisation 169 if ( proc_id == 0 ) 170 { 171 // get a private TTY for P[0,0,0] 172 giet_tty_alloc( 0 ); 173 174 // get a Chained Buffer DMA channel 175 giet_fbf_cma_alloc(); 
176 177 // initializes the source and destination buffers 178 giet_fbf_cma_init_buf( &DISPLAY[0][0][0] , 179 &DISPLAY[1][0][0] , 180 status0 , 181 status1 ); 182 183 // activates CMA channel 184 giet_fbf_cma_start( HEIGHT * WIDTH ); 185 186 // initializes distributed heap 187 unsigned int cx; 188 unsigned int cy; 189 for ( cx = 0 ; cx < x_size ; cx++ ) 190 { 191 for ( cx = 0 ; cx < x_size ; cx++ ) 192 { 193 heap_init( cx , cy ); 194 } 195 } 196 197 // initialises barrier 198 sqt_barrier_init( &barrier , x_size , y_size , nprocs ); 199 200 PRINTF("\n[GAMEOFLIFE] P[0,0,0] completes initialisation at cycle %d\n" 201 " nprocs = %d / nlines = %d\n", 202 giet_proctime() , n_global_procs, HEIGHT ); 203 204 // activates all other processors 205 init_ok = 1; 226 giet_pthread_assert( (x_size <= 16) , "x_size no larger than 16" ); 227 giet_pthread_assert( (y_size <= 16) , "y_size no larger than 16" ); 228 giet_pthread_assert( (nprocs <= 4) , "nprocs no larger than 16" ); 229 230 // compute number of threads and min number of lines per thread 231 // extra is the number of threads that must process one extra line 232 unsigned int total_procs = x_size * y_size * nprocs; 233 unsigned int nthreads; 234 unsigned int nlines; 235 unsigned int extra; 236 if ( total_procs > HEIGHT ) 237 { 238 nthreads = HEIGHT; 239 nlines = 1; 240 extra = 0; 206 241 } 207 242 else 208 243 { 209 while ( init_ok == 0 ) asm volatile("nop\n nop\n nop"); 210 } 211 212 ///////////// world initialization ( All processors ) 213 214 // All processors initialize WORLD[0] 215 init_world( 0 , base_line , nb_line ); 216 217 // copy WORLD[0] to DISPLAY[0] 218 copy_world( 0 , base_line , nb_line ); 219 220 // synchronise with other procs 221 sqt_barrier_wait( &barrier ); 222 223 // P(0,0,0) displays DISPLAY[0] 224 if ( proc_id == 0 ) giet_fbf_cma_display ( 0 ); 225 226 PRINTF("\n[GAMEOFLIFE] starts evolution at cycle %d\n", giet_proctime() ); 227 228 //////////// evolution : 2 steps per iteration 229 230 for (i = 0 ; 
i < NB_ITERATION ; i++) 231 { 232 // compute WORLD[1] from WORLD[0] 233 compute_new_gen( 1 , base_line , nb_line ); 234 235 // copy WORLD[1] to DISPLAY[1] 236 copy_world( 1 , base_line , nb_line ); 237 238 // synchronise with other procs 239 sqt_barrier_wait( &barrier ); 240 241 // P(0,0,0) displays DISPLAY[1] 242 if ( proc_id == 0 ) giet_fbf_cma_display ( 1 ); 243 244 PRINTF(" - step %d\n", 2*i ); 245 246 // compute WORLD[0] from WORLD[1] 247 compute_new_gen( 0 , base_line , nb_line ); 248 249 // copy WORLD[0] to DISPLAY[0] 250 copy_world( 0 , base_line , nb_line ); 251 252 // synchronise with other procs 253 sqt_barrier_wait( &barrier ); 254 255 // P(0,0,0) displays DISPLAY[0] 256 if ( proc_id == 0 ) giet_fbf_cma_display ( 0 ); 257 258 PRINTF(" - step %d\n", 2*i + 1 ); 259 } // end main loop 260 261 PRINTF("\n*** End of main at cycle %d ***\n", giet_proctime()); 262 263 giet_exit("Completed"); 244 nthreads = total_procs; 245 nlines = HEIGHT / total_procs; 246 extra = HEIGHT % total_procs; 247 } 248 249 // get a shared TTY 250 giet_tty_alloc( 1 ); 251 252 // get a Chained Buffer DMA channel 253 giet_fbf_cma_alloc(); 254 255 // initializes the source and destination buffers 256 giet_fbf_cma_init_buf( &display[0][0][0] , 257 &display[1][0][0] , 258 status0 , 259 status1 ); 260 261 // activates CMA channel 262 giet_fbf_cma_start( HEIGHT * WIDTH ); 263 264 // initializes distributed heap 265 unsigned int cx; 266 unsigned int cy; 267 for ( cx = 0 ; cx < x_size ; cx++ ) 268 { 269 for ( cy = 0 ; cy < y_size ; cy++ ) 270 { 271 heap_init( cx , cy ); 272 } 273 } 274 275 // initialises barrier 276 sqt_barrier_init( &barrier , x_size , y_size , nprocs ); 277 278 giet_tty_printf("\n[GAMEOFLIFE] P[%d,%d,%d] completes initialisation at cycle %d\n" 279 " nprocs = %d / nlines = %d / nthreads = %d\n", 280 x, y, p, giet_proctime() , total_procs , HEIGHT , nthreads ); 281 282 // compute arguments (index, nlines) for all threads 283 unsigned int n; // thread index 284 unsigned int 
index; // first line index 285 for ( n = 0 , index = 0 ; n < nthreads ; n++ ) 286 { 287 if ( extra ) 288 { 289 args[n].index = index; 290 args[n].lines = nlines + 1; 291 index = index + nlines + 1; 292 } 293 else 294 { 295 args[n].index = index; 296 args[n].lines = nlines; 297 index = index + nlines; 298 } 299 #if VERBOSE 300 giet_tty_printf("[GAMEOFLIFE] Thread %d : first = %d / nlines = %d\n", 301 n , args[n].index , args[n].lines ); 302 #endif 303 } 304 305 // launch all other threads 306 pthread_t trdid; // unused because no pthread_join() 307 for ( n = 1 ; n < nthreads ; n++ ) 308 { 309 if ( giet_pthread_create( &trdid, 310 NULL, // no attribute 311 &execute, 312 &args[n] ) ) 313 { 314 giet_tty_printf("\n[TRANSPOSE ERROR] creating thread %x\n", n ); 315 giet_pthread_exit( NULL ); 316 } 317 } 318 319 // run execute function 320 execute( &args[0] ); 321 322 giet_pthread_exit( "completed" ); 323 264 324 } // end main() 325 326 265 327 266 328 // Local Variables: -
soft/giet_vm/applications/gameoflife/gameoflife.py
r669 r708 10 10 # This file describes the mapping of the multi-threaded "gameoflife" 11 11 # application on a multi-clusters, multi-processors architecture. 12 # This include both the mapping of virtual segments on the clusters, 13 # and the mapping of tasks on processors. 14 # There is one task per processor. 12 # There is one thread per processor. 15 13 # The mapping of virtual segments is the following: 16 14 # - There is one shared data vseg in cluster[0][0] … … 102 100 if ( mapping.clusters[cluster_id].procs ): 103 101 for p in xrange( nprocs ): 104 trdid = (((x * y_size) + y) * nprocs) + p 102 if (x == 0) and (y == 0) and (p == 0) : # main thread 103 startid = 1 104 is_main = True 105 else : # other threads 106 startid = 0 107 is_main = False 105 108 106 mapping.addTask( vspace, 'gol_%d_%d_%d' % (x,y,p), 107 trdid, x, y, p, 108 'gol_stack_%d_%d_%d' % (x,y,p), 109 'gol_heap_%d_%d' %(x,y) , 0 ) 109 mapping.addThread( vspace, 110 'gol_%d_%d_%d' % (x,y,p), 111 is_main, 112 x, y, p, 113 'gol_stack_%d_%d_%d' % (x,y,p), 114 'gol_heap_%d_%d' % (x,y), 115 startid ) 110 116 111 117 # extend mapping name
Note: See TracChangeset for help on using the changeset viewer.