source: soft/giet_vm/applications/gameoflife/gameoflife.c @ 785

Last change on this file since 785 was 724, checked in by alain, 9 years ago

Update graphical applications.

File size: 9.7 KB
RevLine 
//////////////////////////////////////////////////////////////////////////////////
// File    : gameoflife.c
// Date    : November 2013 / February 2015
// Authors :  Alexandre Joannou <alexandre.joannou@lip6.fr> november 2013
//            Alain Greiner <alain.greiner@lip6.fr> february 2015
//////////////////////////////////////////////////////////////////////////////////
// This multi-threaded application is an emulation of the Game of Life automaton.
// The world size is defined by the Frame Buffer width and height.
//
// There is at most one thread per processor in the platform.
// - If the number of processors is larger than the number of lines,
//   the number of threads is equal to the number of lines, and
//   each thread processes one single line.
// - If the number of processors is not larger than the number of lines,
//   the number of threads is equal to the number of processors, and
//   each thread processes height/nthreads (or height/nthreads + 1) lines.
//
// The thread running on processor P(0,0,0) executes the main() function,
// which initialises the barrier, the TTY terminal and the CMA controller,
// and launches the other threads, before calling the execute() function.
// The other threads just run the execute() function.
//
// The total number of processors cannot be larger than 1024 = 16 * 16 * 4
//////////////////////////////////////////////////////////////////////////////////
[251]25
26#include "stdio.h"
27#include "limits.h"
[502]28#include "user_barrier.h"
[251]29#include "mapping_info.h"
[669]30#include "hard_config.h"
[708]31#include "malloc.h"
[251]32
// When non-zero, the per-thread and per-step trace messages guarded by
// "#if VERBOSE" in this file are compiled in.
#define VERBOSE         1

// Byte type used for the cells of the automaton and for the displayed pixels.
typedef unsigned char uint8_t;

// Work descriptor handed to each thread: a band of consecutive lines.
typedef struct
{
    unsigned int    index;    // index of first line to be processed
    unsigned int    lines;    // number of lines to be processed
}   arguments_t;

// One work descriptor per thread, filled by main() and read by execute().
arguments_t   args[1024];     // at most 1024 threads

// Cell states, indexed as world[phase][y][x]; the two phases are used
// alternately as source and destination of a generation.
uint8_t world[2][256][256] __attribute__((aligned(64)));

// Pixel buffers, indexed as display[phase][y][x]; both are registered as
// CMA user buffers in main() and filled by copy_world().
uint8_t display[2][256][256] __attribute__((aligned(64)));

// Per-buffer status words passed to giet_fbf_cma_init_buf() together with
// display[0] and display[1] respectively.
// NOTE(review): exact meaning is defined by the CMA driver - confirm there.
unsigned int status0[16] __attribute__((aligned(64)));
unsigned int status1[16] __attribute__((aligned(64)));

// Barrier shared by all threads running execute().
giet_sqt_barrier_t barrier;

// Frame buffer dimensions, set once in main() by giet_fbf_size();
// they also define the size of the world.
unsigned int width;
unsigned int height;
56
[509]57////////////////////////////////////
58void init_world( unsigned int phase,
59                 unsigned int base_line,
60                 unsigned int nb_line )
[251]61{
[509]62   unsigned int x,y;
63   for (y = base_line ; y < base_line + nb_line ; y++)
[502]64   {
[712]65      for(x = 0 ; x < width ; x++) 
[502]66      {
[708]67         world[phase][y][x] = (giet_rand() >> (x % 8)) & 0x1;
[251]68      }
69   }
70}
71
[509]72//////////////////////////////////////////////////////
73uint8_t number_of_alive_neighbour( unsigned int phase,
74                                   unsigned int x, 
75                                   unsigned int y )
[251]76{
77   uint8_t nb = 0;
78
[712]79   nb += world[phase][(y - 1) % height][(x - 1) % width];
80   nb += world[phase][ y              ][(x - 1) % width];
81   nb += world[phase][(y + 1) % height][(x - 1) % width];
82   nb += world[phase][(y - 1) % height][ x             ];
83   nb += world[phase][(y + 1) % height][ x             ];
84   nb += world[phase][(y - 1) % height][(x + 1) % width];
85   nb += world[phase][ y              ][(x + 1) % width];
86   nb += world[phase][(y + 1) % height][(x + 1) % width];
[251]87
88   return nb;
89}
90
[509]91/////////////////////////////////////////
92uint8_t compute_cell( unsigned int phase,
93                      unsigned int x, 
94                      unsigned int y )
[251]95{
[509]96   uint8_t nb_neighbours_alive = number_of_alive_neighbour( phase, x , y );
97
[708]98   if (world[phase][y][x] == 1) 
[502]99   {
100      if (nb_neighbours_alive == 2 || nb_neighbours_alive == 3)  return 1;
[251]101   }
[502]102   else 
103   {
104      if (nb_neighbours_alive == 3) return 1;
[708]105      else                          return world[phase][y][x];
[251]106   }
107   return 0;
108}
109
[509]110/////////////////////////////////////////
111void compute_new_gen( unsigned int phase,
112                      unsigned int base_line, 
113                      unsigned int nb_line )
[251]114{
[509]115   unsigned int x,y;
[295]116   for (y = base_line; y < base_line + nb_line; y++)
117   {
[712]118      for(x = 0; x < width ; x++) 
[295]119      {
[708]120         world[phase][y][x] = compute_cell( 1 - phase , x , y ); 
[251]121      }
122   }
123}
124
[509]125////////////////////////////////////
126void copy_world( unsigned int phase,
127                 unsigned int base_line,
128                 unsigned int nb_line )
[251]129{
[509]130   unsigned int x,y;
[502]131   for (y = base_line; y < base_line + nb_line; y++)
132   {
[712]133      for(x = 0; x < width ; x++) 
[502]134      {
[708]135         display[phase][y][x] = world[phase][y][x]*255; 
[251]136      }
137   }
138}
139
[708]140
141
142///////////////////////////////////////////////////////////////
143__attribute__((constructor)) void execute( arguments_t* pargs )
144///////////////////////////////////////////////////////////////
145{
146   unsigned int nb_lines      = pargs->lines;
147   unsigned int base_line     = pargs->index;
148
149   ///////////// parallel world  initialization
150
151   // All processors initialize world[0]
152   init_world( 0 , base_line , nb_lines );
153
154   // copy world[0] to display[0]
155   copy_world( 0 , base_line , nb_lines );
156
157   // synchronise with other procs
158   sqt_barrier_wait( &barrier );
159
160   // main() makes display[0]
161   if ( base_line == 0 ) giet_fbf_cma_display ( 0 );
162
163   //////////// evolution : 2 steps per iteration
164
165   unsigned int i = 0;
166   while( 1 )
167   {
168      // compute world[1] from world[0]
169      compute_new_gen( 1 , base_line , nb_lines );
170
171      // copy world[1] to display[1]
172      copy_world( 1 , base_line , nb_lines );
173
174      // synchronise with other procs
175      sqt_barrier_wait( &barrier );
176
177      // main makes display[1]
178      if ( base_line == 0 ) giet_fbf_cma_display ( 1 );
179   
180#if VERBOSE
181      if ( base_line == 0 ) giet_tty_printf(" - step %d\n", 2*i );
182#endif
183   
184      // compute world[0] from world[1]
185      compute_new_gen( 0 , base_line , nb_lines );
186
187      // copy world[0] to display[0]
188      copy_world( 0 , base_line , nb_lines );
189
190      // synchronise with other procs
191      sqt_barrier_wait( &barrier );
192
193      // main makes display[0]
194      if ( base_line == 0 ) giet_fbf_cma_display ( 0 );
195
196#if VERBOSE
197      if ( base_line == 0 ) giet_tty_printf(" - step %d\n", 2*i + 1 );
198#endif
199
200      i++;
201
202   } // end evolution loop
203
204   giet_pthread_exit("Completed");
205
206} // end main()
207
208
209
[263]210////////////////////////////////////////
[251]211__attribute__((constructor)) void main()
[669]212////////////////////////////////////////
[251]213{
[432]214   // get processor identifier
215   unsigned int x;
216   unsigned int y;
217   unsigned int p;
218   giet_proc_xyp( &x, &y, &p );
219
[708]220   // get platform parameters
[502]221   unsigned int x_size;
222   unsigned int y_size;
[669]223   unsigned int nprocs;
224   giet_procs_number( &x_size, &y_size, &nprocs );
[502]225
[712]226   // get a shared TTY
227   giet_tty_alloc( 1 );
228
[708]229   giet_pthread_assert( (x_size <= 16) , "x_size no larger than 16" );
230   giet_pthread_assert( (y_size <= 16) , "y_size no larger than 16" );
[712]231   giet_pthread_assert( (nprocs <=  4) , "nprocs no larger than 4" );
[432]232
[712]233   // get FBF width and height
234   giet_fbf_size( &width , &height );
235
236   giet_pthread_assert( (width  <= 256)   , "FBF width larger than 256" );
237   giet_pthread_assert( (height <= 256)   , "FBF height larger than 256" );
238   giet_pthread_assert( (width  && height) , "FBF not available" );
239
[708]240   // compute number of threads and min number of lines per thread
241   // extra is the number of threads that must process one extra line
242   unsigned int total_procs = x_size * y_size * nprocs; 
243   unsigned int nthreads;
244   unsigned int nlines;
245   unsigned int extra;
[712]246   if ( total_procs > height )
[708]247   {
[712]248      nthreads = height;
[708]249      nlines   = 1;
250      extra    = 0;
251   }
252   else
253   {
254      nthreads = total_procs;
[712]255      nlines   = height / total_procs;
256      extra    = height % total_procs; 
[708]257   }
[251]258
[712]259   // get FBF ownership
260   giet_fbf_alloc();
[251]261
[724]262   // get a CMA channel for two user buffers
263   giet_fbf_cma_alloc( 2 );
[509]264
[724]265   // register the user buffers
266   giet_fbf_cma_init_buf( 0 , &display[0][0][0] , status0 );
267   giet_fbf_cma_init_buf( 1 , &display[1][0][0] , status1 );
[669]268
[708]269   // activates CMA channel
[724]270   giet_fbf_cma_start();
[669]271
[708]272   // initializes distributed heap
273   unsigned int cx;
274   unsigned int cy;
275   for ( cx = 0 ; cx < x_size ; cx++ )
[295]276   {
[708]277      for ( cy = 0 ; cy < y_size ; cy++ )
[669]278      {
[708]279         heap_init( cx , cy );
[669]280      }
[708]281   }
[669]282
[708]283   // initialises barrier
284   sqt_barrier_init( &barrier , x_size , y_size , nprocs );
[509]285
[708]286   giet_tty_printf("\n[GAMEOFLIFE] P[%d,%d,%d] completes initialisation at cycle %d\n"
287                   " nprocs = %d / nlines = %d / nthreads = %d\n", 
[712]288                   x, y, p, giet_proctime() , total_procs , height , nthreads );
[669]289
[708]290   // compute arguments (index, nlines) for all threads
291   unsigned int n;                   // thread index
292   unsigned int index;               // first line index
293   for ( n = 0 , index = 0 ; n < nthreads ; n++ )
[295]294   {
[708]295      if ( extra )
296      {
297         args[n].index = index;
298         args[n].lines = nlines + 1;
299         index         = index + nlines + 1;
300      }
301      else
302      {
303         args[n].index = index;
304         args[n].lines = nlines;
305         index         = index + nlines;
306      }
307#if VERBOSE     
308giet_tty_printf("[GAMEOFLIFE] Thread %d : first = %d / nlines = %d\n",
309                n , args[n].index , args[n].lines );
310#endif
[295]311   }
312
[708]313   // launch all other threads
314   pthread_t  trdid;                 // unused because no pthread_join()
315   for ( n = 1 ; n < nthreads ; n++ )
[251]316   {
[708]317      if ( giet_pthread_create( &trdid,
318                                NULL,                  // no attribute
319                                &execute,
320                                &args[n] ) )
321      {
322          giet_tty_printf("\n[TRANSPOSE ERROR] creating thread %x\n", n );
323          giet_pthread_exit( NULL );
324      }
325   }
[502]326
[708]327   // run execute function
328   execute( &args[0] );
[509]329
[708]330   giet_pthread_exit( "completed" );
331   
332} // end main()
[502]333
[251]334
[509]335
[251]336// Local Variables:
337// tab-width: 3
338// c-basic-offset: 3
339// c-file-offsets:((innamespace . 0)(inline-open . 0))
340// indent-tabs-mode: nil
341// End:
342
343// vim: filetype=cpp:expandtab:shiftwidth=3:tabstop=3:softtabstop=3
344
345
346
Note: See TracBrowser for help on using the repository browser.