source: soft/giet_vm/applications/gameoflife/gameoflife.c @ 712

Last change on this file since 712 was 712, checked in by alain, 9 years ago

Introduce the giet_fbf_size() and giet_fbf_alloc() system calls.

File size: 9.8 KB
RevLine 
//////////////////////////////////////////////////////////////////////////////////
// File    : main.c  (for gameoflife)
// Date    : November 2013 / February 2015
// Authors :  Alexandre Joannou <alexandre.joannou@lip6.fr> november 2013
//            Alain Greiner <alain.greiner@lip6.fr> february 2015
//////////////////////////////////////////////////////////////////////////////////
// This multi-threaded application is an emulation of the Game of Life automaton.
// The world size is defined by the Frame Buffer width and height.
//
// There is at most one thread per processor in the platform.
// - If the number of processors is larger than the number of lines,
//   the number of threads is equal to the number of lines, and
//   each thread processes one single line.
// - If the number of processors is not larger than the number of lines,
//   the number of threads is equal to the number of processors, and
//   each thread processes height/nthreads (or height/nthreads + 1) lines.
//
// The thread running on processor P(0,0,0) executes the main() function,
// that initialises the barrier, the TTY terminal, the CMA controller,
// and launches the other threads, before calling the execute() function.
// The other threads just run the execute() function.
//
// The total number of processors cannot be larger than 1024 = 16 * 16 * 4
//////////////////////////////////////////////////////////////////////////////////
[251]25
26#include "stdio.h"
27#include "limits.h"
[502]28#include "user_barrier.h"
[251]29#include "mapping_info.h"
[669]30#include "hard_config.h"
[708]31#include "malloc.h"
[251]32
[708]33#define VERBOSE         1
[251]34
// 8-bit unsigned type used for the cells and for the displayed pixels
typedef unsigned char uint8_t;

// Arguments handed to one execute() thread: the slice of lines it owns.
typedef struct
{
    unsigned int index;       // index of first line to be processed
    unsigned int lines;       // number of lines to be processed
} arguments_t;
[295]42
[708]43arguments_t   args[1024];     // at most 1024 threads
[251]44
[712]45uint8_t world[2][256][256] __attribute__((aligned(64)));
[708]46
[712]47uint8_t display[2][256][256] __attribute__((aligned(64)));
[708]48
[712]49unsigned int status0[16] __attribute__((aligned(64)));
50unsigned int status1[16] __attribute__((aligned(64)));
[669]51
[509]52giet_sqt_barrier_t barrier;
[251]53
[712]54unsigned int width;
55unsigned int height;
56
[509]57////////////////////////////////////
58void init_world( unsigned int phase,
59                 unsigned int base_line,
60                 unsigned int nb_line )
[251]61{
[509]62   unsigned int x,y;
63   for (y = base_line ; y < base_line + nb_line ; y++)
[502]64   {
[712]65      for(x = 0 ; x < width ; x++) 
[502]66      {
[708]67         world[phase][y][x] = (giet_rand() >> (x % 8)) & 0x1;
[251]68      }
69   }
70}
71
[509]72//////////////////////////////////////////////////////
73uint8_t number_of_alive_neighbour( unsigned int phase,
74                                   unsigned int x, 
75                                   unsigned int y )
[251]76{
77   uint8_t nb = 0;
78
[712]79   nb += world[phase][(y - 1) % height][(x - 1) % width];
80   nb += world[phase][ y              ][(x - 1) % width];
81   nb += world[phase][(y + 1) % height][(x - 1) % width];
82   nb += world[phase][(y - 1) % height][ x             ];
83   nb += world[phase][(y + 1) % height][ x             ];
84   nb += world[phase][(y - 1) % height][(x + 1) % width];
85   nb += world[phase][ y              ][(x + 1) % width];
86   nb += world[phase][(y + 1) % height][(x + 1) % width];
[251]87
88   return nb;
89}
90
[509]91/////////////////////////////////////////
92uint8_t compute_cell( unsigned int phase,
93                      unsigned int x, 
94                      unsigned int y )
[251]95{
[509]96   uint8_t nb_neighbours_alive = number_of_alive_neighbour( phase, x , y );
97
[708]98   if (world[phase][y][x] == 1) 
[502]99   {
100      if (nb_neighbours_alive == 2 || nb_neighbours_alive == 3)  return 1;
[251]101   }
[502]102   else 
103   {
104      if (nb_neighbours_alive == 3) return 1;
[708]105      else                          return world[phase][y][x];
[251]106   }
107   return 0;
108}
109
[509]110/////////////////////////////////////////
111void compute_new_gen( unsigned int phase,
112                      unsigned int base_line, 
113                      unsigned int nb_line )
[251]114{
[509]115   unsigned int x,y;
[295]116   for (y = base_line; y < base_line + nb_line; y++)
117   {
[712]118      for(x = 0; x < width ; x++) 
[295]119      {
[708]120         world[phase][y][x] = compute_cell( 1 - phase , x , y ); 
[251]121      }
122   }
123}
124
[509]125////////////////////////////////////
126void copy_world( unsigned int phase,
127                 unsigned int base_line,
128                 unsigned int nb_line )
[251]129{
[509]130   unsigned int x,y;
[502]131   for (y = base_line; y < base_line + nb_line; y++)
132   {
[712]133      for(x = 0; x < width ; x++) 
[502]134      {
[708]135         display[phase][y][x] = world[phase][y][x]*255; 
[251]136      }
137   }
138}
139
[708]140
141
142///////////////////////////////////////////////////////////////
143__attribute__((constructor)) void execute( arguments_t* pargs )
144///////////////////////////////////////////////////////////////
145{
146   unsigned int nb_lines      = pargs->lines;
147   unsigned int base_line     = pargs->index;
148
149   ///////////// parallel world  initialization
150
151   // All processors initialize world[0]
152   init_world( 0 , base_line , nb_lines );
153
154   // copy world[0] to display[0]
155   copy_world( 0 , base_line , nb_lines );
156
157   // synchronise with other procs
158   sqt_barrier_wait( &barrier );
159
160   // main() makes display[0]
161   if ( base_line == 0 ) giet_fbf_cma_display ( 0 );
162
163   //////////// evolution : 2 steps per iteration
164
165   unsigned int i = 0;
166   while( 1 )
167   {
168      // compute world[1] from world[0]
169      compute_new_gen( 1 , base_line , nb_lines );
170
171      // copy world[1] to display[1]
172      copy_world( 1 , base_line , nb_lines );
173
174      // synchronise with other procs
175      sqt_barrier_wait( &barrier );
176
177      // main makes display[1]
178      if ( base_line == 0 ) giet_fbf_cma_display ( 1 );
179   
180#if VERBOSE
181      if ( base_line == 0 ) giet_tty_printf(" - step %d\n", 2*i );
182#endif
183   
184      // compute world[0] from world[1]
185      compute_new_gen( 0 , base_line , nb_lines );
186
187      // copy world[0] to display[0]
188      copy_world( 0 , base_line , nb_lines );
189
190      // synchronise with other procs
191      sqt_barrier_wait( &barrier );
192
193      // main makes display[0]
194      if ( base_line == 0 ) giet_fbf_cma_display ( 0 );
195
196#if VERBOSE
197      if ( base_line == 0 ) giet_tty_printf(" - step %d\n", 2*i + 1 );
198#endif
199
200      i++;
201
202   } // end evolution loop
203
204   giet_pthread_exit("Completed");
205
206} // end main()
207
208
209
[263]210////////////////////////////////////////
[251]211__attribute__((constructor)) void main()
[669]212////////////////////////////////////////
[251]213{
[432]214   // get processor identifier
215   unsigned int x;
216   unsigned int y;
217   unsigned int p;
218   giet_proc_xyp( &x, &y, &p );
219
[708]220   // get platform parameters
[502]221   unsigned int x_size;
222   unsigned int y_size;
[669]223   unsigned int nprocs;
224   giet_procs_number( &x_size, &y_size, &nprocs );
[502]225
[712]226   // get a shared TTY
227   giet_tty_alloc( 1 );
228
[708]229   giet_pthread_assert( (x_size <= 16) , "x_size no larger than 16" );
230   giet_pthread_assert( (y_size <= 16) , "y_size no larger than 16" );
[712]231   giet_pthread_assert( (nprocs <=  4) , "nprocs no larger than 4" );
[432]232
[712]233   // get FBF width and height
234   giet_fbf_size( &width , &height );
235
236   giet_pthread_assert( (width  <= 256)   , "FBF width larger than 256" );
237   giet_pthread_assert( (height <= 256)   , "FBF height larger than 256" );
238   giet_pthread_assert( (width  && height) , "FBF not available" );
239
[708]240   // compute number of threads and min number of lines per thread
241   // extra is the number of threads that must process one extra line
242   unsigned int total_procs = x_size * y_size * nprocs; 
243   unsigned int nthreads;
244   unsigned int nlines;
245   unsigned int extra;
[712]246   if ( total_procs > height )
[708]247   {
[712]248      nthreads = height;
[708]249      nlines   = 1;
250      extra    = 0;
251   }
252   else
253   {
254      nthreads = total_procs;
[712]255      nlines   = height / total_procs;
256      extra    = height % total_procs; 
[708]257   }
[251]258
[712]259   // get FBF ownership
260   giet_fbf_alloc();
[251]261
[708]262   // get a Chained Buffer DMA channel
263   giet_fbf_cma_alloc();
[509]264
[708]265   // initializes the source and destination buffers
266   giet_fbf_cma_init_buf( &display[0][0][0] , 
267                          &display[1][0][0] , 
268                          status0 ,
269                          status1 );
[669]270
[708]271   // activates CMA channel
[712]272   giet_fbf_cma_start( height * width );
[669]273
[708]274   // initializes distributed heap
275   unsigned int cx;
276   unsigned int cy;
277   for ( cx = 0 ; cx < x_size ; cx++ )
[295]278   {
[708]279      for ( cy = 0 ; cy < y_size ; cy++ )
[669]280      {
[708]281         heap_init( cx , cy );
[669]282      }
[708]283   }
[669]284
[708]285   // initialises barrier
286   sqt_barrier_init( &barrier , x_size , y_size , nprocs );
[509]287
[708]288   giet_tty_printf("\n[GAMEOFLIFE] P[%d,%d,%d] completes initialisation at cycle %d\n"
289                   " nprocs = %d / nlines = %d / nthreads = %d\n", 
[712]290                   x, y, p, giet_proctime() , total_procs , height , nthreads );
[669]291
[708]292   // compute arguments (index, nlines) for all threads
293   unsigned int n;                   // thread index
294   unsigned int index;               // first line index
295   for ( n = 0 , index = 0 ; n < nthreads ; n++ )
[295]296   {
[708]297      if ( extra )
298      {
299         args[n].index = index;
300         args[n].lines = nlines + 1;
301         index         = index + nlines + 1;
302      }
303      else
304      {
305         args[n].index = index;
306         args[n].lines = nlines;
307         index         = index + nlines;
308      }
309#if VERBOSE     
310giet_tty_printf("[GAMEOFLIFE] Thread %d : first = %d / nlines = %d\n",
311                n , args[n].index , args[n].lines );
312#endif
[295]313   }
314
[708]315   // launch all other threads
316   pthread_t  trdid;                 // unused because no pthread_join()
317   for ( n = 1 ; n < nthreads ; n++ )
[251]318   {
[708]319      if ( giet_pthread_create( &trdid,
320                                NULL,                  // no attribute
321                                &execute,
322                                &args[n] ) )
323      {
324          giet_tty_printf("\n[TRANSPOSE ERROR] creating thread %x\n", n );
325          giet_pthread_exit( NULL );
326      }
327   }
[502]328
[708]329   // run execute function
330   execute( &args[0] );
[509]331
[708]332   giet_pthread_exit( "completed" );
333   
334} // end main()
[502]335
[251]336
[509]337
[251]338// Local Variables:
339// tab-width: 3
340// c-basic-offset: 3
341// c-file-offsets:((innamespace . 0)(inline-open . 0))
342// indent-tabs-mode: nil
343// End:
344
345// vim: filetype=cpp:expandtab:shiftwidth=3:tabstop=3:softtabstop=3
346
347
348
Note: See TracBrowser for help on using the repository browser.