Ignore:
Timestamp:
Oct 1, 2015, 4:09:25 PM (9 years ago)
Author:
alain
Message:

Adapt the following applications to the POSIX threads API

  • convol
  • classif
  • raycast
  • coproc
  • display
  • gameoflife
  • transpose
  • shell
File:
1 moved

Legend:

Unmodified
Added
Removed
  • soft/giet_vm/applications/gameoflife/gameoflife.c

    r707 r708  
    44// Authors :  Alexandre Joannou <alexandre.joannou@lip6.fr> november 2013
    55//            Alain Greiner <alain.greiner@lip6.fr> february 2015
    6 //
     6//////////////////////////////////////////////////////////////////////////////////
    77// This multi-threaded application is an emulation of the Game of Life automaton.
    88// The world size is defined by the HEIGHT and WIDTH parameters.
    9 // There is one task per processor.
    10 // Each task compute HEIGHT/nbprocs lines.
    11 // Task running on processor P(0,0,0) initialises the barrier, the TTY terminal,
    12 // and the chained buffer DMA controler.
    139//
    14 // The number of processors must be a power of 2 not larger than HEIGHT.
     10// There is at most one thread per processor in the platform.
     11// - If the number of processors is larger than the number of lines,
     12//   the number of threads is equal to the number of lines, and
     13//   each thread processes a single line.
     14// - If the number of processors is not larger than the number of lines,
     15//   the number of threads is equal to the number of processors, and
     16//   each thread processes HEIGHT/nthreads (or HEIGHT/nthreads + 1) lines.
     17//
     18// The thread running on processor P(0,0,0) executes the main() function,
     19// which initialises the barrier, the TTY terminal and the CMA controller,
     20// and launches the other threads before calling the execute() function.
     21// The other threads only run the execute() function.
     22//
     23// The total number of processors cannot be larger than 1024 = 16 * 16 * 4
    1524//////////////////////////////////////////////////////////////////////////////////
    1625
     
    2029#include "mapping_info.h"
    2130#include "hard_config.h"
    22 
    23 #define WIDTH           128
    24 #define HEIGHT          128
    25 #define NB_ITERATION    1000000000
    26 
    27 #define PRINTF(...) ({ if ( proc_id==0) { giet_tty_printf(__VA_ARGS__); } })
     31#include "malloc.h"
     32
     33#define WIDTH           FBUF_X_SIZE
     34#define HEIGHT          FBUF_Y_SIZE
     35
     36#define VERBOSE         1
    2837
    2938typedef unsigned char uint8_t;
    3039
    31 uint8_t WORLD[2][HEIGHT][WIDTH] __attribute__((aligned(64)));
    32 
    33 uint8_t DISPLAY[2][HEIGHT][WIDTH] __attribute__((aligned(64)));
     40typedef struct
     41{
     42    unsigned int    index;    // index of first line to be processed
     43    unsigned int    lines;    // number of lines to be processed
     44}   arguments_t;
     45
     46arguments_t   args[1024];     // at most 1024 threads
     47
     48uint8_t world[2][HEIGHT][WIDTH] __attribute__((aligned(64)));
     49
     50uint8_t display[2][HEIGHT][WIDTH] __attribute__((aligned(64)));
    3451
    3552unsigned int status0[16];
     
    3754
    3855giet_sqt_barrier_t barrier;
    39 
    40 volatile unsigned int init_ok;
    4156
    4257////////////////////////////////////
     
    5065      for(x = 0 ; x < WIDTH ; x++)
    5166      {
    52          WORLD[phase][y][x] = (giet_rand() >> (x % 8)) & 0x1;
     67         world[phase][y][x] = (giet_rand() >> (x % 8)) & 0x1;
    5368      }
    5469   }
     
    6277   uint8_t nb = 0;
    6378
    64    nb += WORLD[phase][(y - 1) % HEIGHT][(x - 1) % WIDTH];
    65    nb += WORLD[phase][ y              ][(x - 1) % WIDTH];
    66    nb += WORLD[phase][(y + 1) % HEIGHT][(x - 1) % WIDTH];
    67    nb += WORLD[phase][(y - 1) % HEIGHT][ x             ];
    68    nb += WORLD[phase][(y + 1) % HEIGHT][ x             ];
    69    nb += WORLD[phase][(y - 1) % HEIGHT][(x + 1) % WIDTH];
    70    nb += WORLD[phase][ y              ][(x + 1) % WIDTH];
    71    nb += WORLD[phase][(y + 1) % HEIGHT][(x + 1) % WIDTH];
     79   nb += world[phase][(y - 1) % HEIGHT][(x - 1) % WIDTH];
     80   nb += world[phase][ y              ][(x - 1) % WIDTH];
     81   nb += world[phase][(y + 1) % HEIGHT][(x - 1) % WIDTH];
     82   nb += world[phase][(y - 1) % HEIGHT][ x             ];
     83   nb += world[phase][(y + 1) % HEIGHT][ x             ];
     84   nb += world[phase][(y - 1) % HEIGHT][(x + 1) % WIDTH];
     85   nb += world[phase][ y              ][(x + 1) % WIDTH];
     86   nb += world[phase][(y + 1) % HEIGHT][(x + 1) % WIDTH];
    7287
    7388   return nb;
     
    8196   uint8_t nb_neighbours_alive = number_of_alive_neighbour( phase, x , y );
    8297
    83    if (WORLD[phase][y][x] == 1)
     98   if (world[phase][y][x] == 1)
    8499   {
    85100      if (nb_neighbours_alive == 2 || nb_neighbours_alive == 3)  return 1;
     
    88103   {
    89104      if (nb_neighbours_alive == 3) return 1;
    90       else                          return WORLD[phase][y][x];
     105      else                          return world[phase][y][x];
    91106   }
    92107   return 0;
     
    103118      for(x = 0; x < WIDTH ; x++)
    104119      {
    105          WORLD[phase][y][x] = compute_cell( 1 - phase , x , y ); 
     120         world[phase][y][x] = compute_cell( 1 - phase , x , y ); 
    106121      }
    107122   }
     
    118133      for(x = 0; x < WIDTH ; x++)
    119134      {
    120          DISPLAY[phase][y][x] = WORLD[phase][y][x]*255; 
    121       }
    122    }
    123 }
     135         display[phase][y][x] = world[phase][y][x]*255; 
     136      }
     137   }
     138}
     139
     140
     141
     142///////////////////////////////////////////////////////////////
     143__attribute__((constructor)) void execute( arguments_t* pargs )
     144///////////////////////////////////////////////////////////////
     145{
     146   unsigned int nb_lines      = pargs->lines;
     147   unsigned int base_line     = pargs->index;
     148
     149   ///////////// parallel world  initialization
     150
     151   // All processors initialize world[0]
     152   init_world( 0 , base_line , nb_lines );
     153
     154   // copy world[0] to display[0]
     155   copy_world( 0 , base_line , nb_lines );
     156
     157   // synchronise with other procs
     158   sqt_barrier_wait( &barrier );
     159
     160   // main() makes display[0]
     161   if ( base_line == 0 ) giet_fbf_cma_display ( 0 );
     162
     163   //////////// evolution : 2 steps per iteration
     164
     165   unsigned int i = 0;
     166   while( 1 )
     167   {
     168      // compute world[1] from world[0]
     169      compute_new_gen( 1 , base_line , nb_lines );
     170
     171      // copy world[1] to display[1]
     172      copy_world( 1 , base_line , nb_lines );
     173
     174      // synchronise with other procs
     175      sqt_barrier_wait( &barrier );
     176
     177      // main makes display[1]
     178      if ( base_line == 0 ) giet_fbf_cma_display ( 1 );
     179   
     180#if VERBOSE
     181      if ( base_line == 0 ) giet_tty_printf(" - step %d\n", 2*i );
     182#endif
     183   
     184      // compute world[0] from world[1]
     185      compute_new_gen( 0 , base_line , nb_lines );
     186
     187      // copy world[0] to display[0]
     188      copy_world( 0 , base_line , nb_lines );
     189
     190      // synchronise with other procs
     191      sqt_barrier_wait( &barrier );
     192
     193      // main makes display[0]
     194      if ( base_line == 0 ) giet_fbf_cma_display ( 0 );
     195
     196#if VERBOSE
     197      if ( base_line == 0 ) giet_tty_printf(" - step %d\n", 2*i + 1 );
     198#endif
     199
     200      i++;
     201
     202   } // end evolution loop
     203
     204   giet_pthread_exit("Completed");
     205
     206} // end execute()
     207
     208
    124209
    125210////////////////////////////////////////
     
    133218   giet_proc_xyp( &x, &y, &p );
    134219
    135    // get processors number
     220   // get platform parameters
    136221   unsigned int x_size;
    137222   unsigned int y_size;
     
    139224   giet_procs_number( &x_size, &y_size, &nprocs );
    140225
    141    // compute continuous processor index & number of procs
    142    unsigned int proc_id = (((x * y_size) + y) * nprocs) + p; 
    143    unsigned int n_global_procs = x_size * y_size * nprocs;
    144 
    145    unsigned int i;
    146 
    147    unsigned int nb_line       = HEIGHT / n_global_procs;
    148    unsigned int base_line     = nb_line * proc_id;
    149    
    150    // parameters checking
    151    giet_assert( (n_global_procs <= HEIGHT),
    152                 " Number or processors larger than world height" );
    153 
    154    giet_assert( ((WIDTH == FBUF_X_SIZE) && (HEIGHT == FBUF_Y_SIZE)),
    155                 "Frame Buffer size does not fit the world size" );
    156    
    157    giet_assert( ((x_size == 1) || (x_size == 2) || (x_size == 4) ||
    158                  (x_size == 8) || (x_size == 16)),
    159                 "x_size must be a power of 2 no larger than 16" );
    160 
    161    giet_assert( ((y_size == 1) || (y_size == 2) || (y_size == 4) ||
    162                  (y_size == 8) || (y_size == 16)),
    163                 "y_size must be a power of 2 no larger than 16" );
    164 
    165    giet_assert( ((nprocs == 1) || (nprocs == 2) || (nprocs == 4)),
    166                 "nprocs must be a power of 2 no larger than 4" );
    167 
    168    // P[0,0,0] makes initialisation
    169    if ( proc_id == 0 )
    170    {
    171       // get a private TTY for P[0,0,0]
    172       giet_tty_alloc( 0 );
    173 
    174       // get a Chained Buffer DMA channel
    175       giet_fbf_cma_alloc();
    176 
    177       // initializes the source and destination buffers
    178       giet_fbf_cma_init_buf( &DISPLAY[0][0][0] ,
    179                              &DISPLAY[1][0][0] ,
    180                              status0 ,
    181                              status1 );
    182 
    183       // activates CMA channel
    184       giet_fbf_cma_start( HEIGHT * WIDTH );
    185 
    186       // initializes distributed heap
    187       unsigned int cx;
    188       unsigned int cy;
    189       for ( cx = 0 ; cx < x_size ; cx++ )
    190       {
    191          for ( cx = 0 ; cx < x_size ; cx++ )
    192          {
    193             heap_init( cx , cy );
    194          }
    195       }
    196 
    197       // initialises barrier
    198       sqt_barrier_init( &barrier , x_size , y_size , nprocs );
    199 
    200       PRINTF("\n[GAMEOFLIFE] P[0,0,0] completes initialisation at cycle %d\n"
    201              " nprocs = %d / nlines = %d\n",
    202              giet_proctime() , n_global_procs, HEIGHT );
    203 
    204       // activates all other processors
    205       init_ok = 1;
     226   giet_pthread_assert( (x_size <= 16) , "x_size no larger than 16" );
     227   giet_pthread_assert( (y_size <= 16) , "y_size no larger than 16" );
     228   giet_pthread_assert( (nprocs <=  4) , "nprocs no larger than 16" );
     229
     230   // compute number of threads and min number of lines per thread
     231   // extra is the number of threads that must process one extra line
     232   unsigned int total_procs = x_size * y_size * nprocs;
     233   unsigned int nthreads;
     234   unsigned int nlines;
     235   unsigned int extra;
     236   if ( total_procs > HEIGHT )
     237   {
     238      nthreads = HEIGHT;
     239      nlines   = 1;
     240      extra    = 0;
    206241   }
    207242   else
    208243   {
    209       while ( init_ok == 0 ) asm volatile("nop\n nop\n nop");
    210    }
    211 
    212    ///////////// world  initialization ( All processors )
    213 
    214    // All processors initialize WORLD[0]
    215    init_world( 0 , base_line , nb_line );
    216 
    217    // copy WORLD[0] to DISPLAY[0]
    218    copy_world( 0 , base_line , nb_line );
    219 
    220    // synchronise with other procs
    221    sqt_barrier_wait( &barrier );
    222 
    223    // P(0,0,0) displays DISPLAY[0]
    224    if ( proc_id == 0 ) giet_fbf_cma_display ( 0 );
    225 
    226    PRINTF("\n[GAMEOFLIFE] starts evolution at cycle %d\n", giet_proctime() );
    227    
    228    //////////// evolution : 2 steps per iteration
    229 
    230    for (i = 0 ; i < NB_ITERATION ; i++)
    231    {
    232       // compute WORLD[1] from WORLD[0]
    233       compute_new_gen( 1 , base_line , nb_line );
    234 
    235       // copy WORLD[1] to DISPLAY[1]
    236       copy_world( 1 , base_line , nb_line );
    237 
    238       // synchronise with other procs
    239       sqt_barrier_wait( &barrier );
    240 
    241       // P(0,0,0) displays DISPLAY[1]
    242       if ( proc_id == 0 ) giet_fbf_cma_display ( 1 );
    243    
    244       PRINTF(" - step %d\n", 2*i );
    245    
    246       // compute WORLD[0] from WORLD[1]
    247       compute_new_gen( 0 , base_line , nb_line );
    248 
    249       // copy WORLD[0] to DISPLAY[0]
    250       copy_world( 0 , base_line , nb_line );
    251 
    252       // synchronise with other procs
    253       sqt_barrier_wait( &barrier );
    254 
    255       // P(0,0,0) displays DISPLAY[0]
    256       if ( proc_id == 0 ) giet_fbf_cma_display ( 0 );
    257 
    258       PRINTF(" - step %d\n", 2*i + 1 );
    259    } // end main loop
    260 
    261    PRINTF("\n*** End of main at cycle %d ***\n", giet_proctime());
    262 
    263    giet_exit("Completed");
     244      nthreads = total_procs;
     245      nlines   = HEIGHT / total_procs;
     246      extra    = HEIGHT % total_procs; 
     247   }
     248
     249   // get a shared TTY
     250   giet_tty_alloc( 1 );
     251
     252   // get a Chained Buffer DMA channel
     253   giet_fbf_cma_alloc();
     254
     255   // initializes the source and destination buffers
     256   giet_fbf_cma_init_buf( &display[0][0][0] ,
     257                          &display[1][0][0] ,
     258                          status0 ,
     259                          status1 );
     260
     261   // activates CMA channel
     262   giet_fbf_cma_start( HEIGHT * WIDTH );
     263
     264   // initializes distributed heap
     265   unsigned int cx;
     266   unsigned int cy;
     267   for ( cx = 0 ; cx < x_size ; cx++ )
     268   {
     269      for ( cy = 0 ; cy < y_size ; cy++ )
     270      {
     271         heap_init( cx , cy );
     272      }
     273   }
     274
     275   // initialises barrier
     276   sqt_barrier_init( &barrier , x_size , y_size , nprocs );
     277
     278   giet_tty_printf("\n[GAMEOFLIFE] P[%d,%d,%d] completes initialisation at cycle %d\n"
     279                   " nprocs = %d / nlines = %d / nthreads = %d\n",
     280                   x, y, p, giet_proctime() , total_procs , HEIGHT , nthreads );
     281
     282   // compute arguments (index, nlines) for all threads
     283   unsigned int n;                   // thread index
     284   unsigned int index;               // first line index
     285   for ( n = 0 , index = 0 ; n < nthreads ; n++ )
     286   {
     287      if ( extra )
     288      {
     289         args[n].index = index;
     290         args[n].lines = nlines + 1;
     291         index         = index + nlines + 1;
     292      }
     293      else
     294      {
     295         args[n].index = index;
     296         args[n].lines = nlines;
     297         index         = index + nlines;
     298      }
     299#if VERBOSE     
     300giet_tty_printf("[GAMEOFLIFE] Thread %d : first = %d / nlines = %d\n",
     301                n , args[n].index , args[n].lines );
     302#endif
     303   }
     304
     305   // launch all other threads
     306   pthread_t  trdid;                 // unused because no pthread_join()
     307   for ( n = 1 ; n < nthreads ; n++ )
     308   {
     309      if ( giet_pthread_create( &trdid,
     310                                NULL,                  // no attribute
     311                                &execute,
     312                                &args[n] ) )
     313      {
     314          giet_tty_printf("\n[TRANSPOSE ERROR] creating thread %x\n", n );
     315          giet_pthread_exit( NULL );
     316      }
     317   }
     318
     319   // run execute function
     320   execute( &args[0] );
     321
     322   giet_pthread_exit( "completed" );
     323   
    264324} // end main()
     325
     326
    265327
    266328// Local Variables:
Note: See TracChangeset for help on using the changeset viewer.