Ignore:
Timestamp:
Feb 8, 2015, 9:20:45 PM (10 years ago)
Author:
alain
Message:

1) Introduce distributed barriers in the multi-threads applications
(classif) transpose, convol, sort, gameoflife)

2) Introducing support for architectures containing empty clusters
in the mapping of these multi-threaded applications.

3) Removing the "command line arguments" in the sort application
(replaced by the giet_procs_number() system call.

Location:
soft/giet_vm/applications/convol
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • soft/giet_vm/applications/convol/convol.py

    r457 r502  
    1212#  This include both the mapping of virtual segments on the clusters,
    1313#  and the mapping of tasks on processors.
     14#  There is one task per processor.
     15#  The mapping of virtual segments is the following:
     16#    - There is one shared data vseg in cluster[0][0]
     17#    - The code vsegs are replicated on all clusters containing processors.
     18#    - There is one heap vseg per cluster containing processors.
     19#    - The stacks vsegs are distibuted on all clusters containing processors.
    1420#  This mapping uses 5 platform parameters, (obtained from the "mapping" argument)
    15 - x_size    : number of clusters in a row
    16 - y_size    : number of clusters in a column
    17 - x_width   : number of bits coding x coordinate
    18 - y_width   : number of bits coding y coordinate
    19 - nprocs : number of processors per cluster
     21  - x_size    : number of clusters in a row
     22  - y_size    : number of clusters in a column
     23  - x_width   : number of bits coding x coordinate
     24  - y_width   : number of bits coding y coordinate
     25  - nprocs : number of processors per cluster
    2026####################################################################################
    2127
     
    4652   
    4753    # data vseg in cluster[0,0] : non local
    48     mapping.addVseg( vspace, 'conv_data', data_base , data_size, 'C_WU', vtype = 'ELF',
    49                      x = 0, y = 0, pseg = 'RAM', binpath = 'build/convol/convol.elf',
     54    mapping.addVseg( vspace, 'conv_data', data_base , data_size,
     55                     'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM',
     56                     binpath = 'build/convol/convol.elf',
    5057                     local = False )
    5158
     
    5360    for x in xrange (x_size):
    5461        for y in xrange (y_size):
    55             size       = code_size
    56             base       = code_base
    57             mapping.addVseg( vspace, 'conv_code_%d_%d' % (x,y), base, size,
    58                              'CXWU', vtype = 'ELF', x = x , y = y , pseg = 'RAM',
    59                              binpath = 'build/convol/convol.elf',
    60                              local = True )
     62            cluster_id = (x * y_size) + y
     63            if ( mapping.clusters[cluster_id].procs ):
     64                size       = code_size
     65                base       = code_base
     66
     67                mapping.addVseg( vspace, 'conv_code_%d_%d' % (x,y), base, size,
     68                                 'CXWU', vtype = 'ELF', x = x , y = y , pseg = 'RAM',
     69                                 binpath = 'build/convol/convol.elf',
     70                                 local = True )
    6171
    6272    # stack vsegs : local (one stack per processor)
    6373    for x in xrange (x_size):
    6474        for y in xrange (y_size):
    65             for p in xrange( nprocs ):
    66                 proc_id = (((x * y_size) + y) * nprocs) + p
    67                 size    = (stack_size / nprocs) & 0xFFFFF000
    68                 base    = stack_base + (proc_id * size)
    69                 mapping.addVseg( vspace, 'conv_stack_%d_%d_%d' % (x,y,p), base, size,
    70                                  'C_WU', vtype = 'BUFFER', x = x , y = y , pseg = 'RAM',
    71                                  local = True, big = True )
     75            cluster_id = (x * y_size) + y
     76            if ( mapping.clusters[cluster_id].procs ):
     77                for p in xrange( nprocs ):
     78                    proc_id = (((x * y_size) + y) * nprocs) + p
     79                    size    = (stack_size / nprocs) & 0xFFFFF000
     80                    base    = stack_base + (proc_id * size)
     81
     82                    mapping.addVseg( vspace, 'conv_stack_%d_%d_%d' % (x,y,p),
     83                                     base, size, 'C_WU', vtype = 'BUFFER',
     84                                     x = x , y = y , pseg = 'RAM',
     85                                     local = True, big = True )
    7286           
    73     # heap vsegs : distributed but non local (all heap vsegs can be accessed by all tasks)
     87    # heap vsegs : distributed but non local (any heap can be accessed by any task)
    7488    for x in xrange (x_size):
    7589        for y in xrange (y_size):
    7690            cluster_id = (x * y_size) + y
    77             size       = heap_size
    78             base       = heap_base + (cluster_id * size)
    79             mapping.addVseg( vspace, 'conv_heap_%d_%d' % (x,y), base, size,
    80                              'C_WU', vtype = 'BUFFER', x = x , y = y , pseg = 'RAM',
    81                              local = False, big = True )
     91            if ( mapping.clusters[cluster_id].procs ):
     92                size       = heap_size
     93                base       = heap_base + (cluster_id * size)
     94
     95                mapping.addVseg( vspace, 'conv_heap_%d_%d' % (x,y), base, size,
     96                                 'C_WU', vtype = 'BUFFER', x = x , y = y , pseg = 'RAM',
     97                                 local = False, big = True )
    8298
    8399    # distributed tasks : one task per processor
    84100    for x in xrange (x_size):
    85101        for y in xrange (y_size):
    86             for p in xrange( nprocs ):
    87                 trdid = (((x * y_size) + y) * nprocs) + p
    88                 mapping.addTask( vspace, 'conv_%d_%d_%d' % (x,y,p), trdid, x, y, p,
    89                                  'conv_stack_%d_%d_%d' % (x,y,p),
    90                                  'conv_heap_%d_%d' % (x,y), 0 )
     102            cluster_id = (x * y_size) + y
     103            if ( mapping.clusters[cluster_id].procs ):
     104                for p in xrange( nprocs ):
     105                    trdid = (((x * y_size) + y) * nprocs) + p
     106
     107                    mapping.addTask( vspace, 'conv_%d_%d_%d' % (x,y,p),
     108                                     trdid, x, y, p,
     109                                     'conv_stack_%d_%d_%d' % (x,y,p),
     110                                     'conv_heap_%d_%d' % (x,y), 0 )
    91111
    92112    # extend mapping name
     
    95115    return vspace  # useful for test
    96116           
    97 ################################ test ######################################################
     117################################ test ################################################
    98118
    99119if __name__ == '__main__':
  • soft/giet_vm/applications/convol/main.c

    r488 r502  
    1 ////////////////////////////////////////////////////////////////////////////////////////////
     1///////////////////////////////////////////////////////////////////////////////////////
    22// File   : main.c   (for convol application)
    33// Date   : june 2014
    44// author : Alain Greiner
    5 ////////////////////////////////////////////////////////////////////////////////////////////
     5///////////////////////////////////////////////////////////////////////////////////////
    66// This multi-threaded application application implements a 2D convolution product. 
    77// The convolution kernel is [201]*[35] pixels, but it can be factored in two
    88// independant line and column convolution products.
    99// It can run on a multi-processors, multi-clusters architecture, with one thread
    10 // per processor. It uses the he following hardware parameters, that must be defined
    11 // in the hard_config.h file:
    12 // - X_SIZE       : number of clusters in a row
    13 // - Y_SIZE       : number of clusters in a column
    14 // - NB_PROCS_MAX : number of processors per cluster
    15 // - FBUF_X_SIZE  : number of pixels per line in frame buffer
    16 // - FBUF_Y_SIZE  : number of lines  in frame buffer
     10// per processor.
    1711//
    1812// The (1024 * 1024) pixels image is read from a file (2 bytes per pixel).
    1913//
    20 // - The number of clusters containing processors must be a power of 2.
    21 // - The number of processors per cluster must be a power of 2.
    22 ////////////////////////////////////////////////////////////////////////////////////////////
    23 
    24 #include "hard_config.h"
     14// - number of clusters containing processors must be power of 2 no larger than 256.
     15// - number of processors per cluster must be power of 2 no larger than 8.
     16///////////////////////////////////////////////////////////////////////////////////////
     17
    2518#include "stdio.h"
    2619#include "stdlib.h"
    27 #include "barrier.h"
     20#include "user_barrier.h"
    2821#include "malloc.h"
    2922
    30 #define USE_SBT_BARRIER            1
     23#define USE_SQT_BARRIER            1
    3124#define VERBOSE                    0
    3225#define SUPER_VERBOSE              0
    3326
     27#define X_SIZE_MAX                 16
     28#define Y_SIZE_MAX                 16
     29#define PROCS_MAX                  8
     30#define CLUSTERS_MAX               (X_SIZE_MAX * Y_SIZE_MAX)
     31
    3432#define INITIAL_DISPLAY_ENABLE     0
    3533#define FINAL_DISPLAY_ENABLE       1
    3634
    37 #define NB_CLUSTERS                (X_SIZE * Y_SIZE)
    3835#define PIXEL_SIZE                 2
    3936#define NL                         1024
     
    5350// global instrumentation counters (cluster_id, lpid]
    5451
    55 unsigned int START[NB_CLUSTERS][NB_PROCS_MAX];
    56 unsigned int H_BEG[NB_CLUSTERS][NB_PROCS_MAX];
    57 unsigned int H_END[NB_CLUSTERS][NB_PROCS_MAX];
    58 unsigned int V_BEG[NB_CLUSTERS][NB_PROCS_MAX];
    59 unsigned int V_END[NB_CLUSTERS][NB_PROCS_MAX];
    60 unsigned int D_BEG[NB_CLUSTERS][NB_PROCS_MAX];
    61 unsigned int D_END[NB_CLUSTERS][NB_PROCS_MAX];
     52unsigned int START[CLUSTERS_MAX][PROCS_MAX];
     53unsigned int H_BEG[CLUSTERS_MAX][PROCS_MAX];
     54unsigned int H_END[CLUSTERS_MAX][PROCS_MAX];
     55unsigned int V_BEG[CLUSTERS_MAX][PROCS_MAX];
     56unsigned int V_END[CLUSTERS_MAX][PROCS_MAX];
     57unsigned int D_BEG[CLUSTERS_MAX][PROCS_MAX];
     58unsigned int D_END[CLUSTERS_MAX][PROCS_MAX];
    6259
    6360// global synchronization barrier
    6461
    65 #if USE_SBT_BARRIER
    66 giet_sbt_barrier_t  barrier;
     62#if USE_SQT_BARRIER
     63giet_sqt_barrier_t  barrier;
    6764#else
    6865giet_barrier_t      barrier;
     
    7471
    7572// global pointers on distributed buffers in all clusters
    76 unsigned short * GA[NB_CLUSTERS];
    77 int *            GB[NB_CLUSTERS];
    78 int *            GC[NB_CLUSTERS];
    79 int *            GD[NB_CLUSTERS];
    80 unsigned char *  GZ[NB_CLUSTERS];
     73unsigned short * GA[CLUSTERS_MAX];
     74int *            GB[CLUSTERS_MAX];
     75int *            GC[CLUSTERS_MAX];
     76int *            GD[CLUSTERS_MAX];
     77unsigned char *  GZ[CLUSTERS_MAX];
    8178
    8279///////////////////////////////////////////
     
    109106    int z; // vertical filter index for loops
    110107
     108    // plat-form parameters
     109    unsigned int x_size;             // number of clusters in a row
     110    unsigned int y_size;             // number of clusters in a column
     111    unsigned int nprocs;             // number of processors per cluster
     112   
     113    giet_procs_number( &x_size , &y_size , &nprocs );
     114
    111115    // processor identifiers
    112     unsigned int x;                                           // x coordinate
    113     unsigned int y;                                           // y coordinate
    114     unsigned int lpid;                                        // local proc/task id
     116    unsigned int x;                                         // x coordinate
     117    unsigned int y;                                         // y coordinate
     118    unsigned int lpid;                                      // local proc/task id
    115119    giet_proc_xyp( &x, &y, &lpid );
    116120
    117     int          file        = 0;                             // file descriptor
    118     unsigned int nprocs      = NB_PROCS_MAX;                  // procs per cluster
    119     unsigned int nclusters   = NB_CLUSTERS;                   // number of clusters
    120     unsigned int cluster_id  = (x * Y_SIZE) + y;              // continuous cluster index
    121     unsigned int task_id     = (cluster_id * nprocs) + lpid;  // continuous task index
    122     unsigned int ntasks      = nclusters * nprocs;            // number of tasks
    123     unsigned int frame_size  = FRAME_SIZE;                    // total size (bytes)
    124     unsigned int nblocks     = frame_size / 512;              // number of blocks per frame
    125 
    126     unsigned int lines_per_task     = NL / ntasks;            // lines per task
    127     unsigned int lines_per_cluster  = NL / nclusters;         // lines per cluster
    128     unsigned int pixels_per_task    = NP / ntasks;            // columns per task
    129     unsigned int pixels_per_cluster = NP / nclusters;         // columns per cluster
     121    int          file       = 0;                            // file descriptor
     122    unsigned int nclusters  = x_size * y_size;              // number of clusters
     123    unsigned int cluster_id = (x * y_size) + y;             // continuous cluster index
     124    unsigned int task_id    = (cluster_id * nprocs) + lpid; // continuous task index
     125    unsigned int ntasks     = nclusters * nprocs;           // number of tasks
     126    unsigned int frame_size = FRAME_SIZE;                   // total size (bytes)
     127    unsigned int nblocks    = frame_size / 512;             // number of blocks/frame
     128
     129    unsigned int lines_per_task     = NL / ntasks;          // lines per task
     130    unsigned int lines_per_cluster  = NL / nclusters;       // lines per cluster
     131    unsigned int pixels_per_task    = NP / ntasks;          // columns per task
     132    unsigned int pixels_per_cluster = NP / nclusters;       // columns per cluster
    130133
    131134    int first, last;
     
    140143     // parameters checking
    141144   
    142     if ( (NP != FBUF_X_SIZE) || (NL != FBUF_Y_SIZE) )
    143     {
    144         giet_exit("[TRANSPOSE ERROR] Frame buffer size does not fit image size");
    145     }
    146     if ((nprocs != 1) && (nprocs != 2) && (nprocs != 4))
    147         giet_exit( "[CONVOL ERROR] NB_PROCS_MAX must be 1, 2 or 4\n");
    148 
    149     if ((X_SIZE!=1) && (X_SIZE!=2) && (X_SIZE!=4) && (X_SIZE!=8) && (X_SIZE!=16))
    150         giet_exit( "[CONVOL ERROR] X_SIZE must be 1, 2, 4, 8, 16\n");
     145    if ((nprocs != 1) && (nprocs != 2) && (nprocs != 4) && (nprocs != 8))
     146        giet_exit( "[CONVOL ERROR] NB_PROCS_MAX must be 1, 2, 4 or 8\n");
     147
     148    if ((x_size!=1) && (x_size!=2) && (x_size!=4) && (x_size!=8) && (x_size!=16))
     149        giet_exit( "[CONVOL ERROR] x_size must be 1, 2, 4, 8, 16\n");
    151150       
    152     if ((Y_SIZE!=1) && (Y_SIZE!=2) && (Y_SIZE!=4) && (Y_SIZE!=8) && (Y_SIZE!=16))
    153         giet_exit( "[CONVOL ERROR] Y_SIZE must be 1, 2, 4, 8, 16\n");
     151    if ((y_size!=1) && (y_size!=2) && (y_size!=4) && (y_size!=8) && (y_size!=16))
     152        giet_exit( "[CONVOL ERROR] y_size must be 1, 2, 4, 8, 16\n");
    154153
    155154    if ( NL % nclusters != 0 )
    156         giet_exit( "[CONVOL ERROR] NB_CLUSTERS must be a divider of NL");
     155        giet_exit( "[CONVOL ERROR] CLUSTERS_MAX must be a divider of NL");
    157156
    158157    if ( NP % nclusters != 0 )
    159         giet_exit( "[CONVOL ERROR] NB_CLUSTERS must be a divider of NP");
     158        giet_exit( "[CONVOL ERROR] CLUSTERS_MAX must be a divider of NP");
    160159
    161160   
     
    166165    if ( (x==0) && (y==0) && (lpid==0) )
    167166    {
    168         // parameters checking
    169         if ( (NP != FBUF_X_SIZE) || (NL != FBUF_Y_SIZE) )
    170             giet_exit("[TRANSPOSE ERROR] Frame buffer size does not fit image size");
    171        
    172         if ((nprocs != 1) && (nprocs != 2) && (nprocs != 4))
    173             giet_exit( "[CONVOL ERROR] NB_PROCS_MAX must be 1, 2 or 4\n");
    174 
    175         if ((X_SIZE!=1) && (X_SIZE!=2) && (X_SIZE!=4) && (X_SIZE!=8) && (X_SIZE!=16))
    176             giet_exit( "[CONVOL ERROR] X_SIZE must be 1, 2, 4, 8, 16\n");
    177        
    178         if ((Y_SIZE!=1) && (Y_SIZE!=2) && (Y_SIZE!=4) && (Y_SIZE!=8) && (Y_SIZE!=16))
    179             giet_exit( "[CONVOL ERROR] Y_SIZE must be 1, 2, 4, 8, 16\n");
    180 
    181         if ( NL % nclusters != 0 )
    182             giet_exit( "[CONVOL ERROR] NB_CLUSTERS must be a divider of NL");
    183 
    184         if ( NP % nclusters != 0 )
    185             giet_exit( "[CONVOL ERROR] NB_CLUSTERS must be a divider of NP");
    186 
    187    
    188167        giet_shr_printf("\n[CONVOL] task[0,0,0] starts barrier init at cycle %d\n"
    189                         "- NB_CLUSTERS     = %d\n"
    190                         "- NB_PROCS_MAX    = %d\n"
    191                         "- NB_TASKS        = %d\n"
    192                         "- NB_BLOCKS       = %x\n",
     168                        "- CLUSTERS  = %d\n"
     169                        "- PROCS     = %d\n"
     170                        "- TASKS     = %d\n"
     171                        "- BLOCKS    = %d\n",
    193172                        giet_proctime(), nclusters, nprocs, ntasks, nblocks );
    194 #if USE_SBT_BARRIER
    195         sbt_barrier_init( &barrier, nclusters , nprocs );
     173#if USE_SQT_BARRIER
     174        sqt_barrier_init( &barrier, x_size , y_size , nprocs );
    196175#else
    197176        barrier_init( &barrier, ntasks );
     
    216195
    217196#if VERBOSE
    218 giet_shr_printf( "\n[CONVOL] task[%d,%d,%d] enters malloc at cycle %d\n", x,y,lpid, date );
     197giet_shr_printf( "\n[CONVOL] task[%d,%d,%d] enters malloc at cycle %d\n",
     198                 x,y,lpid, date );
    219199#endif
    220200
     
    242222
    243223    ///////////////////////////////
    244     #if USE_SBT_BARRIER
    245     sbt_barrier_wait( &barrier );
     224    #if USE_SQT_BARRIER
     225    sqt_barrier_wait( &barrier );
    246226    #else
    247227    barrier_wait( &barrier );
     
    253233    ///////////////////////////////////////////////////////////////////
    254234
    255     unsigned short * A[NB_CLUSTERS];
    256     int *            B[NB_CLUSTERS];
    257     int *            C[NB_CLUSTERS];
    258     int *            D[NB_CLUSTERS];
    259     unsigned char *  Z[NB_CLUSTERS];
     235    unsigned short * A[CLUSTERS_MAX];
     236    int            * B[CLUSTERS_MAX];
     237    int            * C[CLUSTERS_MAX];
     238    int            * D[CLUSTERS_MAX];
     239    unsigned char  * Z[CLUSTERS_MAX];
    260240
    261241    for (c = 0; c < nclusters; c++)
     
    283263                         " at cycle %d\n", giet_proctime() );
    284264
    285         for ( c = 0 ; c < NB_CLUSTERS ; c++ )
     265        for ( c = 0 ; c < nclusters ; c++ )
    286266        {
    287267            giet_shr_printf( "\n[CONVOL] task[0,0,0] starts load "
     
    341321
    342322        ////////////////////////////
    343         #if USE_SBT_BARRIER
    344         sbt_barrier_wait( &barrier );
     323        #if USE_SQT_BARRIER
     324        sqt_barrier_wait( &barrier );
    345325        #else
    346326        barrier_wait( &barrier );
     
    447427
    448428    /////////////////////////////
    449     #if USE_SBT_BARRIER
    450     sbt_barrier_wait( &barrier );
     429    #if USE_SQT_BARRIER
     430    sqt_barrier_wait( &barrier );
    451431    #else
    452432    barrier_wait( &barrier );
     
    567547
    568548    ////////////////////////////
    569     #if USE_SBT_BARRIER
    570     sbt_barrier_wait( &barrier );
     549    #if USE_SQT_BARRIER
     550    sqt_barrier_wait( &barrier );
    571551    #else
    572552    barrier_wait( &barrier );
     
    626606     
    627607    //////////////////////////////
    628     #if USE_SBT_BARRIER
    629     sbt_barrier_wait( &barrier );
     608    #if USE_SQT_BARRIER
     609    sqt_barrier_wait( &barrier );
    630610    #else
    631611    barrier_wait( &barrier );
Note: See TracChangeset for help on using the changeset viewer.