Ignore:
Timestamp:
Feb 8, 2015, 9:20:45 PM (9 years ago)
Author:
alain
Message:

1) Introduce distributed barriers in the multi-threaded applications
(classif, transpose, convol, sort, gameoflife).

2) Introducing support for architectures containing empty clusters
in the mapping of these multi-threaded applications.

3) Removing the "command line arguments" in the sort application
(replaced by the giet_procs_number() system call).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • soft/giet_vm/applications/convol/main.c

    r488 r502  
    1 ////////////////////////////////////////////////////////////////////////////////////////////
     1///////////////////////////////////////////////////////////////////////////////////////
    22// File   : main.c   (for convol application)
    33// Date   : june 2014
    44// author : Alain Greiner
    5 ////////////////////////////////////////////////////////////////////////////////////////////
     5///////////////////////////////////////////////////////////////////////////////////////
    66// This multi-threaded application application implements a 2D convolution product. 
    77// The convolution kernel is [201]*[35] pixels, but it can be factored in two
    88// independant line and column convolution products.
    99// It can run on a multi-processors, multi-clusters architecture, with one thread
    10 // per processor. It uses the he following hardware parameters, that must be defined
    11 // in the hard_config.h file:
    12 // - X_SIZE       : number of clusters in a row
    13 // - Y_SIZE       : number of clusters in a column
    14 // - NB_PROCS_MAX : number of processors per cluster
    15 // - FBUF_X_SIZE  : number of pixels per line in frame buffer
    16 // - FBUF_Y_SIZE  : number of lines  in frame buffer
     10// per processor.
    1711//
    1812// The (1024 * 1024) pixels image is read from a file (2 bytes per pixel).
    1913//
    20 // - The number of clusters containing processors must be a power of 2.
    21 // - The number of processors per cluster must be a power of 2.
    22 ////////////////////////////////////////////////////////////////////////////////////////////
    23 
    24 #include "hard_config.h"
     14// - number of clusters containing processors must be power of 2 no larger than 256.
     15// - number of processors per cluster must be power of 2 no larger than 8.
     16///////////////////////////////////////////////////////////////////////////////////////
     17
    2518#include "stdio.h"
    2619#include "stdlib.h"
    27 #include "barrier.h"
     20#include "user_barrier.h"
    2821#include "malloc.h"
    2922
    30 #define USE_SBT_BARRIER            1
     23#define USE_SQT_BARRIER            1
    3124#define VERBOSE                    0
    3225#define SUPER_VERBOSE              0
    3326
     27#define X_SIZE_MAX                 16
     28#define Y_SIZE_MAX                 16
     29#define PROCS_MAX                  8
     30#define CLUSTERS_MAX               (X_SIZE_MAX * Y_SIZE_MAX)
     31
    3432#define INITIAL_DISPLAY_ENABLE     0
    3533#define FINAL_DISPLAY_ENABLE       1
    3634
    37 #define NB_CLUSTERS                (X_SIZE * Y_SIZE)
    3835#define PIXEL_SIZE                 2
    3936#define NL                         1024
     
    5350// global instrumentation counters (cluster_id, lpid]
    5451
    55 unsigned int START[NB_CLUSTERS][NB_PROCS_MAX];
    56 unsigned int H_BEG[NB_CLUSTERS][NB_PROCS_MAX];
    57 unsigned int H_END[NB_CLUSTERS][NB_PROCS_MAX];
    58 unsigned int V_BEG[NB_CLUSTERS][NB_PROCS_MAX];
    59 unsigned int V_END[NB_CLUSTERS][NB_PROCS_MAX];
    60 unsigned int D_BEG[NB_CLUSTERS][NB_PROCS_MAX];
    61 unsigned int D_END[NB_CLUSTERS][NB_PROCS_MAX];
     52unsigned int START[CLUSTERS_MAX][PROCS_MAX];
     53unsigned int H_BEG[CLUSTERS_MAX][PROCS_MAX];
     54unsigned int H_END[CLUSTERS_MAX][PROCS_MAX];
     55unsigned int V_BEG[CLUSTERS_MAX][PROCS_MAX];
     56unsigned int V_END[CLUSTERS_MAX][PROCS_MAX];
     57unsigned int D_BEG[CLUSTERS_MAX][PROCS_MAX];
     58unsigned int D_END[CLUSTERS_MAX][PROCS_MAX];
    6259
    6360// global synchronization barrier
    6461
    65 #if USE_SBT_BARRIER
    66 giet_sbt_barrier_t  barrier;
     62#if USE_SQT_BARRIER
     63giet_sqt_barrier_t  barrier;
    6764#else
    6865giet_barrier_t      barrier;
     
    7471
    7572// global pointers on distributed buffers in all clusters
    76 unsigned short * GA[NB_CLUSTERS];
    77 int *            GB[NB_CLUSTERS];
    78 int *            GC[NB_CLUSTERS];
    79 int *            GD[NB_CLUSTERS];
    80 unsigned char *  GZ[NB_CLUSTERS];
     73unsigned short * GA[CLUSTERS_MAX];
     74int *            GB[CLUSTERS_MAX];
     75int *            GC[CLUSTERS_MAX];
     76int *            GD[CLUSTERS_MAX];
     77unsigned char *  GZ[CLUSTERS_MAX];
    8178
    8279///////////////////////////////////////////
     
    109106    int z; // vertical filter index for loops
    110107
     108    // plat-form parameters
     109    unsigned int x_size;             // number of clusters in a row
     110    unsigned int y_size;             // number of clusters in a column
     111    unsigned int nprocs;             // number of processors per cluster
     112   
     113    giet_procs_number( &x_size , &y_size , &nprocs );
     114
    111115    // processor identifiers
    112     unsigned int x;                                           // x coordinate
    113     unsigned int y;                                           // y coordinate
    114     unsigned int lpid;                                        // local proc/task id
     116    unsigned int x;                                         // x coordinate
     117    unsigned int y;                                         // y coordinate
     118    unsigned int lpid;                                      // local proc/task id
    115119    giet_proc_xyp( &x, &y, &lpid );
    116120
    117     int          file        = 0;                             // file descriptor
    118     unsigned int nprocs      = NB_PROCS_MAX;                  // procs per cluster
    119     unsigned int nclusters   = NB_CLUSTERS;                   // number of clusters
    120     unsigned int cluster_id  = (x * Y_SIZE) + y;              // continuous cluster index
    121     unsigned int task_id     = (cluster_id * nprocs) + lpid;  // continuous task index
    122     unsigned int ntasks      = nclusters * nprocs;            // number of tasks
    123     unsigned int frame_size  = FRAME_SIZE;                    // total size (bytes)
    124     unsigned int nblocks     = frame_size / 512;              // number of blocks per frame
    125 
    126     unsigned int lines_per_task     = NL / ntasks;            // lines per task
    127     unsigned int lines_per_cluster  = NL / nclusters;         // lines per cluster
    128     unsigned int pixels_per_task    = NP / ntasks;            // columns per task
    129     unsigned int pixels_per_cluster = NP / nclusters;         // columns per cluster
     121    int          file       = 0;                            // file descriptor
     122    unsigned int nclusters  = x_size * y_size;              // number of clusters
     123    unsigned int cluster_id = (x * y_size) + y;             // continuous cluster index
     124    unsigned int task_id    = (cluster_id * nprocs) + lpid; // continuous task index
     125    unsigned int ntasks     = nclusters * nprocs;           // number of tasks
     126    unsigned int frame_size = FRAME_SIZE;                   // total size (bytes)
     127    unsigned int nblocks    = frame_size / 512;             // number of blocks/frame
     128
     129    unsigned int lines_per_task     = NL / ntasks;          // lines per task
     130    unsigned int lines_per_cluster  = NL / nclusters;       // lines per cluster
     131    unsigned int pixels_per_task    = NP / ntasks;          // columns per task
     132    unsigned int pixels_per_cluster = NP / nclusters;       // columns per cluster
    130133
    131134    int first, last;
     
    140143     // parameters checking
    141144   
    142     if ( (NP != FBUF_X_SIZE) || (NL != FBUF_Y_SIZE) )
    143     {
    144         giet_exit("[TRANSPOSE ERROR] Frame buffer size does not fit image size");
    145     }
    146     if ((nprocs != 1) && (nprocs != 2) && (nprocs != 4))
    147         giet_exit( "[CONVOL ERROR] NB_PROCS_MAX must be 1, 2 or 4\n");
    148 
    149     if ((X_SIZE!=1) && (X_SIZE!=2) && (X_SIZE!=4) && (X_SIZE!=8) && (X_SIZE!=16))
    150         giet_exit( "[CONVOL ERROR] X_SIZE must be 1, 2, 4, 8, 16\n");
     145    if ((nprocs != 1) && (nprocs != 2) && (nprocs != 4) && (nprocs != 8))
     146        giet_exit( "[CONVOL ERROR] NB_PROCS_MAX must be 1, 2, 4 or 8\n");
     147
     148    if ((x_size!=1) && (x_size!=2) && (x_size!=4) && (x_size!=8) && (x_size!=16))
     149        giet_exit( "[CONVOL ERROR] x_size must be 1, 2, 4, 8, 16\n");
    151150       
    152     if ((Y_SIZE!=1) && (Y_SIZE!=2) && (Y_SIZE!=4) && (Y_SIZE!=8) && (Y_SIZE!=16))
    153         giet_exit( "[CONVOL ERROR] Y_SIZE must be 1, 2, 4, 8, 16\n");
     151    if ((y_size!=1) && (y_size!=2) && (y_size!=4) && (y_size!=8) && (y_size!=16))
     152        giet_exit( "[CONVOL ERROR] y_size must be 1, 2, 4, 8, 16\n");
    154153
    155154    if ( NL % nclusters != 0 )
    156         giet_exit( "[CONVOL ERROR] NB_CLUSTERS must be a divider of NL");
     155        giet_exit( "[CONVOL ERROR] CLUSTERS_MAX must be a divider of NL");
    157156
    158157    if ( NP % nclusters != 0 )
    159         giet_exit( "[CONVOL ERROR] NB_CLUSTERS must be a divider of NP");
     158        giet_exit( "[CONVOL ERROR] CLUSTERS_MAX must be a divider of NP");
    160159
    161160   
     
    166165    if ( (x==0) && (y==0) && (lpid==0) )
    167166    {
    168         // parameters checking
    169         if ( (NP != FBUF_X_SIZE) || (NL != FBUF_Y_SIZE) )
    170             giet_exit("[TRANSPOSE ERROR] Frame buffer size does not fit image size");
    171        
    172         if ((nprocs != 1) && (nprocs != 2) && (nprocs != 4))
    173             giet_exit( "[CONVOL ERROR] NB_PROCS_MAX must be 1, 2 or 4\n");
    174 
    175         if ((X_SIZE!=1) && (X_SIZE!=2) && (X_SIZE!=4) && (X_SIZE!=8) && (X_SIZE!=16))
    176             giet_exit( "[CONVOL ERROR] X_SIZE must be 1, 2, 4, 8, 16\n");
    177        
    178         if ((Y_SIZE!=1) && (Y_SIZE!=2) && (Y_SIZE!=4) && (Y_SIZE!=8) && (Y_SIZE!=16))
    179             giet_exit( "[CONVOL ERROR] Y_SIZE must be 1, 2, 4, 8, 16\n");
    180 
    181         if ( NL % nclusters != 0 )
    182             giet_exit( "[CONVOL ERROR] NB_CLUSTERS must be a divider of NL");
    183 
    184         if ( NP % nclusters != 0 )
    185             giet_exit( "[CONVOL ERROR] NB_CLUSTERS must be a divider of NP");
    186 
    187    
    188167        giet_shr_printf("\n[CONVOL] task[0,0,0] starts barrier init at cycle %d\n"
    189                         "- NB_CLUSTERS     = %d\n"
    190                         "- NB_PROCS_MAX    = %d\n"
    191                         "- NB_TASKS        = %d\n"
    192                         "- NB_BLOCKS       = %x\n",
     168                        "- CLUSTERS  = %d\n"
     169                        "- PROCS     = %d\n"
     170                        "- TASKS     = %d\n"
     171                        "- BLOCKS    = %d\n",
    193172                        giet_proctime(), nclusters, nprocs, ntasks, nblocks );
    194 #if USE_SBT_BARRIER
    195         sbt_barrier_init( &barrier, nclusters , nprocs );
     173#if USE_SQT_BARRIER
     174        sqt_barrier_init( &barrier, x_size , y_size , nprocs );
    196175#else
    197176        barrier_init( &barrier, ntasks );
     
    216195
    217196#if VERBOSE
    218 giet_shr_printf( "\n[CONVOL] task[%d,%d,%d] enters malloc at cycle %d\n", x,y,lpid, date );
     197giet_shr_printf( "\n[CONVOL] task[%d,%d,%d] enters malloc at cycle %d\n",
     198                 x,y,lpid, date );
    219199#endif
    220200
     
    242222
    243223    ///////////////////////////////
    244     #if USE_SBT_BARRIER
    245     sbt_barrier_wait( &barrier );
     224    #if USE_SQT_BARRIER
     225    sqt_barrier_wait( &barrier );
    246226    #else
    247227    barrier_wait( &barrier );
     
    253233    ///////////////////////////////////////////////////////////////////
    254234
    255     unsigned short * A[NB_CLUSTERS];
    256     int *            B[NB_CLUSTERS];
    257     int *            C[NB_CLUSTERS];
    258     int *            D[NB_CLUSTERS];
    259     unsigned char *  Z[NB_CLUSTERS];
     235    unsigned short * A[CLUSTERS_MAX];
     236    int            * B[CLUSTERS_MAX];
     237    int            * C[CLUSTERS_MAX];
     238    int            * D[CLUSTERS_MAX];
     239    unsigned char  * Z[CLUSTERS_MAX];
    260240
    261241    for (c = 0; c < nclusters; c++)
     
    283263                         " at cycle %d\n", giet_proctime() );
    284264
    285         for ( c = 0 ; c < NB_CLUSTERS ; c++ )
     265        for ( c = 0 ; c < nclusters ; c++ )
    286266        {
    287267            giet_shr_printf( "\n[CONVOL] task[0,0,0] starts load "
     
    341321
    342322        ////////////////////////////
    343         #if USE_SBT_BARRIER
    344         sbt_barrier_wait( &barrier );
     323        #if USE_SQT_BARRIER
     324        sqt_barrier_wait( &barrier );
    345325        #else
    346326        barrier_wait( &barrier );
     
    447427
    448428    /////////////////////////////
    449     #if USE_SBT_BARRIER
    450     sbt_barrier_wait( &barrier );
     429    #if USE_SQT_BARRIER
     430    sqt_barrier_wait( &barrier );
    451431    #else
    452432    barrier_wait( &barrier );
     
    567547
    568548    ////////////////////////////
    569     #if USE_SBT_BARRIER
    570     sbt_barrier_wait( &barrier );
     549    #if USE_SQT_BARRIER
     550    sqt_barrier_wait( &barrier );
    571551    #else
    572552    barrier_wait( &barrier );
     
    626606     
    627607    //////////////////////////////
    628     #if USE_SBT_BARRIER
    629     sbt_barrier_wait( &barrier );
     608    #if USE_SQT_BARRIER
     609    sqt_barrier_wait( &barrier );
    630610    #else
    631611    barrier_wait( &barrier );
Note: See TracChangeset for help on using the changeset viewer.