Ignore:
Timestamp:
Jul 8, 2015, 3:57:15 PM (9 years ago)
Author:
alain
Message:

Modify all applications to support two new rules:
1) introduce a local Makefile for each application.
2) change "application.elf" name to "application/appli.elf" name in the "application.py" file.
Introduce the shell application.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • soft/giet_vm/applications/transpose/main.c

    r574 r589  
    44// author : Alain Greiner
    55///////////////////////////////////////////////////////////////////////////////////////
    6 // This multi-threaded application makes a transpose for a NN*NN pixels
    7 // sequence of images.
     6// This multi-threaded application makes a transpose for a NN*NN pixels image.
    87// It can run on a multi-processors, multi-clusters architecture, with one thread
    98// per processor.
    109//
    11 // The image sequence is read from a file (one byte per pixel).
     10// The image is read from a file (one byte per pixel), transposed and
     11// saved in a second file. Then the transposed image is read from the second file,
     12// transposed again and saved in a third file.
     13//
    1214// The input and output buffers containing the image are distributed in all clusters.
    1315//
    14 // - The image size NN must fit the frame buffer size: 128 bytes
     16// - The image size NN must fit the frame buffer size.
    1517// - The block size in block device must be 512 bytes.
    16 // - The number of clusters  must be a power of 2 no larger than 32
    17 // - The number of processors per cluster must be a power of 2 no larger than 4
     18// - The number of clusters  must be a power of 2 no larger than 64.
     19// - The number of processors per cluster must be a power of 2 no larger than 4.
    1820//
    1921// For each image the application makes a self test (checksum for each line).
     
    2527#include "malloc.h"
    2628
    27 #define BLOCK_SIZE          512                 // block size on disk
    28 #define CLUSTERS_MAX        32                  // max number of clusters
    29 #define PROCS_MAX           4                   // max number of processors per cluster
    30 #define NN                  256                 // image size : nlines = npixels
    31 #define NB_IMAGES           1                   // number of images to be handled
    32 #define FILE_PATHNAME       "misc/lena.raw"     // pathname on virtual disk
    33 #define INSTRUMENTATION_OK  0                   // display statistics on TTY when non zero
     29#define BLOCK_SIZE            512                         // block size on disk
     30#define X_MAX                 8                           // max number of clusters in row
     31#define Y_MAX                 8                           // max number of clusters in column
     32#define PROCS_MAX             4                           // max number of procs per cluster
     33#define CLUSTER_MAX           (X_MAX * Y_MAX)             // max number of clusters
     34#define NN                    256                         // image size : nlines = npixels
     35#define INITIAL_FILE_PATH     "misc/lena.raw"             // pathname on virtual disk
     36#define TRANSPOSED_FILE_PATH  "/home/lena_transposed.raw" // pathname on virtual disk
     37#define RESTORED_FILE_PATH    "/home/lena_restored.raw"   // pathname on virtual disk
     38#define INSTRUMENTATION_OK    1                           // display statistics on TTY
    3439
    3540///////////////////////////////////////////////////////
     
    3843
    3944// instrumentation counters for each processor in each cluster
    40 unsigned int LOAD_START[CLUSTERS_MAX][PROCS_MAX];
    41 unsigned int LOAD_END  [CLUSTERS_MAX][PROCS_MAX];
    42 unsigned int TRSP_START[CLUSTERS_MAX][PROCS_MAX];
    43 unsigned int TRSP_END  [CLUSTERS_MAX][PROCS_MAX];
    44 unsigned int DISP_START[CLUSTERS_MAX][PROCS_MAX];
    45 unsigned int DISP_END  [CLUSTERS_MAX][PROCS_MAX];
     45unsigned int LOAD_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
     46unsigned int LOAD_END  [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
     47unsigned int TRSP_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
     48unsigned int TRSP_END  [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
     49unsigned int DISP_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
     50unsigned int DISP_END  [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
     51unsigned int STOR_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
     52unsigned int STOR_END  [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}};
    4653
    4754// arrays of pointers on distributed buffers
    4855// one input buffer & one output buffer per cluster
    49 unsigned char*  buf_in [CLUSTERS_MAX];
    50 unsigned char*  buf_out[CLUSTERS_MAX];
     56unsigned char*  buf_in [CLUSTER_MAX];
     57unsigned char*  buf_out[CLUSTER_MAX];
    5158
    5259// checksum variables
     
    5764giet_sqt_barrier_t barrier;
    5865
    59 volatile unsigned int init_ok = 0;
     66volatile unsigned int global_init_ok = 0;
     67volatile unsigned int local_init_ok[X_MAX][Y_MAX] = {{ 0 }};
    6068
    6169//////////////////////////////////////////
     
    6371//////////////////////////////////////////
    6472{
    65 
    6673    unsigned int l;                  // line index for loops
    6774    unsigned int p;                  // pixel index for loops
    68     unsigned int c;                  // cluster index for loops
    6975
    7076    // processor identifiers
     
    8288    giet_procs_number( &x_size , &y_size , &nprocs );
    8389
    84     unsigned int nclusters  = x_size * y_size;               // number of clusters
    85     unsigned int ntasks     = x_size * y_size * nprocs;      // number of tasks
    86     unsigned int npixels    = NN * NN;                       // pixels per image
    87     unsigned int nblocks    = npixels / BLOCK_SIZE;          // blocks per image
    88     unsigned int image      = 0;                             // image counter
    89     int          file       = 0;                             // file descriptor
    90     unsigned int cluster_id = (x * y_size) + y;              // "continuous" index   
    91     unsigned int task_id    = (cluster_id * nprocs) + lpid;  // "continuous" task index
    92 
    93     // Processor [0,0,0] makes initialisation
    94     // It includes parameters checking, barrier initialization,
    95     // distributed buffers allocation, and file open
     90    unsigned int nclusters     = x_size * y_size;               // number of clusters
     91    unsigned int ntasks        = x_size * y_size * nprocs;      // number of tasks
     92    unsigned int npixels       = NN * NN;                       // pixels per image
     93    unsigned int iteration     = 0;                             // iteration index
     94    int          fd_initial    = 0;                             // initial file descriptor
     95    int          fd_transposed = 0;                             // transposed file descriptor
     96    int          fd_restored   = 0;                             // restored file descriptor
     97    unsigned int cluster_id    = (x * y_size) + y;              // "continuous" index   
     98    unsigned int task_id       = (cluster_id * nprocs) + lpid;  // "continuous" task index
     99
     100
     101    ///////////////////////////////////////////////////////////////////////
     102    // Processor [0,0,0] makes global initialisation
     103    // It includes parameters checking, heap and barrier initialization.
     104    // Other processors wait for initialisation completion
     105    ///////////////////////////////////////////////////////////////////////
     106
    96107    if ( (x==0) && (y==0) && (lpid==0) )
    97108    {
     
    101112        }
    102113        if ((nclusters != 1) && (nclusters != 2) && (nclusters != 4) &&
    103             (nclusters != 8) && (nclusters != 16) && (nclusters != 32) )
    104         {
    105             giet_exit("[TRANSPOSE ERROR] number of clusters must be 1,2,4,8,16,32");
     114            (nclusters != 8) && (nclusters != 16) && (nclusters != 32) && (nclusters != 64) )
     115        {
     116            giet_exit("[TRANSPOSE ERROR] number of clusters must be 1,2,4,8,16,32,64");
    106117        }
    107118        if ( ntasks > NN )
     
    110121        }
    111122
    112         // Distributed buffers allocation
    113         // The buffers containing one image are distributed in the user
    114         // heap (one buf_in and one buf_out per cluster).
    115         // Each buffer contains (NN*NN / nclusters) bytes.
    116         for ( c = 0 ; c < nclusters ; c++ )
    117         {
    118             unsigned int rx = c / y_size;
    119             unsigned int ry = c % y_size;
    120 
    121             heap_init( rx, ry );
    122             buf_in[c]  = remote_malloc( npixels/nclusters, rx, ry );
    123             buf_out[c] = remote_malloc( npixels/nclusters, rx, ry );
    124 
    125             giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] completes buffer allocation"
    126                             " for cluster[%d,%d] at cycle %d\n"
    127                             " - buf_in  = %x\n"
    128                             " - buf_out = %x\n",
    129                             rx, ry, giet_proctime(),
    130                             (unsigned int)buf_in[c],
    131                             (unsigned int)buf_out[c] );
    132         }
    133 
    134         // Barrier initialisation
     123        // distributed heap initialisation
     124        unsigned int cx , cy;
     125        for ( cx = 0 ; cx < x_size ; cx++ )
     126        {
     127            for ( cy = 0 ; cy < y_size ; cy++ )
     128            {
     129                heap_init( cx , cy );
     130            }
     131        }
     132
     133        // barrier initialisation
    135134        sqt_barrier_init( &barrier, x_size , y_size , nprocs );
    136135
    137         giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] completes barrier init at cycle %d\n",
     136        giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] completes heap & barrier init at cycle %d\n",
    138137                        giet_proctime() );
    139138
    140         // open file containing images
    141         file = giet_fat_open( FILE_PATHNAME , 0 );
    142 
    143         if (file < 0)
    144         {
    145             giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d]"
    146                             " cannot open file %s",
    147                             x , y , lpid , FILE_PATHNAME );
    148             giet_exit(" open() failure");
    149         }
    150         else
    151         {
    152             giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file misc/images.raw\n");
    153         }
    154         init_ok = 1;
     139        // display disk content
     140        giet_fat_list( "/" );
     141        giet_fat_list( "/misc" );
     142        giet_fat_list( "/home" );
     143        giet_fat_list( "/build" );
     144        giet_fat_list( "/build/kernel" );
     145        giet_fat_list( "/build/transpose" );
     146
     147        global_init_ok = 1;
    155148    }
    156     else   // others processors wait initialisation completion
     149    else 
    157150    {
    158         while ( init_ok == 0 );
     151        while ( global_init_ok == 0 );
    159152    }
    160153   
    161     /////////////////////////
    162     // Main loop (on images)
    163     while (image < NB_IMAGES)
     154    ///////////////////////////////////////////////////////////////////////
     155    // In each cluster, only task running on processor[x,y,0] allocates
     156    // the local buffers containing the images in the distributed heap
     157    // (one buf_in and one buf_out per cluster).
     158    // Other processors in cluster wait completion.
     159    ///////////////////////////////////////////////////////////////////////
     160
     161    if ( lpid == 0 )
    164162    {
    165         // pseudo parallel load from disk to buf_in buffer : nblocks/nclusters blocks
    166         // only task running on processor with (lpid == 0) does it
    167 
    168         LOAD_START[cluster_id][lpid] = giet_proctime();
     163        buf_in[cluster_id]  = remote_malloc( npixels/nclusters, x, y );
     164        buf_out[cluster_id] = remote_malloc( npixels/nclusters, x, y );
     165
     166        if ( (x==0) && (y==0) )
     167        giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes buffer allocation"
     168                        " for cluster[%d,%d] at cycle %d\n"
     169                        " - buf_in  = %x\n"
     170                        " - buf_out = %x\n",
     171                        x, y, lpid, x, y, giet_proctime(),
     172                        (unsigned int)buf_in[cluster_id], (unsigned int)buf_out[cluster_id] );
     173
     174        ///////////////////////////////////////////////////////////////////////
     175        // In each cluster, only task running on processor[x,y,0] opens the
     176        // three private file descriptors for the three files
     177        ///////////////////////////////////////////////////////////////////////
     178
     179        // open initial file
     180        fd_initial = giet_fat_open( INITIAL_FILE_PATH , O_RDONLY );  // read_only
     181        if ( fd_initial < 0 )
     182        {
     183            giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot open file %s\n",
     184                            x , y , lpid , INITIAL_FILE_PATH );
     185            giet_exit(" open() failure");
     186        }
     187        else if ( (x==0) && (y==0) && (lpid==0) )
     188        {
     189            giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file %s / fd = %d\n",
     190                            INITIAL_FILE_PATH , fd_initial );
     191        }
     192
     193        // open transposed file
     194        fd_transposed = giet_fat_open( TRANSPOSED_FILE_PATH , O_CREATE );   // create if required
     195        if ( fd_transposed < 0 )
     196        {
     197            giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot open file %s\n",
     198                            x , y , lpid , TRANSPOSED_FILE_PATH );
     199            giet_exit(" open() failure");
     200        }
     201        else if ( (x==0) && (y==0) && (lpid==0) )
     202        {
     203            giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file %s / fd = %d\n",
     204                            TRANSPOSED_FILE_PATH , fd_transposed );
     205        }
     206
     207        // open restored file
     208        fd_restored = giet_fat_open( RESTORED_FILE_PATH , O_CREATE );   // create if required
     209        if ( fd_restored < 0 )
     210        {
     211            giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot open file %s\n",
     212                            x , y , lpid , RESTORED_FILE_PATH );
     213            giet_exit(" open() failure");
     214        }
     215        else if ( (x==0) && (y==0) && (lpid==0) )
     216        {
     217            giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file %s / fd = %d\n",
     218                            RESTORED_FILE_PATH , fd_restored );
     219        }
     220
     221        local_init_ok[x][y] = 1;
     222    }
     223    else
     224    {
     225        while( local_init_ok[x][y] == 0 );
     226    }
     227
     228    ///////////////////////////////////////////////////////////////////////
     229    // Main loop / two iterations:
     230    // - first makes  initial    => transposed
     231    // - second makes transposed => restored
     232    // All processors execute this main loop.
     233    ///////////////////////////////////////////////////////////////////////
     234
     235    unsigned int fd_in  = fd_initial;
     236    unsigned int fd_out = fd_transposed;
     237
     238    while (iteration < 2)
     239    {
     240        ///////////////////////////////////////////////////////////////////////
     241        // pseudo parallel load from disk to buf_in buffers: npixels/nclusters
     242        // only task running on processor(x,y,0) does it
     243        ///////////////////////////////////////////////////////////////////////
     244
     245        LOAD_START[x][y][lpid] = giet_proctime();
    169246
    170247        if (lpid == 0)
    171248        {
    172             giet_fat_read( file,
    173                            buf_in[cluster_id],
    174                            (nblocks / nclusters),
    175                            ((image*nblocks) + ((nblocks*cluster_id)/nclusters)) );
     249            unsigned int offset = ((npixels*cluster_id)/nclusters);
     250            if ( giet_fat_lseek( fd_in,
     251                                 offset,
     252                                 SEEK_SET ) != offset )
     253            {
     254                giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot seek fd = %d\n",
     255                                x , y , lpid , fd_in );
     256                giet_exit(" seek() failure");
     257            }
     258
     259            unsigned int pixels = npixels / nclusters;
     260            if ( giet_fat_read( fd_in,
     261                                buf_in[cluster_id],
     262                                pixels ) != pixels )
     263            {
     264                giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot read fd = %d\n",
     265                                x , y , lpid , fd_in );
     266                giet_exit(" read() failure");
     267            }
    176268
    177269            if ( (x==0) && (y==0) )
    178270            giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes load"
    179                             "  for image %d at cycle %d\n",
    180                             x, y, lpid, image, giet_proctime() );
    181         }
    182 
    183         LOAD_END[cluster_id][lpid] = giet_proctime();
     271                            "  for iteration %d at cycle %d\n",
     272                            x, y, lpid, iteration, giet_proctime() );
     273        }
     274
     275        LOAD_END[x][y][lpid] = giet_proctime();
    184276
    185277        /////////////////////////////
    186278        sqt_barrier_wait( &barrier );
    187279
     280        ///////////////////////////////////////////////////////////////////////
    188281        // parallel transpose from buf_in to buf_out
    189282        // each task makes the transposition for nlt lines (nlt = NN/ntasks)
    190283        // from line [task_id*nlt] to line [(task_id + 1)*nlt - 1]
    191284        // (p,l) are the absolute pixel coordinates in the source image
    192 
    193 
    194         TRSP_START[cluster_id][lpid] = giet_proctime();
     285        ///////////////////////////////////////////////////////////////////////
     286
     287        TRSP_START[x][y][lpid] = giet_proctime();
    195288
    196289        unsigned int nlt   = NN / ntasks;      // number of lines per task
     
    233326            if ( (x==0) && (y==0) )
    234327            giet_shr_printf("\n[TRANSPOSE] proc [%d,%d,0] completes transpose"
    235                             " for image %d at cycle %d\n",
    236                             x, y, image, giet_proctime() );
    237 
    238         }
    239         TRSP_END[cluster_id][lpid] = giet_proctime();
     328                            " for iteration %d at cycle %d\n",
     329                            x, y, iteration, giet_proctime() );
     330
     331        }
     332        TRSP_END[x][y][lpid] = giet_proctime();
    240333
    241334        /////////////////////////////
    242335        sqt_barrier_wait( &barrier );
    243336
    244 
    245         if ( USE_FBF )  // external frame buffer available
    246         {
    247             // parallel display from local buf_out to frame buffer
    248             // all processors contribute to display using memcpy...
    249 
    250             DISP_START[cluster_id][lpid] = giet_proctime();
    251 
    252             unsigned int  npt   = npixels / ntasks;   // number of pixels per task
    253 
    254             giet_fbf_sync_write( npt * task_id,
    255                                  &buf_out[cluster_id][lpid*npt],
    256                                  npt );
    257 
    258             if ( (x==0) && (y==0) && (lpid==0) )
    259             giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes display"
    260                             " for image %d at cycle %d\n",
    261                             x, y, lpid, image, giet_proctime() );
    262 
    263             DISP_END[cluster_id][lpid] = giet_proctime();
    264 
    265             /////////////////////////////
    266             sqt_barrier_wait( &barrier );
    267         }
    268         else         // checksum by processor(x,y,0) in each cluster
    269         {
    270             if ( lpid == 0 )
    271             {
    272                 unsigned int success = 1;
    273                 unsigned int start   = cluster_id * nlc;
    274                 unsigned int stop    = start + nlc;
    275 
    276                 for ( l = start ; l < stop ; l++ )
    277                 {
    278                     check_line_after[l] = 0;
    279 
    280                     for ( p = 0 ; p < NN ; p++ )
    281                     {
    282                         // read one byte in remote buffer
    283                         src_cluster = p / nlc;
    284                         src_index   = (p % nlc)*NN + l;
    285 
    286                         unsigned char byte = buf_out[src_cluster][src_index];
    287 
    288                         check_line_after[l] = check_line_after[l] + byte;
    289                     }
    290 
    291                     if ( check_line_before[l] != check_line_after[l] ) success = 0;
    292                 }
    293 
    294                 if ( success )
    295                 {
    296                     giet_shr_printf("\n[TRANSPOSE] proc [%d,%d,0] checksum OK"
    297                                     " for image %d at cycle %d\n",
    298                                     x, y, image, giet_proctime() );
    299                 }
    300                 else
    301                 {
    302                     giet_shr_printf("\n[TRANSPOSE] proc [%d,%d,0] checksum KO"
    303                                     " for image %d at cycle %d\n",
    304                                     x, y, image, giet_proctime() );
    305                 }
    306             }
    307         }
     337        ///////////////////////////////////////////////////////////////////////
     338        // parallel display from local buf_out to frame buffer
     339        // all tasks contribute to display using memcpy...
     340        ///////////////////////////////////////////////////////////////////////
     341
     342        DISP_START[x][y][lpid] = giet_proctime();
     343
     344        unsigned int  npt   = npixels / ntasks;   // number of pixels per task
     345
     346        giet_fbf_sync_write( npt * task_id,
     347                             &buf_out[cluster_id][lpid*npt],
     348                             npt );
     349
     350        if ( (x==0) && (y==0) && (lpid==0) )
     351        giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes display"
     352                        " for iteration %d at cycle %d\n",
     353                        x, y, lpid, iteration, giet_proctime() );
     354
     355        DISP_END[x][y][lpid] = giet_proctime();
    308356
    309357        /////////////////////////////
    310358        sqt_barrier_wait( &barrier );
    311359
     360        ///////////////////////////////////////////////////////////////////////
     361        // pseudo parallel store : buf_out buffers to disk : npixels/nclusters
     362        // only task running on processor(x,y,0) does it
     363        ///////////////////////////////////////////////////////////////////////
     364
     365        STOR_START[x][y][lpid] = giet_proctime();
     366
     367        if ( lpid == 0 )
     368        {
     369            unsigned int offset = ((npixels*cluster_id)/nclusters);
     370            if ( giet_fat_lseek( fd_out,
     371                                 offset,
     372                                 SEEK_SET ) != offset )
     373            {
     374                giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot seek fr = %d\n",
     375                                x , y , lpid , fd_out );
     376                giet_exit(" seek() failure");
     377            }
     378
     379            unsigned int pixels = npixels / nclusters;
     380            if ( giet_fat_write( fd_out,
     381                                 buf_out[cluster_id],
     382                                 pixels ) != pixels )
     383            {
     384                giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot write fd = %d\n",
     385                                x , y , lpid , fd_out );
     386                giet_exit(" write() failure");
     387            }
     388
     389            if ( (x==0) && (y==0) )
     390            giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes store"
     391                            "  for iteration %d at cycle %d\n",
     392                            x, y, lpid, iteration, giet_proctime() );
     393        }
     394
     395        STOR_END[x][y][lpid] = giet_proctime();
     396
     397        /////////////////////////////
     398        sqt_barrier_wait( &barrier );
     399
    312400        // instrumentation done by processor [0,0,0]
    313401        if ( (x==0) && (y==0) && (lpid==0) && INSTRUMENTATION_OK )
    314402        {
    315             int cc, pp;
     403            int cx , cy , pp ;
    316404            unsigned int min_load_start = 0xFFFFFFFF;
    317405            unsigned int max_load_start = 0;
     
    326414            unsigned int min_disp_ended = 0xFFFFFFFF;
    327415            unsigned int max_disp_ended = 0;
    328 
    329             for (cc = 0; cc < nclusters; cc++)
    330             {
    331                 for (pp = 0; pp < NB_PROCS_MAX; pp++)
    332                 {
    333                     if (LOAD_START[cc][pp] < min_load_start)  min_load_start = LOAD_START[cc][pp];
    334                     if (LOAD_START[cc][pp] > max_load_start)  max_load_start = LOAD_START[cc][pp];
    335                     if (LOAD_END[cc][pp]   < min_load_ended)  min_load_ended = LOAD_END[cc][pp];
    336                     if (LOAD_END[cc][pp]   > max_load_ended)  max_load_ended = LOAD_END[cc][pp];
    337                     if (TRSP_START[cc][pp] < min_trsp_start)  min_trsp_start = TRSP_START[cc][pp];
    338                     if (TRSP_START[cc][pp] > max_trsp_start)  max_trsp_start = TRSP_START[cc][pp];
    339                     if (TRSP_END[cc][pp]   < min_trsp_ended)  min_trsp_ended = TRSP_END[cc][pp];
    340                     if (TRSP_END[cc][pp]   > max_trsp_ended)  max_trsp_ended = TRSP_END[cc][pp];
    341                     if (DISP_START[cc][pp] < min_disp_start)  min_disp_start = DISP_START[cc][pp];
    342                     if (DISP_START[cc][pp] > max_disp_start)  max_disp_start = DISP_START[cc][pp];
    343                     if (DISP_END[cc][pp]   < min_disp_ended)  min_disp_ended = DISP_END[cc][pp];
    344                     if (DISP_END[cc][pp]   > max_disp_ended)  max_disp_ended = DISP_END[cc][pp];
    345                 }
    346             }
     416            unsigned int min_stor_start = 0xFFFFFFFF;
     417            unsigned int max_stor_start = 0;
     418            unsigned int min_stor_ended = 0xFFFFFFFF;
     419            unsigned int max_stor_ended = 0;
     420
     421            for (cx = 0; cx < x_size; cx++)
     422            {
     423            for (cy = 0; cy < y_size; cy++)
     424            {
     425            for (pp = 0; pp < NB_PROCS_MAX; pp++)
     426            {
     427                if (LOAD_START[cx][cy][pp] < min_load_start)  min_load_start = LOAD_START[cx][cy][pp];
     428                if (LOAD_START[cx][cy][pp] > max_load_start)  max_load_start = LOAD_START[cx][cy][pp];
     429                if (LOAD_END[cx][cy][pp]   < min_load_ended)  min_load_ended = LOAD_END[cx][cy][pp];
     430                if (LOAD_END[cx][cy][pp]   > max_load_ended)  max_load_ended = LOAD_END[cx][cy][pp];
     431                if (TRSP_START[cx][cy][pp] < min_trsp_start)  min_trsp_start = TRSP_START[cx][cy][pp];
     432                if (TRSP_START[cx][cy][pp] > max_trsp_start)  max_trsp_start = TRSP_START[cx][cy][pp];
     433                if (TRSP_END[cx][cy][pp]   < min_trsp_ended)  min_trsp_ended = TRSP_END[cx][cy][pp];
     434                if (TRSP_END[cx][cy][pp]   > max_trsp_ended)  max_trsp_ended = TRSP_END[cx][cy][pp];
     435                if (DISP_START[cx][cy][pp] < min_disp_start)  min_disp_start = DISP_START[cx][cy][pp];
     436                if (DISP_START[cx][cy][pp] > max_disp_start)  max_disp_start = DISP_START[cx][cy][pp];
     437                if (DISP_END[cx][cy][pp]   < min_disp_ended)  min_disp_ended = DISP_END[cx][cy][pp];
     438                if (DISP_END[cx][cy][pp]   > max_disp_ended)  max_disp_ended = DISP_END[cx][cy][pp];
     439                if (STOR_START[cx][cy][pp] < min_stor_start)  min_stor_start = STOR_START[cx][cy][pp];
     440                if (STOR_START[cx][cy][pp] > max_stor_start)  max_stor_start = STOR_START[cx][cy][pp];
     441                if (STOR_END[cx][cy][pp]   < min_stor_ended)  min_stor_ended = STOR_END[cx][cy][pp];
     442                if (STOR_END[cx][cy][pp]   > max_stor_ended)  max_stor_ended = STOR_END[cx][cy][pp];
     443            }
     444            }
     445            }
     446
     447            giet_shr_printf("\n   ---------------- Instrumentation Results ---------------------\n");
    347448
    348449            giet_shr_printf(" - LOAD_START : min = %d / max = %d / med = %d / delta = %d\n",
     
    369470                            min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2,
    370471                            max_disp_ended-min_disp_ended);
    371         }
    372 
    373         image++;
     472
     473            giet_shr_printf(" - STOR_START : min = %d / max = %d / med = %d / delta = %d\n",
     474                            min_stor_start, max_stor_start, (min_stor_start+max_stor_start)/2,
     475                            max_stor_start-min_stor_start);
     476
     477            giet_shr_printf(" - STOR_END   : min = %d / max = %d / med = %d / delta = %d\n",
     478                            min_stor_ended, max_stor_ended, (min_stor_ended+max_stor_ended)/2,
     479                            max_stor_ended-min_stor_ended);
     480        }
    374481
    375482        /////////////////////////////
    376483        sqt_barrier_wait( &barrier );
    377484
    378     } // end while image     
    379 
    380     // Processor[0,0,0] releases the Distributed buffers
    381     if ( (x==0) && (y==0) && (lpid==0) )
     485        // update iteration variables
     486        fd_in  = fd_transposed;
     487        fd_out = fd_restored;
     488        iteration++;
     489
     490    } // end while     
     491
     492    ///////////////////////////////////////////////////////////////////////
     493    // In each cluster, only task running on Processor[x,y,0] releases
     494    // the distributed buffers and closes the file descriptors.
     495    ///////////////////////////////////////////////////////////////////////
     496
     497    if ( lpid==0 )
    382498    {
    383         for ( c = 0 ; c < nclusters ; c++ )
    384         {
    385             free( buf_in[c] );
    386             free( buf_in[c] );
    387         }
     499        free( buf_in[cluster_id] );
     500        free( buf_out[cluster_id] );
     501
     502        giet_fat_close( fd_initial );
     503        giet_fat_close( fd_transposed );
     504        giet_fat_close( fd_restored );
    388505    }
    389506
     507    // display disk content
     508    if ( (x==0) && (y == 0) && (lpid == 0) )
     509    {
     510        giet_fat_list( "/" );
     511        giet_fat_list( "/misc" );
     512        giet_fat_list( "/home" );
     513        giet_fat_list( "/build" );
     514        giet_fat_list( "/build/kernel" );
     515        giet_fat_list( "/build/transpose" );
     516
     517        giet_fat_remove( "/home/lena_transposed" , 0 );
     518        giet_fat_remove( "/home/lena_restored" , 0 );
     519
     520        giet_fat_remove( "/home" , 1 );
     521
     522        giet_fat_list( "/" );
     523        giet_fat_list( "/misc" );
     524        giet_fat_list( "/home" );
     525        giet_fat_list( "/build" );
     526        giet_fat_list( "/build/kernel" );
     527        giet_fat_list( "/build/transpose" );
     528    }
     529   
    390530    giet_exit("Completed");
    391531
Note: See TracChangeset for help on using the changeset viewer.