Ignore:
Timestamp:
May 16, 2011, 12:35:03 PM (14 years ago)
Author:
alain
Message:

Introducing Instrumentation

Location:
trunk/softs/soft_transpose_giet
Files:
2 added
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/softs/soft_transpose_giet/main.c

    r158 r171  
    11#include "stdio.h"
    22
    3 #define NL              128
    4 #define NP              128
    5 #define NB_IMAGES       2
    6 #define BLOCK_SIZE      128
     3#define NL              512
     4#define NP              512
     5#define NB_IMAGES       1
     6#define BLOCK_SIZE      512
    77
    88#define PRINTF          if(local_id == 0) tty_printf
     
    2323    unsigned int        image     = 0;
    2424    unsigned int        date      = 0;
    25     unsigned int        delta     = 0;
    2625
    2726    unsigned int        c;                                              // cluster index for loops
     
    4544    {
    4645        PRINTF("NB_PROCS must be 1, 2 or 4\n");
    47 
    48         exit();
    4946    }
    5047    if( (nclusters !=  1) && (nclusters !=  2) && (nclusters !=  4) && (nclusters !=  8) &&
    51         (nclusters != 16) && (nclusters != 32) && (nclusters != 64) && (nclusters !=128) )
    52     {
    53         PRINTF("NB_CLUSTERS must be a power of 2 between 1 and 128\n");
    54         exit();
    55     }
    56     if( ntasks > 128 )
    57     {
    58         PRINTF("NB_PROCS * NB_CLUSTERS cannot be larger than 128 4\n");
    59         exit();
     48        (nclusters != 16) && (nclusters != 32) && (nclusters != 64) && (nclusters !=128) &&
     49        (nclusters != 256) )
     50    {
     51        PRINTF("NB_CLUSTERS must be a power of 1 between 1 and 256\n");
     52    }
     53    if( ntasks > 1024 )
     54    {
     55        PRINTF("NB_PROCS * NB_CLUSTERS cannot be larger than 1024\n");
    6056    }
    6157    if( proc_id >= ntasks )
     
    6460    }
    6561
    66     // Arrays of pointers on the shared, distributed buffers
    67     // containing the images (sized for the worst case : 128 clusters)
    68     unsigned char*      A[128];
    69     unsigned char*      B[128];
     62    // Arrays of pointers on the shared, distributed buffers containing the images
     63    // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters)
     64    unsigned char*      A[256];
     65    unsigned char*      B[256];
    7066   
     67    // Arrays of pointers to the instrumentation arrays
     68    // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters)
     69    // Each pointer points to the base address of an array of NPROCS unsigned int
     70    unsigned int*       LOAD_START[256];
     71    unsigned int*       LOAD_ENDED[256];
     72    unsigned int*       TRSP_START[256];
     73    unsigned int*       TRSP_ENDED[256];
     74    unsigned int*       DISP_START[256];
     75    unsigned int*       DISP_ENDED[256];
     76 
    7177    // shared buffers address definition
    72     // from the seg_heap_base and segment_increment
    73     // values defined in the ldscript file.
    74     // These arrays of pointers are identical and
    75     // replicated in the stack of each task
     78    // from the seg_heap_base and increment depending on the cluster index
     79    // These arrays of pointers are identical and replicated in the stack of each task
    7680    for( c=0 ; c<nclusters ; c++)
    7781    {
    78         A[c] = (unsigned char*)(base + increment*c);
    79         B[c] = (unsigned char*)(base + NL*NP + increment*c);
     82        A[c]          = (unsigned char*)(base           + increment*c);
     83        B[c]          = (unsigned char*)(base + NL*NP   + increment*c);
     84        LOAD_START[c] = (unsigned int*) (base + 2*NL*NP + increment*c);
     85        LOAD_ENDED[c] = (unsigned int*) (base + 3*NL*NP + increment*c);
     86        TRSP_START[c] = (unsigned int*) (base + 4*NL*NP + increment*c);
     87        TRSP_ENDED[c] = (unsigned int*) (base + 5*NL*NP + increment*c);
     88        DISP_START[c] = (unsigned int*) (base + 6*NL*NP + increment*c);
     89        DISP_ENDED[c] = (unsigned int*) (base + 7*NL*NP + increment*c);
    8090    }
    8191
     
    98108        // only task running on processor with (local_id == 0) does it
    99109
    100         delta = proctime() - date;
    101         date  = date + delta;
    102 
    103110        if ( local_id == 0 )
    104111        {
    105             PRINTF("\n*** Starting load for image %d *** at cycle %d (%d)\n", image, date, delta);
     112            int p;
     113
     114            date = proctime();
     115            PRINTF("\n*** Starting load for image %d at cycle %d\n", image, date);
     116            for ( p=0 ; p<nprocs ; p++ ) LOAD_START[cluster_id][p] = date;
    106117
    107118            if( ioc_read(image*nblocks + nblocks*cluster_id/nclusters , A[cluster_id], nblocks/nclusters) )
     
    115126                exit();
    116127            }
    117             delta = proctime() - date;
    118             date  = date + delta;
    119             PRINTF("*** Completing load for image %d *** at cycle %d (%d)\n", image, date, delta);
     128
     129            date = proctime();
     130            PRINTF("*** Completing load for image %d at cycle %d\n", image, date);
     131            for ( p=0 ; p<nprocs ; p++ ) LOAD_ENDED[cluster_id][p] = date;
    120132        }
    121133
     
    126138        // (p,l) are the (x,y) pixel coordinates in the source image
    127139
    128         delta = proctime() - date;
    129         date  = date + delta;
    130 
    131         PRINTF("\n*** Starting transpose for image %d at cycle %d (%d)\n", image, date, delta);
     140
     141        date = proctime();
     142        PRINTF("\n*** Starting transpose for image %d at cycle %d\n", image, date);
     143        TRSP_START[cluster_id][local_id] = date;
    132144
    133145        unsigned int nlt        = NL/ntasks;
     
    148160
    149161        }
    150         delta = proctime() - date;
    151         date  = date + delta;
    152         PRINTF("*** Completing transpose for image %d *** at cycle %d (%d)\n", image, date, delta);
     162        date = proctime();
     163        PRINTF("*** Completing transpose for image %d at cycle %d\n", image, date);
     164        TRSP_ENDED[cluster_id][local_id] = date;
    153165
    154166        barrier_wait(1);
     
    157169        // each processor uses its private dma to display NL*NP/ntasks pixels
    158170
    159         delta = proctime() - date;
    160         date  = date + delta;
    161 
    162         PRINTF("\n*** Starting display for image %d at cycle %d (%d)\n", image, date, delta);
     171        date = proctime();
     172        PRINTF("\n*** Starting display for image %d at cycle %d\n", image, date);
     173        DISP_START[cluster_id][local_id] = date;
    163174
    164175        unsigned int npxt = NL*NP/ntasks;       // number of pixels per task
     
    175186        }
    176187
    177         delta = proctime() - date;
    178         date  = date + delta;
    179         PRINTF("*** Completing display for image %d at cycle %d (%d)\n", image, date, delta);
     188        date = proctime();
     189        PRINTF("*** Completing display for image %d at cycle %d\n", image, date);
     190        DISP_ENDED[cluster_id][local_id] = date;
    180191
    181192        barrier_wait(2);
    182193
     194        // Instrumentation (done by the task with local_id == 0; the guard does not test the cluster index)
     195        if ( local_id == 0 )
     196        {
     197            date = proctime();
     198            PRINTF("\n*** Starting Instrumentation for image %d at cycle %d\n\n", image, date);
     199
     200            int cc, pp;
     201            unsigned int min_load_start = 1000000000;
     202            unsigned int max_load_start = 0;
     203            unsigned int min_load_ended = 1000000000;
     204            unsigned int max_load_ended = 0;
     205            unsigned int min_trsp_start = 1000000000;
     206            unsigned int max_trsp_start = 0;
     207            unsigned int min_trsp_ended = 1000000000;
     208            unsigned int max_trsp_ended = 0;
     209            unsigned int min_disp_start = 1000000000;
     210            unsigned int max_disp_start = 0;
     211            unsigned int min_disp_ended = 1000000000;
     212            unsigned int max_disp_ended = 0;
     213
     214            for ( cc=0 ; cc<nclusters ; cc++ )
     215            {
     216                for ( pp=0 ; pp<nprocs ; pp++ )
     217                {
     218                    if ( LOAD_START[cc][pp] < min_load_start ) min_load_start = LOAD_START[cc][pp];
     219                    if ( LOAD_START[cc][pp] > max_load_start ) max_load_start = LOAD_START[cc][pp];
     220                    if ( LOAD_ENDED[cc][pp] < min_load_ended ) min_load_ended = LOAD_ENDED[cc][pp];
     221                    if ( LOAD_ENDED[cc][pp] > max_load_ended ) max_load_ended = LOAD_ENDED[cc][pp];
     222
     223                    if ( TRSP_START[cc][pp] < min_trsp_start ) min_trsp_start = TRSP_START[cc][pp];
     224                    if ( TRSP_START[cc][pp] > max_trsp_start ) max_trsp_start = TRSP_START[cc][pp];
     225                    if ( TRSP_ENDED[cc][pp] < min_trsp_ended ) min_trsp_ended = TRSP_ENDED[cc][pp];
     226                    if ( TRSP_ENDED[cc][pp] > max_trsp_ended ) max_trsp_ended = TRSP_ENDED[cc][pp];
     227
     228                    if ( DISP_START[cc][pp] < min_disp_start ) min_disp_start = DISP_START[cc][pp];
     229                    if ( DISP_START[cc][pp] > max_disp_start ) max_disp_start = DISP_START[cc][pp];
     230                    if ( DISP_ENDED[cc][pp] < min_disp_ended ) min_disp_ended = DISP_ENDED[cc][pp];
     231                    if ( DISP_ENDED[cc][pp] > max_disp_ended ) max_disp_ended = DISP_ENDED[cc][pp];
     232
     233                }
     234            }
     235            PRINTF(" - LOAD_START : min = %d / max = %d / med = %d / delta = %d\n",
     236            min_load_start, max_load_start, (min_load_start+max_load_start)/2, max_load_start-min_load_start);
     237            PRINTF(" - LOAD_END   : min = %d / max = %d / med = %d / delta = %d\n",
     238            min_load_ended, max_load_ended, (min_load_ended+max_load_ended)/2, max_load_ended-min_load_ended);
     239
     240            PRINTF(" - TRSP_START : min = %d / max = %d / med = %d / delta = %d\n",
     241            min_trsp_start, max_trsp_start, (min_trsp_start+max_trsp_start)/2, max_trsp_start-min_trsp_start);
     242            PRINTF(" - TRSP_END   : min = %d / max = %d / med = %d / delta = %d\n",
     243            min_trsp_ended, max_trsp_ended, (min_trsp_ended+max_trsp_ended)/2, max_trsp_ended-min_trsp_ended);
     244
     245            PRINTF(" - DISP_START : min = %d / max = %d / med = %d / delta = %d\n",
     246            min_disp_start, max_disp_start, (min_disp_start+max_disp_start)/2, max_disp_start-min_disp_start);
     247            PRINTF(" - DISP_END   : min = %d / max = %d / med = %d / delta = %d\n",
     248            min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2, max_disp_ended-min_disp_ended);
     249
     250            PRINTF(" - BARRIER TRSP/DISP = %d\n", min_disp_start - max_trsp_ended);
     251        }
    183252        // next image
    184253        image++;
     254
    185255    } // end while image     
     256
    186257    while(1);
    187258} // end main()
Note: See TracChangeset for help on using the changeset viewer.