Changeset 383 for soft


Ignore:
Timestamp:
Aug 7, 2014, 12:27:17 PM (10 years ago)
Author:
alain
Message:

Update the transpose application to use the new malloc.h and barrier.h libraries.

Location:
soft/giet_vm/transpose
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • soft/giet_vm/transpose/main.c

    r355 r383  
    2020#include "stdio.h"
    2121#include "barrier.h"
     22#include "malloc.h"
    2223
    2324#define NN                  128                 // image size : nlines = npixels = 128
    2425#define NB_IMAGES           5                   // number of images to be handled
    2526#define FILE_PATHNAME       "misc/images.raw"   // file pathname on disk
    26 
     27#define NB_CLUSTERS         (X_SIZE * Y_SIZE)   // number of clusters
    2728#define INSTRUMENTATION_OK  1                   // display statistics on TTY when non zero
    2829
     
    3435// for each processor (up to 4 processors)
    3536// in each cluster (up to 32 clusters)
    36 unsigned int LOAD_START[32][4];
    37 unsigned int LOAD_END  [32][4];
    38 unsigned int TRSP_START[32][4];
    39 unsigned int TRSP_END  [32][4];
    40 unsigned int DISP_START[32][4];
    41 unsigned int DISP_END  [32][4];
     37unsigned int LOAD_START[NB_CLUSTERS][NB_PROCS_MAX];
     38unsigned int LOAD_END  [NB_CLUSTERS][NB_PROCS_MAX];
     39unsigned int TRSP_START[NB_CLUSTERS][NB_PROCS_MAX];
     40unsigned int TRSP_END  [NB_CLUSTERS][NB_PROCS_MAX];
     41unsigned int DISP_START[NB_CLUSTERS][NB_PROCS_MAX];
     42unsigned int DISP_END  [NB_CLUSTERS][NB_PROCS_MAX];
    4243
    4344// arrays of pointers on distributed buffers
    4445// one input buffer & one output buffer per cluster
    45 unsigned char*  buf_in [32];
    46 unsigned char*  buf_out[32];
     46unsigned char*  buf_in [NB_CLUSTERS];
     47unsigned char*  buf_out[NB_CLUSTERS];
    4748
    4849// checksum variables
     
    5051unsigned check_line_after[NN];
    5152
    52 // synchronisation barriers
    53 giet_barrier_t barrier_0;
    54 giet_barrier_t barrier_1;
    55 giet_barrier_t barrier_2;
    56 giet_barrier_t barrier_3;
    57 giet_barrier_t barrier_4;
    58 giet_barrier_t barrier_5;
     53// global synchronisation barrier
     54giet_barrier_t barrier;
    5955
    6056volatile unsigned int init_ok = 1;
     
    6258//////////////////////////////////////////
    6359__attribute__ ((constructor)) void main()
     60//////////////////////////////////////////
    6461{
    6562
     
    10097        }
    10198
    102         barrier_init( &barrier_0, ntasks );
    103         barrier_init( &barrier_1, ntasks );
    104         barrier_init( &barrier_2, ntasks );
    105         barrier_init( &barrier_3, ntasks );
    106         barrier_init( &barrier_4, ntasks );
    107         barrier_init( &barrier_5, ntasks );
     99        barrier_init( &barrier, ntasks );
    108100
    109101        giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes barrier init at cycle %d\n",
     
    135127 
    136128        // allocate buffers in cluster[x,y]
    137         buf_in[cluster_id]  = ((unsigned char*)heap_base) + (cluster_xy << 20);
    138         buf_out[cluster_id] = buf_in[cluster_id] + NN*NN/nclusters;
     129        buf_in[cluster_id]  = remote_malloc( npixels/NB_CLUSTERS, x, y);
     130        buf_out[cluster_id] = remote_malloc( npixels/NB_CLUSTERS, x, y);
    139131
    140132        giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes buffer allocation at cycle %d\n"
     
    163155    }
    164156
    165     ///////////////////////////
    166     barrier_wait( &barrier_0 );
     157    /////////////////////////
     158    barrier_wait( &barrier );
    167159
    168160    // Main loop (on images)
     
    188180        LOAD_END[cluster_id][lpid] = giet_proctime();
    189181
    190         ///////////////////////////
    191         barrier_wait( &barrier_1 );
     182        /////////////////////////
     183        barrier_wait( &barrier );
    192184
    193185        // parallel transpose from buf_in to buf_out
     
    243235        TRSP_END[cluster_id][lpid] = giet_proctime();
    244236
    245         ///////////////////////////
    246         barrier_wait( &barrier_2 );
     237        /////////////////////////
     238        barrier_wait( &barrier );
    247239
    248240        // optional parallel display from local buf_out to frame buffer
     
    268260            DISP_END[cluster_id][lpid] = giet_proctime();
    269261
    270             ///////////////////////////
    271             barrier_wait( &barrier_3 );
     262            /////////////////////////
     263            barrier_wait( &barrier );
    272264        }
    273265
     
    312304        }
    313305
    314         ///////////////////////////
    315         barrier_wait( &barrier_4 );
     306        /////////////////////////
     307        barrier_wait( &barrier );
    316308
    317309        // instrumentation done by processor [0,0,0]
     
    382374        // all tasks must wait for instrumentation completion
    383375        //////////////////////////////////////////////////
    384         barrier_wait( &barrier_5 );
     376        barrier_wait( &barrier );
    385377
    386378    } // end while image     
  • soft/giet_vm/transpose/transpose.py

    r336 r383  
    3131    # define vsegs base & size
    3232    code_base  = 0x10000000
    33     code_size  = 0x00010000     # 64 Kbytes
     33    code_size  = 0x00010000     # 64 Kbytes (replicated in each cluster)
    3434   
    3535    data_base  = 0x20000000
    36     data_size  = 0x00010000     # 64 Kbytes
     36    data_size  = 0x00010000     # 64 Kbytes (non replicated)
    3737
    3838    ptab_base  = 0x30000000
    39     ptab_size  = 0x00040000     # 256 Kbytes
     39    ptab_size  = 0x00040000     # 256 Kbytes (replicated in each cluster)
    4040
    4141    stack_base = 0x40000000
    42     stack_size = 0x00010000     # 64 Kbytes
     42    stack_size = 0x00100000     # 1 Mbyte (to be divided between all tasks)
    4343
    4444    heap_base  = 0x50000000
    45     heap_size  = 0x00010000     # 64 Kbytes
     45    heap_size  = 0x00010000     # 64 Kbytes (to be shared by all tasks)
    4646
    47     # create Vspace
     47    # create vspace
    4848    vspace = mapping.addVspace( name = 'transpose', startname = 'trsp_data' )
    4949   
    50     # non replicated vsegs in cluster[0,0]
    51     mapping.addVseg( vspace, 'trsp_code', code_base , code_size, 'CXWU', vtype = 'ELF',
    52                      x = 0, y = 0, pseg = 'RAM', binpath = 'build/transpose/transpose.elf' )
     50    # data vseg : shared (only in cluster[0,0])
     51    mapping.addVseg( vspace, 'trsp_data', data_base , data_size,
     52                     'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM',
     53                     binpath = 'build/transpose/transpose.elf',
     54                     local = False )
    5355
    54     mapping.addVseg( vspace, 'trsp_data', data_base , data_size, 'C_WU', vtype = 'ELF',
    55                      x = 0, y = 0, pseg = 'RAM', binpath = 'build/transpose/transpose.elf' )
     56    # code vsegs : local (one copy in each cluster)
     57    for x in xrange (x_size):
     58        for y in xrange (y_size):
     59            mapping.addVseg( vspace, 'trsp_code_%d_%d' %(x,y), code_base , code_size,
     60                             'CXWU', vtype = 'ELF', x = x, y = y, pseg = 'RAM',
     61                             binpath = 'build/transpose/transpose.elf',
     62                             local = True )
    5663
    57     mapping.addVseg( vspace, 'trsp_ptab', ptab_base , ptab_size, 'C_WU', vtype = 'PTAB',
    58                      x = 0, y = 0, pseg = 'RAM', align = 13 )
     64    # ptab vsegs : local (one specific ptab per cluster)
     65    for x in xrange (x_size):
     66        for y in xrange (y_size):
     67            mapping.addVseg( vspace, 'trsp_ptab_%d_%d' %(x,y), ptab_base , ptab_size,
     68                            'C_WU', vtype = 'PTAB', x = x, y = y, pseg = 'RAM',
     69                            align = 13,
     70                            local = True )
    5971
    60     # distributed vsegs: one stack per processor/task, one heap per cluster
    61     for x_rep in xrange (x_size):
    62         for y_rep in xrange (y_size):
    63             cluster_offset = ((x_rep << y_width) + y_rep) << 20  # 1 Mbytes per cluster
    64             mapping.addVseg( vspace, 'trsp_heap_%d_%d' % (x_rep, y_rep),
    65                              heap_base + cluster_offset, heap_size, 'C_WU',
    66                              vtype = 'BUFFER', x = x_rep, y = y_rep, pseg = 'RAM' )
    67            
     72    # stack vsegs: local (one stack per processor, procs_max stacks per cluster)
     73    for x in xrange (x_size):
     74        for y in xrange (y_size):
    6875            for p in xrange( procs_max ):
    69                 proc_offset = cluster_offset + (p << 18)         # 256 Kbytes per proc
    70                 mapping.addVseg( vspace, 'trsp_stack_%d_%d_%d' % (x_rep, y_rep, p),
    71                                  stack_base + proc_offset, stack_size, 'C_WU',
    72                                  vtype = 'BUFFER', x = x_rep, y = y_rep, pseg = 'RAM' )
    73            
     76                proc_id = (((x * y_size) + y) * procs_max) + p
     77                size    = stack_size / (x_size * y_size * procs_max)
     78                base    = stack_base + (proc_id * size)
     79                mapping.addVseg( vspace, 'trsp_stack_%d_%d_%d' % (x,y,p), base, size,
     80                                 'C_WU', vtype = 'BUFFER', x = x , y = y , pseg = 'RAM',
     81                                 local = True )
     82
     83    # heap vsegs: shared (all heap segments can be accessed by all tasks)
     84    for x in xrange (x_size):
     85        for y in xrange (y_size):
     86            cluster_id = (x * y_size) + y
     87            size  = heap_size / (x_size * y_size)
     88            base  = heap_base + (cluster_id * size)
     89            mapping.addVseg( vspace, 'trsp_heap_%d_%d' % (x,y), base, size,
     90                             'C_WU', vtype = 'BUFFER', x = x, y = y, pseg = 'RAM',
     91                             local = False )
     92
    7493    # distributed tasks / one task per processor
    7594    for x in xrange (x_size):
    7695        for y in xrange (y_size):
    7796            for p in xrange( procs_max ):
    78 
    7997                trdid = (((x * y_size) + y) * procs_max) + p
    80                 mapping.addTask( vspace, 'sort_%d_%d_%d' % (x,y,p), trdid, x, y, p,
     98                mapping.addTask( vspace, 'trsp_%d_%d_%d' % (x,y,p), trdid, x, y, p,
    8199                                 'trsp_stack_%d_%d_%d' % (x,y,p),
    82100                                 'trsp_heap_%d_%d' % (x,y), 0 )
Note: See TracChangeset for help on using the changeset viewer.