Ignore:
Timestamp:
Feb 8, 2015, 9:20:45 PM (9 years ago)
Author:
alain
Message:

1) Introduce distributed barriers in the multi-threads applications
(classif) transpose, convol, sort, gameoflife)

2) Introducing support for architectures containing empty clusters
in the mapping of these multi-threaded applications.

3) Removing the "command line arguments" in the sort application
(replaced by the giet_procs_number() system call.

Location:
soft/giet_vm/applications/classif
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • soft/giet_vm/applications/classif/classif.py

    r488 r502  
    33from mapping import *
    44
    5 ######################################################################################
     5###################################################################################
    66#   file   : classif.py 
    77#   date   : november 2014
    88#   author : Alain Greiner
    9 #######################################################################################
     9###################################################################################
    1010#  This file describes the mapping of the multi-threaded "classif"
    1111#  application on a multi-clusters, multi-processors architecture.
    1212#  The mapping of tasks on processors is the following:
    13 #  - one "load" task per cluster,
    14 #  - one "store" task per cluster,
    15 #  - (nprocs-2) "analyse" task per cluster.
    16 #  The mapping of virtual segments on the clusters is the following:
    17 #    - The code vsegs are replicated on all clusters.
     13#    - one "load" task per cluster containing processors,
     14#    - one "store" task per cluster containing processors,
     15#    - (nprocs-2) "analyse" task per cluster containing processors.
     16#  The mapping of virtual segments is the following:
    1817#    - There is one shared data vseg in cluster[0][0]
    19 #    - There is one heap vseg per cluster.
    20 #    - The stacks vsegs are distibuted on all clusters.
     18#    - The code vsegs are replicated on all clusters containing processors.
     19#    - There is one heap vseg per cluster containing processors.
     20#    - The stacks vsegs are distibuted on all clusters containing processors.
    2121#  This mapping uses 5 platform parameters, (obtained from the "mapping" argument)
    2222#    - x_size    : number of clusters in a row
     
    2828#  WARNING: The target architecture cannot contain less
    2929#           than 3 processors per cluster.
    30 ####################################################################################
     30##################################################################################
    3131
    3232#########################
     
    4949
    5050    heap_base  = 0x30000000
    51     heap_size  = 0x00008000     # 32 Kbytes (per cluster)     
     51    heap_size  = 0x00040000     # 256 Kbytes (per cluster)     
    5252
    5353    stack_base = 0x40000000
     
    6363                     local = False )
    6464
    65     # heap_x_y vsegs : shared / one per cluster
     65    # heap vsegs : shared (one per cluster)
    6666    for x in xrange (x_size):
    6767        for y in xrange (y_size):
    68             base = heap_base + ( (4*x + y) * heap_size )
     68            cluster_id = (x * y_size) + y
     69            if ( mapping.clusters[cluster_id].procs ):
     70                size  = heap_size
     71                base  = heap_base + (cluster_id * size)
    6972
    70             mapping.addVseg( vspace, 'classif_heap_%d_%d' %(x,y), base , heap_size,
    71                              'C_WU', vtype = 'HEAP', x = x, y = y, pseg = 'RAM',
    72                              local = False )
     73                mapping.addVseg( vspace, 'classif_heap_%d_%d' %(x,y), base , size,
     74                                 'C_WU', vtype = 'HEAP', x = x, y = y, pseg = 'RAM',
     75                                 local = False )
    7376
    7477    # code vsegs : local (one copy in each cluster)
    7578    for x in xrange (x_size):
    7679        for y in xrange (y_size):
     80            cluster_id = (x * y_size) + y
     81            if ( mapping.clusters[cluster_id].procs ):
    7782
    78             mapping.addVseg( vspace, 'classif_code_%d_%d' %(x,y), code_base , code_size,
    79                              'CXWU', vtype = 'ELF', x = x, y = y, pseg = 'RAM',
    80                              binpath = 'build/classif/classif.elf',
    81                              local = True )
     83                mapping.addVseg( vspace, 'classif_code_%d_%d' %(x,y),
     84                                 code_base , code_size,
     85                                 'CXWU', vtype = 'ELF', x = x, y = y, pseg = 'RAM',
     86                                 binpath = 'build/classif/classif.elf',
     87                                 local = True )
    8288
    83     # stacks vsegs: local (one stack per processor => nprocs stacks per cluster)           
     89    # stacks vsegs: local (one stack per processor => nprocs stacks per cluster)
    8490    for x in xrange (x_size):
    8591        for y in xrange (y_size):
    86             for p in xrange( nprocs ):
    87                 proc_id = (((x * y_size) + y) * nprocs) + p
    88                 size    = (stack_size / nprocs) & 0xFFFFF000
    89                 base    = stack_base + (proc_id * size)
     92            cluster_id = (x * y_size) + y
     93            if ( mapping.clusters[cluster_id].procs ):
     94                for p in xrange( nprocs ):
     95                    proc_id = (((x * y_size) + y) * nprocs) + p
     96                    size    = (stack_size / nprocs) & 0xFFFFF000
     97                    base    = stack_base + (proc_id * size)
    9098
    91                 mapping.addVseg( vspace, 'classif_stack_%d_%d_%d' % (x,y,p), base, size,
    92                                  'C_WU', vtype = 'BUFFER', x = x , y = y , pseg = 'RAM',
    93                                  local = True, big = True )
     99                    mapping.addVseg( vspace, 'classif_stack_%d_%d_%d' % (x,y,p),
     100                                     base, size, 'C_WU', vtype = 'BUFFER',
     101                                     x = x , y = y , pseg = 'RAM',
     102                                     local = True, big = True )
    94103
    95104    # distributed tasks / one task per processor
    96105    for x in xrange (x_size):
    97106        for y in xrange (y_size):
    98             for p in xrange( nprocs ):
    99                 trdid = (((x * y_size) + y) * nprocs) + p
    100                 if  ( p== 0 ):                              # task load
    101                     task_index = 0
    102                     task_name  = 'load_%d_%d_%d' %(x,y,p)           
    103                 elif  ( p== 1 ):                            # task store
    104                     task_index = 1
    105                     task_name  = 'store_%d_%d_%d' %(x,y,p)           
    106                 else :                                      # task analyse
    107                     task_index = 2
    108                     task_name  = 'analyse_%d_%d_%d' % (x,y,p)
     107            cluster_id = (x * y_size) + y
     108            if ( mapping.clusters[cluster_id].procs ):
     109                for p in xrange( nprocs ):
     110                    trdid = (((x * y_size) + y) * nprocs) + p
     111                    if  ( p== 0 ):                              # task load
     112                        task_index = 0
     113                        task_name  = 'load_%d_%d_%d' %(x,y,p)           
     114                    elif  ( p== 1 ):                            # task store
     115                        task_index = 1
     116                        task_name  = 'store_%d_%d_%d' %(x,y,p)           
     117                    else :                                      # task analyse
     118                        task_index = 2
     119                        task_name  = 'analyse_%d_%d_%d' % (x,y,p)
    109120
    110                 mapping.addTask( vspace, task_name, trdid, x, y, p,
    111                                  'classif_stack_%d_%d_%d' % (x,y,p),
    112                                  'classif_heap_%d_%d' % (x,y),
    113                                  task_index )
     121                    mapping.addTask( vspace, task_name, trdid, x, y, p,
     122                                     'classif_stack_%d_%d_%d' % (x,y,p),
     123                                     'classif_heap_%d_%d' % (x,y),
     124                                     task_index )
    114125
    115126    # extend mapping name
     
    118129    return vspace  # useful for test
    119130           
    120 ################################ test ######################################################
     131################################ test ################################################
    121132
    122133if __name__ == '__main__':
  • soft/giet_vm/applications/classif/main.c

    r488 r502  
    1 /////////////////////////////////////////////////////////////////////////////////////////
     1///////////////////////////////////////////////////////////////////////////////////////
    22// File   : main.c   (for classif application)
    33// Date   : november 2014
    44// author : Alain Greiner
    5 /////////////////////////////////////////////////////////////////////////////////////////
     5///////////////////////////////////////////////////////////////////////////////////////
    66// This multi-threaded application takes a stream of Gigabit Ethernet packets,
    77// and makes packet analysis and classification, based on the source MAC address.
     
    99// component to receive and send packets on the Gigabit Ethernet port.
    1010//
    11 // This application is described as a TCG (Task and Communication Graph) containing
    12 // (N+2) tasks per cluster:
     11// It can run on architectures containing up to 16 * 16 clusters,
     12// and up to 8 processors per cluster.
     13//
     14// This application is described as a TCG (Task and Communication Graph)
     15// containing (N+2) tasks per cluster:
    1316// - one "load" task
     17// - one "store" task
    1418// - N "analyse" tasks
    15 // - one "store" task
    16 // The 4 Kbytes containers are diributed (N+2 containers per cluster):
     19// The containers are distributed (N+2 containers per cluster):
    1720// - one RX container (part of the kernel rx_chbuf), in the kernel heap.
    1821// - one TX container (part of the kernel tx-chbuf), in the kernel heap.
     
    3033// The MWMR fifo descriptors array is defined as a global variable in cluster[0][0].
    3134//
    32 // Initialisation is done in two steps by the "load" tasks:
    33 // - Task "load" in cluster[0][0] initialises NIC & CMA channel, and initialises
    34 //   the barrier between all "load" tasks. Other "load" tasks are waiting on the
    35 //   global_sync synchronisation variable.
    36 // - In each cluster[x][y], the "load" task allocates the working containers
    37 //   and the MWMR fifos descriptors in the local heap.
    38 //   The "analyse" tasks are waiting on the sync[x][y] variables.
     35// Initialisation is done in two steps by the "load" & "store" tasks:
     36// - Task "load" in cluster[0][0] initialises the barrier between all "load" tasks,
     37//   allocates NIC & CMA RX channel, and starts the NIC_CMA RX transfer.
     38//   Other "load" tasks are waiting on the load_sync synchronisation variable.
     39//   Task "store" in cluster[0][0] initialises the barrier between all "store" tasks,
     40//   allocates NIC & CMA TX channels, and starts the NIC_CMA TX transfer.
     41//   Other "store" tasks are waiting on the store_sync synchronisation variable.
     42// - When this global initialisation is completed, the "load" task in all clusters
     43//   allocates the working containers and the MWMR fifos descriptors from the
     44//   user local heap. In each cluster, the "analyse" and "store" tasks are waiting
     45//   the local initialisation completion on the local_sync[x][y] variables.
    3946//
    40 // Instrumentation results display is done by the "store" task in cluster[0][0]
    41 // when all "store" tasks completed the number of clusters specified by the
    42 // CONTAINERS_MAX parameter.
    43 //     
    4447// When initialisation is completed, all tasks loop on containers:
    4548// 1) The "load" task get an empty working container from the fifo_s2l,
     
    4750//    and transfer ownership of this container to one "analysis" task by writing
    4851//    into the fifo_l2a.   
    49 //
    5052// 2) The "analyse" task get one working container from the fifo_l2a, analyse
    5153//    each packet header, compute the packet type (depending on the SRC MAC address),
    5254//    increment the correspondint classification counter, and transpose the SRC
    5355//    and the DST MAC addresses fot TX tranmission.
    54 //
    5556// 3) The "store" task transfer get a full working container from the fifo_a2s,
    5657//    transfer this user container content to the the kernel tx_chbuf,
    5758//    and transfer ownership of this empty container to the "load" task by writing
    5859//    into the fifo_s2l.   
    59 //
    60 // This application uses the following hardware parameters (hard_config.h file):
    61 // - X_SIZE       : number of clusters in a row
    62 // - Y_SIZE       : number of clusters in a column
    63 // - NB_PROCS_MAX : number of processors per cluster
    64 /////////////////////////////////////////////////////////////////////////////////////////
     60//     
     61// Instrumentation results display is done by the "store" task in cluster[0][0]
     62// when all "store" tasks completed the number of clusters specified by the
     63// CONTAINERS_MAX parameter.
     64///////////////////////////////////////////////////////////////////////////////////////
    6565
    6666#include "stdio.h"
    67 #include "barrier.h"
     67#include "user_barrier.h"
    6868#include "malloc.h"
    6969#include "user_lock.h"
    7070#include "mwmr_channel.h"
    71 #include "hard_config.h"
    72 
    73 #define CONTAINERS_MAX  5
    74 #define VERBOSE_ANALYSE 1
    75 #define ANALYSIS_TASKS  (NB_PROCS_MAX - 2)
    76 
    77 /////////////////////////////////////////////////////////////////////////////////////////
     71
     72#define X_SIZE_MAX      16
     73#define Y_SIZE_MAX      16
     74#define NPROCS_MAX      8
     75#define CONTAINERS_MAX  500
     76#define VERBOSE_ANALYSE 0
     77
     78///////////////////////////////////////////////////////////////////////////////////////
    7879//    Global variables
    7980// The MWMR channels (descriptors and buffers), as well as the working containers
     
    8182// But the pointers on these distributed structures are shared arrays
    8283// stored in cluster[0][0].
    83 /////////////////////////////////////////////////////////////////////////////////////////
    84 
    85 // pointers on distributed temp[x][y][n] containers
    86 unsigned int*       container[X_SIZE][Y_SIZE][ANALYSIS_TASKS]; 
     84///////////////////////////////////////////////////////////////////////////////////////
     85
     86// pointers on distributed containers
     87unsigned int*       container[X_SIZE_MAX][Y_SIZE_MAX][NPROCS_MAX-2]; 
    8788
    8889// pointers on distributed mwmr fifos containing : temp[x][y][l] container descriptors
    89 mwmr_channel_t*     mwmr_l2a[X_SIZE][Y_SIZE]; 
    90 mwmr_channel_t*     mwmr_a2s[X_SIZE][Y_SIZE];
    91 mwmr_channel_t*     mwmr_s2l[X_SIZE][Y_SIZE];
     90mwmr_channel_t*     mwmr_l2a[X_SIZE_MAX][Y_SIZE_MAX]; 
     91mwmr_channel_t*     mwmr_a2s[X_SIZE_MAX][Y_SIZE_MAX];
     92mwmr_channel_t*     mwmr_s2l[X_SIZE_MAX][Y_SIZE_MAX];
    9293
    9394// local synchros signaling local MWMR fifos initialisation completion
    94 unsigned int        local_sync[X_SIZE][Y_SIZE]; 
     95volatile unsigned int        local_sync[X_SIZE_MAX][Y_SIZE_MAX]; 
    9596
    9697// global synchro signaling global initialisation completion
    97 unsigned int        load_sync  = 0;
    98 unsigned int        store_sync = 0;
     98volatile unsigned int        load_sync  = 0;
     99volatile unsigned int        store_sync = 0;
    99100
    100101// instrumentation counters
    101102unsigned int        counter[16];
    102103
    103 // distributed barriers (between "load" and "store" tasks)
    104 giet_sbt_barrier_t  rx_barrier;
    105 giet_sbt_barrier_t  tx_barrier;
     104// distributed barrier between "load" tasks
     105giet_sqt_barrier_t  rx_barrier;
     106
     107// distributed barrier between "store" tasks
     108giet_sqt_barrier_t  tx_barrier;
    106109
    107110// NIC_RX and NIC_TX channel index
     
    113116/////////////////////////////////////////
    114117{
     118    // each "load" task get platform parameters
     119    unsigned int    x_size;                                             // number of clusters in a row
     120    unsigned int    y_size;                     // number of clusters in a column
     121    unsigned int    nprocs;                     // number of processors per cluster
     122    giet_procs_number( &x_size, &y_size, &nprocs );
     123
     124    giet_assert( (x_size <= X_SIZE_MAX) &&
     125                 (y_size <= Y_SIZE_MAX) &&
     126                 (nprocs <= NPROCS_MAX) ,
     127                 "[CLASSIF ERROR] illegal platform parameters" );
     128
    115129    // each "load" task get processor identifiers
    116130    unsigned int    x;
     
    119133    giet_proc_xyp( &x, &y, &l );
    120134
    121     // "load" task[0][0] initialises barrier between load tasks,
     135    // "load" task[0][0] initialises barrier between all load tasks,
    122136    // allocates the NIC & CMA RX channels, and start the NIC_CMA RX transfer.
    123137    // Other "load" tasks wait completion
    124138    if ( (x==0) && (y==0) )
    125139    {
    126         giet_shr_printf("\n*** Task load on P[%d][%d][%d] starts at cycle %d\n",
    127                         x , y , l , giet_proctime() );
     140        giet_shr_printf("\n*** Task load on P[%d][%d][%d] starts at cycle %d\n"
     141                        "  x_size = %d / y_size = %d / nprocs = %d\n",
     142                        x , y , l , giet_proctime() , x_size, y_size, nprocs );
    128143 
    129         sbt_barrier_init( &rx_barrier, X_SIZE*Y_SIZE , 1 );
    130         nic_rx_channel = giet_nic_rx_alloc();
     144        sqt_barrier_init( &rx_barrier, x_size , y_size , 1 );
     145        nic_rx_channel = giet_nic_rx_alloc( x_size , y_size );
    131146        giet_nic_rx_start( nic_rx_channel );
    132147        load_sync = 1;
     
    137152    }   
    138153
    139     // all load tasks allocate containers[x][y][n] (from local heap)
     154    // each load tasks allocates containers[x][y][n] (from local heap)
    140155    // and register pointers in the local stack
    141156    unsigned int   n;
    142     unsigned int*  cont[ANALYSIS_TASKS];
    143 
    144     for ( n = 0 ; n < ANALYSIS_TASKS ; n++ )
     157    unsigned int*  cont[NPROCS_MAX-2];
     158    unsigned int   analysis_tasks = nprocs-2;
     159
     160    for ( n = 0 ; n < analysis_tasks ; n++ )
    145161    {
    146162        container[x][y][n] = malloc( 4096 );
     
    148164    }
    149165   
    150     // all load tasks allocate data buffers for mwmr fifos (from local heap)
    151     unsigned int*  data_l2a = malloc( ANALYSIS_TASKS<<2 );
    152     unsigned int*  data_a2s = malloc( ANALYSIS_TASKS<<2 );
    153     unsigned int*  data_s2l = malloc( ANALYSIS_TASKS<<2 );
    154 
    155     // all load tasks allocate mwmr fifos descriptors (from local heap)
     166    // each load task allocates data buffers for mwmr fifos (from local heap)
     167    unsigned int*  data_l2a = malloc( analysis_tasks<<2 );
     168    unsigned int*  data_a2s = malloc( analysis_tasks<<2 );
     169    unsigned int*  data_s2l = malloc( analysis_tasks<<2 );
     170
     171    // each load task allocates mwmr fifos descriptors (from local heap)
    156172    mwmr_l2a[x][y] = malloc( sizeof(mwmr_channel_t) );
    157173    mwmr_a2s[x][y] = malloc( sizeof(mwmr_channel_t) );
    158174    mwmr_s2l[x][y] = malloc( sizeof(mwmr_channel_t) );
    159175
    160     // all "load" tasks register local pointers on mwmr fifos in local stack
     176    // each load task registers local pointers on mwmr fifos in local stack
    161177    mwmr_channel_t* fifo_l2a = mwmr_l2a[x][y];
    162178    mwmr_channel_t* fifo_a2s = mwmr_a2s[x][y];
    163179    mwmr_channel_t* fifo_s2l = mwmr_s2l[x][y];
    164180
    165     // all "load" tasks initialise local mwmr fifos descriptors
     181    // each load task initialises local mwmr fifos descriptors
    166182    // ( width = 4 bytes / depth = number of analysis tasks )
    167     mwmr_init( fifo_l2a , data_l2a , 1 , ANALYSIS_TASKS );
    168     mwmr_init( fifo_a2s , data_a2s , 1 , ANALYSIS_TASKS );
    169     mwmr_init( fifo_s2l , data_s2l , 1 , ANALYSIS_TASKS );
     183    mwmr_init( fifo_l2a , data_l2a , 1 , analysis_tasks );
     184    mwmr_init( fifo_a2s , data_a2s , 1 , analysis_tasks );
     185    mwmr_init( fifo_s2l , data_s2l , 1 , analysis_tasks );
    170186
    171187   
    172     // all "load" tasks initialise local containers as empty in fifo_s2l
    173     for ( n = 0 ; n < ANALYSIS_TASKS ; n++ ) mwmr_write( fifo_s2l , &n , 1 );
    174 
    175     // each "load" task[x][y] signals mwmr fifos initialisation completion
     188    // each load task initialises local containers as empty in fifo_s2l
     189    for ( n = 0 ; n < analysis_tasks ; n++ ) mwmr_write( fifo_s2l , &n , 1 );
     190
     191    // each load task[x][y] signals mwmr fifos initialisation completion
    176192    // to other tasks in same cluster[x][y]
    177193    local_sync[x][y] = 1;
    178194
    179     // "load" task[0][0] displays status
     195    // load task[0][0] displays status
    180196    if ( (x==0) && (y==0) )
    181197    giet_shr_printf("\n*** Task load on P[%d,%d,%d] enters main loop at cycle %d\n"
     
    192208                    (unsigned int)fifo_s2l, (unsigned int)data_s2l,
    193209                    (unsigned int)cont[0],
    194                     X_SIZE, Y_SIZE, NB_PROCS_MAX );
     210                    x_size, y_size, nprocs );
    195211 
    196212    /////////////////////////////////////////////////////////////
    197     // All "load" tasks enter the main loop (on containers)
    198     unsigned int count = 0;     // loaded containers count
    199     unsigned int index;         // available container index
    200     unsigned int* temp;         // pointer on available container
     213    // All load tasks enter the main loop (on containers)
     214    unsigned int  count = 0;     // loaded containers count
     215    unsigned int  index;         // available container index
     216    unsigned int* temp;          // pointer on available container
    201217
    202218    while ( count < CONTAINERS_MAX )
    203219    {
    204         // get one empty count index from fifo_s2l
     220        // get one empty container index from fifo_s2l
    205221        mwmr_read( fifo_s2l , &index , 1 );
    206222        temp = cont[index];
    207223
    208         // get one count from  kernel rx_chbuf
     224        // get one container from  kernel rx_chbuf
    209225        giet_nic_rx_move( nic_rx_channel, temp );
    210226
     
    213229        unsigned int nwords   = temp[0] >> 16;
    214230
    215         if ( (x==X_SIZE-1) && (y==Y_SIZE-1) )
     231        if ( (x==0) && (y==0) )
    216232        giet_shr_printf("\n*** Task load on P[%d,%d,%d] get container %d at cycle %d"
    217233                        " : %d packets / %d words\n",
    218234                        x, y, l, count, giet_proctime(), npackets, nwords );
    219235
    220         // put the full count index to fifo_l2a
     236        // put the full container index to fifo_l2a
    221237        mwmr_write( fifo_l2a, &index , 1 );
    222238
     
    225241
    226242    // all "load" tasks synchronise before stats
    227     sbt_barrier_wait( &rx_barrier );
     243    sqt_barrier_wait( &rx_barrier );
    228244
    229245    // "load" task[0][0] stops the NIC_CMA RX transfer and displays stats
     
    244260//////////////////////////////////////////
    245261{
     262    // each "load" task get platform parameters
     263    unsigned int    x_size;                                             // number of clusters in row
     264    unsigned int    y_size;                     // number of clusters in a column
     265    unsigned int    nprocs;                     // number of processors per cluster
     266    giet_procs_number( &x_size, &y_size, &nprocs );
     267
    246268    // get processor identifiers
    247269    unsigned int    x;
     
    250272    giet_proc_xyp( &x, &y, &l );
    251273
    252 
    253274    // "store" task[0][0] initialises the barrier between all "store" tasks,
    254275    // allocates NIC & CMA TX channels, and starts the NIC_CMA TX transfer.
     
    256277    if ( (x==0) && (y==0) )
    257278    {
    258         giet_shr_printf("\n*** Task store on P[%d][%d][%d] starts at cycle %d\n",
    259                         x , y , l , giet_proctime() );
     279        giet_shr_printf("\n*** Task store on P[%d][%d][%d] starts at cycle %d\n"
     280                        "  x_size = %d / y_size = %d / nprocs = %d\n",
     281                        x , y , l , giet_proctime() , x_size, y_size, nprocs );
    260282 
    261         sbt_barrier_init( &tx_barrier , X_SIZE*Y_SIZE , 1 );
    262         nic_tx_channel = giet_nic_tx_alloc();
     283        sqt_barrier_init( &tx_barrier , x_size , y_size , 1 );
     284        nic_tx_channel = giet_nic_tx_alloc( x_size , y_size );
    263285        giet_nic_tx_start( nic_tx_channel );
    264286        store_sync = 1;
     
    272294    while ( local_sync[x][y] == 0 ) asm volatile ("nop");
    273295
    274     // all "store" tasks register pointers on working containers in local stack
     296    // each "store" tasks register pointers on working containers in local stack
    275297    unsigned int   n;
    276     unsigned int*  cont[ANALYSIS_TASKS];
    277     for ( n = 0 ; n < ANALYSIS_TASKS ; n++ )
     298    unsigned int   analysis_tasks = nprocs-2;
     299    unsigned int*  cont[NPROCS_MAX-2];
     300
     301    for ( n = 0 ; n < analysis_tasks ; n++ )
    278302    {
    279303        cont[n] = container[x][y][n];
     
    318342        unsigned int nwords   = temp[0] >> 16;
    319343
    320         if ( (x==X_SIZE-1) && (y==Y_SIZE-1) )
     344        if ( (x==0) && (y==0) )
    321345        giet_shr_printf("\n*** Task store on P[%d,%d,%d] get container %d at cycle %d"
    322346                        " : %d packets / %d words\n",
     
    330354
    331355    // all "store" tasks synchronise before result display
    332     sbt_barrier_wait( &tx_barrier );
     356    sqt_barrier_wait( &tx_barrier );
    333357
    334358    // "store" task[0,0] stops NIC_CMA TX transfer and displays results
     
    377401////////////////////////////////////////////
    378402{
     403    // each "load" task get platform parameters
     404    unsigned int    x_size;                                             // number of clusters in row
     405    unsigned int    y_size;                     // number of clusters in a column
     406    unsigned int    nprocs;                     // number of processors per cluster
     407    giet_procs_number( &x_size, &y_size, &nprocs );
     408
    379409    // get processor identifiers
    380410    unsigned int    x;
     
    385415    if ( (x==0) && (y==0) )
    386416    {
    387         giet_shr_printf("\n*** Task analyse on P[%d][%d][%d] starts at cycle %d\n",
    388                         x , y , l , giet_proctime() );
     417        giet_shr_printf("\n*** Task analyse on P[%d][%d][%d] starts at cycle %d\n"
     418                        "  x_size = %d / y_size = %d / nprocs = %d\n",
     419                        x , y , l , giet_proctime() , x_size, y_size, nprocs );
    389420    }
    390421 
     
    394425    // all "analyse" tasks register pointers on working containers in local stack
    395426    unsigned int   n;
    396     unsigned int*  cont[ANALYSIS_TASKS];
    397     for ( n = 0 ; n < ANALYSIS_TASKS ; n++ )
     427    unsigned int   analysis_tasks = nprocs-2;
     428    unsigned int*  cont[NPROCS_MAX-2];
     429    for ( n = 0 ; n < analysis_tasks ; n++ )
    398430    {
    399431        cont[n] = container[x][y][n];
     
    471503            unsigned int word2 = temp[first + 2];
    472504
     505#if VERBOSE_ANALYSE
    473506            unsigned long long dst = ((unsigned long long)(word1 & 0xFFFF0000)>>16) |
    474507                                     (((unsigned long long)word0)<<16);
    475508            unsigned long long src = ((unsigned long long)(word1 & 0x0000FFFF)<<32) |
    476509                                     ((unsigned long long)word2);
    477 #if VERBOSE_ANALYSE
    478510            if ( p < 10 )
    479511            {
Note: See TracChangeset for help on using the changeset viewer.