Changeset 473 for soft


Ignore:
Timestamp:
Dec 12, 2014, 5:44:59 PM (10 years ago)
Author:
alain
Message:

Improving the classif application:
We now use the distributed heap and the user malloc()
to implement the distributed MWMR buffers and the
distributed MWMR descriptors.

Location:
soft/giet_vm/applications/classif
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • soft/giet_vm/applications/classif/classif.ld

    r457 r473  
    11/****************************************************************************
    2 * Definition of the base address for all virtual segments
     2* Definition of the base address for code and data vsegs
    33*****************************************************************************/
    44
    55seg_code_base        = 0x10000000;
    6 
    7 seg_data_0_0_base    = 0x20000000;
    8 seg_data_0_1_base    = 0x20010000;
    9 seg_data_0_2_base    = 0x20020000;
    10 seg_data_0_3_base    = 0x20030000;
    11 
    12 seg_data_1_0_base    = 0x20040000;
    13 seg_data_1_1_base    = 0x20050000;
    14 seg_data_1_2_base    = 0x20060000;
    15 seg_data_1_3_base    = 0x20070000;
    16 
    17 seg_data_2_0_base    = 0x20080000;
    18 seg_data_2_1_base    = 0x20090000;
    19 seg_data_2_2_base    = 0x200A0000;
    20 seg_data_2_3_base    = 0x200B0000;
    21 
    22 seg_data_3_0_base    = 0x200C0000;
    23 seg_data_3_1_base    = 0x200D0000;
    24 seg_data_3_2_base    = 0x200E0000;
    25 seg_data_3_3_base    = 0x200F0000;
     6seg_data_base        = 0x20000000;
    267
    278/***************************************************************************
     
    3718    }
    3819
    39     . = seg_data_0_0_base;
    40     seg_data_0_0 :
     20    . = seg_data_base;
     21    seg_data :
    4122    {
    4223        *(.ctors)
     
    5132        *(.sbss)
    5233        *(.scommon)
    53         *(.data_0_0)
    54     }
    55     . = seg_data_0_1_base;
    56     seg_data_0_1 :
    57     {
    58         *(.data_0_1)
    59     }
    60     . = seg_data_0_2_base;
    61     seg_data_0_2 :
    62     {
    63         *(.data_0_2)
    64     }
    65     . = seg_data_0_3_base;
    66     seg_data_0_3 :
    67     {
    68         *(.data_0_3)
    69     }
    70 
    71     . = seg_data_1_0_base;
    72     seg_data_1_0 :
    73     {
    74         *(.data_1_0)
    75     }
    76     . = seg_data_1_1_base;
    77     seg_data_1_1 :
    78     {
    79         *(.data_1_1)
    80     }
    81     . = seg_data_1_2_base;
    82     seg_data_1_2 :
    83     {
    84         *(.data_1_2)
    85     }
    86     . = seg_data_1_3_base;
    87     seg_data_1_3 :
    88     {
    89         *(.data_1_3)
    90     }
    91 
    92     . = seg_data_2_0_base;
    93     seg_data_2_0 :
    94     {
    95         *(.data_2_0)
    96     }
    97     . = seg_data_2_1_base;
    98     seg_data_2_1 :
    99     {
    100         *(.data_2_1)
    101     }
    102     . = seg_data_2_2_base;
    103     seg_data_2_2 :
    104     {
    105         *(.data_2_2)
    106     }
    107     . = seg_data_2_3_base;
    108     seg_data_2_3 :
    109     {
    110         *(.data_2_3)
    111     }
    112 
    113     . = seg_data_3_0_base;
    114     seg_data_3_0 :
    115     {
    116         *(.data_3_0)
    117     }
    118     . = seg_data_3_1_base;
    119     seg_data_3_1 :
    120     {
    121         *(.data_3_1)
    122     }
    123     . = seg_data_3_2_base;
    124     seg_data_3_2 :
    125     {
    126         *(.data_3_2)
    127     }
    128     . = seg_data_3_3_base;
    129     seg_data_3_3 :
    130     {
    131         *(.data_3_3)
    13234    }
    13335}
  • soft/giet_vm/applications/classif/classif.py

    r457 r473  
    1515#  The mapping of virtual segments on the clusters is the following:
    1616#    - The code vsegs are replicated on all clusters.
    17 #    - There is one shared data vseg per cluster.
     17#    - There is one shared data vseg in cluster[0][0]
     18#    - There is one heap vseg per cluster.
    1819#    - The stacks vsegs are distributed on all clusters.
    1920#  This mapping uses 5 platform parameters, (obtained from the "mapping" argument)
     
    3738    y_width   = mapping.y_width
    3839
    39     assert (x_size <= 4) and (y_size <= 4)
    4040    assert (nprocs >= 2)
    4141
     
    4545   
    4646    data_base  = 0x20000000
    47     data_size  = 0x00010000     # 64 Kbytes (per cluster)
     47    data_size  = 0x00010000     # 64 Kbytes
     48
     49    heap_base  = 0x30000000
     50    heap_size  = 0x00008000     # 32 Kbytes (per cluster)     
    4851
    4952    stack_base = 0x40000000
     
    5154
    5255    # create vspace
    53     vspace = mapping.addVspace( name = 'classif', startname = 'classif_data_0_0' )
     56    vspace = mapping.addVspace( name = 'classif', startname = 'classif_data' )
    5457   
    55     # data_x_y vsegs : shared / one per cluster
     58    # data vseg : shared / cluster[0][0]
     59    mapping.addVseg( vspace, 'classif_data', data_base , data_size,
     60                     'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM',
     61                     binpath = 'build/classif/classif.elf',
     62                     local = False )
     63
     64    # heap_x_y vsegs : shared / one per cluster
    5665    for x in xrange (x_size):
    5766        for y in xrange (y_size):
    58             base = data_base + ( (4*x + y) * data_size )
     67            base = heap_base + ( (4*x + y) * heap_size )
    5968
    60             mapping.addVseg( vspace, 'classif_data_%d_%d' %(x,y), base , data_size,
    61                              'C_WU', vtype = 'ELF', x = x, y = y, pseg = 'RAM',
    62                              binpath = 'build/classif/classif.elf',
     69            mapping.addVseg( vspace, 'classif_heap_%d_%d' %(x,y), base , heap_size,
     70                             'C_WU', vtype = 'HEAP', x = x, y = y, pseg = 'RAM',
    6371                             local = False )
    6472
     
    94102                else :                                      # task analyse
    95103                    task_index = 1
    96                     task_name  = 'analyse_%d_%d_%d' % (x,y,p
    97 )
     104                    task_name  = 'analyse_%d_%d_%d' % (x,y,p)
     105
    98106                mapping.addTask( vspace, task_name, trdid, x, y, p,
    99                                  'classif_stack_%d_%d_%d' % (x,y,p), '' , task_index )
     107                                 'classif_stack_%d_%d_%d' % (x,y,p),
     108                                 'classif_heap_%d_%d' % (x,y),
     109                                 task_index )
    100110
    101111    # extend mapping name
  • soft/giet_vm/applications/classif/main.c

    r457 r473  
    1717// as global variables distributed in (up to) 16 clusters.
    1818//
    19 // WARNING: the platform cannot contain more than 16 clusters: (X_SIZE < 4) && (Y_SIZE < 4)
    20 //
    21 // 1) The "load" task transfer one container from the kernel chbuf associated to the
    22 //    NIC_RX channel, to a private buffer. Then it copies this bufferer to the local MWMR fifo.
     19// Initialisation is done in two steps by the "load" tasks:
     20// - Task "load" in cluster[0][0] initialises NIC & CMA channel, and initialises
     21//   the barrier between all "load" tasks. Other "load" tasks are waiting on the
     22//   global_sync synchronisation variable.
     23// - In each cluster[x][y], the "load" task allocates the MWMR fifo descriptor & the data
     24//   buffer in the local heap, and stores the pointers in a global array of pointers.
     25//   The "analyse" tasks are waiting on the sync[x][y] variables.
     26//     
     27// When initialisation is completed, all "load" and "analyse" tasks loop on containers:
     28// 1) The "load" task transfers containers from the kernel chbuf associated to the
     29//    NIC_RX channel (in cluster[0][0]), to the local MWMR fifo (in cluster[x][y]),
     30//    after an intermediate copy in a private stack buffer.
    2331//    Each "load" task loads CONTAINERS_MAX containers before exit, and the
    2432//    task in cluster[0,0] displays the results stored in global counters filled
    25 //    by the "analyse" tasks.
     33//    by the "analyse" tasks when all "load" tasks reach the barrier.
    2634//
    2735// 2) The "analyse" task transfers one container from the local MWMR fifo to a private
     
    4755#define CONTAINERS_MAX   10
    4856
    49 ///////////  distributed data /////////////////////////////////////////////////////////////
    50 // - fifo_x_y is the local MWMR fifo descriptor
    51 // - data_x_y is the local MWMR fifo data buffer
    52 // - sync_x_y is the local variable signaling MWMR fifo initialisation
    53 ///////////  distributed data /////////////////////////////////////////////////////////////
    54 
    55 #if ( (X_SIZE > 0) && (Y_SIZE > 0) )
    56 __attribute__((section (".data_0_0")))  mwmr_channel_t fifo_0_0;
    57 __attribute__((section (".data_0_0")))  unsigned int   data_0_0[NB_PROCS_MAX<<10];
    58 __attribute__((section (".data_0_0")))  unsigned int   sync_0_0 = 0;
    59 #endif
    60 #if ( (X_SIZE > 0) && (Y_SIZE > 1) )
    61 __attribute__((section (".data_0_1")))  mwmr_channel_t fifo_0_1;
    62 __attribute__((section (".data_0_1")))  unsigned int   data_0_1[NB_PROCS_MAX<<10];
    63 __attribute__((section (".data_0_1")))  unsigned int   sync_0_1 = 0;
    64 #endif
    65 #if ( (X_SIZE > 0) && (Y_SIZE > 2) )
    66 __attribute__((section (".data_0_2")))  mwmr_channel_t fifo_0_2;
    67 __attribute__((section (".data_0_2")))  unsigned int   data_0_2[NB_PROCS_MAX<<10];
    68 __attribute__((section (".data_0_2")))  unsigned int   sync_0_2 = 0;
    69 #endif
    70 #if ( (X_SIZE > 0) && (Y_SIZE > 3) )
    71 __attribute__((section (".data_0_3")))  mwmr_channel_t fifo_0_3;
    72 __attribute__((section (".data_0_3")))  unsigned int   data_0_3[NB_PROCS_MAX<<10];
    73 __attribute__((section (".data_0_3")))  unsigned int   sync_0_3 = 0;
    74 #endif
    75 #if ( (X_SIZE > 1) && (Y_SIZE > 0) )
    76 __attribute__((section (".data_1_0")))  mwmr_channel_t fifo_1_0;
    77 __attribute__((section (".data_1_0")))  unsigned int   data_1_0[NB_PROCS_MAX<<10];
    78 __attribute__((section (".data_1_0")))  unsigned int   sync_1_0 = 0;
    79 #endif
    80 #if ( (X_SIZE > 1) && (Y_SIZE > 1) )
    81 __attribute__((section (".data_1_1")))  mwmr_channel_t fifo_1_1;
    82 __attribute__((section (".data_1_1")))  unsigned int   data_1_1[NB_PROCS_MAX<<10];
    83 __attribute__((section (".data_1_1")))  unsigned int   sync_1_1 = 0;
    84 #endif
    85 #if ( (X_SIZE > 1) && (Y_SIZE > 2) )
    86 __attribute__((section (".data_1_2")))  mwmr_channel_t fifo_1_2;
    87 __attribute__((section (".data_1_2")))  unsigned int   data_1_2[NB_PROCS_MAX<<10];
    88 __attribute__((section (".data_1_2")))  unsigned int   sync_1_2 = 0;
    89 #endif
    90 #if ( (X_SIZE > 1) && (Y_SIZE > 3) )
    91 __attribute__((section (".data_1_3")))  mwmr_channel_t fifo_1_3;
    92 __attribute__((section (".data_1_3")))  unsigned int   data_1_3[NB_PROCS_MAX<<10];
    93 __attribute__((section (".data_1_3")))  unsigned int   sync_1_3 = 0;
    94 #endif
    95 #if ( (X_SIZE > 2) && (Y_SIZE > 0) )
    96 __attribute__((section (".data_2_0")))  mwmr_channel_t fifo_2_0;
    97 __attribute__((section (".data_2_0")))  unsigned int   data_2_0[NB_PROCS_MAX<<10];
    98 __attribute__((section (".data_2_0")))  unsigned int   sync_2_0 = 0;
    99 #endif
    100 #if ( (X_SIZE > 2) && (Y_SIZE > 1) )
    101 __attribute__((section (".data_2_1")))  mwmr_channel_t fifo_2_1;
    102 __attribute__((section (".data_2_1")))  unsigned int   data_2_1[NB_PROCS_MAX<<10];
    103 __attribute__((section (".data_2_1")))  unsigned int   sync_2_1 = 0;
    104 #endif
    105 #if ( (X_SIZE > 2) && (Y_SIZE > 2) )
    106 __attribute__((section (".data_2_2")))  mwmr_channel_t fifo_2_2;
    107 __attribute__((section (".data_2_2")))  unsigned int   data_2_2[NB_PROCS_MAX<<10];
    108 __attribute__((section (".data_2_2")))  unsigned int   sync_2_2 = 0;
    109 #endif
    110 #if ( (X_SIZE > 2) && (Y_SIZE > 3) )
    111 __attribute__((section (".data_2_3")))  mwmr_channel_t fifo_2_3;
    112 __attribute__((section (".data_2_3")))  unsigned int   data_2_3[NB_PROCS_MAX<<10];
    113 __attribute__((section (".data_2_3")))  unsigned int   sync_2_3 = 0;
    114 #endif
    115 #if ( (X_SIZE > 3) && (Y_SIZE > 0) )
    116 __attribute__((section (".data_3_0")))  mwmr_channel_t fifo_3_0;
    117 __attribute__((section (".data_3_0")))  unsigned int   data_3_0[NB_PROCS_MAX<<10];
    118 __attribute__((section (".data_3_0")))  unsigned int   sync_3_0 = 0;
    119 #endif
    120 #if ( (X_SIZE > 3) && (Y_SIZE > 1) )
    121 __attribute__((section (".data_3_1")))  mwmr_channel_t fifo_3_1;
    122 __attribute__((section (".data_3_1")))  unsigned int   data_3_1[NB_PROCS_MAX<<10];
    123 __attribute__((section (".data_3_1")))  unsigned int   sync_3_1 = 0;
    124 #endif
    125 #if ( (X_SIZE > 3) && (Y_SIZE > 2) )
    126 __attribute__((section (".data_3_2")))  mwmr_channel_t fifo_3_2;
    127 __attribute__((section (".data_3_2")))  unsigned int   data_3_2[NB_PROCS_MAX<<10];
    128 __attribute__((section (".data_3_2")))  unsigned int   sync_3_2 = 0;
    129 #endif
    130 #if ( (X_SIZE > 3) && (Y_SIZE > 3) )
    131 __attribute__((section (".data_3_3")))  mwmr_channel_t fifo_3_3;
    132 __attribute__((section (".data_3_3")))  unsigned int   data_3_3[NB_PROCS_MAX<<10];
    133 __attribute__((section (".data_3_3")))  unsigned int   sync_3_3 = 0;
    134 #endif
    135 
    136 /////////// shared variables in cluster[0,0] //////////////////////////
    137 
    138 __attribute__((section (".data_0_0")))  unsigned int count[16];
    139 
    140 __attribute__((section (".data_0_0")))  giet_barrier_t barrier;
    141 
    142 __attribute__((section (".data_0_0")))  unsigned int global_init_ok = 0;
    143 
    144 __attribute__((section (".data_0_0")))  unsigned int nic_channel;
    145 
    146 
    147 /////////////////////////////////////////
     57///////////////////////////////////////////////////////////////////////////////////////////
     58//    Global variables
     59// The communication channels are distributed in the clusters,
     60// but the pointers arrays are global variables in cluster[0][0]
     61///////////////////////////////////////////////////////////////////////////////////////////
     62
     63mwmr_channel_t*  mwmr[X_SIZE][Y_SIZE];        // distributed MWMR fifos pointers
     64
     65unsigned int     local_sync[X_SIZE][Y_SIZE];  // distributed synchros "load" / "analyse"
     66
     67unsigned int     global_sync = 0;             // global synchro between "load" tasks
     68
     69unsigned int     count[16];                   // instrumentation counters
     70
     71giet_barrier_t   barrier;                     // barrier between "load" (instrumentation)
     72
     73unsigned int     nic_channel;                 // allocated NIC channel index
     74
     75///////////////////////////////////////////////////////////////////////////////////////////
    14876__attribute__ ((constructor)) void load()
    149 /////////////////////////////////////////
     77///////////////////////////////////////////////////////////////////////////////////////////
    15078{
    15179    // get processor identifiers
     
    15886    if (Y_SIZE > 4 )  giet_exit("The Y_SIZE parameter cannot be larger than 4\n");
    15987
    160     // local buffer to store one container
     88    // local buffer to store one container in private stack
    16189    unsigned int  temp[1024];
    16290
    163     // get pointer on local MWMR fifo descriptor and data buffer
    164     unsigned int    offset = ((x * 4) + y) * 0x10000;
    165     mwmr_channel_t* fifo = (mwmr_channel_t*)(((unsigned int)&fifo_0_0) + offset);
    166     unsigned int*   data = (unsigned int*)  (((unsigned int)data_0_0)  + offset);
    167     unsigned int*   sync = (unsigned int*)  (((unsigned int)&sync_0_0) + offset);
    168 
     91// giet_shr_printf("\n@@@ P[%d,%d,%d] enters load task at cycle %d\n",
     92//                 x, y, l, giet_proctime() );
     93
     94    // allocates data buffer for MWMR fifo in local heap
     95    unsigned int*  data = malloc( NB_PROCS_MAX<<12 );
     96
     97//giet_shr_printf("\n@@@ P[%d,%d,%d] completes data malloc at cycle %d "
     98//                "/ &data = %x\n", x, y, l, giet_proctime(), (unsigned int)data );
     99
     100    // allocates MWMR fifo descriptor in local heap
     101    mwmr_channel_t*  fifo = malloc( sizeof(mwmr_channel_t) );
     102
     103//giet_shr_printf("\n@@@ P[%d,%d,%d] completes mwmr malloc at cycle %d "
     104//                "/ &mwmr = %x\n", x, y, l, giet_proctime(), (unsigned int)mwmr );
     105
     106    // makes copy of pointer in global array for "analyse" tasks
     107    mwmr[x][y] = fifo;
     108
     109    // display status for cluster[X_SIZE-1][Y_SIZE-1]
    169110    if ( (x==X_SIZE-1) && (y==Y_SIZE-1) )
    170111    giet_shr_printf("\n*** Task load starts on P[%d,%d,%d] at cycle %d\n"
    171                     "      &fifo = %x / &data = %x / &sync = %x\n",
    172                     x, y, l, giet_proctime(),
    173                     (unsigned int)fifo, (unsigned int)data, (unsigned int)sync );
     112                    "      &fifo  = %x / &data  = %x / &sync  = %x\n"
     113                    "      x_size = %d / y_size = %d / nprocs = %d\n",
     114                    x , y , l , giet_proctime() ,
     115                    (unsigned int)fifo , (unsigned int)data, (unsigned int)(&local_sync[x][y]) ,
     116                    X_SIZE, Y_SIZE, NB_PROCS_MAX );
    174117
    175118    // Task load on cluster[0,0] makes global initialisation:
     
    188131        barrier_init( &barrier, X_SIZE * Y_SIZE );
    189132
    190         global_init_ok = 1;
     133        // clear NIC RX channels stats
     134        giet_nic_rx_clear();
     135
     136        global_sync = 1;
    191137    }
    192138    else
    193139    {
    194         while ( global_init_ok == 0 ) asm volatile ("nop");
     140        while ( global_sync == 0 ) asm volatile ("nop");
    195141    }   
    196142
     
    198144    mwmr_init( fifo , data , 1024 , NB_PROCS_MAX );
    199145
    200     // signal MWMR fifo initialisation completion
    201     *sync = 1;
     146    // signal MWMR fifo initialisation completion to analyse tasks
     147    local_sync[x][y] = 1;
    202148
    203149    // main loop (on containers)
     
    226172    barrier_wait( &barrier );
    227173
    228     // Task load in cluster[0,0] displays counters and stops NIC / CMA transfer
     174    // Task load in cluster[0,0] stops NIC and displays results
    229175    if ( (x==0) && (y==0) )
    230176    {
     177        giet_nic_rx_stop();
     178
    231179        giet_shr_printf("\n@@@@ Clasification Results @@@\n"
    232180                        " - TYPE 0 : %d packets\n"
     
    246194                        " - TYPE E : %d packets\n"
    247195                        " - TYPE F : %d packets\n",
    248             count[0x0], count[0x1], count[0x2], count[0x3],
    249             count[0x4], count[0x5], count[0x6], count[0x7],
    250             count[0x8], count[0x9], count[0xA], count[0xB],
    251             count[0xC], count[0xD], count[0xE], count[0xF] );
    252 
    253         giet_nic_rx_stop();
     196                        count[0x0], count[0x1], count[0x2], count[0x3],
     197                        count[0x4], count[0x5], count[0x6], count[0x7],
     198                        count[0x8], count[0x9], count[0xA], count[0xB],
     199                        count[0xC], count[0xD], count[0xE], count[0xF] );
    254200
    255201        giet_nic_rx_stats();
     
    277223    unsigned int  temp[1024];
    278224
     225    // wait MWMR channel initialisation (done by task load)
     226    while ( local_sync[x][y] == 0 ) asm volatile ("nop");
     227
    279228    // get pointer on MWMR channel descriptor
    280     unsigned int    offset = ((x * 4) + y) * 0x10000;
    281     mwmr_channel_t* fifo = (mwmr_channel_t*)(((unsigned int)&fifo_0_0) + offset);
    282     unsigned int*   sync = (unsigned int*)  (((unsigned int)&sync_0_0) + offset);
    283 
     229    mwmr_channel_t* fifo = mwmr[x][y];
     230
     231    // display status for cluster[X_SIZE-1][Y_SIZE-1]
    284232    if ( (x==X_SIZE-1) && (y==Y_SIZE-1) )
    285233    giet_shr_printf("\n*** Task analyse starts on P[%d,%d,%d] at cycle %d\n"
    286234                    "       &fifo = %x / &sync = %x\n",
    287235                    x, y, l, giet_proctime(),
    288                     (unsigned int)fifo, (unsigned int)sync );
     236                    (unsigned int)fifo, (unsigned int)(&local_sync[x][y]) );
    289237   
    290     // wait MWMR channel initialisation (done by task load)
    291     while ( *sync == 0 ) asm volatile ("nop");
    292 
    293     // infinite loop (on containers)
     238    // main loop (on containers)
    294239    unsigned int nwords;     // number of words in container
    295240    unsigned int npackets;   // number of packets in container
Note: See TracChangeset for help on using the changeset viewer.