Timestamp:
Feb 8, 2015, 9:20:45 PM
Author:
alain
Message:

1) Introduce distributed barriers in the multi-threaded applications
(classif, transpose, convol, sort, gameoflife).

2) Introduce support for architectures containing empty clusters
in the mapping of these multi-threaded applications.

3) Remove the "command line arguments" in the sort application
(replaced by the giet_procs_number() system call); see the sketch below.
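
For reference, the heart of that change in main.c, excerpted from the diff
below (the comments are added here; x_size and y_size are the cluster mesh
dimensions and nprocs the number of processors per cluster):

    unsigned int x_size;   /* clusters along X in the mesh   */
    unsigned int y_size;   /* clusters along Y in the mesh   */
    unsigned int nprocs;   /* processors per cluster         */
    unsigned int threads;  /* one thread per processor       */

    giet_procs_number( &x_size , &y_size , &nprocs );
    threads = x_size * y_size * nprocs;   /* replaces the old "sort_args" vobj */

The application then checks that this value is a power of 2 (up to 1024)
before building the barriers.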

Location:
soft/giet_vm/applications/sort
Files:
2 edited

Legend:

Lines prefixed with "-" were removed, lines prefixed with "+" were added,
lines prefixed with a space are unchanged context, and "..." marks elided
unchanged lines.
  • soft/giet_vm/applications/sort/main.c

    r432 → r502

 //      barrier routines to apply a sort algorithm in several stages.
 //
-//      Considerations :
-//
-//          - It supports up to 256 processors and the number of processors
-//            must be a power of 2.
-//
-//          - If there is only one TTY available, this application uses a spin
-//            lock to avoid several threads writting at the same time.
-//
-//          - This application must be executed on a cache coherent
-//            architecture. Otherwise some modifications must be applied
-//
-//          - The processors executing this application must have a contiguous
-//            processor id and the first processor must have id 0.
+//      Constraints :
+//
+//      - It supports up to 1024 processors and the number of processors
+//        must be a power of 2.
+//
+//      _ The array of values to be sorted (ARRAY_LENGTH) must be power of 2
+//        larger than the number of processors.
+//
+//      - This application must be executed on a cache coherent architecture.
 //
 ///////////////////////////////////////////////////////////////////////////////
...
 #include "mapping_info.h"
 #include "hard_config.h"
-#include "barrier.h"
-
-#define ARRAY_LENGTH    512
-#define IPT             (ARRAY_LENGTH / *nb_thread) // ITEMS PER THREAD
+#include "user_barrier.h"
+
+#define ARRAY_LENGTH    4096
+#define IPT             (ARRAY_LENGTH / threads) // ITEMS PER THREAD
 
 ////////////////////////////////////////////////////////////////////////////////
-// Processors other than 0 display algorithm state
-// The processor 0 always displays some information so this does not affect him
+// Processors other than 0 display algorithm state if VERBOSE non zero
 
 #define VERBOSE         1
 
 ////////////////////////////////////////////////////////////////////////////////
-// Define printf according to verbosity option and number of available
-// TTY
+// Define printf according to verbosity option and number of available TTY
 
 #if (VERBOSE == 1)
...
 
 #define task0_printf(...) if(thread_id == 0) giet_shr_printf(__VA_ARGS__)
-
-#define exit    giet_exit
-#define procid  giet_procid
-#define rand    giet_rand
 
 int array0[ARRAY_LENGTH];
...
         int init_pos_result);
 
-///////////////////////////////////////////////////
-// This application support at most 256 processors
-// Number of barriers = log2(nb_thread)
-
-giet_barrier_t barrier[8];
+///////////////////////////////////////////////////////
+// This application supports at most 1024 processors
+// Number of barriers = log2(threads)
+
+giet_barrier_t barrier[10];
 
 //////////////////////////////////////////
...
 {
     int thread_id = giet_thread_id();
-    unsigned int* nb_thread;
     int * src_array = NULL;
     int * dst_array = NULL;
...
     unsigned int time_end;
 
-    giet_vobj_get_vbase( "sort" ,
-                         "sort_args",
-                         (unsigned int*)&nb_thread );
-
-    task0_printf("\n[ Thread 0 ] Starting sort application with %u threads "
-                 "at cycle %u\n", *nb_thread, time_start);
+    // compute number of threads (one thread per proc)
+    unsigned int x_size;
+    unsigned int y_size;
+    unsigned int nprocs;
+    unsigned int threads;
+    giet_procs_number( &x_size , &y_size , &nprocs );
+    threads = x_size * y_size * nprocs;
+
+    if ( (threads != 1)   && (threads != 2)   && (threads != 4)   &&
+         (threads != 8)   && (threads != 16 ) && (threads != 32)  &&
+         (threads != 64)  && (threads != 128) && (threads != 256) &&
+         (threads != 512) && (threads != 1024) )
+    {
+        task0_printf("[SORT ERROR] Number of processors must be power of 2\n"
+                     "  x_size = %d / y_size = %d / nprocs = %d\n",
+                     x_size , y_size , nprocs );
+        giet_exit("error");
+    }
+
+    task0_printf("\n[ Thread 0 ] Starting sort application with %d threads "
+                 "at cycle %d\n", threads, time_start);
 
     ///////////////////////////
...
     if (thread_id == 0)
     {
-        for (i = 0; i < __builtin_ctz(*nb_thread); i++)
-        {
-            barrier_init(&barrier[i], *nb_thread >> i);
+        for (i = 0; i < __builtin_ctz( threads ); i++)
+        {
+            barrier_init(&barrier[i], threads >> i);
         }
 
...
     for (i = IPT * thread_id; i < IPT * (thread_id + 1); i++)
     {
-        array0[i] = rand();
+        array0[i] = giet_rand();
     }
 
...
     printf("[ Thread %d ] Finishing Stage 0\n\r", thread_id);
 
-    for (i = 0; i < __builtin_ctz(*nb_thread); i++)
+    for (i = 0; i < __builtin_ctz( threads ); i++)
     {
         barrier_wait(&barrier[i]);
...
         {
             printf("[ Thread %d ] Quit\n\r", thread_id );
-            exit("Completed");
+            giet_exit("Completed");
         }
 
...
     if(thread_id != 0)
     {
-        exit("error: only thread 0 should get here");
+        giet_exit("error: only thread 0 should get here");
     }
 
...
     if (success)
     {
-        exit("!!! Success !!!");
+        giet_exit("!!! Success !!!");
     }
     else
...
             printf("array[%d] = %d\n", i, dst_array[i]);
         }
-        exit("!!!  Failure !!!");
-    }
-
-    exit("Completed");
+        giet_exit("!!!  Failure !!!");
+    }
+
+    giet_exit("Completed");
 }
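To make the barrier dimensioning above easier to follow, here is a small
standalone sketch (not part of the changeset) of the staging scheme implied by
barrier_init(&barrier[i], threads >> i) and the __builtin_ctz(threads) loop:
stage i synchronizes threads >> i participants, and half of them quit after
each stage. The exact quit condition is not visible in this diff, so the
modulo rule below is an assumption made for illustration only.

    /* Illustration only: prints, for a hypothetical 8-thread run, how many
       threads each barrier gathers and which thread ids keep going.
       Assumption (not shown in the diff): after stage i, only threads whose
       id is a multiple of 2^(i+1) continue to the next merge stage. */
    #include <stdio.h>

    int main(void)
    {
        unsigned int threads = 8;                      /* must be a power of 2 */

        for (unsigned int i = 0; (threads >> i) > 1; i++)
        {
            unsigned int participants = threads >> i;  /* value passed to barrier_init() */
            unsigned int step = 2u << i;               /* 2^(i+1) */
            printf("stage %u: barrier[%u] gathers %u threads; "
                   "ids that are multiples of %u continue\n",
                   i, i, participants, step);
        }
        return 0;
    }

With 8 threads this prints three stages (8, 4, and 2 participants), which is
why log2(threads) barriers are enough and barrier[10] covers 1024 processors.
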
  • soft/giet_vm/applications/sort/sort.py

    r434 → r502

     # define vsegs base & size
     code_base  = 0x10000000
-    code_size  = 0x00200000     # 2 Mbytes (replicated in each cluster)
+    code_size  = 0x00010000     # 64 Kbytes (replicated in each cluster)
 
     data_base  = 0x20000000
-    data_size  = 0x00100000     # 1 Mbyte (non replicated)
-
-    args_base  = 0x20100000
-    args_size  = 0x00000004     # 4 bytes (non replicated)
+    data_size  = 0x00010000     # 64 Kbyte (non replicated)
 
     stack_base = 0x40000000
...
                      'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM',
                      binpath = 'build/sort/sort.elf',
-                     local = False, big = True )
-
-    # args vseg : non local (only in cluster[0,0])
-    mapping.addVseg( vspace, 'sort_args', args_base , args_size,
-                     'C_WU', vtype = 'CONST', x = 0, y = 0, pseg = 'RAM',
-                     init = ntasks,
-                     local = False, big = True )
+                     local = False )
 
     # code vsegs : local (one copy per cluster)
     for x in xrange (x_size):
         for y in xrange (y_size):
-            mapping.addVseg( vspace, 'sort_code', code_base , code_size,
-                             'CXWU', vtype = 'ELF', x = x, y = y, pseg = 'RAM',
-                             binpath = 'build/sort/sort.elf',
-                             local = True, big = True )
+            cluster_id = (x * y_size) + y
+            if ( mapping.clusters[cluster_id].procs ):
+
+                mapping.addVseg( vspace, 'sort_code', code_base , code_size,
+                                 'CXWU', vtype = 'ELF', x = x, y = y, pseg = 'RAM',
+                                 binpath = 'build/sort/sort.elf',
+                                 local = True )
 
     # stacks vsegs : local (one stack per task)
     for x in xrange (x_size):
         for y in xrange (y_size):
-            for p in xrange (nprocs):
-                proc_id = (((x * y_size) + y) * nprocs) + p
-                size    = stack_size / nprocs
-                base    = stack_base + (proc_id * size)
-                mapping.addVseg( vspace, 'sort_stack_%d_%d_%d' % (x,y,p), base, size,
+            cluster_id = (x * y_size) + y
+            if ( mapping.clusters[cluster_id].procs ):
+                for p in xrange (nprocs):
+                    proc_id = (((x * y_size) + y) * nprocs) + p
+                    size    = stack_size / nprocs
+                    base    = stack_base + (proc_id * size)
+
+                    mapping.addVseg( vspace, 'sort_stack_%d_%d_%d' % (x,y,p),
+                                     base, size, 'C_WU', vtype = 'BUFFER',
+                                     x = x, y = y, pseg = 'RAM',
+                                     local = True, big = True )
+
+    # heap vsegs : distributed but non local (all tasks can access all heap vsegs)
+    for x in xrange (x_size):
+        for y in xrange (y_size):
+            cluster_id = (x * y_size) + y
+            if ( mapping.clusters[cluster_id].procs ):
+                size       = heap_size
+                base       = heap_base + (cluster_id * size)
+
+                mapping.addVseg( vspace, 'sort_heap_%d_%d' % (x,y), base, size,
                                  'C_WU', vtype = 'BUFFER', x = x, y = y, pseg = 'RAM',
-                                 local = True, big = True )
-
-            # heap vsegs : distributed but non local (all tasks can access all heap vsegs)
-            cluster_id = (x * y_size) + y
-            size       = heap_size
-            base       = heap_base + (cluster_id * size)
-            mapping.addVseg( vspace, 'sort_heap_%d_%d' % (x,y), base, size,
-                             'C_WU', vtype = 'BUFFER', x = x, y = y, pseg = 'RAM',
-                             local = False, big = True )
+                                 local = False, big = True )
 
     # distributed tasks / one task per processor
     for x in xrange (x_size):
         for y in xrange (y_size):
-            for p in xrange( nprocs ):
-                trdid = (((x * y_size) + y) * nprocs) + p
-                mapping.addTask( vspace, 'sort_%d_%d_%d' % (x,y,p), trdid, x, y, p,
-                                 'sort_stack_%d_%d_%d' % (x,y,p),
-                                 'sort_heap_%d_%d' % (x,y), 0 )
+            cluster_id = (x * y_size) + y
+            if ( mapping.clusters[cluster_id].procs ):
+                for p in xrange( nprocs ):
+                    trdid = (((x * y_size) + y) * nprocs) + p
+
+                    mapping.addTask( vspace, 'sort_%d_%d_%d' % (x,y,p),
+                                     trdid, x, y, p,
+                                     'sort_stack_%d_%d_%d' % (x,y,p),
+                                     'sort_heap_%d_%d' % (x,y), 0 )
 
     # extend mapping name