Changeset 173 for trunk/softs


Ignore:
Timestamp:
May 30, 2011, 10:37:56 AM (14 years ago)
Author:
alain
Message:

_barrier_wait(): separate count and lock variables

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/softs/giet_tsar/drivers.c

    r158 r173  
    128128////////////////////////////////////////////////////////////////////////////////////////
    129129
    130 in_unckdata int volatile    _barrier_initial_value[8] = { 0,0,0,0,0,0,0,0 };
    131 in_unckdata int volatile    _barrier_count[8]         = { 0,0,0,0,0,0,0,0 };
     130in_unckdata int volatile    _barrier_initial_value[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
     131in_unckdata int volatile    _barrier_count[16]         = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
     132in_unckdata int volatile    _barrier_lock[16]          = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };
    132133
    133134////////////////////////////////////////////////////////////////////////////////////////
     
    179180//  _procid()
    180181// Access CP0 and returns processor ident
     182// No more than 1024 processors...
    181183////////////////////////////////////////////////////////////////////////////////////////
    182184in_drivers unsigned int _procid()
     
    184186    unsigned int ret;
    185187    asm volatile( "mfc0 %0, $15, 1": "=r"(ret) );
    186     return (ret & 0xFF);
     188    return (ret & 0x3FF);
    187189}
    188190////////////////////////////////////////////////////////////////////////////////////////
     
    359361    size_t              ntasks          = (size_t)&NB_TASKS;
    360362    size_t              nprocs          = (size_t)&NB_PROCS;
    361     size_t              nclusters       = (size_t)&NB_CLUSTERS;
    362     unsigned int        base            = (unsigned int)&seg_tty_base;
     363    size_t                  nclusters   = (size_t)&NB_CLUSTERS;
     364    unsigned int    base                = (unsigned int)&seg_tty_base;
    363365    unsigned int        increment       = _segment_increment(ntasks*TTY_SPAN*4);
    364366    size_t              pid             = _procid();
    365     size_t              tid             = _current_task_array[pid];
    366367    int                 nwritten        = 0;
     368    size_t                  tid;
    367369    int                 i;
    368370
    369     if( tid >= ntasks )                 return -1;
     371    if( ntasks == 0 )  tid = 0;
     372    else               tid = _current_task_array[pid];
     373
     374    if( tid >= ntasks )                         return -1;
    370375    if( pid >= nprocs*nclusters )       return -1;
    371376
     
    400405    size_t              nprocs          = (size_t)&NB_PROCS;
    401406    size_t              nclusters       = (size_t)&NB_CLUSTERS;
    402     unsigned int        base            = (unsigned int)&seg_tty_base;
     407    unsigned int    base                = (unsigned int)&seg_tty_base;
    403408    unsigned int        increment       = _segment_increment(ntasks*TTY_SPAN*4);
    404409    size_t              pid             = _procid();
    405     size_t              tid             = _current_task_array[pid];
    406 
    407     if( length != 1)                    return -1;
    408     if( pid >= nprocs*nclusters )       return -1;
    409     if( tid >= ntasks )                 return -1;
     410    size_t                  tid;
     411
     412    if( pid > 7 )   tid = 0;
     413    else            tid = _current_task_array[pid];
     414
     415    if( length != 1)                        return -1;
     416    if( pid >= nprocs*nclusters )   return -1;
     417    if( tid >= ntasks )                     return -1;
    410418   
    411419    tty_address = (char*)(base + increment + tid*TTY_SPAN*4);
     
    436444{
    437445    int     pid         = _procid();
    438     int     tid         = _current_task_array[pid];
    439446    int     ntasks      = (int)&NB_TASKS;
    440447    int     nprocs      = (int)&NB_PROCS;
    441448    int     nclusters   = (int)&NB_CLUSTERS;
    442449    int     tty_index;
    443 
     450    int     tid;
     451
     452    if( pid > 7 )   tid = 0;
     453    else            tid = _current_task_array[pid];
     454 
    444455    if( length != 1)                    return -1;
    445456    if( pid >= nprocs*nclusters )       return -1;
     
    858869// _barrier_init()
    859870// This function makes a cooperative initialisation of the barrier:
    860 // Several tasks can try to initialize the barrier, but the initialisation
     871// - barrier_count[index] <= N
     872// - barrier_lock[index]  <= 0
     873// All tasks try to initialize the barrier, but the initialisation
    861874// is done by only one task, using LL/SC instructions.
     875// This cooperative initialisation is questionable,
     876// because the barrier can only be initialised once...
    862877//////////////////////////////////////////////////////////////////////////////////////
    863878in_drivers int _barrier_init(unsigned int index, unsigned int value)
     
    866881    register int* pinit         = (int*)&_barrier_initial_value[index];
    867882    register int* pcount        = (int*)&_barrier_count[index];
     883    register int* plock         = (int*)&_barrier_lock[index];
    868884
    869885    if ( index > 7 )    return 1;
     
    871887    // parallel initialisation using atomic instructions LL/SC
    872888    asm volatile ("_barrier_init_test:                  \n"
    873                   "ll   $2,     0(%0)                   \n"     // read initial value
     889                  "ll   $2,     0(%0)                   \n"     // read barrier_inital_value
    874890                  "bnez $2,     _barrier_init_done      \n"
    875                   "move $3,     %2                      \n"
    876                   "sc   $3,     0(%0)                   \n"     // try to write initial value
     891                  "move $3,     %3                              \n"
     892                  "sc   $3,     0(%0)                   \n"     // try to write barrier_initial_value
    877893                  "beqz $3,     _barrier_init_test      \n"
    878                   "move $3,     %2                      \n"
    879                   "sw   $3,     0(%1)                   \n"     // write count
     894                  "move $3,     %3                                  \n"
     895                  "sw   $3,     0(%1)                           \n"     // barrier_count <= barrier_initial_value
     896                  "move $3, $0                      \n" //
     897                  "sw   $3,     0(%2)                           \n"     // barrier_lock <= 0
    880898                  "_barrier_init_done:                  \n"
    881                   ::"r"(pinit),"r"(pcount),"r"(value):"$2","$3");
     899                  ::"r"(pinit),"r"(pcount),"r"(plock),"r"(value):"$2","$3");
    882900    return 0 ;
    883901}
    884902//////////////////////////////////////////////////////////////////////////////////////
    885903//      _barrier_wait()
    886 // This blocking function uses a busy_wait technics (on the counter value),
     904// This blocking function uses a busy-wait technique (on the barrier_lock value),
    887905// because the GIET does not support dynamic scheduling/descheduling of tasks.
    888 // In the busy waiting state, each task uses a pseudo-random delay between
    889 // two successive read of the barrier counter in order to avoid bus saturation.
    890 // the average delay is about 1000 cycles.
    891 // There is at most 8 independant barriers, and an error is returned
    892 // if the barrier index is larger than 7.
     906// The barrier state is actually defined by two variables:
     907// _barrier_count[index] defines the number of participants that are waiting
     908// _barrier_lock[index] defines the bool variable whose value is polled
     909// The last participant changes the value of _barrier_lock[index] to release the barrier...
     910// There are at most 16 independent barriers, and an error is returned
     911// if the barrier index is larger than 15.
    893912//////////////////////////////////////////////////////////////////////////////////////
    894913in_drivers int _barrier_wait(unsigned int index)
    895914{
    896915    register int*       pcount          = (int*)&_barrier_count[index];         
    897     register int        maxcount        = _barrier_initial_value[index];
    898916    register int        count;
    899917
    900     if ( index > 7 )    return 1;
    901 
    902     // parallel decrement barrier counter using atomic instructions LL/SC
    903     // input : pointer on the barrier counter
    904     // output : counter value
     918    int                lock             = _barrier_lock[index];         
     919
     920    if ( index > 15 )   return 1;
     921   
     922    // parallel decrement _barrier_count[index] using atomic instructions LL/SC
     923    // input : pointer on _barrier_count[index]
     924    // output : count = _barrier_count[index] (before decrementation)
    905925    asm volatile ("_barrier_decrement:                          \n"
    906926                  "ll   %0,     0(%1)                           \n"
     
    910930                  :"=r"(count):"r"(pcount):"$2","$3");
    911931
    912     // the last task re-initializes the barrier counter
    913     // to the max value, waking up all other waiting tasks
     932    // the last task re-initializes the barrier_count variable
     933    // and the barrier_lock variable, waking up all other waiting tasks
    914934
    915935    if ( count == 1 )    // last task
    916936    {
    917         *pcount = maxcount;
    918         return 0;
     937        _barrier_count[index] = _barrier_initial_value[index];
     938        asm volatile( "sync" );
     939        _barrier_lock[index]   = (lock == 0) ? 1 : 0;
     940        return 0 ;
    919941    }
    920942    else                // other tasks
    921943    {
    922         while ( *pcount != maxcount )   { }     // busy waiting
     944        while ( lock == _barrier_lock[index] )  { }     // busy waiting
    923945        return 0 ;
    924946    }
Note: See TracChangeset for help on using the changeset viewer.