Ignore:
Timestamp:
Feb 8, 2015, 12:55:35 PM (10 years ago)
Author:
alain
Message:

Introduce quad tree for distributed locks and barriers.

File:
1 moved

Legend:

Unmodified
Added
Removed
  • soft/giet_vm/giet_common/kernel_locks.c

    r466 r495  
    11///////////////////////////////////////////////////////////////////////////////////
    2 // File     : locks.c
     2// File     : kernel_locks.c
    33// Date     : 01/12/2014
    44// Author   : alain greiner
     
    66///////////////////////////////////////////////////////////////////////////////////
    77
    8 #include "locks.h"
     8#include "kernel_locks.h"
    99#include "giet_config.h"
    1010#include "hard_config.h"
     
    1212#include "tty0.h"
    1313#include "kernel_malloc.h"
     14#include "io.h"
    1415
    1516///////////////////////////////////////////////////
    1617unsigned int _atomic_increment( unsigned int* ptr,
    17                                 unsigned int  increment )
     18                                int           increment )
    1819{
    1920    unsigned int value;
     
    3536}
    3637
     38
    3739///////////////////////////////////////////////////////////////////////////////////
    3840//      Simple lock access functions
     
    5355
    5456    asm volatile ( "1515:                   \n"
    55                        "lw   $2,    0(%0)       \n"   /* $2 <= lock current value         */
    56                        "bnez $2,    1515b       \n"   /* retry if lock already taken      */
    57                    "ll   $2,    0(%0)       \n"   /* ll_buffer <= lock current value  */
    58                    "bnez $2,    1515b       \n"   /* retry if lock already taken      */
    59                    "li   $3,    1           \n"   /* $3 <= argument for sc            */
    60                    "sc   $3,    0(%0)       \n"   /* try to set lock                  */
    61                    "beqz $3,    1515b       \n"   /* retry if sc failure              */
     57                       "lw   $2,    0(%0)       \n"
     58                       "bnez $2,    1515b       \n"
     59                   "ll   $2,    0(%0)       \n"
     60                   "bnez $2,    1515b       \n"
     61                   "li   $3,    1           \n"
     62                   "sc   $3,    0(%0)       \n"
     63                   "beqz $3,    1515b       \n"
    6264                   :
    6365                   : "r"(lock)
     
    7476void _simple_lock_release( simple_lock_t* lock )
    7577{
    76     asm volatile ( "sync                    \n"   /* for consistency                  */
    77                    "sw   $0,    0(%0)       \n"   /* release lock                     */
    78                    :
    79                    : "r"(lock)
    80                    : "memory" );
     78    asm volatile ( "sync" );   // for consistency
     79
     80    lock->value = 0;
    8181
    8282#if GIET_DEBUG_SIMPLE_LOCK
     
    107107unsigned int    y    = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
    108108unsigned int    l    = gpid & ((1<<P_WIDTH)-1);
    109 _puts("\n[DEBUG SPIN_LOCK] P[");
    110 _putd( x );
    111 _puts(",");
    112 _putd( y );
    113 _puts(",");
    114 _putd( l );
    115 _puts("] init lock ");
    116 _putx( (unsigned int)lock );
    117 _puts(" (current = ");
    118 _putd( lock->current );
    119 _puts(" / free = ");
    120 _putd( lock->free );
    121 _puts(" )\n");
     109_nolock_printf("\n[DEBUG SPIN_LOCK] P[%d,%d,%d] initializes lock %x at cycle %d\n",
     110               x, y, l, (unsigned int)lock, _get_proctime() );
    122111#endif
    123112
     
    136125unsigned int    y    = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
    137126unsigned int    l    = gpid & ((1<<P_WIDTH)-1);
    138 _puts("\n[DEBUG SPIN_LOCK] P[");
    139 _putd( x );
    140 _puts(",");
    141 _putd( y );
    142 _puts(",");
    143 _putd( l );
    144 _puts("] get ticket ");
    145 _putx( ticket );
    146 _puts(" for lock ");
    147 _putx( (unsigned int)lock );
    148 _puts(" (current = ");
    149 _putd( lock->current );
    150 _puts(" / free = ");
    151 _putd( lock->free );
    152 _puts(" )\n");
    153 #endif
    154 
     127_nolock_printf("\n[DEBUG SPIN_LOCK] P[%d,%d,%d] get ticket %d for lock %x at cycle %d"
     128               " / current = %d / free = %d\n",
     129               x, y, l, ticket, (unsigned int)lock, _get_proctime(),
     130               lock->current, lock->free );
     131#endif
    155132
    156133    // poll the spin_lock current slot index
    157     asm volatile("5678:                   \n"
    158                  "lw   $10,  0(%0)        \n"
    159                  "move $11,  %1           \n"
    160                  "bne  $10,  $11,  5678b  \n"
    161                  :
    162                  : "r"(lock), "r"(ticket)
    163                  : "$10", "$11" );
     134    while ( ioread32( &lock->current ) != ticket ) asm volatile ("nop");
    164135
    165136#if GIET_DEBUG_SPIN_LOCK
    166 _puts("\n[DEBUG SPIN_LOCK] P[");
    167 _putd( x );
    168 _puts(",");
    169 _putd( y );
    170 _puts(",");
    171 _putd( l );
    172 _puts("] get lock ");
    173 _putx( (unsigned int)lock );
    174 _puts(" (current = ");
    175 _putd( lock->current );
    176 _puts(" / free = ");
    177 _putd( lock->free );
    178 _puts(" )\n");
     137_nolock_printf("\n[DEBUG SPIN_LOCK] P[%d,%d,%d] get lock %x at cycle %d"
     138               " / current = %d / free = %d\n",
     139               x, y, l, (unsigned int)lock, _get_proctime(),
     140               lock->current, lock->free );
    179141#endif
    180142
     
    184146void _spin_lock_release( spin_lock_t* lock )
    185147{
    186     unsigned int current = lock->current;
    187 
    188     if ( current == (GIET_LOCK_MAX_TICKET - 1) ) current = 0;
    189     else                                         current = current + 1;
    190 
    191     asm volatile ( "sync                    \n"   /* for consistency                  */
    192                    "sw   %1,    0(%0)       \n"   /* release lock                     */
    193                    :
    194                    : "r"(lock), "r"(current)
    195                    : "memory" );
    196    
     148    asm volatile ( "sync" );   // for consistency
     149
     150    lock->current = lock->current + 1;
    197151
    198152#if GIET_DEBUG_SPIN_LOCK
    199 unsigned int    gpid = _get_procid();
    200 unsigned int    x    = gpid >> (Y_WIDTH + P_WIDTH);
    201 unsigned int    y    = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
    202 unsigned int    l    = gpid & ((1<<P_WIDTH)-1);
    203 _puts("\n[DEBUG SPIN_LOCK] P[");
    204 _putd( x );
    205 _puts(",");
    206 _putd( y );
    207 _puts(",");
    208 _putd( l );
    209 _puts("] release lock ");
    210 _putx( (unsigned int)lock );
    211 _puts(" (current = ");
    212 _putd( lock->current );
    213 _puts(" / free = ");
    214 _putd( lock->free );
    215 _puts(" )\n");
    216 #endif
    217 
    218 }
    219 
    220 ///////////////////////////////////////////////////////////////////////////////////
    221 //      SBT lock access functions
    222 ///////////////////////////////////////////////////////////////////////////////////
    223 
    224 ///////////////////////////////////////////////////////////////////////////////////
    225 // This recursive function is used by the _sbt_lock_init() function
    226 // to initializes the SBT nodes (mainly the parent and child pointers).
    227 // It traverses the SBT from top to bottom.
    228 ///////////////////////////////////////////////////////////////////////////////////
    229 static void _sbt_lock_build( sbt_lock_t*     lock,      // pointer on the SBT lock
    230                              unsigned int    x,         // SBT node x coordinate
    231                              unsigned int    y,         // SBT node y coordinate
    232                              unsigned int    level,     // SBT node level
    233                              lock_node_t*    parent )   // pointer on parent node
    234 {
    235 
    236 #if GIET_DEBUG_SBT_LOCK
     153_nolock_printf("\n[DEBUG SPIN_LOCK] P[%d,%d,%d] release lock %x at cycle %d"
     154               " / current = %d / free = %d\n",
     155               x, y, l, (unsigned int)lock, _get_proctime(),
     156               lock->current, lock->free );
     157#endif
     158
     159}
     160
     161
     162
     163///////////////////////////////////////////////////////////////////////////////////
     164//      SQT lock access functions
     165///////////////////////////////////////////////////////////////////////////////////
     166
     167///////////////////////////////////////////////////////////////////////////////////
     168// This recursive function is used by the _sqt_lock_init() function
     169// to initializes the SQT nodes (mainly the parent and child pointers).
     170// It traverses the SQT from top to bottom.
     171// The SQT can be uncomplete (when xmax or ymax are not power of 2),
     172// and the recursion stops when the (x,y) coordinates exceed the footprint.
     173///////////////////////////////////////////////////////////////////////////////////
     174static
     175void _sqt_lock_build( sqt_lock_t*      lock,      // pointer on the SQT lock
     176                      unsigned int     x,         // node X coordinate
     177                      unsigned int     y,         // node Y coordinate
     178                      unsigned int     level,     // node level
     179                      sqt_lock_node_t* parent,    // pointer on parent node
     180                      unsigned int     xmax,      // SQT X size
     181                      unsigned int     ymax )     // SQT Y size
     182{
     183
     184#if GIET_DEBUG_SQT_LOCK
    237185unsigned int    gpid = _get_procid();
    238186unsigned int    px   = gpid >> (Y_WIDTH + P_WIDTH);
     
    242190
    243191    // get target node pointer
    244     lock_node_t* node = lock->node[x][y][level];
     192    sqt_lock_node_t* node = lock->node[x][y][level];
    245193   
    246194    if (level == 0 )        // terminal case
    247195    {
    248196        // initializes target node
    249         node->taken    = 0;   
    250         node->level    = level;
     197        node->current  = 0;   
     198        node->free     = 0;
     199        node->level    = 0;
    251200        node->parent   = parent;
    252         node->child0   = NULL;
    253         node->child1   = NULL;
    254         node->x        = x;
    255         node->y        = y;
    256 
    257 #if GIET_DEBUG_SBT_LOCK
    258 _nolock_printf("\n[DEBUG SBT_LOCK] P[%d,%d,%d] initialises SBT node[%d,%d,%d] : "
    259       "parent = %x / childO = %x / child1 = %x\n",
    260       px , py , pl , node->x , node->y , node->level ,
    261       (unsigned int)node->parent , (unsigned int)node->child0 , (unsigned int)node->child1 );
     201        node->child[0] = NULL;
     202        node->child[1] = NULL;
     203        node->child[2] = NULL;
     204        node->child[3] = NULL;
     205
     206#if GIET_DEBUG_SQT_LOCK
     207_nolock_printf("\n[DEBUG SQT_LOCK] P[%d,%d,%d] initialises SQT node[%d,%d,%d] : \n"
     208      " parent = %x / childO = %x / child1 = %x / child2 = %x / child3 = %x\n",
     209      px , py , pl , x , y , level ,
     210      (unsigned int)node->parent ,
     211      (unsigned int)node->child[0] ,
     212      (unsigned int)node->child[1] ,
     213      (unsigned int)node->child[2] ,
     214      (unsigned int)node->child[3] );
    262215#endif
    263216
     
    265218    else                   // non terminal case
    266219    {
    267         unsigned int x0;   // x coordinate for child0
    268         unsigned int y0;   // y coordinate for child0;
    269         unsigned int x1;   // x coordinate for child1;
    270         unsigned int y1;   // y coordinate for child1;
     220        unsigned int cx[4];      // x coordinate for children
     221        unsigned int cy[4];      // y coordinate for children
     222        unsigned int i;          // child index
    271223
    272224        // the child0 coordinates are equal to the parent coordinates
    273         // the child1 coordinates are incremented depending on the level value
    274         if ( level & 0x1 ) // odd level => X binary tree
     225        // other childs coordinates are incremented depending on the level value
     226        cx[0] = x;
     227        cy[0] = y;
     228
     229        cx[1] = x + (1 << (level-1));
     230        cy[1] = y;
     231
     232        cx[2] = x;
     233        cy[2] = y + (1 << (level-1));
     234
     235        cx[3] = x + (1 << (level-1));
     236        cy[3] = y + (1 << (level-1));
     237
     238        // initializes target node
     239        for ( i = 0 ; i < 4 ; i++ )
    275240        {
    276             x0 = x;
    277             y0 = y;
    278             x1 = x + (1 << ((level-1)>>1));
    279             y1 = y;
    280         }   
    281         else               // even level => Y binary tree
    282         {
    283             x0 = x;
    284             y0 = y;
    285             x1 = x;
    286             y1 = y + (1 << ((level-1)>>1));
     241            if ( (cx[i] < xmax) && (cy[i] < ymax) )
     242                node->child[i] = lock->node[cx[i]][cy[i]][level-1];
     243            else 
     244                node->child[i] = NULL;
    287245        }
    288 
    289         // initializes target node
    290         node->taken    = 0;
     246        node->current  = 0;
     247        node->free     = 0;
    291248        node->level    = level;
    292249        node->parent   = parent;
    293         node->child0   = lock->node[x0][y0][level-1];
    294         node->child1   = lock->node[x1][y1][level-1];
    295 
    296 #if GIET_DEBUG_SBT_LOCK
    297 _nolock_printf("\n[DEBUG SBT_LOCK] P[%d,%d,%d] initialises SBT node[%d,%d,%d] : "
    298       "parent = %x / childO = %x / child1 = %x\n",
     250
     251#if GIET_DEBUG_SQT_LOCK
     252_nolock_printf("\n[DEBUG SQT_LOCK] P[%d,%d,%d] initialises SQT node[%d,%d,%d] : \n"
     253      " parent = %x / childO = %x / child1 = %x / child2 = %x / child3 = %x\n",
    299254      px , py , pl , x , y , level ,
    300       (unsigned int)node->parent , (unsigned int)node->child0 , (unsigned int)node->child1 );
    301 #endif
    302 
    303         // recursive calls for children nodes
    304         _sbt_lock_build( lock , x0 , y0 , level-1 , node );
    305         _sbt_lock_build( lock , x1 , y1 , level-1 , node );
     255      (unsigned int)node->parent ,
     256      (unsigned int)node->child[0] ,
     257      (unsigned int)node->child[1] ,
     258      (unsigned int)node->child[2] ,
     259      (unsigned int)node->child[3] );
     260#endif
     261
     262       // recursive calls for children nodes
     263        for ( i = 0 ; i < 4 ; i++ )
     264        {
     265            if ( (cx[i] < xmax) && (cy[i] < ymax) )
     266                _sqt_lock_build( lock,
     267                                 cx[i],
     268                                 cy[i],
     269                                 level-1,
     270                                 node,
     271                                 xmax,
     272                                 ymax );
     273        }
    306274    }
    307 
    308 }  // end _sbt_lock_build()
    309 
    310 //////////////////////////////////////////////////////////////////////////////////
    311 // This recursive function is used by the sbt_lock_acquire() function to
    312 // get the SBT lock: It tries to get each "partial" lock on the path from bottom
    313 // to top, using an atomic LL/SC, and starting from bottom.
    314 // It is blocking : it poll each "partial lock until it can be taken.
    315 // The lock is finally obtained when all "partial" locks, at all levels are taken.
    316 //////////////////////////////////////////////////////////////////////////////////
    317 static void _sbt_lock_take( lock_node_t* node )
    318 {
    319     // try to take "partial" lock
    320     unsigned int* taken = &node->taken;
    321 
    322     asm volatile ( "1945:                   \n"
    323                        "lw   $2,    0(%0)       \n"   /* $2 <= lock current value         */
    324                        "bnez $2,    1945b       \n"   /* retry if lock already taken      */
    325                    "ll   $2,    0(%0)       \n"   /* ll_buffer <= lock current value  */
    326                    "bnez $2,    1945b       \n"   /* retry if lock already taken      */
    327                    "li   $3,    1           \n"   /* $3 <= argument for sc            */
    328                    "sc   $3,    0(%0)       \n"   /* try to set lock                  */
    329                    "beqz $3,    1945b       \n"   /* retry if sc failure              */
    330                    :
    331                    : "r"(taken)
    332                    : "$2", "$3", "memory" );
    333 
    334 #if GIET_DEBUG_SBT_LOCK
     275}  // end _sqt_lock_build()
     276
     277/////////////////////////////////////////////////////////////////////////////////
     278// This external function initialises the distributed SQT lock.
     279// It allocates memory for the distributed SQT nodes in clusters,
     280// and initializes the SQT nodes pointers array (stored in cluster[0][0].
     281// The SQT can be "uncomplete" as SQT lock nodes are only build in clusters
     282// containing processors.
     283// The actual number of SQT locks nodes in a cluster[x][y] depends on (x,y):
     284// At least 1 node / at most 5 nodes per cluster:
     285// - lock arbitrating between all processors of   1 cluster  has level 0,
     286// - lock arbitrating between all processors of   4 clusters has level 1,
     287// - lock arbitrating between all processors of  16 clusters has level 2,
     288// - lock arbitrating between all processors of  64 clusters has level 3,
     289// - lock arbitrating between all processors of 256 clusters has level 4,
     290/////////////////////////////////////////////////////////////////////////////////
     291void _sqt_lock_init( sqt_lock_t*  lock )
     292{
     293    unsigned int levels;
     294    unsigned int xmax;
     295    unsigned int ymax;
     296
     297    // compute the smallest SQT covering all processors
     298    _get_sqt_footprint( &xmax, &ymax, &levels );
     299
     300
     301#if GIET_DEBUG_SQT_LOCK
    335302unsigned int    gpid = _get_procid();
    336303unsigned int    px   = gpid >> (Y_WIDTH + P_WIDTH);
    337304unsigned int    py   = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
    338305unsigned int    pl   = gpid & ((1<<P_WIDTH)-1);
    339 _nolock_printf("\n[DEBUG SBT_LOCK] P[%d,%d,%d] get partial SBT lock[%d,%d,%d] : vaddr = %x\n",
    340       px , py , pl , node->x , node->y , node->level , (unsigned int)node );
    341 #endif
    342 
    343     // try to take the parent node lock until top is reached
    344     if ( node->parent != NULL ) _sbt_lock_take( node->parent );
    345 
    346 } // end _sbt_lock_take()
     306_nolock_printf("\n[DEBUG SQT_LOCK] P[%d,%d,%d] initialises SQT lock %x : \n"
     307               " xmax = %d / ymax = %d / levels = %d\n",
     308               px , py , pl , (unsigned int)lock ,
     309               xmax , ymax , levels );
     310#endif
     311
    347312   
    348 
    349 /////////////////////////////////////////////////////////////////////////////////
    350 // This recursive function is used by the sbt_lock_release() function to
    351 // release the SBT lock: It reset all "partial" locks on the path from bottom
    352 // to top, using a normal write, and starting from bottom.
    353 /////////////////////////////////////////////////////////////////////////////////
    354 static void _sbt_lock_free( lock_node_t* node )
    355 {
    356     // reset "partial" lock
    357     node->taken = 0;
    358 
    359 #if GIET_DEBUG_SBT_LOCK
    360 unsigned int    gpid = _get_procid();
    361 unsigned int    px   = gpid >> (Y_WIDTH + P_WIDTH);
    362 unsigned int    py   = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
    363 unsigned int    pl   = gpid & ((1<<P_WIDTH)-1);
    364 _nolock_printf("\n[DEBUG SBT_LOCK] P[%d,%d,%d] release partial SBT lock[%d,%d,%d] : vaddr = %x\n",
    365       px , py , pl , node->x , node->y , node->level , (unsigned int)node );
    366 #endif
    367 
    368     // reset parent node until top is reached
    369     if ( node->parent != NULL ) _sbt_lock_free( node->parent );
    370 
    371 } // end _sbt_lock_free()
    372 
    373 //////////////////////////////////////////////////////////////////////////////////
    374 // This external function initialises the distributed SBT lock.
    375 //////////////////////////////////////////////////////////////////////////////////
    376 void _sbt_lock_init( sbt_lock_t*  lock )
    377 {
    378     unsigned int levels = 0;     // depth of the SBT (number of levels)
    379 
    380     // compute SBT levels
    381     if      ((X_SIZE == 1 ) && (Y_SIZE == 1 ))  levels = 1;
    382     else if ((X_SIZE == 2 ) && (Y_SIZE == 1 ))  levels = 2;
    383     else if ((X_SIZE == 2 ) && (Y_SIZE == 2 ))  levels = 3;
    384     else if ((X_SIZE == 4 ) && (Y_SIZE == 2 ))  levels = 4;
    385     else if ((X_SIZE == 4 ) && (Y_SIZE == 4 ))  levels = 5;
    386     else if ((X_SIZE == 8 ) && (Y_SIZE == 4 ))  levels = 6;
    387     else if ((X_SIZE == 8 ) && (Y_SIZE == 8 ))  levels = 7;
    388     else if ((X_SIZE == 16) && (Y_SIZE == 8 ))  levels = 8;
    389     else if ((X_SIZE == 16) && (Y_SIZE == 16))  levels = 9;
    390     else
     313    unsigned int x;              // x coordinate for one SQT node
     314    unsigned int y;              // y coordinate for one SQT node
     315    unsigned int l;              // level for one SQT node
     316
     317    for ( x = 0 ; x < xmax ; x++ )
    391318    {
    392         _nolock_printf("\n[GIET ERROR] _sbt_lock_init() :illegal X_SIZE/Y_SIZE \n");
    393         _exit();
    394     }
    395 
    396 #if GIET_DEBUG_SBT_LOCK
    397 unsigned int    gpid = _get_procid();
    398 unsigned int    px   = gpid >> (Y_WIDTH + P_WIDTH);
    399 unsigned int    py   = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
    400 unsigned int    pl   = gpid & ((1<<P_WIDTH)-1);
    401 _nolock_printf("\n[DEBUG SBT_LOCK] P[%d,%d,%d] initialises SBT lock %x : %d levels\n",
    402                px , py , pl , (unsigned int)lock , levels );
    403 #endif
    404 
    405     // allocates memory for the SBT nodes and initializes SBT nodes pointers array
    406     // the actual number of SBT nodes in a cluster(x,y) depends on (x,y):
    407     // At least 1 node / at most 9 nodes per cluster.
    408     unsigned int x;              // x coordinate for one SBT node
    409     unsigned int y;              // y coordinate for one SBT node
    410     unsigned int l;              // level for one SBT node
    411     for ( x = 0 ; x < X_SIZE ; x++ )
    412     {
    413         for ( y = 0 ; y < Y_SIZE ; y++ )
     319        for ( y = 0 ; y < ymax ; y++ )
    414320        {
    415321            for ( l = 0 ; l < levels ; l++ )             // level 0 nodes
     
    417323               
    418324                if ( ( (l == 0) && ((x&0x00) == 0) && ((y&0x00) == 0) ) ||
    419                      ( (l == 1) && ((x&0x01) == 0) && ((y&0x00) == 0) ) ||
    420                      ( (l == 2) && ((x&0x01) == 0) && ((y&0x01) == 0) ) ||
    421                      ( (l == 3) && ((x&0x03) == 0) && ((y&0x01) == 0) ) ||
    422                      ( (l == 4) && ((x&0x03) == 0) && ((y&0x03) == 0) ) ||
    423                      ( (l == 5) && ((x&0x07) == 0) && ((y&0x03) == 0) ) ||
    424                      ( (l == 6) && ((x&0x07) == 0) && ((y&0x07) == 0) ) ||
    425                      ( (l == 7) && ((x&0x0F) == 0) && ((y&0x07) == 0) ) ||
    426                      ( (l == 8) && ((x&0x0F) == 0) && ((y&0x0F) == 0) ) )
     325                     ( (l == 1) && ((x&0x01) == 0) && ((y&0x01) == 0) ) ||
     326                     ( (l == 2) && ((x&0x03) == 0) && ((y&0x03) == 0) ) ||
     327                     ( (l == 3) && ((x&0x07) == 0) && ((y&0x07) == 0) ) ||
     328                     ( (l == 4) && ((x&0x0F) == 0) && ((y&0x0F) == 0) ) )
    427329                 {
    428                      lock->node[x][y][l] = (lock_node_t*)_remote_malloc( sizeof(lock_node_t),
    429                                                                          x, y );
    430 
    431 #if GIET_DEBUG_SBT_LOCK
    432 _nolock_printf("\n[DEBUG SBT_LOCK] P[%d,%d,%d] allocates SBT node[%d,%d,%d] : vaddr = %x\n",
     330                     lock->node[x][y][l] =
     331                     (sqt_lock_node_t*)_remote_malloc( sizeof(sqt_lock_node_t),
     332                                                       x, y );
     333
     334#if GIET_DEBUG_SQT_LOCK
     335_nolock_printf("\n[DEBUG SQT_LOCK] P[%d,%d,%d] allocates SQT node[%d,%d,%d]"
     336               " : vaddr = %x\n",
    433337               px , py , pl , x , y , l , (unsigned int)lock->node[x][y][l] );
    434338#endif
     
    438342    }
    439343           
    440 #if GIET_DEBUG_SBT_LOCK
    441 _nolock_printf("\n[DEBUG SBT_LOCK] SBT nodes initialisation starts\n");
    442 #endif
    443 
    444     // recursively initialize all SBT nodes from root to bottom
    445     _sbt_lock_build( lock,       // pointer on the SBT lock descriptor
     344    // recursively initialize all SQT nodes from root to bottom
     345    _sqt_lock_build( lock,       // pointer on the SQT lock descriptor
    446346                     0,          // x coordinate
    447347                     0,          // y coordinate
    448                      levels-1,   // level in SBT
    449                      NULL );     // pointer on the parent node
     348                     levels-1,   // level in SQT
     349                     NULL,       // pointer on the parent node
     350                     xmax,       // SQT footprint X size
     351                     ymax );     // SQT footprint X size
    450352
    451353    asm volatile ("sync" ::: "memory");
    452354
    453 #if GIET_DEBUG_SBT_LOCK
    454 _nolock_printf("\n[DEBUG SBT_LOCK] SBT nodes initialisation completed\n");
    455 #endif
    456 
    457 } // end _sbt_lock_init()
     355#if GIET_DEBUG_SQT_LOCK
     356_nolock_printf("\n[DEBUG SQT_LOCK] SQT nodes initialisation completed\n");
     357#endif
     358
     359} // end _sqt_lock_init()
    458360
    459361//////////////////////////////////////////////////////////////////////////////////
    460 // This external function get thes SBT lock.
     362// This recursive function is used by the sqt_lock_acquire() function to get
     363// a distributed SQT lock: It tries to get each local queuing lock on the path
     364// from bottom to top, and starting from bottom.
     365// It is blocking : it polls each "partial lock until it can be taken.
     366// The lock is finally obtained when all locks, at all levels are taken.
     367//////////////////////////////////////////////////////////////////////////////////
     368static
     369void _sqt_lock_take( sqt_lock_node_t* node )
     370{
     371    // get next free ticket from local lock
     372    unsigned int ticket = _atomic_increment( &node->free, 1 );
     373
     374#if GIET_DEBUG_SQT_LOCK
     375unsigned int    gpid = _get_procid();
     376unsigned int    x    = gpid >> (Y_WIDTH + P_WIDTH);
     377unsigned int    y    = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
     378unsigned int    l    = gpid & ((1<<P_WIDTH)-1);
     379_nolock_printf("\n[DEBUG SQT_LOCK] P[%d,%d,%d] get ticket %d for SQT lock %x"
     380               " / level = %d / current = %d / free = %d\n",
     381               x , y , l , ticket , (unsigned int)node ,
     382               node->level , node->current , node->free );
     383#endif
     384
     385    // poll the local lock current index
     386    while ( ioread32( &node->current ) != ticket ) asm volatile( "nop" );
     387
     388#if GIET_DEBUG_SQT_LOCK
     389_nolock_printf("\n[DEBUG SQT_LOCK] P[%d,%d,%d] get SQT lock %x"
     390               " / level = %d / current = %d / free = %d\n",
     391               x , y , l , (unsigned int)node ,
     392               node->level , node->current , node->free );
     393#endif
     394
     395    // try to take the parent node lock until top is reached
     396    if ( node->parent != NULL ) _sqt_lock_take( node->parent );
     397
     398} // end _sqt_lock_take()
     399   
     400//////////////////////////////////////////////////////////////////////////////////
     401// This external function get thes SQT lock.
    461402// Returns only when the lock has been taken.
    462403/////////////////////////////////////////////////////////////////////////////////
    463 void _sbt_lock_acquire( sbt_lock_t*  lock )
     404void _sqt_lock_acquire( sqt_lock_t*  lock )
    464405{
    465406    // get cluster coordinates
     
    468409    unsigned int y    = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
    469410
    470     // try to recursively take the "partial" locks (from bottom to top)
    471     _sbt_lock_take( lock->node[x][y][0] );
    472 }
    473 
    474 
    475 /////////////////////////////////////////////////////////////////////////////////
    476 // This external function releases the SBT lock.
    477 /////////////////////////////////////////////////////////////////////////////////
    478 void _sbt_lock_release( sbt_lock_t*  lock )
    479 {
     411    // try to recursively take the distributed locks (from bottom to top)
     412    _sqt_lock_take( lock->node[x][y][0] );
     413}
     414
     415
     416/////////////////////////////////////////////////////////////////////////////////
     417// This recursive function is used by the sqt_lock_release() function to
     418// release distributed SQT lock: It releases all local locks on the path from
     419// bottom to top, using a normal read/write, and starting from bottom.
     420/////////////////////////////////////////////////////////////////////////////////
     421static
     422void _sqt_lock_give( sqt_lock_node_t* node )
     423{
     424    // release the local lock
     425    node->current = node->current + 1;
     426
     427#if GIET_DEBUG_SQT_LOCK
     428unsigned int    gpid = _get_procid();
     429unsigned int    x   = gpid >> (Y_WIDTH + P_WIDTH);
     430unsigned int    y   = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
     431unsigned int    l   = gpid & ((1<<P_WIDTH)-1);
     432_nolock_printf("\n[DEBUG SQT_LOCK] P[%d,%d,%d] release SQT lock %x"
     433               " / level = %d / current = %d / free = %d\n",
     434               x , y , l , (unsigned int)node,
     435               node->level , node->current , node->free );
     436#endif
     437
     438    // reset parent node until top is reached
     439    if ( node->parent != NULL ) _sqt_lock_give( node->parent );
     440
     441} // end _sqt_lock_give()
     442
     443
     444/////////////////////////////////////////////////////////////////////////////////
     445// This external function releases the SQT lock.
     446/////////////////////////////////////////////////////////////////////////////////
     447void _sqt_lock_release( sqt_lock_t*  lock )
     448{
     449    asm volatile ( "sync" );   // for consistency
     450
    480451    // get cluster coordinates
    481452    unsigned int gpid = _get_procid();
     
    483454    unsigned int y    = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
    484455
    485     // recursively reset the "partial" locks (from bottom to top)
    486     _sbt_lock_free( lock->node[x][y][0] );
     456    // recursively reset the distributed locks (from bottom to top)
     457    _sqt_lock_give( lock->node[x][y][0] );
    487458}
    488459
Note: See TracChangeset for help on using the changeset viewer.