///////////////////////////////////////////////////////////////////////////////////
// File     : locks.c
// Date     : 01/12/2014
// Author   : alain greiner
// Copyright (c) UPMC-LIP6
///////////////////////////////////////////////////////////////////////////////////

#include "locks.h"
#include "giet_config.h"
#include "hard_config.h"
#include "utils.h"
#include "tty0.h"
#include "kernel_malloc.h"

///////////////////////////////////////////////////////////////////////////////////
// This function atomically adds <increment> to the word pointed to by <ptr>,
// using a LL/SC sequence that is replayed until the SC succeeds.
// - ptr       : address of the word to be updated.
// - increment : value added to the word.
// It returns the value loaded by the LL, i.e. the content of the word
// before the addition.
///////////////////////////////////////////////////////////////////////////////////
unsigned int _atomic_increment( unsigned int* ptr,
                                unsigned int  increment )
{
    unsigned int value;

    // The retry label is placed before the two moves, so both arguments are
    // copied again into $10 / $11 at each attempt.
    asm volatile (
        "1234:                         \n"
        "move $10,   %1                \n"   /* $10 <= ptr               */
        "move $11,   %2                \n"   /* $11 <= increment         */
        "ll   $12,   0($10)            \n"   /* $12 <= *ptr              */
        "addu $13,   $11,    $12       \n"   /* $13 <= *ptr + increment  */
        "sc   $13,   0($10)            \n"   /* M[ptr] <= new            */ 
        "beqz $13,   1234b             \n"   /* retry if failure         */
        "move %0,    $12               \n"   /* value <= *ptr if success */
        : "=r" (value) 
        : "r" (ptr), "r" (increment)
        : "$10", "$11", "$12", "$13", "memory" );

    return value;
}

///////////////////////////////////////////////////////////////////////////////////
//      Simple lock access functions
///////////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////////////
// This blocking function takes the simple lock pointed to by <lock>.
// It first polls the lock word with a plain load until it reads zero, then
// tries to write 1 with a LL/SC sequence. Any failure (lock already taken,
// or SC failure) restarts the whole sequence from the plain load.
// The assembly code accesses the word at offset 0 from the <lock> pointer.
// When GIET_DEBUG_SIMPLE_LOCK is set, a trace is printed on entry and on exit.
///////////////////////////////////////////////////////////////////////////////////
void _simple_lock_acquire( simple_lock_t* lock )
{

#if GIET_DEBUG_SIMPLE_LOCK
// decode the (x,y,l) processor coordinates from the global processor index
unsigned int    gpid = _get_procid();
unsigned int    x    = gpid >> (Y_WIDTH + P_WIDTH);
unsigned int    y    = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
unsigned int    l    = gpid & ((1<<P_WIDTH)-1);
_nolock_printf("\n[DEBUG SIMPLE_LOCK] P[%d,%d,%d] enters acquire() at cycle %d\n",
               x , y , l , _get_proctime() );
#endif

    // $2 and $3 are used as scratch registers and declared as clobbered
    asm volatile ( "1515:                   \n"
	               "lw   $2,    0(%0)       \n"   /* $2 <= lock current value         */
	               "bnez $2,    1515b       \n"   /* retry if lock already taken      */
                   "ll   $2,    0(%0)       \n"   /* ll_buffer <= lock current value  */
                   "bnez $2,    1515b       \n"   /* retry if lock already taken      */
                   "li   $3,    1           \n"   /* $3 <= argument for sc            */
                   "sc   $3,    0(%0)       \n"   /* try to set lock                  */
                   "beqz $3,    1515b       \n"   /* retry if sc failure              */
                   :
                   : "r"(lock)
                   : "$2", "$3", "memory" );

#if GIET_DEBUG_SIMPLE_LOCK
_nolock_printf("\n[DEBUG SIMPLE_LOCK] P[%d,%d,%d] exit acquire() at cycle %d\n",
               x , y , l , _get_proctime() );
#endif

}

///////////////////////////////////////////////////////////////////////////////////
// This function releases the simple lock pointed to by <lock>: it executes
// a sync instruction, then writes zero in the word at offset 0 from <lock>.
// NOTE(review): the sync is presumably there to make the writes done in the
// critical section complete before the lock is seen free - to be confirmed.
// When GIET_DEBUG_SIMPLE_LOCK is set, a trace is printed after the release.
///////////////////////////////////////////////////////////////////////////////////
void _simple_lock_release( simple_lock_t* lock )
{
    asm volatile ( "sync                    \n"   /* for consistency                  */
                   "sw   $0,    0(%0)       \n"   /* release lock                     */
                   :
                   : "r"(lock)
                   : "memory" );

#if GIET_DEBUG_SIMPLE_LOCK
// decode the (x,y,l) processor coordinates from the global processor index
unsigned int    gpid = _get_procid();
unsigned int    x    = gpid >> (Y_WIDTH + P_WIDTH);
unsigned int    y    = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
unsigned int    l    = gpid & ((1<<P_WIDTH)-1);
_nolock_printf("\n[DEBUG SIMPLE_LOCK] P[%d,%d,%d] release() at cycle %d\n",
               x , y , l , _get_proctime() );
#endif

}


///////////////////////////////////////////////////////////////////////////////////
//      Queuing Lock access functions
///////////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////////////
// This function initialises the ticket lock pointed to by <lock>:
// both the <current> and the <free> fields are reset to zero.
// When GIET_DEBUG_SPIN_LOCK is set, the resulting lock state is displayed.
///////////////////////////////////////////////////////////////////////////////////
void _spin_lock_init( spin_lock_t* lock )
{
    // ticket being served, then next ticket to be delivered
    lock->current = 0;
    lock->free    = 0;

#if GIET_DEBUG_SPIN_LOCK
const unsigned int    procid = _get_procid();
const unsigned int    lpid   = procid & ((1<<P_WIDTH)-1);
const unsigned int    cy     = (procid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
const unsigned int    cx     = procid >> (Y_WIDTH + P_WIDTH);

_puts("\n[DEBUG SPIN_LOCK] P[");
_putd( cx );
_puts(",");
_putd( cy );
_puts(",");
_putd( lpid );
_puts("] init lock ");
_putx( (unsigned int)lock );
_puts(" (current = ");
_putd( lock->current );
_puts(" / free = ");
_putd( lock->free );
_puts(" )\n");
#endif

}


///////////////////////////////////////////////////////////////////////////////////
// This blocking function takes the ticket lock pointed to by <lock>.
// It gets a ticket by atomically incrementing the <free> field, then polls
// the lock until the ticket being served is equal to its own ticket.
// The polling loop reads the word at offset 0 from the <lock> pointer,
// which is the word written by _spin_lock_release().
// The polling loop is declared as clobbering memory, so that the compiler
// cannot move memory accesses of the critical section before it.
// NOTE(review): the tickets delivered from <free> are never wrapped here,
// whereas _spin_lock_release() wraps <current> at GIET_LOCK_MAX_TICKET.
// To be checked: both counters must wrap at the same value.
// When GIET_DEBUG_SPIN_LOCK is set, a trace is displayed when the ticket is
// obtained, and when the lock is obtained.
///////////////////////////////////////////////////////////////////////////////////
void _spin_lock_acquire( spin_lock_t* lock )
{
    // get next free slot index from lock
    unsigned int ticket = _atomic_increment( &lock->free, 1 );

#if GIET_DEBUG_SPIN_LOCK
unsigned int    gpid = _get_procid();
unsigned int    x    = gpid >> (Y_WIDTH + P_WIDTH);
unsigned int    y    = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
unsigned int    l    = gpid & ((1<<P_WIDTH)-1);
_puts("\n[DEBUG SPIN_LOCK] P[");
_putd( x );
_puts(",");
_putd( y );
_puts(",");
_putd( l );
_puts("] get ticket ");
_putx( ticket );
_puts(" for lock ");
_putx( (unsigned int)lock );
_puts(" (current = ");
_putd( lock->current );
_puts(" / free = ");
_putd( lock->free );
_puts(" )\n");
#endif


    // poll the spin_lock current slot index
    asm volatile("5678:                   \n"
                 "lw   $10,  0(%0)        \n"   /* $10 <= ticket being served      */
                 "move $11,  %1           \n"   /* $11 <= ticket of the caller     */
                 "bne  $10,  $11,  5678b  \n"   /* retry until both are equal      */
                 :
                 : "r"(lock), "r"(ticket)
                 : "$10", "$11", "memory" );

#if GIET_DEBUG_SPIN_LOCK
_puts("\n[DEBUG SPIN_LOCK] P[");
_putd( x );
_puts(",");
_putd( y );
_puts(",");
_putd( l );
_puts("] get lock ");
_putx( (unsigned int)lock );
_puts(" (current = ");
_putd( lock->current );
_puts(" / free = ");
_putd( lock->free );
_puts(" )\n");
#endif

}

///////////////////////////////////////////////////////////////////////////////////
// This function releases the ticket lock pointed to by <lock>.
// It computes the next ticket to be served (the <current> field plus one,
// or zero when <current> is equal to GIET_LOCK_MAX_TICKET - 1), executes
// a sync instruction, and writes this value at offset 0 from <lock>.
// When GIET_DEBUG_SPIN_LOCK is set, the resulting lock state is displayed.
///////////////////////////////////////////////////////////////////////////////////
void _spin_lock_release( spin_lock_t* lock )
{
    // the ticket currently served is read only once
    const unsigned int served = lock->current;
    const unsigned int next   = ( served == (GIET_LOCK_MAX_TICKET - 1) ) ? 0
                                                                         : (served + 1);

    asm volatile ( "sync                    \n"   /* for consistency                  */
                   "sw   %1,    0(%0)       \n"   /* release lock                     */
                   :
                   : "r"(lock), "r"(next)
                   : "memory" );

#if GIET_DEBUG_SPIN_LOCK
const unsigned int    procid = _get_procid();
const unsigned int    lpid   = procid & ((1<<P_WIDTH)-1);
const unsigned int    cy     = (procid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
const unsigned int    cx     = procid >> (Y_WIDTH + P_WIDTH);

_puts("\n[DEBUG SPIN_LOCK] P[");
_putd( cx );
_puts(",");
_putd( cy );
_puts(",");
_putd( lpid );
_puts("] release lock ");
_putx( (unsigned int)lock );
_puts(" (current = ");
_putd( lock->current );
_puts(" / free = ");
_putd( lock->free );
_puts(" )\n");
#endif

}

///////////////////////////////////////////////////////////////////////////////////
//      SBT lock access functions
///////////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////////////////////////////////////////////
// This recursive function is used by the _sbt_lock_init() function
// to initialize the SBT nodes (mainly the parent and child pointers).
// It traverses the SBT from top to bottom.
// - lock   : pointer on the SBT lock descriptor (array of node pointers).
// - x, y   : coordinates of the node to initialize.
// - level  : level of the node to initialize (0 for a terminal node).
// - parent : pointer on the parent node (NULL for the root).
// All fields of the node are initialized at all levels, including the
// <x> and <y> coordinates, that are displayed by the debug traces.
// The child0 node has the same coordinates as its parent. The child1 node is
// shifted by 2**((level-1)/2) along X for an odd level, along Y for an even one.
///////////////////////////////////////////////////////////////////////////////////
static void _sbt_lock_build( sbt_lock_t*     lock,      // pointer on the SBT lock
                             unsigned int    x,         // SBT node x coordinate
                             unsigned int    y,         // SBT node y coordinate
                             unsigned int    level,     // SBT node level
                             lock_node_t*    parent )   // pointer on parent node
{

#if GIET_DEBUG_SBT_LOCK
unsigned int    gpid = _get_procid();
unsigned int    px   = gpid >> (Y_WIDTH + P_WIDTH);
unsigned int    py   = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
unsigned int    pl   = gpid & ((1<<P_WIDTH)-1);
#endif

    // get target node pointer
    lock_node_t* node = lock->node[x][y][level];

    // child1 coordinates (child0 coordinates are the node coordinates)
    // these values are only used for a non terminal node
    unsigned int x1 = x;
    unsigned int y1 = y;

    if ( level != 0 )
    {
        unsigned int offset = 1 << ((level-1)>>1);

        if ( level & 0x1 ) x1 = x + offset;    // odd level  => X binary tree
        else               y1 = y + offset;    // even level => Y binary tree
    }

    // initializes target node
    node->taken    = 0;
    node->level    = level;
    node->parent   = parent;
    node->child0   = (level != 0) ? lock->node[x][y][level-1]   : NULL;
    node->child1   = (level != 0) ? lock->node[x1][y1][level-1] : NULL;
    node->x        = x;
    node->y        = y;

#if GIET_DEBUG_SBT_LOCK
_nolock_printf("\n[DEBUG SBT_LOCK] P[%d,%d,%d] initialises SBT node[%d,%d,%d] : "
      "parent = %x / childO = %x / child1 = %x\n",
      px , py , pl , x , y , level , 
      (unsigned int)node->parent , (unsigned int)node->child0 , (unsigned int)node->child1 );
#endif

    // recursive calls for children nodes (non terminal case only)
    if ( level != 0 )
    {
        _sbt_lock_build( lock , x  , y  , level-1 , node );
        _sbt_lock_build( lock , x1 , y1 , level-1 , node );
    }

}  // end _sbt_lock_build()

//////////////////////////////////////////////////////////////////////////////////
// This recursive function is used by the _sbt_lock_acquire() function to
// get the SBT lock: It takes each "partial" lock on the path from bottom
// to top, using an atomic LL/SC, and starting from bottom.
// It is blocking : it polls each "partial" lock until it can be taken,
// and only then moves to the parent node.
// The lock is finally obtained when all "partial" locks, at all levels are taken.
// - node : pointer on the node whose "partial" lock must be taken first.
//////////////////////////////////////////////////////////////////////////////////
static void _sbt_lock_take( lock_node_t* node )
{
    // try to take "partial" lock
    unsigned int* taken = &node->taken;

    // plain load polling until the word is zero, then LL/SC to write 1;
    // any failure restarts from the plain load
    asm volatile ( "1945:                   \n"
	               "lw   $2,    0(%0)       \n"   /* $2 <= lock current value         */
	               "bnez $2,    1945b       \n"   /* retry if lock already taken      */
                   "ll   $2,    0(%0)       \n"   /* ll_buffer <= lock current value  */
                   "bnez $2,    1945b       \n"   /* retry if lock already taken      */
                   "li   $3,    1           \n"   /* $3 <= argument for sc            */
                   "sc   $3,    0(%0)       \n"   /* try to set lock                  */
                   "beqz $3,    1945b       \n"   /* retry if sc failure              */
                   :
                   : "r"(taken)
                   : "$2", "$3", "memory" );

#if GIET_DEBUG_SBT_LOCK
// decode the processor coordinates from the global processor index
unsigned int    gpid = _get_procid();
unsigned int    px   = gpid >> (Y_WIDTH + P_WIDTH);
unsigned int    py   = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
unsigned int    pl   = gpid & ((1<<P_WIDTH)-1);
_nolock_printf("\n[DEBUG SBT_LOCK] P[%d,%d,%d] get partial SBT lock[%d,%d,%d] : vaddr = %x\n",
      px , py , pl , node->x , node->y , node->level , (unsigned int)node );
#endif

    // take the parent node lock until top is reached (the root has no parent)
    if ( node->parent != NULL ) _sbt_lock_take( node->parent );

} // end _sbt_lock_take()
    

/////////////////////////////////////////////////////////////////////////////////
// This function is used by the _sbt_lock_release() function to release
// the SBT lock: starting from <node>, it walks the parent pointers up to
// the root, and resets the "partial" lock of each visited node with a
// normal write.
// - node : pointer on the first node to be released (bottom of the path).
/////////////////////////////////////////////////////////////////////////////////
static void _sbt_lock_free( lock_node_t* node )
{
    lock_node_t* current = node;

    do
    {
        // reset "partial" lock
        current->taken = 0;

#if GIET_DEBUG_SBT_LOCK
unsigned int    procid = _get_procid();
unsigned int    cx     = procid >> (Y_WIDTH + P_WIDTH);
unsigned int    cy     = (procid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
unsigned int    lpid   = procid & ((1<<P_WIDTH)-1);
_nolock_printf("\n[DEBUG SBT_LOCK] P[%d,%d,%d] release partial SBT lock[%d,%d,%d] : vaddr = %x\n",
      cx , cy , lpid , current->x , current->y , current->level , (unsigned int)current );
#endif

        // move to parent node until top is reached
        current = current->parent;
    }
    while ( current != NULL );

} // end _sbt_lock_free()

//////////////////////////////////////////////////////////////////////////////////
// This external function initialises the distributed SBT lock.
// It computes the number of levels from the X_SIZE / Y_SIZE mesh size,
// allocates the SBT nodes with _remote_malloc(), registers them in the
// lock->node[x][y][level] array, and calls the _sbt_lock_build() function
// to initialise all nodes from the root.
// The supported mesh sizes are (1,1), (2,1), (2,2), (4,2), (4,4), (8,4),
// (8,8), (16,8), (16,16). Any other size is reported as an error.
//////////////////////////////////////////////////////////////////////////////////
void _sbt_lock_init( sbt_lock_t*  lock )
{
    unsigned int levels = 0;     // depth of the SBT (number of levels)
    unsigned int depth;          // candidate depth

    // compute SBT levels : a SBT with <depth> levels covers a mesh of
    // 2**(depth/2) columns by 2**((depth-1)/2) rows
    for ( depth = 1 ; depth <= 9 ; depth++ )
    {
        if ( (X_SIZE == (1 << (depth >> 1))) &&
             (Y_SIZE == (1 << ((depth - 1) >> 1))) )
        {
            levels = depth;
            break;
        }
    }

    if ( levels == 0 )
    {
        _nolock_printf("\n[GIET ERROR] _sbt_lock_init() :illegal X_SIZE/Y_SIZE \n");
        _exit();
    }

#if GIET_DEBUG_SBT_LOCK
unsigned int    procid = _get_procid();
unsigned int    cx     = procid >> (Y_WIDTH + P_WIDTH);
unsigned int    cy     = (procid >> P_WIDTH) & ((1<<Y_WIDTH)-1);
unsigned int    lpid   = procid & ((1<<P_WIDTH)-1);
_nolock_printf("\n[DEBUG SBT_LOCK] P[%d,%d,%d] initialises SBT lock %x : %d levels\n",
               cx , cy , lpid , (unsigned int)lock , levels );
#endif

    // allocates memory for the SBT nodes and initializes SBT nodes pointers array
    // a node of level <lvl> exists in cluster(col,row) only when the
    // (lvl+1)/2 low order bits of col, and the lvl/2 low order bits of row
    // are all zero : at least 1 node / at most 9 nodes per cluster.
    unsigned int col;            // x coordinate for one SBT node
    unsigned int row;            // y coordinate for one SBT node
    unsigned int lvl;            // level for one SBT node
    for ( col = 0 ; col < X_SIZE ; col++ )
    {
        for ( row = 0 ; row < Y_SIZE ; row++ )
        {
            for ( lvl = 0 ; lvl < levels ; lvl++ )
            {
                unsigned int col_mask = (1 << ((lvl + 1) >> 1)) - 1;
                unsigned int row_mask = (1 << (lvl >> 1)) - 1;

                // no node of this level in this cluster
                if ( (col & col_mask) || (row & row_mask) ) continue;

                lock->node[col][row][lvl] =
                    (lock_node_t*)_remote_malloc( sizeof(lock_node_t), col, row );

#if GIET_DEBUG_SBT_LOCK
_nolock_printf("\n[DEBUG SBT_LOCK] P[%d,%d,%d] allocates SBT node[%d,%d,%d] : vaddr = %x\n",
               cx , cy , lpid , col , row , lvl , (unsigned int)lock->node[col][row][lvl] );
#endif
            }
        }
    }

#if GIET_DEBUG_SBT_LOCK
_nolock_printf("\n[DEBUG SBT_LOCK] SBT nodes initialisation starts\n"); 
#endif

    // recursively initialize all SBT nodes from root to bottom :
    // the root is the node of cluster(0,0) at the highest level, without parent
    _sbt_lock_build( lock , 0 , 0 , levels-1 , NULL );

    asm volatile ("sync" ::: "memory");

#if GIET_DEBUG_SBT_LOCK
_nolock_printf("\n[DEBUG SBT_LOCK] SBT nodes initialisation completed\n"); 
#endif

} // end _sbt_lock_init()

//////////////////////////////////////////////////////////////////////////////////
// This external function gets the SBT lock.
// It starts from the level 0 node of the cluster containing the calling
// processor, and returns only when the lock has been taken.
//////////////////////////////////////////////////////////////////////////////////
void _sbt_lock_acquire( sbt_lock_t*  lock )
{
    // extract cluster coordinates from the global processor index
    const unsigned int procid = _get_procid();
    const unsigned int x_mask = (1<<X_WIDTH)-1;
    const unsigned int y_mask = (1<<Y_WIDTH)-1;
    const unsigned int cx     = (procid >> (Y_WIDTH + P_WIDTH)) & x_mask;
    const unsigned int cy     = (procid >> P_WIDTH) & y_mask;

    // take the "partial" locks from the local level 0 node up to the root
    _sbt_lock_take( lock->node[cx][cy][0] );
}


/////////////////////////////////////////////////////////////////////////////////
// This external function releases the SBT lock.
// It starts from the level 0 node of the cluster containing the calling
// processor, and resets all "partial" locks up to the root.
// A sync instruction, declared as clobbering memory, is executed before the
// first "partial" lock is reset, as done by the two other release functions
// of this file: the "partial" locks are reset by normal writes, and nothing
// else orders them after the memory accesses of the critical section.
/////////////////////////////////////////////////////////////////////////////////
void _sbt_lock_release( sbt_lock_t*  lock )
{
    // get cluster coordinates
    unsigned int gpid = _get_procid();
    unsigned int x    = (gpid >> (Y_WIDTH + P_WIDTH)) & ((1<<X_WIDTH)-1);
    unsigned int y    = (gpid >> P_WIDTH) & ((1<<Y_WIDTH)-1);

    // for consistency : critical section accesses must precede the release
    asm volatile ("sync" ::: "memory");

    // recursively reset the "partial" locks (from bottom to top)
    _sbt_lock_free( lock->node[x][y][0] );
}

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:
// vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4

