//////////////////////////////////////////////////////////////////////////////////
// File     : user_lock.c         
// Date     : 01/12/2014
// Author   : alain greiner
// Copyright (c) UPMC-LIP6
///////////////////////////////////////////////////////////////////////////////////
// The user_lock.c and user_lock.h files are part of the GIET-VM nano-kernel.
///////////////////////////////////////////////////////////////////////////////////

#include "user_lock.h"
#include "malloc.h"
#include "giet_config.h"
#include "stdio.h"

//////////////////////////////////////////////////////////////////////////////////
//                atomic access functions 
//////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////
unsigned int atomic_increment( unsigned int*  ptr,
                               unsigned int   increment )
{
    unsigned int value;

    asm volatile (
        "1234:                         \n"
        "move $10,   %1                \n"   /* $10 <= ptr               */
        "move $11,   %2                \n"   /* $11 <= increment         */
        "ll   $12,   0($10)            \n"   /* $12 <= *ptr              */
        "addu $13,   $11,    $12       \n"   /* $13 <= *ptr + increment  */
        "sc   $13,   0($10)            \n"   /* M[ptr] <= new            */ 
        "beqz $13,   1234b             \n"   /* retry if failure         */
        "move %0,    $12               \n"   /* value <= *ptr if success */
        : "=r" (value) 
        : "r" (ptr), "r" (increment)
        : "$10", "$11", "$12", "$13", "memory" );

    return value;
}

///////////////////////////////////////////////////////////////////////////////////
//               simple lock access functions
///////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////
void lock_acquire( user_lock_t* lock ) 
{
    // get next free slot index from user_lock
    unsigned int ticket = atomic_increment( &lock->free, 1 );

#if GIET_DEBUG_USER_LOCK
unsigned int    x;
unsigned int    y;
unsigned int    lpid;
giet_proc_xyp( &x, &y, &lpid );
giet_tty_printf("\n[USER_LOCK DEBUG] lock_acquire() : P[%d,%d,%d] get ticket = %d"
                " for lock %x at cycle %d (current = %d / free = %d)\n",
                x, y, lpid, ticket, 
                (unsigned int)lock, giet_proctime(), lock->current, lock->free );
#endif

    // poll the current slot index
    asm volatile("1793:                       \n"
                 "lw   $10,  0(%0)            \n"
                 "move $11,  %1               \n"
                 "bne  $10,  $11,  1793b      \n"
                 :
                 : "r"(lock), "r"(ticket)
                 : "$10", "$11" );
               
#if GIET_DEBUG_USER_LOCK
giet_tty_printf("\n[USER_LOCK DEBUG] lock_acquire() : P[%d,%d,%d] get lock %x"
                " at cycle %d (current = %d / free = %d)\n",
                x, y, lpid, (unsigned int)lock, 
                giet_proctime(), lock->current, lock->free );
#endif

}

//////////////////////////////////////
void lock_release( user_lock_t* lock ) 
{
    asm volatile( "sync" );

    lock->current = lock->current + 1;

#if GIET_DEBUG_USER_LOCK
unsigned int    x;
unsigned int    y;
unsigned int    lpid;
giet_proc_xyp( &x, &y, &lpid );
giet_tty_printf("\n[USER_LOCK DEBUG] lock_release() : P[%d,%d,%d] release lock %x"
                " at cycle %d (current = %d / free = %d)\n",
                x, y, lpid, (unsigned int)lock, 
                giet_proctime(), lock->current, lock->free );
#endif

}

///////////////////////////////////
void lock_init( user_lock_t* lock )
{
    lock->current = 0;
    lock->free    = 0;

#if GIET_DEBUG_USER_LOCK
unsigned int    x;
unsigned int    y;
unsigned int    lpid;
giet_proc_xyp( &x, &y, &lpid );
giet_tty_printf("\n[USER_LOCK DEBUG] lock_init() : P[%d,%d,%d] init lock %x"
                " at cycle %d (current = %d / free = %d)\n",
                x, y, lpid, (unsigned int)lock,
                giet_proctime(), lock->current, lock->free );
#endif

}

///////////////////////////////////////////////////////////////////////////////////
//               SQT lock access functions
///////////////////////////////////////////////////////////////////////////////////

//////////////////////////////////////////////////
static void sqt_lock_build( sqt_lock_t*      lock,      // pointer on the SQT lock
                            unsigned int     x,         // node X coordinate
                            unsigned int     y,         // node Y coordinate
                            unsigned int     level,     // node level
                            sqt_lock_node_t* parent,    // pointer on parent node
                            unsigned int     xmax,      // SQT X size
                            unsigned int     ymax )     // SQT Y size
{

#if GIET_DEBUG_USER_LOCK
unsigned int    px;
unsigned int    py;
unsigned int    pl;
giet_proc_xyp( &px, &py, &pl );
#endif

    // get target node pointer
    sqt_lock_node_t* node = lock->node[x][y][level];
    
    if (level == 0 )        // terminal case
    {
        // initializes target node
        node->current  = 0;   
        node->free     = 0;
        node->level    = 0;
        node->parent   = parent;
        node->child[0] = NULL;
        node->child[1] = NULL;
        node->child[2] = NULL;
        node->child[3] = NULL;

#if GIET_DEBUG_USER_LOCK
giet_tty_printf("\n[USER_LOCK DEBUG] sqt_lock_build() : "
                "P[%d,%d,%d] initialises SQT node[%d,%d,%d] = %x :\n"
                " parent = %x / child0 = %x / child1 = %x / child2 = %x / child3 = %x\n",
                px , py , pl , x , y , level , (unsigned int)node ,
                (unsigned int)node->parent , 
                (unsigned int)node->child[0] , 
                (unsigned int)node->child[1] , 
                (unsigned int)node->child[2] , 
                (unsigned int)node->child[3] );
#endif

    }
    else                   // non terminal case
    {
        unsigned int cx[4];      // x coordinate for children
        unsigned int cy[4];      // y coordinate for children
        unsigned int i;          // child index

        // the child0 coordinates are equal to the parent coordinates
        // other childs coordinates are incremented depending on the level value
        cx[0] = x;
        cy[0] = y;

        cx[1] = x + (1 << (level-1));
        cy[1] = y;

        cx[2] = x;
        cy[2] = y + (1 << (level-1));

        cx[3] = x + (1 << (level-1));
        cy[3] = y + (1 << (level-1));

        // initializes target node
        for ( i = 0 ; i < 4 ; i++ )
        {
            if ( (cx[i] < xmax) && (cy[i] < ymax) ) 
                node->child[i] = lock->node[cx[i]][cy[i]][level-1];
            else  
                node->child[i] = NULL;
        }
        node->current  = 0;
        node->free     = 0;
        node->level    = level;
        node->parent   = parent;

#if GIET_DEBUG_USER_LOCK
giet_tty_printf("\n[USER_LOCK DEBUG] sqt_lock_init() : "
                "P[%d,%d,%d] initialises SQT node[%d,%d,%d] : \n"
                " parent = %x / childO = %x / child1 = %x / child2 = %x / child3 = %x\n",
                px , py , pl , x , y , level , 
                (unsigned int)node->parent , 
                (unsigned int)node->child[0] , 
                (unsigned int)node->child[1] , 
                (unsigned int)node->child[2] , 
                (unsigned int)node->child[3] );
#endif

       // recursive calls for children nodes
        for ( i = 0 ; i < 4 ; i++ )
        {
            if ( (cx[i] < xmax) && (cy[i] < ymax) ) 
                sqt_lock_build( lock, 
                                 cx[i], 
                                 cy[i], 
                                 level-1, 
                                 node, 
                                 xmax, 
                                 ymax );
        }
    }
}  // end _sqt_lock_build()



//////////////////////////////////////
void sqt_lock_init( sqt_lock_t*  lock,
                    unsigned int x_size,      // number of clusters in a row
                    unsigned int y_size,      // number of clusters in a col
                    unsigned int nthreads )   // threads per clusters
{
    // check parameters
    if ( x_size > 16  ) giet_pthread_exit("SQT LOCK ERROR : x_size too large");
    if ( y_size > 16  ) giet_pthread_exit("SQT LOCK ERROR : y_size too large");
    if ( nthreads > 8 ) giet_pthread_exit("SQT LOCK ERROR : nthreads too large");
    
    // compute SQT levels
    unsigned int levels; 
    unsigned int z = (x_size > y_size) ? x_size : y_size;
    levels = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 3 : (z < 9) ? 4 : 5;

#if GIET_DEBUG_USER_LOCK
unsigned int    px;
unsigned int    py;
unsigned int    pp;
giet_proc_xyp(&px, &py, &pp);
unsigned int side   = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 4 : (z < 9) ? 8 : 16;
giet_tty_printf("\n[USER_LOCK DEBUG] sqt_lock_init() : "
                "P[%d,%d%d] makes sqt_nodes allocation for lock %x\n"
                " x_size = %d / y_size = %d / levels = %d / side = %d\n",
                px, py, pp, (unsigned int) lock, x_size , y_size , levels , side );
#endif

    
    unsigned int x;              // x coordinate for one SQT node
    unsigned int y;              // y coordinate for one SQT node
    unsigned int l;              // level for one SQT node

    for ( x = 0 ; x < x_size ; x++ )
    {
        for ( y = 0 ; y < y_size ; y++ )
        {
            for ( l = 0 ; l < levels ; l++ )             // level 0 nodes
            {
                
                if ( ( (l == 0) && ((x&0x00) == 0) && ((y&0x00) == 0) ) ||
                     ( (l == 1) && ((x&0x01) == 0) && ((y&0x01) == 0) ) ||
                     ( (l == 2) && ((x&0x03) == 0) && ((y&0x03) == 0) ) ||
                     ( (l == 3) && ((x&0x07) == 0) && ((y&0x07) == 0) ) ||
                     ( (l == 4) && ((x&0x0F) == 0) && ((y&0x0F) == 0) ) )
                 {
                     lock->node[x][y][l] = 
                     (sqt_lock_node_t*)remote_malloc( sizeof(sqt_lock_node_t),
                                                       x, y );

#if GIET_DEBUG_USER_LOCK
giet_tty_printf("\n[USER_LOCK DEBUG] squt_lock_init() : "
                "P[%d,%d,%d] allocates SQT node[%d,%d,%d] = %x\n",
                px , py , pp , x , y , l , (unsigned int)lock->node[x][y][l] );
#endif
                 }
            }
        }
    }
            
    // recursively initialize all SQT nodes from root to bottom
    sqt_lock_build( lock,
                     0,
                     0,
                     levels-1,
                     NULL,
                     x_size,
                     y_size );

    asm volatile ("sync" ::: "memory");

#if GIET_DEBUG_USER_LOCK
giet_tty_printf("\n[USER_LOCK DEBUG] sqt_lock_init() : "
                "P[%d,%d,%d] completes SQT nodes initialisation\n", px, py, pp); 
#endif

} // end sqt_lock_init()


//////////////////////////////////////////////////
static void sqt_lock_take( sqt_lock_node_t* node )
{
    // get next free ticket from local lock
    unsigned int ticket = atomic_increment( &node->free, 1 );

#if GIET_DEBUG_USER_LOCK
unsigned int    x;
unsigned int    y;
unsigned int    l;
giet_proc_xyp(&x, &y, &l);
giet_tty_printf("\n[USER_LOCK DEBUG] sqt_lock_take() : "
                "P[%d,%d,%d] get ticket %d for SQT lock %x"
                " / level = %d / current = %d / free = %d\n",
                x , y , l , ticket , (unsigned int)node ,
                node->level , node->current , node->free );
#endif

    // poll the local lock current index
    while ( (*(volatile unsigned int *)( &node->current )) != ticket ) asm volatile( "nop" );

#if GIET_DEBUG_USER_LOCK
giet_tty_printf("\n[DEBUG SQT_LOCK] sqt_lock_take() : "
                "P[%d,%d,%d] get SQT lock %x"
                " / level = %d / current = %d / free = %d\n",
                x , y , l , (unsigned int)node ,
                node->level , node->current , node->free );
#endif

    // try to take the parent node lock until top is reached
    if ( node->parent != NULL ) sqt_lock_take( node->parent );

} // end _sqt_lock_take()


//////////////////////////////////////////
void sqt_lock_acquire( sqt_lock_t*  lock )
{
    unsigned int x;
    unsigned int y;
    unsigned int p;

    // get cluster coordinates
    giet_proc_xyp( &x, &y, &p );

#if GIET_DEBUG_USER_LOCK
giet_tty_printf("\n[DEBUG SQT_LOCK] sqt_lock_acquire() : "
                "P[%d,%d,%d] try to take lock = %x / lock_node = %x\n",
                x, y, p, lock, lock->node[x][y][0] );
#endif

    // try to recursively take the distributed locks (from bottom to top)
    sqt_lock_take( lock->node[x][y][0] );
}

//////////////////////////////////////////////////
static void sqt_lock_give( sqt_lock_node_t* node )
{
    // release the local lock
    node->current = node->current + 1;

#if GIET_DEBUG_USER_LOCK
unsigned int    x;
unsigned int    y;
unsigned int    l;
giet_proc_xyp(&x, &y, &l);
giet_tty_printf("\n[USER_LOCK DEBUG] sqt_lock_give() : "
                "P[%d,%d,%d] release SQT lock_node %x"
                " / level = %d / current = %d / free = %d\n",
                x , y , l , (unsigned int)node, 
                node->level , node->current , node->free );
#endif

    // reset parent node until top is reached
    if ( node->parent != NULL ) sqt_lock_give( node->parent );

} // end _sqt_lock_give()

//////////////////////////////////////////
void sqt_lock_release( sqt_lock_t*  lock )
{
    asm volatile ( "sync" );   // for consistency

    unsigned int x;
    unsigned int y;
    unsigned int p;
    // get cluster coordinates
    giet_proc_xyp( &x, &y, &p );

    // recursively reset the distributed locks (from bottom to top)
    sqt_lock_give( lock->node[x][y][0] );
}


// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:
// vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4

