//////////////////////////////////////////////////////////////////////////////////
// File     : barrier.c      
// Date     : 01/04/2012
// Author   : alain greiner
// Copyright (c) UPMC-LIP6
///////////////////////////////////////////////////////////////////////////////////

#include "barrier.h"
#include "malloc.h"
#include "stdio.h"
#include "giet_config.h"

///////////////////////////////////////////////////////////////////////////////////
//      Simple barrier access functions
///////////////////////////////////////////////////////////////////////////////////

///////////////////////////////////////////
void barrier_init( giet_barrier_t* barrier, 
                   unsigned int    ntasks ) 
{
    barrier->ntasks = ntasks;
    barrier->count  = ntasks;
    barrier->sense  = 0;

    asm volatile ("sync" ::: "memory");
}

////////////////////////////////////////////
void barrier_wait( giet_barrier_t* barrier ) 
{

#if GIET_DEBUG_USER_BARRIER
unsigned int x;
unsigned int y;
unsigned int p;
giet_proc_xyp( &x, &y, &p );
giet_shr_printf("[DEBUG BARRIER] proc[%d,%d,%d] enters barrier_wait()\n", x, y, p );
#endif

    // compute expected sense value 
    unsigned int expected;
    if ( barrier->sense == 0 ) expected = 1;
    else                       expected = 0;

    // parallel decrement barrier counter using atomic instructions LL/SC
    // - input : pointer on the barrier counter (pcount)
    // - output : counter value (count)
    volatile unsigned int* pcount  = (unsigned int *)&barrier->count;
    volatile unsigned int  count    = 0;  // avoid a warning

    asm volatile( "addu   $2,     %1,        $0      \n"
                  "barrier_llsc:                     \n"
                  "ll     $8,     0($2)              \n"
                  "addi   $9,     $8,        -1      \n"
                  "sc     $9,     0($2)              \n"
                  "beqz   $9,     barrier_llsc       \n"
                  "addu   %0,     $8,        $0      \n"
                  : "=r" (count)
                  : "r" (pcount)
                  : "$2", "$8", "$9", "memory" );

    // the last task re-initializes count and toggle sense,
    // waking up all other waiting tasks
    if (count == 1)   // last task
    {
        barrier->count = barrier->ntasks;
        barrier->sense = expected;
    }
    else              // other tasks busy waiting the sense flag
    {
        // polling sense flag
        // input: pointer on the sens flag (psense)
        // input: expected sense value (expected)
        volatile unsigned int* psense  = (unsigned int *)&barrier->sense;
        asm volatile ( "barrier_sense:                   \n"
                       "lw    $3,   0(%0)                \n"
                       "bne   $3,   %1,    barrier_sense \n"
                       :
                       : "r"(psense), "r"(expected)
                       : "$3" );
    }

    asm volatile ("sync" ::: "memory");

#if GIET_DEBUG_USER_BARRIER
giet_shr_printf("[DEBUG BARRIER] proc[%d,%d,%d] exit barrier_wait()\n", x, y, p );
#endif

}

///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
//      SBT barrier access functions
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////


////////////////////////////////////////////////////
void sbt_barrier_init( giet_sbt_barrier_t*  barrier,
                       unsigned int         ntasks )
{
    unsigned int x;          // x coordinate for one SBT node
    unsigned int y;          // y coordinate for one SBT node
    unsigned int l;          // level for one SBT node
    unsigned int x_size;     // max number of clusters in a row for the SBT
    unsigned int y_size;     // max number of clusters in a column for the SBT
    unsigned int levels;     // depth of the SBT (number of levels)


    // compute SBT characteristics
    if      ( ntasks == NB_PROCS_MAX       )  // mesh 1*1
    {
        x_size    = 1;
        y_size    = 1;
        levels    = 1;
    }
    else if ( ntasks == NB_PROCS_MAX * 2   )  // mesh 2*1
    {
        x_size    = 2;
        y_size    = 1;
        levels    = 2;
    }
    else if ( ntasks == NB_PROCS_MAX * 4   )  // mesh 2*2
    {
        x_size    = 2;
        y_size    = 2;
        levels    = 3;
    }
    else if ( ntasks == NB_PROCS_MAX * 8   )  // mesh 4*2
    {
        x_size    = 4;
        y_size    = 2;
        levels    = 4;
    }
    else if ( ntasks == NB_PROCS_MAX * 16  )  // mesh 4*4
    {
        x_size    = 4;
        y_size    = 4;
        levels    = 5;
    }
    else if ( ntasks == NB_PROCS_MAX * 32  )  // mesh 8*4
    {
        x_size    = 8;
        y_size    = 4;
        levels    = 6;
    }
    else if ( ntasks == NB_PROCS_MAX * 64  )  // mesh 8*8
    {
        x_size    = 8;
        y_size    = 8;
        levels    = 7;
    }
    else if ( ntasks == NB_PROCS_MAX * 128 )  // mesh 16*8
    {
        x_size    = 16;
        y_size    = 8;
        levels    = 8; 
    }
    else if ( ntasks == NB_PROCS_MAX * 256 )  // mesh 16*16
    {
        x_size    = 16;
        y_size    = 16;
        levels    = 9;
    }
    else
    {
        x_size    = 0;  // avoid warning
        y_size    = 0;
        levels    = 0;
        giet_exit("error in tree_barrier_init() : number of tasks must be power of 2\n");
    }

    // ntasks initialisation
    barrier->ntasks = ntasks;
    
#if GIET_DEBUG_USER_BARRIER
giet_shr_printf("\n[DEBUG BARRIER] sbt_nodes allocation / ntasks = %d\n", ntasks ); 
#endif

    // allocates memory for the SBT nodes and initializes SBT nodes pointers array
    // the number of SBT nodes in a cluster(x,y) depends on (x,y). 
    // At least 1 node / at most 9 nodes
    for ( x = 0 ; x < x_size ; x++ )
    {
        for ( y = 0 ; y < y_size ; y++ )
        {
            for ( l = 0 ; l < levels ; l++ )             // level 0 nodes
            {
                
                if ( ( (l == 0) && ((x&0x00) == 0) && ((y&0x00) == 0) ) ||
                     ( (l == 1) && ((x&0x01) == 0) && ((y&0x00) == 0) ) ||
                     ( (l == 2) && ((x&0x01) == 0) && ((y&0x01) == 0) ) ||
                     ( (l == 3) && ((x&0x03) == 0) && ((y&0x01) == 0) ) ||
                     ( (l == 4) && ((x&0x03) == 0) && ((y&0x03) == 0) ) ||
                     ( (l == 5) && ((x&0x07) == 0) && ((y&0x03) == 0) ) ||
                     ( (l == 6) && ((x&0x07) == 0) && ((y&0x07) == 0) ) ||
                     ( (l == 7) && ((x&0x0F) == 0) && ((y&0x07) == 0) ) ||
                     ( (l == 8) && ((x&0x0F) == 0) && ((y&0x0F) == 0) ) )
                 {
                     barrier->node[x][y][l] = remote_malloc( sizeof(sbt_node_t), x, y );

#if GIET_DEBUG_USER_BARRIER
giet_shr_printf("[DEBUG SBT] node[%d][%d][%d] : vaddr = %x\n",
                x, y, l, (unsigned int)barrier->node[x][y][l] );
#endif
                 }
            }
        }
    }
            
#if GIET_DEBUG_USER_BARRIER
giet_shr_printf("\n[DEBUG SBT] SBT nodes initialisation\n"); 
#endif

    // recursively initialize all SBT nodes from root to bottom
    sbt_build( barrier, 0, 0, levels-1, NULL );

    asm volatile ("sync" ::: "memory");
}

////////////////////////////////////////////////////
void sbt_barrier_wait( giet_sbt_barrier_t* barrier )
{
    // compute cluster coordinates for the calling task
    unsigned int    x;
    unsigned int    y;
    unsigned int    lpid;
    giet_proc_xyp( &x, &y, &lpid );

    // recursively decrement count from bottom to root
    sbt_decrement( barrier->node[x][y][0] );

    asm volatile ("sync" ::: "memory");
}


/////////////////////////////////////////////
void sbt_build( giet_sbt_barrier_t*  barrier, 
                unsigned int         x,
                unsigned int         y,
                unsigned int         level,
                sbt_node_t*          parent ) 
{
    // This recursive function initializes the SBT notes
    // traversing the SBT from root to bottom

    // get target node address
    sbt_node_t* node = barrier->node[x][y][level];
    
    if (level == 0 )        // terminal case
    {
        // initializes target node
        node->arity    = NB_PROCS_MAX;   
        node->count    = NB_PROCS_MAX;   
        node->sense    = 0;   
        node->level    = 0;   
        node->parent   = parent;
        node->child0   = NULL;
        node->child1   = NULL;

#if GIET_DEBUG_USER_BARRIER
giet_shr_printf("[DEBUG BARRIER] initialize sbt_node[%d][%d][%d] :"
                " arity = %d / child0 = %x / child1 = %x\n", 
                x, y, level, 
                node->arity, node->child0, node->child1 );
#endif

    }
    else                   // non terminal case
    {
        unsigned int x0;   // x coordinate for child0
        unsigned int y0;   // y coordinate for child0;
        unsigned int x1;   // x coordinate for child1;
        unsigned int y1;   // y coordinate for child1;

        // the child0 coordinates are equal to the parent coordinates
        // the child1 coordinates are incremented depending on the level value
        if ( level & 0x1 ) // odd level => X binary tree
        {
            x0 = x;
            y0 = y;
            x1 = x + (1 << ((level-1)>>1));
            y1 = y;
        }    
        else               // even level => Y binary tree
        {
            x0 = x;
            y0 = y;
            x1 = x;
            y1 = y + (1 << ((level-1)>>1));
        }

        // initializes target node
        node->arity    = 2;
        node->count    = 2;
        node->sense    = 0;   
        node->level    = level;
        node->parent   = parent;
        node->child0   = barrier->node[x0][y0][level-1];
        node->child1   = barrier->node[x1][y1][level-1];

#if GIET_DEBUG_USER_BARRIER
giet_shr_printf("[DEBUG BARRIER] initialize sbt_node[%d][%d][%d] :"
                " arity = %d / child0 = %x / child1 = %x\n", 
                x, y, level, 
                node->arity, node->child0, node->child1 );
#endif

        // recursive calls for children nodes
        sbt_build( barrier , x0 , y0 , level-1 , node );
        sbt_build( barrier , x1 , y1 , level-1 , node );
    }

}  // end sbt_build()

 
///////////////////////////////////////
void sbt_decrement( sbt_node_t* node )
{
    // This recursive function decrements the distributed "count" variables,
    // traversing the SBT from bottom to root.

    // compute expected sense value (toggle)
    unsigned int expected;
    if ( node->sense == 0) expected = 1;
    else                   expected = 0;

    // atomic decrement
    // - input  : count address (pcount)
    // - output : modified counter value (count)
    unsigned int*  pcount    = (unsigned int*)&node->count;
    unsigned int   count     = 0;  // avoid a warning

    asm volatile( "addu   $2,     %1,        $0      \n"
                  "sbt_llsc:                         \n"
                  "ll     $8,     0($2)              \n"
                  "addi   $9,     $8,        -1      \n"
                  "sc     $9,     0($2)              \n"
                  "beqz   $9,     sbt_llsc           \n"
                  "addu   %0,     $8,        $0      \n"
                  : "=r" (count)
                  : "r" (pcount)
                  : "$2", "$8", "$9", "memory" );

    if ( count == 1 )    // last task for this level 
    {
        if ( node->parent == NULL )            // root node : call sbt_release()
        {
            sbt_release( node, expected );
        }
        else                                   // call sbt_decrement() for parent
        {
            sbt_decrement( node->parent );
        }
    }
    else                 // not the last: busy waiting on current "sense" flag
    {
        // polling sense flag
        // input: pointer on the sens flag (psense)
        // input: expected sense value (expected)
        volatile unsigned int* psense  = (unsigned int *)&node->sense;
        asm volatile ( "sbt_sense:                       \n"
                       "lw    $3,   0(%0)                \n"
                       "bne   $3,   %1,    sbt_sense     \n"
                       :
                       : "r"(psense), "r"(expected)
                       : "$3" );
    }
} // end sbt_decrement()
    

///////////////////////////////////
void sbt_release( sbt_node_t* node,
                  unsigned int expected )
{
    // This recursive function reset all sense and count variables
    // traversing the SBT from root to bottom

    // toggle sense flag for the target node 
    node->sense = expected;

    // re-initialise count for the target node
    node->count = node->arity;

    // call recursively sbt_release() for children if not terminal
    if ( node->level > 0 )
    {
        sbt_release( node->child0, expected );
        sbt_release( node->child1, expected );
    }
}  // end sbt_release()

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:
// vim: filetype=c:expandtab:shiftwidth=4:tabsroot=4:softtabsroot=4

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:
// vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4

