////////////////////////////////////////////////////////////////////////////////// // File : barrier.c // Date : 01/04/2012 // Author : alain greiner // Copyright (c) UPMC-LIP6 /////////////////////////////////////////////////////////////////////////////////// #include "barrier.h" #include "malloc.h" #include "stdio.h" #include "giet_config.h" /////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// // Simple barrier access functions /////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////// void barrier_init( giet_barrier_t* barrier, unsigned int ntasks ) { barrier->ntasks = ntasks; barrier->count = ntasks; barrier->sense = 0; asm volatile ("sync" ::: "memory"); } //////////////////////////////////////////// void barrier_wait( giet_barrier_t* barrier ) { #if GIET_DEBUG_BARRIER unsigned int x; unsigned int y; unsigned int p; giet_proc_xyp( &x, &y, &p ); giet_shr_printf("[DEBUG BARRIER] proc[%d,%d,%d] enters barrier_wait()\n", x, y, p ); #endif // compute expected sense value unsigned int expected; if ( barrier->sense == 0 ) expected = 1; else expected = 0; // parallel decrement barrier counter using atomic instructions LL/SC // - input : pointer on the barrier counter (pcount) // - output : counter value (count) volatile unsigned int* pcount = (unsigned int *)&barrier->count; volatile unsigned int count = 0; // avoid a warning asm volatile( "addu $2, %1, $0 \n" "barrier_llsc: \n" "ll $8, 0($2) \n" "addi $9, $8, -1 \n" "sc $9, 0($2) \n" "beqz $9, barrier_llsc \n" "addu %0, $8, $0 \n" : "=r" (count) : "r" (pcount) : "$2", "$8", "$9", "memory" ); // the last task re-initializes count and toggle sense, // waking up all other waiting tasks if (count == 1) // last task { barrier->count = barrier->ntasks; barrier->sense = expected; } else // other tasks busy waiting the sense flag { // polling sense flag // input: pointer on the sens flag (psense) // input: expected sense value (expected) volatile unsigned int* psense = (unsigned int *)&barrier->sense; asm volatile ( "barrier_sense: \n" "lw $3, 0(%0) \n" "bne $3, %1, barrier_sense \n" : : "r"(psense), "r"(expected) : "$3" ); } asm volatile ("sync" ::: "memory"); #if GIET_DEBUG_BARRIER giet_shr_printf("[DEBUG BARRIER] proc[%d,%d,%d] exit barrier_wait()\n", x, y, p ); #endif } /////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// // SBT barrier access functions /////////////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////// void sbt_barrier_init( giet_sbt_barrier_t* barrier, unsigned int ntasks ) { unsigned int x; // x coordinate for one SBT node unsigned int y; // y coordinate for one SBT node unsigned int l; // level for one SBT node unsigned int x_size; // max number of clusters in a row for the SBT unsigned int y_size; // max number of clusters in a column for the SBT unsigned int levels; // depth of the SBT (number of levels) // compute SBT characteristics if ( ntasks == NB_PROCS_MAX ) // mesh 1*1 { x_size = 1; y_size = 1; levels = 1; } else if ( ntasks == NB_PROCS_MAX * 2 ) // mesh 2*1 { x_size = 2; y_size = 1; levels = 2; } else if ( ntasks == NB_PROCS_MAX * 4 ) // mesh 2*2 { x_size = 2; y_size = 2; levels = 3; } else if ( ntasks == NB_PROCS_MAX * 8 ) // mesh 4*2 { x_size = 4; y_size = 2; levels = 4; } else if ( ntasks == NB_PROCS_MAX * 16 ) // mesh 4*4 { x_size = 4; y_size = 4; levels = 5; } else if ( ntasks == NB_PROCS_MAX * 32 ) // mesh 8*4 { x_size = 8; y_size = 4; levels = 6; } else if ( ntasks == NB_PROCS_MAX * 64 ) // mesh 8*8 { x_size = 8; y_size = 8; levels = 7; } else if ( ntasks == NB_PROCS_MAX * 128 ) // mesh 16*8 { x_size = 16; y_size = 8; levels = 8; } else if ( ntasks == NB_PROCS_MAX * 256 ) // mesh 16*16 { x_size = 16; y_size = 16; levels = 9; } else { x_size = 0; // avoid warning y_size = 0; levels = 0; giet_exit("error in tree_barrier_init() : number of tasks must be power of 2\n"); } // ntasks initialisation barrier->ntasks = ntasks; #if GIET_DEBUG_BARRIER giet_shr_printf("\n[DEBUG BARRIER] sbt_nodes allocation / ntasks = %d\n", ntasks ); #endif // allocates memory for the SBT nodes and initializes SBT nodes pointers array // the number of SBT nodes in a cluster(x,y) depends on (x,y). // At least 1 node / at most 9 nodes for ( x = 0 ; x < x_size ; x++ ) { for ( y = 0 ; y < y_size ; y++ ) { for ( l = 0 ; l < levels ; l++ ) // level 0 nodes { if ( ( (l == 0) && ((x&0x00) == 0) && ((y&0x00) == 0) ) || ( (l == 1) && ((x&0x01) == 0) && ((y&0x00) == 0) ) || ( (l == 2) && ((x&0x01) == 0) && ((y&0x01) == 0) ) || ( (l == 3) && ((x&0x03) == 0) && ((y&0x01) == 0) ) || ( (l == 4) && ((x&0x03) == 0) && ((y&0x03) == 0) ) || ( (l == 5) && ((x&0x07) == 0) && ((y&0x03) == 0) ) || ( (l == 6) && ((x&0x07) == 0) && ((y&0x07) == 0) ) || ( (l == 7) && ((x&0x0F) == 0) && ((y&0x07) == 0) ) || ( (l == 8) && ((x&0x0F) == 0) && ((y&0x0F) == 0) ) ) { barrier->node[x][y][l] = remote_malloc( SBT_NODE_SIZE, x, y ); #if GIET_DEBUG_BARRIER giet_shr_printf("[DEBUG SBT] node[%d][%d][%d] : vaddr = %x\n", x, y, l, (unsigned int)barrier->node[x][y][l] ); #endif } } } } #if GIET_DEBUG_BARRIER giet_shr_printf("\n[DEBUG SBT] SBT nodes initialisation\n"); #endif // recursively initialize all SBT nodes from root to bottom sbt_build( barrier, 0, 0, levels-1, NULL ); asm volatile ("sync" ::: "memory"); } //////////////////////////////////////////////////// void sbt_barrier_wait( giet_sbt_barrier_t* barrier ) { // compute cluster coordinates for the calling task unsigned int x; unsigned int y; unsigned int lpid; giet_proc_xyp( &x, &y, &lpid ); // recursively decrement count from bottom to root sbt_decrement( barrier->node[x][y][0] ); asm volatile ("sync" ::: "memory"); } ///////////////////////////////////////////// void sbt_build( giet_sbt_barrier_t* barrier, unsigned int x, unsigned int y, unsigned int level, sbt_node_t* parent ) { // This recursive function initializes the SBT notes // traversing the SBT from root to bottom // get target node address sbt_node_t* node = barrier->node[x][y][level]; if (level == 0 ) // terminal case { // initializes target node node->arity = NB_PROCS_MAX; node->count = NB_PROCS_MAX; node->sense = 0; node->level = 0; node->parent = parent; node->child0 = NULL; node->child1 = NULL; #if GIET_DEBUG_BARRIER giet_shr_printf("[DEBUG BARRIER] initialize sbt_node[%d][%d][%d] :" " arity = %d / child0 = %x / child1 = %x\n", x, y, level, node->arity, node->child0, node->child1 ); #endif } else // non terminal case { unsigned int x0; // x coordinate for child0 unsigned int y0; // y coordinate for child0; unsigned int x1; // x coordinate for child1; unsigned int y1; // y coordinate for child1; // the child0 coordinates are equal to the parent coordinates // the child1 coordinates are incremented depending on the level value if ( level & 0x1 ) // odd level => X binary tree { x0 = x; y0 = y; x1 = x + (1 << ((level-1)>>1)); y1 = y; } else // even level => Y binary tree { x0 = x; y0 = y; x1 = x; y1 = y + (1 << ((level-1)>>1)); } // initializes target node node->arity = 2; node->count = 2; node->sense = 0; node->level = level; node->parent = parent; node->child0 = barrier->node[x0][y0][level-1]; node->child1 = barrier->node[x1][y1][level-1]; #if GIET_DEBUG_BARRIER giet_shr_printf("[DEBUG BARRIER] initialize sbt_node[%d][%d][%d] :" " arity = %d / child0 = %x / child1 = %x\n", x, y, level, node->arity, node->child0, node->child1 ); #endif // recursive calls for children nodes sbt_build( barrier , x0 , y0 , level-1 , node ); sbt_build( barrier , x1 , y1 , level-1 , node ); } } // end sbt_build() /////////////////////////////////////// void sbt_decrement( sbt_node_t* node ) { // This recursive function decrements the distributed "count" variables, // traversing the SBT from bottom to root. // compute expected sense value (toggle) unsigned int expected; if ( node->sense == 0) expected = 1; else expected = 0; // atomic decrement // - input : count address (pcount) // - output : modified counter value (count) unsigned int* pcount = (unsigned int*)&node->count; unsigned int count = 0; // avoid a warning asm volatile( "addu $2, %1, $0 \n" "sbt_llsc: \n" "ll $8, 0($2) \n" "addi $9, $8, -1 \n" "sc $9, 0($2) \n" "beqz $9, sbt_llsc \n" "addu %0, $8, $0 \n" : "=r" (count) : "r" (pcount) : "$2", "$8", "$9", "memory" ); if ( count == 1 ) // last task for this level { if ( node->parent == NULL ) // root node : call sbt_release() { sbt_release( node, expected ); } else // call sbt_decrement() for parent { sbt_decrement( node->parent ); } } else // not the last: busy waiting on current "sense" flag { // polling sense flag // input: pointer on the sens flag (psense) // input: expected sense value (expected) volatile unsigned int* psense = (unsigned int *)&node->sense; asm volatile ( "sbt_sense: \n" "lw $3, 0(%0) \n" "bne $3, %1, sbt_sense \n" : : "r"(psense), "r"(expected) : "$3" ); } } // end sbt_decrement() /////////////////////////////////// void sbt_release( sbt_node_t* node, unsigned int expected ) { // This recursive function reset all sense and count variables // traversing the SBT from root to bottom // toggle sense flag for the target node node->sense = expected; // re-initialise count for the target node node->count = node->arity; // call recursively sbt_release() for children if not terminal if ( node->level > 0 ) { sbt_release( node->child0, expected ); sbt_release( node->child1, expected ); } } // end sbt_release() // Local Variables: // tab-width: 4 // c-basic-offset: 4 // c-file-offsets:((innamespace . 0)(inline-open . 0)) // indent-tabs-mode: nil // End: // vim: filetype=c:expandtab:shiftwidth=4:tabsroot=4:softtabsroot=4 // Local Variables: // tab-width: 4 // c-basic-offset: 4 // c-file-offsets:((innamespace . 0)(inline-open . 0)) // indent-tabs-mode: nil // End: // vim: filetype=c:expandtab:shiftwidth=4:tabstop=4:softtabstop=4