Changeset 619 for trunk/kernel


Ignore:
Timestamp:
Feb 12, 2019, 1:15:47 PM (6 years ago)
Author:
alain
Message:

1) Fix a bug in KSH : after the "load" command,

the [ksh] prompt is now printed after completion
of the loaded application.

2) Fix a bug in vmm_handle_cow() : the copy-on-write

now uses a hal_remote_memcpy() to replicate the page content.


Location:
trunk/kernel
Files:
32 edited

Legend:

Unmodified
Added
Removed
  • trunk/kernel/devices/dev_dma.c

    r565 r619  
    6363    error = thread_kernel_create( &new_thread,
    6464                                  THREAD_DEV,
    65                                   &chdev_sequencial_server,
     65                                  &chdev_server_func,
    6666                                  dma,
    6767                                  cluster_select_local_core() );
  • trunk/kernel/devices/dev_ioc.c

    r614 r619  
    7171    error = thread_kernel_create( &new_thread,
    7272                                  THREAD_DEV,
    73                                   &chdev_sequencial_server,
     73                                  &chdev_server_func,
    7474                                  ioc,
    7575                                  lid );
  • trunk/kernel/devices/dev_nic.c

    r565 r619  
    7070    error = thread_kernel_create( &new_thread,
    7171                                  THREAD_DEV,
    72                                   &chdev_sequencial_server,
     72                                  &chdev_server_func,
    7373                                  nic,
    7474                                  lid );
  • trunk/kernel/devices/dev_txt.c

    r565 r619  
    5353const char * dev_txt_type_str( dev_txt_cmd_t type )
    5454{
    55   switch (type) {
    56     case (TXT_SYNC_WRITE): return "TXT_SYNC_WRITE";
    57     case (TXT_READ):       return "TXT_READ";
    58     case (TXT_WRITE):      return "TXT_WRITE";
    59     default:               return "undefined";
    60   }
     55    switch (type)
     56    {
     57        case (TXT_SYNC_WRITE): return "TXT_SYNC_WRITE";
     58        case (TXT_READ):       return "TXT_READ";
     59        case (TXT_WRITE):      return "TXT_WRITE";
     60        default:               return "undefined";
     61    }
    6162}
    6263
     
    115116        error = thread_kernel_create( &new_thread,
    116117                                      THREAD_DEV,
    117                                       &chdev_sequencial_server,
     118                                      &chdev_server_func,
    118119                                      txt,
    119120                                      lid );
     
    178179
    179180#if DEBUG_DEV_TXT_TX
    180 uint32_t cycle = (uint32_t)hal_get_cycles();
     181thread_t * this  = CURRENT_THREAD;
     182uint32_t   cycle = (uint32_t)hal_get_cycles();
    181183if( DEBUG_DEV_TXT_TX < cycle )
    182 printk("\n[DBG] %s : thread %x enters / cycle %d\n", __FUNCTION__, CURRENT_THREAD, cycle );
     184printk("\n[%s] thread[%x,%x] enters / cycle %d\n",
     185__FUNCTION__, this->process->pid, this->trdid, cycle );
    183186#endif
    184187
     
    194197
    195198    // If we use MTTYs (vci_multi_tty), we perform only sync writes
     199    // Otherwise, we use vci_tty_tsar so we can use async writes
     200
    196201    if( dev_ptr->impl == IMPL_TXT_MTY )
    197202    {
     
    212217    }
    213218
    214     // Otherwise, we use vci_tty_tsar so we can use async writes
    215219    else
    216220    {
    217     return dev_txt_access( TXT_WRITE , channel , buffer , count );
     221        return dev_txt_access( TXT_WRITE , channel , buffer , count );
    218222    }
    219223
     
    221225cycle = (uint32_t)hal_get_cycles();
    222226if( DEBUG_DEV_TXT_TX < cycle )
    223 printk("\n[DBG] %s : thread %x exit / cycle %d\n", __FUNCTION__, CURRENT_THREAD, cycle );
     227printk("\n[%s] thread[%x,%x] exit / cycle %d\n",
     228__FUNCTION__, this->process->pid, this->trdid, cycle );
    224229#endif
    225230
     
    240245
    241246#if DEBUG_DEV_TXT_RX
    242 uint32_t cycle = (uint32_t)hal_get_cycles();
     247thread_t * this  = CURRENT_THREAD;
     248uint32_t   cycle = (uint32_t)hal_get_cycles();
    243249if( DEBUG_DEV_TXT_RX < cycle )
    244 printk("\n[DBG] %s : thread %x enters / cycle %d\n", __FUNCTION__, CURRENT_THREAD, cycle );
     250printk("\n[%s] thread[%x,%x] enters / cycle %d\n",
     251__FUNCTION__, this->process->pid, this->trdid, cycle );
    245252#endif
    246253
     
    250257cycle = (uint32_t)hal_get_cycles();
    251258if( DEBUG_DEV_TXT_RX < cycle )
    252 printk("\n[DBG] %s : thread %x exit / cycle %d\n", __FUNCTION__, CURRENT_THREAD, cycle );
     259printk("\n[%s] thread[%x,%x] exit / cycle %d\n",
     260__FUNCTION__, this->process->pid, this->trdid, cycle );
    253261#endif
    254262
  • trunk/kernel/kern/chdev.c

    r601 r619  
    168168
    169169#if (DEBUG_CHDEV_CMD_RX || DEBUG_CHDEV_CMD_TX)
    170 bool_t is_rx = hal_remote_l32( XPTR( chdev_cxy , &chdev_ptr->is_rx ) );
     170bool_t      is_rx        = hal_remote_l32( XPTR( chdev_cxy , &chdev_ptr->is_rx ) );
     171trdid_t     server_trdid = hal_remote_l32( XPTR( chdev_cxy , &server_ptr->trdid ) );
     172process_t * process_ptr  = hal_remote_lpt( XPTR( chdev_cxy , &server_ptr->process ) );
     173pid_t       server_pid   = hal_remote_l32( XPTR( chdev_cxy , &process_ptr->pid ) );
    171174#endif
    172175   
     
    175178if( (is_rx) && (DEBUG_CHDEV_CMD_RX < rx_cycle) )
    176179printk("\n[%s] client[%x,%x] enter for RX / server[%x,%x] / cycle %d\n",
    177 __FUNCTION__, this->process->pid, this->trdid,
    178 server_ptr->process->pid, server_ptr->trdid, rx_cycle );
     180__FUNCTION__, this->process->pid, this->trdid, server_pid, server_trdid, rx_cycle );
    179181#endif
    180182
     
    183185if( (is_rx == 0) && (DEBUG_CHDEV_CMD_TX < tx_cycle) )
    184186printk("\n[%s] client[%x,%x] enter for TX / server[%x,%x] / cycle %d\n",
    185 __FUNCTION__, this->process->pid, this->trdid,
    186 server_ptr->process->pid, server_ptr->trdid, tx_cycle );
     187__FUNCTION__, this->process->pid, this->trdid, server_pid, server_trdid, tx_cycle );
    187188#endif
    188189
     
    241242if( (is_rx == 0) && (DEBUG_CHDEV_CMD_TX < tx_cycle) )
    242243printk("\n[%s] TX server thread[%x,%x] unblocked\n",
    243 __FUNCTION__, server_ptr->process->pid, server_ptr->trdid );
     244__FUNCTION__, server_pid, server_trdid );
    244245#endif
    245246
     
    247248if( (is_rx) && (DEBUG_CHDEV_CMD_RX < rx_cycle) )
    248249printk("\n[%s] RX server thread[%x,%x] unblocked\n",
    249 __FUNCTION__, server_ptr->process->pid, server_ptr->trdid );
     250__FUNCTION__, server_pid, server_trdid );
    250251#endif
    251252
     
    273274if( (is_rx == 0)  && (DEBUG_CHDEV_CMD_TX < tx_cycle) )
    274275printk("\n[%s] client thread[%x,%x] sent IPI to TX server thread[%x,%x]\n",
    275 __FUNCTION__, this->process->pid, this->trdid, server_ptr->process->pid, server_ptr->trdid );
     276__FUNCTION__, this->process->pid, this->trdid, server_pid, server_trdid );
    276277#endif
    277278
     
    279280if( (is_rx)  && (DEBUG_CHDEV_CMD_RX < rx_cycle) )
    280281printk("\n[%s] client thread[%x,%x] sent IPI to RX server thread[%x,%x]\n",
    281 __FUNCTION__, this->process->pid, this->trdid, server_ptr->process->pid, server_ptr->trdid );
     282__FUNCTION__, this->process->pid, this->trdid, server_pid, server_trdid );
    282283#endif
    283284
     
    317318}  // end chdev_register_command()
    318319
    319 ///////////////////////////////////////////////
    320 void chdev_sequencial_server( chdev_t * chdev )
     320/////////////////////////////////////////
     321void chdev_server_func( chdev_t * chdev )
    321322{
    322323    xptr_t          client_xp;    // extended pointer on waiting thread
     
    340341    {
    341342
    342 #if DEBUG_CHDEV_SERVER_RX
     343#if( DEBUG_CHDEV_SERVER_RX || DEBUG_CHDEV_SERVER_TX )
    343344uint32_t rx_cycle = (uint32_t)hal_get_cycles();
    344345if( (chdev->is_rx) && (DEBUG_CHDEV_SERVER_RX < rx_cycle) )
    345 printk("\n[%s] thread[%x,%x] start RX / cycle %d\n",
    346 __FUNCTION__ , server->process->pid, server->trdid, rx_cycle );
     346printk("\n[%s] DEV thread[%x,%x] check TXT_RX channel %d / cycle %d\n",
     347__FUNCTION__ , server->process->pid, server->trdid, chdev->channel, rx_cycle );
    347348#endif
    348349
     
    350351uint32_t tx_cycle = (uint32_t)hal_get_cycles();
    351352if( (chdev->is_rx == 0) && (DEBUG_CHDEV_SERVER_TX < tx_cycle) )
    352 printk("\n[%s] thread[%x,%x] start TX / cycle %d\n",
    353 __FUNCTION__ , server->process->pid, server->trdid, tx_cycle );
     353printk("\n[%s] thread[%x,%x] check TXT_TX channel %d / cycle %d\n",
     354__FUNCTION__ , server->process->pid, server->trdid, chdev->channel, tx_cycle );
    354355#endif
    355356
     
    397398            client_ptr = GET_PTR( client_xp );
    398399
    399 #if( DDEBUG_CHDEV_SERVER_TX || DEBUG_CHDEV_SERVER_RX )
     400#if( DEBUG_CHDEV_SERVER_TX || DEBUG_CHDEV_SERVER_RX )
    400401process_t * process      = hal_remote_lpt( XPTR( client_cxy , &client_ptr->process ) );
    401402pid_t       client_pid   = hal_remote_l32( XPTR( client_cxy , &process->pid ) );
    402 process_t   client_trdid = hal_remote_l32( XPTR( client_cxy , &client_ptr->trdid ) );
     403trdid_t     client_trdid = hal_remote_l32( XPTR( client_cxy , &client_ptr->trdid ) );
    403404#endif
    404405
     
    407408if( (chdev->is_rx) && (DEBUG_CHDEV_SERVER_RX < rx_cycle) )
    408409printk("\n[%s] thread[%x,%x] for RX get client thread[%x,%x] / cycle %d\n",
    409 __FUNCTION__, server->process->pid, server->trdid, client_pid, client_trdid, cycle );
     410__FUNCTION__, server->process->pid, server->trdid, client_pid, client_trdid, rx_cycle );
    410411#endif
    411412
     
    414415if( (chdev->is_rx == 0) && (DEBUG_CHDEV_SERVER_TX < tx_cycle) )
    415416printk("\n[%s] thread[%x,%x] for TX get client thread[%x,%x] / cycle %d\n",
    416 __FUNCTION__, server->process->pid, server->trdid, client_pid, client_trdid, cycle );
     417__FUNCTION__, server->process->pid, server->trdid, client_pid, client_trdid, tx_cycle );
    417418#endif
    418419
     
    445446if( (chdev->is_rx) && (DEBUG_CHDEV_SERVER_RX < rx_cycle) )
    446447printk("\n[%s] thread[%x,%x] completes RX for client thread[%x,%x] / cycle %d\n",
    447 __FUNCTION__, server->process->pid, server->trdid, client_pid, client_trdid, cycle );
     448__FUNCTION__, server->process->pid, server->trdid, client_pid, client_trdid, rx_cycle );
    448449#endif
    449450
     
    452453if( (chdev->is_rx == 0) && (DEBUG_CHDEV_SERVER_TX < tx_cycle) )
    453454printk("\n[%s] thread[%x,%x] completes TX for client thread[%x,%x] / cycle %d\n",
    454 __FUNCTION__, server->process->pid, server->trdid, client_pid, client_trdid, cycle );
     455__FUNCTION__, server->process->pid, server->trdid, client_pid, client_trdid, tX_cycle );
    455456#endif
    456457
     
    465466        }
    466467    }  // end while
    467 }  // end chdev_sequencial_server()
     468}  // end chdev_server_func()
    468469
    469470////////////////////////////////////////
  • trunk/kernel/kern/chdev.h

    r564 r619  
    240240 * @ chdev   : local pointer on device descriptor.
    241241 *****************************************************************************************/
    242 void chdev_sequencial_server( chdev_t * chdev );
     242void chdev_server_func( chdev_t * chdev );
    243243
    244244/******************************************************************************************
  • trunk/kernel/kern/kernel_init.c

    r614 r619  
    154154    "CONDVAR_STATE",         // 17
    155155    "SEM_STATE",             // 18
    156     "RPOCESS_CWD",           // 19
    157 
    158     "unused_20",             // 20
     156    "PROCESS_CWD",           // 19
     157    "BARRIER_STATE",         // 20
    159158
    160159    "CLUSTER_PREFTBL",       // 21
  • trunk/kernel/kern/process.c

    r618 r619  
    528528    process_t        * process_ptr;       // local pointer on process copy
    529529    reg_t              save_sr;           // for critical section
    530     rpc_desc_t         rpc;               // shared RPC descriptor
    531530    thread_t         * client;            // pointer on client thread
    532531    xptr_t             client_xp;         // extended pointer on client thread
    533532    process_t        * local;             // pointer on process copy in local cluster
    534533    uint32_t           remote_nr;         // number of remote process copies
     534    rpc_desc_t         rpc;               // shared RPC descriptor
     535    uint32_t           responses;         // shared RPC responses counter
    535536
    536537    client    = CURRENT_THREAD;
     
    579580    thread_block( client_xp , THREAD_BLOCKED_RPC );
    580581
     582    // initialize RPC responses counter
     583    responses = 0;
     584
    581585    // initialize shared RPC descriptor
    582     rpc.responses = 0;
     586    // can be shared, because no out arguments
     587    rpc.rsp       = &responses;
    583588    rpc.blocking  = false;
    584589    rpc.index     = RPC_PROCESS_SIGACTION;
     
    608613            remote_nr++;
    609614
    610             // atomically increment responses counter
    611             hal_atomic_add( (void *)&rpc.responses , 1 );
     615            // atomically increment RPC responses counter
     616            hal_atomic_add( &responses , 1 );
    612617
    613618#if DEBUG_PROCESS_SIGACTION
     
    617622#endif
    618623            // call RPC in target cluster
    619             rpc_process_sigaction_client( process_cxy , &rpc );
     624            rpc_send( process_cxy , &rpc );
    620625        }
    621626    }  // end list of copies
     
    685690
    686691// check target process is an user process
    687 assert( (LPID_FROM_PID( process->pid ) != 0 ), "target process must be an user process" );
     692assert( (LPID_FROM_PID( process->pid ) != 0 ),
     693"process %x is not an user process\n", process->pid );
    688694
    689695    // get target process owner cluster
     
    773779
    774780// check target process is an user process
    775 assert( (LPID_FROM_PID( process->pid ) != 0), "process %x not an user process", process->pid );
     781assert( (LPID_FROM_PID( process->pid ) != 0),
     782"process %x is not an user process\n", process->pid );
    776783
    777784    // get lock protecting process th_tbl[]
     
    831838
    832839// check target process is an user process
    833 assert( ( process->pid != 0 ),
    834 "target process must be an user process" );
     840assert( ( LPID_FROM_PID( process->pid ) != 0 ),
     841"process %x is not an user process\n", process->pid );
    835842
    836843    // get lock protecting process th_tbl[]
  • trunk/kernel/kern/rpc.c

    r614 r619  
    143143    client_core_lid = this->core->lid;
    144144
    145     // check calling thread can yield when client thread is not the IDLE thread
     145    // check calling thread can yield when is not the IDLE thread
    146146    // RPCs executed by the IDLE thread during kernel_init do not deschedule
    147147    if( this->type != THREAD_IDLE ) thread_assert_can_yield( this , __FUNCTION__ );
     
    184184    while( full );
    185185 
    186     hal_fence();
    187 
    188186#if DEBUG_RPC_CLIENT_GENERIC
    189187uint32_t cycle = (uint32_t)hal_get_cycles();
     
    199197
    200198    // wait RPC completion before returning if blocking RPC :
    201     // - descheduling without blocking if thread idle (in kernel init)
     199    // - deschedule without blocking if thread idle (in kernel init)
    202200    // - block and deschedule policy for any other thread
    203201    if ( rpc->blocking )
     
    212210__FUNCTION__, this->process->pid, this->trdid, rpc_str[rpc->index], cycle );
    213211#endif
    214 
    215              while( rpc->responses ) sched_yield( "busy waiting on RPC");
    216    
     212             while( 1 )
     213             {
     214                 // check responses counter
     215                 if( hal_remote_l32( XPTR( local_cxy , rpc->rsp ) ) == 0 ) break;
     216                   
     217                 // deschedule
     218                 sched_yield("busy_waiting RPC completion");
     219             }
     220
    217221#if DEBUG_RPC_CLIENT_GENERIC
    218222cycle = (uint32_t)hal_get_cycles();
     
    242246cycle = (uint32_t)hal_get_cycles();
    243247if( DEBUG_RPC_CLIENT_GENERIC < cycle )
    244 printk("\n[%s] thread[%x,%x] resumes for rpc %s / cycle %d\n",
     248printk("\n[%s] thread[%x,%x] resumes after rpc %s / cycle %d\n",
    245249__FUNCTION__, this->process->pid, this->trdid, rpc_str[rpc->index], cycle );
    246250#endif
     
    248252
    249253// response must be available for a blocking RPC
    250 assert( (rpc->responses == 0) , "illegal response for RPC %s\n", rpc_str[rpc->index] );
     254assert( (*rpc->rsp == 0) , "illegal response for RPC %s\n", rpc_str[rpc->index] );
    251255
    252256    }
     
    270274
    271275////////////////////////////
    272 void rpc_thread_func( void )
     276void rpc_server_func( void )
    273277{
    274278    error_t         empty;              // local RPC fifo state
     
    278282    uint32_t        index;              // RPC request index
    279283    thread_t      * client_ptr;         // local pointer on client thread
     284    xptr_t          client_xp;          // extended pointer on client thread
     285    lid_t           client_lid;         // local index of client core
    280286        thread_t      * server_ptr;         // local pointer on server thread
    281287    xptr_t          server_xp;          // extended pointer on server thread
    282     lid_t           client_core_lid;    // local index of client core
    283     lid_t           server_core_lid;    // local index of server core
    284     bool_t          blocking;           // blocking RPC when true
     288    lid_t           server_lid;         // local index of server core
    285289        remote_fifo_t * rpc_fifo;           // local pointer on RPC fifo
    286     uint32_t        count;              // current number of expected responses
    287  
     290    uint32_t      * rsp_ptr;            // local pointer on responses counter
     291    xptr_t          rsp_xp;             // extended pointer on responses counter
     292    uint32_t        responses;          // number of expected responses
     293
    288294    // makes RPC thread not preemptable
    289295        hal_disable_irq( NULL );
     
    291297        server_ptr      = CURRENT_THREAD;
    292298    server_xp       = XPTR( local_cxy , server_ptr );
    293     server_core_lid = server_ptr->core->lid;
    294         rpc_fifo        = &LOCAL_CLUSTER->rpc_fifo[server_core_lid];
     299    server_lid      = server_ptr->core->lid;
     300        rpc_fifo        = &LOCAL_CLUSTER->rpc_fifo[server_lid];
    295301
    296302    // "infinite" RPC thread loop
     
    305311if( DEBUG_RPC_SERVER_GENERIC < cycle )
    306312printk("\n[%s] RPC thread[%x,%x] on core[%d] takes RPC_FIFO ownership / cycle %d\n",
    307 __FUNCTION__, server_ptr->process->pid, server_ptr->trdid, server_core_lid, cycle );
     313__FUNCTION__, server_ptr->process->pid, server_ptr->trdid, server_lid, cycle );
    308314#endif
    309315                // try to consume one RPC request 
     
    320326                desc_ptr = GET_PTR( desc_xp );
    321327
     328                // get relevant infos from RPC descriptor
    322329                    index      = hal_remote_l32( XPTR( desc_cxy , &desc_ptr->index ) );
    323                 blocking   = hal_remote_l32( XPTR( desc_cxy , &desc_ptr->blocking ) );
    324330                client_ptr = hal_remote_lpt( XPTR( desc_cxy , &desc_ptr->thread ) );
     331                rsp_ptr    = hal_remote_lpt( XPTR( desc_cxy , &desc_ptr->rsp ) );
     332                client_lid = hal_remote_l32( XPTR( desc_cxy , &desc_ptr->lid ) );
     333
     334                rsp_xp     = XPTR( desc_cxy , rsp_ptr );
     335                client_xp  = XPTR( desc_cxy , client_ptr );
    325336
    326337#if DEBUG_RPC_SERVER_GENERIC
     
    332343#endif
    333344                // register client thread in RPC thread descriptor
    334                 server_ptr->rpc_client_xp = XPTR( desc_cxy , client_ptr );
     345                server_ptr->rpc_client_xp = client_xp;
    335346 
    336347                // call the relevant server function
     
    343354__FUNCTION__, server_ptr->process->pid, server_ptr->trdid, rpc_str[index], desc_cxy, cycle );
    344355#endif
    345                 // decrement expected responses counter in RPC descriptor
    346                 count = hal_remote_atomic_add( XPTR( desc_cxy, &desc_ptr->responses ), -1 );
    347 
    348                 // decrement response counter in RPC descriptor if last response
    349                 if( count == 1 )
     356                // decrement expected responses counter
     357                responses = hal_remote_atomic_add( rsp_xp , -1 );
     358
     359                // unblock client thread if last response
     360                if( responses == 1 )
    350361                {
    351                     // get client thread pointer and client core lid from RPC descriptor
    352                     client_ptr      = hal_remote_lpt( XPTR( desc_cxy , &desc_ptr->thread ) );
    353                     client_core_lid = hal_remote_l32 ( XPTR( desc_cxy , &desc_ptr->lid ) );
    354 
    355362                    // unblock client thread
    356                     thread_unblock( XPTR( desc_cxy , client_ptr ) , THREAD_BLOCKED_RPC );
     363                    thread_unblock( client_xp , THREAD_BLOCKED_RPC );
    357364
    358365                    hal_fence();
     
    360367#if DEBUG_RPC_SERVER_GENERIC
    361368cycle = (uint32_t)hal_get_cycles();
     369trdid_t     client_trdid = hal_remote_l32( XPTR( desc_cxy , &client_ptr->trdid ) );
     370process_t * process      = hal_remote_lpt( XPTR( desc_cxy , &client_ptr->process ) );
     371pid_t       client_pid   = hal_remote_l32( XPTR( desc_cxy , &process->pid ) );
    362372if( DEBUG_RPC_SERVER_GENERIC < cycle )
    363373printk("\n[%s] RPC thread[%x,%x] unblocked client thread[%x,%x] / cycle %d\n",
    364374__FUNCTION__, server_ptr->process->pid, server_ptr->trdid,
    365 client_ptr->process->pid, client_ptr->trdid, cycle );
     375client_pid, client_trdid, cycle );
    366376#endif
    367377                    // send IPI to client core
    368                     dev_pic_send_ipi( desc_cxy , client_core_lid );
     378                    dev_pic_send_ipi( desc_cxy , client_lid );
    369379                }
    370380            }  // end RPC handling if fifo non empty
     
    372382
    373383        // sucide if too many RPC threads
    374         if( LOCAL_CLUSTER->rpc_threads[server_core_lid] >= CONFIG_RPC_THREADS_MAX )
     384        if( LOCAL_CLUSTER->rpc_threads[server_lid] >= CONFIG_RPC_THREADS_MAX )
    375385            {
    376386
     
    382392#endif
    383393            // update RPC threads counter
    384                 hal_atomic_add( &LOCAL_CLUSTER->rpc_threads[server_core_lid] , -1 );
     394                hal_atomic_add( &LOCAL_CLUSTER->rpc_threads[server_lid] , -1 );
    385395
    386396            // RPC thread blocks on GLOBAL
     
    397407uint32_t cycle = (uint32_t)hal_get_cycles();
    398408if( DEBUG_RPC_SERVER_GENERIC < cycle )
    399 printk("\n[%s] RPC thread[%x,%x] block IDLE & deschedules / cycle %d\n",
     409printk("\n[%s] RPC thread[%x,%x] blocks & deschedules / cycle %d\n",
    400410__FUNCTION__, server_ptr->process->pid, server_ptr->trdid, cycle );
    401411#endif
     
    407417        }
    408418        } // end infinite loop
    409 } // end rpc_thread_func()
     419
     420} // end rpc_server_func()
    410421
    411422
     
    427438#endif
    428439
    429     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     440    uint32_t responses = 1;
    430441
    431442    // initialise RPC descriptor header
     
    433444    rpc.index     = RPC_PMEM_GET_PAGES;
    434445    rpc.blocking  = true;
    435     rpc.responses = 1;
     446    rpc.rsp       = &responses;
    436447
    437448    // set input arguments in RPC descriptor
     
    485496
    486497/////////////////////////////////////////////////////////////////////////////////////////
    487 // [1]       Marshaling functions attached to RPC_PMEM_RELEASE_PAGES (blocking)
     498// [1]       Marshaling functions attached to RPC_PMEM_RELEASE_PAGES
    488499/////////////////////////////////////////////////////////////////////////////////////////
    489500
     
    500511#endif
    501512
    502     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     513    uint32_t responses = 1;
    503514
    504515    // initialise RPC descriptor header
     
    506517    rpc.index    = RPC_PMEM_RELEASE_PAGES;
    507518    rpc.blocking = true;
    508     rpc.responses = 1;
     519    rpc.rsp      = &responses;
    509520
    510521    // set input arguments in RPC descriptor
     
    559570
    560571/////////////////////////////////////////////////////////////////////////////////////////
    561 // [3]           Marshaling functions attached to RPC_PROCESS_MAKE_FORK (blocking)
     572// [3]           Marshaling functions attached to RPC_PROCESS_MAKE_FORK
    562573/////////////////////////////////////////////////////////////////////////////////////////
    563574
     
    578589#endif
    579590
    580     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     591    uint32_t responses = 1;
    581592
    582593    // initialise RPC descriptor header
     
    584595    rpc.index    = RPC_PROCESS_MAKE_FORK;
    585596    rpc.blocking = true;
    586     rpc.responses = 1;
     597    rpc.rsp      = &responses;
    587598
    588599    // set input arguments in RPC descriptor 
     
    651662
    652663/////////////////////////////////////////////////////////////////////////////////////////
    653 // [4]      Marshaling functions attached to RPC_USER_DIR_CREATE (blocking)
     664// [4]      Marshaling functions attached to RPC_USER_DIR_CREATE
    654665/////////////////////////////////////////////////////////////////////////////////////////
    655666
     
    668679#endif
    669680
    670     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     681    uint32_t responses = 1;
    671682
    672683    // initialise RPC descriptor header
     
    674685    rpc.index    = RPC_USER_DIR_CREATE;
    675686    rpc.blocking = true;
    676     rpc.responses = 1;
     687    rpc.rsp      = &responses;
    677688
    678689    // set input arguments in RPC descriptor
     
    732743
    733744/////////////////////////////////////////////////////////////////////////////////////////
    734 // [5]      Marshaling functions attached to RPC_USER_DIR_DESTROY (blocking)
     745// [5]      Marshaling functions attached to RPC_USER_DIR_DESTROY
    735746/////////////////////////////////////////////////////////////////////////////////////////
    736747
     
    748759#endif
    749760
    750     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     761    uint32_t responses = 1;
    751762
    752763    // initialise RPC descriptor header
     
    754765    rpc.index    = RPC_USER_DIR_DESTROY;
    755766    rpc.blocking = true;
    756     rpc.responses = 1;
     767    rpc.rsp      = &responses;
    757768
    758769    // set input arguments in RPC descriptor
     
    805816
    806817/////////////////////////////////////////////////////////////////////////////////////////
    807 // [6]      Marshaling functions attached to RPC_THREAD_USER_CREATE (blocking) 
     818// [6]      Marshaling functions attached to RPC_THREAD_USER_CREATE  
    808819/////////////////////////////////////////////////////////////////////////////////////////
    809820
     
    824835__FUNCTION__, this->process->pid, this->trdid, this->core->lid, cycle );
    825836#endif
    826 
    827     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     837   
     838    uint32_t responses = 1;
    828839
    829840    // initialise RPC descriptor header
     
    831842    rpc.index    = RPC_THREAD_USER_CREATE;
    832843    rpc.blocking = true;
    833     rpc.responses = 1;
     844    rpc.rsp      = &responses;
    834845
    835846    // set input arguments in RPC descriptor
     
    929940#endif
    930941
    931     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     942    uint32_t responses = 1;
    932943
    933944    // initialise RPC descriptor header
     
    935946    rpc.index    = RPC_THREAD_KERNEL_CREATE;
    936947    rpc.blocking = true;
    937     rpc.responses = 1;
     948    rpc.rsp      = &responses;
    938949
    939950    // set input arguments in RPC descriptor
     
    10071018
    10081019/////////////////////////////////////////////////////////////////////////////////////////
    1009 // [9]   Marshaling functions attached to RPC_PROCESS_SIGACTION (non blocking)
     1020// [9]   Marshaling functions attached to RPC_PROCESS_SIGACTION
    10101021/////////////////////////////////////////////////////////////////////////////////////////
    10111022
    10121023////////////////////////////////////////////////////
    10131024void rpc_process_sigaction_client( cxy_t        cxy,
    1014                                    rpc_desc_t * rpc )
     1025                                   pid_t        pid,
     1026                                   uint32_t     action )
    10151027{
    10161028#if DEBUG_RPC_PROCESS_SIGACTION
     
    10181030thread_t * this = CURRENT_THREAD;
    10191031if( DEBUG_RPC_PROCESS_SIGACTION < cycle )
    1020 printk("\n[%s] thread[%x,%x] on core %d enter / cycle %d\n",
    1021 __FUNCTION__, this->process->pid, this->trdid, this->core->lid, cycle );
    1022 #endif
    1023 
    1024 // check RPC "index" and "blocking" arguments
    1025 assert( (rpc->blocking == false) , "must be non-blocking\n");
    1026 assert( (rpc->index == RPC_PROCESS_SIGACTION ) , "bad RPC index\n" );
    1027 
    1028     // register RPC request in remote RPC fifo and return
    1029     rpc_send( cxy , rpc );
     1032printk("\n[%s] thread[%x,%x] on core %d : enter to %s process %x / cycle %d\n",
     1033__FUNCTION__, this->process->pid, this->trdid, this->core->lid,
     1034process_action_str( action ), pid, cycle );
     1035#endif
     1036
     1037    uint32_t    responses = 1;
     1038    rpc_desc_t  rpc;
     1039
     1040    // initialise RPC descriptor header
     1041    rpc.index    = RPC_PROCESS_SIGACTION;
     1042    rpc.blocking = true;
     1043    rpc.rsp      = &responses;
     1044
     1045    // set input arguments in RPC descriptor
     1046    rpc.args[0] = (uint64_t)pid;
     1047    rpc.args[1] = (uint64_t)action;
     1048
     1049    // register RPC request in remote RPC fifo
     1050    rpc_send( cxy , &rpc );
    10301051
    10311052#if DEBUG_RPC_PROCESS_SIGACTION
    10321053cycle = (uint32_t)hal_get_cycles();
    10331054if( DEBUG_RPC_PROCESS_SIGACTION < cycle )
    1034 printk("\n[%s] thread[%x,%x] on core %d exit / cycle %d\n",
    1035 __FUNCTION__, this->process->pid, this->trdid, this->core->lid, cycle );
     1055printk("\n[%s] thread[%x,%x] on core %d : exit after %s process %x / cycle %d\n",
     1056__FUNCTION__, this->process->pid, this->trdid, this->core->lid,
     1057process_action_str( action ), pid, cycle );
    10361058#endif
    10371059}  // end rpc_process_sigaction_client()
     
    10401062void rpc_process_sigaction_server( xptr_t xp )
    10411063{
    1042 #if DEBUG_RPC_PROCESS_SIGACTION
    1043 uint32_t cycle = (uint32_t)hal_get_cycles();
    1044 thread_t * this = CURRENT_THREAD;
    1045 if( DEBUG_RPC_PROCESS_SIGACTION < cycle )
    1046 printk("\n[%s] thread[%x,%x] on core %d enter / cycle %d\n",
    1047 __FUNCTION__, this->process->pid, this->trdid, this->core->lid, cycle );
    1048 #endif
    1049 
    10501064    // get client cluster identifier and pointer on RPC descriptor
    10511065    cxy_t        client_cxy = GET_CXY( xp );
     
    10561070    uint32_t action = (uint32_t)hal_remote_l64( XPTR(client_cxy , &desc->args[1]) );
    10571071
     1072#if DEBUG_RPC_PROCESS_SIGACTION
     1073uint32_t cycle = (uint32_t)hal_get_cycles();
     1074thread_t * this = CURRENT_THREAD;
     1075if( DEBUG_RPC_PROCESS_SIGACTION < cycle )
     1076printk("\n[%s] thread[%x,%x] on core %d : enter to %s process %x / cycle %d\n",
     1077__FUNCTION__, this->process->pid, this->trdid, this->core->lid,
     1078process_action_str( action ), pid, cycle );
     1079#endif
     1080
    10581081    // get client thread pointers
    10591082    thread_t * client_ptr = hal_remote_lpt( XPTR( client_cxy , &desc->thread ) );
     
    10631086    process_t * process = cluster_get_local_process_from_pid( pid );
    10641087
    1065     // call relevant kernel function
    1066     if      ( action == DELETE_ALL_THREADS  ) process_delete_threads ( process , client_xp );
    1067     else if ( action == BLOCK_ALL_THREADS   ) process_block_threads  ( process );
    1068     else if ( action == UNBLOCK_ALL_THREADS ) process_unblock_threads( process );
     1088    // call relevant kernel function if found / does nothing if not found
     1089    if( process != NULL )
     1090    {
     1091        if ( action == DELETE_ALL_THREADS  ) process_delete_threads ( process , client_xp );
     1092        if ( action == BLOCK_ALL_THREADS   ) process_block_threads  ( process );
     1093        if ( action == UNBLOCK_ALL_THREADS ) process_unblock_threads( process );
     1094    }
    10691095
    10701096#if DEBUG_RPC_PROCESS_SIGACTION
    10711097cycle = (uint32_t)hal_get_cycles();
    10721098if( DEBUG_RPC_PROCESS_SIGACTION < cycle )
    1073 printk("\n[%s] thread[%x,%x] on core %d exit / cycle %d\n",
    1074 __FUNCTION__, this->process->pid, this->trdid, this->core->lid, cycle );
     1099printk("\n[%s] thread[%x,%x] on core %d : exit after %s process %x / cycle %d\n",
     1100__FUNCTION__, this->process->pid, this->trdid, this->core->lid,
     1101process_action_str( action ), pid, cycle );
    10751102#endif
    10761103} // end rpc_process_sigaction_server()
    10771104
    10781105/////////////////////////////////////////////////////////////////////////////////////////
    1079 // [10]     Marshaling functions attached to RPC_VFS_INODE_CREATE  (blocking)
     1106// [10]     Marshaling functions attached to RPC_VFS_INODE_CREATE
    10801107/////////////////////////////////////////////////////////////////////////////////////////
    10811108
     
    10991126#endif
    11001127
    1101     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     1128    uint32_t responses = 1;
    11021129
    11031130    // initialise RPC descriptor header
     
    11051132    rpc.index    = RPC_VFS_INODE_CREATE;
    11061133    rpc.blocking = true;
    1107     rpc.responses = 1;
     1134    rpc.rsp      = &responses;
    11081135
    11091136    // set input arguments in RPC descriptor
     
    11841211
    11851212/////////////////////////////////////////////////////////////////////////////////////////
    1186 // [11]          Marshaling functions attached to RPC_VFS_INODE_DESTROY  (blocking)
     1213// [11]          Marshaling functions attached to RPC_VFS_INODE_DESTROY
    11871214/////////////////////////////////////////////////////////////////////////////////////////
    11881215
     
    11991226#endif
    12001227
    1201     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     1228    uint32_t responses = 1;
    12021229
    12031230    // initialise RPC descriptor header
     
    12051232    rpc.index    = RPC_VFS_INODE_DESTROY;
    12061233    rpc.blocking = true;
    1207     rpc.responses = 1;
     1234    rpc.rsp      = &responses;
    12081235
    12091236    // set input arguments in RPC descriptor
     
    12531280
    12541281/////////////////////////////////////////////////////////////////////////////////////////
    1255 // [12]          Marshaling functions attached to RPC_VFS_DENTRY_CREATE  (blocking)
     1282// [12]          Marshaling functions attached to RPC_VFS_DENTRY_CREATE
    12561283/////////////////////////////////////////////////////////////////////////////////////////
    12571284
     
    12711298#endif
    12721299
    1273     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     1300    uint32_t responses = 1;
    12741301
    12751302    // initialise RPC descriptor header
     
    12771304    rpc.index    = RPC_VFS_DENTRY_CREATE;
    12781305    rpc.blocking = true;
    1279     rpc.responses = 1;
     1306    rpc.rsp      = &responses;
    12801307
    12811308    // set input arguments in RPC descriptor
     
    13441371
    13451372/////////////////////////////////////////////////////////////////////////////////////////
    1346 // [13]          Marshaling functions attached to RPC_VFS_DENTRY_DESTROY  (blocking)
     1373// [13]          Marshaling functions attached to RPC_VFS_DENTRY_DESTROY
    13471374/////////////////////////////////////////////////////////////////////////////////////////
    13481375
     
    13591386#endif
    13601387
    1361     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     1388    uint32_t responses = 1;
    13621389
    13631390    // initialise RPC descriptor header
     
    13651392    rpc.index    = RPC_VFS_DENTRY_DESTROY;
    13661393    rpc.blocking = true;
    1367     rpc.responses = 1;
     1394    rpc.rsp      = &responses;
    13681395
    13691396    // set input arguments in RPC descriptor
     
    14141441
    14151442/////////////////////////////////////////////////////////////////////////////////////////
    1416 // [14]          Marshaling functions attached to RPC_VFS_FILE_CREATE  (blocking)
     1443// [14]          Marshaling functions attached to RPC_VFS_FILE_CREATE 
    14171444/////////////////////////////////////////////////////////////////////////////////////////
    14181445
     
    14321459#endif
    14331460
    1434     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     1461    uint32_t responses = 1;
    14351462
    14361463    // initialise RPC descriptor header
     
    14381465    rpc.index    = RPC_VFS_FILE_CREATE;
    14391466    rpc.blocking = true;
    1440     rpc.responses = 1;
     1467    rpc.rsp      = &responses;
    14411468
    14421469    // set input arguments in RPC descriptor
     
    15011528
    15021529/////////////////////////////////////////////////////////////////////////////////////////
    1503 // [15]          Marshaling functions attached to RPC_VFS_FILE_DESTROY  (blocking)
     1530// [15]          Marshaling functions attached to RPC_VFS_FILE_DESTROY 
    15041531/////////////////////////////////////////////////////////////////////////////////////////
    15051532
     
    15161543#endif
    15171544
    1518     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     1545    uint32_t responses = 1;
    15191546
    15201547    // initialise RPC descriptor header
     
    15221549    rpc.index    = RPC_VFS_FILE_DESTROY;
    15231550    rpc.blocking = true;
    1524     rpc.responses = 1;
     1551    rpc.rsp      = &responses;
    15251552
    15261553    // set input arguments in RPC descriptor
     
    15701597
    15711598/////////////////////////////////////////////////////////////////////////////////////////
    1572 // [16]      Marshaling functions attached to RPC_VFS_FS_GET_DENTRY  (blocking)
     1599// [16]      Marshaling functions attached to RPC_VFS_FS_GET_DENTRY
    15731600/////////////////////////////////////////////////////////////////////////////////////////
    15741601
     
    15881615#endif
    15891616
    1590     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     1617    uint32_t responses = 1;
    15911618
    15921619    // initialise RPC descriptor header
     
    15941621    rpc.index    = RPC_VFS_FS_GET_DENTRY;
    15951622    rpc.blocking = true;
    1596     rpc.responses = 1;
     1623    rpc.rsp      = &responses;
    15971624
    15981625    // set input arguments in RPC descriptor
     
    16611688
    16621689/////////////////////////////////////////////////////////////////////////////////////////
    1663 // [17]      Marshaling function attached to RPC_VFS_FS_ADD_DENTRY  (blocking)
     1690// [17]      Marshaling function attached to RPC_VFS_FS_ADD_DENTRY 
    16641691/////////////////////////////////////////////////////////////////////////////////////////
    16651692
     
    16771704#endif
    16781705
    1679     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     1706    uint32_t responses = 1;
    16801707
    16811708    // initialise RPC descriptor header
     
    16831710    rpc.index    = RPC_VFS_FS_ADD_DENTRY;
    16841711    rpc.blocking = true;
    1685     rpc.responses = 1;
     1712    rpc.rsp      = &responses;
    16861713
    16871714    // set input arguments in RPC descriptor
     
    17031730}
    17041731
    1705 /////////////////////////////////////////////////
     1732//////////////////////////////////////////////
    17061733void rpc_vfs_fs_add_dentry_server( xptr_t xp )
    17071734{
     
    17411768
    17421769/////////////////////////////////////////////////////////////////////////////////////////
    1743 // [18]      Marshaling function attached to RPC_VFS_FS_REMOVE_DENTRY  (blocking)
     1770// [18]      Marshaling function attached to RPC_VFS_FS_REMOVE_DENTRY
    17441771/////////////////////////////////////////////////////////////////////////////////////////
    17451772
     
    17571784#endif
    17581785
    1759     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     1786    uint32_t responses = 1;
    17601787
    17611788    // initialise RPC descriptor header
     
    17631790    rpc.index    = RPC_VFS_FS_REMOVE_DENTRY;
    17641791    rpc.blocking = true;
    1765     rpc.responses = 1;
     1792    rpc.rsp      = &responses;
    17661793
    17671794    // set input arguments in RPC descriptor
     
    18211848
    18221849/////////////////////////////////////////////////////////////////////////////////////////
    1823 // [19]     Marshaling functions attached to RPC_VFS_INODE_LOAD_ALL_PAGES  (blocking)
     1850// [19]     Marshaling functions attached to RPC_VFS_INODE_LOAD_ALL_PAGES
    18241851/////////////////////////////////////////////////////////////////////////////////////////
    18251852
     
    18371864#endif
    18381865
    1839     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     1866    uint32_t responses = 1;
    18401867
    18411868    // initialise RPC descriptor header
     
    18431870    rpc.index    = RPC_VFS_INODE_LOAD_ALL_PAGES;
    18441871    rpc.blocking = true;
    1845     rpc.responses = 1;
     1872    rpc.rsp      = &responses;
    18461873
    18471874    // set input arguments in RPC descriptor
     
    18981925
    18991926/////////////////////////////////////////////////////////////////////////////////////////
    1900 // [20]          Marshaling functions attached to RPC_VMM_GET_VSEG  (blocking)
     1927// [20]          Marshaling functions attached to RPC_VMM_GET_VSEG
    19011928/////////////////////////////////////////////////////////////////////////////////////////
    19021929
     
    19161943#endif
    19171944
    1918     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     1945    uint32_t responses = 1;
    19191946
    19201947    // initialise RPC descriptor header
     
    19221949    rpc.index    = RPC_VMM_GET_VSEG;
    19231950    rpc.blocking = true;
    1924     rpc.responses = 1;
     1951    rpc.rsp      = &responses;
    19251952
    19261953    // set input arguments in RPC descriptor
     
    19862013
    19872014/////////////////////////////////////////////////////////////////////////////////////////
    1988 // [21]    Marshaling functions attached to RPC_VMM_GLOBAL_UPDATE_PTE  (blocking)
     2015// [21]    Marshaling functions attached to RPC_VMM_GLOBAL_UPDATE_PTE
    19892016/////////////////////////////////////////////////////////////////////////////////////////
    19902017
     
    20042031#endif
    20052032
    2006     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     2033    uint32_t responses = 1;
    20072034
    20082035    // initialise RPC descriptor header
     
    20102037    rpc.index    = RPC_VMM_GLOBAL_UPDATE_PTE;
    20112038    rpc.blocking = true;
    2012     rpc.responses = 1;
     2039    rpc.rsp      = &responses;
    20132040
    20142041    // set input arguments in RPC descriptor
     
    20672094
    20682095/////////////////////////////////////////////////////////////////////////////////////////
    2069 // [22]          Marshaling functions attached to RPC_KCM_ALLOC  (blocking)
     2096// [22]          Marshaling functions attached to RPC_KCM_ALLOC
    20702097/////////////////////////////////////////////////////////////////////////////////////////
    20712098
     
    20732100void rpc_kcm_alloc_client( cxy_t      cxy,
    20742101                           uint32_t   kmem_type,   // in
    2075                            xptr_t buf_xp )     // out
     2102                           xptr_t   * buf_xp )     // out
    20762103{
    20772104#if DEBUG_RPC_KCM_ALLOC
     
    20832110#endif
    20842111
    2085     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     2112    uint32_t responses = 1;
    20862113
    20872114    // initialise RPC descriptor header
     
    20892116    rpc.index    = RPC_KCM_ALLOC;
    20902117    rpc.blocking = true;
    2091     rpc.responses = 1;
     2118    rpc.rsp      = &responses;
    20922119
    20932120    // set input arguments in RPC descriptor
     
    21452172
    21462173/////////////////////////////////////////////////////////////////////////////////////////
    2147 // [23]          Marshaling functions attached to RPC_KCM_FREE  (blocking)
     2174// [23]          Marshaling functions attached to RPC_KCM_FREE
    21482175/////////////////////////////////////////////////////////////////////////////////////////
    21492176
     
    21612188#endif
    21622189
    2163     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     2190    uint32_t responses = 1;
    21642191
    21652192    // initialise RPC descriptor header
     
    21672194    rpc.index    = RPC_KCM_FREE;
    21682195    rpc.blocking = true;
    2169     rpc.responses = 1;
     2196    rpc.rsp      = &responses;
    21702197
    21712198    // set input arguments in RPC descriptor
     
    22222249
    22232250/////////////////////////////////////////////////////////////////////////////////////////
    2224 // [25]          Marshaling functions attached to RPC_MAPPER_HANDLE_MISS (blocking)
     2251// [25]          Marshaling functions attached to RPC_MAPPER_HANDLE_MISS
    22252252/////////////////////////////////////////////////////////////////////////////////////////
    22262253
     
    22402267#endif
    22412268
    2242     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     2269    uint32_t responses = 1;
    22432270
    22442271    // initialise RPC descriptor header
     
    22462273    rpc.index    = RPC_MAPPER_HANDLE_MISS;
    22472274    rpc.blocking = true;
    2248     rpc.responses = 1;
     2275    rpc.rsp      = &responses;
    22492276
    22502277    // set input arguments in RPC descriptor
     
    23092336
    23102337/////////////////////////////////////////////////////////////////////////////////////////
    2311 // [26]  Marshaling functions attached to RPC_VMM_DELETE_VSEG (parallel / non blocking)
     2338// [26]  Marshaling functions attached to RPC_VMM_DELETE_VSEG
    23122339/////////////////////////////////////////////////////////////////////////////////////////
    23132340
    23142341//////////////////////////////////////////////////
    23152342void rpc_vmm_delete_vseg_client( cxy_t        cxy,
    2316                                  rpc_desc_t * rpc )
     2343                                 pid_t        pid,
     2344                                 intptr_t     vaddr )
    23172345{
    23182346#if DEBUG_RPC_VMM_DELETE_VSEG
     
    23242352#endif
    23252353
    2326 // check RPC "index" and "blocking" arguments
    2327 assert( (rpc->blocking == false) , "must be non-blocking\n");
    2328 assert( (rpc->index == RPC_VMM_DELETE_VSEG ) , "bad RPC index\n" );
     2354    uint32_t    responses = 1;
     2355    rpc_desc_t  rpc;
     2356
     2357    // initialise RPC descriptor header
     2358    rpc.index    = RPC_VMM_DELETE_VSEG;
     2359    rpc.blocking = true;
     2360    rpc.rsp      = &responses;
     2361
     2362    // set input arguments in RPC descriptor
     2363    rpc.args[0] = (uint64_t)pid;
     2364    rpc.args[1] = (uint64_t)vaddr;
    23292365
    23302366    // register RPC request in remote RPC fifo
    2331     rpc_send( cxy , rpc );
     2367    rpc_send( cxy , &rpc );
    23322368
    23332369#if DEBUG_RPC_VMM_DELETE_VSEG
     
    23702406
    23712407/////////////////////////////////////////////////////////////////////////////////////////
    2372 // [27]          Marshaling functions attached to RPC_VMM_CREATE_VSEG (blocking)
     2408// [27]          Marshaling functions attached to RPC_VMM_CREATE_VSEG
    23732409/////////////////////////////////////////////////////////////////////////////////////////
    23742410
     
    23932429#endif
    23942430
    2395     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     2431    uint32_t responses = 1;
    23962432
    23972433    // initialise RPC descriptor header
     
    23992435    rpc.index    = RPC_VMM_CREATE_VSEG;
    24002436    rpc.blocking = true;
    2401     rpc.responses = 1;
     2437    rpc.rsp      = &responses;
    24022438
    24032439    // set input arguments in RPC descriptor
     
    24722508
    24732509/////////////////////////////////////////////////////////////////////////////////////////
    2474 // [28]          Marshaling functions attached to RPC_VMM_SET_COW (blocking)
     2510// [28]          Marshaling functions attached to RPC_VMM_SET_COW
    24752511/////////////////////////////////////////////////////////////////////////////////////////
    24762512
     
    24792515                             process_t * process )
    24802516{
    2481     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     2517#if DEBUG_RPC_VMM_SET_COW
     2518thread_t * this = CURRENT_THREAD;
     2519uint32_t cycle = (uint32_t)hal_get_cycles();
     2520if( cycle > DEBUG_RPC_VMM_SET_COW )
     2521printk("\n[%s] thread[%x,%x] on core %d enter / cycle %d\n",
     2522__FUNCTION__, this->process->pid, this->trdid, this->core->lid , cycle );
     2523#endif
     2524
     2525    uint32_t responses = 1;
    24822526
    24832527    // initialise RPC descriptor header
     
    24852529    rpc.index    = RPC_VMM_SET_COW;
    24862530    rpc.blocking = true;
    2487     rpc.responses = 1;
     2531    rpc.rsp      = &responses;
    24882532
    24892533    // set input arguments in RPC descriptor
     
    24932537    rpc_send( cxy , &rpc );
    24942538
     2539#if DEBUG_RPC_VMM_SET_COW
     2540cycle = (uint32_t)hal_get_cycles();
     2541if( cycle > DEBUG_RPC_VMM_SET_COW )
     2542printk("\n[%s] thread[%x,%x] on core %d exit / cycle %d\n",
     2543__FUNCTION__, this->process->pid, this->trdid, this->core->lid , cycle );
     2544#endif
    24952545}
    24962546
     
    24982548void rpc_vmm_set_cow_server( xptr_t xp )
    24992549{
     2550#if DEBUG_RPC_VMM_SET_COW
     2551thread_t * this = CURRENT_THREAD;
     2552uint32_t cycle = (uint32_t)hal_get_cycles();
     2553if( cycle > DEBUG_RPC_VMM_SET_COW )
     2554printk("\n[%s] thread[%x,%x] on core %d enter / cycle %d\n",
     2555__FUNCTION__, this->process->pid, this->trdid, this->core->lid , cycle );
     2556#endif
     2557
    25002558    process_t * process;
    25012559
     
    25102568    vmm_set_cow( process );
    25112569
    2512 }
    2513 
    2514 /////////////////////////////////////////////////////////////////////////////////////////
    2515 // [29]          Marshaling functions attached to RPC_VMM_DISPLAY (blocking)
     2570#if DEBUG_RPC_VMM_SET_COW
     2571cycle = (uint32_t)hal_get_cycles();
     2572if( cycle > DEBUG_RPC_VMM_SET_COW )
     2573printk("\n[%s] thread[%x,%x] on core %d exit / cycle %d\n",
     2574__FUNCTION__, this->process->pid, this->trdid, this->core->lid , cycle );
     2575#endif
     2576}
     2577
     2578/////////////////////////////////////////////////////////////////////////////////////////
     2579// [29]          Marshaling functions attached to RPC_VMM_DISPLAY
    25162580/////////////////////////////////////////////////////////////////////////////////////////
    25172581
     
    25212585                             bool_t      detailed )
    25222586{
    2523     assert( (cxy != local_cxy) , "server cluster is not remote\n");
     2587#if DEBUG_RPC_VMM_DISPLAY
     2588thread_t * this = CURRENT_THREAD;
     2589uint32_t cycle = (uint32_t)hal_get_cycles();
     2590if( cycle > DEBUG_RPC_VMM_DISPLAY )
     2591printk("\n[%s] thread[%x,%x] on core %d enter / cycle %d\n",
     2592__FUNCTION__, this->process->pid, this->trdid, this->core->lid , cycle );
     2593#endif
     2594
     2595    uint32_t responses = 1;
    25242596
    25252597    // initialise RPC descriptor header
     
    25272599    rpc.index    = RPC_VMM_DISPLAY;
    25282600    rpc.blocking = true;
    2529     rpc.responses = 1;
     2601    rpc.rsp      = &responses;
    25302602
    25312603    // set input arguments in RPC descriptor
     
    25362608    rpc_send( cxy , &rpc );
    25372609
     2610#if DEBUG_RPC_VMM_DISPLAY
     2611cycle = (uint32_t)hal_get_cycles();
     2612if( cycle > DEBUG_RPC_VMM_DISPLAY )
     2613printk("\n[%s] thread[%x,%x] on core %d exit / cycle %d\n",
     2614__FUNCTION__, this->process->pid, this->trdid, this->core->lid , cycle );
     2615#endif
    25382616}
    25392617
     
    25412619void rpc_vmm_display_server( xptr_t xp )
    25422620{
     2621#if DEBUG_RPC_VMM_DISPLAY
     2622thread_t * this = CURRENT_THREAD;
     2623uint32_t cycle = (uint32_t)hal_get_cycles();
     2624if( cycle > DEBUG_RPC_VMM_DISPLAY )
     2625printk("\n[%s] thread[%x,%x] on core %d enter / cycle %d\n",
     2626__FUNCTION__, this->process->pid, this->trdid, this->core->lid , cycle );
     2627#endif
     2628
    25432629    process_t * process;
    25442630    bool_t      detailed;
     
    25552641    vmm_display( process , detailed );
    25562642
    2557 }
    2558 
    2559 
     2643#if DEBUG_RPC_VMM_DISPLAY
     2644cycle = (uint32_t)hal_get_cycles();
     2645if( cycle > DEBUG_RPC_VMM_DISPLAY )
     2646printk("\n[%s] thread[%x,%x] on core %d exit / cycle %d\n",
     2647__FUNCTION__, this->process->pid, this->trdid, this->core->lid , cycle );
     2648#endif
     2649}
     2650
     2651
  • trunk/kernel/kern/rpc.h

    r614 r619  
    6969    RPC_THREAD_KERNEL_CREATE      = 7,
    7070    RPC_UNDEFINED_8               = 8,
    71     RPC_PROCESS_SIGACTION         = 9,       // non blocking
     71    RPC_PROCESS_SIGACTION         = 9,
    7272
    7373    RPC_VFS_INODE_CREATE          = 10,
     
    8888    RPC_UNDEFINED_24              = 24,
    8989    RPC_MAPPER_HANDLE_MISS        = 25,
    90     RPC_VMM_DELETE_VSEG           = 26,      // non blocking
     90    RPC_VMM_DELETE_VSEG           = 26,
    9191    RPC_VMM_CREATE_VSEG           = 27,
    9292    RPC_VMM_SET_COW               = 28,
     
    105105
    106106/***********************************************************************************
    107  *  This structure defines the RPC descriptor
     107 *  This structure defines the RPC descriptor (100 bytes on a 32bits core)
    108108 **********************************************************************************/
    109109
    110110typedef struct rpc_desc_s
    111111{
    112         rpc_index_t         index;       /*! index of requested RPC service          */
    113         volatile uint32_t   responses;   /*! number of expected responses            */
    114     struct thread_s   * thread;      /*! local pointer on client thread          */
    115     uint32_t            lid;         /*! index of core running the calling thread */
    116     bool_t              blocking;    /*! blocking RPC when true                  */
    117     uint64_t            args[10];    /*! input/output arguments buffer           */
     112        rpc_index_t         index;       /*! index of requested RPC service      ( 4) */
     113        uint32_t          * rsp;         /*! local pointer on responses counter  ( 4) */
     114    struct thread_s   * thread;      /*! local pointer on client thread      ( 4) */
     115    uint32_t            lid;         /*! index of core running client thread ( 4) */
     116    bool_t              blocking;    /*! simple RPC mode when true           ( 4) */
     117    uint64_t            args[10];    /*! input/output arguments buffer       (80) */
    118118}
    119119rpc_desc_t;
     
    161161 * - it block on IDLE and deschedule otherwise. 
    162162 **********************************************************************************/
    163 void rpc_thread_func( void );
     163void rpc_server_func( void );
    164164
    165165/***********************************************************************************
     
    309309
    310310/***********************************************************************************
    311  * [9] The non blocking RPC_PROCESS_SIGACTION allows any client thread running in
    312  * any cluster to send parallel RPC requests to one or several servers (that can be
    313  * local or remote), to execute a given sigaction, defined by the <action_type>
    314  * argument[1], for a given process identified by the <pid> argument[0].
    315  *
    316  * WARNING : It is implemented as a NON BLOCKING RPC, that can be sent in parallel
    317  * to several servers. The RPC descriptor, containing the <action_type> and <pid>
    318  * arguments, as well as the RPC <index>, <blocked>, and <response> fields, must
    319  * be allocated and initialised by the calling function itself.
    320  * Each RPC server thread atomically decrements the <response> field in this
    321  * shared RPC descriptor. The last server thread unblock the client thread,
    322  * that blocked only after sending all parallel RPC requests to all servers.
     311 * [9] The RPC_PROCESS_SIGACTION allows any client thread to request any cluster to
     312 * execute a given sigaction, defined by the <action_type> for a given process,
     313 * identified by the <pid> argument.
    323314 ***********************************************************************************
    324315 * @ cxy     : server cluster identifier.
    325  * @ rpc     : pointer on shared RPC descriptor initialized by the client thread.
    326  **********************************************************************************/
    327 void rpc_process_sigaction_client( cxy_t               cxy,
    328                                    struct rpc_desc_s * rpc );
     316 * @ pid     : [in] target process identifier.
     317 * @ action  : [in] sigaction index.
     318 **********************************************************************************/
     319void rpc_process_sigaction_client( cxy_t     cxy,
     320                                   pid_t     pid,
     321                                   uint32_t  action );
    329322                             
    330323void rpc_process_sigaction_server( xptr_t xp );
     
    596589
    597590/***********************************************************************************
    598  * [26] The non blocking RPC_VMM_DELETE_VSEG allows any client thread running in
    599  * any cluster to send parallel RPC requests to one or several clusters (that can be
    600  * local or remote), to delete from a given VMM, identified by the <pid> argument[0]
    601  * a given vseg, identified by the <vaddr> argument[1].
    602  *
    603  * WARNING : It is implemented as a NON BLOCKING RPC, that can be sent in parallel
    604  * to several servers. The RPC descriptor, containing the <pid> and <vaddr>
    605  * arguments, as well as the RPC <index>, <blocked>, and <response> fields, must
    606  * be allocated and initialised by the calling function itself.
    607  * Each RPC server thread atomically decrements the the <response> field in this
    608  * shared RPC descriptor. The last server thread unblock the client thread,
    609  * that blocked only after sending all paralle RPC requests to all servers.
     591 * [26] The RPC_VMM_DELETE_VSEG allows any client thread  to request a remote
     592 * cluster to delete from a given VMM, identified by the <pid> argument
     593 * a given vseg, identified by the <vaddr> argument.
    610594 ***********************************************************************************
    611595 * @ cxy         : server cluster identifier.
    612  * @ rpc     : pointer on shared RPC descriptor initialized by the client thread.
    613  **********************************************************************************/
    614 void rpc_vmm_delete_vseg_client( cxy_t               cxy,
    615                                  struct rpc_desc_s * rpc );
     596 * @ pid         : [in] target process identifier.
     597 * @ vaddr       : [in] vseg base address.
     598 **********************************************************************************/
     599void rpc_vmm_delete_vseg_client( cxy_t       cxy,
     600                                 pid_t       pid,
     601                                 intptr_t    vaddr );
    616602 
    617603void rpc_vmm_delete_vseg_server( xptr_t xp );
  • trunk/kernel/kern/scheduler.c

    r614 r619  
    2929#include <printk.h>
    3030#include <list.h>
     31#include <rpc.h>
    3132#include <core.h>
    3233#include <thread.h>
     
    146147////////////////////////////////////////////////////////////////////////////////////////////
    147148// This static function is the only function that can actually delete a thread,
    148 // and the associated process descriptor, if required.
    149 // It is private, because it is called by the sched_yield() public function.
     149// (and the associated process descriptor if required).
     150// It is private, because it is only called by the sched_yield() public function.
    150151// It scan all threads attached to a given scheduler, and executes the relevant
    151152// actions for two types of pending requests:
     
    376377        error = thread_kernel_create( &thread,
    377378                                      THREAD_RPC,
    378                                               &rpc_thread_func,
     379                                              &rpc_server_func,
    379380                                      NULL,
    380381                                          lid );
  • trunk/kernel/kern/thread.c

    r611 r619  
    13781378}  // end thread_assert_can_yield()
    13791379
    1380 //////////////////////////////////////////////////
    1381 void thread_display_busylocks( xptr_t  thread_xp )
    1382 {
    1383     // get cluster and local pointer of target thread
     1380//////////////////////////////////////////////////////
     1381void thread_display_busylocks( xptr_t       thread_xp,
     1382                               const char * string )
     1383{
    13841384    cxy_t      thread_cxy = GET_CXY( thread_xp );
    13851385    thread_t * thread_ptr = GET_PTR( thread_xp );
     
    13891389    xptr_t    iter_xp;
    13901390
    1391     // get target thread TRDID and busylocks
    1392     trdid_t  trdid = hal_remote_l32(XPTR( thread_cxy , &thread_ptr->trdid ));
    1393     uint32_t locks = hal_remote_l32(XPTR( thread_cxy , &thread_ptr->busylocks ));
    1394 
    1395     // get target thread process and PID;
    1396     process_t * process = hal_remote_lpt(XPTR( thread_cxy , &thread_ptr->process ));
    1397     pid_t       pid     = hal_remote_l32(XPTR( thread_cxy , &process->pid ));
      1391    // get relevant info from target thread descriptor
     1392    uint32_t    locks   = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->busylocks ) );
     1393    trdid_t     trdid   = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) );
     1394    process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) );
     1395    pid_t       pid     = hal_remote_l32( XPTR( thread_cxy , &process->pid ) );
    13981396
    13991397    // get extended pointer on root of busylocks
    1400     xptr_t    root_xp = XPTR( thread_cxy , &thread_ptr->busylocks_root );
     1398    xptr_t root_xp = XPTR( thread_cxy , &thread_ptr->busylocks_root );
    14011399
    14021400    // get pointers on TXT0 chdev
     
    14121410
    14131411    // display header
    1414     nolock_printk("\n***** thread %x in process %x : %d busylocks at cycle %d\n",
    1415     trdid, pid, locks, (uint32_t)hal_get_cycles() );
     1412    nolock_printk("\n***** thread[%x,%x] in <%s> : %d busylocks *****\n",
     1413    pid, trdid, string, locks );
    14161414
    14171415    // scan the xlist of busylocks when required
     
    14361434
    14371435    // display a warning
    1438     printk("\n[WARNING] set the DEBUG_BUSYLOCK parmeter in kernel_config.h"
    1439     " to display busylocks for thread %x/%x\n", thread_cxy, thread_ptr );
     1436    printk("\n[WARNING] set the DEBUG_BUSYLOCK parameter in kernel_config.h"
     1437    " to display busylocks for thread(%x,%x)\n", pid, trdid );
    14401438
    14411439}  // end thread_display_busylock()
  • trunk/kernel/kern/thread.h

    r610 r619  
    143143typedef struct thread_s
    144144{
    145         void              * cpu_context;     /*! pointer on CPU context switch            */
    146         void              * fpu_context;     /*! pointer on FPU context switch            */
     145        void              * cpu_context;     /*! CPU context used by sched_yield          */
     146        void              * fpu_context;     /*! FPU context used by sched_yield          */
    147147    void              * uzone_current;   /*! used by hal_do_syscall & hal_do_except   */
    148148    void              * uzone_previous;  /*! used by hal_do_syscall & hal_do_except   */
     
    465465
    466466/***************************************************************************************
    467  * This debug function display the list of busylocks (local or remote) currently owned
    468  * by a thread identified by the <thread_xp> argument.
    469  * WARNING : it can be called by the idbg tool, but the DEBUG_BUSYLOCK parameter
    470  * must be set in the kernel_config.h file.
      467 * This debug function displays the list of busylocks (local or remote)
      468 * currently owned by the thread identified by the <thread_xp> argument.
     469 * The <string> argument is printed in header (can be the calling function name).
     470 * WARNING : the DEBUG_BUSYLOCK parameter must be set in the kernel_config.h file.
    471471 ***************************************************************************************
    472472 * @ thread_xp  : extended pointer on target thread.
    473  **************************************************************************************/
    474 void thread_display_busylocks( xptr_t  thread_xp );
     473 * @ string     : defines the calling context.
     474 **************************************************************************************/
     475void thread_display_busylocks( xptr_t       thread_xp,
     476                               const char * string );
    475477
    476478
  • trunk/kernel/kernel_config.h

    r614 r619  
    3535////////////////////////////////////////////////////////////////////////////////////////////
    3636
    37 #define DEBUG_BARRIER                     0
    38 
    39 #define DEBUG_BUSYLOCK                    0
    40 #define DEBUG_BUSYLOCK_THREAD_XP          0x0000000000ULL  // selected thread xptr
     37#define DEBUG_BARRIER_CREATE              1
     38#define DEBUG_BARRIER_DESTROY             1
     39#define DEBUG_BARRIER_WAIT                0
     40
     41#define DEBUG_BUSYLOCK                    1
     42#define DEBUG_BUSYLOCK_THREAD_XP          0x0ULL  // selected thread xptr
    4143                 
    4244#define DEBUG_CHDEV_CMD_RX                0
     
    104106#define DEBUG_KMEM                        0
    105107
    106 #define DEBUG_KERNEL_INIT                 2
     108#define DEBUG_KERNEL_INIT                 0
    107109
    108110#define DEBUG_MAPPER_GET_PAGE             0
     
    153155#define DEBUG_RPC_VMM_GET_PTE             0
    154156#define DEBUG_RPC_VMM_GET_VSEG            0
    155 #define DEBUG_RPC_VMM_UNMAP_VSEG          0
     157#define DEBUG_RPC_VMM_DELETE_VSEG         0
    156158
    157159#define DEBUG_RWLOCK_TYPE                 0    // lock type (0 is undefined)
     
    276278#define LOCK_SEM_STATE        18   // remote (B)  protect user semaphore state
    277279#define LOCK_PROCESS_CWD      19   // remote (B)  protect current working directory in process
    278 
    279 #define BUSYLOCK_TYPE_MAX     20
     280#define LOCK_BARRIER_STATE    20   // remote (B)  protect user barrier state
    280281
    281282#define LOCK_CLUSTER_PREFTBL  21   // local  (Q)  protect array of ref. processes in cluster
     
    285286#define LOCK_CLUSTER_COPIES   24   // remote (Q)  protect xlist of process copies in cluster
    286287#define LOCK_PROCESS_CHILDREN 25   // remote (Q)  protect xlist of chidren process in process
    287 #define LOCK_PROCESS_USERSYNC 26   // remote (Q)  protect all lists of user synchros in process
     288#define LOCK_PROCESS_USERSYNC 26   // remote (Q)  protect lists of user synchros in process
    288289#define LOCK_PROCESS_FDARRAY  27   // remote (Q)  protect array of open files in owner process
    289290#define LOCK_FATFS_FREE       28   // remote (Q)  protect the FATFS context (free clusters)
     
    419420
    420421////////////////////////////////////////////////////////////////////////////////////////////
    421 //     PHYSICAL MEMORY MANAGEMENT (GENERIC)       
     422//                      PHYSICAL MEMORY MANAGEMENT        
    422423////////////////////////////////////////////////////////////////////////////////////////////
    423424
  • trunk/kernel/libk/remote_barrier.c

    r581 r619  
    22 * remote_barrier.c -  POSIX barrier implementation.
    33 *
    4  * Author   Alain Greiner (2016,2017,2018)
     4 * Author   Alain Greiner (2016,2017,2018,2019)
    55 *
    66 * Copyright (c) UPMC Sorbonne Universites
     
    2323
    2424#include <hal_kernel_types.h>
     25#include <hal_macros.h>
    2526#include <hal_remote.h>
    2627#include <hal_irqmask.h>
     
    3334#include <remote_barrier.h>
    3435
     36////////////////////////////////////////////////////
      37//  generic (implementation independent) functions
     38////////////////////////////////////////////////////
    3539
    3640///////////////////////////////////////////////////
    37 xptr_t remote_barrier_from_ident( intptr_t  ident )
     41xptr_t generic_barrier_from_ident( intptr_t  ident )
    3842{
    3943    // get pointer on local process_descriptor
    4044    process_t * process = CURRENT_THREAD->process;
    4145
    42     // get extended pointer on reference process
    43     xptr_t      ref_xp = process->ref_xp;
    44 
    45     // get cluster and local pointer on reference process
     46    // get pointers on reference process
     47    xptr_t         ref_xp  = process->ref_xp;
    4648    cxy_t          ref_cxy = GET_CXY( ref_xp );
    4749    process_t    * ref_ptr = (process_t *)GET_PTR( ref_xp );
     
    5153
    5254    // scan reference process barriers list
    53     xptr_t             iter_xp;
    54     xptr_t             barrier_xp;
    55     cxy_t              barrier_cxy;
    56     remote_barrier_t * barrier_ptr;
    57     intptr_t           current;
    58     bool_t             found = false;
     55    xptr_t              iter_xp;
     56    xptr_t              barrier_xp;
     57    cxy_t               barrier_cxy;
     58    generic_barrier_t * barrier_ptr;
     59    intptr_t            current;
     60    bool_t              found = false;
    5961
    6062    XLIST_FOREACH( root_xp , iter_xp )
    6163    {
    62         barrier_xp  = XLIST_ELEMENT( iter_xp , remote_barrier_t , list );
     64        barrier_xp  = XLIST_ELEMENT( iter_xp , generic_barrier_t , list );
    6365        barrier_cxy = GET_CXY( barrier_xp );
    64         barrier_ptr = (remote_barrier_t *)GET_PTR( barrier_xp );
     66        barrier_ptr = (generic_barrier_t *)GET_PTR( barrier_xp );
    6567        current     = (intptr_t)hal_remote_lpt( XPTR( barrier_cxy , &barrier_ptr->ident ) );
    6668        if( ident == current )
     
    7375    if( found == false )  return XPTR_NULL;
    7476    else                  return barrier_xp;
    75 }
    76 
    77 //////////////////////////////////////////////
    78 error_t remote_barrier_create( intptr_t ident,
    79                                uint32_t count )
     77
     78} // end generic_barrier_from_ident()
     79
     80//////////////////////////////////////////////////////////////
     81error_t generic_barrier_create( intptr_t                ident,
     82                                uint32_t                count,
     83                                pthread_barrierattr_t * attr )
     84{
     85    xptr_t              gen_barrier_xp;   // extended pointer on generic barrier descriptor
     86    generic_barrier_t * gen_barrier_ptr;  // local pointer on generic barrier descriptor
     87    void              * barrier;          // local pointer on implementation barrier descriptor     
     88    kmem_req_t          req;              // kmem request
     89
     90    // get pointer on local process_descriptor
     91    process_t * process = CURRENT_THREAD->process;
     92
     93    // get pointers on reference process
     94    xptr_t         ref_xp  = process->ref_xp;
     95    cxy_t          ref_cxy = GET_CXY( ref_xp );
     96    process_t    * ref_ptr = (process_t *)GET_PTR( ref_xp );
     97
     98    // allocate memory for generic barrier descriptor
     99    if( ref_cxy == local_cxy )                         // reference cluster is local
     100    {
     101        req.type          = KMEM_GEN_BARRIER;
     102        req.flags         = AF_ZERO;
     103        gen_barrier_ptr   = kmem_alloc( &req );
     104        gen_barrier_xp    = XPTR( local_cxy , gen_barrier_ptr );
     105    }
     106    else                                               // reference cluster is remote
     107    {
     108        rpc_kcm_alloc_client( ref_cxy,
     109                              KMEM_GEN_BARRIER,
     110                              &gen_barrier_xp );
     111        gen_barrier_ptr = GET_PTR( gen_barrier_xp );
     112    }
     113
     114    if( gen_barrier_ptr == NULL )
     115    {
     116        printk("\n[ERROR] in %s : cannot create generic barrier\n", __FUNCTION__ );
     117        return -1;
     118    }
     119
     120    // create implementation specific barrier descriptor
     121    if( attr == NULL )                                    // simple barrier implementation
     122    {
     123        // create simple barrier descriptor
     124         barrier = simple_barrier_create( count );
     125
     126        if( barrier == NULL )
     127        {
     128            printk("\n[ERROR] in %s : cannot create simple barrier\n", __FUNCTION__);
     129            return -1;
     130        }
     131    }
     132    else                                                  // QDT barrier implementation
     133    {
     134        uint32_t x_size   = attr->x_size;
     135        uint32_t y_size   = attr->y_size;
     136        uint32_t nthreads = attr->nthreads;
     137
     138        // check attributes / count
     139        if( (x_size * y_size * nthreads) != count )
     140        {
     141            printk("\n[ERROR] in %s : count(%d) != x_size(%d) * y_size(%d) * nthreads(%d)\n",
     142            __FUNCTION__, count, x_size, y_size, nthreads );
     143            return -1;
     144        }
     145
     146        // create DQT barrier descriptor
     147        barrier = dqt_barrier_create( x_size , y_size , nthreads );
     148
     149        if( barrier == NULL )
     150        {
     151            printk("\n[ERROR] in %s : cannot create DQT barrier descriptor\n", __FUNCTION__);
     152            return -1;
     153        }
     154    }
     155
     156    // initialize the generic barrier descriptor
     157    hal_remote_spt( XPTR( ref_cxy , &gen_barrier_ptr->ident  ) , (void*)ident );
     158    hal_remote_s32( XPTR( ref_cxy , &gen_barrier_ptr->is_dqt ) , (attr != NULL) );
     159    hal_remote_spt( XPTR( ref_cxy , &gen_barrier_ptr->extend ) , barrier );
     160
     161    // build extended pointers on lock, root and entry for reference process xlist
     162    xptr_t root_xp  = XPTR( ref_cxy , &ref_ptr->barrier_root );
     163    xptr_t lock_xp  = XPTR( ref_cxy , &ref_ptr->sync_lock );
     164    xptr_t entry_xp = XPTR( ref_cxy , &gen_barrier_ptr->list );
     165
     166    // register barrier in reference process xlist of barriers
     167    remote_busylock_acquire( lock_xp );
     168    xlist_add_first( root_xp , entry_xp );
     169    remote_busylock_release( lock_xp );
     170
     171    return 0;
     172
      173}  // end generic_barrier_create()
     174
     175/////////////////////////////////////////////////////
     176void generic_barrier_destroy( xptr_t gen_barrier_xp )
     177{
     178    kmem_req_t  req;              // kmem request
     179
     180    // get pointer on local process_descriptor
     181    process_t * process = CURRENT_THREAD->process;
     182
     183    // get pointers on reference process
     184    xptr_t      ref_xp  = process->ref_xp;
     185    cxy_t       ref_cxy = GET_CXY( ref_xp );
     186    process_t * ref_ptr = GET_PTR( ref_xp );
     187
     188    // get cluster and local pointer on generic barrier descriptor
     189    generic_barrier_t * gen_barrier_ptr = GET_PTR( gen_barrier_xp );
     190    cxy_t               gen_barrier_cxy = GET_CXY( gen_barrier_xp );
     191
     192    // get barrier type and extension pointer
     193    bool_t  is_dqt = hal_remote_l32( XPTR( gen_barrier_cxy , &gen_barrier_ptr->is_dqt ) );
     194    void  * extend = hal_remote_lpt( XPTR( gen_barrier_cxy , &gen_barrier_ptr->extend ) );
     195
     196    // build extended pointer on implementation dependant barrier descriptor
     197    xptr_t barrier_xp = XPTR( gen_barrier_cxy , extend );
     198
     199    // delete the implementation specific barrier
     200    if( is_dqt ) dqt_barrier_destroy( barrier_xp );
     201    else         simple_barrier_destroy( barrier_xp );
     202
     203    // build extended pointers on lock and entry for reference process xlist
     204    xptr_t  lock_xp  = XPTR( ref_cxy , &ref_ptr->sync_lock );
     205    xptr_t  entry_xp = XPTR( gen_barrier_cxy , &gen_barrier_ptr->list );
     206
     207    // remove barrier from reference process xlist
     208    remote_busylock_acquire( lock_xp );
     209    xlist_unlink( entry_xp );
     210    remote_busylock_release( lock_xp );
     211
     212    // release memory allocated to barrier descriptor
     213    if( gen_barrier_cxy == local_cxy )           
     214    {
     215        req.type          = KMEM_GEN_BARRIER;
     216        req.ptr           = gen_barrier_ptr;
     217        kmem_free( &req );
     218    }
     219    else         
     220    {
     221        rpc_kcm_free_client( gen_barrier_cxy,
     222                             gen_barrier_ptr,
     223                             KMEM_GEN_BARRIER );
     224    }
     225}  // end generic_barrier_destroy()
     226
     227//////////////////////////////////////////////////
     228void generic_barrier_wait( xptr_t gen_barrier_xp )
     229{
     230    // get generic barrier descriptor cluster and pointer
     231    cxy_t               gen_barrier_cxy = GET_CXY( gen_barrier_xp );
     232    generic_barrier_t * gen_barrier_ptr = GET_PTR( gen_barrier_xp );
     233
     234    // get implementation type and extend local pointer
     235    bool_t  is_dqt = hal_remote_l32( XPTR( gen_barrier_cxy , &gen_barrier_ptr->is_dqt ) );
     236    void  * extend = hal_remote_lpt( XPTR( gen_barrier_cxy , &gen_barrier_ptr->extend ) );
     237
     238    // build extended pointer on implementation specific barrier descriptor
     239    xptr_t barrier_xp = XPTR( gen_barrier_cxy , extend );
     240
     241    // call the relevant wait function
     242    if( is_dqt ) dqt_barrier_wait( barrier_xp );
     243    else         simple_barrier_wait( barrier_xp );
     244   
     245}  // end generic_barrier_wait()
     246
     247
     248
     249
     250
     251/////////////////////////////////////////////////////////////
     252//      simple barrier functions
     253/////////////////////////////////////////////////////////////
     254
     255///////////////////////////////////////////////////////////
     256simple_barrier_t * simple_barrier_create( uint32_t  count )
    80257{
    81258    xptr_t             barrier_xp;
    82     remote_barrier_t * barrier_ptr;
    83 
    84     // get pointer on local process descriptor
     259    simple_barrier_t * barrier;
     260
     261    // get pointer on local client process descriptor
    85262    thread_t  * this    = CURRENT_THREAD;
    86263    process_t * process = this->process;
    87264
    88 #if DEBUG_BARRIER
     265    // get reference process cluster
     266    xptr_t         ref_xp  = process->ref_xp;
     267    cxy_t          ref_cxy = GET_CXY( ref_xp );
     268
     269    // allocate memory for simple barrier descriptor
     270    if( ref_cxy == local_cxy )                        // reference is local
     271    {
     272        kmem_req_t req;
     273        req.type      = KMEM_SMP_BARRIER;
     274        req.flags     = AF_ZERO;
     275        barrier       = kmem_alloc( &req );
     276        barrier_xp    = XPTR( local_cxy , barrier );
     277    }
     278    else                                             // reference is remote
     279    {
     280        rpc_kcm_alloc_client( ref_cxy,
     281                              KMEM_SMP_BARRIER,
     282                              &barrier_xp );
     283        barrier = GET_PTR( barrier_xp );
     284    }
     285
     286    if( barrier == NULL ) return NULL;
     287
     288    // initialise simple barrier descriptor
     289    hal_remote_s32      ( XPTR( ref_cxy , &barrier->arity )      , count );
     290    hal_remote_s32      ( XPTR( ref_cxy , &barrier->current    ) , 0 );
     291    hal_remote_s32      ( XPTR( ref_cxy , &barrier->sense      ) , 0 );
     292
     293    xlist_root_init     ( XPTR( ref_cxy , &barrier->root ) );
     294    remote_busylock_init( XPTR( ref_cxy , &barrier->lock ) , LOCK_BARRIER_STATE );
     295
     296#if DEBUG_BARRIER_CREATE
    89297uint32_t cycle = (uint32_t)hal_get_cycles();
    90 if( cycle > DEBUG_BARRIER )
    91 printk("\n[DBG] %s : thread %x in process %x enter / count %d / cycle %d\n",
    92 __FUNCTION__, this->trdid, process->pid, count, cycle );
    93 #endif
    94 
    95     // get extended pointer on reference process
    96     xptr_t      ref_xp = process->ref_xp;
    97 
    98     // get reference process cluster and local pointer
    99     cxy_t       ref_cxy = GET_CXY( ref_xp );
    100     process_t * ref_ptr = GET_PTR( ref_xp );
    101 
    102     // allocate memory for barrier descriptor
    103     if( ref_cxy == local_cxy )                  // local cluster is the reference
    104     {
    105         kmem_req_t req;
    106         req.type      = KMEM_BARRIER;
    107         req.flags     = AF_ZERO;
    108         barrier_ptr   = kmem_alloc( &req );
    109         barrier_xp    = XPTR( local_cxy , barrier_ptr );
    110     }
    111     else                                       // reference is remote
    112     {
    113         rpc_kcm_alloc_client( ref_cxy , KMEM_BARRIER , &barrier_xp );
    114         barrier_ptr = (remote_barrier_t *)GET_PTR( barrier_xp );
    115     }
    116 
    117     if( barrier_ptr == NULL ) return ENOMEM;
    118 
    119     // initialise barrier
    120     hal_remote_s32( XPTR( ref_cxy , &barrier_ptr->nb_threads ) , count );
    121     hal_remote_s32( XPTR( ref_cxy , &barrier_ptr->current    ) , 0 );
    122     hal_remote_s32( XPTR( ref_cxy , &barrier_ptr->sense      ) , 0 );
    123     hal_remote_spt( XPTR( ref_cxy , &barrier_ptr->ident      ) , (void*)ident );
    124 
    125     xlist_root_init( XPTR( ref_cxy , &barrier_ptr->root ) );
    126 
    127     // register  barrier in reference process xlist
    128     xptr_t root_xp  = XPTR( ref_cxy , &ref_ptr->barrier_root );
    129     xptr_t entry_xp = XPTR( ref_cxy , &barrier_ptr->list );
    130 
    131     remote_busylock_acquire( XPTR( ref_cxy , &ref_ptr->sync_lock ) );
    132     xlist_add_first( root_xp , entry_xp );
    133     remote_busylock_release( XPTR( ref_cxy , &ref_ptr->sync_lock ) );
    134 
    135 #if DEBUG_BARRIER
    136 cycle = (uint32_t)hal_get_cycles();
    137 if( cycle > DEBUG_BARRIER )
    138 printk("\n[DBG] %s : thread %x in process %x exit / barrier %x in cluster %x / cycle %d\n",
    139 __FUNCTION__, this->trdid, process->pid, barrier_ptr, ref_cxy, cycle );
    140 #endif
    141 
    142     return 0;
    143 
    144 }  // end remote_barrier_create()
     298if( cycle > DEBUG_BARRIER_CREATE )
     299printk("\n[%s] thread[%x,%x] created barrier (%x,%x) / count %d / cycle %d\n",
     300__FUNCTION__, process->pid, this->trdid, ref_cxy, barrier, count, cycle );
     301#endif
     302
     303    return barrier;
     304
     305}  // end simple_barrier_create()
    145306
    146307////////////////////////////////////////////////
    147 void remote_barrier_destroy( xptr_t barrier_xp )
     308void simple_barrier_destroy( xptr_t barrier_xp )
    148309{
    149     // get pointer on local process descriptor
    150     process_t * process = CURRENT_THREAD->process;
    151 
    152     // get extended pointer on reference process
    153     xptr_t      ref_xp = process->ref_xp;
    154 
    155     // get reference process cluster and local pointer
    156     cxy_t       ref_cxy = GET_CXY( ref_xp );
    157     process_t * ref_ptr = (process_t *)GET_PTR( ref_xp );
    158 
    159310    // get barrier cluster and local pointer
    160311    cxy_t              barrier_cxy = GET_CXY( barrier_xp );
    161     remote_barrier_t * barrier_ptr = (remote_barrier_t *)GET_PTR( barrier_xp );
    162 
    163     // remove barrier from reference process xlist
    164     remote_busylock_acquire( XPTR( ref_cxy , &ref_ptr->sync_lock ) );
    165     xlist_unlink( XPTR( barrier_cxy , &barrier_ptr->list ) );
    166     remote_busylock_release( XPTR( ref_cxy , &ref_ptr->sync_lock ) );
     312    simple_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
    167313
    168314    // release memory allocated for barrier descriptor
    169     if( barrier_cxy == local_cxy )                        // reference is local
     315    if( barrier_cxy == local_cxy )
    170316    {
    171317        kmem_req_t  req;
    172         req.type = KMEM_BARRIER;
     318        req.type = KMEM_SMP_BARRIER;
    173319        req.ptr  = barrier_ptr;
    174320        kmem_free( &req );
    175321    }
    176     else                                                  // reference is remote
    177     {
    178         rpc_kcm_free_client( barrier_cxy , barrier_ptr , KMEM_BARRIER );
    179     }
    180 }  // end remote_barrier_destroy()
     322    else 
     323    {
     324        rpc_kcm_free_client( barrier_cxy,
     325                             barrier_ptr,
     326                             KMEM_SMP_BARRIER );
     327    }
     328
     329#if DEBUG_BARRIER_DESTROY
     330uint32_t    cycle   = (uint32_t)hal_get_cycles();
     331thread_t  * this    = CURRENT_THREAD;
     332process_t * process = this->process;
     333if( cycle > DEBUG_BARRIER_DESTROY )
     334printk("\n[%s] thread[%x,%x] deleted barrier (%x,%x) / cycle %d\n",
     335__FUNCTION__, process->pid, this->trdid, barrier_ptr, barrier_cxy, cycle );
     336#endif
     337
     338}  // end simple_barrier_destroy()
    181339
    182340/////////////////////////////////////////////
    183 void remote_barrier_wait( xptr_t barrier_xp )
     341void simple_barrier_wait( xptr_t barrier_xp )
    184342{
    185343    uint32_t  expected;
    186344    uint32_t  sense;
    187345    uint32_t  current;
    188     uint32_t  nb_threads;
     346    uint32_t  arity;
    189347    xptr_t    root_xp;
    190348    xptr_t    lock_xp;
    191349    xptr_t    current_xp;
    192350    xptr_t    sense_xp;
    193     xptr_t    nb_threads_xp;
     351    xptr_t    arity_xp;
    194352
    195353    // get pointer on calling thread
     
    200358
    201359    // get cluster and local pointer on remote barrier
    202     remote_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
     360    simple_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
    203361    cxy_t              barrier_cxy = GET_CXY( barrier_xp );
    204362
    205 #if DEBUG_BARRIER
     363#if DEBUG_BARRIER_WAIT
    206364uint32_t cycle = (uint32_t)hal_get_cycles();
    207 if( cycle > DEBUG_BARRIER )
    208 printk("\n[DBG] %s : thread %x in process %x enter / barrier %x in cluster %x / cycle %d\n",
    209 __FUNCTION__, this->trdid, this->process->pid, barrier_ptr, barrier_cxy, cycle );
    210 #endif
    211 
    212     // compute extended pointers on various barrier fields
    213     lock_xp       = XPTR( barrier_cxy , &barrier_ptr->lock );
    214     root_xp       = XPTR( barrier_cxy , &barrier_ptr->root );
    215     current_xp    = XPTR( barrier_cxy , &barrier_ptr->current );
    216     sense_xp      = XPTR( barrier_cxy , &barrier_ptr->sense );
    217     nb_threads_xp = XPTR( barrier_cxy , &barrier_ptr->nb_threads );
    218 
    219     // take busylock protecting the remote_barrier
     365if( cycle > DEBUG_BARRIER_WAIT )
     366printk("\n[%s] thread[%x,%x] enter / barrier (%x,%x) / cycle %d\n",
     367__FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle );
     368#endif
     369
     370    // build extended pointers on various barrier descriptor fields
     371    lock_xp    = XPTR( barrier_cxy , &barrier_ptr->lock );
     372    root_xp    = XPTR( barrier_cxy , &barrier_ptr->root );
     373    current_xp = XPTR( barrier_cxy , &barrier_ptr->current );
     374    sense_xp   = XPTR( barrier_cxy , &barrier_ptr->sense );
     375    arity_xp   = XPTR( barrier_cxy , &barrier_ptr->arity );
     376
     377    // take busylock protecting the barrier state
    220378    remote_busylock_acquire( lock_xp );
    221379
    222 #if (DEBUG_BARRIER & 1)
    223 cycle = (uint32_t)hal_get_cycles();
    224 if( cycle > DEBUG_BARRIER )
    225 printk("\n[DBG] %s : thread %x in process %x get lock / cycle %d\n",
    226 __FUNCTION__, this->trdid, this->process->pid, cycle );
    227 #endif
    228 
    229     // get sense and nb_threads values from barrier descriptor
    230     sense      = hal_remote_l32( sense_xp );
    231     nb_threads = hal_remote_l32( nb_threads_xp );
     380    // get sense and threads values from barrier descriptor
     381    sense = hal_remote_l32( sense_xp );
     382    arity = hal_remote_l32( arity_xp );
    232383
    233384    // compute expected value
     
    235386    else              expected = 0;
    236387
    237 #if (DEBUG_BARRIER & 1)
    238 cycle = (uint32_t)hal_get_cycles();
    239 if( cycle > DEBUG_BARRIER )
    240 printk("\n[DBG] %s : thread %x in process %x / count %d / sense %d / cycle %d\n",
    241 __FUNCTION__, this->trdid, this->process->pid, nb_threads, sense, cycle );
    242 #endif
    243 
    244     // atomically increment current, and get value before increment
     388    // increment current number of arrived threads / get value before increment
    245389    current = hal_remote_atomic_add( current_xp , 1 );
    246390
     
    248392    // other threads block, register in queue, and deschedule
    249393
    250     if( current == (nb_threads-1) )                       // last thread
     394    if( current == (arity - 1) )                       // last thread
    251395    {
    252396        hal_remote_s32( current_xp , 0 );
     
    261405            thread_t * thread_ptr = GET_PTR( thread_xp );
    262406
    263 #if (DEBUG_BARRIER & 1)
    264 cycle = (uint32_t)hal_get_cycles();
    265 if( cycle > DEBUG_BARRIER )
    266 printk("\n[DBG] %s : thread %x in process %x / unblock thread %x / cycle %d\n",
    267 __FUNCTION__, this->trdid, this->process->pid, thread_ptr, cycle );
     407#if (DEBUG_BARRIER_WAIT & 1)
     408trdid_t     trdid   = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) );
     409process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) );
     410pid_t       pid     = hal_remote_l32( XPTR( thread_cxy , &process->pid ) );
     411if( cycle > DEBUG_BARRIER_WAIT )
     412printk("\n[%s] thread[%x,%x] unblocks thread[%x,%x]\n",
     413__FUNCTION__, this->process->pid, this->trdid, pid, trdid );
    268414#endif
    269415
     
    275421        }
    276422
     423        // release busylock protecting the barrier
     424        remote_busylock_release( lock_xp );
     425    }
     426    else                                             // not the last thread
     427    {
     428
     429#if (DEBUG_BARRIER_WAIT & 1)
     430if( cycle > DEBUG_BARRIER_WAIT )
     431printk("\n[%s] thread[%x,%x] blocks\n",
     432__FUNCTION__, this->process->pid, this->trdid );
     433#endif
     434
     435        // register calling thread in barrier waiting queue
     436        xlist_add_last( root_xp , XPTR( local_cxy , &this->wait_list ) );
     437
     438        // block calling thread
     439        thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_USERSYNC );
     440
    277441        // release busylock protecting the remote_barrier
    278442        remote_busylock_release( lock_xp );
    279     }
    280     else                                             // not the last thread
    281     {
    282 
    283 #if (DEBUG_BARRIER & 1)
     443
     444        // deschedule
     445        sched_yield("blocked on barrier");
     446    }
     447
     448#if DEBUG_BARRIER_WAIT
    284449cycle = (uint32_t)hal_get_cycles();
    285 if( cycle > DEBUG_BARRIER )
    286 printk("\n[DBG] %s : thread %x in process %x / blocked / cycle %d\n",
    287 __FUNCTION__, this->trdid, this->process->pid, cycle );
    288 #endif
    289 
     450if( cycle > DEBUG_BARRIER_WAIT )
     451printk("\n[%s] thread[%x,%x] exit / barrier (%x,%x) / cycle %d\n",
     452__FUNCTION__, this->trdid, this->process->pid, barrier_cxy, barrier_ptr, cycle );
     453#endif
     454
     455}  // end simple_barrier_wait()
     456
     457
     458/////////////////////////////////////////////////////////////
     459//      DQT barrier functions
     460/////////////////////////////////////////////////////////////
     461
     462static void dqt_barrier_increment( xptr_t node_xp );
     463
     464#if DEBUG_BARRIER_CREATE
     465static void dqt_barrier_display( xptr_t  barrier_xp );
     466#endif
     467
     468///////////////////////////////////////////////////////
     469dqt_barrier_t * dqt_barrier_create( uint32_t    x_size,
     470                                    uint32_t    y_size,
     471                                    uint32_t    nthreads )
     472{
     473    page_t        * dqt_page;
     474    xptr_t          dqt_page_xp;     
     475    page_t        * rpc_page;
     476    xptr_t          rpc_page_xp;     
     477    dqt_barrier_t * barrier;       // local pointer on DQT barrier descriptor
     478    xptr_t          barrier_xp;    // extended pointer on DQT barrier descriptor
     479    uint32_t        z;             // actual DQT size == max(x_size,y_size)
     480    uint32_t        levels;        // actual number of DQT levels
     481    kmem_req_t      req;           // kmem request
     482    xptr_t          rpc_xp;        // extended pointer on RPC descriptors array
     483    rpc_desc_t    * rpc;           // pointer on RPC descriptors array
     484    uint32_t        responses;     // responses counter for parallel RPCs
     485    reg_t           save_sr;       // for critical section
     486    uint32_t        x;             // X coordinate in QDT mesh
     487    uint32_t        y;             // Y coordinate in QDT mesh
     488    uint32_t        l;             // level coordinate
     489
     490    // compute size and number of DQT levels
     491    z      = (x_size > y_size) ? x_size : y_size;
     492    levels = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 3 : (z < 9) ? 4 : 5;
     493
     494// check x_size and y_size arguments
     495assert( (z <= 16) , "DQT dqth larger than (16*16)\n");
     496
     497// check RPC descriptor size
     498assert( (sizeof(rpc_desc_t) <= 128), "RPC descriptor  larger than 128 bytes\n");
     499
     500// check size of an array of 5 DQT nodes
     501assert( (sizeof(dqt_node_t) * 5 <= 512 ), "array of DQT nodes larger than 512 bytes\n");
     502
     503// check size of DQT barrier descriptor
     504assert( (sizeof(dqt_barrier_t) <= 0x4000 ), "DQT barrier descriptor larger than 4 pages\n");
     505
     506    // get pointer on local client process descriptor
     507    thread_t  * this    = CURRENT_THREAD;
     508    process_t * process = this->process;
     509
     510#if DEBUG_BARRIER_CREATE
     511uint32_t   cycle = (uint32_t)hal_get_cycles();
     512if( cycle > DEBUG_BARRIER_CREATE )
     513printk("\n[%s] thread[%x,%x] enter : x_size %d / y_size %d / levels %d / cycle %d\n",
     514__FUNCTION__, process->pid, this->trdid, x_size, y_size, levels, cycle );
     515#endif
     516
     517    // get reference process cluster
     518    xptr_t         ref_xp  = process->ref_xp;
     519    cxy_t          ref_cxy = GET_CXY( ref_xp );
     520
     521    // 1. allocate memory for DQT barrier descriptor in reference cluster
     522    if( ref_cxy == local_cxy )                   
     523     {
     524        req.type     = KMEM_PAGE;
     525        req.size     = 2;               // 4 pages == 16 Kbytes
     526        req.flags    = AF_ZERO;
     527        dqt_page     = kmem_alloc( &req );
     528        dqt_page_xp  = XPTR( local_cxy , dqt_page );
     529    }
     530    else                                         
     531    {
     532        rpc_pmem_get_pages_client( ref_cxy,
     533                                   2,
     534                                   &dqt_page );
     535        dqt_page_xp  = XPTR( ref_cxy , dqt_page );
     536    }
     537
     538    if( dqt_page == NULL ) return NULL;
     539
     540    // get pointers on DQT barrier descriptor
     541    barrier_xp = ppm_page2base( dqt_page_xp );
     542    barrier    = GET_PTR( barrier_xp );
     543
     544    // initialize global parameters in DQT barrier descriptor
     545    hal_remote_s32( XPTR( ref_cxy , &barrier->x_size   ) , x_size );
     546    hal_remote_s32( XPTR( ref_cxy , &barrier->y_size   ) , x_size );
     547    hal_remote_s32( XPTR( ref_cxy , &barrier->nthreads ) , nthreads );
     548
     549#if DEBUG_BARRIER_CREATE
     550if( cycle > DEBUG_BARRIER_CREATE )
     551printk("\n[%s] thread[%x,%x] created DQT barrier descriptor at (%x,%x)\n",
     552__FUNCTION__, process->pid, this->trdid, ref_cxy, barrier );
     553#endif
     554
     555    // 2. allocate memory from local cluster for an array of 256 RPCs descriptors
     556    //    cannot share the RPC descriptor, because the returned argument is not shared
     557    req.type    = KMEM_PAGE;
     558    req.size    = 3;            // 8 pages == 32 Kbytes
     559    req.flags   = AF_ZERO;
     560    rpc_page    = kmem_alloc( &req );
     561    rpc_page_xp = XPTR( local_cxy , rpc_page );
     562
     563    // get pointers on RPC descriptors array
     564    rpc_xp    = ppm_page2base( rpc_page_xp );
     565    rpc       = GET_PTR( rpc_xp );
     566
     567#if DEBUG_BARRIER_CREATE
     568if( cycle > DEBUG_BARRIER_CREATE )
     569printk("\n[%s] thread[%x,%x] created RPC descriptors array at (%x,%s)\n",
     570__FUNCTION__, process->pid, this->trdid, local_cxy, rpc );
     571#endif
     572
     573    // 3. send parallel RPCs to all existing clusters covered by the DQT
     574    //    to allocate memory for an array of 5 DQT nodes in each cluster
     575    //    (5 nodes per cluster <= 512 bytes per cluster)
     576
     577    responses = 0;    // initialize RPC responses counter
     578
     579    // mask IRQs
     580    hal_disable_irq( &save_sr);
     581
     582    // client thread blocks itself
     583    thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC );
     584
     585    for ( x = 0 ; x < x_size ; x++ )
     586    {
     587        for ( y = 0 ; y < y_size ; y++ )
     588        {
     589            // send RPC to existing clusters only
     590            if( LOCAL_CLUSTER->cluster_info[x][y] )
     591            {
     592                cxy_t cxy = HAL_CXY_FROM_XY( x , y );   // target cluster identifier
     593
     594                // build a specific RPC descriptor for each target cluster
     595                rpc[cxy].rsp       = &responses;
     596                rpc[cxy].blocking  = false;
     597                rpc[cxy].index     = RPC_KCM_ALLOC;
     598                rpc[cxy].thread    = this;
     599                rpc[cxy].lid       = this->core->lid;
     600                rpc[cxy].args[0]   = (uint64_t)KMEM_512_BYTES; 
     601
     602                // atomically increment expected responses counter
     603                hal_atomic_add( &responses , 1 );
     604
     605                // send a non-blocking RPC to allocate 512 bytes in target cluster
     606                rpc_send( cxy , &rpc[cxy] );
     607            }
     608        }
     609    }
     610
     611#if DEBUG_BARRIER_CREATE
     612if( cycle > DEBUG_BARRIER_CREATE )
     613printk("\n[%s] thread[%x,%x] sent all RPC requests to allocate dqt_nodes array\n",
     614__FUNCTION__, process->pid, this->trdid );
     615#endif
     616
     617    // client thread deschedule
     618    sched_yield("blocked on parallel rpc_kcm_alloc");
     619
     620    // restore IRQs
     621    hal_restore_irq( save_sr);
     622
     623    // 4. initialize the node_xp[x][y][l] array in DQT barrier descriptor
     624    //    the node_xp[x][y][0] value is available in rpc.args[1]
     625
     626#if DEBUG_BARRIER_CREATE
     627if( cycle > DEBUG_BARRIER_CREATE )
     628printk("\n[%s] thread[%x,%x] initialises array of pointers on dqt_nodes\n",
     629__FUNCTION__, process->pid, this->trdid );
     630#endif
     631
     632    for ( x = 0 ; x < x_size ; x++ )
     633    {
     634        for ( y = 0 ; y < y_size ; y++ )
     635        {
     636            cxy_t    cxy      = HAL_CXY_FROM_XY( x , y );   // target cluster identifier
     637            xptr_t   array_xp = (xptr_t)rpc[cxy].args[1];   // x_pointer on node array
     638            uint32_t offset   = sizeof( dqt_node_t );       // size of a DQT node
     639               
     640            // set values into the node_xp[x][y][l] array
     641            for ( l = 0 ; l < levels ; l++ )
     642            {
     643                xptr_t  node_xp = array_xp + (offset * l);
     644                hal_remote_s64( XPTR( ref_cxy , &barrier->node_xp[x][y][l] ), node_xp );
     645
     646#if DEBUG_BARRIER_CREATE
     647if( cycle > DEBUG_BARRIER_CREATE )
     648printk(" - dqt_node_xp[%d,%d,%d] = (%x,%x) / &dqt_node_xp = %x\n",
     649x , y , l , GET_CXY( node_xp ), GET_PTR( node_xp ), &barrier->node_xp[x][y][l] );
     650#endif
     651            }
     652        }
     653    }
     654
     655    // 5. release memory locally allocated for the RPCs array
     656    req.type  = KMEM_PAGE;
     657    req.ptr   = rpc_page;
     658    kmem_free( &req );
     659
     660#if DEBUG_BARRIER_CREATE
     661if( cycle > DEBUG_BARRIER_CREATE )
     662printk("\n[%s] thread[%x,%x] released memory for RPC descriptors array\n",
     663__FUNCTION__, process->pid, this->trdid );
     664#endif
     665
     666    // 6. initialise all distributed DQT nodes using remote accesses
     667    //    and the pointers stored in the node_xp[x][y][l] array
     668    for ( x = 0 ; x < x_size ; x++ )
     669    {
     670        for ( y = 0 ; y < y_size ; y++ )
     671        {
     672            // initialize existing clusters only
     673            if( LOCAL_CLUSTER->cluster_info[x][y] )
     674            {
     675                for ( l = 0 ; l < levels ; l++ )
     676                {
     677                                    xptr_t    parent_xp;
     678                    xptr_t    child_xp[4];
     679                    uint32_t  arity = 0;
     680
     681                    // get DQT node pointers
     682                    xptr_t       node_xp  = hal_remote_l64( XPTR( ref_cxy,
     683                                            &barrier->node_xp[x][y][l] ) );
     684                    cxy_t        node_cxy = GET_CXY( node_xp );
     685                    dqt_node_t * node_ptr = GET_PTR( node_xp );
     686
     687                    // compute arity and child_xp[i]
     688                    if (l == 0 )                            // bottom DQT node
     689                    {
     690                        arity       = nthreads;
     691
     692                        child_xp[0] = XPTR_NULL;
     693                        child_xp[1] = XPTR_NULL;
     694                        child_xp[2] = XPTR_NULL;
     695                        child_xp[3] = XPTR_NULL;
     696                    }
     697                    else                                    // not a bottom DQT node
     698                    {
     699                        arity = 0;
     700
     701                        // only few non-bottom nodes must be initialised
     702                        if( ((x & ((1<<l)-1)) == 0) && ((y & ((1<<l)-1)) == 0) )
     703                        {
     704                            uint32_t cx[4];       // x coordinate for children
     705                            uint32_t cy[4];       // y coordinate for children
     706                            uint32_t i;
     707
     708                            // the child0 coordinates are equal to the parent coordinates
     709                            // other children coordinates depend on the level value
     710                            cx[0] = x;
     711                            cy[0] = y;
     712
     713                            cx[1] = x;
     714                            cy[1] = y + (1 << (l-1));
     715
     716                            cx[2] = x + (1 << (l-1));
     717                            cy[2] = y;
     718
     719                            cx[3] = x + (1 << (l-1));
     720                            cy[3] = y + (1 << (l-1));
     721
     722                            for ( i = 0 ; i < 4 ; i++ )
     723                            {
     724                                // child pointer is NULL if  outside the mesh
     725                                if ( (cx[i] < x_size) && (cy[i] < y_size) )
     726                                {
     727                                    // get child_xp[i]
     728                                    child_xp[i] = hal_remote_l64( XPTR( ref_cxy,
     729                                                  &barrier->node_xp[cx[i]][cy[i]][l-1] ) );
     730
     731                                    // increment arity
     732                                    arity++;
     733                                }
     734                                else
     735                                {
     736                                    child_xp[i] = XPTR_NULL;
     737                                }
     738                            }
     739                        }
     740                    }
     741
     742                    // compute parent_xp
     743                    if( l == (levels - 1) )                      // root DQT node
     744                    {
     745                        parent_xp = XPTR_NULL;
     746                    }
     747                    else                                          // not the root
     748                    {
     749                        uint32_t px = 0;           // parent X coordinate
     750                        uint32_t py = 0;           // parent Y coordinate
     751                        bool_t   found = false;
     752
     753                        // compute macro_cluster x_min, x_max, y_min, y_max               
     754                        uint32_t x_min = x & ~((1<<(l+1))-1);
     755                        uint32_t x_max = x_min + (1<<(l+1));
     756                        uint32_t y_min = y & ~((1<<(l+1))-1);
     757                        uint32_t y_max = y_min + (1<<(l+1));
     758
     759                        // scan all clusters in macro-cluster[x][y][l] / take first active
     760                        for( px = x_min ; px < x_max ; px++ )
     761                        {
     762                            for( py = y_min ; py < y_max ; py++ )
     763                            {
     764                                if( LOCAL_CLUSTER->cluster_info[px][py] ) found = true;
     765                                if( found ) break;
     766                            }
     767                            if( found ) break;
     768                        }
     769
     770                        parent_xp = hal_remote_l64( XPTR( ref_cxy ,
     771                                    &barrier->node_xp[px][py][l+1] ) );
     772                    }
     773
     774                    // initializes  the DQT node
     775                    hal_remote_s32( XPTR( node_cxy , &node_ptr->arity )       , arity );   
     776                    hal_remote_s32( XPTR( node_cxy , &node_ptr->current )     , 0 );   
     777                    hal_remote_s32( XPTR( node_cxy , &node_ptr->sense )       , 0 );   
     778                    hal_remote_s32( XPTR( node_cxy , &node_ptr->level )       , l );   
     779                    hal_remote_s64( XPTR( node_cxy , &node_ptr->parent_xp )   , parent_xp );
     780                    hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[0] ) , child_xp[0] );
     781                    hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[1] ) , child_xp[1] );
     782                    hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[2] ) , child_xp[2] );
     783                    hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[3] ) , child_xp[3] );
     784
     785                    xlist_root_init( XPTR( node_cxy , &node_ptr->root ) );
     786
     787                    remote_busylock_init( XPTR( node_cxy , &node_ptr->lock ),
     788                                          LOCK_BARRIER_STATE );
     789                }
     790            }
     791        }
     792    }
     793
     794#if DEBUG_BARRIER_CREATE
     795cycle = (uint32_t)hal_get_cycles();
     796if( cycle > DEBUG_BARRIER_CREATE )
     797printk("\n[%s] thread[%x,%x] completed DQT barrier initialisation / cycle %d\n",
     798__FUNCTION__, process->pid, this->trdid, cycle );
     799dqt_barrier_display( barrier_xp );
     800#endif
     801
     802    return barrier;
     803
     804}  // end dqt_barrier_create()
     805
     806///////////////////////////////////////////////
     807void dqt_barrier_destroy( xptr_t   barrier_xp )
     808{
     809    page_t     * rpc_page;
     810    xptr_t       rpc_page_xp;
     811    rpc_desc_t * rpc;                      // local pointer on RPC descriptors array
     812    xptr_t       rpc_xp;                   // extended pointer on RPC descriptor array
     813    reg_t        save_sr;                  // for critical section
     814    kmem_req_t   req;                      // kmem request
     815
     816    thread_t * this = CURRENT_THREAD;
     817
     818    // get DQT barrier descriptor cluster and local pointer
     819    dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
     820    cxy_t           barrier_cxy = GET_CXY( barrier_xp );
     821
     822#if DEBUG_BARRIER_DESTROY
     823uint32_t   cycle = (uint32_t)hal_get_cycles();
     824if( cycle > DEBUG_BARRIER_DESTROY )
     825printk("\n[%s] thread[%x,%x] enter for barrier (%x,%x) / cycle %d\n",
     826__FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle );
     827#endif
     828
     829    // get x_size and y_size global parameters
     830    uint32_t x_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->x_size ) );
     831    uint32_t y_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->y_size ) );
     832
     833    // 1. allocate memory from local cluster for an array of 256 RPCs descriptors
     834    //    cannot share the RPC descriptor, because the "buf" argument is not shared
     835    req.type    = KMEM_PAGE;
     836    req.size    = 3;            // 8 pages == 32 Kbytes
     837    req.flags   = AF_ZERO;
     838    rpc_page    = kmem_alloc( &req );
     839    rpc_page_xp = XPTR( local_cxy , rpc_page );
     840
     841    // get pointers on RPC descriptors array
     842    rpc_xp    = ppm_page2base( rpc_page_xp );
     843    rpc       = GET_PTR( rpc_xp );
     844   
     845    // 2. send parallel RPCs to all existing clusters covered by the DQT
     846    //    to release memory allocated for the arrays of DQT nodes in each cluster
     847
     848    uint32_t responses = 0;    // initialize RPC responses counter
     849
     850    // mask IRQs
     851    hal_disable_irq( &save_sr);
     852
     853    // client thread blocks itself
     854    thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC );
     855
     856    uint32_t x , y;
     857   
     858#if DEBUG_BARRIER_DESTROY
     859if( cycle > DEBUG_BARRIER_DESTROY )
     860printk("\n[%s] thread[%x,%x] send RPCs to release the distributed dqt_node array\n",
     861__FUNCTION__, this->process->pid, this->trdid );
     862#endif
     863
     864    for ( x = 0 ; x < x_size ; x++ )
     865    {
     866        for ( y = 0 ; y < y_size ; y++ )
     867        {
     868            // send RPC to existing cluster only
     869            if( LOCAL_CLUSTER->cluster_info[x][y] )
     870            {
     871                // compute target cluster identifier
     872                cxy_t   cxy       = HAL_CXY_FROM_XY( x , y );
     873
     874                // get local pointer on dqt_nodes array in target cluster 
     875                xptr_t  buf_xp_xp = XPTR( barrier_cxy , &barrier_ptr->node_xp[x][y][0] );
     876                xptr_t  buf_xp    = hal_remote_l64( buf_xp_xp );
     877                void  * buf       = GET_PTR( buf_xp );
     878
     879assert( (cxy == GET_CXY(buf_xp)) , "bad extended pointer on dqt_nodes array\n" );
     880
     881                // build a specific RPC descriptor
     882                rpc[cxy].rsp       = &responses;
     883                rpc[cxy].blocking  = false;
     884                rpc[cxy].index     = RPC_KCM_FREE;
     885                rpc[cxy].thread    = this;
     886                rpc[cxy].lid       = this->core->lid;
     887                rpc[cxy].args[0]   = (uint64_t)(intptr_t)buf; 
     888                rpc[cxy].args[1]   = (uint64_t)KMEM_512_BYTES; 
     889
     890                // atomically increment expected responses counter
     891                hal_atomic_add( &responses , 1 );
     892           
     893#if DEBUG_BARRIER_DESTROY
     894if( cycle > DEBUG_BARRIER_DESTROY )
     895printk(" - target cluster(%d,%d) / buffer %x\n", x, y, buf );
     896#endif
     897                // send a non-blocking RPC to release 512 bytes in target cluster
     898                rpc_send( cxy , &rpc[cxy] );
     899            }
     900        }
     901    }
     902
     903    // client thread deschedule
     904    sched_yield("blocked on parallel rpc_kcm_free");
     905
     906    // restore IRQs
     907    hal_restore_irq( save_sr);
     908
     909    // 3. release memory locally allocated for the RPC descriptors array
     910    req.type  = KMEM_PAGE;
     911    req.ptr   = rpc_page;
     912    kmem_free( &req );
     913
     914    // 4. release memory allocated for barrier descriptor
     915    xptr_t   page_xp = ppm_base2page( barrier_xp );
     916    page_t * page    = GET_PTR( page_xp );
     917
     918    if( barrier_cxy == local_cxy )                   
     919    {
     920        req.type      = KMEM_PAGE;
     921        req.ptr       = page;
     922        kmem_free( &req );
     923    }
     924    else                                         
     925    {
     926        rpc_pmem_release_pages_client( barrier_cxy,
     927                                       page );
     928    }
     929
     930#if DEBUG_BARRIER_DESTROY
     931cycle = (uint32_t)hal_get_cycles();
     932if( cycle > DEBUG_BARRIER_DESTROY )
     933printk("\n[%s] thread[%x,%x] exit for barrier (%x,%x) / cycle %d\n",
     934__FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle );
     935#endif
     936
     937}  // end dqt_barrier_destroy()
     938
     939////////////////////////////////////////////
     940void dqt_barrier_wait( xptr_t   barrier_xp )
     941{
     942    thread_t * this = CURRENT_THREAD;
     943
     944    // check calling thread can yield
     945    thread_assert_can_yield( this , __FUNCTION__ );
     946
     947    // get cluster and local pointer on DQT barrier descriptor
     948    dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
     949    cxy_t           barrier_cxy = GET_CXY( barrier_xp );
     950
     951#if DEBUG_BARRIER_WAIT
     952uint32_t cycle = (uint32_t)hal_get_cycles();
     953if( cycle > DEBUG_BARRIER_WAIT )
     954printk("\n[%s] thread[%x,%x] enter / barrier (%x,%x) / cycle %d\n",
     955__FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle );
     956#endif
     957
     958    // get extended pointer on local bottom DQT node
     959    uint32_t x       = HAL_X_FROM_CXY( local_cxy );
     960    uint32_t y       = HAL_Y_FROM_CXY( local_cxy );
     961    xptr_t   node_xp = hal_remote_l64( XPTR( barrier_cxy , &barrier_ptr->node_xp[x][y][0] ) );
     962
     963    // call recursive function to traverse DQT from bottom to root
     964    dqt_barrier_increment( node_xp );
     965
     966#if DEBUG_BARRIER_WAIT
     967cycle = (uint32_t)hal_get_cycles();
     968if( cycle > DEBUG_BARRIER_WAIT )
     969printk("\n[%s] thread[%x,%x] exit / barrier (%x,%x) / cycle %d\n",
     970__FUNCTION__, this->trdid, this->process->pid, barrier_cxy, barrier_ptr, cycle );
     971#endif
     972
     973}  // end dqt_barrier_wait()
     974
     975
     976////////////////////////////////////////////////////////////////////////////////////////////
     977//          DQT static functions
     978////////////////////////////////////////////////////////////////////////////////////////////
     979
     980
     981//////////////////////////////////////////////////////////////////////////////////////////
     982// This recursive function decrements the distributed "count" variables,
     983// traversing the DQT from bottom to root.
     984// The last arrived thread reset the local node before returning.
     985//////////////////////////////////////////////////////////////////////////////////////////
     986static void dqt_barrier_increment( xptr_t  node_xp )
     987{
     988    uint32_t   expected;
     989    uint32_t   sense;
     990    uint32_t   arity;
     991
     992    thread_t * this = CURRENT_THREAD;
     993
     994    // get node cluster and local pointer
     995    dqt_node_t * node_ptr = GET_PTR( node_xp );
     996    cxy_t        node_cxy = GET_CXY( node_xp );
     997
     998    // build relevant extended pointers
     999    xptr_t  arity_xp   = XPTR( node_cxy , &node_ptr->arity );
     1000    xptr_t  sense_xp   = XPTR( node_cxy , &node_ptr->sense );
     1001    xptr_t  current_xp = XPTR( node_cxy , &node_ptr->current );
     1002    xptr_t  lock_xp    = XPTR( node_cxy , &node_ptr->lock );
     1003    xptr_t  root_xp    = XPTR( node_cxy , &node_ptr->root );
     1004
     1005#if DEBUG_BARRIER_WAIT
     1006uint32_t   cycle = (uint32_t)hal_get_cycles();
     1007uint32_t   level = hal_remote_l32( XPTR( node_cxy, &node_ptr->level ) );
     1008if( cycle > DEBUG_BARRIER_WAIT )
     1009printk("\n[%s] thread[%x,%x] increments DQT node(%d,%d,%d) / cycle %d\n",
     1010__FUNCTION__ , this->process->pid, this->trdid,
     1011HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
     1012#endif
     1013
     1014    // get extended pointer on parent node
     1015    xptr_t  parent_xp  = hal_remote_l64( XPTR( node_cxy , &node_ptr->parent_xp ) );
     1016
     1017    // take busylock
     1018    remote_busylock_acquire( lock_xp );
     1019   
     1020    // get sense and arity values from barrier descriptor
     1021    sense = hal_remote_l32( sense_xp );
     1022    arity = hal_remote_l32( arity_xp );
     1023
     1024    // compute expected value
     1025    expected = (sense == 0) ? 1 : 0;
     1026
     1027    // increment current number of arrived threads / get value before increment
     1028    uint32_t current = hal_remote_atomic_add( current_xp , 1 );
     1029
     1030    // last arrived thread reset the local node, makes the recursive call
     1031    // on parent node, and reactivates all waiting thread when returning.
     1032    // other threads block, register in queue, and deschedule.
     1033
     1034    if ( current == (arity - 1) )                        // last thread 
     1035    {
     1036
     1037#if DEBUG_BARRIER_WAIT
     1038if( cycle > DEBUG_BARRIER_WAIT )
     1039printk("\n[%s] thread[%x,%x] reset DQT node(%d,%d,%d)\n",
     1040__FUNCTION__ , this->process->pid, this->trdid,
     1041HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
     1042#endif
     1043        // reset the current node
     1044        hal_remote_s32( sense_xp   , expected );
     1045        hal_remote_s32( current_xp , 0 );
     1046
     1047        // release busylock protecting the current node
     1048        remote_busylock_release( lock_xp );
     1049
     1050        // recursive call on parent node when current node is not the root
     1051        if( parent_xp != XPTR_NULL) dqt_barrier_increment( parent_xp );
     1052
     1053        // unblock all waiting threads on this node
     1054        while( xlist_is_empty( root_xp ) == false )
     1055        {
     1056            // get pointers on first waiting thread
     1057            xptr_t     thread_xp  = XLIST_FIRST( root_xp , thread_t , wait_list );
     1058            cxy_t      thread_cxy = GET_CXY( thread_xp );
     1059            thread_t * thread_ptr = GET_PTR( thread_xp );
     1060
     1061#if (DEBUG_BARRIER_WAIT & 1)
     1062trdid_t     trdid   = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) );
     1063process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) );
     1064pid_t       pid     = hal_remote_l32( XPTR( thread_cxy , &process->pid ) );
     1065if( cycle > DEBUG_BARRIER_WAIT )
     1066printk("\n[%s] thread[%x,%x] unblock thread[%x,%x]\n",
     1067__FUNCTION__, this->process->pid, this->trdid, pid, trdid );
     1068#endif
     1069            // remove waiting thread from queue
     1070            xlist_unlink( XPTR( thread_cxy , &thread_ptr->wait_list ) );
     1071
     1072            // unblock waiting thread
     1073            thread_unblock( thread_xp , THREAD_BLOCKED_USERSYNC );
     1074        }
     1075    }
     1076    else                                               // not the last thread
     1077    {
     1078        // get extended pointer on xlist entry from thread
     1079        xptr_t  entry_xp = XPTR( local_cxy , &this->wait_list );
     1080       
    2901081        // register calling thread in barrier waiting queue
    291         xlist_add_last( root_xp , XPTR( local_cxy , &this->wait_list ) );
     1082        xlist_add_last( root_xp , entry_xp );
    2921083
    2931084        // block calling thread
     
    2971088        remote_busylock_release( lock_xp );
    2981089
     1090#if DEBUG_BARRIER_WAIT
     1091if( cycle > DEBUG_BARRIER_WAIT )
     1092printk("\n[%s] thread[%x,%x] blocks on node(%d,%d,%d)\n",
     1093__FUNCTION__ , this->process->pid, this->trdid,
     1094HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level );
     1095#endif
    2991096        // deschedule
    3001097        sched_yield("blocked on barrier");
    3011098    }
    3021099
    303 #if DEBUG_BARRIER
    304 cycle = (uint32_t)hal_get_cycles();
    305 if( cycle > DEBUG_BARRIER )
    306 printk("\n[DBG] %s : thread %x in process %x exit / barrier %x in cluster %x / cycle %d\n",
    307 __FUNCTION__, this->trdid, this->process->pid, barrier_ptr, barrier_cxy, cycle );
    308 #endif
    309 
    310 }  // end remote_barrier_wait()
     1100    return;
     1101
     1102} // end dqt_barrier_decrement()
     1103
     1104#if DEBUG_BARRIER_CREATE
     1105
     1106////////////////////////////////////////////////////////////////////////////////////////////
     1107// This debug function displays all DQT nodes in all clusters.
     1108////////////////////////////////////////////////////////////////////////////////////////////
     1109// @ barrier_xp   : extended pointer on DQT barrier descriptor.
     1110////////////////////////////////////////////////////////////////////////////////////////////
     1111static void dqt_barrier_display( xptr_t  barrier_xp )
     1112{
     1113    // get cluster and local pointer on DQT barrier
     1114    dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp );
     1115    cxy_t           barrier_cxy = GET_CXY( barrier_xp );
     1116
     1117    // get barrier global parameters
     1118    uint32_t x_size   = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->x_size ) );
     1119    uint32_t y_size   = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->y_size ) );
     1120    uint32_t nthreads = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->nthreads ) );
     1121
     1122    // compute size and number of DQT levels
     1123    uint32_t z      = (x_size > y_size) ? x_size : y_size;
     1124    uint32_t levels = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 3 : (z < 9) ? 4 : 5;
     1125
     1126    printk("\n***** DQT barrier : x_size %d / y_size %d / nthreads %d / levels %d *****\n",
     1127    x_size, y_size, nthreads, levels );
     1128
     1129    uint32_t x , y , l;
     1130
     1131    for ( x = 0 ; x < x_size ; x++ )
     1132    {
     1133        for ( y = 0 ; y < y_size ; y++ )
     1134        {
     1135            printk(" - cluster[%d,%d]\n", x , y );
     1136
     1137            for ( l = 0 ; l < levels ; l++ )
     1138            {
     1139                // get pointers on target node
     1140                xptr_t       node_xp  = hal_remote_l64( XPTR( barrier_cxy ,
     1141                                        &barrier_ptr->node_xp[x][y][l] ) );
     1142                dqt_node_t * node_ptr = GET_PTR( node_xp );
     1143                cxy_t        node_cxy = GET_CXY( node_xp );
     1144
     1145                if( node_xp != XPTR_NULL )
     1146                {
     1147                     uint32_t level = hal_remote_l32( XPTR( node_cxy , &node_ptr->level       ));
     1148                     uint32_t arity = hal_remote_l32( XPTR( node_cxy , &node_ptr->arity       ));
     1149                     xptr_t   pa_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->parent_xp   ));
     1150                     xptr_t   c0_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[0] ));
     1151                     xptr_t   c1_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[1] ));
     1152                     xptr_t   c2_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[2] ));
     1153                     xptr_t   c3_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[3] ));
     1154
     1155                     printk("   . level %d : (%x,%x) / arity %d / P(%x,%x) / C0(%x,%x)"
     1156                            " C1(%x,%x) / C2(%x,%x) / C3(%x,%x)\n",
     1157                     level, node_cxy, node_ptr, arity,
     1158                     GET_CXY(pa_xp), GET_PTR(pa_xp),
     1159                     GET_CXY(c0_xp), GET_PTR(c0_xp),
     1160                     GET_CXY(c1_xp), GET_PTR(c1_xp),
     1161                     GET_CXY(c2_xp), GET_PTR(c2_xp),
     1162                     GET_CXY(c3_xp), GET_PTR(c3_xp) );
     1163                }
     1164            }
     1165        }
     1166    }
     1167}   // end dqt_barrier_display()
     1168
     1169#endif
  • trunk/kernel/libk/remote_barrier.h

    r581 r619  
    22 * remote_barrier.h - POSIX barrier definition.               
    33 *
    4  * Author  Alain Greiner (2016,2017,2018)
     4 * Author  Alain Greiner (2016,2017,2018,2019)
    55 *
    66 * Copyright (c) UPMC Sorbonne Universites
     
    2929#include <remote_busylock.h>
    3030#include <xlist.h>
     31#include <shared_pthread.h>
    3132
    3233/***************************************************************************************
    33  *          This file defines a POSIX compliant barrier.
     34 *       This file defines two implementations for a POSIX compliant barrier.
    3435 *
    3536 * It is used by multi-threaded user applications to synchronise threads running in
    36  * different clusters, as all access functions uses hal_remote_l32() / hal_remote_s32()
    37  * remote access primitives.
    38  *
    39  * A barrier is declared by a given user process as a "pthread_barrier_t" global variable.
    40  * This user type is implemented as an unsigned long, but the value is not used by the
    41  * kernel. ALMOS-MKH uses only the barrier virtual address as an identifier.
    42  * For each user barrier, ALMOS-MKH creates a kernel "remote_barrier_t" structure,
    43  * dynamically allocated in the reference cluster by the remote_barrier_create() function,
    44  * and destroyed by the remote_barrier_destroy() function, using RPC if the calling thread
    45  * is not running in the reference cluster.
    46  *
    47  * The blocking "remote_barrier_wait()" function implements a descheduling policy when
    48  * the calling thread is not the last expected thread: the calling thread is registered
    49  * in a waiting queue, rooted in the barrier structure, and the the calling thread
    50  * is blocked on the THREAD_BLOCKED_USERSYNC condition. The last arrived thread
    51  * unblocks all registtered waiting threads.
     37 * different clusters. Access functions use RPCs for barrier creation/destruction,
     38 * and use remote access primitives for actual synchronisation (wait function).
     39 *
     40 * A barrier is declared by a given user process as a "pthread_barrier_t" user variable.
     41 * This user type is implemented in user space as an unsigned long, but the value is not
     42 * used by the kernel. ALMOS-MKH uses only the barrier virtual address as an identifier.
     43 * For each user barrier, ALMOS-MKH creates a kernel structure, dynamically allocated
     44 * by the "generic_barrier_create()" function, destroyed by the "remote_barrier_destroy()"
     45 * function, and used by the "generic_barrier_wait()" function.
     46 *
     47 * Implementation note:
     48 * ALMOS-MKH supports two barrier implementations:
     49 *
     50 * 1) simple_barrier_t
     51 *    If the pointer on the barrier attributes is NULL, the barrier is implemented as
     52 *    a shared variable localized in the reference process cluster.
     53 *    There is a risk of contention when the number of synchronizing threads is large.
     54 *
     55 * 2) dqt_barrier_t
     56 *    If the (x_size, y_size, nthreads) arguments are defined in the barrier attributes,
     57 *    the barrier is implemented as a hierarchical quad-tree covering all clusters in the
      58 *    (x_size * y_size) mesh, including cluster (0,0), with nthreads per cluster, and called
     59 *    DQT : Distributed Quad Tree. This DQT implementation supposes a regular architecture,
      60 *    and a strong constraint on the threads placement: exactly "nthreads" threads per
     61 *    cluster in the (x_size * y_size) mesh.
     62 *
     63 * For both implementations, the blocking "generic_barrier_wait()" function implements
     64 * a descheduling policy when the calling thread is not the last expected thread:
     65 * the calling thread is registered in a waiting queue, rooted in the barrier structure,
      66 * and the calling thread is blocked on the THREAD_BLOCKED_USERSYNC condition.
     67 * The last arrived thread unblocks all registered waiting threads.
    5268 * **************************************************************************************/
    5369
    54 /*****************************************************************************************
    55  * This structure defines the barrier descriptor.
    56  * - It contains an xlist of all barriers dynamically created by a given process,
    57  *   rooted in the reference process descriptor.
    58  * - It contains the root of another xlist to register all arrived threads.
    59  ****************************************************************************************/
    60 
    61 typedef struct remote_barrier_s
    62 {
    63     remote_busylock_t  lock;          /*! lock protecting list of waiting threads       */
    64     intptr_t           ident;         /*! virtual address in user space == identifier   */
    65     uint32_t           current;       /*! number of arrived threads                     */
    66     uint32_t           sense;         /*! barrier state (toggle)                        */
    67     uint32_t           nb_threads;    /*! number of expected threads                    */
    68     xlist_entry_t      list;          /*! member of list of barriers in same process    */
    69     xlist_entry_t      root;          /*! root of list of waiting threads               */
     70
     71
     72/*****************************************************************************************
     73 *                 generic barrier descriptor and access functions
     74 *****************************************************************************************
      75 * This generic structure is used by both the simple and the DQT implementations.
     76 * It is implemented in the reference process cluster, and contains
     77 * - the barrier identifier,
      78 * - the implementation type (simple or DQT),
     79 * - an xlist implementing the set of barriers dynamically created by a given process,
     80 * - a pointer on the implementation specific descriptor (simple_barrier / sqt_barrier).
     81 ****************************************************************************************/
     82
     83typedef struct generic_barrier_s
     84{
     85    intptr_t              ident;      /*! virtual address in user space == identifier   */
     86    xlist_entry_t         list;       /*! member of list of barriers in same process    */
     87    bool_t                is_dqt;     /*! DQT implementation when true                  */
     88    void                * extend;     /*! implementation specific barrier descriptor    */
    7089}
    71 remote_barrier_t;
    72 
     90generic_barrier_t;
    7391
    7492/*****************************************************************************************
     
    7694 * by its virtual address in a given user process. It makes an associative search,
    7795 * scanning the list of barriers rooted in the reference process descriptor.
     96 * It can be used for both simple and DQT barriers, registered in the same list.
    7897 *****************************************************************************************
    7998 * @ ident    : barrier virtual address, used as identifier.
    8099 * @ returns extended pointer on barrier if success / returns XPTR_NULL if not found.
    81100 ****************************************************************************************/
    82 xptr_t remote_barrier_from_ident( intptr_t  ident );
    83 
    84 /*****************************************************************************************
    85  * This function implement the pthread_barrier_init() syscall.
    86  * It allocates memory for the barrier descriptor in the reference cluster for
    87  * the calling process, it initializes the barrier state, and register it in the
    88  * list of barriers owned by the reference process.
    89  *****************************************************************************************
    90  * @ count       : number of expected threads.
    91  * @ ident       : barrier identifier (virtual address in user space).
    92  * @ return 0 if success / return ENOMEM if failure.
    93  ****************************************************************************************/
    94 error_t remote_barrier_create( intptr_t ident,
    95                                uint32_t count );
    96 
    97 /*****************************************************************************************
    98  * This function implement the pthread_barrier_destroy() syscall.
    99  * It releases thr memory allocated for the barrier descriptor, and remove the barrier
    100  * from the list of barriers owned by the reference process.
    101  *****************************************************************************************
    102  * @ barrier_xp  : extended pointer on barrier descriptor.
    103  ****************************************************************************************/
    104 void remote_barrier_destroy( xptr_t   barrier_xp );
    105 
    106 /*****************************************************************************************
    107  * This function implement the pthread_barrier_wait() syscall.
    108  * It returns only when the number of expected threads (registered in the barrier
    109  * dexcriptor) reach the barrier.
    110  *****************************************************************************************
    111  * @ barrier_xp   : extended pointer on barrier descriptor.
    112  ****************************************************************************************/
    113 void remote_barrier_wait( xptr_t   barrier_xp );
     101xptr_t generic_barrier_from_ident( intptr_t  ident );
     102
     103/*****************************************************************************************
     104 * This function implements the pthread_barrier_init() syscall.
     105 * It allocates and initialises the generic barrier descriptor in the reference process
     106 * cluster, and - depending on the <attr> argument, calls the relevant (simple or DQT)
     107 * function to allocate and initialize the implementation dependant barrier descriptor.
     108 * Finally, it registers the barrier in the reference process xlist of user barriers.
     109 * It can be called by a thread running in any cluster, as it use RPC if required.
     110 *****************************************************************************************
     111 * @ ident    : barrier virtual address, used as identifier.
     112 * @ count    : number of expected threads.
     113 * @ attr     : barrier attributes (x_size,y_size,nthreads), used by QDT implementation.
     114 * @ returns 0 if success / returns -1 if not found.
     115 ****************************************************************************************/
     116error_t generic_barrier_create( intptr_t                ident,
     117                                uint32_t                count,
     118                                pthread_barrierattr_t * attr );
     119
     120/*****************************************************************************************
     121 * This function implements the pthread_barrier_destroy() syscall.
     122 * It calls the relevant function (simple or DQT) to release the memory allocated for
     123 * the implementation specific barrier descriptor, and releases the memory allocated
     124 * for the generic barrier descriptor.
     125 * It removes the barrier from the list of barriers rooted in the reference process.
     126 * It can be called by a thread running in any cluster, as it use RPC if required.
     127 *****************************************************************************************
     128 * @ gen_barrier_xp  : extended pointer on generic barrier descriptor.
     129 ****************************************************************************************/
     130void generic_barrier_destroy( xptr_t gen_barrier_xp );
     131
     132/*****************************************************************************************
     133 * This blocking function implements the pthread_barrier_wait() syscall.
     134 * It calls the relevant function (simple or DQT) depending on the implementation,
     135 * and returns only when all expected threads reach the barrier.
     136 * It can be called by a thread running in any cluster, as it use remote accesses.
     137 *****************************************************************************************
     138 * @ gen_barrier_xp   : extended pointer on generic barrier descriptor.
     139 ****************************************************************************************/
     140void generic_barrier_wait( xptr_t gen_barrier_xp );
     141
     142
     143
     144
     145
     146
     147/*****************************************************************************************
     148 *                        simple barrier descriptor
     149 *****************************************************************************************
     150 * This structure defines the simple barrier descriptor. It is localized in the process
     151 * reference cluster, as an extension of the generic barrier descriptor.
     152 * It implements a toggle barrier remotely accessed by all threads.
     153 * It contains the root of the xlist registering all arrived threads.
     154 ****************************************************************************************/
     155
     156typedef struct simple_barrier_s
     157{
     158    remote_busylock_t  lock;          /*! lock protecting list of waiting threads       */
     159    uint32_t           current;       /*! number of arrived threads                     */
     160    uint32_t           sense;         /*! barrier state (toggle)                        */
     161    uint32_t           arity;         /*! number of expected threads                    */
     162    xlist_entry_t      root;          /*! root of list of waiting threads               */
     163}
     164simple_barrier_t;
     165
     166/*****************************************************************************************
     167 * This function allocates memory for the simple barrier descriptor in the reference
     168 * cluster of the calling process. It initializes the barrier state and returns
     169 * a local pointer on the created simple barrier descriptor in reference cluster.
     170 * It can be called by a thread running in any cluster, as it use RPC if required.
     171 *****************************************************************************************
     172 * @ count          : [in] number of expected threads.
     173 * @ return Local pointer on barrier descriptor if success / return NULL if failure.
     174 ****************************************************************************************/
     175simple_barrier_t * simple_barrier_create( uint32_t  count );
     176
     177/*****************************************************************************************
     178 * This function releases the memory allocated for the simple barrier descriptor.
     179 * It can be called by a thread running in any cluster, as it use RPC if required.
     180 *****************************************************************************************
     181 * @ barrier_xp  : extended pointer on simple barrier descriptor.
     182 ****************************************************************************************/
     183void simple_barrier_destroy( xptr_t   barrier_xp );
     184
     185/*****************************************************************************************
     186 * This blocking function returns only when all expected threads reach the barrier.
     187 * It can be called by a thread running in any cluster, as it use remote accesses.
     188 * Waiting threads use a descheduling policy.
     189 *****************************************************************************************
     190 * @ barrier_xp   : extended pointer on simple barrier descriptor.
     191 ****************************************************************************************/
     192void simple_barrier_wait( xptr_t   barrier_xp );
     193
     194
     195
     196
     197
     198/*****************************************************************************************
     199 *                              dqt_barrier
     200 *****************************************************************************************
      201 * These structures define the hierarchical DQT barrier, physically distributed in a
     202 * mesh of clusters defined by the (x_size, y_size, nthreads) arguments:
     203 *   . The involved clusters form a mesh [x_size * y_size]
     204 *   . The lower left involved cluster is cluster(0,0) 
     205 *   . The number of threads per cluster is the same in all clusters.
     206 *
     207 * Implementation note:
      208 * - The quad tree is implemented as a three-dimensional array of node[x][y][l]
     209 *   . [x][y] are the cluster coordinates / max values are (DQT_XMAX-1), (DQT_YMAX-1)
     210 *   . [l] is the node level / 0 for terminal nodes / (DQT_LMAX-1) for the root node
     211 * - The dqt_barrier_t is the global barrier descriptor, allocated in the reference
     212 *   process cluster as an extension of the generic barrier descriptor. It contains a
     213 *   3D array of extended pointers on all DQT nodes implementing the DQT barrier.
      214 * - The dqt_node_t is a local barrier implementing a toggle barrier between all threads
     215 *   of a given cluster (for a terminal node), or between all representatives of the four
     216 *   children nodes (for a non terminal node).
     217 ****************************************************************************************/
     218
     219#define  DQT_XMAX    16               // max number of clusters in a row
     220#define  DQT_YMAX    16               // max number of clusters in a column
     221#define  DQT_LMAX    5                // max depth of the quad tree
     222
     223typedef struct dqt_node_s
     224{
     225    remote_busylock_t  lock;          /*! lock protecting list of waiting threads       */
     226    volatile uint32_t  sense;         /*! barrier state (toggle)                        */
     227    volatile uint32_t  current;       /*! number of locally arrived threads             */
     228    uint32_t           arity;         /*! total number of locally expected threads      */
     229    uint32_t           level;         /*! hierarchical level (0 is bottom)              */
     230    xptr_t             parent_xp;     /*! x_pointer on parent node (NULL for root)      */
     231    xptr_t             child_xp[4];   /*! x_pointer on children node (NULL for bottom)  */
     232    xlist_entry_t      root;          /*! root of list of waiting threads               */
     233}
     234dqt_node_t;
     235
     236typedef struct dqt_barrier_s
     237{
     238    xptr_t    node_xp[DQT_XMAX][DQT_YMAX][DQT_LMAX];  /*! array of xptr on DQT nodes    */
     239
     240    uint32_t  x_size;                 /*! number of clusters in one row of DQT mesh     */
     241    uint32_t  y_size;                 /*! number of clusters in one column of DQT mesh  */
     242    uint32_t  nthreads;               /*! number of expected threads in one cluster     */
     243}
     244dqt_barrier_t;
     245
     246/*****************************************************************************************
     247 * This function allocates memory for the DQT barrier descriptor in the reference cluster
     248 * of the calling process. It allocates also memory in all clusters of the QDT mesh,
     249 * to store up to 5 QDT nodes per cluster.
     250 * It initializes the barrier descriptor, including initialisation of the parent/children
     251 * extended pointers in the distributed QDT nodes.
     252 * It returns a local pointer on the QDT barrier descriptor in reference cluster.
     253 * It can be called by a thread running in any cluster, as it use RPCs for memory
     254 * allocation, and remote access for QDT initialisation.
     255 *****************************************************************************************
     256 * @ x_size      : [in] number of clusters in a line of DQT mesh.
     257 * @ y_size      : [in] number of clusters in a column of DQT mesh.
     258 * @ nthreads    : [in] number of threads per cluster.
     259 * @ return Local pointer on barrier descriptor if success / return NULL if failure.
     260 ****************************************************************************************/
     261dqt_barrier_t * dqt_barrier_create( uint32_t  x_size,
     262                                    uint32_t  y_size,
     263                                    uint32_t  nthreads );
     264
     265/*****************************************************************************************
     266 * This function releases all memory allocated for the QDT barrier descriptor.
     267 * It removes the barrier from the list of barriers rooted in the reference process.
     268 * It can be called by a thread running in any cluster, as it use RPCs.
     269 *****************************************************************************************
     270 * @ barrier_xp  : extended pointer on DQT barrier descriptor.
     271 ****************************************************************************************/
     272void dqt_barrier_destroy( xptr_t   barrier_xp );
     273
     274/*****************************************************************************************
     275 * This blocking function returns only when all expected threads reach the barrier.
     276 * It can be called by a thread running in any cluster, as it use remote accesses.
     277 * Waiting threads use a descheduling policy.
     278 *****************************************************************************************
     279 * @ barrier_xp   : extended pointer on DQT barrier descriptor.
     280 ****************************************************************************************/
     281void dqt_barrier_wait( xptr_t   barrier_xp );
     282
    114283
    115284
  • trunk/kernel/libk/remote_busylock.c

    r600 r619  
    101101    (XPTR( local_cxy , this ) == DEBUG_BUSYLOCK_THREAD_XP) )
    102102{
    103     // get cluster and local pointer of target thread
    104     cxy_t      thread_cxy = GET_CXY( DEBUG_BUSYLOCK_THREAD_XP );
    105     thread_t * thread_ptr = GET_PTR( DEBUG_BUSYLOCK_THREAD_XP );
    106 
    107     // display message on kernel TXT0
    108103    printk("\n[%s] thread[%x,%x] ACQUIRE lock %s\n",
    109     __FUNCTION_, this->process->pid, this->trdid, lock_type_str[type] );
     104    __FUNCTION__, this->process->pid, this->trdid, lock_type_str[type] );
    110105}
    111106#endif
     
    149144    (XPTR( local_cxy , this ) == DEBUG_BUSYLOCK_THREAD_XP) )
    150145{
    151     // get cluster and local pointer of target thread
    152     cxy_t      thread_cxy = GET_CXY( DEBUG_BUSYLOCK_THREAD_XP );
    153     thread_t * thread_ptr = GET_PTR( DEBUG_BUSYLOCK_THREAD_XP );
    154 
    155     // display message on kernel TXT0
    156146    printk("\n[%s] thread[%x,%x] RELEASE lock %s\n",
    157147    __FUNCTION__, this->process->pid, this->trdid, lock_type_str[type] );
  • trunk/kernel/libk/remote_busylock.h

    r603 r619  
    4242 *   makes an atomic increment on a "ticket" allocator, and keep polling the "current"
    4343 *   value  until current == ticket.
    44 
     44 *
    4545 * - To release the lock, the owner thread increments the "current" value,
    4646 *   decrements its busylocks counter.
  • trunk/kernel/libk/remote_mutex.c

    r611 r619  
    138138thread_t * this = CURRENT_THREAD;
    139139if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX )
    140 printk("\n[DBG] %s : thread %x in %x process / mutex(%x,%x)\n",
    141 __FUNCTION__, this->trdid, this->process->pid, local_cxy, mutex_ptr );
     140printk("\n[%s] : thread[%x,%x] created mutex(%x,%x)\n",
     141__FUNCTION__, this->process->pid, this->trdid, local_cxy, mutex_ptr );
    142142#endif
    143143
     
    173173    remote_queuelock_release( XPTR( ref_cxy , &ref_ptr->sync_lock ) );
    174174
    175     // release memory allocated for mutexaphore descriptor
     175    // release memory allocated for mutex descriptor
    176176    if( mutex_cxy == local_cxy )                            // reference is local
    177177    {
     
    183183    else                                                  // reference is remote
    184184    {
    185         rpc_kcm_free_client( mutex_cxy , mutex_ptr , KMEM_BARRIER );
     185        rpc_kcm_free_client( mutex_cxy , mutex_ptr , KMEM_MUTEX );
    186186    }
    187187
     
    226226thread_t * this = CURRENT_THREAD;
    227227if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX )
    228 printk("\n[DBG] %s : thread %x in process %x SUCCESS on mutex(%x,%x)\n",
    229 __FUNCTION__, this->trdid, this->process->pid, mutex_cxy, mutex_ptr );
     228printk("\n[%s] thread[%x,%x] SUCCESS on mutex(%x,%x)\n",
     229__FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr );
    230230#endif
    231231
     
    247247thread_t * this = CURRENT_THREAD;
    248248if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX )
    249 printk("\n[DBG] %s : thread %x in process %x BLOCKED on mutex(%x,%x)\n",
    250 __FUNCTION__, this->trdid, this->process->pid, mutex_cxy, mutex_ptr );
     249printk("\n[%s] thread[%x,%x] BLOCKED on mutex(%x,%x)\n",
     250__FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr );
    251251#endif
    252252
     
    296296thread_t * this = CURRENT_THREAD;
    297297if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX )
    298 printk("\n[DBG] %s : thread %x in %x process EXIT / mutex(%x,%x)\n",
    299 __FUNCTION__, this->trdid, this->process->pid, mutex_cxy, mutex_ptr );
     298printk("\n[%s] thread[%x,%x] EXIT / mutex(%x,%x)\n",
     299__FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr );
    300300#endif
    301301
     
    320320process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) );
    321321pid_t       pid     = hal_remote_l32( XPTR( thread_cxy , &process->pid ) );
    322 printk("\n[DBG] %s : thread %x in process %x UNBLOCK thread %x in process %d / mutex(%x,%x)\n",
    323 __FUNCTION__, this->trdid, this->process->pid, trdid, pid, mutex_cxy, mutex_ptr );
     322printk("\n[%s] thread[%x,%x] UNBLOCK thread %x in process %d / mutex(%x,%x)\n",
     323__FUNCTION__, this->process->pid, this->trdid, trdid, pid, mutex_cxy, mutex_ptr );
    324324}
    325325#endif
     
    371371thread_t * this = CURRENT_THREAD;
    372372if( (uint32_t)hal_get_cycles() > DEBUG_QUEUELOCK )
    373 printk("\n[DBG] %s : SUCCESS for thread %x in process %x / mutex(%x,%x)\n",
    374 __FUNCTION__, this->trdid, this->process->pid, mutex_cxy, mutex_ptr );
     373printk("\n[%s] SUCCESS for thread[%x,%x] / mutex(%x,%x)\n",
     374__FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr );
    375375#endif
    376376        // release busylock protecting mutex state
     
    385385thread_t * this = CURRENT_THREAD;
    386386if( (uint32_t)hal_get_cycles() > DEBUG_QUEUELOCK )
    387 printk("\n[DBG] %s : FAILURE for thread %x in process %x / mutex(%x,%x)\n",
    388 __FUNCTION__, this->trdid, this->process->pid, mutex_cxy, mutex_ptr );
     387printk("\n[%s] FAILURE for thread[%x,%x] / mutex(%x,%x)\n",
     388__FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr );
    389389#endif
    390390        // release busylock protecting mutex state
  • trunk/kernel/libk/user_dir.c

    r614 r619  
    286286            printk("\n[ERROR] in %s : cannot map vpn %x in GPT\n",
    287287            __FUNCTION__, (vpn + page_id) );
    288             // use the non blocking RPC to delete the remote vseg
    289             rpc_desc_t     desc;
    290             desc.index     = RPC_VMM_DELETE_VSEG;
    291             desc.responses = 1;
    292             desc.thread    = CURRENT_THREAD;
    293             desc.lid       = CURRENT_THREAD->core->lid;
    294             desc.blocking  = true;
    295             desc.args[0]   = ref_pid;
    296             desc.args[1]   = vpn << CONFIG_PPM_PAGE_SHIFT;
    297             rpc_vmm_delete_vseg_client( ref_cxy , &desc );
     288
     289            // delete the vseg
     290            if( ref_cxy == local_cxy) vmm_delete_vseg( ref_pid, vpn<<CONFIG_PPM_PAGE_SHIFT );
     291            else rpc_vmm_delete_vseg_client( ref_cxy, ref_pid, vpn<<CONFIG_PPM_PAGE_SHIFT );
     292
    298293            // release the user_dir descriptor
    299294            req.type = KMEM_DIR;
     
    387382    lpid_t         lpid;       // process local index
    388383    rpc_desc_t     rpc;        // rpc descriptor
     384    uint32_t       responses;  // response counter
    389385     
    390386    // get pointers on calling process & thread
     
    441437    thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC );
    442438
    443     // initialize RPC descriptor shared fields
    444     rpc.responses = 0;
     439    // initialize responses counter
     440    responses = 0;
     441
     442    // initialize a shared RPC descriptor
     443    // can be shared, because no out arguments
     444    rpc.rsp       = &responses;
    445445    rpc.blocking  = false;
    446446    rpc.index     = RPC_VMM_DELETE_VSEG;
     
    461461
    462462        // atomically increment responses counter
    463         hal_atomic_add( (void *)&rpc.responses , 1 );
    464 
    465         // call RPC 
    466         rpc_vmm_delete_vseg_client( process_cxy , &rpc );
    467 
    468     }  // end list of copies
     463        hal_atomic_add( &responses , 1 );
     464
     465        // send RPC to target cluster 
     466        rpc_send( process_cxy , &rpc );
     467    }
    469468
    470469    // release the lock protecting process copies
     
    472471
    473472    // client thread deschedule
    474     sched_yield("blocked on rpc_vmm_unmap_vseg");
     473    sched_yield("blocked on rpc_vmm_delete_vseg");
    475474 
    476475    // restore IRQs
  • trunk/kernel/mm/kcm.c

    r567 r619  
    33 *
    44 * Author  Ghassan Almaless (2008,2009,2010,2011,2012)
    5  *         Alain Greiner    (2016,2017,2018)
     5 *         Alain Greiner    (2016,2017,2018,2019)
    66 *
    77 * Copyright (c) UPMC Sorbonne Universites
     
    5151
    5252#if DEBUG_KCM
     53thread_t * this = CURRENT_THREAD;
    5354uint32_t cycle = (uint32_t)hal_get_cycles();
    5455if( DEBUG_KCM < cycle )
    55 printk("\n[DBG] %s : thread %x enters for %s / page %x / count %d / active %d\n",
    56 __FUNCTION__ , CURRENT_THREAD , kmem_type_str( kcm->type ) ,
     56printk("\n[%s] thread[%x,%x] enters for %s / page %x / count %d / active %d\n",
     57__FUNCTION__, this->process->pid, this->trdid, kmem_type_str(kcm->type),
    5758(intptr_t)kcm_page , kcm_page->count , kcm_page->active );
    5859#endif
    5960
    60         assert( kcm_page->active , "kcm_page should be active" );
     61assert( kcm_page->active , "kcm_page should be active" );
    6162
    6263        // get first block available
    6364        int32_t index = bitmap_ffs( kcm_page->bitmap , kcm->blocks_nr );
    6465
    65         assert( (index != -1) , "kcm_page should not be full" );
     66assert( (index != -1) , "kcm_page should not be full" );
    6667
    6768        // allocate block
     
    9091cycle = (uint32_t)hal_get_cycles();
    9192if( DEBUG_KCM < cycle )
    92 printk("\n[DBG] %s : thread %x exit / type %s / ptr %x / page %x / count %d\n",
    93        __FUNCTION__ , CURRENT_THREAD , kmem_type_str( kcm->type ) , (intptr_t)ptr ,
    94 (intptr_t)kcm_page , kcm_page->count );
     93printk("\n[%s] thread[%x,%x] exit for %s / ptr %x / page %x / count %d\n",
     94__FUNCTION__, this->process->pid, this->trdid, kmem_type_str(kcm->type),
     95(intptr_t)ptr, (intptr_t)kcm_page, kcm_page->count );
    9596#endif
    9697
     
    115116        index = ((uint8_t *)ptr - (uint8_t *)kcm_page - CONFIG_KCM_SLOT_SIZE) / kcm->block_size;
    116117
    117         assert( !bitmap_state( kcm_page->bitmap , index ) , "page already freed" );
    118         assert( (kcm_page->count > 0) , "count already zero" );
     118assert( !bitmap_state( kcm_page->bitmap , index ) , "page already freed" );
     119
     120assert( (kcm_page->count > 0) , "count already zero" );
    119121
    120122        bitmap_set( kcm_page->bitmap , index );
     
    163165        if( page == NULL )
    164166        {
    165                 printk("\n[ERROR] in %s : failed to allocate page in cluster %d\n",
    166                        __FUNCTION__ , local_cxy );
     167                printk("\n[ERROR] in %s : failed to allocate page in cluster %x\n",
     168            __FUNCTION__ , local_cxy );
    167169                return ENOMEM;
    168170        }
     
    216218                   uint32_t   type )
    217219{
    218         // the kcm_page descriptor mut fit in the KCM slot
    219         assert( (sizeof(kcm_page_t) <= CONFIG_KCM_SLOT_SIZE) ,
    220                 "KCM slot too small\n" );
     220
     221// the kcm_page descriptor must fit in the KCM slot
     222assert( (sizeof(kcm_page_t) <= CONFIG_KCM_SLOT_SIZE) , "KCM slot too small\n" );
     223
     224// the allocated object must fit in one single page
     225assert( (kmem_type_size(type) <= (CONFIG_PPM_PAGE_SIZE - CONFIG_KCM_SLOT_SIZE)),
     226"allocated object requires more than one single page\n" );
    221227
    222228        // initialize lock
     
    241247        uint32_t  blocks_nr = (CONFIG_PPM_PAGE_SIZE - CONFIG_KCM_SLOT_SIZE) / block_size;
    242248        kcm->blocks_nr = blocks_nr;
     249
     250#if DEBUG_KCM
     251thread_t * this  = CURRENT_THREAD;
     252uint32_t   cycle = (uint32_t)hal_get_cycles();
     253if( DEBUG_KCM < cycle )
     254printk("\n[%s] thread[%x,%x] initialised KCM %s : block_size %d / blocks_nr %d\n",
     255__FUNCTION__, this->process->pid, this->trdid,
     256kmem_type_str( kcm->type ), block_size, blocks_nr );
     257#endif
     258
    243259}
    244260
     
    331347        kcm_t      * kcm;
    332348
    333         assert( (ptr != NULL) , "pointer cannot be NULL" );
     349// check argument
     350assert( (ptr != NULL) , "pointer cannot be NULL" );
    334351
    335352        kcm_page = (kcm_page_t *)((intptr_t)ptr & ~CONFIG_PPM_PAGE_MASK);
  • trunk/kernel/mm/kcm.h

    r567 r619  
    3737 * for fixed size objects. It exists a specific KCM allocator for each object type.
    3838 * The actual allocated block size is the smallest multiple of the KCM slot, that
    39  * contain one single object. The KCM slot is typically 64 bytes, as it must be large
    40  * enough to store the kcm_page descriptor, defined below.
     39 * contain one single object. The KCM slot is 64 bytes, as it must be large enough
     40 * to store the kcm_page descriptor, defined below.
    4141 * The various KCM allocators themselves are not statically allocated in the cluster
    4242 * manager, but are dynamically allocated when required, using the embedded KCM
  • trunk/kernel/mm/khm.h

    r567 r619  
    33 *
    44 * Authors  Ghassan Almaless (2008,2009,2010,2011,2012)
    5  *          Alain Greiner (2016,2017,2018)
     5 *          Alain Greiner (2016,2017,2018,2019)
    66 *
    77 * Copyright (c) UPMC Sorbonne Universites
     
    3232/*******************************************************************************************
    3333 * This structure defines a Kernel Heap Manager (KHM) in a given cluster.
    34  * It is used to allocate memory objects, that are not enough replicated to justify
    35  * a dedicated KCM allocator.
 34 * It is used to allocate memory objects, that are too large, or not enough replicated
     35 * to use a dedicated KCM allocator.
    3636 ******************************************************************************************/
    3737
  • trunk/kernel/mm/kmem.c

    r612 r619  
    9090    else if( type == KMEM_CPU_CTX )       return CONFIG_CPU_CTX_SIZE;
    9191    else if( type == KMEM_FPU_CTX )       return CONFIG_FPU_CTX_SIZE;
    92     else if( type == KMEM_BARRIER )       return sizeof( remote_barrier_t );
    93 
     92    else if( type == KMEM_GEN_BARRIER )   return sizeof( generic_barrier_t );
     93
     94    else if( type == KMEM_SMP_BARRIER )   return sizeof( simple_barrier_t );
    9495    else if( type == KMEM_DEVFS_CTX )     return sizeof( fatfs_ctx_t );
    9596    else if( type == KMEM_FATFS_CTX )     return sizeof( fatfs_ctx_t );
     
    101102    else if( type == KMEM_CONDVAR )       return sizeof( remote_condvar_t );
    102103    else if( type == KMEM_MUTEX )         return sizeof( remote_mutex_t );
     104
    103105    else if( type == KMEM_DIR )           return sizeof( user_dir_t );
    104 
    105106        else if( type == KMEM_512_BYTES )     return 512;
    106107
     
    120121        else if( type == KMEM_CPU_CTX )       return "KMEM_CPU_CTX";
    121122        else if( type == KMEM_FPU_CTX )       return "KMEM_FPU_CTX";
    122         else if( type == KMEM_BARRIER )       return "KMEM_BARRIER";
    123 
     123        else if( type == KMEM_GEN_BARRIER )   return "KMEM_GEN_BARRIER";
     124
     125    else if( type == KMEM_SMP_BARRIER )   return "KMEM_SMP_BARRIER";
    124126    else if( type == KMEM_DEVFS_CTX )     return "KMEM_DEVFS_CTX";
    125127    else if( type == KMEM_FATFS_CTX )     return "KMEM_FATFS_CTX";
     
    131133    else if( type == KMEM_CONDVAR )       return "KMEM_CONDVAR";
    132134    else if( type == KMEM_MUTEX )         return "KMEM_MUTEX";
     135
    133136    else if( type == KMEM_DIR )           return "KMEM_DIR";
    134 
    135137        else if( type == KMEM_512_BYTES )     return "KMEM_512_BYTES";
    136138
  • trunk/kernel/mm/kmem.h

    r611 r619  
    4545    KMEM_CPU_CTX          = 7,   /*! hal_cpu_context_t                              */
    4646    KMEM_FPU_CTX          = 8,   /*! hal_fpu_context_t                              */
    47     KMEM_BARRIER          = 9,   /*! remote_barrier_t                               */
47    KMEM_GEN_BARRIER      = 9,   /*! generic_barrier_t                              */
    4848
    49     KMEM_DEVFS_CTX        = 10,  /*! fatfs_inode_t                                  */
    50     KMEM_FATFS_CTX        = 11,  /*! fatfs_ctx_t                                    */
    51     KMEM_VFS_CTX          = 12,  /*! vfs_context_t                                  */
    52     KMEM_VFS_INODE        = 13,  /*! vfs_inode_t                                    */
    53     KMEM_VFS_DENTRY       = 14,  /*! vfs_dentry_t                                   */
    54     KMEM_VFS_FILE         = 15,  /*! vfs_file_t                                     */
    55     KMEM_SEM              = 16,  /*! remote_sem_t                                   */
    56     KMEM_CONDVAR          = 17,  /*! remote_condvar_t                               */
    57     KMEM_MUTEX            = 18,  /*! remote_mutex_t                                 */
    58     KMEM_DIR              = 19,  /*! remote_dir_t                                   */
     49    KMEM_SMP_BARRIER      = 10,  /*! simple_barrier_t                               */
     50    KMEM_DEVFS_CTX        = 11,  /*! fatfs_inode_t                                  */
     51    KMEM_FATFS_CTX        = 12,  /*! fatfs_ctx_t                                    */
     52    KMEM_VFS_CTX          = 13,  /*! vfs_context_t                                  */
     53    KMEM_VFS_INODE        = 14,  /*! vfs_inode_t                                    */
     54    KMEM_VFS_DENTRY       = 15,  /*! vfs_dentry_t                                   */
     55    KMEM_VFS_FILE         = 16,  /*! vfs_file_t                                     */
     56    KMEM_SEM              = 17,  /*! remote_sem_t                                   */
     57    KMEM_CONDVAR          = 18,  /*! remote_condvar_t                               */
     58    KMEM_MUTEX            = 19,  /*! remote_mutex_t                                 */
    5959
    60     KMEM_512_BYTES        = 20,  /*! 512 bytes aligned                              */
     60    KMEM_DIR              = 20,  /*! remote_dir_t                                   */
     61    KMEM_512_BYTES        = 21,  /*! 512 bytes aligned                              */
    6162
    62     KMEM_TYPES_NR         = 21,
     63    KMEM_TYPES_NR         = 22,
    6364};
    6465
     
    9798 *************************************************************************************
    9899 * @ req   : local pointer to allocation request.
    99  * @ return a local pointer on page descriptor if PPM (i.e. type KMEM_PAGE).
     100 * @ return a local pointer on page descriptor if KMEM_PAGE.
    100101 *   return a local pointer to allocated buffer if KCM or KHM.
    101102 *   return NULL if no physical memory available.
  • trunk/kernel/mm/vmm.c

    r617 r619  
    800800    // scan the VSL to delete all registered vsegs
    801801    // (don't use a FOREACH for item deletion in xlist)
    802         while( !xlist_is_empty( root_xp ) )
     802
     803uint32_t count = 0;
     804
     805        while( !xlist_is_empty( root_xp ) && (count < 10 ) )
    803806        {
    804807        // get pointer on first vseg in VSL
     
    814817__FUNCTION__ , vseg_type_str( vseg->type ), vseg->vpn_base, vseg->vpn_size );
    815818#endif
     819
     820count++;
    816821
    817822        }
     
    14631468
    14641469#if DEBUG_VMM_ALLOCATE_PAGE
    1465 uint32_t   cycle = (uint32_t)hal_get_cycles();
    1466 thread_t * this  = CURRENT_THREAD;
     1470uint32_t   cycle   = (uint32_t)hal_get_cycles();
     1471thread_t * this    = CURRENT_THREAD;
     1472xptr_t     this_xp = XPTR( local_cxy , this );
    14671473if( DEBUG_VMM_ALLOCATE_PAGE < (uint32_t)hal_get_cycles() )
    14681474printk("\n[%s] thread[%x,%x] enter for vpn %x / cycle %d\n",
     
    19501956
    19511957#if DEBUG_VMM_HANDLE_COW
    1952 uint32_t   cycle = (uint32_t)hal_get_cycles();
    1953 thread_t * this  = CURRENT_THREAD;
     1958uint32_t   cycle   = (uint32_t)hal_get_cycles();
     1959thread_t * this    = CURRENT_THREAD;
     1960xptr_t     this_xp = XPTR( local_cxy , this );
    19541961if( DEBUG_VMM_HANDLE_COW < cycle )
    19551962printk("\n[%s] thread[%x,%x] enter for vpn %x / core[%x,%d] / cycle %d\n",
    1956 __FUNCTION__, process->pid, this->trdid, vpn, local_cxy, this->core->lid, cycle );
     1963__FUNCTION__, this->process->pid, this->trdid, vpn, local_cxy, this->core->lid, cycle );
    19571964#endif
    19581965
     
    19741981    }
    19751982
     1983#if( DEBUG_VMM_HANDLE_COW & 1)
     1984if( DEBUG_VMM_HANDLE_COW < cycle )
     1985printk("\n[%s] thread[%x,%x] get vseg for vpn %x\n",
     1986__FUNCTION__, this->process->pid, this->trdid, vpn );
     1987#endif
     1988
    19761989    // get reference GPT cluster and local pointer
    19771990    ref_cxy = GET_CXY( process->ref_xp );
     
    20012014                     &old_ppn );
    20022015
     2016#if( DEBUG_VMM_HANDLE_COW & 1)
     2017if( DEBUG_VMM_HANDLE_COW < cycle )
     2018printk("\n[%s] thread[%x,%x] get pte for vpn %x : ppn %x / attr %x\n",
     2019__FUNCTION__, this->process->pid, this->trdid, vpn, old_ppn, old_attr );
     2020#endif
     2021
    20032022    // the PTE must be mapped for a COW
    20042023    if( (old_attr & GPT_MAPPED) == 0 )
     
    20082027
    20092028        // release GPT lock in write mode
    2010         remote_rwlock_wr_acquire( gpt_lock_xp );
     2029        remote_rwlock_wr_release( gpt_lock_xp );
    20112030
    20122031        return EXCP_KERNEL_PANIC;
    20132032    }
    20142033
    2015     // get extended pointer, cluster and local pointer on physical page descriptor
     2034    // get pointers on physical page descriptor
    20162035    xptr_t   page_xp  = ppm_ppn2page( old_ppn );
    20172036    cxy_t    page_cxy = GET_CXY( page_xp );
     
    20282047    uint32_t forks = hal_remote_l32( forks_xp );
    20292048
     2049#if( DEBUG_VMM_HANDLE_COW & 1)
     2050if( DEBUG_VMM_HANDLE_COW < cycle )
     2051printk("\n[%s] thread[%x,%x] get forks = %d for vpn %x\n",
     2052__FUNCTION__, this->process->pid, this->trdid, forks, vpn );
     2053#endif
     2054
    20302055    if( forks )        // pending fork => allocate a new page, and copy old to new
    20312056    {
    2032         // allocate a new physical page
     2057        // decrement pending forks counter in page descriptor
     2058        hal_remote_atomic_add( forks_xp , -1 );
     2059
     2060        // release lock protecting "forks" counter
     2061        remote_busylock_release( forks_lock_xp );
     2062
     2063        // allocate a new page
    20332064        page_xp = vmm_page_allocate( vseg , vpn );
     2065
    20342066        if( page_xp == XPTR_NULL )
    20352067        {
     
    20402072            remote_rwlock_wr_acquire( gpt_lock_xp );
    20412073
    2042             // release lock protecting "forks" counter
    2043             remote_busylock_release( forks_lock_xp );
    2044 
    20452074            return EXCP_KERNEL_PANIC;
    20462075        }
     
    20492078        new_ppn = ppm_page2ppn( page_xp );
    20502079
     2080#if( DEBUG_VMM_HANDLE_COW & 1)
     2081if( DEBUG_VMM_HANDLE_COW < cycle )
     2082printk("\n[%s] thread[%x,%x] get new ppn %x for vpn %x\n",
     2083__FUNCTION__, this->process->pid, this->trdid, new_ppn, vpn );
     2084#endif
     2085
    20512086        // copy old page content to new page
    2052         xptr_t  old_base_xp = ppm_ppn2base( old_ppn );
    2053         xptr_t  new_base_xp = ppm_ppn2base( new_ppn );
    2054         memcpy( GET_PTR( new_base_xp ),
    2055                 GET_PTR( old_base_xp ),
    2056                 CONFIG_PPM_PAGE_SIZE );
    2057 
    2058         // decrement pending forks counter in page descriptor
    2059         hal_remote_atomic_add( forks_xp , -1 );
     2087        hal_remote_memcpy( ppm_ppn2base( new_ppn ),
     2088                           ppm_ppn2base( old_ppn ),
     2089                           CONFIG_PPM_PAGE_SIZE );
    20602090
    20612091#if(DEBUG_VMM_HANDLE_COW & 1)
    20622092if( DEBUG_VMM_HANDLE_COW < cycle )
    2063 printk("\n[%s] thread[%x,%x] : pending forks => allocate a new PPN %x\n",
    2064 __FUNCTION__, process->pid, this->trdid, new_ppn );
     2093printk("\n[%s] thread[%x,%x] copied old page to new page\n",
     2094__FUNCTION__, this->process->pid, this->trdid );
    20652095#endif
    20662096
     
    20682098    else               // no pending fork => keep the existing page
    20692099    {
     2100        // release lock protecting "forks" counter
     2101        remote_busylock_release( forks_lock_xp );
    20702102
    20712103#if(DEBUG_VMM_HANDLE_COW & 1)
    20722104if( DEBUG_VMM_HANDLE_COW < cycle )
    2073 printk("\n[%s] thread[%x,%x]  no pending forks => keep existing PPN %x\n",
    2074 __FUNCTION__, process->pid, this->trdid, new_ppn );
     2105printk("\n[%s] thread[%x,%x]  no pending forks / keep existing PPN %x\n",
     2106__FUNCTION__, this->process->pid, this->trdid, old_ppn );
    20752107#endif
    20762108        new_ppn = old_ppn;
    20772109    }
    2078 
    2079     // release lock protecting "forks" counter
    2080     remote_busylock_release( forks_lock_xp );
    20812110
    20822111    // build new_attr : reset COW and set WRITABLE,
    20832112    new_attr = (old_attr | GPT_WRITABLE) & (~GPT_COW);
    20842113
    2085     // update the relevan GPT
     2114    // update the relevant GPT
    20862115    // - private vseg => update local GPT
    20872116    // - public vseg => update all GPT copies
     
    21192148if( DEBUG_VMM_HANDLE_COW < cycle )
    21202149printk("\n[%s] thread[%x,%x] exit for vpn %x / core[%x,%d] / cycle %d\n",
    2121 __FUNCTION__, process->pid, this->trdid, vpn, local_cxy, this->core->lid, cycle );
     2150__FUNCTION__, this->process->pid, this->trdid, vpn, local_cxy, this->core->lid, cycle );
    21222151#endif
    21232152
  • trunk/kernel/syscalls/sys_barrier.c

    r581 r619  
    2424#include <hal_kernel_types.h>
    2525#include <hal_special.h>
     26#include <hal_uspace.h>
    2627#include <errno.h>
    2728#include <thread.h>
     
    4344
    4445//////////////////////////////////
    45 int sys_barrier( void     * vaddr,
     46int sys_barrier( intptr_t  vaddr,
    4647                 uint32_t   operation,
    47                  uint32_t   count )
     48                 uint32_t   count,
     49                 intptr_t   attr )   
    4850{
    49         error_t      error;
    50     vseg_t     * vseg;
    51  
    52     thread_t   * this    = CURRENT_THREAD;
    53     process_t  * process = this->process;
     51        error_t                 error;
     52    vseg_t                * vseg;
     53    pthread_barrierattr_t   k_attr;
     54
     55    thread_t  * this    = CURRENT_THREAD;
     56    process_t * process = this->process;
    5457
    5558#if DEBUG_SYS_BARRIER
     
    5861tm_start = hal_get_cycles();
    5962if( DEBUG_SYS_BARRIER < tm_start )
    60 printk("\n[DBG] %s : thread %x in process %x enter for %s / count %d / cycle %d\n",
    61 __FUNCTION__, this->trdid, process->pid, sys_barrier_op_str(operation), count,
     63printk("\n[%s] thread[%x,%x] enters for %s / count %d / cycle %d\n",
     64__FUNCTION__, process->pid, this->trdid, sys_barrier_op_str(operation), count,
    6265(uint32_t)tm_start );
    6366#endif
    6467
    6568    // check vaddr in user vspace
    66         error = vmm_get_vseg( process , (intptr_t)vaddr , &vseg );
    67 
     69        error = vmm_get_vseg( process , vaddr , &vseg );
    6870        if( error )
    6971    {
     
    7173#if DEBUG_SYSCALLS_ERROR
    7274printk("\n[ERROR] in %s : unmapped barrier %x / thread %x / process %x\n",
    73 __FUNCTION__ , (intptr_t)vaddr , this->trdid , process->pid );
     75__FUNCTION__ , vaddr , this->trdid , process->pid );
    7476vmm_display( process , false );
    7577#endif
     
    8486            case BARRIER_INIT:
    8587        {
    86             error = remote_barrier_create( (intptr_t)vaddr , count );
    87    
     88            if( attr != 0 )   // QDT barrier required
     89            {
     90                error = vmm_get_vseg( process , attr , &vseg );
     91                if( error )
     92                {
     93
     94#if DEBUG_SYSCALLS_ERROR
     95printk("\n[ERROR] in %s : unmapped barrier attributes %x / thread %x / process %x\n",
     96__FUNCTION__ , attr , this->trdid , process->pid );
     97vmm_display( process , false );
     98#endif
     99                    this->errno = EINVAL;
     100                    return -1;
     101                }
     102 
     103                // copy barrier attributes into kernel space
     104                hal_copy_from_uspace( &k_attr , (void*)attr , sizeof(pthread_barrierattr_t) );
     105
     106                if ( count != k_attr.x_size * k_attr.y_size *k_attr.nthreads ) 
     107                {
     108
     109#if DEBUG_SYSCALLS_ERROR
     110printk("\n[ERROR] in %s : wrong arguments / count %d / x_size %d / y_size %d / nthreads %x\n",
     111__FUNCTION__, count, k_attr.x_size, k_attr.y_size, k_attr.nthreads );
     112#endif
     113                    this->errno = EINVAL;
     114                    return -1;
     115                }
     116 
     117
     118                // call relevant system function
     119                error = generic_barrier_create( vaddr , count , &k_attr );
     120            }
     121            else               // simple barrier required
     122            {
     123                error = generic_barrier_create( vaddr , count , NULL );
     124            }
     125
    88126                    if( error )
    89127            {
     
    91129#if DEBUG_SYSCALLS_ERROR
    92130printk("\n[ERROR] in %s : cannot create barrier %x / thread %x / process %x\n",
    93 __FUNCTION__ , (intptr_t)vaddr , this->trdid , process->pid );
     131__FUNCTION__ , vaddr , this->trdid , process->pid );
    94132#endif
    95                 this->errno = error;
     133                this->errno = ENOMEM;
    96134                return -1;
    97135            }
     
    101139            case BARRIER_WAIT:
    102140        {
    103             xptr_t barrier_xp = remote_barrier_from_ident( (intptr_t)vaddr );
     141            xptr_t barrier_xp = generic_barrier_from_ident( vaddr );
    104142
    105143            if( barrier_xp == XPTR_NULL )     // user error
     
    115153            else                          // success
    116154            {
    117                 remote_barrier_wait( barrier_xp );
     155                generic_barrier_wait( barrier_xp );
    118156            }
    119157            break;
     
    122160            case BARRIER_DESTROY:
    123161        {
    124             xptr_t barrier_xp = remote_barrier_from_ident( (intptr_t)vaddr );
     162            xptr_t barrier_xp = generic_barrier_from_ident( vaddr );
    125163
    126164            if( barrier_xp == XPTR_NULL )     // user error
     
    136174            else                          // success
    137175            {
    138                 remote_barrier_destroy( barrier_xp );
     176                generic_barrier_destroy( barrier_xp );
    139177            }
    140178            break;
     
    149187tm_end = hal_get_cycles();
    150188if( DEBUG_SYS_BARRIER < tm_end )
    151 printk("\n[DBG] %s : thread %x in process %x exit for %s / cost %d / cycle %d\n",
    152 __FUNCTION__, this->trdid, process->pid, sys_barrier_op_str(operation),
     189printk("\n[%s] thread[%x,%x] exit for %s / cost %d / cycle %d\n",
     190__FUNCTION__, process->pid, this->trdid, sys_barrier_op_str(operation),
    153191(uint32_t)(tm_end - tm_start), (uint32_t)tm_end );
    154192#endif
  • trunk/kernel/syscalls/sys_display.c

    r614 r619  
    7777tm_start = hal_get_cycles();
    7878if( DEBUG_SYS_DISPLAY < tm_start )
    79 printk("\n[DBG] %s : thread[%x,%x] enter / type  %s / cycle = %d\n",
     79printk("\n[%s] thread[%x,%x] enter / type  %s / cycle = %d\n",
    8080__FUNCTION__, process->pid, this->trdid, display_type_str(type), (uint32_t)tm_start );
    8181#endif
     
    8484    if( type == DISPLAY_STRING )
    8585    {
    86         char      kbuf[256];
     86        char      kbuf[512];
    8787        uint32_t  length;
    8888
     
    106106        length = hal_strlen_from_uspace( string );
    107107
    108         if( length >= 256 )
     108        if( length >= 512 )
    109109        {
    110110
     
    118118
    119119        // copy string to kernel space
    120         hal_strcpy_from_uspace( kbuf , string , 256 );
     120        hal_strcpy_from_uspace( kbuf , string , 512 );
    121121
    122122        // print message on TXT0 kernel terminal
     
    281281        }
    282282
    283         thread_display_busylocks( thread_xp );
     283        thread_display_busylocks( thread_xp , __FUNCTION__ );
    284284    }
    285285    /////////////////////////////////
     
    388388#if DEBUG_SYS_DISPLAY
    389389if( DEBUG_SYS_DISPLAY < tm_end )
    390 printk("\n[DBG] %s : thread[%x,%x] exit / cycle %d\n",
     390printk("\n[%s] thread[%x,%x] exit / cycle %d\n",
    391391__FUNCTION__, process->pid, this->trdid, (uint32_t)tm_end );
    392392#endif
  • trunk/kernel/syscalls/sys_exit.c

    r584 r619  
    5858tm_start = hal_get_cycles();
    5959if( DEBUG_SYS_EXIT < tm_start )
    60 printk("\n[DBG] %s : thread[%x,%x] enter / status %x / cycle %d\n",
     60printk("\n[%s] thread[%x,%x] enter / status %x / cycle %d\n",
    6161__FUNCTION__, process->pid, this->trdid , status , (uint32_t)tm_start );
    6262#endif
     
    6969#if (DEBUG_SYS_EXIT & 1)
    7070if( DEBUG_SYS_EXIT < tm_start )
    71 printk("\n[DBG] %s : thread[%x,%x] get owner process in cluster %x\n",
     71printk("\n[%s] thread[%x,%x] get owner process in cluster %x\n",
    7272__FUNCTION__, process->pid, this->trdid, owner_cxy );
    7373#endif
     
    8383#if (DEBUG_SYS_EXIT & 1)
    8484if( DEBUG_SYS_EXIT < tm_start )
    85 printk("\n[DBG] %s : thread[%x,%x] get parent process in cluster %x\n",
     85printk("\n[%s] thread[%x,%x] get parent process in cluster %x\n",
    8686__FUNCTION__, process->pid, this->trdid, parent_cxy );
    8787#endif
     
    9696#if( DEBUG_SYS_EXIT & 1)
    9797if( DEBUG_SYS_EXIT < tm_start )
    98 printk("\n[DBG] %s : thread[%x,%x] detached process from TXT\n",
     98printk("\n[%s] thread[%x,%x] detached process from TXT\n",
    9999__FUNCTION__, process->pid, this->trdid );
    100100#endif
     
    106106#if( DEBUG_SYS_EXIT & 1)
    107107if( DEBUG_SYS_EXIT < tm_start )
    108 printk("\n[DBG] %s : thread[%x,%x] deleted all threads but itself\n",
     108printk("\n[%s] thread[%x,%x] deleted all threads but itself\n",
    109109__FUNCTION__, process->pid, this->trdid );
    110110#endif
     
    116116#if( DEBUG_SYS_EXIT & 1)
    117117if( tm_start > DEBUG_SYS_EXIT )
    118 printk("\n[DBG] %s : thread[%x,%x] marked iself for delete\n",
118printk("\n[%s] thread[%x,%x] marked itself for delete\n",
    119119__FUNCTION__, process->pid, this->trdid );
    120120#endif
     
    127127#if( DEBUG_SYS_EXIT & 1)
    128128if( tm_start > DEBUG_SYS_EXIT )
    129 printk("\n[DBG] %s : thread[%x,%x] blocked main thread\n",
     129printk("\n[%s] thread[%x,%x] blocked main thread\n",
    130130__FUNCTION__, process->pid, this->trdid );
    131131#endif
     
    138138#if( DEBUG_SYS_EXIT & 1)
    139139if( tm_start > DEBUG_SYS_EXIT )
    140 printk("\n[DBG] %s : thread[%x,%x] set exit status %x in owner process\n",
     140printk("\n[%s] thread[%x,%x] set exit status %x in owner process\n",
    141141__FUNCTION__, process->pid, this->trdid, term_state );
    142142#endif
     
    147147#if( DEBUG_SYS_EXIT & 1)
    148148if( tm_start > DEBUG_SYS_EXIT )
    149 printk("\n[DBG] %s : thread[%x,%x] unblocked parent main thread in process %x\n",
     149printk("\n[%s] thread[%x,%x] unblocked parent main thread in process %x\n",
    150150__FUNCTION__ , process->pid, this->trdid,
    151151hal_remote_l32( XPTR( parent_cxy , &parent_ptr->pid) ) );
     
    157157tm_end = hal_get_cycles();
    158158if( DEBUG_SYS_EXIT < tm_end )
    159 printk("\n[DBG] %s : thread[%x,%x] exit / status %x / cost = %d / cycle %d\n",
     159printk("\n[%s] thread[%x,%x] exit / status %x / cost = %d / cycle %d\n",
    160160__FUNCTION__, process->pid, this->trdid, status,
    161161(uint32_t)(tm_end - tm_start), (uint32_t)tm_end );
  • trunk/kernel/syscalls/sys_thread_create.c

    r594 r619  
    7070if( DEBUG_SYS_THREAD_CREATE < tm_start )
    7171printk("\n[%s] thread[%x,%x] enter / cycle %d\n",
    72 __FUNCTION__, process_pid, parent->trdid, (uint32_t)tm_start );
     72__FUNCTION__, process->pid, parent->trdid, (uint32_t)tm_start );
    7373#endif
    7474
  • trunk/kernel/syscalls/sys_thread_exit.c

    r584 r619  
    6464uint64_t     tm_start = hal_get_cycles();
    6565if( DEBUG_SYS_THREAD_EXIT < tm_start )
    66 printk("\n[DBG] %s : thread[%x,%x] / main => delete process / cycle %d\n",
     66printk("\n[%s] thread[%x,%x] / main => delete process / cycle %d\n",
    6767__FUNCTION__ , pid , trdid , (uint32_t)tm_start );
    6868#endif
     
    7676uint64_t     tm_start = hal_get_cycles();
    7777if( DEBUG_SYS_THREAD_EXIT < tm_start )
    78 printk("\n[DBG] %s : thread[%x,%x] / not main => delete thread / cycle %d\n",
     78printk("\n[%s] thread[%x,%x] / not main => delete thread / cycle %d\n",
    7979__FUNCTION__ , pid , trdid , (uint32_t)tm_start );
    8080#endif
  • trunk/kernel/syscalls/syscalls.h

    r611 r619  
    146146 * The code implementting the operations is defined in the remote_barrier.c file.
    147147 ******************************************************************************************
    148  * @ vaddr     : barrier virtual address in user space == identifier.
     148 * @ vaddr     : barrier address in user space == identifier.
    149149 * @ operation : BARRIER_INIT / BARRIER_DESTROY / BARRIER_WAIT.
    150  * @ count     : number of expected threads (only used by BARRIER_INIT operation).
    151  * @ return 0 if success / return -1 if failure.
    152  *****************************************************************************************/
    153 int sys_barrier( void     * vaddr,
     150 * @ count     : number of expected threads (only used by BARRIER_INIT).
     151 * @ attr      : barrier attributes address in user space (only used by BARRIER_INIT).
     152 * @ return 0 if success / return -1 if failure.
     153 *****************************************************************************************/
     154int sys_barrier( intptr_t   vaddr,
    154155                 uint32_t   operation,
    155                  uint32_t   count );
     156                 uint32_t   count,
     157                 intptr_t   attr );
    156158
    157159/******************************************************************************************
Note: See TracChangeset for help on using the changeset viewer.