Changeset 619 for trunk/kernel/libk
- Timestamp:
- Feb 12, 2019, 1:15:47 PM (6 years ago)
- Location:
- trunk/kernel/libk
- Files:
-
- 6 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/kernel/libk/remote_barrier.c
r581 r619 2 2 * remote_barrier.c - POSIX barrier implementation. 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 23 23 24 24 #include <hal_kernel_types.h> 25 #include <hal_macros.h> 25 26 #include <hal_remote.h> 26 27 #include <hal_irqmask.h> … … 33 34 #include <remote_barrier.h> 34 35 36 //////////////////////////////////////////////////// 37 // generic (implementation independant) functions 38 //////////////////////////////////////////////////// 35 39 36 40 /////////////////////////////////////////////////// 37 xptr_t remote_barrier_from_ident( intptr_t ident )41 xptr_t generic_barrier_from_ident( intptr_t ident ) 38 42 { 39 43 // get pointer on local process_descriptor 40 44 process_t * process = CURRENT_THREAD->process; 41 45 42 // get extended pointer on reference process 43 xptr_t ref_xp = process->ref_xp; 44 45 // get cluster and local pointer on reference process 46 // get pointers on reference process 47 xptr_t ref_xp = process->ref_xp; 46 48 cxy_t ref_cxy = GET_CXY( ref_xp ); 47 49 process_t * ref_ptr = (process_t *)GET_PTR( ref_xp ); … … 51 53 52 54 // scan reference process barriers list 53 xptr_t iter_xp;54 xptr_t barrier_xp;55 cxy_t barrier_cxy;56 remote_barrier_t * barrier_ptr;57 intptr_t current;58 bool_t found = false;55 xptr_t iter_xp; 56 xptr_t barrier_xp; 57 cxy_t barrier_cxy; 58 generic_barrier_t * barrier_ptr; 59 intptr_t current; 60 bool_t found = false; 59 61 60 62 XLIST_FOREACH( root_xp , iter_xp ) 61 63 { 62 barrier_xp = XLIST_ELEMENT( iter_xp , remote_barrier_t , list );64 barrier_xp = XLIST_ELEMENT( iter_xp , generic_barrier_t , list ); 63 65 barrier_cxy = GET_CXY( barrier_xp ); 64 barrier_ptr = ( remote_barrier_t *)GET_PTR( barrier_xp );66 barrier_ptr = (generic_barrier_t *)GET_PTR( barrier_xp ); 65 67 current = (intptr_t)hal_remote_lpt( XPTR( barrier_cxy , &barrier_ptr->ident ) ); 66 68 if( ident == current ) … … 73 75 if( found == 
false ) return XPTR_NULL; 74 76 else return barrier_xp; 75 } 76 77 ////////////////////////////////////////////// 78 error_t remote_barrier_create( intptr_t ident, 79 uint32_t count ) 77 78 } // end generic_barrier_from_ident() 79 80 ////////////////////////////////////////////////////////////// 81 error_t generic_barrier_create( intptr_t ident, 82 uint32_t count, 83 pthread_barrierattr_t * attr ) 84 { 85 xptr_t gen_barrier_xp; // extended pointer on generic barrier descriptor 86 generic_barrier_t * gen_barrier_ptr; // local pointer on generic barrier descriptor 87 void * barrier; // local pointer on implementation barrier descriptor 88 kmem_req_t req; // kmem request 89 90 // get pointer on local process_descriptor 91 process_t * process = CURRENT_THREAD->process; 92 93 // get pointers on reference process 94 xptr_t ref_xp = process->ref_xp; 95 cxy_t ref_cxy = GET_CXY( ref_xp ); 96 process_t * ref_ptr = (process_t *)GET_PTR( ref_xp ); 97 98 // allocate memory for generic barrier descriptor 99 if( ref_cxy == local_cxy ) // reference cluster is local 100 { 101 req.type = KMEM_GEN_BARRIER; 102 req.flags = AF_ZERO; 103 gen_barrier_ptr = kmem_alloc( &req ); 104 gen_barrier_xp = XPTR( local_cxy , gen_barrier_ptr ); 105 } 106 else // reference cluster is remote 107 { 108 rpc_kcm_alloc_client( ref_cxy, 109 KMEM_GEN_BARRIER, 110 &gen_barrier_xp ); 111 gen_barrier_ptr = GET_PTR( gen_barrier_xp ); 112 } 113 114 if( gen_barrier_ptr == NULL ) 115 { 116 printk("\n[ERROR] in %s : cannot create generic barrier\n", __FUNCTION__ ); 117 return -1; 118 } 119 120 // create implementation specific barrier descriptor 121 if( attr == NULL ) // simple barrier implementation 122 { 123 // create simple barrier descriptor 124 barrier = simple_barrier_create( count ); 125 126 if( barrier == NULL ) 127 { 128 printk("\n[ERROR] in %s : cannot create simple barrier\n", __FUNCTION__); 129 return -1; 130 } 131 } 132 else // QDT barrier implementation 133 { 134 uint32_t x_size = attr->x_size; 135 
uint32_t y_size = attr->y_size; 136 uint32_t nthreads = attr->nthreads; 137 138 // check attributes / count 139 if( (x_size * y_size * nthreads) != count ) 140 { 141 printk("\n[ERROR] in %s : count(%d) != x_size(%d) * y_size(%d) * nthreads(%d)\n", 142 __FUNCTION__, count, x_size, y_size, nthreads ); 143 return -1; 144 } 145 146 // create DQT barrier descriptor 147 barrier = dqt_barrier_create( x_size , y_size , nthreads ); 148 149 if( barrier == NULL ) 150 { 151 printk("\n[ERROR] in %s : cannot create DQT barrier descriptor\n", __FUNCTION__); 152 return -1; 153 } 154 } 155 156 // initialize the generic barrier descriptor 157 hal_remote_spt( XPTR( ref_cxy , &gen_barrier_ptr->ident ) , (void*)ident ); 158 hal_remote_s32( XPTR( ref_cxy , &gen_barrier_ptr->is_dqt ) , (attr != NULL) ); 159 hal_remote_spt( XPTR( ref_cxy , &gen_barrier_ptr->extend ) , barrier ); 160 161 // build extended pointers on lock, root and entry for reference process xlist 162 xptr_t root_xp = XPTR( ref_cxy , &ref_ptr->barrier_root ); 163 xptr_t lock_xp = XPTR( ref_cxy , &ref_ptr->sync_lock ); 164 xptr_t entry_xp = XPTR( ref_cxy , &gen_barrier_ptr->list ); 165 166 // register barrier in reference process xlist of barriers 167 remote_busylock_acquire( lock_xp ); 168 xlist_add_first( root_xp , entry_xp ); 169 remote_busylock_release( lock_xp ); 170 171 return 0; 172 173 } // en generic_barrier_create() 174 175 ///////////////////////////////////////////////////// 176 void generic_barrier_destroy( xptr_t gen_barrier_xp ) 177 { 178 kmem_req_t req; // kmem request 179 180 // get pointer on local process_descriptor 181 process_t * process = CURRENT_THREAD->process; 182 183 // get pointers on reference process 184 xptr_t ref_xp = process->ref_xp; 185 cxy_t ref_cxy = GET_CXY( ref_xp ); 186 process_t * ref_ptr = GET_PTR( ref_xp ); 187 188 // get cluster and local pointer on generic barrier descriptor 189 generic_barrier_t * gen_barrier_ptr = GET_PTR( gen_barrier_xp ); 190 cxy_t gen_barrier_cxy = GET_CXY( 
gen_barrier_xp ); 191 192 // get barrier type and extension pointer 193 bool_t is_dqt = hal_remote_l32( XPTR( gen_barrier_cxy , &gen_barrier_ptr->is_dqt ) ); 194 void * extend = hal_remote_lpt( XPTR( gen_barrier_cxy , &gen_barrier_ptr->extend ) ); 195 196 // build extended pointer on implementation dependant barrier descriptor 197 xptr_t barrier_xp = XPTR( gen_barrier_cxy , extend ); 198 199 // delete the implementation specific barrier 200 if( is_dqt ) dqt_barrier_destroy( barrier_xp ); 201 else simple_barrier_destroy( barrier_xp ); 202 203 // build extended pointers on lock and entry for reference process xlist 204 xptr_t lock_xp = XPTR( ref_cxy , &ref_ptr->sync_lock ); 205 xptr_t entry_xp = XPTR( gen_barrier_cxy , &gen_barrier_ptr->list ); 206 207 // remove barrier from reference process xlist 208 remote_busylock_acquire( lock_xp ); 209 xlist_unlink( entry_xp ); 210 remote_busylock_release( lock_xp ); 211 212 // release memory allocated to barrier descriptor 213 if( gen_barrier_cxy == local_cxy ) 214 { 215 req.type = KMEM_GEN_BARRIER; 216 req.ptr = gen_barrier_ptr; 217 kmem_free( &req ); 218 } 219 else 220 { 221 rpc_kcm_free_client( gen_barrier_cxy, 222 gen_barrier_ptr, 223 KMEM_GEN_BARRIER ); 224 } 225 } // end generic_barrier_destroy() 226 227 ////////////////////////////////////////////////// 228 void generic_barrier_wait( xptr_t gen_barrier_xp ) 229 { 230 // get generic barrier descriptor cluster and pointer 231 cxy_t gen_barrier_cxy = GET_CXY( gen_barrier_xp ); 232 generic_barrier_t * gen_barrier_ptr = GET_PTR( gen_barrier_xp ); 233 234 // get implementation type and extend local pointer 235 bool_t is_dqt = hal_remote_l32( XPTR( gen_barrier_cxy , &gen_barrier_ptr->is_dqt ) ); 236 void * extend = hal_remote_lpt( XPTR( gen_barrier_cxy , &gen_barrier_ptr->extend ) ); 237 238 // build extended pointer on implementation specific barrier descriptor 239 xptr_t barrier_xp = XPTR( gen_barrier_cxy , extend ); 240 241 // call the relevant wait function 242 if( is_dqt 
) dqt_barrier_wait( barrier_xp ); 243 else simple_barrier_wait( barrier_xp ); 244 245 } // end generic_barrier_wait() 246 247 248 249 250 251 ///////////////////////////////////////////////////////////// 252 // simple barrier functions 253 ///////////////////////////////////////////////////////////// 254 255 /////////////////////////////////////////////////////////// 256 simple_barrier_t * simple_barrier_create( uint32_t count ) 80 257 { 81 258 xptr_t barrier_xp; 82 remote_barrier_t * barrier_ptr;83 84 // get pointer on local process descriptor259 simple_barrier_t * barrier; 260 261 // get pointer on local client process descriptor 85 262 thread_t * this = CURRENT_THREAD; 86 263 process_t * process = this->process; 87 264 88 #if DEBUG_BARRIER 265 // get reference process cluster 266 xptr_t ref_xp = process->ref_xp; 267 cxy_t ref_cxy = GET_CXY( ref_xp ); 268 269 // allocate memory for simple barrier descriptor 270 if( ref_cxy == local_cxy ) // reference is local 271 { 272 kmem_req_t req; 273 req.type = KMEM_SMP_BARRIER; 274 req.flags = AF_ZERO; 275 barrier = kmem_alloc( &req ); 276 barrier_xp = XPTR( local_cxy , barrier ); 277 } 278 else // reference is remote 279 { 280 rpc_kcm_alloc_client( ref_cxy, 281 KMEM_SMP_BARRIER, 282 &barrier_xp ); 283 barrier = GET_PTR( barrier_xp ); 284 } 285 286 if( barrier == NULL ) return NULL; 287 288 // initialise simple barrier descriptor 289 hal_remote_s32 ( XPTR( ref_cxy , &barrier->arity ) , count ); 290 hal_remote_s32 ( XPTR( ref_cxy , &barrier->current ) , 0 ); 291 hal_remote_s32 ( XPTR( ref_cxy , &barrier->sense ) , 0 ); 292 293 xlist_root_init ( XPTR( ref_cxy , &barrier->root ) ); 294 remote_busylock_init( XPTR( ref_cxy , &barrier->lock ) , LOCK_BARRIER_STATE ); 295 296 #if DEBUG_BARRIER_CREATE 89 297 uint32_t cycle = (uint32_t)hal_get_cycles(); 90 if( cycle > DEBUG_BARRIER ) 91 printk("\n[DBG] %s : thread %x in process %x enter / count %d / cycle %d\n", 92 __FUNCTION__, this->trdid, process->pid, count, cycle ); 93 #endif 94 
95 // get extended pointer on reference process 96 xptr_t ref_xp = process->ref_xp; 97 98 // get reference process cluster and local pointer 99 cxy_t ref_cxy = GET_CXY( ref_xp ); 100 process_t * ref_ptr = GET_PTR( ref_xp ); 101 102 // allocate memory for barrier descriptor 103 if( ref_cxy == local_cxy ) // local cluster is the reference 104 { 105 kmem_req_t req; 106 req.type = KMEM_BARRIER; 107 req.flags = AF_ZERO; 108 barrier_ptr = kmem_alloc( &req ); 109 barrier_xp = XPTR( local_cxy , barrier_ptr ); 110 } 111 else // reference is remote 112 { 113 rpc_kcm_alloc_client( ref_cxy , KMEM_BARRIER , &barrier_xp ); 114 barrier_ptr = (remote_barrier_t *)GET_PTR( barrier_xp ); 115 } 116 117 if( barrier_ptr == NULL ) return ENOMEM; 118 119 // initialise barrier 120 hal_remote_s32( XPTR( ref_cxy , &barrier_ptr->nb_threads ) , count ); 121 hal_remote_s32( XPTR( ref_cxy , &barrier_ptr->current ) , 0 ); 122 hal_remote_s32( XPTR( ref_cxy , &barrier_ptr->sense ) , 0 ); 123 hal_remote_spt( XPTR( ref_cxy , &barrier_ptr->ident ) , (void*)ident ); 124 125 xlist_root_init( XPTR( ref_cxy , &barrier_ptr->root ) ); 126 127 // register barrier in reference process xlist 128 xptr_t root_xp = XPTR( ref_cxy , &ref_ptr->barrier_root ); 129 xptr_t entry_xp = XPTR( ref_cxy , &barrier_ptr->list ); 130 131 remote_busylock_acquire( XPTR( ref_cxy , &ref_ptr->sync_lock ) ); 132 xlist_add_first( root_xp , entry_xp ); 133 remote_busylock_release( XPTR( ref_cxy , &ref_ptr->sync_lock ) ); 134 135 #if DEBUG_BARRIER 136 cycle = (uint32_t)hal_get_cycles(); 137 if( cycle > DEBUG_BARRIER ) 138 printk("\n[DBG] %s : thread %x in process %x exit / barrier %x in cluster %x / cycle %d\n", 139 __FUNCTION__, this->trdid, process->pid, barrier_ptr, ref_cxy, cycle ); 140 #endif 141 142 return 0; 143 144 } // end remote_barrier_create() 298 if( cycle > DEBUG_BARRIER_CREATE ) 299 printk("\n[%s] thread[%x,%x] created barrier (%x,%x) / count %d / cycle %d\n", 300 __FUNCTION__, process->pid, this->trdid, ref_cxy, barrier, 
count, cycle ); 301 #endif 302 303 return barrier; 304 305 } // end simple_barrier_create() 145 306 146 307 //////////////////////////////////////////////// 147 void remote_barrier_destroy( xptr_t barrier_xp )308 void simple_barrier_destroy( xptr_t barrier_xp ) 148 309 { 149 // get pointer on local process descriptor150 process_t * process = CURRENT_THREAD->process;151 152 // get extended pointer on reference process153 xptr_t ref_xp = process->ref_xp;154 155 // get reference process cluster and local pointer156 cxy_t ref_cxy = GET_CXY( ref_xp );157 process_t * ref_ptr = (process_t *)GET_PTR( ref_xp );158 159 310 // get barrier cluster and local pointer 160 311 cxy_t barrier_cxy = GET_CXY( barrier_xp ); 161 remote_barrier_t * barrier_ptr = (remote_barrier_t *)GET_PTR( barrier_xp ); 162 163 // remove barrier from reference process xlist 164 remote_busylock_acquire( XPTR( ref_cxy , &ref_ptr->sync_lock ) ); 165 xlist_unlink( XPTR( barrier_cxy , &barrier_ptr->list ) ); 166 remote_busylock_release( XPTR( ref_cxy , &ref_ptr->sync_lock ) ); 312 simple_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); 167 313 168 314 // release memory allocated for barrier descriptor 169 if( barrier_cxy == local_cxy ) // reference is local315 if( barrier_cxy == local_cxy ) 170 316 { 171 317 kmem_req_t req; 172 req.type = KMEM_ BARRIER;318 req.type = KMEM_SMP_BARRIER; 173 319 req.ptr = barrier_ptr; 174 320 kmem_free( &req ); 175 321 } 176 else // reference is remote 177 { 178 rpc_kcm_free_client( barrier_cxy , barrier_ptr , KMEM_BARRIER ); 179 } 180 } // end remote_barrier_destroy() 322 else 323 { 324 rpc_kcm_free_client( barrier_cxy, 325 barrier_ptr, 326 KMEM_SMP_BARRIER ); 327 } 328 329 #if DEBUG_BARRIER_DESTROY 330 uint32_t cycle = (uint32_t)hal_get_cycles(); 331 thread_t * this = CURRENT_THREAD; 332 process_t * process = this->process; 333 if( cycle > DEBUG_BARRIER_DESTROY ) 334 printk("\n[%s] thread[%x,%x] deleted barrier (%x,%x) / cycle %d\n", 335 __FUNCTION__, process->pid, 
this->trdid, barrier_ptr, barrier_cxy, cycle ); 336 #endif 337 338 } // end simple_barrier_destroy() 181 339 182 340 ///////////////////////////////////////////// 183 void remote_barrier_wait( xptr_t barrier_xp )341 void simple_barrier_wait( xptr_t barrier_xp ) 184 342 { 185 343 uint32_t expected; 186 344 uint32_t sense; 187 345 uint32_t current; 188 uint32_t nb_threads;346 uint32_t arity; 189 347 xptr_t root_xp; 190 348 xptr_t lock_xp; 191 349 xptr_t current_xp; 192 350 xptr_t sense_xp; 193 xptr_t nb_threads_xp;351 xptr_t arity_xp; 194 352 195 353 // get pointer on calling thread … … 200 358 201 359 // get cluster and local pointer on remote barrier 202 remote_barrier_t * barrier_ptr = GET_PTR( barrier_xp );360 simple_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); 203 361 cxy_t barrier_cxy = GET_CXY( barrier_xp ); 204 362 205 #if DEBUG_BARRIER 363 #if DEBUG_BARRIER_WAIT 206 364 uint32_t cycle = (uint32_t)hal_get_cycles(); 207 if( cycle > DEBUG_BARRIER )208 printk("\n[ DBG] %s : thread %x in process %x enter / barrier %x in cluster %x/ cycle %d\n",209 __FUNCTION__, this-> trdid, this->process->pid, barrier_ptr, barrier_cxy, cycle );210 #endif 211 212 // compute extended pointers on various barrier fields213 lock_xp 214 root_xp 215 current_xp 216 sense_xp 217 nb_threads_xp = XPTR( barrier_cxy , &barrier_ptr->nb_threads);218 219 // take busylock protecting the remote_barrier365 if( cycle > DEBUG_BARRIER_WAIT ) 366 printk("\n[%s] thread[%x,%x] enter / barrier (%x,%x) / cycle %d\n", 367 __FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle ); 368 #endif 369 370 // build extended pointers on various barrier descriptor fields 371 lock_xp = XPTR( barrier_cxy , &barrier_ptr->lock ); 372 root_xp = XPTR( barrier_cxy , &barrier_ptr->root ); 373 current_xp = XPTR( barrier_cxy , &barrier_ptr->current ); 374 sense_xp = XPTR( barrier_cxy , &barrier_ptr->sense ); 375 arity_xp = XPTR( barrier_cxy , &barrier_ptr->arity ); 376 377 // take busylock 
protecting the barrier state 220 378 remote_busylock_acquire( lock_xp ); 221 379 222 #if (DEBUG_BARRIER & 1) 223 cycle = (uint32_t)hal_get_cycles(); 224 if( cycle > DEBUG_BARRIER ) 225 printk("\n[DBG] %s : thread %x in process %x get lock / cycle %d\n", 226 __FUNCTION__, this->trdid, this->process->pid, cycle ); 227 #endif 228 229 // get sense and nb_threads values from barrier descriptor 230 sense = hal_remote_l32( sense_xp ); 231 nb_threads = hal_remote_l32( nb_threads_xp ); 380 // get sense and threads values from barrier descriptor 381 sense = hal_remote_l32( sense_xp ); 382 arity = hal_remote_l32( arity_xp ); 232 383 233 384 // compute expected value … … 235 386 else expected = 0; 236 387 237 #if (DEBUG_BARRIER & 1) 238 cycle = (uint32_t)hal_get_cycles(); 239 if( cycle > DEBUG_BARRIER ) 240 printk("\n[DBG] %s : thread %x in process %x / count %d / sense %d / cycle %d\n", 241 __FUNCTION__, this->trdid, this->process->pid, nb_threads, sense, cycle ); 242 #endif 243 244 // atomically increment current, and get value before increment 388 // increment current number of arrived threads / get value before increment 245 389 current = hal_remote_atomic_add( current_xp , 1 ); 246 390 … … 248 392 // other threads block, register in queue, and deschedule 249 393 250 if( current == ( nb_threads-1) ) // last thread394 if( current == (arity - 1) ) // last thread 251 395 { 252 396 hal_remote_s32( current_xp , 0 ); … … 261 405 thread_t * thread_ptr = GET_PTR( thread_xp ); 262 406 263 #if (DEBUG_BARRIER & 1) 264 cycle = (uint32_t)hal_get_cycles(); 265 if( cycle > DEBUG_BARRIER ) 266 printk("\n[DBG] %s : thread %x in process %x / unblock thread %x / cycle %d\n", 267 __FUNCTION__, this->trdid, this->process->pid, thread_ptr, cycle ); 407 #if (DEBUG_BARRIER_WAIT & 1) 408 trdid_t trdid = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) ); 409 process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) ); 410 pid_t pid = hal_remote_l32( XPTR( thread_cxy , 
&process->pid ) ); 411 if( cycle > DEBUG_BARRIER_WAIT ) 412 printk("\n[%s] thread[%x,%x] unblocks thread[%x,%x]\n", 413 __FUNCTION__, this->process->pid, this->trdid, pid, trdid ); 268 414 #endif 269 415 … … 275 421 } 276 422 423 // release busylock protecting the barrier 424 remote_busylock_release( lock_xp ); 425 } 426 else // not the last thread 427 { 428 429 #if (DEBUG_BARRIER_WAIT & 1) 430 if( cycle > DEBUG_BARRIER_WAIT ) 431 printk("\n[%s] thread[%x,%x] blocks\n", 432 __FUNCTION__, this->process->pid, this->trdid ); 433 #endif 434 435 // register calling thread in barrier waiting queue 436 xlist_add_last( root_xp , XPTR( local_cxy , &this->wait_list ) ); 437 438 // block calling thread 439 thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_USERSYNC ); 440 277 441 // release busylock protecting the remote_barrier 278 442 remote_busylock_release( lock_xp ); 279 } 280 else // not the last thread 281 { 282 283 #if (DEBUG_BARRIER & 1) 443 444 // deschedule 445 sched_yield("blocked on barrier"); 446 } 447 448 #if DEBUG_BARRIER_WAIT 284 449 cycle = (uint32_t)hal_get_cycles(); 285 if( cycle > DEBUG_BARRIER ) 286 printk("\n[DBG] %s : thread %x in process %x / blocked / cycle %d\n", 287 __FUNCTION__, this->trdid, this->process->pid, cycle ); 288 #endif 289 450 if( cycle > DEBUG_BARRIER_WAIT ) 451 printk("\n[%s] thread[%x,%x] exit / barrier (%x,%x) / cycle %d\n", 452 __FUNCTION__, this->trdid, this->process->pid, barrier_cxy, barrier_ptr, cycle ); 453 #endif 454 455 } // end simple_barrier_wait() 456 457 458 ///////////////////////////////////////////////////////////// 459 // DQT barrier functions 460 ///////////////////////////////////////////////////////////// 461 462 static void dqt_barrier_increment( xptr_t node_xp ); 463 464 #if DEBUG_BARRIER_CREATE 465 static void dqt_barrier_display( xptr_t barrier_xp ); 466 #endif 467 468 /////////////////////////////////////////////////////// 469 dqt_barrier_t * dqt_barrier_create( uint32_t x_size, 470 uint32_t y_size, 471 
uint32_t nthreads ) 472 { 473 page_t * dqt_page; 474 xptr_t dqt_page_xp; 475 page_t * rpc_page; 476 xptr_t rpc_page_xp; 477 dqt_barrier_t * barrier; // local pointer on DQT barrier descriptor 478 xptr_t barrier_xp; // extended pointer on DQT barrier descriptor 479 uint32_t z; // actual DQT size == max(x_size,y_size) 480 uint32_t levels; // actual number of DQT levels 481 kmem_req_t req; // kmem request 482 xptr_t rpc_xp; // extended pointer on RPC descriptors array 483 rpc_desc_t * rpc; // pointer on RPC descriptors array 484 uint32_t responses; // responses counter for parallel RPCs 485 reg_t save_sr; // for critical section 486 uint32_t x; // X coordinate in QDT mesh 487 uint32_t y; // Y coordinate in QDT mesh 488 uint32_t l; // level coordinate 489 490 // compute size and number of DQT levels 491 z = (x_size > y_size) ? x_size : y_size; 492 levels = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 3 : (z < 9) ? 4 : 5; 493 494 // check x_size and y_size arguments 495 assert( (z <= 16) , "DQT dqth larger than (16*16)\n"); 496 497 // check RPC descriptor size 498 assert( (sizeof(rpc_desc_t) <= 128), "RPC descriptor larger than 128 bytes\n"); 499 500 // check size of an array of 5 DQT nodes 501 assert( (sizeof(dqt_node_t) * 5 <= 512 ), "array of DQT nodes larger than 512 bytes\n"); 502 503 // check size of DQT barrier descriptor 504 assert( (sizeof(dqt_barrier_t) <= 0x4000 ), "DQT barrier descriptor larger than 4 pages\n"); 505 506 // get pointer on local client process descriptor 507 thread_t * this = CURRENT_THREAD; 508 process_t * process = this->process; 509 510 #if DEBUG_BARRIER_CREATE 511 uint32_t cycle = (uint32_t)hal_get_cycles(); 512 if( cycle > DEBUG_BARRIER_CREATE ) 513 printk("\n[%s] thread[%x,%x] enter : x_size %d / y_size %d / levels %d / cycle %d\n", 514 __FUNCTION__, process->pid, this->trdid, x_size, y_size, levels, cycle ); 515 #endif 516 517 // get reference process cluster 518 xptr_t ref_xp = process->ref_xp; 519 cxy_t ref_cxy = GET_CXY( ref_xp ); 520 521 
// 1. allocate memory for DQT barrier descriptor in reference cluster 522 if( ref_cxy == local_cxy ) 523 { 524 req.type = KMEM_PAGE; 525 req.size = 2; // 4 pages == 16 Kbytes 526 req.flags = AF_ZERO; 527 dqt_page = kmem_alloc( &req ); 528 dqt_page_xp = XPTR( local_cxy , dqt_page ); 529 } 530 else 531 { 532 rpc_pmem_get_pages_client( ref_cxy, 533 2, 534 &dqt_page ); 535 dqt_page_xp = XPTR( ref_cxy , dqt_page ); 536 } 537 538 if( dqt_page == NULL ) return NULL; 539 540 // get pointers on DQT barrier descriptor 541 barrier_xp = ppm_page2base( dqt_page_xp ); 542 barrier = GET_PTR( barrier_xp ); 543 544 // initialize global parameters in DQT barrier descriptor 545 hal_remote_s32( XPTR( ref_cxy , &barrier->x_size ) , x_size ); 546 hal_remote_s32( XPTR( ref_cxy , &barrier->y_size ) , x_size ); 547 hal_remote_s32( XPTR( ref_cxy , &barrier->nthreads ) , nthreads ); 548 549 #if DEBUG_BARRIER_CREATE 550 if( cycle > DEBUG_BARRIER_CREATE ) 551 printk("\n[%s] thread[%x,%x] created DQT barrier descriptor at (%x,%x)\n", 552 __FUNCTION__, process->pid, this->trdid, ref_cxy, barrier ); 553 #endif 554 555 // 2. allocate memory from local cluster for an array of 256 RPCs descriptors 556 // cannot share the RPC descriptor, because the returned argument is not shared 557 req.type = KMEM_PAGE; 558 req.size = 3; // 8 pages == 32 Kbytes 559 req.flags = AF_ZERO; 560 rpc_page = kmem_alloc( &req ); 561 rpc_page_xp = XPTR( local_cxy , rpc_page ); 562 563 // get pointers on RPC descriptors array 564 rpc_xp = ppm_page2base( rpc_page_xp ); 565 rpc = GET_PTR( rpc_xp ); 566 567 #if DEBUG_BARRIER_CREATE 568 if( cycle > DEBUG_BARRIER_CREATE ) 569 printk("\n[%s] thread[%x,%x] created RPC descriptors array at (%x,%s)\n", 570 __FUNCTION__, process->pid, this->trdid, local_cxy, rpc ); 571 #endif 572 573 // 3. 
send parallel RPCs to all existing clusters covered by the DQT 574 // to allocate memory for an array of 5 DQT nodes in each cluster 575 // (5 nodes per cluster <= 512 bytes per cluster) 576 577 responses = 0; // initialize RPC responses counter 578 579 // mask IRQs 580 hal_disable_irq( &save_sr); 581 582 // client thread blocks itself 583 thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC ); 584 585 for ( x = 0 ; x < x_size ; x++ ) 586 { 587 for ( y = 0 ; y < y_size ; y++ ) 588 { 589 // send RPC to existing clusters only 590 if( LOCAL_CLUSTER->cluster_info[x][y] ) 591 { 592 cxy_t cxy = HAL_CXY_FROM_XY( x , y ); // target cluster identifier 593 594 // build a specific RPC descriptor for each target cluster 595 rpc[cxy].rsp = &responses; 596 rpc[cxy].blocking = false; 597 rpc[cxy].index = RPC_KCM_ALLOC; 598 rpc[cxy].thread = this; 599 rpc[cxy].lid = this->core->lid; 600 rpc[cxy].args[0] = (uint64_t)KMEM_512_BYTES; 601 602 // atomically increment expected responses counter 603 hal_atomic_add( &responses , 1 ); 604 605 // send a non-blocking RPC to allocate 512 bytes in target cluster 606 rpc_send( cxy , &rpc[cxy] ); 607 } 608 } 609 } 610 611 #if DEBUG_BARRIER_CREATE 612 if( cycle > DEBUG_BARRIER_CREATE ) 613 printk("\n[%s] thread[%x,%x] sent all RPC requests to allocate dqt_nodes array\n", 614 __FUNCTION__, process->pid, this->trdid ); 615 #endif 616 617 // client thread deschedule 618 sched_yield("blocked on parallel rpc_kcm_alloc"); 619 620 // restore IRQs 621 hal_restore_irq( save_sr); 622 623 // 4. 
initialize the node_xp[x][y][l] array in DQT barrier descriptor 624 // the node_xp[x][y][0] value is available in rpc.args[1] 625 626 #if DEBUG_BARRIER_CREATE 627 if( cycle > DEBUG_BARRIER_CREATE ) 628 printk("\n[%s] thread[%x,%x] initialises array of pointers on dqt_nodes\n", 629 __FUNCTION__, process->pid, this->trdid ); 630 #endif 631 632 for ( x = 0 ; x < x_size ; x++ ) 633 { 634 for ( y = 0 ; y < y_size ; y++ ) 635 { 636 cxy_t cxy = HAL_CXY_FROM_XY( x , y ); // target cluster identifier 637 xptr_t array_xp = (xptr_t)rpc[cxy].args[1]; // x_pointer on node array 638 uint32_t offset = sizeof( dqt_node_t ); // size of a DQT node 639 640 // set values into the node_xp[x][y][l] array 641 for ( l = 0 ; l < levels ; l++ ) 642 { 643 xptr_t node_xp = array_xp + (offset * l); 644 hal_remote_s64( XPTR( ref_cxy , &barrier->node_xp[x][y][l] ), node_xp ); 645 646 #if DEBUG_BARRIER_CREATE 647 if( cycle > DEBUG_BARRIER_CREATE ) 648 printk(" - dqt_node_xp[%d,%d,%d] = (%x,%x) / &dqt_node_xp = %x\n", 649 x , y , l , GET_CXY( node_xp ), GET_PTR( node_xp ), &barrier->node_xp[x][y][l] ); 650 #endif 651 } 652 } 653 } 654 655 // 5. release memory locally allocated for the RPCs array 656 req.type = KMEM_PAGE; 657 req.ptr = rpc_page; 658 kmem_free( &req ); 659 660 #if DEBUG_BARRIER_CREATE 661 if( cycle > DEBUG_BARRIER_CREATE ) 662 printk("\n[%s] thread[%x,%x] released memory for RPC descriptors array\n", 663 __FUNCTION__, process->pid, this->trdid ); 664 #endif 665 666 // 6. 
initialise all distributed DQT nodes using remote accesses 667 // and the pointers stored in the node_xp[x][y][l] array 668 for ( x = 0 ; x < x_size ; x++ ) 669 { 670 for ( y = 0 ; y < y_size ; y++ ) 671 { 672 // initialize existing clusters only 673 if( LOCAL_CLUSTER->cluster_info[x][y] ) 674 { 675 for ( l = 0 ; l < levels ; l++ ) 676 { 677 xptr_t parent_xp; 678 xptr_t child_xp[4]; 679 uint32_t arity = 0; 680 681 // get DQT node pointers 682 xptr_t node_xp = hal_remote_l64( XPTR( ref_cxy, 683 &barrier->node_xp[x][y][l] ) ); 684 cxy_t node_cxy = GET_CXY( node_xp ); 685 dqt_node_t * node_ptr = GET_PTR( node_xp ); 686 687 // compute arity and child_xp[i] 688 if (l == 0 ) // bottom DQT node 689 { 690 arity = nthreads; 691 692 child_xp[0] = XPTR_NULL; 693 child_xp[1] = XPTR_NULL; 694 child_xp[2] = XPTR_NULL; 695 child_xp[3] = XPTR_NULL; 696 } 697 else // not a bottom DQT node 698 { 699 arity = 0; 700 701 // only few non-bottom nodes must be initialised 702 if( ((x & ((1<<l)-1)) == 0) && ((y & ((1<<l)-1)) == 0) ) 703 { 704 uint32_t cx[4]; // x coordinate for children 705 uint32_t cy[4]; // y coordinate for children 706 uint32_t i; 707 708 // the child0 coordinates are equal to the parent coordinates 709 // other children coordinates depend on the level value 710 cx[0] = x; 711 cy[0] = y; 712 713 cx[1] = x; 714 cy[1] = y + (1 << (l-1)); 715 716 cx[2] = x + (1 << (l-1)); 717 cy[2] = y; 718 719 cx[3] = x + (1 << (l-1)); 720 cy[3] = y + (1 << (l-1)); 721 722 for ( i = 0 ; i < 4 ; i++ ) 723 { 724 // child pointer is NULL if outside the mesh 725 if ( (cx[i] < x_size) && (cy[i] < y_size) ) 726 { 727 // get child_xp[i] 728 child_xp[i] = hal_remote_l64( XPTR( ref_cxy, 729 &barrier->node_xp[cx[i]][cy[i]][l-1] ) ); 730 731 // increment arity 732 arity++; 733 } 734 else 735 { 736 child_xp[i] = XPTR_NULL; 737 } 738 } 739 } 740 } 741 742 // compute parent_xp 743 if( l == (levels - 1) ) // root DQT node 744 { 745 parent_xp = XPTR_NULL; 746 } 747 else // not the root 748 { 749 uint32_t 
px = 0; // parent X coordinate 750 uint32_t py = 0; // parent Y coordinate 751 bool_t found = false; 752 753 // compute macro_cluster x_min, x_max, y_min, y_max 754 uint32_t x_min = x & ~((1<<(l+1))-1); 755 uint32_t x_max = x_min + (1<<(l+1)); 756 uint32_t y_min = y & ~((1<<(l+1))-1); 757 uint32_t y_max = y_min + (1<<(l+1)); 758 759 // scan all clusters in macro-cluster[x][y][l] / take first active 760 for( px = x_min ; px < x_max ; px++ ) 761 { 762 for( py = y_min ; py < y_max ; py++ ) 763 { 764 if( LOCAL_CLUSTER->cluster_info[px][py] ) found = true; 765 if( found ) break; 766 } 767 if( found ) break; 768 } 769 770 parent_xp = hal_remote_l64( XPTR( ref_cxy , 771 &barrier->node_xp[px][py][l+1] ) ); 772 } 773 774 // initializes the DQT node 775 hal_remote_s32( XPTR( node_cxy , &node_ptr->arity ) , arity ); 776 hal_remote_s32( XPTR( node_cxy , &node_ptr->current ) , 0 ); 777 hal_remote_s32( XPTR( node_cxy , &node_ptr->sense ) , 0 ); 778 hal_remote_s32( XPTR( node_cxy , &node_ptr->level ) , l ); 779 hal_remote_s64( XPTR( node_cxy , &node_ptr->parent_xp ) , parent_xp ); 780 hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[0] ) , child_xp[0] ); 781 hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[1] ) , child_xp[1] ); 782 hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[2] ) , child_xp[2] ); 783 hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[3] ) , child_xp[3] ); 784 785 xlist_root_init( XPTR( node_cxy , &node_ptr->root ) ); 786 787 remote_busylock_init( XPTR( node_cxy , &node_ptr->lock ), 788 LOCK_BARRIER_STATE ); 789 } 790 } 791 } 792 } 793 794 #if DEBUG_BARRIER_CREATE 795 cycle = (uint32_t)hal_get_cycles(); 796 if( cycle > DEBUG_BARRIER_CREATE ) 797 printk("\n[%s] thread[%x,%x] completed DQT barrier initialisation / cycle %d\n", 798 __FUNCTION__, process->pid, this->trdid, cycle ); 799 dqt_barrier_display( barrier_xp ); 800 #endif 801 802 return barrier; 803 804 } // end dqt_barrier_create() 805 806 /////////////////////////////////////////////// 
807 void dqt_barrier_destroy( xptr_t barrier_xp ) 808 { 809 page_t * rpc_page; 810 xptr_t rpc_page_xp; 811 rpc_desc_t * rpc; // local pointer on RPC descriptors array 812 xptr_t rpc_xp; // extended pointer on RPC descriptor array 813 reg_t save_sr; // for critical section 814 kmem_req_t req; // kmem request 815 816 thread_t * this = CURRENT_THREAD; 817 818 // get DQT barrier descriptor cluster and local pointer 819 dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); 820 cxy_t barrier_cxy = GET_CXY( barrier_xp ); 821 822 #if DEBUG_BARRIER_DESTROY 823 uint32_t cycle = (uint32_t)hal_get_cycles(); 824 if( cycle > DEBUG_BARRIER_DESTROY ) 825 printk("\n[%s] thread[%x,%x] enter for barrier (%x,%x) / cycle %d\n", 826 __FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle ); 827 #endif 828 829 // get x_size and y_size global parameters 830 uint32_t x_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->x_size ) ); 831 uint32_t y_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->y_size ) ); 832 833 // 1. allocate memory from local cluster for an array of 256 RPCs descriptors 834 // cannot share the RPC descriptor, because the "buf" argument is not shared 835 req.type = KMEM_PAGE; 836 req.size = 3; // 8 pages == 32 Kbytes 837 req.flags = AF_ZERO; 838 rpc_page = kmem_alloc( &req ); 839 rpc_page_xp = XPTR( local_cxy , rpc_page ); 840 841 // get pointers on RPC descriptors array 842 rpc_xp = ppm_page2base( rpc_page_xp ); 843 rpc = GET_PTR( rpc_xp ); 844 845 // 2. 
send parallel RPCs to all existing clusters covered by the DQT 846 // to release memory allocated for the arrays of DQT nodes in each cluster 847 848 uint32_t responses = 0; // initialize RPC responses counter 849 850 // mask IRQs 851 hal_disable_irq( &save_sr); 852 853 // client thread blocks itself 854 thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC ); 855 856 uint32_t x , y; 857 858 #if DEBUG_BARRIER_DESTROY 859 if( cycle > DEBUG_BARRIER_DESTROY ) 860 printk("\n[%s] thread[%x,%x] send RPCs to release the distributed dqt_node array\n", 861 __FUNCTION__, this->process->pid, this->trdid ); 862 #endif 863 864 for ( x = 0 ; x < x_size ; x++ ) 865 { 866 for ( y = 0 ; y < y_size ; y++ ) 867 { 868 // send RPC to existing cluster only 869 if( LOCAL_CLUSTER->cluster_info[x][y] ) 870 { 871 // compute target cluster identifier 872 cxy_t cxy = HAL_CXY_FROM_XY( x , y ); 873 874 // get local pointer on dqt_nodes array in target cluster 875 xptr_t buf_xp_xp = XPTR( barrier_cxy , &barrier_ptr->node_xp[x][y][0] ); 876 xptr_t buf_xp = hal_remote_l64( buf_xp_xp ); 877 void * buf = GET_PTR( buf_xp ); 878 879 assert( (cxy == GET_CXY(buf_xp)) , "bad extended pointer on dqt_nodes array\n" ); 880 881 // build a specific RPC descriptor 882 rpc[cxy].rsp = &responses; 883 rpc[cxy].blocking = false; 884 rpc[cxy].index = RPC_KCM_FREE; 885 rpc[cxy].thread = this; 886 rpc[cxy].lid = this->core->lid; 887 rpc[cxy].args[0] = (uint64_t)(intptr_t)buf; 888 rpc[cxy].args[1] = (uint64_t)KMEM_512_BYTES; 889 890 // atomically increment expected responses counter 891 hal_atomic_add( &responses , 1 ); 892 893 #if DEBUG_BARRIER_DESTROY 894 if( cycle > DEBUG_BARRIER_DESTROY ) 895 printk(" - target cluster(%d,%d) / buffer %x\n", x, y, buf ); 896 #endif 897 // send a non-blocking RPC to release 512 bytes in target cluster 898 rpc_send( cxy , &rpc[cxy] ); 899 } 900 } 901 } 902 903 // client thread deschedule 904 sched_yield("blocked on parallel rpc_kcm_free"); 905 906 // restore IRQs 907 
hal_restore_irq( save_sr); 908 909 // 3. release memory locally allocated for the RPC descriptors array 910 req.type = KMEM_PAGE; 911 req.ptr = rpc_page; 912 kmem_free( &req ); 913 914 // 4. release memory allocated for barrier descriptor 915 xptr_t page_xp = ppm_base2page( barrier_xp ); 916 page_t * page = GET_PTR( page_xp ); 917 918 if( barrier_cxy == local_cxy ) 919 { 920 req.type = KMEM_PAGE; 921 req.ptr = page; 922 kmem_free( &req ); 923 } 924 else 925 { 926 rpc_pmem_release_pages_client( barrier_cxy, 927 page ); 928 } 929 930 #if DEBUG_BARRIER_DESTROY 931 cycle = (uint32_t)hal_get_cycles(); 932 if( cycle > DEBUG_BARRIER_DESTROY ) 933 printk("\n[%s] thread[%x,%x] exit for barrier (%x,%x) / cycle %d\n", 934 __FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle ); 935 #endif 936 937 } // end dqt_barrier_destroy() 938 939 //////////////////////////////////////////// 940 void dqt_barrier_wait( xptr_t barrier_xp ) 941 { 942 thread_t * this = CURRENT_THREAD; 943 944 // check calling thread can yield 945 thread_assert_can_yield( this , __FUNCTION__ ); 946 947 // get cluster and local pointer on DQT barrier descriptor 948 dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); 949 cxy_t barrier_cxy = GET_CXY( barrier_xp ); 950 951 #if DEBUG_BARRIER_WAIT 952 uint32_t cycle = (uint32_t)hal_get_cycles(); 953 if( cycle > DEBUG_BARRIER_WAIT ) 954 printk("\n[%s] thread[%x,%x] enter / barrier (%x,%x) / cycle %d\n", 955 __FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle ); 956 #endif 957 958 // get extended pointer on local bottom DQT node 959 uint32_t x = HAL_X_FROM_CXY( local_cxy ); 960 uint32_t y = HAL_Y_FROM_CXY( local_cxy ); 961 xptr_t node_xp = hal_remote_l64( XPTR( barrier_cxy , &barrier_ptr->node_xp[x][y][0] ) ); 962 963 // call recursive function to traverse DQT from bottom to root 964 dqt_barrier_increment( node_xp ); 965 966 #if DEBUG_BARRIER_WAIT 967 cycle = (uint32_t)hal_get_cycles(); 968 if( cycle > 
DEBUG_BARRIER_WAIT ) 969 printk("\n[%s] thread[%x,%x] exit / barrier (%x,%x) / cycle %d\n", 970 __FUNCTION__, this->trdid, this->process->pid, barrier_cxy, barrier_ptr, cycle ); 971 #endif 972 973 } // end dqt_barrier_wait() 974 975 976 //////////////////////////////////////////////////////////////////////////////////////////// 977 // DQT static functions 978 //////////////////////////////////////////////////////////////////////////////////////////// 979 980 981 ////////////////////////////////////////////////////////////////////////////////////////// 982 // This recursive function decrements the distributed "count" variables, 983 // traversing the DQT from bottom to root. 984 // The last arrived thread reset the local node before returning. 985 ////////////////////////////////////////////////////////////////////////////////////////// 986 static void dqt_barrier_increment( xptr_t node_xp ) 987 { 988 uint32_t expected; 989 uint32_t sense; 990 uint32_t arity; 991 992 thread_t * this = CURRENT_THREAD; 993 994 // get node cluster and local pointer 995 dqt_node_t * node_ptr = GET_PTR( node_xp ); 996 cxy_t node_cxy = GET_CXY( node_xp ); 997 998 // build relevant extended pointers 999 xptr_t arity_xp = XPTR( node_cxy , &node_ptr->arity ); 1000 xptr_t sense_xp = XPTR( node_cxy , &node_ptr->sense ); 1001 xptr_t current_xp = XPTR( node_cxy , &node_ptr->current ); 1002 xptr_t lock_xp = XPTR( node_cxy , &node_ptr->lock ); 1003 xptr_t root_xp = XPTR( node_cxy , &node_ptr->root ); 1004 1005 #if DEBUG_BARRIER_WAIT 1006 uint32_t cycle = (uint32_t)hal_get_cycles(); 1007 uint32_t level = hal_remote_l32( XPTR( node_cxy, &node_ptr->level ) ); 1008 if( cycle > DEBUG_BARRIER_WAIT ) 1009 printk("\n[%s] thread[%x,%x] increments DQT node(%d,%d,%d) / cycle %d\n", 1010 __FUNCTION__ , this->process->pid, this->trdid, 1011 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); 1012 #endif 1013 1014 // get extended pointer on parent node 1015 xptr_t parent_xp = hal_remote_l64( XPTR( 
node_cxy , &node_ptr->parent_xp ) ); 1016 1017 // take busylock 1018 remote_busylock_acquire( lock_xp ); 1019 1020 // get sense and arity values from barrier descriptor 1021 sense = hal_remote_l32( sense_xp ); 1022 arity = hal_remote_l32( arity_xp ); 1023 1024 // compute expected value 1025 expected = (sense == 0) ? 1 : 0; 1026 1027 // increment current number of arrived threads / get value before increment 1028 uint32_t current = hal_remote_atomic_add( current_xp , 1 ); 1029 1030 // last arrived thread reset the local node, makes the recursive call 1031 // on parent node, and reactivates all waiting thread when returning. 1032 // other threads block, register in queue, and deschedule. 1033 1034 if ( current == (arity - 1) ) // last thread 1035 { 1036 1037 #if DEBUG_BARRIER_WAIT 1038 if( cycle > DEBUG_BARRIER_WAIT ) 1039 printk("\n[%s] thread[%x,%x] reset DQT node(%d,%d,%d)\n", 1040 __FUNCTION__ , this->process->pid, this->trdid, 1041 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); 1042 #endif 1043 // reset the current node 1044 hal_remote_s32( sense_xp , expected ); 1045 hal_remote_s32( current_xp , 0 ); 1046 1047 // release busylock protecting the current node 1048 remote_busylock_release( lock_xp ); 1049 1050 // recursive call on parent node when current node is not the root 1051 if( parent_xp != XPTR_NULL) dqt_barrier_increment( parent_xp ); 1052 1053 // unblock all waiting threads on this node 1054 while( xlist_is_empty( root_xp ) == false ) 1055 { 1056 // get pointers on first waiting thread 1057 xptr_t thread_xp = XLIST_FIRST( root_xp , thread_t , wait_list ); 1058 cxy_t thread_cxy = GET_CXY( thread_xp ); 1059 thread_t * thread_ptr = GET_PTR( thread_xp ); 1060 1061 #if (DEBUG_BARRIER_WAIT & 1) 1062 trdid_t trdid = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) ); 1063 process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) ); 1064 pid_t pid = hal_remote_l32( XPTR( thread_cxy , &process->pid ) ); 1065 if( cycle > 
DEBUG_BARRIER_WAIT ) 1066 printk("\n[%s] thread[%x,%x] unblock thread[%x,%x]\n", 1067 __FUNCTION__, this->process->pid, this->trdid, pid, trdid ); 1068 #endif 1069 // remove waiting thread from queue 1070 xlist_unlink( XPTR( thread_cxy , &thread_ptr->wait_list ) ); 1071 1072 // unblock waiting thread 1073 thread_unblock( thread_xp , THREAD_BLOCKED_USERSYNC ); 1074 } 1075 } 1076 else // not the last thread 1077 { 1078 // get extended pointer on xlist entry from thread 1079 xptr_t entry_xp = XPTR( local_cxy , &this->wait_list ); 1080 290 1081 // register calling thread in barrier waiting queue 291 xlist_add_last( root_xp , XPTR( local_cxy , &this->wait_list ));1082 xlist_add_last( root_xp , entry_xp ); 292 1083 293 1084 // block calling thread … … 297 1088 remote_busylock_release( lock_xp ); 298 1089 1090 #if DEBUG_BARRIER_WAIT 1091 if( cycle > DEBUG_BARRIER_WAIT ) 1092 printk("\n[%s] thread[%x,%x] blocks on node(%d,%d,%d)\n", 1093 __FUNCTION__ , this->process->pid, this->trdid, 1094 HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); 1095 #endif 299 1096 // deschedule 300 1097 sched_yield("blocked on barrier"); 301 1098 } 302 1099 303 #if DEBUG_BARRIER 304 cycle = (uint32_t)hal_get_cycles(); 305 if( cycle > DEBUG_BARRIER ) 306 printk("\n[DBG] %s : thread %x in process %x exit / barrier %x in cluster %x / cycle %d\n", 307 __FUNCTION__, this->trdid, this->process->pid, barrier_ptr, barrier_cxy, cycle ); 308 #endif 309 310 } // end remote_barrier_wait() 1100 return; 1101 1102 } // end dqt_barrier_decrement() 1103 1104 #if DEBUG_BARRIER_CREATE 1105 1106 //////////////////////////////////////////////////////////////////////////////////////////// 1107 // This debug function displays all DQT nodes in all clusters. 1108 //////////////////////////////////////////////////////////////////////////////////////////// 1109 // @ barrier_xp : extended pointer on DQT barrier descriptor. 
1110 //////////////////////////////////////////////////////////////////////////////////////////// 1111 static void dqt_barrier_display( xptr_t barrier_xp ) 1112 { 1113 // get cluster and local pointer on DQT barrier 1114 dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); 1115 cxy_t barrier_cxy = GET_CXY( barrier_xp ); 1116 1117 // get barrier global parameters 1118 uint32_t x_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->x_size ) ); 1119 uint32_t y_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->y_size ) ); 1120 uint32_t nthreads = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->nthreads ) ); 1121 1122 // compute size and number of DQT levels 1123 uint32_t z = (x_size > y_size) ? x_size : y_size; 1124 uint32_t levels = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 3 : (z < 9) ? 4 : 5; 1125 1126 printk("\n***** DQT barrier : x_size %d / y_size %d / nthreads %d / levels %d *****\n", 1127 x_size, y_size, nthreads, levels ); 1128 1129 uint32_t x , y , l; 1130 1131 for ( x = 0 ; x < x_size ; x++ ) 1132 { 1133 for ( y = 0 ; y < y_size ; y++ ) 1134 { 1135 printk(" - cluster[%d,%d]\n", x , y ); 1136 1137 for ( l = 0 ; l < levels ; l++ ) 1138 { 1139 // get pointers on target node 1140 xptr_t node_xp = hal_remote_l64( XPTR( barrier_cxy , 1141 &barrier_ptr->node_xp[x][y][l] ) ); 1142 dqt_node_t * node_ptr = GET_PTR( node_xp ); 1143 cxy_t node_cxy = GET_CXY( node_xp ); 1144 1145 if( node_xp != XPTR_NULL ) 1146 { 1147 uint32_t level = hal_remote_l32( XPTR( node_cxy , &node_ptr->level )); 1148 uint32_t arity = hal_remote_l32( XPTR( node_cxy , &node_ptr->arity )); 1149 xptr_t pa_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->parent_xp )); 1150 xptr_t c0_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[0] )); 1151 xptr_t c1_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[1] )); 1152 xptr_t c2_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[2] )); 1153 xptr_t c3_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[3] )); 1154 1155 
printk(" . level %d : (%x,%x) / arity %d / P(%x,%x) / C0(%x,%x)" 1156 " C1(%x,%x) / C2(%x,%x) / C3(%x,%x)\n", 1157 level, node_cxy, node_ptr, arity, 1158 GET_CXY(pa_xp), GET_PTR(pa_xp), 1159 GET_CXY(c0_xp), GET_PTR(c0_xp), 1160 GET_CXY(c1_xp), GET_PTR(c1_xp), 1161 GET_CXY(c2_xp), GET_PTR(c2_xp), 1162 GET_CXY(c3_xp), GET_PTR(c3_xp) ); 1163 } 1164 } 1165 } 1166 } 1167 } // end dqt_barrier_display() 1168 1169 #endif -
trunk/kernel/libk/remote_barrier.h
r581 r619 2 2 * remote_barrier.h - POSIX barrier definition. 3 3 * 4 * Author Alain Greiner (2016,2017,2018 )4 * Author Alain Greiner (2016,2017,2018,2019) 5 5 * 6 6 * Copyright (c) UPMC Sorbonne Universites … … 29 29 #include <remote_busylock.h> 30 30 #include <xlist.h> 31 #include <shared_pthread.h> 31 32 32 33 /*************************************************************************************** 33 * This file definesa POSIX compliant barrier.34 * This file defines two implementations for a POSIX compliant barrier. 34 35 * 35 36 * It is used by multi-threaded user applications to synchronise threads running in 36 * different clusters, as all access functions uses hal_remote_l32() / hal_remote_s32() 37 * remote access primitives. 38 * 39 * A barrier is declared by a given user process as a "pthread_barrier_t" global variable. 40 * This user type is implemented as an unsigned long, but the value is not used by the 41 * kernel. ALMOS-MKH uses only the barrier virtual address as an identifier. 42 * For each user barrier, ALMOS-MKH creates a kernel "remote_barrier_t" structure, 43 * dynamically allocated in the reference cluster by the remote_barrier_create() function, 44 * and destroyed by the remote_barrier_destroy() function, using RPC if the calling thread 45 * is not running in the reference cluster. 46 * 47 * The blocking "remote_barrier_wait()" function implements a descheduling policy when 48 * the calling thread is not the last expected thread: the calling thread is registered 49 * in a waiting queue, rooted in the barrier structure, and the the calling thread 50 * is blocked on the THREAD_BLOCKED_USERSYNC condition. The last arrived thread 51 * unblocks all registtered waiting threads. 37 * different clusters. Access functions use RPCs for barrier creation/destruction, 38 * and use remote access primitives for actual synchronisation (wait function). 39 * 40 * A barrier is declared by a given user process as a "pthread_barrier_t" user variable. 
41 * This user type is implemented in user space as an unsigned long, but the value is not 42 * used by the kernel. ALMOS-MKH uses only the barrier virtual address as an identifier. 43 * For each user barrier, ALMOS-MKH creates a kernel structure, dynamically allocated 44 * by the "generic_barrier_create()" function, destroyed by the "generic_barrier_destroy()" 45 * function, and used by the "generic_barrier_wait()" function. 46 * 47 * Implementation note: 48 * ALMOS-MKH supports two barrier implementations: 49 * 50 * 1) simple_barrier_t 51 * If the pointer on the barrier attributes is NULL, the barrier is implemented as 52 * a shared variable localized in the reference process cluster. 53 * There is a risk of contention when the number of synchronizing threads is large. 54 * 55 * 2) dqt_barrier_t 56 * If the (x_size, y_size, nthreads) arguments are defined in the barrier attributes, 57 * the barrier is implemented as a hierarchical quad-tree covering all clusters in the 58 * (x_size * y_size) mesh, including cluster (0,0), with nthreads per cluster, and called 59 * DQT : Distributed Quad Tree. This DQT implementation supposes a regular architecture, 60 * and a strong constraint on the threads placement: exactly "nthreads" threads per 61 * cluster in the (x_size * y_size) mesh. 62 * 63 * For both implementations, the blocking "generic_barrier_wait()" function implements 64 * a descheduling policy when the calling thread is not the last expected thread: 65 * the calling thread is registered in a waiting queue, rooted in the barrier structure, 66 * and the calling thread is blocked on the THREAD_BLOCKED_USERSYNC condition. 67 * The last arrived thread unblocks all registered waiting threads. 52 68 * **************************************************************************************/ 53 69 54 /***************************************************************************************** 55 * This structure defines the barrier descriptor.
56 * - It contains an xlist of all barriers dynamically created by a given process, 57 * rooted in the reference process descriptor. 58 * - It contains the root of another xlist to register all arrived threads. 59 ****************************************************************************************/ 60 61 typedef struct remote_barrier_s 62 { 63 remote_busylock_t lock; /*! lock protecting list of waiting threads */ 64 intptr_t ident; /*! virtual address in user space == identifier */ 65 uint32_t current; /*! number of arrived threads */ 66 uint32_t sense; /*! barrier state (toggle) */ 67 uint32_t nb_threads; /*! number of expected threads */ 68 xlist_entry_t list; /*! member of list of barriers in same process */ 69 xlist_entry_t root; /*! root of list of waiting threads */ 70 71 72 /***************************************************************************************** 73 * generic barrier descriptor and access functions 74 ***************************************************************************************** 75 * This generic structure is used by both the simple and the QOT implementations. 76 * It is implemented in the reference process cluster, and contains 77 * - the barrier identifier, 78 * - the implementation type (simple or QDT), 79 * - an xlist implementing the set of barriers dynamically created by a given process, 80 * - a pointer on the implementation specific descriptor (simple_barrier / sqt_barrier). 81 ****************************************************************************************/ 82 83 typedef struct generic_barrier_s 84 { 85 intptr_t ident; /*! virtual address in user space == identifier */ 86 xlist_entry_t list; /*! member of list of barriers in same process */ 87 bool_t is_dqt; /*! DQT implementation when true */ 88 void * extend; /*! 
implementation specific barrier descriptor */ 70 89 } 71 remote_barrier_t; 72 90 generic_barrier_t; 73 91 74 92 /***************************************************************************************** … … 76 94 * by its virtual address in a given user process. It makes an associative search, 77 95 * scanning the list of barriers rooted in the reference process descriptor. 96 * It can be used for both simple and DQT barriers, registered in the same list. 78 97 ***************************************************************************************** 79 98 * @ ident : barrier virtual address, used as identifier. 80 99 * @ returns extended pointer on barrier if success / returns XPTR_NULL if not found. 81 100 ****************************************************************************************/ 82 xptr_t remote_barrier_from_ident( intptr_t ident ); 83 84 /***************************************************************************************** 85 * This function implement the pthread_barrier_init() syscall. 86 * It allocates memory for the barrier descriptor in the reference cluster for 87 * the calling process, it initializes the barrier state, and register it in the 88 * list of barriers owned by the reference process. 89 ***************************************************************************************** 90 * @ count : number of expected threads. 91 * @ ident : barrier identifier (virtual address in user space). 92 * @ return 0 if success / return ENOMEM if failure. 93 ****************************************************************************************/ 94 error_t remote_barrier_create( intptr_t ident, 95 uint32_t count ); 96 97 /***************************************************************************************** 98 * This function implement the pthread_barrier_destroy() syscall. 99 * It releases thr memory allocated for the barrier descriptor, and remove the barrier 100 * from the list of barriers owned by the reference process. 
101 ***************************************************************************************** 102 * @ barrier_xp : extended pointer on barrier descriptor. 103 ****************************************************************************************/ 104 void remote_barrier_destroy( xptr_t barrier_xp ); 105 106 /***************************************************************************************** 107 * This function implement the pthread_barrier_wait() syscall. 108 * It returns only when the number of expected threads (registered in the barrier 109 * dexcriptor) reach the barrier. 110 ***************************************************************************************** 111 * @ barrier_xp : extended pointer on barrier descriptor. 112 ****************************************************************************************/ 113 void remote_barrier_wait( xptr_t barrier_xp ); 101 xptr_t generic_barrier_from_ident( intptr_t ident ); 102 103 /***************************************************************************************** 104 * This function implements the pthread_barrier_init() syscall. 105 * It allocates and initialises the generic barrier descriptor in the reference process 106 * cluster, and - depending on the <attr> argument, calls the relevant (simple or DQT) 107 * function to allocate and initialize the implementation dependant barrier descriptor. 108 * Finally, it registers the barrier in the reference process xlist of user barriers. 109 * It can be called by a thread running in any cluster, as it use RPC if required. 110 ***************************************************************************************** 111 * @ ident : barrier virtual address, used as identifier. 112 * @ count : number of expected threads. 113 * @ attr : barrier attributes (x_size,y_size,nthreads), used by QDT implementation. 114 * @ returns 0 if success / returns -1 if not found. 
115 ****************************************************************************************/ 116 error_t generic_barrier_create( intptr_t ident, 117 uint32_t count, 118 pthread_barrierattr_t * attr ); 119 120 /***************************************************************************************** 121 * This function implements the pthread_barrier_destroy() syscall. 122 * It calls the relevant function (simple or DQT) to release the memory allocated for 123 * the implementation specific barrier descriptor, and releases the memory allocated 124 * for the generic barrier descriptor. 125 * It removes the barrier from the list of barriers rooted in the reference process. 126 * It can be called by a thread running in any cluster, as it use RPC if required. 127 ***************************************************************************************** 128 * @ gen_barrier_xp : extended pointer on generic barrier descriptor. 129 ****************************************************************************************/ 130 void generic_barrier_destroy( xptr_t gen_barrier_xp ); 131 132 /***************************************************************************************** 133 * This blocking function implements the pthread_barrier_wait() syscall. 134 * It calls the relevant function (simple or DQT) depending on the implementation, 135 * and returns only when all expected threads reach the barrier. 136 * It can be called by a thread running in any cluster, as it use remote accesses. 137 ***************************************************************************************** 138 * @ gen_barrier_xp : extended pointer on generic barrier descriptor. 
139 ****************************************************************************************/ 140 void generic_barrier_wait( xptr_t gen_barrier_xp ); 141 142 143 144 145 146 147 /***************************************************************************************** 148 * simple barrier descriptor 149 ***************************************************************************************** 150 * This structure defines the simple barrier descriptor. It is localized in the process 151 * reference cluster, as an extension of the generic barrier descriptor. 152 * It implements a toggle barrier remotely accessed by all threads. 153 * It contains the root of the xlist registering all arrived threads. 154 ****************************************************************************************/ 155 156 typedef struct simple_barrier_s 157 { 158 remote_busylock_t lock; /*! lock protecting list of waiting threads */ 159 uint32_t current; /*! number of arrived threads */ 160 uint32_t sense; /*! barrier state (toggle) */ 161 uint32_t arity; /*! number of expected threads */ 162 xlist_entry_t root; /*! root of list of waiting threads */ 163 } 164 simple_barrier_t; 165 166 /***************************************************************************************** 167 * This function allocates memory for the simple barrier descriptor in the reference 168 * cluster of the calling process. It initializes the barrier state and returns 169 * a local pointer on the created simple barrier descriptor in reference cluster. 170 * It can be called by a thread running in any cluster, as it use RPC if required. 171 ***************************************************************************************** 172 * @ count : [in] number of expected threads. 173 * @ return Local pointer on barrier descriptor if success / return NULL if failure. 
174 ****************************************************************************************/ 175 simple_barrier_t * simple_barrier_create( uint32_t count ); 176 177 /***************************************************************************************** 178 * This function releases the memory allocated for the simple barrier descriptor. 179 * It can be called by a thread running in any cluster, as it use RPC if required. 180 ***************************************************************************************** 181 * @ barrier_xp : extended pointer on simple barrier descriptor. 182 ****************************************************************************************/ 183 void simple_barrier_destroy( xptr_t barrier_xp ); 184 185 /***************************************************************************************** 186 * This blocking function returns only when all expected threads reach the barrier. 187 * It can be called by a thread running in any cluster, as it use remote accesses. 188 * Waiting threads use a descheduling policy. 189 ***************************************************************************************** 190 * @ barrier_xp : extended pointer on simple barrier descriptor. 191 ****************************************************************************************/ 192 void simple_barrier_wait( xptr_t barrier_xp ); 193 194 195 196 197 198 /***************************************************************************************** 199 * dqt_barrier 200 ***************************************************************************************** 201 * These structuree define the hierarchical DQT barrier, physically distributed in a 202 * mesh of clusters defined by the (x_size, y_size, nthreads) arguments: 203 * . The involved clusters form a mesh [x_size * y_size] 204 * . The lower left involved cluster is cluster(0,0) 205 * . The number of threads per cluster is the same in all clusters. 
206 * 207 * Implementation note: 208 * - The quad three is implemented as a three dimensions array of node[x][y][l] 209 * . [x][y] are the cluster coordinates / max values are (DQT_XMAX-1), (DQT_YMAX-1) 210 * . [l] is the node level / 0 for terminal nodes / (DQT_LMAX-1) for the root node 211 * - The dqt_barrier_t is the global barrier descriptor, allocated in the reference 212 * process cluster as an extension of the generic barrier descriptor. It contains a 213 * 3D array of extended pointers on all DQT nodes implementing the DQT barrier. 214 * - The dqt_node_t is a local barrier implementing a togle barrier between all threads 215 * of a given cluster (for a terminal node), or between all representatives of the four 216 * children nodes (for a non terminal node). 217 ****************************************************************************************/ 218 219 #define DQT_XMAX 16 // max number of clusters in a row 220 #define DQT_YMAX 16 // max number of clusters in a column 221 #define DQT_LMAX 5 // max depth of the quad tree 222 223 typedef struct dqt_node_s 224 { 225 remote_busylock_t lock; /*! lock protecting list of waiting threads */ 226 volatile uint32_t sense; /*! barrier state (toggle) */ 227 volatile uint32_t current; /*! number of locally arrived threads */ 228 uint32_t arity; /*! total number of locally expected threads */ 229 uint32_t level; /*! hierarchical level (0 is bottom) */ 230 xptr_t parent_xp; /*! x_pointer on parent node (NULL for root) */ 231 xptr_t child_xp[4]; /*! x_pointer on children node (NULL for bottom) */ 232 xlist_entry_t root; /*! root of list of waiting threads */ 233 } 234 dqt_node_t; 235 236 typedef struct dqt_barrier_s 237 { 238 xptr_t node_xp[DQT_XMAX][DQT_YMAX][DQT_LMAX]; /*! array of xptr on DQT nodes */ 239 240 uint32_t x_size; /*! number of clusters in one row of DQT mesh */ 241 uint32_t y_size; /*! number of clusters in one column of DQT mesh */ 242 uint32_t nthreads; /*! 
number of expected threads in one cluster */ 243 } 244 dqt_barrier_t; 245 246 /***************************************************************************************** 247 * This function allocates memory for the DQT barrier descriptor in the reference cluster 248 * of the calling process. It allocates also memory in all clusters of the QDT mesh, 249 * to store up to 5 QDT nodes per cluster. 250 * It initializes the barrier descriptor, including initialisation of the parent/children 251 * extended pointers in the distributed QDT nodes. 252 * It returns a local pointer on the QDT barrier descriptor in reference cluster. 253 * It can be called by a thread running in any cluster, as it use RPCs for memory 254 * allocation, and remote access for QDT initialisation. 255 ***************************************************************************************** 256 * @ x_size : [in] number of clusters in a line of DQT mesh. 257 * @ y_size : [in] number of clusters in a column of DQT mesh. 258 * @ nthreads : [in] number of threads per cluster. 259 * @ return Local pointer on barrier descriptor if success / return NULL if failure. 260 ****************************************************************************************/ 261 dqt_barrier_t * dqt_barrier_create( uint32_t x_size, 262 uint32_t y_size, 263 uint32_t nthreads ); 264 265 /***************************************************************************************** 266 * This function releases all memory allocated for the QDT barrier descriptor. 267 * It removes the barrier from the list of barriers rooted in the reference process. 268 * It can be called by a thread running in any cluster, as it use RPCs. 269 ***************************************************************************************** 270 * @ barrier_xp : extended pointer on DQT barrier descriptor. 
271 ****************************************************************************************/ 272 void dqt_barrier_destroy( xptr_t barrier_xp ); 273 274 /***************************************************************************************** 275 * This blocking function returns only when all expected threads reach the barrier. 276 * It can be called by a thread running in any cluster, as it use remote accesses. 277 * Waiting threads use a descheduling policy. 278 ***************************************************************************************** 279 * @ barrier_xp : extended pointer on DQT barrier descriptor. 280 ****************************************************************************************/ 281 void dqt_barrier_wait( xptr_t barrier_xp ); 282 114 283 115 284 -
trunk/kernel/libk/remote_busylock.c
r600 r619 101 101 (XPTR( local_cxy , this ) == DEBUG_BUSYLOCK_THREAD_XP) ) 102 102 { 103 // get cluster and local pointer of target thread104 cxy_t thread_cxy = GET_CXY( DEBUG_BUSYLOCK_THREAD_XP );105 thread_t * thread_ptr = GET_PTR( DEBUG_BUSYLOCK_THREAD_XP );106 107 // display message on kernel TXT0108 103 printk("\n[%s] thread[%x,%x] ACQUIRE lock %s\n", 109 __FUNCTION_ , this->process->pid, this->trdid, lock_type_str[type] );104 __FUNCTION__, this->process->pid, this->trdid, lock_type_str[type] ); 110 105 } 111 106 #endif … … 149 144 (XPTR( local_cxy , this ) == DEBUG_BUSYLOCK_THREAD_XP) ) 150 145 { 151 // get cluster and local pointer of target thread152 cxy_t thread_cxy = GET_CXY( DEBUG_BUSYLOCK_THREAD_XP );153 thread_t * thread_ptr = GET_PTR( DEBUG_BUSYLOCK_THREAD_XP );154 155 // display message on kernel TXT0156 146 printk("\n[%s] thread[%x,%x] RELEASE lock %s\n", 157 147 __FUNCTION__, this->process->pid, this->trdid, lock_type_str[type] ); -
trunk/kernel/libk/remote_busylock.h
r603 r619 42 42 * makes an atomic increment on a "ticket" allocator, and keep polling the "current" 43 43 * value until current == ticket. 44 44 * 45 45 * - To release the lock, the owner thread increments the "current" value, 46 46 * decrements its busylocks counter. -
trunk/kernel/libk/remote_mutex.c
r611 r619 138 138 thread_t * this = CURRENT_THREAD; 139 139 if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX ) 140 printk("\n[ DBG] %s : thread %x in %x process /mutex(%x,%x)\n",141 __FUNCTION__, this-> trdid, this->process->pid, local_cxy, mutex_ptr );140 printk("\n[%s] : thread[%x,%x] created mutex(%x,%x)\n", 141 __FUNCTION__, this->process->pid, this->trdid, local_cxy, mutex_ptr ); 142 142 #endif 143 143 … … 173 173 remote_queuelock_release( XPTR( ref_cxy , &ref_ptr->sync_lock ) ); 174 174 175 // release memory allocated for mutex aphoredescriptor175 // release memory allocated for mutex descriptor 176 176 if( mutex_cxy == local_cxy ) // reference is local 177 177 { … … 183 183 else // reference is remote 184 184 { 185 rpc_kcm_free_client( mutex_cxy , mutex_ptr , KMEM_ BARRIER);185 rpc_kcm_free_client( mutex_cxy , mutex_ptr , KMEM_MUTEX ); 186 186 } 187 187 … … 226 226 thread_t * this = CURRENT_THREAD; 227 227 if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX ) 228 printk("\n[ DBG] %s : thread %x in process %xSUCCESS on mutex(%x,%x)\n",229 __FUNCTION__, this-> trdid, this->process->pid, mutex_cxy, mutex_ptr );228 printk("\n[%s] thread[%x,%x] SUCCESS on mutex(%x,%x)\n", 229 __FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr ); 230 230 #endif 231 231 … … 247 247 thread_t * this = CURRENT_THREAD; 248 248 if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX ) 249 printk("\n[ DBG] %s : thread %x in process %xBLOCKED on mutex(%x,%x)\n",250 __FUNCTION__, this-> trdid, this->process->pid, mutex_cxy, mutex_ptr );249 printk("\n[%s] thread[%x,%x] BLOCKED on mutex(%x,%x)\n", 250 __FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr ); 251 251 #endif 252 252 … … 296 296 thread_t * this = CURRENT_THREAD; 297 297 if( (uint32_t)hal_get_cycles() > DEBUG_MUTEX ) 298 printk("\n[ DBG] %s : thread %x in %x processEXIT / mutex(%x,%x)\n",299 __FUNCTION__, this-> trdid, this->process->pid, mutex_cxy, mutex_ptr );298 printk("\n[%s] thread[%x,%x] EXIT / mutex(%x,%x)\n", 299 
__FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr ); 300 300 #endif 301 301 … … 320 320 process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) ); 321 321 pid_t pid = hal_remote_l32( XPTR( thread_cxy , &process->pid ) ); 322 printk("\n[ DBG] %s : thread %x in process %xUNBLOCK thread %x in process %d / mutex(%x,%x)\n",323 __FUNCTION__, this-> trdid, this->process->pid, trdid, pid, mutex_cxy, mutex_ptr );322 printk("\n[%s] thread[%x,%x] UNBLOCK thread %x in process %d / mutex(%x,%x)\n", 323 __FUNCTION__, this->process->pid, this->trdid, trdid, pid, mutex_cxy, mutex_ptr ); 324 324 } 325 325 #endif … … 371 371 thread_t * this = CURRENT_THREAD; 372 372 if( (uint32_t)hal_get_cycles() > DEBUG_QUEUELOCK ) 373 printk("\n[ DBG] %s : SUCCESS for thread %x in process %x/ mutex(%x,%x)\n",374 __FUNCTION__, this-> trdid, this->process->pid, mutex_cxy, mutex_ptr );373 printk("\n[%s] SUCCESS for thread[%x,%x] / mutex(%x,%x)\n", 374 __FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr ); 375 375 #endif 376 376 // release busylock protecting mutex state … … 385 385 thread_t * this = CURRENT_THREAD; 386 386 if( (uint32_t)hal_get_cycles() > DEBUG_QUEUELOCK ) 387 printk("\n[ DBG] %s : FAILURE for thread %x in process %x/ mutex(%x,%x)\n",388 __FUNCTION__, this-> trdid, this->process->pid, mutex_cxy, mutex_ptr );387 printk("\n[%s] FAILURE for thread[%x,%x] / mutex(%x,%x)\n", 388 __FUNCTION__, this->process->pid, this->trdid, mutex_cxy, mutex_ptr ); 389 389 #endif 390 390 // release busylock protecting mutex state -
trunk/kernel/libk/user_dir.c
r614 r619 286 286 printk("\n[ERROR] in %s : cannot map vpn %x in GPT\n", 287 287 __FUNCTION__, (vpn + page_id) ); 288 // use the non blocking RPC to delete the remote vseg 289 rpc_desc_t desc; 290 desc.index = RPC_VMM_DELETE_VSEG; 291 desc.responses = 1; 292 desc.thread = CURRENT_THREAD; 293 desc.lid = CURRENT_THREAD->core->lid; 294 desc.blocking = true; 295 desc.args[0] = ref_pid; 296 desc.args[1] = vpn << CONFIG_PPM_PAGE_SHIFT; 297 rpc_vmm_delete_vseg_client( ref_cxy , &desc ); 288 289 // delete the vseg 290 if( ref_cxy == local_cxy) vmm_delete_vseg( ref_pid, vpn<<CONFIG_PPM_PAGE_SHIFT ); 291 else rpc_vmm_delete_vseg_client( ref_cxy, ref_pid, vpn<<CONFIG_PPM_PAGE_SHIFT ); 292 298 293 // release the user_dir descriptor 299 294 req.type = KMEM_DIR; … … 387 382 lpid_t lpid; // process local index 388 383 rpc_desc_t rpc; // rpc descriptor 384 uint32_t responses; // response counter 389 385 390 386 // get pointers on calling process & thread … … 441 437 thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC ); 442 438 443 // initialize RPC descriptor shared fields 444 rpc.responses = 0; 439 // initialize responses counter 440 responses = 0; 441 442 // initialize a shared RPC descriptor 443 // can be shared, because no out arguments 444 rpc.rsp = &responses; 445 445 rpc.blocking = false; 446 446 rpc.index = RPC_VMM_DELETE_VSEG; … … 461 461 462 462 // atomically increment responses counter 463 hal_atomic_add( (void *)&rpc.responses , 1 ); 464 465 // call RPC 466 rpc_vmm_delete_vseg_client( process_cxy , &rpc ); 467 468 } // end list of copies 463 hal_atomic_add( &responses , 1 ); 464 465 // send RPC to target cluster 466 rpc_send( process_cxy , &rpc ); 467 } 469 468 470 469 // release the lock protecting process copies … … 472 471 473 472 // client thread deschedule 474 sched_yield("blocked on rpc_vmm_ unmap_vseg");473 sched_yield("blocked on rpc_vmm_delete_vseg"); 475 474 476 475 // restore IRQs
Note: See TracChangeset
for help on using the changeset viewer.