[1] | 1 | /* |
---|
[563] | 2 | * remote_barrier.c - POSIX barrier implementation. |
---|
[104] | 3 | * |
---|
[619] | 4 | * Author Alain Greiner (2016,2017,2018,2019) |
---|
[1] | 5 | * |
---|
| 6 | * Copyright (c) UPMC Sorbonne Universites |
---|
| 7 | * |
---|
| 8 | * This file is part of ALMOS-MKH. |
---|
| 9 | * |
---|
| 10 | * ALMOS-MKH is free software; you can redistribute it and/or modify it |
---|
| 11 | * under the terms of the GNU General Public License as published by |
---|
| 12 | * the Free Software Foundation; version 2.0 of the License. |
---|
| 13 | * |
---|
| 14 | * ALMOS-MKH is distributed in the hope that it will be useful, but |
---|
| 15 | * WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
---|
| 17 | * General Public License for more details. |
---|
| 18 | * |
---|
| 19 | * You should have received a copy of the GNU General Public License |
---|
| 20 | * along with ALMOS-MKH; if not, write to the Free Software Foundation, |
---|
| 21 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
---|
| 22 | */ |
---|
| 23 | |
---|
[457] | 24 | #include <hal_kernel_types.h> |
---|
[619] | 25 | #include <hal_macros.h> |
---|
[1] | 26 | #include <hal_remote.h> |
---|
[23] | 27 | #include <hal_irqmask.h> |
---|
[563] | 28 | #include <remote_busylock.h> |
---|
[23] | 29 | #include <thread.h> |
---|
| 30 | #include <kmem.h> |
---|
| 31 | #include <printk.h> |
---|
| 32 | #include <process.h> |
---|
| 33 | #include <vmm.h> |
---|
[1] | 34 | #include <remote_barrier.h> |
---|
| 35 | |
---|
[619] | 36 | //////////////////////////////////////////////////// |
---|
| 37 | // generic (implementation independent) functions |
---|
| 38 | //////////////////////////////////////////////////// |
---|
[1] | 39 | |
---|
[23] | 40 | /////////////////////////////////////////////////// |
---|
[619] | 41 | xptr_t generic_barrier_from_ident( intptr_t ident ) |
---|
[23] | 42 | { |
---|
| 43 | // get pointer on local process_descriptor |
---|
| 44 | process_t * process = CURRENT_THREAD->process; |
---|
[1] | 45 | |
---|
[619] | 46 | // get pointers on reference process |
---|
| 47 | xptr_t ref_xp = process->ref_xp; |
---|
[23] | 48 | cxy_t ref_cxy = GET_CXY( ref_xp ); |
---|
| 49 | process_t * ref_ptr = (process_t *)GET_PTR( ref_xp ); |
---|
| 50 | |
---|
[104] | 51 | // get extended pointer on root of barriers list |
---|
[23] | 52 | xptr_t root_xp = XPTR( ref_cxy , &ref_ptr->barrier_root ); |
---|
[104] | 53 | |
---|
[23] | 54 | // scan reference process barriers list |
---|
[619] | 55 | xptr_t iter_xp; |
---|
| 56 | xptr_t barrier_xp; |
---|
| 57 | cxy_t barrier_cxy; |
---|
| 58 | generic_barrier_t * barrier_ptr; |
---|
| 59 | intptr_t current; |
---|
| 60 | bool_t found = false; |
---|
[104] | 61 | |
---|
[23] | 62 | XLIST_FOREACH( root_xp , iter_xp ) |
---|
| 63 | { |
---|
[619] | 64 | barrier_xp = XLIST_ELEMENT( iter_xp , generic_barrier_t , list ); |
---|
[23] | 65 | barrier_cxy = GET_CXY( barrier_xp ); |
---|
[619] | 66 | barrier_ptr = (generic_barrier_t *)GET_PTR( barrier_xp ); |
---|
[104] | 67 | current = (intptr_t)hal_remote_lpt( XPTR( barrier_cxy , &barrier_ptr->ident ) ); |
---|
[23] | 68 | if( ident == current ) |
---|
| 69 | { |
---|
| 70 | found = true; |
---|
| 71 | break; |
---|
| 72 | } |
---|
| 73 | } |
---|
| 74 | |
---|
| 75 | if( found == false ) return XPTR_NULL; |
---|
| 76 | else return barrier_xp; |
---|
| 77 | |
---|
[619] | 78 | } // end generic_barrier_from_ident() |
---|
| 79 | |
---|
| 80 | ////////////////////////////////////////////////////////////// |
---|
| 81 | error_t generic_barrier_create( intptr_t ident, |
---|
| 82 | uint32_t count, |
---|
| 83 | pthread_barrierattr_t * attr ) |
---|
[23] | 84 | { |
---|
[619] | 85 | xptr_t gen_barrier_xp; // extended pointer on generic barrier descriptor |
---|
| 86 | generic_barrier_t * gen_barrier_ptr; // local pointer on generic barrier descriptor |
---|
| 87 | void * barrier; // local pointer on implementation barrier descriptor |
---|
| 88 | kmem_req_t req; // kmem request |
---|
[23] | 89 | |
---|
[619] | 90 | // get pointer on local process_descriptor |
---|
| 91 | process_t * process = CURRENT_THREAD->process; |
---|
[23] | 92 | |
---|
[619] | 93 | // get pointers on reference process |
---|
| 94 | xptr_t ref_xp = process->ref_xp; |
---|
| 95 | cxy_t ref_cxy = GET_CXY( ref_xp ); |
---|
| 96 | process_t * ref_ptr = (process_t *)GET_PTR( ref_xp ); |
---|
[581] | 97 | |
---|
[619] | 98 | // allocate memory for generic barrier descriptor |
---|
| 99 | if( ref_cxy == local_cxy ) // reference cluster is local |
---|
| 100 | { |
---|
| 101 | req.type = KMEM_GEN_BARRIER; |
---|
| 102 | req.flags = AF_ZERO; |
---|
| 103 | gen_barrier_ptr = kmem_alloc( &req ); |
---|
| 104 | gen_barrier_xp = XPTR( local_cxy , gen_barrier_ptr ); |
---|
| 105 | } |
---|
| 106 | else // reference cluster is remote |
---|
| 107 | { |
---|
| 108 | rpc_kcm_alloc_client( ref_cxy, |
---|
| 109 | KMEM_GEN_BARRIER, |
---|
| 110 | &gen_barrier_xp ); |
---|
| 111 | gen_barrier_ptr = GET_PTR( gen_barrier_xp ); |
---|
| 112 | } |
---|
[23] | 113 | |
---|
[619] | 114 | if( gen_barrier_ptr == NULL ) |
---|
| 115 | { |
---|
| 116 | printk("\n[ERROR] in %s : cannot create generic barrier\n", __FUNCTION__ ); |
---|
| 117 | return -1; |
---|
| 118 | } |
---|
[23] | 119 | |
---|
[619] | 120 | // create implementation specific barrier descriptor |
---|
| 121 | if( attr == NULL ) // simple barrier implementation |
---|
[23] | 122 | { |
---|
[619] | 123 | // create simple barrier descriptor |
---|
| 124 | barrier = simple_barrier_create( count ); |
---|
| 125 | |
---|
| 126 | if( barrier == NULL ) |
---|
| 127 | { |
---|
| 128 | printk("\n[ERROR] in %s : cannot create simple barrier\n", __FUNCTION__); |
---|
| 129 | return -1; |
---|
| 130 | } |
---|
[23] | 131 | } |
---|
[619] | 132 | else // QDT barrier implementation |
---|
[23] | 133 | { |
---|
[619] | 134 | uint32_t x_size = attr->x_size; |
---|
| 135 | uint32_t y_size = attr->y_size; |
---|
| 136 | uint32_t nthreads = attr->nthreads; |
---|
[23] | 137 | |
---|
[619] | 138 | // check attributes / count |
---|
| 139 | if( (x_size * y_size * nthreads) != count ) |
---|
| 140 | { |
---|
| 141 | printk("\n[ERROR] in %s : count(%d) != x_size(%d) * y_size(%d) * nthreads(%d)\n", |
---|
| 142 | __FUNCTION__, count, x_size, y_size, nthreads ); |
---|
| 143 | return -1; |
---|
| 144 | } |
---|
[23] | 145 | |
---|
[619] | 146 | // create DQT barrier descriptor |
---|
| 147 | barrier = dqt_barrier_create( x_size , y_size , nthreads ); |
---|
[23] | 148 | |
---|
[619] | 149 | if( barrier == NULL ) |
---|
| 150 | { |
---|
| 151 | printk("\n[ERROR] in %s : cannot create DQT barrier descriptor\n", __FUNCTION__); |
---|
| 152 | return -1; |
---|
| 153 | } |
---|
| 154 | } |
---|
[23] | 155 | |
---|
[619] | 156 | // initialize the generic barrier descriptor |
---|
| 157 | hal_remote_spt( XPTR( ref_cxy , &gen_barrier_ptr->ident ) , (void*)ident ); |
---|
| 158 | hal_remote_s32( XPTR( ref_cxy , &gen_barrier_ptr->is_dqt ) , (attr != NULL) ); |
---|
| 159 | hal_remote_spt( XPTR( ref_cxy , &gen_barrier_ptr->extend ) , barrier ); |
---|
| 160 | |
---|
| 161 | // build extended pointers on lock, root and entry for reference process xlist |
---|
[23] | 162 | xptr_t root_xp = XPTR( ref_cxy , &ref_ptr->barrier_root ); |
---|
[619] | 163 | xptr_t lock_xp = XPTR( ref_cxy , &ref_ptr->sync_lock ); |
---|
| 164 | xptr_t entry_xp = XPTR( ref_cxy , &gen_barrier_ptr->list ); |
---|
[23] | 165 | |
---|
[619] | 166 | // register barrier in reference process xlist of barriers |
---|
| 167 | remote_busylock_acquire( lock_xp ); |
---|
[23] | 168 | xlist_add_first( root_xp , entry_xp ); |
---|
[619] | 169 | remote_busylock_release( lock_xp ); |
---|
[23] | 170 | |
---|
| 171 | return 0; |
---|
| 172 | |
---|
[619] | 173 | } // en generic_barrier_create() |
---|
[581] | 174 | |
---|
[619] | 175 | ///////////////////////////////////////////////////// |
---|
| 176 | void generic_barrier_destroy( xptr_t gen_barrier_xp ) |
---|
[23] | 177 | { |
---|
[619] | 178 | kmem_req_t req; // kmem request |
---|
| 179 | |
---|
| 180 | // get pointer on local process_descriptor |
---|
[23] | 181 | process_t * process = CURRENT_THREAD->process; |
---|
| 182 | |
---|
[619] | 183 | // get pointers on reference process |
---|
| 184 | xptr_t ref_xp = process->ref_xp; |
---|
[23] | 185 | cxy_t ref_cxy = GET_CXY( ref_xp ); |
---|
[619] | 186 | process_t * ref_ptr = GET_PTR( ref_xp ); |
---|
[23] | 187 | |
---|
[619] | 188 | // get cluster and local pointer on generic barrier descriptor |
---|
| 189 | generic_barrier_t * gen_barrier_ptr = GET_PTR( gen_barrier_xp ); |
---|
| 190 | cxy_t gen_barrier_cxy = GET_CXY( gen_barrier_xp ); |
---|
| 191 | |
---|
| 192 | // get barrier type and extension pointer |
---|
| 193 | bool_t is_dqt = hal_remote_l32( XPTR( gen_barrier_cxy , &gen_barrier_ptr->is_dqt ) ); |
---|
| 194 | void * extend = hal_remote_lpt( XPTR( gen_barrier_cxy , &gen_barrier_ptr->extend ) ); |
---|
| 195 | |
---|
| 196 | // build extended pointer on implementation dependant barrier descriptor |
---|
| 197 | xptr_t barrier_xp = XPTR( gen_barrier_cxy , extend ); |
---|
| 198 | |
---|
| 199 | // delete the implementation specific barrier |
---|
| 200 | if( is_dqt ) dqt_barrier_destroy( barrier_xp ); |
---|
| 201 | else simple_barrier_destroy( barrier_xp ); |
---|
| 202 | |
---|
| 203 | // build extended pointers on lock and entry for reference process xlist |
---|
| 204 | xptr_t lock_xp = XPTR( ref_cxy , &ref_ptr->sync_lock ); |
---|
| 205 | xptr_t entry_xp = XPTR( gen_barrier_cxy , &gen_barrier_ptr->list ); |
---|
| 206 | |
---|
| 207 | // remove barrier from reference process xlist |
---|
| 208 | remote_busylock_acquire( lock_xp ); |
---|
| 209 | xlist_unlink( entry_xp ); |
---|
| 210 | remote_busylock_release( lock_xp ); |
---|
| 211 | |
---|
| 212 | // release memory allocated to barrier descriptor |
---|
| 213 | if( gen_barrier_cxy == local_cxy ) |
---|
| 214 | { |
---|
| 215 | req.type = KMEM_GEN_BARRIER; |
---|
| 216 | req.ptr = gen_barrier_ptr; |
---|
| 217 | kmem_free( &req ); |
---|
| 218 | } |
---|
| 219 | else |
---|
| 220 | { |
---|
| 221 | rpc_kcm_free_client( gen_barrier_cxy, |
---|
| 222 | gen_barrier_ptr, |
---|
| 223 | KMEM_GEN_BARRIER ); |
---|
| 224 | } |
---|
| 225 | } // end generic_barrier_destroy() |
---|
| 226 | |
---|
| 227 | ////////////////////////////////////////////////// |
---|
| 228 | void generic_barrier_wait( xptr_t gen_barrier_xp ) |
---|
| 229 | { |
---|
| 230 | // get generic barrier descriptor cluster and pointer |
---|
| 231 | cxy_t gen_barrier_cxy = GET_CXY( gen_barrier_xp ); |
---|
| 232 | generic_barrier_t * gen_barrier_ptr = GET_PTR( gen_barrier_xp ); |
---|
| 233 | |
---|
| 234 | // get implementation type and extend local pointer |
---|
| 235 | bool_t is_dqt = hal_remote_l32( XPTR( gen_barrier_cxy , &gen_barrier_ptr->is_dqt ) ); |
---|
| 236 | void * extend = hal_remote_lpt( XPTR( gen_barrier_cxy , &gen_barrier_ptr->extend ) ); |
---|
| 237 | |
---|
| 238 | // build extended pointer on implementation specific barrier descriptor |
---|
| 239 | xptr_t barrier_xp = XPTR( gen_barrier_cxy , extend ); |
---|
| 240 | |
---|
| 241 | // call the relevant wait function |
---|
| 242 | if( is_dqt ) dqt_barrier_wait( barrier_xp ); |
---|
| 243 | else simple_barrier_wait( barrier_xp ); |
---|
| 244 | |
---|
| 245 | } // end generic_barrier_wait() |
---|
| 246 | |
---|
[623] | 247 | ///////////////////////////////////////////////////// |
---|
| 248 | void generic_barrier_display( xptr_t gen_barrier_xp ) |
---|
| 249 | { |
---|
| 250 | // get cluster and local pointer |
---|
| 251 | generic_barrier_t * gen_barrier_ptr = GET_PTR( gen_barrier_xp ); |
---|
| 252 | cxy_t gen_barrier_cxy = GET_CXY( gen_barrier_xp ); |
---|
[619] | 253 | |
---|
[623] | 254 | // get barrier type and extend pointer |
---|
| 255 | bool_t is_dqt = hal_remote_l32( XPTR( gen_barrier_cxy , &gen_barrier_ptr->is_dqt ) ); |
---|
| 256 | void * extend = hal_remote_lpt( XPTR( gen_barrier_cxy , &gen_barrier_ptr->extend ) ); |
---|
[619] | 257 | |
---|
[623] | 258 | // buil extended pointer on the implementation specific barrier descriptor |
---|
| 259 | xptr_t barrier_xp = XPTR( gen_barrier_cxy , extend ); |
---|
[619] | 260 | |
---|
[623] | 261 | // display barrier state |
---|
| 262 | if( is_dqt ) dqt_barrier_display( barrier_xp ); |
---|
| 263 | else simple_barrier_display( barrier_xp ); |
---|
| 264 | } |
---|
[619] | 265 | |
---|
[623] | 266 | |
---|
| 267 | |
---|
[619] | 268 | ///////////////////////////////////////////////////////////// |
---|
| 269 | // simple barrier functions |
---|
| 270 | ///////////////////////////////////////////////////////////// |
---|
| 271 | |
---|
| 272 | /////////////////////////////////////////////////////////// |
---|
| 273 | simple_barrier_t * simple_barrier_create( uint32_t count ) |
---|
| 274 | { |
---|
| 275 | xptr_t barrier_xp; |
---|
| 276 | simple_barrier_t * barrier; |
---|
| 277 | |
---|
| 278 | // get pointer on local client process descriptor |
---|
| 279 | thread_t * this = CURRENT_THREAD; |
---|
| 280 | process_t * process = this->process; |
---|
| 281 | |
---|
| 282 | // get reference process cluster |
---|
| 283 | xptr_t ref_xp = process->ref_xp; |
---|
| 284 | cxy_t ref_cxy = GET_CXY( ref_xp ); |
---|
| 285 | |
---|
| 286 | // allocate memory for simple barrier descriptor |
---|
| 287 | if( ref_cxy == local_cxy ) // reference is local |
---|
| 288 | { |
---|
| 289 | kmem_req_t req; |
---|
| 290 | req.type = KMEM_SMP_BARRIER; |
---|
| 291 | req.flags = AF_ZERO; |
---|
| 292 | barrier = kmem_alloc( &req ); |
---|
| 293 | barrier_xp = XPTR( local_cxy , barrier ); |
---|
| 294 | } |
---|
| 295 | else // reference is remote |
---|
| 296 | { |
---|
| 297 | rpc_kcm_alloc_client( ref_cxy, |
---|
| 298 | KMEM_SMP_BARRIER, |
---|
| 299 | &barrier_xp ); |
---|
| 300 | barrier = GET_PTR( barrier_xp ); |
---|
| 301 | } |
---|
| 302 | |
---|
| 303 | if( barrier == NULL ) return NULL; |
---|
| 304 | |
---|
| 305 | // initialise simple barrier descriptor |
---|
| 306 | hal_remote_s32 ( XPTR( ref_cxy , &barrier->arity ) , count ); |
---|
| 307 | hal_remote_s32 ( XPTR( ref_cxy , &barrier->current ) , 0 ); |
---|
| 308 | hal_remote_s32 ( XPTR( ref_cxy , &barrier->sense ) , 0 ); |
---|
| 309 | |
---|
| 310 | xlist_root_init ( XPTR( ref_cxy , &barrier->root ) ); |
---|
| 311 | remote_busylock_init( XPTR( ref_cxy , &barrier->lock ) , LOCK_BARRIER_STATE ); |
---|
| 312 | |
---|
| 313 | #if DEBUG_BARRIER_CREATE |
---|
| 314 | uint32_t cycle = (uint32_t)hal_get_cycles(); |
---|
| 315 | if( cycle > DEBUG_BARRIER_CREATE ) |
---|
| 316 | printk("\n[%s] thread[%x,%x] created barrier (%x,%x) / count %d / cycle %d\n", |
---|
| 317 | __FUNCTION__, process->pid, this->trdid, ref_cxy, barrier, count, cycle ); |
---|
| 318 | #endif |
---|
| 319 | |
---|
| 320 | return barrier; |
---|
| 321 | |
---|
| 322 | } // end simple_barrier_create() |
---|
| 323 | |
---|
| 324 | //////////////////////////////////////////////// |
---|
| 325 | void simple_barrier_destroy( xptr_t barrier_xp ) |
---|
| 326 | { |
---|
[23] | 327 | // get barrier cluster and local pointer |
---|
| 328 | cxy_t barrier_cxy = GET_CXY( barrier_xp ); |
---|
[619] | 329 | simple_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); |
---|
[23] | 330 | |
---|
| 331 | // release memory allocated for barrier descriptor |
---|
[619] | 332 | if( barrier_cxy == local_cxy ) |
---|
[23] | 333 | { |
---|
| 334 | kmem_req_t req; |
---|
[619] | 335 | req.type = KMEM_SMP_BARRIER; |
---|
[23] | 336 | req.ptr = barrier_ptr; |
---|
| 337 | kmem_free( &req ); |
---|
| 338 | } |
---|
[619] | 339 | else |
---|
[23] | 340 | { |
---|
[619] | 341 | rpc_kcm_free_client( barrier_cxy, |
---|
| 342 | barrier_ptr, |
---|
| 343 | KMEM_SMP_BARRIER ); |
---|
[23] | 344 | } |
---|
| 345 | |
---|
[619] | 346 | #if DEBUG_BARRIER_DESTROY |
---|
| 347 | uint32_t cycle = (uint32_t)hal_get_cycles(); |
---|
| 348 | thread_t * this = CURRENT_THREAD; |
---|
| 349 | process_t * process = this->process; |
---|
| 350 | if( cycle > DEBUG_BARRIER_DESTROY ) |
---|
| 351 | printk("\n[%s] thread[%x,%x] deleted barrier (%x,%x) / cycle %d\n", |
---|
| 352 | __FUNCTION__, process->pid, this->trdid, barrier_ptr, barrier_cxy, cycle ); |
---|
| 353 | #endif |
---|
| 354 | |
---|
| 355 | } // end simple_barrier_destroy() |
---|
| 356 | |
---|
[23] | 357 | ///////////////////////////////////////////// |
---|
[619] | 358 | void simple_barrier_wait( xptr_t barrier_xp ) |
---|
[23] | 359 | { |
---|
| 360 | uint32_t expected; |
---|
[581] | 361 | uint32_t sense; |
---|
[23] | 362 | uint32_t current; |
---|
[619] | 363 | uint32_t arity; |
---|
[23] | 364 | xptr_t root_xp; |
---|
[581] | 365 | xptr_t lock_xp; |
---|
| 366 | xptr_t current_xp; |
---|
| 367 | xptr_t sense_xp; |
---|
[619] | 368 | xptr_t arity_xp; |
---|
[23] | 369 | |
---|
[581] | 370 | // get pointer on calling thread |
---|
| 371 | thread_t * this = CURRENT_THREAD; |
---|
[23] | 372 | |
---|
[581] | 373 | // check calling thread can yield |
---|
| 374 | thread_assert_can_yield( this , __FUNCTION__ ); |
---|
[563] | 375 | |
---|
[23] | 376 | // get cluster and local pointer on remote barrier |
---|
[619] | 377 | simple_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); |
---|
[23] | 378 | cxy_t barrier_cxy = GET_CXY( barrier_xp ); |
---|
| 379 | |
---|
[619] | 380 | #if DEBUG_BARRIER_WAIT |
---|
[581] | 381 | uint32_t cycle = (uint32_t)hal_get_cycles(); |
---|
[619] | 382 | if( cycle > DEBUG_BARRIER_WAIT ) |
---|
| 383 | printk("\n[%s] thread[%x,%x] enter / barrier (%x,%x) / cycle %d\n", |
---|
| 384 | __FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle ); |
---|
[581] | 385 | #endif |
---|
[23] | 386 | |
---|
[619] | 387 | // build extended pointers on various barrier descriptor fields |
---|
| 388 | lock_xp = XPTR( barrier_cxy , &barrier_ptr->lock ); |
---|
| 389 | root_xp = XPTR( barrier_cxy , &barrier_ptr->root ); |
---|
| 390 | current_xp = XPTR( barrier_cxy , &barrier_ptr->current ); |
---|
| 391 | sense_xp = XPTR( barrier_cxy , &barrier_ptr->sense ); |
---|
| 392 | arity_xp = XPTR( barrier_cxy , &barrier_ptr->arity ); |
---|
[23] | 393 | |
---|
[619] | 394 | // take busylock protecting the barrier state |
---|
[581] | 395 | remote_busylock_acquire( lock_xp ); |
---|
| 396 | |
---|
[619] | 397 | // get sense and threads values from barrier descriptor |
---|
| 398 | sense = hal_remote_l32( sense_xp ); |
---|
| 399 | arity = hal_remote_l32( arity_xp ); |
---|
[581] | 400 | |
---|
[104] | 401 | // compute expected value |
---|
[23] | 402 | if ( sense == 0 ) expected = 1; |
---|
| 403 | else expected = 0; |
---|
| 404 | |
---|
[619] | 405 | // increment current number of arrived threads / get value before increment |
---|
[581] | 406 | current = hal_remote_atomic_add( current_xp , 1 ); |
---|
| 407 | |
---|
[23] | 408 | // last thread reset current, toggle sense, and activate all waiting threads |
---|
[104] | 409 | // other threads block, register in queue, and deschedule |
---|
[23] | 410 | |
---|
[619] | 411 | if( current == (arity - 1) ) // last thread |
---|
[23] | 412 | { |
---|
[581] | 413 | hal_remote_s32( current_xp , 0 ); |
---|
| 414 | hal_remote_s32( sense_xp , expected ); |
---|
[23] | 415 | |
---|
[581] | 416 | // unblock all waiting threads |
---|
| 417 | while( xlist_is_empty( root_xp ) == false ) |
---|
[23] | 418 | { |
---|
[581] | 419 | // get pointers on first waiting thread |
---|
| 420 | xptr_t thread_xp = XLIST_FIRST( root_xp , thread_t , wait_list ); |
---|
| 421 | cxy_t thread_cxy = GET_CXY( thread_xp ); |
---|
| 422 | thread_t * thread_ptr = GET_PTR( thread_xp ); |
---|
[104] | 423 | |
---|
[619] | 424 | #if (DEBUG_BARRIER_WAIT & 1) |
---|
| 425 | trdid_t trdid = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) ); |
---|
| 426 | process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) ); |
---|
| 427 | pid_t pid = hal_remote_l32( XPTR( thread_cxy , &process->pid ) ); |
---|
| 428 | if( cycle > DEBUG_BARRIER_WAIT ) |
---|
| 429 | printk("\n[%s] thread[%x,%x] unblocks thread[%x,%x]\n", |
---|
| 430 | __FUNCTION__, this->process->pid, this->trdid, pid, trdid ); |
---|
[581] | 431 | #endif |
---|
[104] | 432 | |
---|
[581] | 433 | // remove waiting thread from queue |
---|
| 434 | xlist_unlink( XPTR( thread_cxy , &thread_ptr->wait_list ) ); |
---|
[23] | 435 | |
---|
[581] | 436 | // unblock waiting thread |
---|
| 437 | thread_unblock( thread_xp , THREAD_BLOCKED_USERSYNC ); |
---|
| 438 | } |
---|
[23] | 439 | |
---|
[619] | 440 | // release busylock protecting the barrier |
---|
[581] | 441 | remote_busylock_release( lock_xp ); |
---|
[23] | 442 | } |
---|
[104] | 443 | else // not the last thread |
---|
[23] | 444 | { |
---|
[104] | 445 | |
---|
[619] | 446 | #if (DEBUG_BARRIER_WAIT & 1) |
---|
| 447 | if( cycle > DEBUG_BARRIER_WAIT ) |
---|
| 448 | printk("\n[%s] thread[%x,%x] blocks\n", |
---|
| 449 | __FUNCTION__, this->process->pid, this->trdid ); |
---|
[581] | 450 | #endif |
---|
| 451 | |
---|
[23] | 452 | // register calling thread in barrier waiting queue |
---|
[581] | 453 | xlist_add_last( root_xp , XPTR( local_cxy , &this->wait_list ) ); |
---|
[23] | 454 | |
---|
[581] | 455 | // block calling thread |
---|
| 456 | thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_USERSYNC ); |
---|
[23] | 457 | |
---|
[581] | 458 | // release busylock protecting the remote_barrier |
---|
| 459 | remote_busylock_release( lock_xp ); |
---|
| 460 | |
---|
| 461 | // deschedule |
---|
[408] | 462 | sched_yield("blocked on barrier"); |
---|
[581] | 463 | } |
---|
[23] | 464 | |
---|
[619] | 465 | #if DEBUG_BARRIER_WAIT |
---|
[581] | 466 | cycle = (uint32_t)hal_get_cycles(); |
---|
[619] | 467 | if( cycle > DEBUG_BARRIER_WAIT ) |
---|
| 468 | printk("\n[%s] thread[%x,%x] exit / barrier (%x,%x) / cycle %d\n", |
---|
[629] | 469 | __FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle ); |
---|
[581] | 470 | #endif |
---|
| 471 | |
---|
[619] | 472 | } // end simple_barrier_wait() |
---|
| 473 | |
---|
[623] | 474 | ///////////////////////////////////////////////// |
---|
| 475 | void simple_barrier_display( xptr_t barrier_xp ) |
---|
| 476 | { |
---|
| 477 | // get cluster and local pointer on simple barrier |
---|
| 478 | simple_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); |
---|
| 479 | cxy_t barrier_cxy = GET_CXY( barrier_xp ); |
---|
[619] | 480 | |
---|
[623] | 481 | // get barrier global parameters |
---|
| 482 | uint32_t current = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->current ) ); |
---|
| 483 | uint32_t arity = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->arity ) ); |
---|
| 484 | |
---|
| 485 | printk("\n***** simple barrier : %d arrived threads on %d *****\n", |
---|
| 486 | current, arity ); |
---|
| 487 | |
---|
| 488 | } // end simple_barrier_display() |
---|
| 489 | |
---|
| 490 | |
---|
| 491 | |
---|
| 492 | |
---|
[619] | 493 | ///////////////////////////////////////////////////////////// |
---|
| 494 | // DQT barrier functions |
---|
| 495 | ///////////////////////////////////////////////////////////// |
---|
| 496 | |
---|
| 497 | static void dqt_barrier_increment( xptr_t node_xp ); |
---|
| 498 | |
---|
| 499 | #if DEBUG_BARRIER_CREATE |
---|
| 500 | static void dqt_barrier_display( xptr_t barrier_xp ); |
---|
| 501 | #endif |
---|
| 502 | |
---|
| 503 | /////////////////////////////////////////////////////// |
---|
| 504 | dqt_barrier_t * dqt_barrier_create( uint32_t x_size, |
---|
| 505 | uint32_t y_size, |
---|
| 506 | uint32_t nthreads ) |
---|
| 507 | { |
---|
| 508 | page_t * dqt_page; |
---|
| 509 | xptr_t dqt_page_xp; |
---|
| 510 | page_t * rpc_page; |
---|
| 511 | xptr_t rpc_page_xp; |
---|
| 512 | dqt_barrier_t * barrier; // local pointer on DQT barrier descriptor |
---|
| 513 | xptr_t barrier_xp; // extended pointer on DQT barrier descriptor |
---|
| 514 | uint32_t z; // actual DQT size == max(x_size,y_size) |
---|
| 515 | uint32_t levels; // actual number of DQT levels |
---|
| 516 | kmem_req_t req; // kmem request |
---|
| 517 | xptr_t rpc_xp; // extended pointer on RPC descriptors array |
---|
| 518 | rpc_desc_t * rpc; // pointer on RPC descriptors array |
---|
| 519 | uint32_t responses; // responses counter for parallel RPCs |
---|
| 520 | reg_t save_sr; // for critical section |
---|
| 521 | uint32_t x; // X coordinate in QDT mesh |
---|
| 522 | uint32_t y; // Y coordinate in QDT mesh |
---|
| 523 | uint32_t l; // level coordinate |
---|
| 524 | |
---|
| 525 | // compute size and number of DQT levels |
---|
| 526 | z = (x_size > y_size) ? x_size : y_size; |
---|
| 527 | levels = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 3 : (z < 9) ? 4 : 5; |
---|
| 528 | |
---|
| 529 | // check x_size and y_size arguments |
---|
[623] | 530 | assert( (z <= 16) , "DQT mesh size larger than (16*16)\n"); |
---|
[619] | 531 | |
---|
| 532 | // check RPC descriptor size |
---|
| 533 | assert( (sizeof(rpc_desc_t) <= 128), "RPC descriptor larger than 128 bytes\n"); |
---|
| 534 | |
---|
| 535 | // check size of an array of 5 DQT nodes |
---|
| 536 | assert( (sizeof(dqt_node_t) * 5 <= 512 ), "array of DQT nodes larger than 512 bytes\n"); |
---|
| 537 | |
---|
| 538 | // check size of DQT barrier descriptor |
---|
| 539 | assert( (sizeof(dqt_barrier_t) <= 0x4000 ), "DQT barrier descriptor larger than 4 pages\n"); |
---|
| 540 | |
---|
| 541 | // get pointer on local client process descriptor |
---|
| 542 | thread_t * this = CURRENT_THREAD; |
---|
| 543 | process_t * process = this->process; |
---|
| 544 | |
---|
| 545 | #if DEBUG_BARRIER_CREATE |
---|
| 546 | uint32_t cycle = (uint32_t)hal_get_cycles(); |
---|
| 547 | if( cycle > DEBUG_BARRIER_CREATE ) |
---|
| 548 | printk("\n[%s] thread[%x,%x] enter : x_size %d / y_size %d / levels %d / cycle %d\n", |
---|
| 549 | __FUNCTION__, process->pid, this->trdid, x_size, y_size, levels, cycle ); |
---|
| 550 | #endif |
---|
| 551 | |
---|
| 552 | // get reference process cluster |
---|
| 553 | xptr_t ref_xp = process->ref_xp; |
---|
| 554 | cxy_t ref_cxy = GET_CXY( ref_xp ); |
---|
| 555 | |
---|
| 556 | // 1. allocate memory for DQT barrier descriptor in reference cluster |
---|
| 557 | if( ref_cxy == local_cxy ) |
---|
| 558 | { |
---|
| 559 | req.type = KMEM_PAGE; |
---|
| 560 | req.size = 2; // 4 pages == 16 Kbytes |
---|
| 561 | req.flags = AF_ZERO; |
---|
| 562 | dqt_page = kmem_alloc( &req ); |
---|
| 563 | dqt_page_xp = XPTR( local_cxy , dqt_page ); |
---|
| 564 | } |
---|
| 565 | else |
---|
| 566 | { |
---|
| 567 | rpc_pmem_get_pages_client( ref_cxy, |
---|
| 568 | 2, |
---|
| 569 | &dqt_page ); |
---|
| 570 | dqt_page_xp = XPTR( ref_cxy , dqt_page ); |
---|
| 571 | } |
---|
| 572 | |
---|
| 573 | if( dqt_page == NULL ) return NULL; |
---|
| 574 | |
---|
| 575 | // get pointers on DQT barrier descriptor |
---|
| 576 | barrier_xp = ppm_page2base( dqt_page_xp ); |
---|
| 577 | barrier = GET_PTR( barrier_xp ); |
---|
| 578 | |
---|
| 579 | // initialize global parameters in DQT barrier descriptor |
---|
| 580 | hal_remote_s32( XPTR( ref_cxy , &barrier->x_size ) , x_size ); |
---|
| 581 | hal_remote_s32( XPTR( ref_cxy , &barrier->y_size ) , x_size ); |
---|
| 582 | hal_remote_s32( XPTR( ref_cxy , &barrier->nthreads ) , nthreads ); |
---|
| 583 | |
---|
| 584 | #if DEBUG_BARRIER_CREATE |
---|
| 585 | if( cycle > DEBUG_BARRIER_CREATE ) |
---|
| 586 | printk("\n[%s] thread[%x,%x] created DQT barrier descriptor at (%x,%x)\n", |
---|
| 587 | __FUNCTION__, process->pid, this->trdid, ref_cxy, barrier ); |
---|
| 588 | #endif |
---|
| 589 | |
---|
| 590 | // 2. allocate memory from local cluster for an array of 256 RPCs descriptors |
---|
| 591 | // cannot share the RPC descriptor, because the returned argument is not shared |
---|
| 592 | req.type = KMEM_PAGE; |
---|
| 593 | req.size = 3; // 8 pages == 32 Kbytes |
---|
| 594 | req.flags = AF_ZERO; |
---|
| 595 | rpc_page = kmem_alloc( &req ); |
---|
| 596 | rpc_page_xp = XPTR( local_cxy , rpc_page ); |
---|
| 597 | |
---|
| 598 | // get pointers on RPC descriptors array |
---|
| 599 | rpc_xp = ppm_page2base( rpc_page_xp ); |
---|
| 600 | rpc = GET_PTR( rpc_xp ); |
---|
| 601 | |
---|
| 602 | #if DEBUG_BARRIER_CREATE |
---|
| 603 | if( cycle > DEBUG_BARRIER_CREATE ) |
---|
| 604 | printk("\n[%s] thread[%x,%x] created RPC descriptors array at (%x,%s)\n", |
---|
| 605 | __FUNCTION__, process->pid, this->trdid, local_cxy, rpc ); |
---|
| 606 | #endif |
---|
| 607 | |
---|
| 608 | // 3. send parallel RPCs to all existing clusters covered by the DQT |
---|
| 609 | // to allocate memory for an array of 5 DQT nodes in each cluster |
---|
| 610 | // (5 nodes per cluster <= 512 bytes per cluster) |
---|
| 611 | |
---|
| 612 | responses = 0; // initialize RPC responses counter |
---|
| 613 | |
---|
| 614 | // mask IRQs |
---|
| 615 | hal_disable_irq( &save_sr); |
---|
| 616 | |
---|
| 617 | // client thread blocks itself |
---|
| 618 | thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC ); |
---|
| 619 | |
---|
| 620 | for ( x = 0 ; x < x_size ; x++ ) |
---|
| 621 | { |
---|
| 622 | for ( y = 0 ; y < y_size ; y++ ) |
---|
| 623 | { |
---|
| 624 | // send RPC to existing clusters only |
---|
| 625 | if( LOCAL_CLUSTER->cluster_info[x][y] ) |
---|
| 626 | { |
---|
| 627 | cxy_t cxy = HAL_CXY_FROM_XY( x , y ); // target cluster identifier |
---|
| 628 | |
---|
| 629 | // build a specific RPC descriptor for each target cluster |
---|
| 630 | rpc[cxy].rsp = &responses; |
---|
| 631 | rpc[cxy].blocking = false; |
---|
| 632 | rpc[cxy].index = RPC_KCM_ALLOC; |
---|
| 633 | rpc[cxy].thread = this; |
---|
| 634 | rpc[cxy].lid = this->core->lid; |
---|
| 635 | rpc[cxy].args[0] = (uint64_t)KMEM_512_BYTES; |
---|
| 636 | |
---|
| 637 | // atomically increment expected responses counter |
---|
| 638 | hal_atomic_add( &responses , 1 ); |
---|
| 639 | |
---|
| 640 | // send a non-blocking RPC to allocate 512 bytes in target cluster |
---|
| 641 | rpc_send( cxy , &rpc[cxy] ); |
---|
| 642 | } |
---|
| 643 | } |
---|
| 644 | } |
---|
| 645 | |
---|
| 646 | #if DEBUG_BARRIER_CREATE |
---|
| 647 | if( cycle > DEBUG_BARRIER_CREATE ) |
---|
| 648 | printk("\n[%s] thread[%x,%x] sent all RPC requests to allocate dqt_nodes array\n", |
---|
| 649 | __FUNCTION__, process->pid, this->trdid ); |
---|
| 650 | #endif |
---|
| 651 | |
---|
| 652 | // client thread deschedule |
---|
| 653 | sched_yield("blocked on parallel rpc_kcm_alloc"); |
---|
| 654 | |
---|
| 655 | // restore IRQs |
---|
| 656 | hal_restore_irq( save_sr); |
---|
| 657 | |
---|
| 658 | // 4. initialize the node_xp[x][y][l] array in DQT barrier descriptor |
---|
| 659 | // the node_xp[x][y][0] value is available in rpc.args[1] |
---|
| 660 | |
---|
| 661 | #if DEBUG_BARRIER_CREATE |
---|
| 662 | if( cycle > DEBUG_BARRIER_CREATE ) |
---|
| 663 | printk("\n[%s] thread[%x,%x] initialises array of pointers on dqt_nodes\n", |
---|
| 664 | __FUNCTION__, process->pid, this->trdid ); |
---|
| 665 | #endif |
---|
| 666 | |
---|
| 667 | for ( x = 0 ; x < x_size ; x++ ) |
---|
| 668 | { |
---|
| 669 | for ( y = 0 ; y < y_size ; y++ ) |
---|
| 670 | { |
---|
| 671 | cxy_t cxy = HAL_CXY_FROM_XY( x , y ); // target cluster identifier |
---|
| 672 | xptr_t array_xp = (xptr_t)rpc[cxy].args[1]; // x_pointer on node array |
---|
| 673 | uint32_t offset = sizeof( dqt_node_t ); // size of a DQT node |
---|
| 674 | |
---|
| 675 | // set values into the node_xp[x][y][l] array |
---|
| 676 | for ( l = 0 ; l < levels ; l++ ) |
---|
| 677 | { |
---|
| 678 | xptr_t node_xp = array_xp + (offset * l); |
---|
| 679 | hal_remote_s64( XPTR( ref_cxy , &barrier->node_xp[x][y][l] ), node_xp ); |
---|
| 680 | |
---|
| 681 | #if DEBUG_BARRIER_CREATE |
---|
| 682 | if( cycle > DEBUG_BARRIER_CREATE ) |
---|
| 683 | printk(" - dqt_node_xp[%d,%d,%d] = (%x,%x) / &dqt_node_xp = %x\n", |
---|
| 684 | x , y , l , GET_CXY( node_xp ), GET_PTR( node_xp ), &barrier->node_xp[x][y][l] ); |
---|
| 685 | #endif |
---|
| 686 | } |
---|
| 687 | } |
---|
| 688 | } |
---|
| 689 | |
---|
| 690 | // 5. release memory locally allocated for the RPCs array |
---|
| 691 | req.type = KMEM_PAGE; |
---|
| 692 | req.ptr = rpc_page; |
---|
| 693 | kmem_free( &req ); |
---|
| 694 | |
---|
| 695 | #if DEBUG_BARRIER_CREATE |
---|
| 696 | if( cycle > DEBUG_BARRIER_CREATE ) |
---|
| 697 | printk("\n[%s] thread[%x,%x] released memory for RPC descriptors array\n", |
---|
| 698 | __FUNCTION__, process->pid, this->trdid ); |
---|
| 699 | #endif |
---|
| 700 | |
---|
| 701 | // 6. initialise all distributed DQT nodes using remote accesses |
---|
| 702 | // and the pointers stored in the node_xp[x][y][l] array |
---|
| 703 | for ( x = 0 ; x < x_size ; x++ ) |
---|
| 704 | { |
---|
| 705 | for ( y = 0 ; y < y_size ; y++ ) |
---|
| 706 | { |
---|
| 707 | // initialize existing clusters only |
---|
| 708 | if( LOCAL_CLUSTER->cluster_info[x][y] ) |
---|
| 709 | { |
---|
| 710 | for ( l = 0 ; l < levels ; l++ ) |
---|
| 711 | { |
---|
| 712 | xptr_t parent_xp; |
---|
| 713 | xptr_t child_xp[4]; |
---|
| 714 | uint32_t arity = 0; |
---|
| 715 | |
---|
| 716 | // get DQT node pointers |
---|
| 717 | xptr_t node_xp = hal_remote_l64( XPTR( ref_cxy, |
---|
| 718 | &barrier->node_xp[x][y][l] ) ); |
---|
| 719 | cxy_t node_cxy = GET_CXY( node_xp ); |
---|
| 720 | dqt_node_t * node_ptr = GET_PTR( node_xp ); |
---|
| 721 | |
---|
| 722 | // compute arity and child_xp[i] |
---|
| 723 | if (l == 0 ) // bottom DQT node |
---|
| 724 | { |
---|
| 725 | arity = nthreads; |
---|
| 726 | |
---|
| 727 | child_xp[0] = XPTR_NULL; |
---|
| 728 | child_xp[1] = XPTR_NULL; |
---|
| 729 | child_xp[2] = XPTR_NULL; |
---|
| 730 | child_xp[3] = XPTR_NULL; |
---|
| 731 | } |
---|
| 732 | else // not a bottom DQT node |
---|
| 733 | { |
---|
| 734 | arity = 0; |
---|
| 735 | |
---|
| 736 | // only few non-bottom nodes must be initialised |
---|
| 737 | if( ((x & ((1<<l)-1)) == 0) && ((y & ((1<<l)-1)) == 0) ) |
---|
| 738 | { |
---|
| 739 | uint32_t cx[4]; // x coordinate for children |
---|
| 740 | uint32_t cy[4]; // y coordinate for children |
---|
| 741 | uint32_t i; |
---|
| 742 | |
---|
| 743 | // the child0 coordinates are equal to the parent coordinates |
---|
| 744 | // other children coordinates depend on the level value |
---|
| 745 | cx[0] = x; |
---|
| 746 | cy[0] = y; |
---|
| 747 | |
---|
| 748 | cx[1] = x; |
---|
| 749 | cy[1] = y + (1 << (l-1)); |
---|
| 750 | |
---|
| 751 | cx[2] = x + (1 << (l-1)); |
---|
| 752 | cy[2] = y; |
---|
| 753 | |
---|
| 754 | cx[3] = x + (1 << (l-1)); |
---|
| 755 | cy[3] = y + (1 << (l-1)); |
---|
| 756 | |
---|
| 757 | for ( i = 0 ; i < 4 ; i++ ) |
---|
| 758 | { |
---|
| 759 | // child pointer is NULL if outside the mesh |
---|
| 760 | if ( (cx[i] < x_size) && (cy[i] < y_size) ) |
---|
| 761 | { |
---|
| 762 | // get child_xp[i] |
---|
| 763 | child_xp[i] = hal_remote_l64( XPTR( ref_cxy, |
---|
| 764 | &barrier->node_xp[cx[i]][cy[i]][l-1] ) ); |
---|
| 765 | |
---|
| 766 | // increment arity |
---|
| 767 | arity++; |
---|
| 768 | } |
---|
| 769 | else |
---|
| 770 | { |
---|
| 771 | child_xp[i] = XPTR_NULL; |
---|
| 772 | } |
---|
| 773 | } |
---|
| 774 | } |
---|
| 775 | } |
---|
| 776 | |
---|
| 777 | // compute parent_xp |
---|
| 778 | if( l == (levels - 1) ) // root DQT node |
---|
| 779 | { |
---|
| 780 | parent_xp = XPTR_NULL; |
---|
| 781 | } |
---|
| 782 | else // not the root |
---|
| 783 | { |
---|
| 784 | uint32_t px = 0; // parent X coordinate |
---|
| 785 | uint32_t py = 0; // parent Y coordinate |
---|
| 786 | bool_t found = false; |
---|
| 787 | |
---|
| 788 | // compute macro_cluster x_min, x_max, y_min, y_max |
---|
| 789 | uint32_t x_min = x & ~((1<<(l+1))-1); |
---|
| 790 | uint32_t x_max = x_min + (1<<(l+1)); |
---|
| 791 | uint32_t y_min = y & ~((1<<(l+1))-1); |
---|
| 792 | uint32_t y_max = y_min + (1<<(l+1)); |
---|
| 793 | |
---|
| 794 | // scan all clusters in macro-cluster[x][y][l] / take first active |
---|
| 795 | for( px = x_min ; px < x_max ; px++ ) |
---|
| 796 | { |
---|
| 797 | for( py = y_min ; py < y_max ; py++ ) |
---|
| 798 | { |
---|
| 799 | if( LOCAL_CLUSTER->cluster_info[px][py] ) found = true; |
---|
| 800 | if( found ) break; |
---|
| 801 | } |
---|
| 802 | if( found ) break; |
---|
| 803 | } |
---|
| 804 | |
---|
| 805 | parent_xp = hal_remote_l64( XPTR( ref_cxy , |
---|
| 806 | &barrier->node_xp[px][py][l+1] ) ); |
---|
| 807 | } |
---|
| 808 | |
---|
| 809 | // initializes the DQT node |
---|
| 810 | hal_remote_s32( XPTR( node_cxy , &node_ptr->arity ) , arity ); |
---|
| 811 | hal_remote_s32( XPTR( node_cxy , &node_ptr->current ) , 0 ); |
---|
| 812 | hal_remote_s32( XPTR( node_cxy , &node_ptr->sense ) , 0 ); |
---|
| 813 | hal_remote_s32( XPTR( node_cxy , &node_ptr->level ) , l ); |
---|
| 814 | hal_remote_s64( XPTR( node_cxy , &node_ptr->parent_xp ) , parent_xp ); |
---|
| 815 | hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[0] ) , child_xp[0] ); |
---|
| 816 | hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[1] ) , child_xp[1] ); |
---|
| 817 | hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[2] ) , child_xp[2] ); |
---|
| 818 | hal_remote_s64( XPTR( node_cxy , &node_ptr->child_xp[3] ) , child_xp[3] ); |
---|
| 819 | |
---|
| 820 | xlist_root_init( XPTR( node_cxy , &node_ptr->root ) ); |
---|
| 821 | |
---|
| 822 | remote_busylock_init( XPTR( node_cxy , &node_ptr->lock ), |
---|
| 823 | LOCK_BARRIER_STATE ); |
---|
| 824 | } |
---|
| 825 | } |
---|
| 826 | } |
---|
| 827 | } |
---|
| 828 | |
---|
| 829 | #if DEBUG_BARRIER_CREATE |
---|
| 830 | cycle = (uint32_t)hal_get_cycles(); |
---|
| 831 | if( cycle > DEBUG_BARRIER_CREATE ) |
---|
| 832 | printk("\n[%s] thread[%x,%x] completed DQT barrier initialisation / cycle %d\n", |
---|
| 833 | __FUNCTION__, process->pid, this->trdid, cycle ); |
---|
| 834 | dqt_barrier_display( barrier_xp ); |
---|
| 835 | #endif |
---|
| 836 | |
---|
| 837 | return barrier; |
---|
| 838 | |
---|
| 839 | } // end dqt_barrier_create() |
---|
| 840 | |
---|
| 841 | /////////////////////////////////////////////// |
---|
| 842 | void dqt_barrier_destroy( xptr_t barrier_xp ) |
---|
| 843 | { |
---|
| 844 | page_t * rpc_page; |
---|
| 845 | xptr_t rpc_page_xp; |
---|
| 846 | rpc_desc_t * rpc; // local pointer on RPC descriptors array |
---|
| 847 | xptr_t rpc_xp; // extended pointer on RPC descriptor array |
---|
| 848 | reg_t save_sr; // for critical section |
---|
| 849 | kmem_req_t req; // kmem request |
---|
| 850 | |
---|
| 851 | thread_t * this = CURRENT_THREAD; |
---|
| 852 | |
---|
| 853 | // get DQT barrier descriptor cluster and local pointer |
---|
| 854 | dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); |
---|
| 855 | cxy_t barrier_cxy = GET_CXY( barrier_xp ); |
---|
| 856 | |
---|
| 857 | #if DEBUG_BARRIER_DESTROY |
---|
| 858 | uint32_t cycle = (uint32_t)hal_get_cycles(); |
---|
| 859 | if( cycle > DEBUG_BARRIER_DESTROY ) |
---|
| 860 | printk("\n[%s] thread[%x,%x] enter for barrier (%x,%x) / cycle %d\n", |
---|
| 861 | __FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle ); |
---|
| 862 | #endif |
---|
| 863 | |
---|
| 864 | // get x_size and y_size global parameters |
---|
| 865 | uint32_t x_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->x_size ) ); |
---|
| 866 | uint32_t y_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->y_size ) ); |
---|
| 867 | |
---|
| 868 | // 1. allocate memory from local cluster for an array of 256 RPCs descriptors |
---|
| 869 | // cannot share the RPC descriptor, because the "buf" argument is not shared |
---|
| 870 | req.type = KMEM_PAGE; |
---|
| 871 | req.size = 3; // 8 pages == 32 Kbytes |
---|
| 872 | req.flags = AF_ZERO; |
---|
| 873 | rpc_page = kmem_alloc( &req ); |
---|
| 874 | rpc_page_xp = XPTR( local_cxy , rpc_page ); |
---|
| 875 | |
---|
| 876 | // get pointers on RPC descriptors array |
---|
| 877 | rpc_xp = ppm_page2base( rpc_page_xp ); |
---|
| 878 | rpc = GET_PTR( rpc_xp ); |
---|
| 879 | |
---|
| 880 | // 2. send parallel RPCs to all existing clusters covered by the DQT |
---|
| 881 | // to release memory allocated for the arrays of DQT nodes in each cluster |
---|
| 882 | |
---|
| 883 | uint32_t responses = 0; // initialize RPC responses counter |
---|
| 884 | |
---|
| 885 | // mask IRQs |
---|
| 886 | hal_disable_irq( &save_sr); |
---|
| 887 | |
---|
| 888 | // client thread blocks itself |
---|
| 889 | thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_RPC ); |
---|
| 890 | |
---|
| 891 | uint32_t x , y; |
---|
| 892 | |
---|
| 893 | #if DEBUG_BARRIER_DESTROY |
---|
| 894 | if( cycle > DEBUG_BARRIER_DESTROY ) |
---|
| 895 | printk("\n[%s] thread[%x,%x] send RPCs to release the distributed dqt_node array\n", |
---|
| 896 | __FUNCTION__, this->process->pid, this->trdid ); |
---|
| 897 | #endif |
---|
| 898 | |
---|
| 899 | for ( x = 0 ; x < x_size ; x++ ) |
---|
| 900 | { |
---|
| 901 | for ( y = 0 ; y < y_size ; y++ ) |
---|
| 902 | { |
---|
| 903 | // send RPC to existing cluster only |
---|
| 904 | if( LOCAL_CLUSTER->cluster_info[x][y] ) |
---|
| 905 | { |
---|
| 906 | // compute target cluster identifier |
---|
| 907 | cxy_t cxy = HAL_CXY_FROM_XY( x , y ); |
---|
| 908 | |
---|
| 909 | // get local pointer on dqt_nodes array in target cluster |
---|
| 910 | xptr_t buf_xp_xp = XPTR( barrier_cxy , &barrier_ptr->node_xp[x][y][0] ); |
---|
| 911 | xptr_t buf_xp = hal_remote_l64( buf_xp_xp ); |
---|
| 912 | void * buf = GET_PTR( buf_xp ); |
---|
| 913 | |
---|
| 914 | assert( (cxy == GET_CXY(buf_xp)) , "bad extended pointer on dqt_nodes array\n" ); |
---|
| 915 | |
---|
| 916 | // build a specific RPC descriptor |
---|
| 917 | rpc[cxy].rsp = &responses; |
---|
| 918 | rpc[cxy].blocking = false; |
---|
| 919 | rpc[cxy].index = RPC_KCM_FREE; |
---|
| 920 | rpc[cxy].thread = this; |
---|
| 921 | rpc[cxy].lid = this->core->lid; |
---|
| 922 | rpc[cxy].args[0] = (uint64_t)(intptr_t)buf; |
---|
| 923 | rpc[cxy].args[1] = (uint64_t)KMEM_512_BYTES; |
---|
| 924 | |
---|
| 925 | // atomically increment expected responses counter |
---|
| 926 | hal_atomic_add( &responses , 1 ); |
---|
| 927 | |
---|
| 928 | #if DEBUG_BARRIER_DESTROY |
---|
| 929 | if( cycle > DEBUG_BARRIER_DESTROY ) |
---|
| 930 | printk(" - target cluster(%d,%d) / buffer %x\n", x, y, buf ); |
---|
| 931 | #endif |
---|
| 932 | // send a non-blocking RPC to release 512 bytes in target cluster |
---|
| 933 | rpc_send( cxy , &rpc[cxy] ); |
---|
| 934 | } |
---|
| 935 | } |
---|
| 936 | } |
---|
| 937 | |
---|
| 938 | // client thread deschedule |
---|
| 939 | sched_yield("blocked on parallel rpc_kcm_free"); |
---|
| 940 | |
---|
| 941 | // restore IRQs |
---|
| 942 | hal_restore_irq( save_sr); |
---|
| 943 | |
---|
| 944 | // 3. release memory locally allocated for the RPC descriptors array |
---|
| 945 | req.type = KMEM_PAGE; |
---|
| 946 | req.ptr = rpc_page; |
---|
| 947 | kmem_free( &req ); |
---|
| 948 | |
---|
| 949 | // 4. release memory allocated for barrier descriptor |
---|
| 950 | xptr_t page_xp = ppm_base2page( barrier_xp ); |
---|
| 951 | page_t * page = GET_PTR( page_xp ); |
---|
| 952 | |
---|
| 953 | if( barrier_cxy == local_cxy ) |
---|
| 954 | { |
---|
| 955 | req.type = KMEM_PAGE; |
---|
| 956 | req.ptr = page; |
---|
| 957 | kmem_free( &req ); |
---|
| 958 | } |
---|
| 959 | else |
---|
| 960 | { |
---|
| 961 | rpc_pmem_release_pages_client( barrier_cxy, |
---|
| 962 | page ); |
---|
| 963 | } |
---|
| 964 | |
---|
| 965 | #if DEBUG_BARRIER_DESTROY |
---|
| 966 | cycle = (uint32_t)hal_get_cycles(); |
---|
| 967 | if( cycle > DEBUG_BARRIER_DESTROY ) |
---|
| 968 | printk("\n[%s] thread[%x,%x] exit for barrier (%x,%x) / cycle %d\n", |
---|
| 969 | __FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle ); |
---|
| 970 | #endif |
---|
| 971 | |
---|
| 972 | } // end dqt_barrier_destroy() |
---|
| 973 | |
---|
| 974 | //////////////////////////////////////////// |
---|
| 975 | void dqt_barrier_wait( xptr_t barrier_xp ) |
---|
| 976 | { |
---|
| 977 | thread_t * this = CURRENT_THREAD; |
---|
| 978 | |
---|
| 979 | // check calling thread can yield |
---|
| 980 | thread_assert_can_yield( this , __FUNCTION__ ); |
---|
| 981 | |
---|
| 982 | // get cluster and local pointer on DQT barrier descriptor |
---|
| 983 | dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); |
---|
| 984 | cxy_t barrier_cxy = GET_CXY( barrier_xp ); |
---|
| 985 | |
---|
| 986 | #if DEBUG_BARRIER_WAIT |
---|
| 987 | uint32_t cycle = (uint32_t)hal_get_cycles(); |
---|
| 988 | if( cycle > DEBUG_BARRIER_WAIT ) |
---|
| 989 | printk("\n[%s] thread[%x,%x] enter / barrier (%x,%x) / cycle %d\n", |
---|
| 990 | __FUNCTION__, this->process->pid, this->trdid, barrier_cxy, barrier_ptr, cycle ); |
---|
| 991 | #endif |
---|
| 992 | |
---|
| 993 | // get extended pointer on local bottom DQT node |
---|
| 994 | uint32_t x = HAL_X_FROM_CXY( local_cxy ); |
---|
| 995 | uint32_t y = HAL_Y_FROM_CXY( local_cxy ); |
---|
| 996 | xptr_t node_xp = hal_remote_l64( XPTR( barrier_cxy , &barrier_ptr->node_xp[x][y][0] ) ); |
---|
| 997 | |
---|
| 998 | // call recursive function to traverse DQT from bottom to root |
---|
| 999 | dqt_barrier_increment( node_xp ); |
---|
| 1000 | |
---|
| 1001 | #if DEBUG_BARRIER_WAIT |
---|
| 1002 | cycle = (uint32_t)hal_get_cycles(); |
---|
| 1003 | if( cycle > DEBUG_BARRIER_WAIT ) |
---|
| 1004 | printk("\n[%s] thread[%x,%x] exit / barrier (%x,%x) / cycle %d\n", |
---|
| 1005 | __FUNCTION__, this->trdid, this->process->pid, barrier_cxy, barrier_ptr, cycle ); |
---|
| 1006 | #endif |
---|
| 1007 | |
---|
| 1008 | } // end dqt_barrier_wait() |
---|
| 1009 | |
---|
[623] | 1010 | ////////////////////////////////////////////// |
---|
| 1011 | void dqt_barrier_display( xptr_t barrier_xp ) |
---|
| 1012 | { |
---|
| 1013 | // get cluster and local pointer on DQT barrier |
---|
| 1014 | dqt_barrier_t * barrier_ptr = GET_PTR( barrier_xp ); |
---|
| 1015 | cxy_t barrier_cxy = GET_CXY( barrier_xp ); |
---|
[619] | 1016 | |
---|
[623] | 1017 | // get barrier global parameters |
---|
| 1018 | uint32_t x_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->x_size ) ); |
---|
| 1019 | uint32_t y_size = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->y_size ) ); |
---|
| 1020 | uint32_t nthreads = hal_remote_l32( XPTR( barrier_cxy , &barrier_ptr->nthreads ) ); |
---|
[619] | 1021 | |
---|
[623] | 1022 | // compute size and number of DQT levels |
---|
| 1023 | uint32_t z = (x_size > y_size) ? x_size : y_size; |
---|
| 1024 | uint32_t levels = (z < 2) ? 1 : (z < 3) ? 2 : (z < 5) ? 3 : (z < 9) ? 4 : 5; |
---|
[619] | 1025 | |
---|
[623] | 1026 | printk("\n***** DQT barrier : x_size %d / y_size %d / nthreads %d / levels %d *****\n", |
---|
| 1027 | x_size, y_size, nthreads, levels ); |
---|
| 1028 | |
---|
| 1029 | uint32_t x , y , l; |
---|
| 1030 | |
---|
| 1031 | for ( x = 0 ; x < x_size ; x++ ) |
---|
| 1032 | { |
---|
| 1033 | for ( y = 0 ; y < y_size ; y++ ) |
---|
| 1034 | { |
---|
| 1035 | printk(" - cluster[%d,%d]\n", x , y ); |
---|
| 1036 | |
---|
| 1037 | for ( l = 0 ; l < levels ; l++ ) |
---|
| 1038 | { |
---|
| 1039 | // get pointers on target node |
---|
| 1040 | xptr_t node_xp = hal_remote_l64( XPTR( barrier_cxy , |
---|
| 1041 | &barrier_ptr->node_xp[x][y][l] ) ); |
---|
| 1042 | dqt_node_t * node_ptr = GET_PTR( node_xp ); |
---|
| 1043 | cxy_t node_cxy = GET_CXY( node_xp ); |
---|
| 1044 | |
---|
| 1045 | if( node_xp != XPTR_NULL ) |
---|
| 1046 | { |
---|
| 1047 | uint32_t level = hal_remote_l32( XPTR( node_cxy , &node_ptr->level )); |
---|
| 1048 | uint32_t arity = hal_remote_l32( XPTR( node_cxy , &node_ptr->arity )); |
---|
| 1049 | uint32_t count = hal_remote_l32( XPTR( node_cxy , &node_ptr->current )); |
---|
| 1050 | xptr_t pa_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->parent_xp )); |
---|
| 1051 | xptr_t c0_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[0] )); |
---|
| 1052 | xptr_t c1_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[1] )); |
---|
| 1053 | xptr_t c2_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[2] )); |
---|
| 1054 | xptr_t c3_xp = hal_remote_l32( XPTR( node_cxy , &node_ptr->child_xp[3] )); |
---|
| 1055 | |
---|
| 1056 | printk(" . level %d : (%x,%x) / %d on %d / P(%x,%x) / C0(%x,%x)" |
---|
| 1057 | " C1(%x,%x) / C2(%x,%x) / C3(%x,%x)\n", |
---|
| 1058 | level, node_cxy, node_ptr, count, arity, |
---|
| 1059 | GET_CXY(pa_xp), GET_PTR(pa_xp), |
---|
| 1060 | GET_CXY(c0_xp), GET_PTR(c0_xp), |
---|
| 1061 | GET_CXY(c1_xp), GET_PTR(c1_xp), |
---|
| 1062 | GET_CXY(c2_xp), GET_PTR(c2_xp), |
---|
| 1063 | GET_CXY(c3_xp), GET_PTR(c3_xp) ); |
---|
| 1064 | } |
---|
| 1065 | } |
---|
| 1066 | } |
---|
| 1067 | } |
---|
| 1068 | } // end dqt_barrier_display() |
---|
| 1069 | |
---|
| 1070 | |
---|
[619] | 1071 | ////////////////////////////////////////////////////////////////////////////////////////// |
---|
[623] | 1072 | // This static (recursive) function is called by the dqt_barrier_wait() function. |
---|
| 1073 | // It traverses the DQT from bottom to root, and decrements the "current" variables. |
---|
| 1074 | // For each traversed node, it blocks and deschedules if it is not the last expected |
---|
| 1075 | // thread. The last arrived thread reset the local node before returning. |
---|
[619] | 1076 | ////////////////////////////////////////////////////////////////////////////////////////// |
---|
| 1077 | static void dqt_barrier_increment( xptr_t node_xp ) |
---|
| 1078 | { |
---|
| 1079 | uint32_t expected; |
---|
| 1080 | uint32_t sense; |
---|
| 1081 | uint32_t arity; |
---|
| 1082 | |
---|
| 1083 | thread_t * this = CURRENT_THREAD; |
---|
| 1084 | |
---|
| 1085 | // get node cluster and local pointer |
---|
| 1086 | dqt_node_t * node_ptr = GET_PTR( node_xp ); |
---|
| 1087 | cxy_t node_cxy = GET_CXY( node_xp ); |
---|
| 1088 | |
---|
| 1089 | // build relevant extended pointers |
---|
| 1090 | xptr_t arity_xp = XPTR( node_cxy , &node_ptr->arity ); |
---|
| 1091 | xptr_t sense_xp = XPTR( node_cxy , &node_ptr->sense ); |
---|
| 1092 | xptr_t current_xp = XPTR( node_cxy , &node_ptr->current ); |
---|
| 1093 | xptr_t lock_xp = XPTR( node_cxy , &node_ptr->lock ); |
---|
| 1094 | xptr_t root_xp = XPTR( node_cxy , &node_ptr->root ); |
---|
| 1095 | |
---|
| 1096 | #if DEBUG_BARRIER_WAIT |
---|
| 1097 | uint32_t cycle = (uint32_t)hal_get_cycles(); |
---|
| 1098 | uint32_t level = hal_remote_l32( XPTR( node_cxy, &node_ptr->level ) ); |
---|
| 1099 | if( cycle > DEBUG_BARRIER_WAIT ) |
---|
| 1100 | printk("\n[%s] thread[%x,%x] increments DQT node(%d,%d,%d) / cycle %d\n", |
---|
| 1101 | __FUNCTION__ , this->process->pid, this->trdid, |
---|
| 1102 | HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); |
---|
| 1103 | #endif |
---|
| 1104 | |
---|
| 1105 | // get extended pointer on parent node |
---|
| 1106 | xptr_t parent_xp = hal_remote_l64( XPTR( node_cxy , &node_ptr->parent_xp ) ); |
---|
| 1107 | |
---|
| 1108 | // take busylock |
---|
| 1109 | remote_busylock_acquire( lock_xp ); |
---|
| 1110 | |
---|
| 1111 | // get sense and arity values from barrier descriptor |
---|
| 1112 | sense = hal_remote_l32( sense_xp ); |
---|
| 1113 | arity = hal_remote_l32( arity_xp ); |
---|
| 1114 | |
---|
| 1115 | // compute expected value |
---|
| 1116 | expected = (sense == 0) ? 1 : 0; |
---|
| 1117 | |
---|
| 1118 | // increment current number of arrived threads / get value before increment |
---|
| 1119 | uint32_t current = hal_remote_atomic_add( current_xp , 1 ); |
---|
| 1120 | |
---|
| 1121 | // last arrived thread reset the local node, makes the recursive call |
---|
| 1122 | // on parent node, and reactivates all waiting thread when returning. |
---|
| 1123 | // other threads block, register in queue, and deschedule. |
---|
| 1124 | |
---|
| 1125 | if ( current == (arity - 1) ) // last thread |
---|
| 1126 | { |
---|
| 1127 | |
---|
| 1128 | #if DEBUG_BARRIER_WAIT |
---|
| 1129 | if( cycle > DEBUG_BARRIER_WAIT ) |
---|
| 1130 | printk("\n[%s] thread[%x,%x] reset DQT node(%d,%d,%d)\n", |
---|
| 1131 | __FUNCTION__ , this->process->pid, this->trdid, |
---|
| 1132 | HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); |
---|
| 1133 | #endif |
---|
| 1134 | // reset the current node |
---|
| 1135 | hal_remote_s32( sense_xp , expected ); |
---|
| 1136 | hal_remote_s32( current_xp , 0 ); |
---|
| 1137 | |
---|
| 1138 | // release busylock protecting the current node |
---|
| 1139 | remote_busylock_release( lock_xp ); |
---|
| 1140 | |
---|
| 1141 | // recursive call on parent node when current node is not the root |
---|
| 1142 | if( parent_xp != XPTR_NULL) dqt_barrier_increment( parent_xp ); |
---|
| 1143 | |
---|
| 1144 | // unblock all waiting threads on this node |
---|
| 1145 | while( xlist_is_empty( root_xp ) == false ) |
---|
| 1146 | { |
---|
| 1147 | // get pointers on first waiting thread |
---|
| 1148 | xptr_t thread_xp = XLIST_FIRST( root_xp , thread_t , wait_list ); |
---|
| 1149 | cxy_t thread_cxy = GET_CXY( thread_xp ); |
---|
| 1150 | thread_t * thread_ptr = GET_PTR( thread_xp ); |
---|
| 1151 | |
---|
| 1152 | #if (DEBUG_BARRIER_WAIT & 1) |
---|
| 1153 | trdid_t trdid = hal_remote_l32( XPTR( thread_cxy , &thread_ptr->trdid ) ); |
---|
| 1154 | process_t * process = hal_remote_lpt( XPTR( thread_cxy , &thread_ptr->process ) ); |
---|
| 1155 | pid_t pid = hal_remote_l32( XPTR( thread_cxy , &process->pid ) ); |
---|
| 1156 | if( cycle > DEBUG_BARRIER_WAIT ) |
---|
| 1157 | printk("\n[%s] thread[%x,%x] unblock thread[%x,%x]\n", |
---|
| 1158 | __FUNCTION__, this->process->pid, this->trdid, pid, trdid ); |
---|
| 1159 | #endif |
---|
| 1160 | // remove waiting thread from queue |
---|
| 1161 | xlist_unlink( XPTR( thread_cxy , &thread_ptr->wait_list ) ); |
---|
| 1162 | |
---|
| 1163 | // unblock waiting thread |
---|
| 1164 | thread_unblock( thread_xp , THREAD_BLOCKED_USERSYNC ); |
---|
| 1165 | } |
---|
| 1166 | } |
---|
| 1167 | else // not the last thread |
---|
| 1168 | { |
---|
| 1169 | // get extended pointer on xlist entry from thread |
---|
| 1170 | xptr_t entry_xp = XPTR( local_cxy , &this->wait_list ); |
---|
| 1171 | |
---|
| 1172 | // register calling thread in barrier waiting queue |
---|
| 1173 | xlist_add_last( root_xp , entry_xp ); |
---|
| 1174 | |
---|
| 1175 | // block calling thread |
---|
| 1176 | thread_block( XPTR( local_cxy , this ) , THREAD_BLOCKED_USERSYNC ); |
---|
| 1177 | |
---|
| 1178 | // release busylock protecting the remote_barrier |
---|
| 1179 | remote_busylock_release( lock_xp ); |
---|
| 1180 | |
---|
| 1181 | #if DEBUG_BARRIER_WAIT |
---|
| 1182 | if( cycle > DEBUG_BARRIER_WAIT ) |
---|
| 1183 | printk("\n[%s] thread[%x,%x] blocks on node(%d,%d,%d)\n", |
---|
| 1184 | __FUNCTION__ , this->process->pid, this->trdid, |
---|
| 1185 | HAL_X_FROM_CXY(node_cxy), HAL_Y_FROM_CXY(node_cxy), level ); |
---|
| 1186 | #endif |
---|
| 1187 | // deschedule |
---|
| 1188 | sched_yield("blocked on barrier"); |
---|
| 1189 | } |
---|
| 1190 | |
---|
| 1191 | return; |
---|
| 1192 | |
---|
| 1193 | } // end dqt_barrier_decrement() |
---|
| 1194 | |
---|
| 1195 | |
---|