Changeset 588 for trunk/user/fft
- Timestamp:
- Nov 1, 2018, 12:44:35 PM (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/user/fft/fft.c
r582 r588 29 29 // 30 30 // This application uses 4 shared data arrays, that are distributed 31 // in all clusters (one sub-buffer per cluster):31 // in all clusters (one buffer per cluster): 32 32 // - data[N] contains N input data points, with 2 double per point. 33 33 // - trans[N] contains N intermediate data points, 2 double per point. … … 86 86 // parameters 87 87 88 #define DEFAULT_M 688 #define DEFAULT_M 14 // 16 K data points 89 89 #define MODE COSIN 90 90 #define CHECK 0 91 #define DEBUG_MAIN 1 // trace main() function (detailed if odd) 92 #define DEBUG_FFT1D 1 // trace FFT1D() function (detailed if odd) 91 #define DEBUG_MAIN 0 // trace main() function (detailed if odd) 92 #define DEBUG_SLAVE 0 // trace slave() function (detailed if odd) 93 #define DEBUG_FFT1D 0 // trace FFT1D() function (detailed if odd) 93 94 #define DEBUG_ROW 0 // trace FFTRow() function (detailed if odd) 94 95 #define PRINT_ARRAY 0 … … 97 98 #define SWAP(a,b) { double tmp; tmp = a; a = b; b = tmp; } 98 99 99 ///////////////////////////////////////////////////////////////////////////////// 100 ///////////////////////////////////////////////////////////////////////////////////// 101 // structure containing the arguments for the slave() function 102 ///////////////////////////////////////////////////////////////////////////////////// 103 104 typedef struct args_s 105 { 106 unsigned int tid; // thread continuous index 107 unsigned int main_tid; // main thread continuous index 108 } 109 args_t; 110 111 ///////////////////////////////////////////////////////////////////////////////////// 100 112 // global variables 101 ///////////////////////////////////////////////////////////////////////////////// 113 ///////////////////////////////////////////////////////////////////////////////////// 102 114 103 115 unsigned int x_size; // number of clusters per row in the mesh … … 120 132 121 133 // instrumentation counters 122 longparallel_time[THREADS_MAX]; // total computation time (per thread)123 
long sync_time[THREADS_MAX]; // cumulativewaiting time in barriers (per thread)124 longinit_time; // initialisation time (in main)134 unsigned int parallel_time[THREADS_MAX]; // total computation time (per thread) 135 unsigned int sync_time[THREADS_MAX]; // cumulated waiting time in barriers (per thread) 136 unsigned int init_time; // initialisation time (in main) 125 137 126 138 // synchronisation barrier (all threads) … … 131 143 pthread_t trdid[THREADS_MAX]; // kernel threads identifiers 132 144 pthread_attr_t attr[THREADS_MAX]; // POSIX thread attributes 133 unsigned intargs[THREADS_MAX]; // slave function arguments145 args_t args[THREADS_MAX]; // slave function arguments 134 146 135 147 ///////////////////////////////////////////////////////////////////////////////// … … 137 149 ///////////////////////////////////////////////////////////////////////////////// 138 150 139 void slave( unsigned int * tid);151 void slave( args_t * args ); 140 152 141 153 double CheckSum( void ); … … 215 227 216 228 unsigned long long start_init_cycle; 217 unsigned long long start_exec_cycle; 218 unsigned long long end_exec_cycle; 229 unsigned long long end_init_cycle; 219 230 220 231 #if CHECK … … 224 235 225 236 // get FFT application start cycle 226 if( get_cycle( &start_init_cycle ) ) 227 { 228 printf("[FFT ERROR] cannot get start cycle\n"); 229 } 237 get_cycle( &start_init_cycle ); 230 238 231 239 // get platform parameters to compute nthreads & nclusters … … 279 287 main_tid = (((main_x * y_size) + main_y) * ncores) + main_lid; 280 288 281 printf("\n[FFT] main starts on core[%x,%d] / %d complex points / %d thread(s)\n",282 main_cxy, main_lid, N, nthreads );289 printf("\n[FFT] starts on core[%x,%d] / %d complex points / %d thread(s) / PID %x\n", 290 main_cxy, main_lid, N, nthreads, getpid() ); 283 291 284 292 // allocate memory for the distributed data[i], trans[i], umain[i], twid[i] buffers … … 307 315 InitT( twid ); 308 316 309 printf("\n[FFT] main complete arrays 
init\n");317 printf("\n[FFT] main completes arrays init\n"); 310 318 311 319 #if CHECK … … 344 352 for (y = 0 ; y < y_size ; y++) 345 353 { 354 // compute cluster identifier 355 cxy = HAL_CXY_FROM_XY( x , y ); 356 346 357 for ( lid = 0 ; lid < ncores ; lid++ ) 347 358 { … … 351 362 // set thread attributes 352 363 attr[tid].attributes = PT_ATTR_CLUSTER_DEFINED | PT_ATTR_CORE_DEFINED; 353 attr[tid].cxy = HAL_CXY_FROM_XY( x , y );364 attr[tid].cxy = cxy; 354 365 attr[tid].lid = lid; 355 366 356 367 // set slave function argument 357 args[tid] = tid; 368 args[tid].tid = tid; 369 args[tid].main_tid = main_tid; 358 370 359 371 // create thread … … 368 380 exit( 0 ); 369 381 } 370 #if DEBUG_MAIN 371 printf("\n[FFT] main created thread %x on core %d in cluster(%d,%d) \n", tid, lid, x, y ); 382 #if DEBUG_MAIN 383 unsigned long long debug_cycle; 384 get_cycle( &debug_cycle ); 385 printf("\n[FFT] main created thread %x on core[%x,%d] / cycle %d\n", 386 tid, cxy, lid, (unsigned int)debug_cycle ); 372 387 #endif 373 388 } … … 377 392 378 393 // register sequencial initalisation completion cycle 379 get_cycle( &start_exec_cycle ); 380 init_time = (long)(start_exec_cycle - start_init_cycle); 381 printf("\n[FFT] main enter parallel execution\n"); 394 get_cycle( &end_init_cycle ); 395 init_time = (unsigned int)(end_init_cycle - start_init_cycle); 396 397 printf("\n[FFT] main enters parallel execution\n"); 382 398 383 // main execute itselfthe slave() function399 // main itself executes the slave() function 384 400 slave( &args[main_tid] ); 385 401 … … 396 412 if( tid != main_tid ) 397 413 { 398 #if DEBUG_MAIN399 printf("\n[FFT] main join thread %x\n", trdid[tid] );400 #endif401 414 if( pthread_join( trdid[tid] , NULL ) ) 402 415 { 403 printf("\n[FFT ERROR] joining thread %x\n", trdid[tid]);416 printf("\n[FFT ERROR] in main thread joining thread %x\n", tid ); 404 417 exit( 0 ); 405 418 } 419 420 #if DEBUG_MAIN 421 printf("\n[FFT] main thread %d joined thread %d\n", main_tid, tid ); 
422 #endif 406 423 407 424 } … … 409 426 } 410 427 } 411 412 // register parallel execution completion cycle413 get_cycle( &end_exec_cycle );414 printf("\n[FFT] complete parallel execution / cycle %d\n", (long)end_exec_cycle );415 428 416 429 #if PRINT_ARRAY … … 433 446 434 447 // open instrumentation file 435 436 437 438 439 440 448 // FILE * f = fopen( string , NULL ); 449 // if ( f == NULL ) 450 // { 451 // printf("\n[FFT ERROR] cannot open instrumentation file %s\n", string ); 452 // exit( 0 ); 453 // } 441 454 442 455 snprintf( string , 256 , "\n[FFT] instrumentation : (%dx%dx%d) threads / %d points\n", … … 445 458 // display on terminal, and save to instrumentation file 446 459 printf( "%s" , string ); 447 fprintf( f , string ); 448 460 // fprintf( f , string ); 461 462 for (tid = 0 ; tid < nthreads ; tid++) 463 { 464 snprintf( string , 256 , "\ntid %d : Init %d / Parallel %d / Sync %d\n", 465 tid, init_time, parallel_time[tid], sync_time[tid] ); 466 467 // display on terminal, and save to instrumentation file 468 printf("%s" , string ); 469 // fprintf( f , string ); 470 } 471 472 // close instrumentation file and exit 473 // fclose( f ); 474 475 476 /* 449 477 long min_para = parallel_time[0]; 450 478 long max_para = parallel_time[0]; … … 461 489 462 490 snprintf( string , 256 , "\n Init Parallel Barrier\n" 463 "MIN : %d | %d | %d(cycles)\n"464 "MAX : %d | %d | %d(cycles)\n",491 "MIN : %d\t | %d\t | %d\t (cycles)\n" 492 "MAX : %d\t | %d\t | %d\t (cycles)\n", 465 493 (int)init_time, (int)min_para, (int)min_sync, 466 494 (int)init_time, (int)max_para, (int)max_sync ); 467 468 // display on terminal, and save to instrumentation file 469 printf("%s" , string ); 470 fprintf( f , string ); 471 472 // close instrumentation file and exit 473 fclose( f ); 474 475 exit( 0 ); 495 */ 496 497 pthread_exit( NULL ); 476 498 477 499 } // end main() … … 480 502 // This function is executed in parallel by all threads. 
481 503 /////////////////////////////////////////////////////////////// 482 void slave( unsigned int * tid)504 void slave( args_t * args ) 483 505 { 484 506 unsigned int i; 485 unsigned int MyNum; // continuous thread index 507 unsigned int MyNum; // this thread index 508 unsigned int MainNum; // main thread index 486 509 unsigned int MyFirst; // index first row allocated to thread 487 510 unsigned int MyLast; // index last row allocated to thread … … 495 518 unsigned long long barrier_stop; 496 519 497 MyNum = *tid; 498 499 // BARRIER before parallel exec 500 pthread_barrier_wait( &barrier ); 520 MyNum = args->tid; 521 MainNum = args->main_tid; 501 522 502 523 // initialise instrumentation 503 524 get_cycle( &parallel_start ); 525 526 #if DEBUG_SLAVE 527 printf("\n[FFT] %s : thread %x enter / cycle %d\n", 528 __FUNCTION__, MyNum, (unsigned int)parallel_start ); 529 #endif 504 530 505 531 // allocate and initialise local array upriv[] … … 526 552 get_cycle( &barrier_stop ); 527 553 528 sync_time[MyNum] = (long)(barrier_stop - barrier_start);554 sync_time[MyNum] += (barrier_stop - barrier_start); 529 555 530 556 #if CHECK … … 540 566 // register computation time 541 567 get_cycle( &parallel_stop ); 542 parallel_time[MyNum] = (long)(parallel_stop - parallel_start); 543 544 // exit if MyNum != 0 545 if( MyNum ) pthread_exit( 0 ); 568 parallel_time[MyNum] = (parallel_stop - parallel_start); 569 570 #if DEBUG_SLAVE 571 printf("\n[FFT] %s : thread %x exit / parallel_time %d / sync_time %d / cycle %d\n", 572 __FUNCTION__, MyNum, parallel_time[MyNum], sync_time[MyNum], (unsigned int)parallel_stop ); 573 #endif 574 575 // exit only if MyNum != MainNum 576 if( MyNum != MainNum ) pthread_exit( NULL ); 546 577 547 578 } // end slave() … … 772 803 773 804 #if DEBUG_FFT1D 774 printf("\n[FFT] %s : thread %x enter / first %d / last %d\n", 775 __FUNCTION__, MyNum, MyFirst, MyLast ); 805 unsigned long long cycle; 806 get_cycle( &cycle ); 807 printf("\n[FFT] %s : thread %x enter / first %d / 
last %d / cycle %d\n", 808 __FUNCTION__, MyNum, MyFirst, MyLast, (unsigned int)cycle ); 776 809 #endif 777 810 … … 780 813 781 814 #if( DEBUG_FFT1D & 1 ) 782 unsigned long long cycle;783 815 get_cycle( &cycle ); 784 816 printf("\n[FFT] %s : thread %x after first transpose / cycle %d\n", … … 891 923 if( PRINT_ARRAY ) PrintArray( x , N ); 892 924 #endif 893 894 925 895 926 } // end FFT1D()
Note: See TracChangeset
for help on using the changeset viewer.