Changeset 628 for trunk/user/fft/fft.c
- Timestamp:
- May 6, 2019, 1:28:01 PM (6 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/user/fft/fft.c
r596 r628 28 28 // - RANDOM : data points have pseudo random values 29 29 // 30 // This application uses 4 shared data arrays, that are distributed 31 // in all clusters (one buffer per cluster): 30 // The main parameters for this generic application are the following: 31 // - M : N = 2**M = number of data points / M must be an even number. 32 // - T : nthreads = ncores defined by the hardware / must be power of 2. 33 // 34 // This application uses 4 shared data arrays, that are dynamically 35 // allocated an distributed, using the remote_malloc() function, with 36 // one sub-buffer per cluster: 32 37 // - data[N] contains N input data points, with 2 double per point. 33 38 // - trans[N] contains N intermediate data points, 2 double per point. … … 36 41 // For data, trans, twid, each sub-buffer contains (N/nclusters) points. 37 42 // For umain, each sub-buffer contains (rootN/nclusters) points. 38 //39 // The main parameters for this generic application are the following:40 // - M : N = 2**M = number of data points / M must be an even number.41 // - T : nthreads = ncores defined by the hardware / must be power of 2.42 43 // 43 44 // There is one thread per core. … … 50 51 // - DEBUG_MAIN : Display intermediate results in main() 51 52 // - DEBUG_FFT1D : Display intermediate results in FFT1D() 52 // - DEBUG_ROW :53 // - DEBUG_ROW : Display intermedite results in FFTrow() 53 54 // 54 55 // Regarding final instrumentation: … … 87 88 88 89 #define DEFAULT_M 12 // 4096 data points 89 #define MODE COSIN 90 #define CHECK 0 91 #define DEBUG_MAIN 2 // trace main() function (detailed if odd) 92 #define DEBUG_SLAVE 2 // trace slave() function (detailed if odd) 93 #define DEBUG_FFT1D 2 // trace FFT1D() function (detailed if odd) 90 #define USE_DQT_BARRIER 0 // use DDT barrier if non zero 91 #define MODE COSIN // DATA array initialisation mode 92 #define CHECK 0 93 #define DEBUG_MAIN 1 // trace main() function (detailed if odd) 94 #define DEBUG_SLAVE 0 // trace slave() function (detailed if odd) 95 #define DEBUG_FFT1D 0 // trace FFT1D() function (detailed if odd) 94 96 #define DEBUG_ROW 0 // trace FFTRow() function (detailed if odd) 95 97 #define PRINT_ARRAY 0 … … 138 140 // synchronisation barrier (all threads) 139 141 pthread_barrier_t barrier; 140 pthread_barrierattr_t barrier attr;142 pthread_barrierattr_t barrier_attr; 141 143 142 144 // threads identifiers, attributes, and arguments … … 213 215 void main ( void ) 214 216 { 217 int error; 218 215 219 unsigned int main_cxy; // main thread cluster 216 220 unsigned int main_x; // main thread X coordinate … … 240 244 if( get_config( &x_size , &y_size , &ncores ) ) 241 245 { 242 printf("\n[ FFT ERROR] cannot get hardware configuration\n");246 printf("\n[fft error] cannot get hardware configuration\n"); 243 247 exit( 0 ); 244 248 } … … 247 251 if( (ncores != 1) && (ncores != 2) && (ncores != 4) ) 248 252 { 249 printf("\n[ FFT ERROR] number of cores per cluster must be 1/2/4\n");253 printf("\n[fft error] number of cores per cluster must be 1/2/4\n"); 250 254 exit( 0 ); 251 255 } … … 254 258 if( (x_size != 1) && (x_size != 2) && (x_size != 4) && (x_size != 8) && (x_size != 16) ) 255 259 { 256 printf("\n[ FFT ERROR] x_size must be 1/2/4/8/16\n");260 printf("\n[fft error] x_size must be 1/2/4/8/16\n"); 257 261 exit( 0 ); 258 262 } … … 261 265 if( (y_size != 1) && (y_size != 2) && (y_size != 4) && (y_size != 8) && (y_size != 16) ) 262 266 { 263 printf("\n[ FFT ERROR] y_size must be 1/2/4/8/16\n");267 printf("\n[fft error] y_size must be 1/2/4/8/16\n"); 264 268 exit( 0 ); 265 269 } … … 277 281 if( rootN < nthreads ) 278 282 { 279 printf("\n[ FFT ERROR] sqrt(N) must be larger than T\n");283 printf("\n[fft error] sqrt(N) must be larger than T\n"); 280 284 exit( 0 ); 281 285 } … … 287 291 main_tid = (((main_x * y_size) + main_y) * ncores) + main_lid; 288 292 289 printf("\n[ FFT] starts on core[%x,%d] / %d complex points / %d thread(s) / PID %x\n",293 printf("\n[fft] starts on core[%x,%d] / %d complex points / %d thread(s) / PID %x\n", 290 294 main_cxy, main_lid, N, nthreads, getpid() ); 291 295 … … 308 312 } 309 313 310 printf("\n[ FFT] main completes remote_malloc\n");314 printf("\n[fft] main completes remote_malloc\n"); 311 315 312 316 // arrays initialisation … … 315 319 InitT( twid ); 316 320 317 printf("\n[ FFT] main completes arrays init\n");321 printf("\n[fft] main completes arrays init\n"); 318 322 319 323 #if CHECK … … 335 339 336 340 // initialise barrier 337 barrierattr.x_size = x_size; 338 barrierattr.y_size = y_size; 339 barrierattr.nthreads = ncores; 340 if( pthread_barrier_init( &barrier, &barrierattr , nthreads) ) 341 { 342 printf("\n[FFT ERROR] cannot initialize barrier\n"); 341 if( USE_DQT_BARRIER ) 342 { 343 barrier_attr.x_size = x_size; 344 barrier_attr.y_size = y_size; 345 barrier_attr.nthreads = ncores; 346 error = pthread_barrier_init( &barrier, &barrier_attr , nthreads ); 347 } 348 else 349 { 350 error = pthread_barrier_init( &barrier, NULL , nthreads ); 351 } 352 353 if( error ) 354 { 355 printf("\n[fft error] cannot initialize barrier\n"); 343 356 exit( 0 ); 344 357 } 345 358 346 printf("\n[ FFT] main completes barrier init\n");359 printf("\n[fft] main completes barrier init\n"); 347 360 348 361 // launch other threads to execute the slave() function … … 377 390 &args[tid]) ) // pointer on function arguments 378 391 { 379 printf("\n[ FFT ERROR] creating thread %x\n", tid );392 printf("\n[fft error] creating thread %x\n", tid ); 380 393 exit( 0 ); 381 394 } 382 #if DEBUG_MAIN395 #if (DEBUG_MAIN & 1) 383 396 unsigned long long debug_cycle; 384 397 get_cycle( &debug_cycle ); 385 printf("\n[ FFT] main created thread %xon core[%x,%d] / cycle %d\n",398 printf("\n[fft] main created thread %d on core[%x,%d] / cycle %d\n", 386 399 tid, cxy, lid, (unsigned int)debug_cycle ); 387 400 #endif … … 390 403 } 391 404 } 405 392 406 393 407 // register sequencial initalisation completion cycle … … 395 409 init_time = (unsigned int)(end_init_cycle - start_init_cycle); 396 410 397 printf("\n[ FFT] main enters parallel execution\n");411 printf("\n[fft] main completes threads creation\n"); 398 412 399 413 // main itself executes the slave() function … … 414 428 if( pthread_join( trdid[tid] , NULL ) ) 415 429 { 416 printf("\n[ FFT ERROR] in main thread joining thread %x\n", tid );430 printf("\n[fft error] in main thread joining thread %x\n", tid ); 417 431 exit( 0 ); 418 432 } 419 433 420 #if DEBUG_MAIN421 printf("\n[ FFT] main thread %d joined thread %d\n", main_tid, tid );434 #if (DEBUG_MAIN & 1) 435 printf("\n[fft] main thread %d joined thread %d\n", main_tid, tid ); 422 436 #endif 423 437 … … 441 455 442 456 // instrumentation 457 char name[64]; 458 char path[128]; 443 459 char string[256]; 444 445 snprintf( string , 256 , "/home/fft_%d_%d_%d_%d", x_size , y_size , ncores , N ); 460 int ret; 461 462 // build file name 463 if( USE_DQT_BARRIER ) 464 snprintf( name , 64 , "fft_dqt_%d_%d_%d_%d", x_size , y_size , ncores , N ); 465 else 466 snprintf( name , 64 , "fft_smp_%d_%d_%d_%d", x_size , y_size , ncores , N ); 467 468 // build pathname 469 snprintf( path , 128 , "/home/%s", name ); 446 470 447 471 // open instrumentation file 448 // FILE * f = fopen( string , NULL ); 449 // if ( f == NULL ) 450 // { 451 // printf("\n[FFT ERROR] cannot open instrumentation file %s\n", string ); 452 // exit( 0 ); 453 // } 454 455 snprintf( string , 256 , "\n[FFT] instrumentation : (%dx%dx%d) threads / %d points\n", 456 x_size, y_size, ncores , N ); 457 458 // display on terminal, and save to instrumentation file 459 printf( "%s" , string ); 460 // fprintf( f , string ); 461 472 FILE * f = fopen( path , NULL ); 473 if ( f == NULL ) 474 { 475 printf("\n[fft error] cannot open instrumentation file <%s>\n", path ); 476 exit( 0 ); 477 } 478 printf("\n[fft] file <%s> open\n", path ); 479 480 // display header on terminal, and save to file 481 printf("\n----- %s -----\n", name ); 482 483 ret = fprintf( f , "\n----- %s -----\n", name ); 484 if( ret < 0 ) 485 { 486 printf("\n[fft error] cannot write header to file <%s>\n", path ); 487 exit(0); 488 } 489 490 // display results for each thread on terminal, and save to file 462 491 for (tid = 0 ; tid < nthreads ; tid++) 463 492 { 464 snprintf( string , 256 , " \ntid %d : Init %d / Parallel %d / Sync%d\n",493 snprintf( string , 256 , "- tid %d : Sequencial %d / Parallel %d / Barrier %d\n", 465 494 tid, init_time, parallel_time[tid], sync_time[tid] ); 466 495 467 496 // display on terminal, and save to instrumentation file 468 497 printf("%s" , string ); 469 // fprintf( f , string ); 470 } 471 472 // close instrumentation file and exit 473 // fclose( f ); 474 475 476 /* 477 long min_para = parallel_time[0]; 478 long max_para = parallel_time[0]; 479 long min_sync = sync_time[0]; 480 long max_sync = sync_time[0]; 498 fprintf( f , "%s" , string ); 499 if( ret < 0 ) 500 { 501 printf("\n[fft error] cannot write thread %d to file <%s>\n", tid, path ); 502 exit(0); 503 } 504 } 505 506 // display MIN/MAX values on terminal and save to file 507 unsigned int min_para = parallel_time[0]; 508 unsigned int max_para = parallel_time[0]; 509 unsigned int min_sync = sync_time[0]; 510 unsigned int max_sync = sync_time[0]; 481 511 482 512 for (tid = 1 ; tid < nthreads ; tid++) … … 488 518 } 489 519 490 snprintf( string , 256 , "\n Init ParallelBarrier\n"520 snprintf( string , 256 , "\n Sequencial Parallel Barrier\n" 491 521 "MIN : %d\t | %d\t | %d\t (cycles)\n" 492 522 "MAX : %d\t | %d\t | %d\t (cycles)\n", 493 523 (int)init_time, (int)min_para, (int)min_sync, 494 524 (int)init_time, (int)max_para, (int)max_sync ); 495 */ 496 497 pthread_exit( NULL ); 525 printf("%s", string ); 526 ret = fprintf( f , "%s", string ); 527 if( ret < 0 ) 528 { 529 printf("\n[fft error] cannot write MIN/MAX to file <%s>\n", path ); 530 exit(0); 531 } 532 533 // close instrumentation file 534 ret = fclose( f ); 535 if( ret ) 536 { 537 printf("\n[fft error] cannot close file <%s>\n", path ); 538 exit(0); 539 } 540 printf("\n[sort] file <%s> closed\n", path ); 541 542 exit( 0 ); 498 543 499 544 } // end main() … … 525 570 526 571 #if DEBUG_SLAVE 527 printf("\n[ FFT] %s : thread %x enter / cycle %d\n",572 printf("\n[fft] %s : thread %x enter / cycle %d\n", 528 573 __FUNCTION__, MyNum, (unsigned int)parallel_start ); 529 574 #endif … … 569 614 570 615 #if DEBUG_SLAVE 571 printf("\n[FFT] %s : thread %x exit / parallel_time %d / sync_time %d / cycle %d\n", 572 __FUNCTION__, MyNum, parallel_time[MyNum], sync_time[MyNum], (unsigned int)parallel_stop ); 616 printf("\n[fft] %s : thread %x exit / cycle %d\n", __FUNCTION__, MyNum, parallel_stop ); 573 617 #endif 574 618 … … 805 849 unsigned long long cycle; 806 850 get_cycle( &cycle ); 807 printf("\n[ FFT] %s : thread %x enter / first %d / last %d / cycle %d\n",851 printf("\n[fft] %s : thread %x enter / first %d / last %d / cycle %d\n", 808 852 __FUNCTION__, MyNum, MyFirst, MyLast, (unsigned int)cycle ); 809 853 #endif … … 814 858 #if( DEBUG_FFT1D & 1 ) 815 859 get_cycle( &cycle ); 816 printf("\n[ FFT] %s : thread %x after first transpose / cycle %d\n",860 printf("\n[fft] %s : thread %x after first transpose / cycle %d\n", 817 861 __FUNCTION__, MyNum, (unsigned int)cycle ); 818 862 if( PRINT_ARRAY ) PrintArray( tmp , N ); … … 827 871 #if( DEBUG_FFT1D & 1 ) 828 872 get_cycle( &cycle ); 829 printf("\n[ FFT] %s : thread %x exit barrier after first transpose / cycle %d\n",873 printf("\n[fft] %s : thread %x exit barrier after first transpose / cycle %d\n", 830 874 __FUNCTION__, MyNum, (unsigned int)cycle ); 831 875 #endif … … 840 884 841 885 #if( DEBUG_FFT1D & 1 ) 842 printf("\n[ FFT] %s : thread %x after first twiddle\n", __FUNCTION__, MyNum);886 printf("\n[fft] %s : thread %x after first twiddle\n", __FUNCTION__, MyNum); 843 887 if( PRINT_ARRAY ) PrintArray( tmp , N ); 844 888 #endif … … 850 894 851 895 #if( DEBUG_FFT1D & 1 ) 852 printf("\n[ FFT] %s : thread %x exit barrier after first twiddle\n", __FUNCTION__, MyNum);896 printf("\n[fft] %s : thread %x exit barrier after first twiddle\n", __FUNCTION__, MyNum); 853 897 #endif 854 898 … … 859 903 860 904 #if( DEBUG_FFT1D & 1 ) 861 printf("\n[ FFT] %s : thread %x after second transpose\n", __FUNCTION__, MyNum);905 printf("\n[fft] %s : thread %x after second transpose\n", __FUNCTION__, MyNum); 862 906 if( PRINT_ARRAY ) PrintArray( x , N ); 863 907 #endif … … 869 913 870 914 #if( DEBUG_FFT1D & 1 ) 871 printf("\n[ FFT] %s : thread %x exit barrier after second transpose\n", __FUNCTION__, MyNum);915 printf("\n[fft] %s : thread %x exit barrier after second transpose\n", __FUNCTION__, MyNum); 872 916 #endif 873 917 … … 882 926 883 927 #if( DEBUG_FFT1D & 1 ) 884 printf("\n[ FFT] %s : thread %x after FFT on rows\n", __FUNCTION__, MyNum);928 printf("\n[fft] %s : thread %x after FFT on rows\n", __FUNCTION__, MyNum); 885 929 if( PRINT_ARRAY ) PrintArray( x , N ); 886 930 #endif … … 892 936 893 937 #if( DEBUG_FFT1D & 1 ) 894 printf("\n[ FFT] %s : thread %x exit barrier after FFT on rows\n", __FUNCTION__, MyNum);938 printf("\n[fft] %s : thread %x exit barrier after FFT on rows\n", __FUNCTION__, MyNum); 895 939 #endif 896 940 sync_time[MyNum] += (long)(barrier_stop - barrier_start); … … 900 944 901 945 #if( DEBUG_FFT1D & 1 ) 902 printf("\n[ FFT] %s : thread %x after third transpose\n", __FUNCTION__, MyNum);946 printf("\n[fft] %s : thread %x after third transpose\n", __FUNCTION__, MyNum); 903 947 if( PRINT_ARRAY ) PrintArray( x , N ); 904 948 #endif … … 910 954 911 955 #if( DEBUG_FFT1D & 1 ) 912 printf("\n[ FFT] %s : thread %x exit barrier after third transpose\n", __FUNCTION__, MyNum);956 printf("\n[fft] %s : thread %x exit barrier after third transpose\n", __FUNCTION__, MyNum); 913 957 #endif 914 958 … … 920 964 921 965 #if DEBUG_FFT1D 922 printf("\n[ FFT] %s : thread %x completed\n", __FUNCTION__, MyNum);966 printf("\n[fft] %s : thread %x completed\n", __FUNCTION__, MyNum); 923 967 if( PRINT_ARRAY ) PrintArray( x , N ); 924 968 #endif … … 1111 1155 #if DEBUG_ROW 1112 1156 unsigned int p; 1113 printf("\n[ FFT] ROW data in / %d points / offset = %d\n", rootN , offset_x );1157 printf("\n[fft] ROW data in / %d points / offset = %d\n", rootN , offset_x ); 1114 1158 1115 1159 for ( p = 0 ; p < rootN ; p++ ) … … 1127 1171 1128 1172 #if DEBUG_ROW 1129 printf("\n[ FFT] ROW data after reverse / %d points / offset = %d\n", rootN , offset_x );1173 printf("\n[fft] ROW data after reverse / %d points / offset = %d\n", rootN , offset_x ); 1130 1174 1131 1175 for ( p = 0 ; p < rootN ; p++ ) … … 1198 1242 1199 1243 #if DEBUG_ROW 1200 printf("\n[ FFT] ROW data out / %d points / offset = %d\n", rootN , offset_x );1244 printf("\n[fft] ROW data out / %d points / offset = %d\n", rootN , offset_x ); 1201 1245 for ( p = 0 ; p < rootN ; p++ ) 1202 1246 {
Note: See TracChangeset
for help on using the changeset viewer.