Changeset 574 for trunk/user/fft/fft.c
- Timestamp: Oct 5, 2018, 12:26:30 AM (6 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/user/fft/fft.c
r503 r574 46 46 // 47 47 // Several configuration parameters can be defined below: 48 // - VERBOSE : Print out complex data points arrays. 49 // - CHECK : Perform both FFT and inverse FFT to check output/input. 50 // - DEBUG : Display intermediate results 48 // - PRINT_ARRAY : Print out complex data points arrays. 49 // - CHECK : Perform both FFT and inverse FFT to check output/input. 50 // - DEBUG_MAIN : Display intermediate results in main() 51 // - DEBUG_FFT1D : Display intermediate results in FFT1D() 52 // - DEBUG_ROW : 51 53 // 52 54 // Regarding final instrumentation: … … 85 87 86 88 #define DEFAULT_M 6 87 #define VERBOSE 089 #define MODE COSIN 88 90 #define CHECK 0 89 #define DEBUG_MAIN 190 #define DEBUG_FFT1D 191 #define DEBUG_ ONCE 092 #define MODE COSIN91 #define DEBUG_MAIN 0 // trace main() function (detailed if odd) 92 #define DEBUG_FFT1D 0 // trace FFT1D() function (detailed if odd) 93 #define DEBUG_ROW 0 // trace FFTRow() function (detailed if odd) 94 #define PRINT_ARRAY 0 93 95 94 96 // macro to swap two variables … … 178 180 unsigned int offset_x ); 179 181 180 void FFT 1DOnce( int direction,182 void FFTRow( int direction, 181 183 double * u, 182 184 double ** x, … … 298 300 } 299 301 302 printf("\n[FFT] complete remote_malloc\n"); 303 300 304 // arrays initialisation 301 305 InitX( data , MODE ); … … 303 307 InitT( twid ); 304 308 309 printf("\n[FFT] complete init arrays\n"); 310 305 311 #if CHECK 306 312 ck1 = CheckSum(); 307 313 #endif 308 314 309 #if VERBOSE315 #if PRINT_ARRAY 310 316 printf("\nData values / base = %x\n", &data[0][0] ); 311 317 PrintArray( data , N ); … … 373 379 get_cycle( &start_exec_cycle ); 374 380 init_time = (long)(start_exec_cycle - start_init_cycle); 375 printf("\n[FFT] enter parallel execution / cycle %d\n", (long)start_exec_cycle);381 printf("\n[FFT] main enter parallel execution\n"); 376 382 377 383 // main execute itself the slave() function … … 391 397 { 392 398 #if DEBUG_MAIN 393 printf("\n[FFT] before join for 
thread %x\n", trdid[tid] ); 394 #endif 395 399 printf("\n[FFT] main join thread %x\n", trdid[tid] ); 400 #endif 396 401 if( pthread_join( trdid[tid] , NULL ) ) 397 402 { … … 400 405 } 401 406 402 #if DEBUG_MAIN403 printf("\n[FFT] after join for thread %x\n", trdid[tid] );404 #endif405 407 } 406 408 } … … 412 414 printf("\n[FFT] complete parallel execution / cycle %d\n", (long)end_exec_cycle ); 413 415 414 #if VERBOSE416 #if PRINT_ARRAY 415 417 printf("\nData values after FFT:\n"); 416 418 PrintArray( data , N ); … … 525 527 526 528 #if CHECK 527 528 529 get_cycle( &barrier_start ); 529 530 pthread_barrier_wait( &barrier ); … … 533 534 534 535 FFT1D( -1 , data , trans , upriv , twid , MyNum , MyFirst , MyLast ); 535 536 536 #endif 537 537 … … 541 541 542 542 // exit if MyNum != 0 543 if( MyNum ) exit( 0 );543 if( MyNum ) pthread_exit( 0 ); 544 544 545 545 } // end slave() … … 753 753 // 3) it transpose (rootN/nthreads) columns from tmp to x. 754 754 // 4) it make (rootN/nthreads) FFT on the x rows. 755 // It calls the FFT 1DOnce() 2*(rootN/nthreads) times to perform the in place FFT755 // It calls the FFTRow() 2*(rootN/nthreads) times to perform the in place FFT 756 756 // on the rootN points contained in a row. 
757 757 //////////////////////////////////////////////////////////////////////////////////////// … … 769 769 unsigned long long barrier_stop; 770 770 771 #if DEBUG_FFT1D 772 printf("\n[FFT] %s : thread %x enter / first %d / last %d\n", 773 __FUNCTION__, MyNum, MyFirst, MyLast ); 774 #endif 775 771 776 // transpose (rootN/nthreads) rows from x to tmp 772 777 Transpose( x , tmp , MyFirst , MyLast ); 773 778 774 #if DEBUG_FFT1D 775 printf("\n[FFT] %s : thread %x after first transpose\n", __FUNCTION__, MyNum); 776 if( VERBOSE ) PrintArray( tmp , N ); 779 #if( DEBUG_FFT1D & 1 ) 780 unsigned long long cycle; 781 get_cycle( &cycle ); 782 printf("\n[FFT] %s : thread %x after first transpose / cycle %d\n", 783 __FUNCTION__, MyNum, (unsigned int)cycle ); 784 if( PRINT_ARRAY ) PrintArray( tmp , N ); 777 785 #endif 778 786 … … 781 789 pthread_barrier_wait( &barrier ); 782 790 get_cycle( &barrier_stop ); 783 784 791 sync_time[MyNum] = (long)(barrier_stop - barrier_start); 792 793 #if( DEBUG_FFT1D & 1 ) 794 get_cycle( &cycle ); 795 printf("\n[FFT] %s : thread %x exit barrier after first transpose / cycle %d\n", 796 __FUNCTION__, MyNum, (unsigned int)cycle ); 797 #endif 785 798 786 799 // do FFTs on rows of tmp (i.e. 
columns of x) and apply twiddle factor 787 800 for (j = MyFirst; j < MyLast; j++) 788 801 { 789 printf("@@@ before FFT1Once / j = %d\n", j ); 790 FFT1DOnce( direction , upriv , tmp , j * rootN ); 791 printf("@@@ after FFT1Once / j = %d\n", j ); 802 FFTRow( direction , upriv , tmp , j * rootN ); 803 792 804 TwiddleOneCol( direction , j , twid , tmp , j * rootN ); 793 printf("@@@ after Twiddle / j = %d\n", j );794 805 } 795 806 796 #if DEBUG_FFT1D807 #if( DEBUG_FFT1D & 1 ) 797 808 printf("\n[FFT] %s : thread %x after first twiddle\n", __FUNCTION__, MyNum); 798 if( VERBOSE) PrintArray( tmp , N );809 if( PRINT_ARRAY ) PrintArray( tmp , N ); 799 810 #endif 800 811 … … 804 815 get_cycle( &barrier_stop ); 805 816 817 #if( DEBUG_FFT1D & 1 ) 818 printf("\n[FFT] %s : thread %x exit barrier after first twiddle\n", __FUNCTION__, MyNum); 819 #endif 820 806 821 sync_time[MyNum] += (long)(barrier_stop - barrier_start); 807 822 … … 809 824 Transpose( tmp , x , MyFirst , MyLast ); 810 825 811 #if DEBUG_FFT1D826 #if( DEBUG_FFT1D & 1 ) 812 827 printf("\n[FFT] %s : thread %x after second transpose\n", __FUNCTION__, MyNum); 813 if( VERBOSE) PrintArray( x , N );828 if( PRINT_ARRAY ) PrintArray( x , N ); 814 829 #endif 815 830 … … 819 834 get_cycle( &barrier_stop ); 820 835 836 #if( DEBUG_FFT1D & 1 ) 837 printf("\n[FFT] %s : thread %x exit barrier after second transpose\n", __FUNCTION__, MyNum); 838 #endif 839 821 840 sync_time[MyNum] += (long)(barrier_stop - barrier_start); 822 841 … … 824 843 for (j = MyFirst; j < MyLast; j++) 825 844 { 826 FFT 1DOnce( direction , upriv , x , j * rootN );845 FFTRow( direction , upriv , x , j * rootN ); 827 846 if (direction == -1) Scale( x , j * rootN ); 828 847 } 829 848 830 #if DEBUG_FFT1D849 #if( DEBUG_FFT1D & 1 ) 831 850 printf("\n[FFT] %s : thread %x after FFT on rows\n", __FUNCTION__, MyNum); 832 if( VERBOSE) PrintArray( x , N );851 if( PRINT_ARRAY ) PrintArray( x , N ); 833 852 #endif 834 853 … … 838 857 get_cycle( &barrier_stop ); 839 858 859 
#if( DEBUG_FFT1D & 1 ) 860 printf("\n[FFT] %s : thread %x exit barrier after FFT on rows\n", __FUNCTION__, MyNum); 861 #endif 840 862 sync_time[MyNum] += (long)(barrier_stop - barrier_start); 841 863 … … 843 865 Transpose( x , tmp , MyFirst , MyLast ); 844 866 845 #if DEBUG_FFT1D867 #if( DEBUG_FFT1D & 1 ) 846 868 printf("\n[FFT] %s : thread %x after third transpose\n", __FUNCTION__, MyNum); 847 if( VERBOSE) PrintArray( x , N );869 if( PRINT_ARRAY ) PrintArray( x , N ); 848 870 #endif 849 871 … … 853 875 get_cycle( &barrier_stop ); 854 876 877 #if( DEBUG_FFT1D & 1 ) 878 printf("\n[FFT] %s : thread %x exit barrier after third transpose\n", __FUNCTION__, MyNum); 879 #endif 880 881 sync_time[MyNum] += (long)(barrier_stop - barrier_start); 855 882 sync_time[MyNum] += (long)(barrier_stop - barrier_start); 856 883 … … 859 886 860 887 #if DEBUG_FFT1D 861 printf("\n[FFT] %s : thread %x after final copy\n", __FUNCTION__, MyNum);862 if( VERBOSE) PrintArray( x , N );888 printf("\n[FFT] %s : thread %x completed\n", __FUNCTION__, MyNum); 889 if( PRINT_ARRAY ) PrintArray( x , N ); 863 890 #endif 864 891 … … 1017 1044 // (i.e. rootN points) of the x[nclusters][points_per_cluster] array. 
1018 1045 ///////////////////////////////////////////////////////////////////////////// 1019 void FFT 1DOnce( int direction, // 1 direct / -1 inverse1046 void FFTRow( int direction, // 1 direct / -1 inverse 1020 1047 double * u, // private coefs array 1021 1048 double ** x, // array of pointers on distributed buffers … … 1049 1076 unsigned int c_offset_2; // offset for second butterfly input 1050 1077 1051 #if DEBUG_ ONCE1078 #if DEBUG_ROW 1052 1079 unsigned int p; 1053 printf("\n @@@ FFT ROW data in / %d points / offset = %d\n",1054 rootN , offset_x ); 1080 printf("\n[FFT] ROW data in / %d points / offset = %d\n", rootN , offset_x ); 1081 1055 1082 for ( p = 0 ; p < rootN ; p++ ) 1056 1083 { … … 1066 1093 Reverse( x , offset_x ); 1067 1094 1068 #if DEBUG_ONCE 1069 printf("\n@@@ FFT ROW data after reverse\n"); 1095 #if DEBUG_ROW 1096 printf("\n[FFT] ROW data after reverse / %d points / offset = %d\n", rootN , offset_x ); 1097 1070 1098 for ( p = 0 ; p < rootN ; p++ ) 1071 1099 { … … 1090 1118 offset_x2 = offset_x + (k * L + Lstar); // index second point 1091 1119 1092 #if DEBUG_ONCE 1093 printf("\n ### q = %d / k = %d / x1 = %d / x2 = %d\n", 1094 q , k , offset_x1 , offset_x2 ); 1120 #if (DEBUG_ROW & 1) 1121 printf("\n ### q = %d / k = %d / x1 = %d / x2 = %d\n", q , k , offset_x1 , offset_x2 ); 1095 1122 #endif 1096 1123 // makes all in-place butterfly(s) for subset … … 1113 1140 d2_c = x[c_id_2][2*c_offset_2+1]; 1114 1141 1115 #if DEBUG_ONCE1142 #if (DEBUG_ROW & 1) 1116 1143 printf("\n ### d1_in = (%f , %f) / d2_in = (%f , %f) / coef = (%f , %f)\n", 1117 1144 d1_r , d1_c , d2_r , d2_c , omega_r , omega_c); … … 1129 1156 x[c_id_2][2*c_offset_2+1] = d1_c - tau_c; 1130 1157 1131 #if DEBUG_ONCE1158 #if (DEBUG_ROW & 1) 1132 1159 printf("\n ### d1_out = (%f , %f) / d2_out = (%f , %f)\n", 1133 1160 d1_r + tau_r , d1_c + tau_c , d2_r - tau_r , d2_c - tau_c ); … … 1137 1164 } 1138 1165 1139 #if DEBUG_ ONCE1140 printf("\n @@@ FFT ROW data out\n");1166 #if DEBUG_ROW 1167 
printf("\n[FFT] ROW data out / %d points / offset = %d\n", rootN , offset_x ); 1141 1168 for ( p = 0 ; p < rootN ; p++ ) 1142 1169 { … … 1149 1176 #endif 1150 1177 1151 } // end FFT 1DOnce()1178 } // end FFTRow() 1152 1179 1153 1180 ///////////////////////////////////////
Note: See TracChangeset for help on using the changeset viewer.