Changeset 473 for trunk/user/fft
 Timestamp:
 Aug 21, 2018, 6:01:01 PM (6 years ago)
 File:

 1 edited
Legend:
 Unmodified
 Added
 Removed

trunk/user/fft/fft.c
r469 r473 88 88 #define CHECK 0 89 89 #define DEBUG_MAIN 1 90 #define DEBUG_FFT1D 090 #define DEBUG_FFT1D 1 91 91 #define DEBUG_ONCE 0 92 92 #define MODE COSIN … … 102 102 unsigned int y_size; // number of clusters per column in the mesh 103 103 unsigned int ncores; // number of cores per cluster 104 longnthreads; // total number of threads (one thread per core)105 longnclusters; // total number of clusters106 longM = DEFAULT_M; // log2(number of points)107 longN; // number of points (N = 2^M)108 longrootN; // rootN = 2^M/2109 longrows_per_thread; // number of data "rows" handled by a single thread110 longpoints_per_cluster; // number of data points per cluster104 unsigned int nthreads; // total number of threads (one thread per core) 105 unsigned int nclusters; // total number of clusters 106 unsigned int M = DEFAULT_M; // log2(number of points) 107 unsigned int N; // number of points (N = 2^M) 108 unsigned int rootN; // rootN = 2^M/2 109 unsigned int rows_per_thread; // number of data "rows" handled by a single thread 110 unsigned int points_per_cluster; // number of data points per cluster 111 111 112 112 // arrays of pointers on distributed buffers (one subbuffer per cluster) … … 129 129 pthread_t trdid[THREADS_MAX]; // kernel threads identifiers 130 130 pthread_attr_t attr[THREADS_MAX]; // POSIX thread attributes 131 longargs[THREADS_MAX]; // slave function arguments131 unsigned int args[THREADS_MAX]; // slave function arguments 132 132 133 133 ///////////////////////////////////////////////////////////////////////////////// … … 137 137 void slave(); 138 138 139 double CheckSum( double ** x);139 double CheckSum(); 140 140 141 141 void InitX(double ** x , unsigned int mode); … … 145 145 void InitT(double ** u); 146 146 147 long BitReverse( long k ); 148 149 void FFT1D( long direction , double ** x , double ** tmp , double * upriv, 150 double ** twid , long MyNum , long MyFirst , long MyLast ); 151 152 void TwiddleOneCol(long direction, long j, double ** u, double ** x, long offset_x ); 153 154 void Scale( double **x, long offset_x ); 155 156 void Transpose( double ** src, double ** dest, long MyFirst, long MyLast ); 157 158 void Copy( double ** src, double ** dest, long MyFirst , long MyLast ); 159 160 void Reverse( double ** x, long offset_x ); 161 162 void FFT1DOnce( long direction , double * u , double ** x , long offset_x ); 163 164 void PrintArray( double ** x , long size ); 165 166 void SimpleDft( long direction , long size , double ** src , long src_offset , 167 double ** dst , long dst_offset ); 147 unsigned int BitReverse( unsigned int k ); 148 149 void FFT1D( int direction, 150 double ** x, 151 double ** tmp, 152 double * upriv, 153 double ** twid, 154 unsigned int MyNum, 155 unsigned int MyFirst, 156 unsigned int MyLast ); 157 158 void TwiddleOneCol( int direction, 159 unsigned int j, 160 double ** u, 161 double ** x, 162 unsigned int offset_x ); 163 164 void Scale( double ** x, 165 unsigned int offset_x ); 166 167 void Transpose( double ** src, 168 double ** dest, 169 unsigned int MyFirst, 170 unsigned int MyLast ); 171 172 void Copy( double ** src, 173 double ** dest, 174 unsigned int MyFirst, 175 unsigned int MyLast ); 176 177 void Reverse( double ** x, 178 unsigned int offset_x ); 179 180 void FFT1DOnce( int direction, 181 double * u, 182 double ** x, 183 unsigned int offset_x ); 184 185 void PrintArray( double ** x, 186 unsigned int size ); 187 188 void SimpleDft( int direction, 189 unsigned int size, 190 double ** src, 191 unsigned int src_offset, 192 double ** dst, 193 unsigned int dst_offset ); 168 194 169 195 /////////////////////////////////////////////////////////////////// … … 256 282 // allocate memory for the distributed data[i], trans[i], umain[i], twid[i] buffers 257 283 // the index (i) is a continuous cluster index 258 longdata_size = (N / nclusters) * 2 * sizeof(double);259 longcoefs_size = (rootN / nclusters) * 2 * sizeof(double);284 unsigned int data_size = (N / nclusters) * 2 * sizeof(double); 285 unsigned int coefs_size = (rootN / nclusters) * 2 * sizeof(double); 260 286 for (x = 0 ; x < x_size ; x++) 261 287 { … … 278 304 279 305 #if CHECK 280 ck1 = CheckSum( data);306 ck1 = CheckSum(); 281 307 #endif 282 308 … … 298 324 barrierattr.y_size = y_size; 299 325 barrierattr.nthreads = ncores; 300 pthread_barrier_init( &barrier, &barrierattr , nthreads); 326 if( pthread_barrier_init( &barrier, &barrierattr , nthreads) ) 327 { 328 printf("\n[FFT ERROR] cannot initialize barrier\n"); 329 exit( 0 ); 330 } 331 332 printf("\n[FFT] main completes barrier init\n"); 301 333 302 334 // launch other threads to execute the slave() function … … 331 363 } 332 364 #if DEBUG_MAIN 333 printf("\n[FFT] thread %x created\n", trdid[tid] );365 printf("\n[FFT] main created thread %x\n", trdid[tid] ); 334 366 #endif 335 367 } … … 354 386 { 355 387 // compute thread continuous index 356 longtid = (((x * y_size) + y) * ncores) + lid;388 tid = (((x * y_size) + y) * ncores) + lid; 357 389 358 390 if( tid != main_tid ) … … 386 418 387 419 #if CHECK 388 ck3 = CheckSum( data);420 ck3 = CheckSum(); 389 421 printf("\n*** Results ***\n"); 390 422 printf("Checksum difference is %f (%f, %f)\n", ck1  ck3, ck1, ck3); … … 446 478 // This function is executed in parallel by all threads. 447 479 /////////////////////////////////////////////////////////////// 448 void slave( long* tid )449 { 450 longi;451 longMyNum; // continuous thread index452 longMyFirst; // index first row allocated to thread453 longMyLast; // index last row allocated to thread454 double * upriv;455 longc_id;456 longc_offset;480 void slave( unsigned int * tid ) 481 { 482 unsigned int i; 483 unsigned int MyNum; // continuous thread index 484 unsigned int MyFirst; // index first row allocated to thread 485 unsigned int MyLast; // index last row allocated to thread 486 double * upriv; 487 unsigned int c_id; 488 unsigned int c_offset; 457 489 458 490 unsigned long long parallel_start; … … 517 549 // buffer, to the dst[nclusters][points_per_cluster] distributed buffer. 518 550 //////////////////////////////////////////////////////////////////////////////////////// 519 void SimpleDft( long direction,520 longsize, // number of points521 double ** src, // source distributed buffer522 longsrc_offset, // offset in source array523 double ** dst, // destination distributed buffer524 longdst_offset ) // offset in destination array525 { 526 longn , k;527 double phi; // 2*PI*n*k/N528 double u_r; // cos( phi )529 double u_c; // sin( phi )530 double d_r; // Re(data[n])531 double d_c; // Im(data[n])532 double accu_r; // Re(accu)533 double accu_c; // Im(accu)534 longc_id; // distributed buffer cluster index535 longc_offset; // offset in distributed buffer551 void SimpleDft( int direction, // 1 direct / 1 reverse 552 unsigned int size, // number of points 553 double ** src, // source distributed buffer 554 unsigned int src_offset, // offset in source array 555 double ** dst, // destination distributed buffer 556 unsigned int dst_offset ) // offset in destination array 557 { 558 unsigned int n , k; 559 double phi; // 2*PI*n*k/N 560 double u_r; // cos( phi ) 561 double u_c; // sin( phi ) 562 double d_r; // Re(data[n]) 563 double d_c; // Im(data[n]) 564 double accu_r; // Re(accu) 565 double accu_c; // Im(accu) 566 unsigned int c_id; // distributed buffer cluster index 567 unsigned int c_offset; // offset in distributed buffer 536 568 537 569 for ( k = 0 ; k < size ; k++ ) // loop on the output data points … … 551 583 c_id = (src_offset + n) / (points_per_cluster); 552 584 c_offset = (src_offset + n) % (points_per_cluster); 553 d_r = data[c_id][2*c_offset];554 d_c = data[c_id][2*c_offset+1];585 d_r = src[c_id][2*c_offset]; 586 d_c = src[c_id][2*c_offset+1]; 555 587 556 588 // increment accu … … 575 607 } // end SimpleDft() 576 608 577 ///////////////// ///////////578 double CheckSum( double ** x)579 { 580 longi , j;609 ///////////////// 610 double CheckSum() 611 { 612 unsigned int i , j; 581 613 double cks; 582 longc_id;583 longc_offset;614 unsigned int c_id; 615 unsigned int c_offset; 584 616 585 617 cks = 0.0; … … 602 634 unsigned int mode ) 603 635 { 604 longi , j;605 longc_id;606 longc_offset;607 longindex;636 unsigned int i , j; 637 unsigned int c_id; 638 unsigned int c_offset; 639 unsigned int index; 608 640 609 641 for ( j = 0 ; j < rootN ; j++ ) // loop on row index … … 618 650 if ( mode == RANDOM ) 619 651 { 620 data[c_id][2*c_offset] = ( (double)rand() ) / 65536;621 data[c_id][2*c_offset+1] = ( (double)rand() ) / 65536;652 x[c_id][2*c_offset] = ( (double)rand() ) / 65536; 653 x[c_id][2*c_offset+1] = ( (double)rand() ) / 65536; 622 654 } 623 655 … … 627 659 { 628 660 double phi = (double)( 2 * PI * index) / N; 629 data[c_id][2*c_offset] = cos( phi );630 data[c_id][2*c_offset+1] = sin( phi );661 x[c_id][2*c_offset] = cos( phi ); 662 x[c_id][2*c_offset+1] = sin( phi ); 631 663 } 632 664 … … 634 666 if ( mode == CONSTANT ) 635 667 { 636 data[c_id][2*c_offset] = 1.0;637 data[c_id][2*c_offset+1] = 0.0;668 x[c_id][2*c_offset] = 1.0; 669 x[c_id][2*c_offset+1] = 0.0; 638 670 } 639 671 } … … 644 676 void InitU( double ** u ) 645 677 { 646 longq;647 longj;648 longbase;649 longn1;650 longc_id;651 longc_offset;678 unsigned int q; 679 unsigned int j; 680 unsigned int base; 681 unsigned int n1; 682 unsigned int c_id; 683 unsigned int c_offset; 652 684 double phi; 653 longstop = 0;654 655 for (q = 0 ; (( 1 << q) < N) && (stop == 0) ; q++)685 unsigned int stop = 0; 686 687 for (q = 0 ; ((unsigned int)(1 << q) < N) && (stop == 0) ; q++) 656 688 { 657 689 n1 = 1 << q; … … 673 705 void InitT( double ** u ) 674 706 { 675 longi, j;676 longindex;677 longc_id;678 longc_offset;707 unsigned int i, j; 708 unsigned int index; 709 unsigned int c_id; 710 unsigned int c_offset; 679 711 double phi; 680 712 … … 697 729 // This function returns an index value that is the bit reverse of the input value. 698 730 //////////////////////////////////////////////////////////////////////////////////////// 699 long BitReverse( longk )700 { 701 longi;702 longj;703 longtmp;731 unsigned int BitReverse( unsigned int k ) 732 { 733 unsigned int i; 734 unsigned int j; 735 unsigned int tmp; 704 736 705 737 j = 0; … … 724 756 // on the rootN points contained in a row. 725 757 //////////////////////////////////////////////////////////////////////////////////////// 726 void FFT1D( long direction, // direct : 1 / inverse :1727 double ** x, // input & output distributed data points array728 double ** tmp, // auxiliary distributed data points array729 double *upriv, // local array containing coefs for rootN FFT730 double ** twid, // distributed arrays containing N twiddle factors731 long MyNum,732 longMyFirst,733 longMyLast )734 { 735 longj;758 void FFT1D( int direction, // direct 1 / inverse 1 759 double ** x, // input & output distributed data points array 760 double ** tmp, // auxiliary distributed data points array 761 double * upriv, // local array containing coefs for rootN FFT 762 double ** twid, // distributed arrays containing N twiddle factors 763 unsigned int MyNum, // thread continuous index 764 unsigned int MyFirst, 765 unsigned int MyLast ) 766 { 767 unsigned int j; 736 768 unsigned long long barrier_start; 737 769 unsigned long long barrier_stop; … … 741 773 742 774 #if DEBUG_FFT1D 743 printf("\n @@@ tmp after first transpose\n");744 PrintArray( tmp , N );775 printf("\n[FFT] %s : thread %x after first transpose\n", __FUNCTION__, MyNum); 776 if( VERBOSE ) PrintArray( tmp , N ); 745 777 #endif 746 778 … … 755 787 for (j = MyFirst; j < MyLast; j++) 756 788 { 789 printf("@@@ before FFT1Once / j = %d\n", j ); 757 790 FFT1DOnce( direction , upriv , tmp , j * rootN ); 791 printf("@@@ after FFT1Once / j = %d\n", j ); 758 792 TwiddleOneCol( direction , j , twid , tmp , j * rootN ); 793 printf("@@@ after Twiddle / j = %d\n", j ); 759 794 } 760 795 761 796 #if DEBUG_FFT1D 762 printf("\n @@@ tmp after columns FFT + twiddle \n");763 PrintArray( tmp , N );797 printf("\n[FFT] %s : thread %x after first twiddle\n", __FUNCTION__, MyNum); 798 if( VERBOSE ) PrintArray( tmp , N ); 764 799 #endif 765 800 … … 775 810 776 811 #if DEBUG_FFT1D 777 printf("\n @@@ x after second transpose \n");778 PrintArray( x , N );812 printf("\n[FFT] %s : thread %x after second transpose\n", __FUNCTION__, MyNum); 813 if( VERBOSE ) PrintArray( x , N ); 779 814 #endif 780 815 … … 794 829 795 830 #if DEBUG_FFT1D 796 printf("\n @@@ x after rows FFT + scaling \n");797 PrintArray( x , N );831 printf("\n[FFT] %s : thread %x after FFT on rows\n", __FUNCTION__, MyNum); 832 if( VERBOSE ) PrintArray( x , N ); 798 833 #endif 799 834 … … 809 844 810 845 #if DEBUG_FFT1D 811 printf("\n @@@ tmp after third transpose \n");812 PrintArray( tmp, N );846 printf("\n[FFT] %s : thread %x after third transpose\n", __FUNCTION__, MyNum); 847 if( VERBOSE ) PrintArray( x , N ); 813 848 #endif 814 849 … … 824 859 825 860 #if DEBUG_FFT1D 826 printf("\n @@@ x after final copy \n");827 PrintArray( x , N );861 printf("\n[FFT] %s : thread %x after final copy\n", __FUNCTION__, MyNum); 862 if( VERBOSE ) PrintArray( x , N ); 828 863 #endif 829 864 … … 835 870 // x[] array by the corresponding twiddle factor, contained in the u[] array. 836 871 ///////////////////////////////////////////////////////////////////////////////////// 837 void TwiddleOneCol( longdirection,838 longj, // y coordinate in 2D view of coef array839 double ** u, // coef array base address840 double ** x, // data array base address841 longoffset_x ) // first point in N points data array842 { 843 longi;872 void TwiddleOneCol( int direction, 873 unsigned int j, // y coordinate in 2D view of coef array 874 double ** u, // coef array base address 875 double ** x, // data array base address 876 unsigned int offset_x ) // first point in N points data array 877 { 878 unsigned int i; 844 879 double omega_r; 845 880 double omega_c; 846 881 double x_r; 847 882 double x_c; 848 longc_id;849 longc_offset;883 unsigned int c_id; 884 unsigned int c_offset; 850 885 851 886 for (i = 0; i < rootN ; i++) // loop on the rootN points … … 868 903 } // end TwiddleOneCol() 869 904 870 //////////////////////// 871 void Scale( double ** x, // data array base address872 longoffset_x ) // first point of the row to be scaled873 { 874 longi;875 longc_id;876 longc_offset;905 //////////////////////////// 906 void Scale( double ** x, // data array base address 907 unsigned int offset_x ) // first point of the row to be scaled 908 { 909 unsigned int i; 910 unsigned int c_id; 911 unsigned int c_offset; 877 912 878 913 for (i = 0; i < rootN ; i++) … … 880 915 c_id = (offset_x + i) / (points_per_cluster); 881 916 c_offset = (offset_x + i) % (points_per_cluster); 882 data[c_id][2*c_offset] /= N;883 data[c_id][2*c_offset + 1] /= N;917 x[c_id][2*c_offset] /= N; 918 x[c_id][2*c_offset + 1] /= N; 884 919 } 885 920 } 886 921 887 //////////////////////////// 888 void Transpose( double ** src, // source buffer (array of pointers)889 double ** dest, // destination buffer (array of pointers)890 longMyFirst, // first row allocated to the thread891 longMyLast ) // last row allocated to the thread892 { 893 longrow; // row index894 longpoint; // data point index in a row895 896 longindex_src; // absolute index in the source N points array897 longc_id_src; // cluster for the source buffer898 longc_offset_src; // offset in the source buffer899 900 longindex_dst; // absolute index in the dest N points array901 longc_id_dst; // cluster for the dest buffer902 longc_offset_dst; // offset in the dest buffer922 /////////////////////////////////// 923 void Transpose( double ** src, // source buffer (array of pointers) 924 double ** dest, // destination buffer (array of pointers) 925 unsigned int MyFirst, // first row allocated to the thread 926 unsigned int MyLast ) // last row allocated to the thread 927 { 928 unsigned int row; // row index 929 unsigned int point; // data point index in a row 930 931 unsigned int index_src; // absolute index in the source N points array 932 unsigned int c_id_src; // cluster for the source buffer 933 unsigned int c_offset_src; // offset in the source buffer 934 935 unsigned int index_dst; // absolute index in the dest N points array 936 unsigned int c_id_dst; // cluster for the dest buffer 937 unsigned int c_offset_dst; // offset in the dest buffer 903 938 904 939 … … 924 959 } // end Transpose() 925 960 926 ///////////////////////// 927 void Copy( double ** src, // source buffer (array of pointers)928 double ** dest, // destination buffer (array of pointers)929 longMyFirst, // first row allocated to the thread930 longMyLast ) // last row allocated to the thread931 { 932 longrow; // row index933 longpoint; // data point index in a row934 935 longindex; // absolute index in the N points array936 longc_id; // cluster index937 longc_offset; // offset in local buffer961 ////////////////////////////// 962 void Copy( double ** src, // source buffer (array of pointers) 963 double ** dest, // destination buffer (array of pointers) 964 unsigned int MyFirst, // first row allocated to the thread 965 unsigned int MyLast ) // last row allocated to the thread 966 { 967 unsigned int row; // row index 968 unsigned int point; // data point index in a row 969 970 unsigned int index; // absolute index in the N points array 971 unsigned int c_id; // cluster index 972 unsigned int c_offset; // offset in local buffer 938 973 939 974 // scan all data points allocated to the thread … … 952 987 } // end Copy() 953 988 954 ////////////////////////// 955 void Reverse( double ** x,956 longoffset_x )957 { 958 longj, k;959 longc_id_j;960 longc_offset_j;961 longc_id_k;962 longc_offset_k;989 /////////////////////////////// 990 void Reverse( double ** x, 991 unsigned int offset_x ) 992 { 993 unsigned int j, k; 994 unsigned int c_id_j; 995 unsigned int c_offset_j; 996 unsigned int c_id_k; 997 unsigned int c_offset_k; 963 998 964 999 for (k = 0 ; k < rootN ; k++) … … 982 1017 // (i.e. rootN points) of the x[nclusters][points_per_cluster] array. 983 1018 ///////////////////////////////////////////////////////////////////////////// 984 void FFT1DOnce( long direction, // direct /inverse985 double *u, // private coefs array986 double ** x, // array of pointers on distributed buffers987 longoffset_x ) // absolute offset in the x array988 { 989 longj;990 longk;991 longq;992 longL;993 longr;994 longLstar;1019 void FFT1DOnce( int direction, // 1 direct / 1 inverse 1020 double * u, // private coefs array 1021 double ** x, // array of pointers on distributed buffers 1022 unsigned int offset_x ) // absolute offset in the x array 1023 { 1024 unsigned int j; 1025 unsigned int k; 1026 unsigned int q; 1027 unsigned int L; 1028 unsigned int r; 1029 unsigned int Lstar; 995 1030 double * u1; 996 1031 997 longoffset_x1; // index first butterfly input998 longoffset_x2; // index second butterfly output999 1000 double omega_r; // real part butterfy coef1001 double omega_c; // complex part butterfly coef1002 1003 double tau_r;1004 double tau_c;1005 1006 double d1_r; // real part first butterfly input1007 double d1_c; // imag part first butterfly input1008 double d2_r; // real part second butterfly input1009 double d2_c; // imag part second butterfly input1010 1011 longc_id_1; // cluster index for first butterfly input1012 longc_offset_1; // offset for first butterfly input1013 longc_id_2; // cluster index for second butterfly input1014 longc_offset_2; // offset for second butterfly input1032 unsigned int offset_x1; // index first butterfly input 1033 unsigned int offset_x2; // index second butterfly output 1034 1035 double omega_r; // real part butterfy coef 1036 double omega_c; // complex part butterfly coef 1037 1038 double tau_r; 1039 double tau_c; 1040 1041 double d1_r; // real part first butterfly input 1042 double d1_c; // imag part first butterfly input 1043 double d2_r; // real part second butterfly input 1044 double d2_c; // imag part second butterfly input 1045 1046 unsigned int c_id_1; // cluster index for first butterfly input 1047 unsigned int c_offset_1; // offset for first butterfly input 1048 unsigned int c_id_2; // cluster index for second butterfly input 1049 unsigned int c_offset_2; // offset for second butterfly input 1015 1050 1016 1051 #if DEBUG_ONCE … … 1020 1055 for ( p = 0 ; p < rootN ; p++ ) 1021 1056 { 1022 longindex = offset_x + p;1023 longc_id = index / (points_per_cluster);1024 longc_offset = index % (points_per_cluster);1057 unsigned int index = offset_x + p; 1058 unsigned int c_id = index / (points_per_cluster); 1059 unsigned int c_offset = index % (points_per_cluster); 1025 1060 printf("%f , %f  ", x[c_id][2*c_offset] , x[c_id][2*c_offset+1] ); 1026 1061 } … … 1035 1070 for ( p = 0 ; p < rootN ; p++ ) 1036 1071 { 1037 longindex = offset_x + p;1038 longc_id = index / (points_per_cluster);1039 longc_offset = index % (points_per_cluster);1072 unsigned int index = offset_x + p; 1073 unsigned int c_id = index / (points_per_cluster); 1074 unsigned int c_offset = index % (points_per_cluster); 1040 1075 printf("%f , %f  ", x[c_id][2*c_offset] , x[c_id][2*c_offset+1] ); 1041 1076 } … … 1106 1141 for ( p = 0 ; p < rootN ; p++ ) 1107 1142 { 1108 longindex = offset_x + p;1109 longc_id = index / (points_per_cluster);1110 longc_offset = index % (points_per_cluster);1143 unsigned int index = offset_x + p; 1144 unsigned int c_id = index / (points_per_cluster); 1145 unsigned int c_offset = index % (points_per_cluster); 1111 1146 printf("%f , %f  ", x[c_id][2*c_offset] , x[c_id][2*c_offset+1] ); 1112 1147 } … … 1116 1151 } // end FFT1DOnce() 1117 1152 1118 ////////////////////////////////// 1119 void PrintArray( double ** array,1120 longsize )1121 { 1122 longi;1123 longc_id;1124 longc_offset;1153 /////////////////////////////////////// 1154 void PrintArray( double ** array, 1155 unsigned int size ) 1156 { 1157 unsigned int i; 1158 unsigned int c_id; 1159 unsigned int c_offset; 1125 1160 1126 1161 // float display
Note: See TracChangeset
for help on using the changeset viewer.