Changeset 171 for trunk/softs/soft_transpose_giet
- Timestamp: May 16, 2011, 12:35:03 PM (14 years ago)
- Location: trunk/softs/soft_transpose_giet
- Files: 2 added, 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/softs/soft_transpose_giet/main.c
 r158  r171
    1     1   #include "stdio.h"
    2     2
    3       - #define NL 128
    4       - #define NP 128
    5       - #define NB_IMAGES 2
    6       - #define BLOCK_SIZE 128
          3 + #define NL 512
          4 + #define NP 512
          5 + #define NB_IMAGES 1
          6 + #define BLOCK_SIZE 512
    7     7
    8     8   #define PRINTF if(local_id == 0) tty_printf
    …     …
   23    23   unsigned int image = 0;
   24    24   unsigned int date = 0;
   25       - unsigned int delta = 0;
   26    25
   27    26   unsigned int c; // cluster index for loops
    …     …
   45    44   {
   46    45   PRINTF("NB_PROCS must be 1, 2 or 4\n");
   47       -
   48       - exit();
   49    46   }
   50    47   if( (nclusters != 1) && (nclusters != 2) && (nclusters != 4) && (nclusters != 8) &&
   51       - (nclusters != 16) && (nclusters != 32) && (nclusters != 64) && (nclusters !=128) )
   52       - {
   53       - PRINTF("NB_CLUSTERS must be a power of 2 between 1 and 128\n");
   54       - exit();
   55       - }
   56       - if( ntasks > 128 )
   57       - {
   58       - PRINTF("NB_PROCS * NB_CLUSTERS cannot be larger than 128 4\n");
   59       - exit();
         48 + (nclusters != 16) && (nclusters != 32) && (nclusters != 64) && (nclusters !=128) &&
         49 + (nclusters != 256) )
         50 + {
         51 + PRINTF("NB_CLUSTERS must be a power of 1 between 1 and 256\n");
         52 + }
         53 + if( ntasks > 1024 )
         54 + {
         55 + PRINTF("NB_PROCS * NB_CLUSTERS cannot be larger than 1024\n");
   60    56   }
   61    57   if( proc_id >= ntasks )
    …     …
   64    60   }
   65    61
   66       - // Arrays of pointers on the shared, distributed buffers
   67       - // containing the images (sized for the worst case : 128 clusters)
   68       - unsigned char* A[128];
   69       - unsigned char* B[128];
         62 + // Arrays of pointers on the shared, distributed buffers containing the images
         63 + // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters)
         64 + unsigned char* A[256];
         65 + unsigned char* B[256];
   70    66
         67 + // Arrays of pointers on the instrumentation arrays
         68 + // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters)
         69 + // each pointer points on the base adress of an array of NPROCS unsigned int
         70 + unsigned int* LOAD_START[256];
         71 + unsigned int* LOAD_ENDED[256];
         72 + unsigned int* TRSP_START[256];
         73 + unsigned int* TRSP_ENDED[256];
         74 + unsigned int* DISP_START[256];
         75 + unsigned int* DISP_ENDED[256];
         76 +
   71    77   // shared buffers address definition
   72       - // from the seg_heap_base and segment_increment
   73       - // values defined in the ldscript file.
   74       - // These arrays of pointers are identical and
   75       - // replicated in the stack of each task
         78 + // from the seg_heap_base and increment depending on the cluster index
         79 + // These arrays of pointers are identical and replicated in the stack of each task
   76    80   for( c=0 ; c<nclusters ; c++)
   77    81   {
   78       - A[c] = (unsigned char*)(base + increment*c);
   79       - B[c] = (unsigned char*)(base + NL*NP + increment*c);
         82 + A[c] = (unsigned char*)(base + increment*c);
         83 + B[c] = (unsigned char*)(base + NL*NP + increment*c);
         84 + LOAD_START[c] = (unsigned int*) (base + 2*NL*NP + increment*c);
         85 + LOAD_ENDED[c] = (unsigned int*) (base + 3*NL*NP + increment*c);
         86 + TRSP_START[c] = (unsigned int*) (base + 4*NL*NP + increment*c);
         87 + TRSP_ENDED[c] = (unsigned int*) (base + 5*NL*NP + increment*c);
         88 + DISP_START[c] = (unsigned int*) (base + 6*NL*NP + increment*c);
         89 + DISP_ENDED[c] = (unsigned int*) (base + 7*NL*NP + increment*c);
   80    90   }
   81    91
    …     …
   98   108   // only task running on processor with (local_id == 0) does it
   99   109
  100       - delta = proctime() - date;
  101       - date = date + delta;
  102       -
  103   110   if ( local_id == 0 )
  104   111   {
  105       - PRINTF("\n*** Starting load for image %d *** at cycle %d (%d)\n", image, date, delta);
        112 + int p;
        113 +
        114 + date = proctime();
        115 + PRINTF("\n*** Starting load for image %d at cycle %d\n", image, date);
        116 + for ( p=0 ; p<nprocs ; p++ ) LOAD_START[cluster_id][p] = date;
  106   117
  107   118   if( ioc_read(image*nblocks + nblocks*cluster_id/nclusters , A[cluster_id], nblocks/nclusters) )
    …     …
  115   126   exit();
  116   127   }
  117       - delta = proctime() - date;
  118       - date = date + delta;
  119       - PRINTF("*** Completing load for image %d *** at cycle %d (%d)\n", image, date, delta);
        128 +
        129 + date = proctime();
        130 + PRINTF("*** Completing load for image %d at cycle %d\n", image, date);
        131 + for ( p=0 ; p<nprocs ; p++ ) LOAD_ENDED[cluster_id][p] = date;
  120   132   }
  121   133
    …     …
  126   138   // (p,l) are the (x,y) pixel coordinates in the source image
  127   139
  128       - delta = proctime() - date;
  129       - date = date + delta;
  130       -
  131       - PRINTF("\n*** Starting transpose for image %d at cycle %d (%d)\n", image, date, delta);
        140 +
        141 + date = proctime();
        142 + PRINTF("\n*** Starting transpose for image %d at cycle %d\n", image, date);
        143 + TRSP_START[cluster_id][local_id] = date;
  132   144
  133   145   unsigned int nlt = NL/ntasks;
    …     …
  148   160
  149   161   }
  150       - delta = proctime() - date;
  151       - date = date + delta;
  152       - PRINTF("*** Completing transpose for image %d *** at cycle %d (%d)\n", image, date, delta);
        162 + date = proctime();
        163 + PRINTF("*** Completing transpose for image %d at cycle %d\n", image, date);
        164 + TRSP_ENDED[cluster_id][local_id] = date;
  153   165
  154   166   barrier_wait(1);
    …     …
  157   169   // each processor uses its private dma to display NL*NP/ntasks pixels
  158   170
  159       - delta = proctime() - date;
  160       - date = date + delta;
  161       -
  162       - PRINTF("\n*** Starting display for image %d at cycle %d (%d)\n", image, date, delta);
        171 + date = proctime();
        172 + PRINTF("\n*** Starting display for image %d at cycle %d\n", image, date);
        173 + DISP_START[cluster_id][local_id] = date;
  163   174
  164   175   unsigned int npxt = NL*NP/ntasks; // number of pixels per task
    …     …
  175   186   }
  176   187
  177       - delta = proctime() - date;
  178       - date = date + delta;
  179       - PRINTF("*** Completing display for image %d at cycle %d (%d)\n", image, date, delta);
        188 + date = proctime();
        189 + PRINTF("*** Completing display for image %d at cycle %d\n", image, date);
        190 + DISP_ENDED[cluster_id][local_id] = date;
  180   191
  181   192   barrier_wait(2);
  182   193
        194 + // Instrumentation (done by processor 0 in cluster 0)
        195 + if ( local_id == 0 )
        196 + {
        197 + date = proctime();
        198 + PRINTF("\n*** Starting Instrumentation for image %d at cycle %d\n\n", image, date);
        199 +
        200 + int cc, pp;
        201 + unsigned int min_load_start = 1000000000;
        202 + unsigned int max_load_start = 0;
        203 + unsigned int min_load_ended = 1000000000;
        204 + unsigned int max_load_ended = 0;
        205 + unsigned int min_trsp_start = 1000000000;
        206 + unsigned int max_trsp_start = 0;
        207 + unsigned int min_trsp_ended = 1000000000;
        208 + unsigned int max_trsp_ended = 0;
        209 + unsigned int min_disp_start = 1000000000;
        210 + unsigned int max_disp_start = 0;
        211 + unsigned int min_disp_ended = 1000000000;
        212 + unsigned int max_disp_ended = 0;
        213 +
        214 + for ( cc=0 ; cc<nclusters ; cc++ )
        215 + {
        216 + for ( pp=0 ; pp<nprocs ; pp++ )
        217 + {
        218 + if ( LOAD_START[cc][pp] < min_load_start ) min_load_start = LOAD_START[cc][pp];
        219 + if ( LOAD_START[cc][pp] > max_load_start ) max_load_start = LOAD_START[cc][pp];
        220 + if ( LOAD_ENDED[cc][pp] < min_load_ended ) min_load_ended = LOAD_ENDED[cc][pp];
        221 + if ( LOAD_ENDED[cc][pp] > max_load_ended ) max_load_ended = LOAD_ENDED[cc][pp];
        222 +
        223 + if ( TRSP_START[cc][pp] < min_trsp_start ) min_trsp_start = TRSP_START[cc][pp];
        224 + if ( TRSP_START[cc][pp] > max_trsp_start ) max_trsp_start = TRSP_START[cc][pp];
        225 + if ( TRSP_ENDED[cc][pp] < min_trsp_ended ) min_trsp_ended = TRSP_ENDED[cc][pp];
        226 + if ( TRSP_ENDED[cc][pp] > max_trsp_ended ) max_trsp_ended = TRSP_ENDED[cc][pp];
        227 +
        228 + if ( DISP_START[cc][pp] < min_disp_start ) min_disp_start = DISP_START[cc][pp];
        229 + if ( DISP_START[cc][pp] > max_disp_start ) max_disp_start = DISP_START[cc][pp];
        230 + if ( DISP_ENDED[cc][pp] < min_disp_ended ) min_disp_ended = DISP_ENDED[cc][pp];
        231 + if ( DISP_ENDED[cc][pp] > max_disp_ended ) max_disp_ended = DISP_ENDED[cc][pp];
        232 +
        233 + }
        234 + }
        235 + PRINTF(" - LOAD_START : min = %d / max = %d / med = %d / delta = %d\n",
        236 + min_load_start, max_load_start, (min_load_start+max_load_start)/2, max_load_start-min_load_start);
        237 + PRINTF(" - LOAD_END : min = %d / max = %d / med = %d / delta = %d\n",
        238 + min_load_ended, max_load_ended, (min_load_ended+max_load_ended)/2, max_load_ended-min_load_ended);
        239 +
        240 + PRINTF(" - TRSP_START : min = %d / max = %d / med = %d / delta = %d\n",
        241 + min_trsp_start, max_trsp_start, (min_trsp_start+max_trsp_start)/2, max_trsp_start-min_trsp_start);
        242 + PRINTF(" - TRSP_END : min = %d / max = %d / med = %d / delta = %d\n",
        243 + min_trsp_ended, max_trsp_ended, (min_trsp_ended+max_trsp_ended)/2, max_trsp_ended-min_trsp_ended);
        244 +
        245 + PRINTF(" - DISP_START : min = %d / max = %d / med = %d / delta = %d\n",
        246 + min_disp_start, max_disp_start, (min_disp_start+max_disp_start)/2, max_disp_start-min_disp_start);
        247 + PRINTF(" - DISP_END : min = %d / max = %d / med = %d / delta = %d\n",
        248 + min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2, max_disp_ended-min_disp_ended);
        249 +
        250 + PRINTF(" - BARRIER TRSP/DISP = %d\n", min_disp_start - max_trsp_ended);
        251 + }
  183   252   // next image
  184   253   image++;
        254 +
  185   255   } // end while image
        256 +
  186   257   while(1);
  187   258   } // end main()
Note: See TracChangeset for help on using the changeset viewer.