Changeset 589 for soft/giet_vm/applications/transpose/main.c
- Timestamp:
- Jul 8, 2015, 3:57:15 PM (9 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
soft/giet_vm/applications/transpose/main.c
r574 r589 4 4 // author : Alain Greiner 5 5 /////////////////////////////////////////////////////////////////////////////////////// 6 // This multi-threaded application makes a transpose for a NN*NN pixels 7 // sequence of images. 6 // This multi-threaded application makes a transpose for a NN*NN pixels image. 8 7 // It can run on a multi-processors, multi-clusters architecture, with one thread 9 8 // per processor. 10 9 // 11 // The image sequence is read from a file (one byte per pixel). 10 // The image is read from a file (one byte per pixel), transposed and 11 // saved in a second file. Then the transposed image is read from the second file, 12 // transposed again and saved in a third file. 13 // 12 14 // The input and output buffers containing the image are distributed in all clusters. 13 15 // 14 // - The image size NN must fit the frame buffer size : 128 bytes16 // - The image size NN must fit the frame buffer size. 15 17 // - The block size in block device must be 512 bytes. 16 // - The number of clusters must be a power of 2 no larger than 3217 // - The number of processors per cluster must be a power of 2 no larger than 4 18 // - The number of clusters must be a power of 2 no larger than 64. 19 // - The number of processors per cluster must be a power of 2 no larger than 4. 18 20 // 19 21 // For each image the application makes a self test (checksum for each line). 
… … 25 27 #include "malloc.h" 26 28 27 #define BLOCK_SIZE 512 // block size on disk 28 #define CLUSTERS_MAX 32 // max number of clusters 29 #define PROCS_MAX 4 // max number of processors per cluster 30 #define NN 256 // image size : nlines = npixels 31 #define NB_IMAGES 1 // number of images to be handled 32 #define FILE_PATHNAME "misc/lena.raw" // pathname on virtual disk 33 #define INSTRUMENTATION_OK 0 // display statistics on TTY when non zero 29 #define BLOCK_SIZE 512 // block size on disk 30 #define X_MAX 8 // max number of clusters in row 31 #define Y_MAX 8 // max number of clusters in column 32 #define PROCS_MAX 4 // max number of procs per cluster 33 #define CLUSTER_MAX (X_MAX * Y_MAX) // max number of clusters 34 #define NN 256 // image size : nlines = npixels 35 #define INITIAL_FILE_PATH "misc/lena.raw" // pathname on virtual disk 36 #define TRANSPOSED_FILE_PATH "/home/lena_transposed.raw" // pathname on virtual disk 37 #define RESTORED_FILE_PATH "/home/lena_restored.raw" // pathname on virtual disk 38 #define INSTRUMENTATION_OK 1 // display statistics on TTY 34 39 35 40 /////////////////////////////////////////////////////// … … 38 43 39 44 // instrumentation counters for each processor in each cluster 40 unsigned int LOAD_START[CLUSTERS_MAX][PROCS_MAX]; 41 unsigned int LOAD_END [CLUSTERS_MAX][PROCS_MAX]; 42 unsigned int TRSP_START[CLUSTERS_MAX][PROCS_MAX]; 43 unsigned int TRSP_END [CLUSTERS_MAX][PROCS_MAX]; 44 unsigned int DISP_START[CLUSTERS_MAX][PROCS_MAX]; 45 unsigned int DISP_END [CLUSTERS_MAX][PROCS_MAX]; 45 unsigned int LOAD_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 46 unsigned int LOAD_END [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 47 unsigned int TRSP_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 48 unsigned int TRSP_END [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 49 unsigned int DISP_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 50 unsigned int DISP_END [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 51 unsigned int STOR_START[X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 
}}}; 52 unsigned int STOR_END [X_MAX][Y_MAX][PROCS_MAX] = {{{ 0 }}}; 46 53 47 54 // arrays of pointers on distributed buffers 48 55 // one input buffer & one output buffer per cluster 49 unsigned char* buf_in [CLUSTER S_MAX];50 unsigned char* buf_out[CLUSTER S_MAX];56 unsigned char* buf_in [CLUSTER_MAX]; 57 unsigned char* buf_out[CLUSTER_MAX]; 51 58 52 59 // checksum variables … … 57 64 giet_sqt_barrier_t barrier; 58 65 59 volatile unsigned int init_ok = 0; 66 volatile unsigned int global_init_ok = 0; 67 volatile unsigned int local_init_ok[X_MAX][Y_MAX] = {{ 0 }}; 60 68 61 69 ////////////////////////////////////////// … … 63 71 ////////////////////////////////////////// 64 72 { 65 66 73 unsigned int l; // line index for loops 67 74 unsigned int p; // pixel index for loops 68 unsigned int c; // cluster index for loops69 75 70 76 // processor identifiers … … 82 88 giet_procs_number( &x_size , &y_size , &nprocs ); 83 89 84 unsigned int nclusters = x_size * y_size; // number of clusters 85 unsigned int ntasks = x_size * y_size * nprocs; // number of tasks 86 unsigned int npixels = NN * NN; // pixels per image 87 unsigned int nblocks = npixels / BLOCK_SIZE; // blocks per image 88 unsigned int image = 0; // image counter 89 int file = 0; // file descriptor 90 unsigned int cluster_id = (x * y_size) + y; // "continuous" index 91 unsigned int task_id = (cluster_id * nprocs) + lpid; // "continuous" task index 92 93 // Processor [0,0,0] makes initialisation 94 // It includes parameters checking, barrier initialization, 95 // distributed buffers allocation, and file open 90 unsigned int nclusters = x_size * y_size; // number of clusters 91 unsigned int ntasks = x_size * y_size * nprocs; // number of tasks 92 unsigned int npixels = NN * NN; // pixels per image 93 unsigned int iteration = 0; // iteration index 94 int fd_initial = 0; // initial file descriptor 95 int fd_transposed = 0; // transposed file descriptor 96 int fd_restored = 0; // restored file descriptor 97 unsigned 
int cluster_id = (x * y_size) + y; // "continuous" index 98 unsigned int task_id = (cluster_id * nprocs) + lpid; // "continuous" task index 99 100 101 /////////////////////////////////////////////////////////////////////// 102 // Processor [0,0,0] makes global initialisation 103 // It includes parameters checking, heap and barrier initialization. 104 // Others processors wait initialisation completion 105 /////////////////////////////////////////////////////////////////////// 106 96 107 if ( (x==0) && (y==0) && (lpid==0) ) 97 108 { … … 101 112 } 102 113 if ((nclusters != 1) && (nclusters != 2) && (nclusters != 4) && 103 (nclusters != 8) && (nclusters != 16) && (nclusters != 32) )104 { 105 giet_exit("[TRANSPOSE ERROR] number of clusters must be 1,2,4,8,16,32 ");114 (nclusters != 8) && (nclusters != 16) && (nclusters != 32) && (nclusters != 64) ) 115 { 116 giet_exit("[TRANSPOSE ERROR] number of clusters must be 1,2,4,8,16,32,64"); 106 117 } 107 118 if ( ntasks > NN ) … … 110 121 } 111 122 112 // Distributed buffers allocation 113 // The buffers containing one image are distributed in the user 114 // heap (one buf_in and one buf_out per cluster). 115 // Each buffer contains (NN*NN / nclusters) bytes. 
116 for ( c = 0 ; c < nclusters ; c++ ) 117 { 118 unsigned int rx = c / y_size; 119 unsigned int ry = c % y_size; 120 121 heap_init( rx, ry ); 122 buf_in[c] = remote_malloc( npixels/nclusters, rx, ry ); 123 buf_out[c] = remote_malloc( npixels/nclusters, rx, ry ); 124 125 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] completes buffer allocation" 126 " for cluster[%d,%d] at cycle %d\n" 127 " - buf_in = %x\n" 128 " - buf_out = %x\n", 129 rx, ry, giet_proctime(), 130 (unsigned int)buf_in[c], 131 (unsigned int)buf_out[c] ); 132 } 133 134 // Barrier initialisation 123 // distributed heap initialisation 124 unsigned int cx , cy; 125 for ( cx = 0 ; cx < x_size ; cx++ ) 126 { 127 for ( cy = 0 ; cy < y_size ; cy++ ) 128 { 129 heap_init( cx , cy ); 130 } 131 } 132 133 // barrier initialisation 135 134 sqt_barrier_init( &barrier, x_size , y_size , nprocs ); 136 135 137 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] completes barrier init at cycle %d\n",136 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] completes heap & barrier init at cycle %d\n", 138 137 giet_proctime() ); 139 138 140 // open file containing images 141 file = giet_fat_open( FILE_PATHNAME , 0 ); 142 143 if (file < 0) 144 { 145 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d]" 146 " cannot open file %s", 147 x , y , lpid , FILE_PATHNAME ); 148 giet_exit(" open() failure"); 149 } 150 else 151 { 152 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file misc/images.raw\n"); 153 } 154 init_ok = 1; 139 // display disk content 140 giet_fat_list( "/" ); 141 giet_fat_list( "/misc" ); 142 giet_fat_list( "/home" ); 143 giet_fat_list( "/build" ); 144 giet_fat_list( "/build/kernel" ); 145 giet_fat_list( "/build/transpose" ); 146 147 global_init_ok = 1; 155 148 } 156 else // others processors wait initialisation completion149 else 157 150 { 158 while ( init_ok == 0 );151 while ( global_init_ok == 0 ); 159 152 } 160 153 161 ///////////////////////// 162 // Main loop (on images) 163 while (image < NB_IMAGES) 154 
/////////////////////////////////////////////////////////////////////// 155 // In each cluster, only task running on processor[x,y,0] allocates 156 // the local buffers containing the images in the distributed heap 157 // (one buf_in and one buf_out per cluster). 158 // Other processors in cluster wait completion. 159 /////////////////////////////////////////////////////////////////////// 160 161 if ( lpid == 0 ) 164 162 { 165 // pseudo parallel load from disk to buf_in buffer : nblocks/nclusters blocks 166 // only task running on processor with (lpid == 0) does it 167 168 LOAD_START[cluster_id][lpid] = giet_proctime(); 163 buf_in[cluster_id] = remote_malloc( npixels/nclusters, x, y ); 164 buf_out[cluster_id] = remote_malloc( npixels/nclusters, x, y ); 165 166 if ( (x==0) && (y==0) ) 167 giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes buffer allocation" 168 " for cluster[%d,%d] at cycle %d\n" 169 " - buf_in = %x\n" 170 " - buf_out = %x\n", 171 x, y, lpid, x, y, giet_proctime(), 172 (unsigned int)buf_in[cluster_id], (unsigned int)buf_out[cluster_id] ); 173 174 /////////////////////////////////////////////////////////////////////// 175 // In each cluster, only task running on processor[x,y,0] opens the 176 // three private file descriptors for the three files 177 /////////////////////////////////////////////////////////////////////// 178 179 // open initial file 180 fd_initial = giet_fat_open( INITIAL_FILE_PATH , O_RDONLY ); // read_only 181 if ( fd_initial < 0 ) 182 { 183 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot open file %s\n", 184 x , y , lpid , INITIAL_FILE_PATH ); 185 giet_exit(" open() failure"); 186 } 187 else if ( (x==0) && (y==0) && (lpid==0) ) 188 { 189 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file %s / fd = %d\n", 190 INITIAL_FILE_PATH , fd_initial ); 191 } 192 193 // open transposed file 194 fd_transposed = giet_fat_open( TRANSPOSED_FILE_PATH , O_CREATE ); // create if required 195 if ( fd_transposed < 0 ) 196 { 197 
giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot open file %s\n", 198 x , y , lpid , TRANSPOSED_FILE_PATH ); 199 giet_exit(" open() failure"); 200 } 201 else if ( (x==0) && (y==0) && (lpid==0) ) 202 { 203 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file %s / fd = %d\n", 204 TRANSPOSED_FILE_PATH , fd_transposed ); 205 } 206 207 // open restored file 208 fd_restored = giet_fat_open( RESTORED_FILE_PATH , O_CREATE ); // create if required 209 if ( fd_restored < 0 ) 210 { 211 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot open file %s\n", 212 x , y , lpid , RESTORED_FILE_PATH ); 213 giet_exit(" open() failure"); 214 } 215 else if ( (x==0) && (y==0) && (lpid==0) ) 216 { 217 giet_shr_printf("\n[TRANSPOSE] Proc [0,0,0] open file %s / fd = %d\n", 218 RESTORED_FILE_PATH , fd_restored ); 219 } 220 221 local_init_ok[x][y] = 1; 222 } 223 else 224 { 225 while( local_init_ok[x][y] == 0 ); 226 } 227 228 /////////////////////////////////////////////////////////////////////// 229 // Main loop / two iterations: 230 // - first makes initial => transposed 231 // - second makes transposed => restored 232 // All processors execute this main loop. 
233 /////////////////////////////////////////////////////////////////////// 234 235 unsigned int fd_in = fd_initial; 236 unsigned int fd_out = fd_transposed; 237 238 while (iteration < 2) 239 { 240 /////////////////////////////////////////////////////////////////////// 241 // pseudo parallel load from disk to buf_in buffers: npixels/nclusters 242 // only task running on processor(x,y,0) does it 243 /////////////////////////////////////////////////////////////////////// 244 245 LOAD_START[x][y][lpid] = giet_proctime(); 169 246 170 247 if (lpid == 0) 171 248 { 172 giet_fat_read( file, 173 buf_in[cluster_id], 174 (nblocks / nclusters), 175 ((image*nblocks) + ((nblocks*cluster_id)/nclusters)) ); 249 unsigned int offset = ((npixels*cluster_id)/nclusters); 250 if ( giet_fat_lseek( fd_in, 251 offset, 252 SEEK_SET ) != offset ) 253 { 254 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot seek fd = %d\n", 255 x , y , lpid , fd_in ); 256 giet_exit(" seek() failure"); 257 } 258 259 unsigned int pixels = npixels / nclusters; 260 if ( giet_fat_read( fd_in, 261 buf_in[cluster_id], 262 pixels ) != pixels ) 263 { 264 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot read fd = %d\n", 265 x , y , lpid , fd_in ); 266 giet_exit(" read() failure"); 267 } 176 268 177 269 if ( (x==0) && (y==0) ) 178 270 giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes load" 179 " for i mage%d at cycle %d\n",180 x, y, lpid, i mage, giet_proctime() );181 } 182 183 LOAD_END[ cluster_id][lpid] = giet_proctime();271 " for iteration %d at cycle %d\n", 272 x, y, lpid, iteration, giet_proctime() ); 273 } 274 275 LOAD_END[x][y][lpid] = giet_proctime(); 184 276 185 277 ///////////////////////////// 186 278 sqt_barrier_wait( &barrier ); 187 279 280 /////////////////////////////////////////////////////////////////////// 188 281 // parallel transpose from buf_in to buf_out 189 282 // each task makes the transposition for nlt lines (nlt = NN/ntasks) 190 283 // from line [task_id*nlt] to line 
[(task_id + 1)*nlt - 1] 191 284 // (p,l) are the absolute pixel coordinates in the source image 192 193 194 TRSP_START[ cluster_id][lpid] = giet_proctime();285 /////////////////////////////////////////////////////////////////////// 286 287 TRSP_START[x][y][lpid] = giet_proctime(); 195 288 196 289 unsigned int nlt = NN / ntasks; // number of lines per task … … 233 326 if ( (x==0) && (y==0) ) 234 327 giet_shr_printf("\n[TRANSPOSE] proc [%d,%d,0] completes transpose" 235 " for i mage%d at cycle %d\n",236 x, y, i mage, giet_proctime() );237 238 } 239 TRSP_END[ cluster_id][lpid] = giet_proctime();328 " for iteration %d at cycle %d\n", 329 x, y, iteration, giet_proctime() ); 330 331 } 332 TRSP_END[x][y][lpid] = giet_proctime(); 240 333 241 334 ///////////////////////////// 242 335 sqt_barrier_wait( &barrier ); 243 336 244 245 if ( USE_FBF ) // external frame buffer available 246 { 247 // parallel display from local buf_out to frame buffer 248 // all processors contribute to display using memcpy... 
249 250 DISP_START[cluster_id][lpid] = giet_proctime(); 251 252 unsigned int npt = npixels / ntasks; // number of pixels per task 253 254 giet_fbf_sync_write( npt * task_id, 255 &buf_out[cluster_id][lpid*npt], 256 npt ); 257 258 if ( (x==0) && (y==0) && (lpid==0) ) 259 giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes display" 260 " for image %d at cycle %d\n", 261 x, y, lpid, image, giet_proctime() ); 262 263 DISP_END[cluster_id][lpid] = giet_proctime(); 264 265 ///////////////////////////// 266 sqt_barrier_wait( &barrier ); 267 } 268 else // checksum by processor(x,y,0) in each cluster 269 { 270 if ( lpid == 0 ) 271 { 272 unsigned int success = 1; 273 unsigned int start = cluster_id * nlc; 274 unsigned int stop = start + nlc; 275 276 for ( l = start ; l < stop ; l++ ) 277 { 278 check_line_after[l] = 0; 279 280 for ( p = 0 ; p < NN ; p++ ) 281 { 282 // read one byte in remote buffer 283 src_cluster = p / nlc; 284 src_index = (p % nlc)*NN + l; 285 286 unsigned char byte = buf_out[src_cluster][src_index]; 287 288 check_line_after[l] = check_line_after[l] + byte; 289 } 290 291 if ( check_line_before[l] != check_line_after[l] ) success = 0; 292 } 293 294 if ( success ) 295 { 296 giet_shr_printf("\n[TRANSPOSE] proc [%d,%d,0] checksum OK" 297 " for image %d at cycle %d\n", 298 x, y, image, giet_proctime() ); 299 } 300 else 301 { 302 giet_shr_printf("\n[TRANSPOSE] proc [%d,%d,0] checksum KO" 303 " for image %d at cycle %d\n", 304 x, y, image, giet_proctime() ); 305 } 306 } 307 } 337 /////////////////////////////////////////////////////////////////////// 338 // parallel display from local buf_out to frame buffer 339 // all tasks contribute to display using memcpy... 
340 /////////////////////////////////////////////////////////////////////// 341 342 DISP_START[x][y][lpid] = giet_proctime(); 343 344 unsigned int npt = npixels / ntasks; // number of pixels per task 345 346 giet_fbf_sync_write( npt * task_id, 347 &buf_out[cluster_id][lpid*npt], 348 npt ); 349 350 if ( (x==0) && (y==0) && (lpid==0) ) 351 giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes display" 352 " for iteration %d at cycle %d\n", 353 x, y, lpid, iteration, giet_proctime() ); 354 355 DISP_END[x][y][lpid] = giet_proctime(); 308 356 309 357 ///////////////////////////// 310 358 sqt_barrier_wait( &barrier ); 311 359 360 /////////////////////////////////////////////////////////////////////// 361 // pseudo parallel store : buf_out buffers to disk : npixels/nclusters 362 // only task running on processor(x,y,0) does it 363 /////////////////////////////////////////////////////////////////////// 364 365 STOR_START[x][y][lpid] = giet_proctime(); 366 367 if ( lpid == 0 ) 368 { 369 unsigned int offset = ((npixels*cluster_id)/nclusters); 370 if ( giet_fat_lseek( fd_out, 371 offset, 372 SEEK_SET ) != offset ) 373 { 374 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot seek fr = %d\n", 375 x , y , lpid , fd_out ); 376 giet_exit(" seek() failure"); 377 } 378 379 unsigned int pixels = npixels / nclusters; 380 if ( giet_fat_write( fd_out, 381 buf_out[cluster_id], 382 pixels ) != pixels ) 383 { 384 giet_shr_printf("\n[TRANSPOSE ERROR] Proc [%d,%d,%d] cannot write fd = %d\n", 385 x , y , lpid , fd_out ); 386 giet_exit(" write() failure"); 387 } 388 389 if ( (x==0) && (y==0) ) 390 giet_shr_printf("\n[TRANSPOSE] Proc [%d,%d,%d] completes store" 391 " for iteration %d at cycle %d\n", 392 x, y, lpid, iteration, giet_proctime() ); 393 } 394 395 STOR_END[x][y][lpid] = giet_proctime(); 396 397 ///////////////////////////// 398 sqt_barrier_wait( &barrier ); 399 312 400 // instrumentation done by processor [0,0,0] 313 401 if ( (x==0) && (y==0) && (lpid==0) && 
INSTRUMENTATION_OK ) 314 402 { 315 int c c, pp;403 int cx , cy , pp ; 316 404 unsigned int min_load_start = 0xFFFFFFFF; 317 405 unsigned int max_load_start = 0; … … 326 414 unsigned int min_disp_ended = 0xFFFFFFFF; 327 415 unsigned int max_disp_ended = 0; 328 329 for (cc = 0; cc < nclusters; cc++) 330 { 331 for (pp = 0; pp < NB_PROCS_MAX; pp++) 332 { 333 if (LOAD_START[cc][pp] < min_load_start) min_load_start = LOAD_START[cc][pp]; 334 if (LOAD_START[cc][pp] > max_load_start) max_load_start = LOAD_START[cc][pp]; 335 if (LOAD_END[cc][pp] < min_load_ended) min_load_ended = LOAD_END[cc][pp]; 336 if (LOAD_END[cc][pp] > max_load_ended) max_load_ended = LOAD_END[cc][pp]; 337 if (TRSP_START[cc][pp] < min_trsp_start) min_trsp_start = TRSP_START[cc][pp]; 338 if (TRSP_START[cc][pp] > max_trsp_start) max_trsp_start = TRSP_START[cc][pp]; 339 if (TRSP_END[cc][pp] < min_trsp_ended) min_trsp_ended = TRSP_END[cc][pp]; 340 if (TRSP_END[cc][pp] > max_trsp_ended) max_trsp_ended = TRSP_END[cc][pp]; 341 if (DISP_START[cc][pp] < min_disp_start) min_disp_start = DISP_START[cc][pp]; 342 if (DISP_START[cc][pp] > max_disp_start) max_disp_start = DISP_START[cc][pp]; 343 if (DISP_END[cc][pp] < min_disp_ended) min_disp_ended = DISP_END[cc][pp]; 344 if (DISP_END[cc][pp] > max_disp_ended) max_disp_ended = DISP_END[cc][pp]; 345 } 346 } 416 unsigned int min_stor_start = 0xFFFFFFFF; 417 unsigned int max_stor_start = 0; 418 unsigned int min_stor_ended = 0xFFFFFFFF; 419 unsigned int max_stor_ended = 0; 420 421 for (cx = 0; cx < x_size; cx++) 422 { 423 for (cy = 0; cy < y_size; cy++) 424 { 425 for (pp = 0; pp < NB_PROCS_MAX; pp++) 426 { 427 if (LOAD_START[cx][cy][pp] < min_load_start) min_load_start = LOAD_START[cx][cy][pp]; 428 if (LOAD_START[cx][cy][pp] > max_load_start) max_load_start = LOAD_START[cx][cy][pp]; 429 if (LOAD_END[cx][cy][pp] < min_load_ended) min_load_ended = LOAD_END[cx][cy][pp]; 430 if (LOAD_END[cx][cy][pp] > max_load_ended) max_load_ended = LOAD_END[cx][cy][pp]; 431 if 
(TRSP_START[cx][cy][pp] < min_trsp_start) min_trsp_start = TRSP_START[cx][cy][pp]; 432 if (TRSP_START[cx][cy][pp] > max_trsp_start) max_trsp_start = TRSP_START[cx][cy][pp]; 433 if (TRSP_END[cx][cy][pp] < min_trsp_ended) min_trsp_ended = TRSP_END[cx][cy][pp]; 434 if (TRSP_END[cx][cy][pp] > max_trsp_ended) max_trsp_ended = TRSP_END[cx][cy][pp]; 435 if (DISP_START[cx][cy][pp] < min_disp_start) min_disp_start = DISP_START[cx][cy][pp]; 436 if (DISP_START[cx][cy][pp] > max_disp_start) max_disp_start = DISP_START[cx][cy][pp]; 437 if (DISP_END[cx][cy][pp] < min_disp_ended) min_disp_ended = DISP_END[cx][cy][pp]; 438 if (DISP_END[cx][cy][pp] > max_disp_ended) max_disp_ended = DISP_END[cx][cy][pp]; 439 if (STOR_START[cx][cy][pp] < min_stor_start) min_stor_start = STOR_START[cx][cy][pp]; 440 if (STOR_START[cx][cy][pp] > max_stor_start) max_stor_start = STOR_START[cx][cy][pp]; 441 if (STOR_END[cx][cy][pp] < min_stor_ended) min_stor_ended = STOR_END[cx][cy][pp]; 442 if (STOR_END[cx][cy][pp] > max_stor_ended) max_stor_ended = STOR_END[cx][cy][pp]; 443 } 444 } 445 } 446 447 giet_shr_printf("\n ---------------- Instrumentation Results ---------------------\n"); 347 448 348 449 giet_shr_printf(" - LOAD_START : min = %d / max = %d / med = %d / delta = %d\n", … … 369 470 min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2, 370 471 max_disp_ended-min_disp_ended); 371 } 372 373 image++; 472 473 giet_shr_printf(" - STOR_START : min = %d / max = %d / med = %d / delta = %d\n", 474 min_stor_start, max_stor_start, (min_stor_start+max_stor_start)/2, 475 max_stor_start-min_stor_start); 476 477 giet_shr_printf(" - STOR_END : min = %d / max = %d / med = %d / delta = %d\n", 478 min_stor_ended, max_stor_ended, (min_stor_ended+max_stor_ended)/2, 479 max_stor_ended-min_stor_ended); 480 } 374 481 375 482 ///////////////////////////// 376 483 sqt_barrier_wait( &barrier ); 377 484 378 } // end while image 379 380 // Processor[0,0,0] releases the Distributed buffers 381 if ( (x==0) && 
(y==0) && (lpid==0) ) 485 // update iteration variables 486 fd_in = fd_transposed; 487 fd_out = fd_restored; 488 iteration++; 489 490 } // end while 491 492 /////////////////////////////////////////////////////////////////////// 493 // In each cluster, only task running on Processor[x,y,0] releases 494 // the distributed buffers and close the file descriptors. 495 /////////////////////////////////////////////////////////////////////// 496 497 if ( lpid==0 ) 382 498 { 383 for ( c = 0 ; c < nclusters ; c++ ) 384 { 385 free( buf_in[c] ); 386 free( buf_in[c] ); 387 } 499 free( buf_in[cluster_id] ); 500 free( buf_out[cluster_id] ); 501 502 giet_fat_close( fd_initial ); 503 giet_fat_close( fd_transposed ); 504 giet_fat_close( fd_restored ); 388 505 } 389 506 507 // display disk content 508 if ( (x==0) && (y == 0) && (lpid == 0) ) 509 { 510 giet_fat_list( "/" ); 511 giet_fat_list( "/misc" ); 512 giet_fat_list( "/home" ); 513 giet_fat_list( "/build" ); 514 giet_fat_list( "/build/kernel" ); 515 giet_fat_list( "/build/transpose" ); 516 517 giet_fat_remove( "/home/lena_transposed" , 0 ); 518 giet_fat_remove( "/home/lena_restored" , 0 ); 519 520 giet_fat_remove( "/home" , 1 ); 521 522 giet_fat_list( "/" ); 523 giet_fat_list( "/misc" ); 524 giet_fat_list( "/home" ); 525 giet_fat_list( "/build" ); 526 giet_fat_list( "/build/kernel" ); 527 giet_fat_list( "/build/transpose" ); 528 } 529 390 530 giet_exit("Completed"); 391 531
Note: See TracChangeset
for help on using the changeset viewer.