Changeset 629 for trunk/softs/soft_transpose_giet
- Timestamp:
- Feb 12, 2014, 9:51:23 AM (11 years ago)
- Location:
- trunk/softs/soft_transpose_giet
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/softs/soft_transpose_giet/ldscript
r623 r629 10 10 peripherals are not present in the architecture */ 11 11 12 seg_reset_base = 0x 10000000; /* le code de boot*/12 seg_reset_base = 0x00000000; /* boot code */ 13 13 14 seg_kcode_base = 0x000 01000; /* le code du système */15 seg_kdata_base = 0x000 10000; /* les donnees du système*/16 seg_kunc_base = 0x000 20000; /* les données non cachées du système*/14 seg_kcode_base = 0x00010000; /* kernel code */ 15 seg_kdata_base = 0x00020000; /* kernel cacheable data */ 16 seg_kunc_base = 0x00030000; /* kernel uncacheable data */ 17 17 18 seg_code_base = 0x000 30000; /* le code utilisateur*/19 seg_data_base = 0x000 40000; /* les données utilisateur*/18 seg_code_base = 0x00040000; /* application code */ 19 seg_data_base = 0x00050000; /* application data */ 20 20 21 seg_heap_base = 0x00100000; /* le tas utilisateur*/22 seg_stack_base = 0x00 400000; /* la pile utilisateur*/21 seg_heap_base = 0x00100000; /* heaps for application tasks */ 22 seg_stack_base = 0x00300000; /* stacks */ 23 23 24 seg_xcu_base = 0xF0000000; /* controleur XCU */ 25 seg_dma_base = 0xF1000000; /* controleur DMA */ 26 seg_tty_base = 0xF2000000; /* controleur TTY */ 27 seg_fbf_base = 0xF3000000; /* controleur FBF */ 28 seg_ioc_base = 0xF4000000; /* controleur IOC */ 24 seg_xcu_base = 0xF0000000; /* controler XCU */ 25 seg_tty_base = 0xF4000000; /* controler TTY */ 26 seg_fbf_base = 0xF3000000; /* controler FBF */ 27 seg_ioc_base = 0xF2000000; /* controler IOC */ 28 seg_nic_base = 0xF7000000; /* controler NIC */ 29 seg_cma_base = 0xF8000000; /* controler CMA */ 30 seg_pic_base = 0xF9000000; /* controler PIC */ 31 seg_mmc_base = 0xE0000000; /* config MMC */ 29 32 30 33 -
trunk/softs/soft_transpose_giet/main.c
r248 r629 1 1 2 #include "hard_config.h" 2 3 #include "stdio.h" 3 4 #include "limits.h" 4 5 #include "../giet_tsar/block_device.h" 5 6 6 #define NL 512 7 #define NP 512 8 #define NB_IMAGES 1 9 #define NB_CLUSTER_MAX 256 10 11 #define PRINTF(...) ({ if (proc_id == 0) { tty_printf(__VA_ARGS__); } }) 12 13 //#define DISPLAY_ONLY 14 15 /////////////////////////////////////////// 7 #define NL 128 8 #define NP 128 9 #define NB_IMAGES 5 10 11 #define PRINTF(...) ({ if (lpid == 0) { _tty_printf(__VA_ARGS__); } }) 12 13 #define DISPLAY_OK 14 16 15 // tricks to read parameters from ldscript 17 /////////////////////////////////////////// 18 19 struct plaf; 20 21 extern struct plouf seg_ioc_base; 16 extern struct plaf seg_ioc_base; 22 17 extern struct plaf seg_heap_base; 23 extern struct plaf NB_PROCS; 24 extern struct plaf NB_CLUSTERS; 18 19 // global variables stored in seg_data (cluster 0) 20 21 // instrumentation counters for each processor 22 unsigned int LOAD_START[256][4]; 23 unsigned int LOAD_END [256][4]; 24 unsigned int TRSP_START[256][4]; 25 unsigned int TRSP_END [256][4]; 26 unsigned int DISP_START[256][4]; 27 unsigned int DISP_END [256][4]; 28 29 // checksum variables 30 unsigned check_line_before[NL]; 31 unsigned check_line_after[NL]; 25 32 26 33 ///////////// 27 void main(){ 28 unsigned int frame = 0; 29 unsigned int date = 0; 30 31 unsigned int c; // cluster index for loops 32 unsigned int l; // line index for loops 33 unsigned int p; // pixel index for loops 34 35 unsigned int proc_id = procid(); // processor id 36 unsigned int nlocal_procs = (unsigned int) &NB_PROCS; // number of processors per cluster 37 unsigned int nclusters = (unsigned int) &NB_CLUSTERS; // number of clusters 38 unsigned int local_id = proc_id % nlocal_procs; // local processor id 39 unsigned int cluster_id = proc_id / nlocal_procs; // cluster id 40 unsigned int base = (unsigned int) &seg_heap_base; // base address for shared buffers 41 unsigned int increment = 0x80000000 / nclusters * 2; // cluster increment 42 unsigned int nglobal_procs = nclusters * nlocal_procs; // number of tasks 43 unsigned int npixels = NP * NL; // number of pixel per frame 44 45 unsigned int * ioc_address = (unsigned int *) &seg_ioc_base; 46 unsigned int block_size = ioc_address[BLOCK_DEVICE_BLOCK_SIZE]; 47 unsigned int nblocks = npixels / block_size; // number of blocks per frame 48 49 PRINTF("\n *** Entering main at cycle %d ***\n\n", proctime()); 34 void main() 35 { 36 unsigned int image = 0; 37 38 unsigned int l; // line index for loops 39 unsigned int p; // pixel index for loops 40 41 unsigned int * ioc_address = (unsigned int *) &seg_ioc_base; 42 unsigned int block_size = ioc_address[BLOCK_DEVICE_BLOCK_SIZE]; 43 44 unsigned int proc_id = _procid(); // processor id 45 unsigned int nclusters = X_SIZE*Y_SIZE; // number of clusters 46 unsigned int lpid = proc_id % NB_PROCS_MAX; // local processor id 47 unsigned int cluster_xy = proc_id / NB_PROCS_MAX; // cluster index (8 bits format) 48 unsigned int x = cluster_xy >> Y_WIDTH; // x coordinate 49 unsigned int y = cluster_xy & ((1<<Y_WIDTH)-1); // y coordinate 50 unsigned int ntasks = nclusters * NB_PROCS_MAX; // number of tasks 51 unsigned int npixels = NP * NL; // number of pixel per image 52 unsigned int nblocks = npixels / block_size; // number of blocks per image 53 54 // task_id is a "continuous" index for the the task running on processor (x,y,lpid) 55 unsigned int task_id = (((x * Y_SIZE) + y) * NB_PROCS_MAX) + lpid; 56 57 // cluster_id is a "continuous" index for cluster(x,y) 58 unsigned int cluster_id = (x * Y_SIZE) + y; 59 60 PRINTF("\n *** Proc 0 in cluster [%d,%d] enters main at cycle %d ***\n\n", 61 x, y, _proctime()); 50 62 51 63 // parameters checking 52 if ((nlocal_procs != 1) && (nlocal_procs != 2) && (nlocal_procs != 4)){ 53 PRINTF("NB_PROCS must be 1, 2 or 4\n"); 54 exit(1); 64 if ((NB_PROCS_MAX != 1) && (NB_PROCS_MAX != 2) && (NB_PROCS_MAX != 4)) 65 { 66 PRINTF("NB_PROCS_MAX must be 1, 2 or 4\n"); 67 _exit(); 55 68 } 56 69 if ((nclusters != 1) && (nclusters != 2) && (nclusters != 4) && (nclusters != 8) && 57 70 (nclusters != 16) && (nclusters != 32) && (nclusters != 64) && (nclusters != 128) && 58 (nclusters != 256)){ 71 (nclusters != 256)) 72 { 59 73 PRINTF("NB_CLUSTERS must be a power of 1 between 1 and 256\n"); 60 exit(1);74 _exit(); 61 75 } 62 if (nglobal_procs > 1024){ 63 PRINTF("NB_PROCS * NB_CLUSTERS cannot be larger than 1024\n"); 64 exit(1); 65 } 66 if (proc_id >= nglobal_procs){ 67 PRINTF("processor id %d larger than NB_CLUSTERS*NB_PROCS\n", proc_id); 68 exit(1); 69 } 70 71 // Arrays of pointers on the shared, distributed buffers containing the frames 72 // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters) 73 unsigned char * A[NB_CLUSTER_MAX]; 74 unsigned char * B[NB_CLUSTER_MAX]; 75 76 // Arrays of pointers on the instrumentation arrays 77 // These arrays are indexed by the cluster index (sized for the worst case : 256 clusters) 78 // each pointer points on the base adress of an array of NPROCS unsigned int 79 unsigned int * LOAD_START[NB_CLUSTER_MAX]; 80 unsigned int * LOAD_END[NB_CLUSTER_MAX]; 81 unsigned int * TRSP_START[NB_CLUSTER_MAX]; 82 unsigned int * TRSP_END[NB_CLUSTER_MAX]; 83 unsigned int * DISP_START[NB_CLUSTER_MAX]; 84 unsigned int * DISP_END[NB_CLUSTER_MAX]; 85 86 // shared buffers address definition 87 // from the seg_heap_base and increment depending on the cluster index 88 // These arrays of pointers are identical and replicated in the stack of each task 89 for (c = 0; c < nclusters; c++){ 90 A[c] = (unsigned char *) (base + increment * c); 91 B[c] = (unsigned char *) (base + npixels + increment * c); 92 LOAD_START[c] = (unsigned int *) (base + 2 * npixels + increment * c); 93 LOAD_END[c] = (unsigned int *) (base + 2 * npixels + nlocal_procs + increment * c); 94 TRSP_START[c] = (unsigned int *) (base + 2 * npixels + 2 * nlocal_procs + increment * c); 95 TRSP_END[c] = (unsigned int *) (base + 2 * npixels + 3 * nlocal_procs + increment * c); 96 DISP_START[c] = (unsigned int *) (base + 2 * npixels + 4 * nlocal_procs + increment * c); 97 DISP_END[c] = (unsigned int *) (base + 2 * npixels + 5 * nlocal_procs + increment * c); 98 } 76 77 // pointers on the distributed buffers containing the images, 78 // allocated in the heap segment: each buffer contains 256 Kbytes 79 unsigned char* buf_in = (unsigned char*)&seg_heap_base; 80 unsigned char* buf_out = buf_in + 0x00100000; 99 81 100 82 PRINTF("NB_CLUSTERS = %d\n", nclusters); 101 PRINTF("NB_LOCAL_PROCS = %d\n", nlocal_procs);102 PRINTF("NB_ GLOBAL_PROCS = %d\n", nglobal_procs);83 PRINTF("NB_LOCAL_PROCS = %d\n", NB_PROCS_MAX); 84 PRINTF("NB_TASKS = %d\n", ntasks); 103 85 PRINTF("NB_PIXELS = %d\n", npixels); 104 86 PRINTF("BLOCK_SIZE = %d\n", block_size); 105 87 PRINTF("NB_BLOCKS = %d\n\n", nblocks); 106 88 107 108 PRINTF("*** Starting barrier init at cycle %d ***\n",proctime());89 PRINTF("*** Proc 0 in cluster [%d,%d] starts barrier init at cycle %d\n", 90 x, y, _proctime()); 109 91 110 92 // barriers initialization 111 barrier_init(0, nglobal_procs); 112 barrier_init(1, nglobal_procs); 113 barrier_init(2, nglobal_procs); 114 115 PRINTF("*** Completing barrier init at cycle %d ***\n", proctime()); 116 117 // Main loop (on frames) 118 while (frame < NB_IMAGES){ 119 // pseudo parallel load from disk to A[c] buffer : nblocks/nclusters blocks 120 // only task running on processor with (local_id == 0) does it 121 122 if (local_id == 0){ 123 int p; 124 125 date = proctime(); 126 PRINTF("\n*** Starting load for frame %d at cycle %d\n", frame, date); 93 _barrier_init(0, ntasks); 94 _barrier_init(1, ntasks); 95 _barrier_init(2, ntasks); 96 _barrier_init(3, ntasks); 97 98 PRINTF("*** Proc 0 in cluster [%d,%d] completes barrier init at cycle %d\n", 99 x, y, _proctime()); 100 101 // Main loop (on images) 102 while (image < NB_IMAGES) 103 { 104 // pseudo parallel load from disk to buf_in buffer : nblocks/nclusters blocks 105 // only task running on processor with (lpid == 0) does it 106 107 LOAD_START[cluster_id][lpid] = _proctime(); 108 109 if (lpid == 0) 110 { 111 _ioc_read( ((image * nblocks) + ((nblocks * cluster_id) / nclusters)), 112 buf_in, 113 (nblocks / nclusters), 114 cluster_xy ); 115 116 PRINTF("\n*** Proc 0 in cluster [%d,%d] starts load for image %d at cycle %d\n", 117 x, y, image, _proctime() ); 118 119 _ioc_completed(); 120 121 PRINTF("*** Proc 0 in cluster [%d,%d] completes load for image %d at cycle %d\n", 122 x, y, image, _proctime() ); 123 } 124 125 LOAD_END[cluster_id][lpid] = _proctime(); 126 127 _barrier_wait(0); 128 129 // parallel transpose from buf_in to buf_out buffers 130 // each processor makes the transposition for (NL/ntasks) lines 131 // (p,l) are the pixel coordinates in the source image 132 133 PRINTF("\n*** proc 0 in cluster [%d,%d] starts transpose for image %d at cycle %d\n", 134 x, y, image, _proctime()); 135 136 TRSP_START[cluster_id][lpid] = _proctime(); 137 138 unsigned int nlt = NL / ntasks; // number of lines per processor 139 unsigned int first = task_id * nlt; // first line index 140 unsigned int last = first + nlt; // last line index 141 unsigned int nlines_clusters = NL / nclusters; // number of lines per cluster 142 unsigned int npix_clusters = NP / nclusters; // number of pixels per cluster 143 144 unsigned int src_cluster; 145 unsigned int src_index; 146 unsigned int dst_cluster; 147 unsigned int dst_index; 148 149 unsigned int word; 150 151 for (l = first; l < last; l++) 152 { 153 PRINTF(" - processing line %d\n", l); 154 155 check_line_before[l] = 0; 127 156 128 for (p = 0; p < nlocal_procs; p++){ 129 LOAD_START[cluster_id][p] = date; 130 } 131 if (ioc_read(frame * nblocks + nblocks * cluster_id / nclusters, A[cluster_id], nblocks / nclusters)){ 132 PRINTF("echec ioc_read\n"); 133 exit(); 134 } 135 if (ioc_completed()){ 136 PRINTF("echec ioc_completed\n"); 137 exit(); 138 } 139 140 date = proctime(); 141 PRINTF("*** Completing load for frame %d at cycle %d\n", frame, date); 142 for (p = 0; p < nlocal_procs; p++){ 143 LOAD_END[cluster_id][p] = date; 157 // in each iteration we read one word an write four bytes 158 for (p = 0 ; p < NP ; p = p+4) 159 { 160 // read one word, with extended address from local buffer 161 src_cluster = cluster_xy; 162 src_index = (l % nlines_clusters) * NP + p; 163 word = _word_extended_read( src_cluster, 164 (unsigned int)&buf_in[src_index] ); 165 166 unsigned char byte0 = (unsigned char)( word & 0x000000FF); 167 unsigned char byte1 = (unsigned char)((word>>8) & 0x000000FF); 168 unsigned char byte2 = (unsigned char)((word>>16) & 0x000000FF); 169 unsigned char byte3 = (unsigned char)((word>>24) & 0x000000FF); 170 171 // compute checksum 172 check_line_before[l] = check_line_before[l] + byte0 + byte1 + byte2 + byte3; 173 174 // write four bytes with extended address to four remote buffers 175 dst_cluster = (((p / npix_clusters) / Y_SIZE) << Y_WIDTH) + 176 ((p / npix_clusters) % Y_SIZE); 177 dst_index = (p % npix_clusters) * NL + l; 178 _byte_extended_write( dst_cluster, 179 (unsigned int)&buf_out[dst_index], 180 byte0 ); 181 182 dst_cluster = ((((p+1) / npix_clusters) / Y_SIZE) << Y_WIDTH) + 183 (((p+1) / npix_clusters) % Y_SIZE); 184 dst_index = ((p+1) % npix_clusters) * NL + l; 185 _byte_extended_write( dst_cluster, 186 (unsigned int)&buf_out[dst_index], 187 byte1 ); 188 189 dst_cluster = ((((p+2) / npix_clusters) / Y_SIZE) << Y_WIDTH) + 190 (((p+2) / npix_clusters) % Y_SIZE); 191 dst_index = ((p+2) % npix_clusters) * NL + l; 192 _byte_extended_write( dst_cluster, 193 (unsigned int)&buf_out[dst_index], 194 byte2 ); 195 196 dst_cluster = ((((p+3) / npix_clusters) / Y_SIZE) << Y_WIDTH) + 197 (((p+3) / npix_clusters) % Y_SIZE); 198 dst_index = ((p+3) % npix_clusters) * NL + l; 199 _byte_extended_write( dst_cluster, 200 (unsigned int)&buf_out[dst_index], 201 byte3 ); 144 202 } 145 203 } 146 204 147 barrier_wait(0); 148 149 // parallel transpose from A to B buffers 150 // each processor makes the transposition for (NL/nglobal_procs) lines 151 // (p,l) are the (x,y) pixel coordinates in the source frame 152 153 #ifndef DISPLAY_ONLY 154 date = proctime(); 155 PRINTF("\n*** Starting transpose for frame %d at cycle %d\n", frame, date); 156 TRSP_START[cluster_id][local_id] = date; 157 158 unsigned int nlt = NL / nglobal_procs; // Nombre de ligne à traiter par processeur 159 unsigned int first = proc_id * nlt; // Index de la premiÚre ligne à traiter pour le proc courant (celui qui exécute le code) 160 unsigned int last = first + nlt; // Index de la derniÚre ligne 161 unsigned int nlines_clusters = NL / nclusters; // Nombre de lignes à traiter par cluster 162 unsigned int npix_clusters = NP / nclusters; // Nombre de pixels par ligne à traiter par cluster 163 164 for (l = first; l < last; l++){ 165 PRINTF(" - processing line %d\n", l); 166 for (p = 0; p < NP; p++){ 167 unsigned int source_index = (l % nlines_clusters) * NP + p; 168 unsigned int dest_cluster = p / npix_clusters; 169 unsigned int dest_index = (p % npix_clusters) * NL + l; 170 B[dest_cluster][dest_index] = A[cluster_id][source_index]; 205 PRINTF("*** proc 0 in cluster [%d,%d] complete transpose for image %d at cycle %d\n", 206 x, y, image, _proctime() ); 207 208 TRSP_END[cluster_id][lpid] = _proctime(); 209 210 _barrier_wait(1); 211 212 // optional parallel display from local buf_out to frame buffer 213 214 #ifdef DISPLAY_OK 215 216 PRINTF("\n*** proc 0 in cluster [%d,%d] starts display for image %d at cycle %d\n", 217 x, y, image, _proctime() ); 218 219 DISP_START[cluster_id][lpid] = _proctime(); 220 221 unsigned int npxt = npixels / ntasks; // number of pixels per task 222 unsigned int buffer = (unsigned int)buf_out + npxt*lpid; 223 224 _fb_sync_write( npxt * task_id, buffer, npxt, cluster_xy ); 225 226 PRINTF("*** Proc 0 in cluster [%d,%d] completes display for image %d at cycle %d\n", 227 x, y, image, _proctime() ); 228 229 DISP_END[cluster_id][lpid] = _proctime(); 230 231 _barrier_wait(2); 232 233 #endif 234 235 // Instrumentation and checksum (done by processor 0 in cluster 0) 236 if (proc_id == 0) 237 { 238 PRINTF("\n*** Proc [0,0,0] starts checks for image %d at cycle %d\n\n", 239 image, _proctime() ); 240 241 unsigned int success = 1; 242 243 for ( l = 0 ; l < NL ; l++ ) 244 { 245 check_line_after[l] = 0; 246 247 for ( p = 0 ; p < NP ; p++ ) 248 { 249 // read one byte in remote buffer 250 src_cluster = (((p / npix_clusters) / Y_SIZE) << Y_WIDTH) + 251 ((p / npix_clusters) % Y_SIZE); 252 src_index = (p % npix_clusters) * NL + l; 253 254 unsigned char byte = _byte_extended_read( src_cluster, 255 (unsigned int)&buf_out[src_index] ); 256 257 check_line_after[l] = check_line_after[l] + byte; 258 } 259 260 PRINTF(" - l = %d / before = %d / after = %d \n", 261 l, check_line_before[l], check_line_after[l] ); 262 263 if ( check_line_before[l] != check_line_after[l] ) success = 0; 171 264 } 172 } 173 174 date = proctime(); 175 PRINTF("*** Completing transpose for frame %d at cycle %d\n", frame, date); 176 TRSP_END[cluster_id][local_id] = date; 177 barrier_wait(1); 178 #endif 179 180 // parallel display from B[c] to frame buffer 181 // each processor uses its private dma to display NL*NP/nglobal_procs pixels 182 183 date = proctime(); 184 PRINTF("\n*** Starting display for frame %d at cycle %d\n", frame, date); 185 DISP_START[cluster_id][local_id] = date; 186 187 unsigned int npxt = npixels / nglobal_procs; // number of pixels per proc 188 189 #ifndef DISPLAY_ONLY 190 if (fb_write(npxt * proc_id, B[cluster_id] + npxt * local_id, npxt)){ 191 PRINTF("[%d]: echec fb_sync_write\n", proc_id); 192 exit(); 193 } 194 #else 195 if (fb_write(npxt * proc_id, A[cluster_id] + npxt * local_id, npxt)){ 196 PRINTF("[%d]: echec fb_sync_write\n", proc_id); 197 exit(); 198 } 199 #endif 200 201 if (fb_completed()){ 202 PRINTF("[%d]: echec fb_completed\n", proc_id); 203 exit(); 204 } 205 206 date = proctime(); 207 PRINTF("*** Completing display for frame %d at cycle %d\n", frame, date); 208 DISP_END[cluster_id][local_id] = date; 209 210 barrier_wait(2); 211 212 // Instrumentation (done by processor 0 in cluster 0) 213 if (local_id == 0){ 214 date = proctime(); 215 PRINTF("\n*** Starting Instrumentation for frame %d at cycle %d\n\n", frame, date); 265 266 if ( success ) PRINTF("\n*** proc [0,0,0] : CHECKSUM OK \n\n"); 267 else PRINTF("\n*** proc [0,0,0] : CHECKSUM KO \n\n"); 216 268 217 269 int cc, pp; … … 229 281 unsigned int max_disp_ended = 0; 230 282 231 for (cc = 0; cc < nclusters; cc++){ 232 for (pp = 0; pp < nlocal_procs; pp++){ 233 if (LOAD_START[cc][pp] < min_load_start){ 234 min_load_start = LOAD_START[cc][pp]; 235 } 236 if (LOAD_START[cc][pp] > max_load_start){ 237 max_load_start = LOAD_START[cc][pp]; 238 } 239 if (LOAD_END[cc][pp] < min_load_ended){ 240 min_load_ended = LOAD_END[cc][pp]; 241 } 242 if (LOAD_END[cc][pp] > max_load_ended){ 243 max_load_ended = LOAD_END[cc][pp]; 244 } 245 246 if (TRSP_START[cc][pp] < min_trsp_start){ 247 min_trsp_start = TRSP_START[cc][pp]; 248 } 249 if (TRSP_START[cc][pp] > max_trsp_start){ 250 max_trsp_start = TRSP_START[cc][pp]; 251 } 252 if (TRSP_END[cc][pp] < min_trsp_ended){ 253 min_trsp_ended = TRSP_END[cc][pp]; 254 } 255 if (TRSP_END[cc][pp] > max_trsp_ended){ 256 max_trsp_ended = TRSP_END[cc][pp]; 257 } 258 259 if (DISP_START[cc][pp] < min_disp_start){ 260 min_disp_start = DISP_START[cc][pp]; 261 } 262 if (DISP_START[cc][pp] > max_disp_start){ 263 max_disp_start = DISP_START[cc][pp]; 264 } 265 if (DISP_END[cc][pp] < min_disp_ended){ 266 min_disp_ended = DISP_END[cc][pp]; 267 } 268 if (DISP_END[cc][pp] > max_disp_ended){ 269 max_disp_ended = DISP_END[cc][pp]; 270 } 283 for (cc = 0; cc < nclusters; cc++) 284 { 285 for (pp = 0; pp < NB_PROCS_MAX; pp++) 286 { 287 if (LOAD_START[cc][pp] < min_load_start) min_load_start = LOAD_START[cc][pp]; 288 if (LOAD_START[cc][pp] > max_load_start) max_load_start = LOAD_START[cc][pp]; 289 if (LOAD_END[cc][pp] < min_load_ended) min_load_ended = LOAD_END[cc][pp]; 290 if (LOAD_END[cc][pp] > max_load_ended) max_load_ended = LOAD_END[cc][pp]; 291 if (TRSP_START[cc][pp] < min_trsp_start) min_trsp_start = TRSP_START[cc][pp]; 292 if (TRSP_START[cc][pp] > max_trsp_start) max_trsp_start = TRSP_START[cc][pp]; 293 if (TRSP_END[cc][pp] < min_trsp_ended) min_trsp_ended = TRSP_END[cc][pp]; 294 if (TRSP_END[cc][pp] > max_trsp_ended) max_trsp_ended = TRSP_END[cc][pp]; 295 if (DISP_START[cc][pp] < min_disp_start) min_disp_start = DISP_START[cc][pp]; 296 if (DISP_START[cc][pp] > max_disp_start) max_disp_start = DISP_START[cc][pp]; 297 if (DISP_END[cc][pp] < min_disp_ended) min_disp_ended = DISP_END[cc][pp]; 298 if (DISP_END[cc][pp] > max_disp_ended) max_disp_ended = DISP_END[cc][pp]; 271 299 } 272 300 } 273 301 274 302 PRINTF(" - LOAD_START : min = %d / max = %d / med = %d / delta = %d\n", 275 min_load_start, max_load_start, (min_load_start+max_load_start)/2, max_load_start-min_load_start); 303 min_load_start, max_load_start, (min_load_start+max_load_start)/2, 304 max_load_start-min_load_start); 305 276 306 PRINTF(" - LOAD_END : min = %d / max = %d / med = %d / delta = %d\n", 277 min_load_ended, max_load_ended, (min_load_ended+max_load_ended)/2, max_load_ended-min_load_ended); 307 min_load_ended, max_load_ended, (min_load_ended+max_load_ended)/2, 308 max_load_ended-min_load_ended); 278 309 279 310 PRINTF(" - TRSP_START : min = %d / max = %d / med = %d / delta = %d\n", 280 min_trsp_start, max_trsp_start, (min_trsp_start+max_trsp_start)/2, max_trsp_start-min_trsp_start); 311 min_trsp_start, max_trsp_start, (min_trsp_start+max_trsp_start)/2, 312 max_trsp_start-min_trsp_start); 313 281 314 PRINTF(" - TRSP_END : min = %d / max = %d / med = %d / delta = %d\n", 282 min_trsp_ended, max_trsp_ended, (min_trsp_ended+max_trsp_ended)/2, max_trsp_ended-min_trsp_ended); 315 min_trsp_ended, max_trsp_ended, (min_trsp_ended+max_trsp_ended)/2, 316 max_trsp_ended-min_trsp_ended); 283 317 284 318 PRINTF(" - DISP_START : min = %d / max = %d / med = %d / delta = %d\n", 285 min_disp_start, max_disp_start, (min_disp_start+max_disp_start)/2, max_disp_start-min_disp_start); 319 min_disp_start, max_disp_start, (min_disp_start+max_disp_start)/2, 320 max_disp_start-min_disp_start); 321 286 322 PRINTF(" - DISP_END : min = %d / max = %d / med = %d / delta = %d\n", 287 min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2, max_disp_ended-min_disp_ended); 288 289 PRINTF(" - BARRIER TRSP/DISP = %d\n", min_disp_start - max_trsp_ended); 323 min_disp_ended, max_disp_ended, (min_disp_ended+max_disp_ended)/2, 324 max_disp_ended-min_disp_ended); 290 325 } 291 frame++; 292 293 } // end while frame 294 295 PRINTF("*** End of main ***\n"); 296 297 while(1); 326 327 image++; 328 329 _barrier_wait( 3 ); 330 } // end while image 331 332 333 _exit(); 334 298 335 } // end main() 299 336
Note: See TracChangeset
for help on using the changeset viewer.