Changeset 502 for soft/giet_vm/applications/convol
- Timestamp: Feb 8, 2015, 9:20:45 PM (10 years ago)
- Location: soft/giet_vm/applications/convol
- Files: 2 edited (convol.py, main.c)
Legend:
- Unmodified
- Added
- Removed
soft/giet_vm/applications/convol/convol.py
r457 r502 12 12 # This include both the mapping of virtual segments on the clusters, 13 13 # and the mapping of tasks on processors. 14 # There is one task per processor. 15 # The mapping of virtual segments is the following: 16 # - There is one shared data vseg in cluster[0][0] 17 # - The code vsegs are replicated on all clusters containing processors. 18 # - There is one heap vseg per cluster containing processors. 19 # - The stacks vsegs are distibuted on all clusters containing processors. 14 20 # This mapping uses 5 platform parameters, (obtained from the "mapping" argument) 15 # - x_size : number of clusters in a row16 # - y_size : number of clusters in a column17 # - x_width : number of bits coding x coordinate18 # - y_width : number of bits coding y coordinate19 # - nprocs : number of processors per cluster21 # - x_size : number of clusters in a row 22 # - y_size : number of clusters in a column 23 # - x_width : number of bits coding x coordinate 24 # - y_width : number of bits coding y coordinate 25 # - nprocs : number of processors per cluster 20 26 #################################################################################### 21 27 … … 46 52 47 53 # data vseg in cluster[0,0] : non local 48 mapping.addVseg( vspace, 'conv_data', data_base , data_size, 'C_WU', vtype = 'ELF', 49 x = 0, y = 0, pseg = 'RAM', binpath = 'build/convol/convol.elf', 54 mapping.addVseg( vspace, 'conv_data', data_base , data_size, 55 'C_WU', vtype = 'ELF', x = 0, y = 0, pseg = 'RAM', 56 binpath = 'build/convol/convol.elf', 50 57 local = False ) 51 58 … … 53 60 for x in xrange (x_size): 54 61 for y in xrange (y_size): 55 size = code_size 56 base = code_base 57 mapping.addVseg( vspace, 'conv_code_%d_%d' % (x,y), base, size, 58 'CXWU', vtype = 'ELF', x = x , y = y , pseg = 'RAM', 59 binpath = 'build/convol/convol.elf', 60 local = True ) 62 cluster_id = (x * y_size) + y 63 if ( mapping.clusters[cluster_id].procs ): 64 size = code_size 65 base = code_base 66 67 mapping.addVseg( 
vspace, 'conv_code_%d_%d' % (x,y), base, size, 68 'CXWU', vtype = 'ELF', x = x , y = y , pseg = 'RAM', 69 binpath = 'build/convol/convol.elf', 70 local = True ) 61 71 62 72 # stack vsegs : local (one stack per processor) 63 73 for x in xrange (x_size): 64 74 for y in xrange (y_size): 65 for p in xrange( nprocs ): 66 proc_id = (((x * y_size) + y) * nprocs) + p 67 size = (stack_size / nprocs) & 0xFFFFF000 68 base = stack_base + (proc_id * size) 69 mapping.addVseg( vspace, 'conv_stack_%d_%d_%d' % (x,y,p), base, size, 70 'C_WU', vtype = 'BUFFER', x = x , y = y , pseg = 'RAM', 71 local = True, big = True ) 75 cluster_id = (x * y_size) + y 76 if ( mapping.clusters[cluster_id].procs ): 77 for p in xrange( nprocs ): 78 proc_id = (((x * y_size) + y) * nprocs) + p 79 size = (stack_size / nprocs) & 0xFFFFF000 80 base = stack_base + (proc_id * size) 81 82 mapping.addVseg( vspace, 'conv_stack_%d_%d_%d' % (x,y,p), 83 base, size, 'C_WU', vtype = 'BUFFER', 84 x = x , y = y , pseg = 'RAM', 85 local = True, big = True ) 72 86 73 # heap vsegs : distributed but non local (a ll heap vsegs can be accessed by all tasks)87 # heap vsegs : distributed but non local (any heap can be accessed by any task) 74 88 for x in xrange (x_size): 75 89 for y in xrange (y_size): 76 90 cluster_id = (x * y_size) + y 77 size = heap_size 78 base = heap_base + (cluster_id * size) 79 mapping.addVseg( vspace, 'conv_heap_%d_%d' % (x,y), base, size, 80 'C_WU', vtype = 'BUFFER', x = x , y = y , pseg = 'RAM', 81 local = False, big = True ) 91 if ( mapping.clusters[cluster_id].procs ): 92 size = heap_size 93 base = heap_base + (cluster_id * size) 94 95 mapping.addVseg( vspace, 'conv_heap_%d_%d' % (x,y), base, size, 96 'C_WU', vtype = 'BUFFER', x = x , y = y , pseg = 'RAM', 97 local = False, big = True ) 82 98 83 99 # distributed tasks : one task per processor 84 100 for x in xrange (x_size): 85 101 for y in xrange (y_size): 86 for p in xrange( nprocs ): 87 trdid = (((x * y_size) + y) * nprocs) + p 88 
mapping.addTask( vspace, 'conv_%d_%d_%d' % (x,y,p), trdid, x, y, p, 89 'conv_stack_%d_%d_%d' % (x,y,p), 90 'conv_heap_%d_%d' % (x,y), 0 ) 102 cluster_id = (x * y_size) + y 103 if ( mapping.clusters[cluster_id].procs ): 104 for p in xrange( nprocs ): 105 trdid = (((x * y_size) + y) * nprocs) + p 106 107 mapping.addTask( vspace, 'conv_%d_%d_%d' % (x,y,p), 108 trdid, x, y, p, 109 'conv_stack_%d_%d_%d' % (x,y,p), 110 'conv_heap_%d_%d' % (x,y), 0 ) 91 111 92 112 # extend mapping name … … 95 115 return vspace # useful for test 96 116 97 ################################ test ################################################ ######117 ################################ test ################################################ 98 118 99 119 if __name__ == '__main__': -
soft/giet_vm/applications/convol/main.c
r488 r502 1 /////////////////////////////////////////////////////////////////////////////////////// /////1 /////////////////////////////////////////////////////////////////////////////////////// 2 2 // File : main.c (for convol application) 3 3 // Date : june 2014 4 4 // author : Alain Greiner 5 /////////////////////////////////////////////////////////////////////////////////////// /////5 /////////////////////////////////////////////////////////////////////////////////////// 6 6 // This multi-threaded application application implements a 2D convolution product. 7 7 // The convolution kernel is [201]*[35] pixels, but it can be factored in two 8 8 // independant line and column convolution products. 9 9 // It can run on a multi-processors, multi-clusters architecture, with one thread 10 // per processor. It uses the he following hardware parameters, that must be defined 11 // in the hard_config.h file: 12 // - X_SIZE : number of clusters in a row 13 // - Y_SIZE : number of clusters in a column 14 // - NB_PROCS_MAX : number of processors per cluster 15 // - FBUF_X_SIZE : number of pixels per line in frame buffer 16 // - FBUF_Y_SIZE : number of lines in frame buffer 10 // per processor. 17 11 // 18 12 // The (1024 * 1024) pixels image is read from a file (2 bytes per pixel). 19 13 // 20 // - The number of clusters containing processors must be a power of 2. 21 // - The number of processors per cluster must be a power of 2. 22 //////////////////////////////////////////////////////////////////////////////////////////// 23 24 #include "hard_config.h" 14 // - number of clusters containing processors must be power of 2 no larger than 256. 15 // - number of processors per cluster must be power of 2 no larger than 8. 
16 /////////////////////////////////////////////////////////////////////////////////////// 17 25 18 #include "stdio.h" 26 19 #include "stdlib.h" 27 #include " barrier.h"20 #include "user_barrier.h" 28 21 #include "malloc.h" 29 22 30 #define USE_S BT_BARRIER 123 #define USE_SQT_BARRIER 1 31 24 #define VERBOSE 0 32 25 #define SUPER_VERBOSE 0 33 26 27 #define X_SIZE_MAX 16 28 #define Y_SIZE_MAX 16 29 #define PROCS_MAX 8 30 #define CLUSTERS_MAX (X_SIZE_MAX * Y_SIZE_MAX) 31 34 32 #define INITIAL_DISPLAY_ENABLE 0 35 33 #define FINAL_DISPLAY_ENABLE 1 36 34 37 #define NB_CLUSTERS (X_SIZE * Y_SIZE)38 35 #define PIXEL_SIZE 2 39 36 #define NL 1024 … … 53 50 // global instrumentation counters (cluster_id, lpid] 54 51 55 unsigned int START[ NB_CLUSTERS][NB_PROCS_MAX];56 unsigned int H_BEG[ NB_CLUSTERS][NB_PROCS_MAX];57 unsigned int H_END[ NB_CLUSTERS][NB_PROCS_MAX];58 unsigned int V_BEG[ NB_CLUSTERS][NB_PROCS_MAX];59 unsigned int V_END[ NB_CLUSTERS][NB_PROCS_MAX];60 unsigned int D_BEG[ NB_CLUSTERS][NB_PROCS_MAX];61 unsigned int D_END[ NB_CLUSTERS][NB_PROCS_MAX];52 unsigned int START[CLUSTERS_MAX][PROCS_MAX]; 53 unsigned int H_BEG[CLUSTERS_MAX][PROCS_MAX]; 54 unsigned int H_END[CLUSTERS_MAX][PROCS_MAX]; 55 unsigned int V_BEG[CLUSTERS_MAX][PROCS_MAX]; 56 unsigned int V_END[CLUSTERS_MAX][PROCS_MAX]; 57 unsigned int D_BEG[CLUSTERS_MAX][PROCS_MAX]; 58 unsigned int D_END[CLUSTERS_MAX][PROCS_MAX]; 62 59 63 60 // global synchronization barrier 64 61 65 #if USE_S BT_BARRIER66 giet_s bt_barrier_t barrier;62 #if USE_SQT_BARRIER 63 giet_sqt_barrier_t barrier; 67 64 #else 68 65 giet_barrier_t barrier; … … 74 71 75 72 // global pointers on distributed buffers in all clusters 76 unsigned short * GA[ NB_CLUSTERS];77 int * GB[ NB_CLUSTERS];78 int * GC[ NB_CLUSTERS];79 int * GD[ NB_CLUSTERS];80 unsigned char * GZ[ NB_CLUSTERS];73 unsigned short * GA[CLUSTERS_MAX]; 74 int * GB[CLUSTERS_MAX]; 75 int * GC[CLUSTERS_MAX]; 76 int * GD[CLUSTERS_MAX]; 77 unsigned char * GZ[CLUSTERS_MAX]; 81 78 82 79 
/////////////////////////////////////////// … … 109 106 int z; // vertical filter index for loops 110 107 108 // plat-form parameters 109 unsigned int x_size; // number of clusters in a row 110 unsigned int y_size; // number of clusters in a column 111 unsigned int nprocs; // number of processors per cluster 112 113 giet_procs_number( &x_size , &y_size , &nprocs ); 114 111 115 // processor identifiers 112 unsigned int x; 113 unsigned int y; 114 unsigned int lpid; 116 unsigned int x; // x coordinate 117 unsigned int y; // y coordinate 118 unsigned int lpid; // local proc/task id 115 119 giet_proc_xyp( &x, &y, &lpid ); 116 120 117 int file = 0; // file descriptor 118 unsigned int nprocs = NB_PROCS_MAX; // procs per cluster 119 unsigned int nclusters = NB_CLUSTERS; // number of clusters 120 unsigned int cluster_id = (x * Y_SIZE) + y; // continuous cluster index 121 unsigned int task_id = (cluster_id * nprocs) + lpid; // continuous task index 122 unsigned int ntasks = nclusters * nprocs; // number of tasks 123 unsigned int frame_size = FRAME_SIZE; // total size (bytes) 124 unsigned int nblocks = frame_size / 512; // number of blocks per frame 125 126 unsigned int lines_per_task = NL / ntasks; // lines per task 127 unsigned int lines_per_cluster = NL / nclusters; // lines per cluster 128 unsigned int pixels_per_task = NP / ntasks; // columns per task 129 unsigned int pixels_per_cluster = NP / nclusters; // columns per cluster 121 int file = 0; // file descriptor 122 unsigned int nclusters = x_size * y_size; // number of clusters 123 unsigned int cluster_id = (x * y_size) + y; // continuous cluster index 124 unsigned int task_id = (cluster_id * nprocs) + lpid; // continuous task index 125 unsigned int ntasks = nclusters * nprocs; // number of tasks 126 unsigned int frame_size = FRAME_SIZE; // total size (bytes) 127 unsigned int nblocks = frame_size / 512; // number of blocks/frame 128 129 unsigned int lines_per_task = NL / ntasks; // lines per task 130 unsigned int 
lines_per_cluster = NL / nclusters; // lines per cluster 131 unsigned int pixels_per_task = NP / ntasks; // columns per task 132 unsigned int pixels_per_cluster = NP / nclusters; // columns per cluster 130 133 131 134 int first, last; … … 140 143 // parameters checking 141 144 142 if ( (NP != FBUF_X_SIZE) || (NL != FBUF_Y_SIZE) ) 143 { 144 giet_exit("[TRANSPOSE ERROR] Frame buffer size does not fit image size"); 145 } 146 if ((nprocs != 1) && (nprocs != 2) && (nprocs != 4)) 147 giet_exit( "[CONVOL ERROR] NB_PROCS_MAX must be 1, 2 or 4\n"); 148 149 if ((X_SIZE!=1) && (X_SIZE!=2) && (X_SIZE!=4) && (X_SIZE!=8) && (X_SIZE!=16)) 150 giet_exit( "[CONVOL ERROR] X_SIZE must be 1, 2, 4, 8, 16\n"); 145 if ((nprocs != 1) && (nprocs != 2) && (nprocs != 4) && (nprocs != 8)) 146 giet_exit( "[CONVOL ERROR] NB_PROCS_MAX must be 1, 2, 4 or 8\n"); 147 148 if ((x_size!=1) && (x_size!=2) && (x_size!=4) && (x_size!=8) && (x_size!=16)) 149 giet_exit( "[CONVOL ERROR] x_size must be 1, 2, 4, 8, 16\n"); 151 150 152 if (( Y_SIZE!=1) && (Y_SIZE!=2) && (Y_SIZE!=4) && (Y_SIZE!=8) && (Y_SIZE!=16))153 giet_exit( "[CONVOL ERROR] Y_SIZEmust be 1, 2, 4, 8, 16\n");151 if ((y_size!=1) && (y_size!=2) && (y_size!=4) && (y_size!=8) && (y_size!=16)) 152 giet_exit( "[CONVOL ERROR] y_size must be 1, 2, 4, 8, 16\n"); 154 153 155 154 if ( NL % nclusters != 0 ) 156 giet_exit( "[CONVOL ERROR] NB_CLUSTERSmust be a divider of NL");155 giet_exit( "[CONVOL ERROR] CLUSTERS_MAX must be a divider of NL"); 157 156 158 157 if ( NP % nclusters != 0 ) 159 giet_exit( "[CONVOL ERROR] NB_CLUSTERSmust be a divider of NP");158 giet_exit( "[CONVOL ERROR] CLUSTERS_MAX must be a divider of NP"); 160 159 161 160 … … 166 165 if ( (x==0) && (y==0) && (lpid==0) ) 167 166 { 168 // parameters checking169 if ( (NP != FBUF_X_SIZE) || (NL != FBUF_Y_SIZE) )170 giet_exit("[TRANSPOSE ERROR] Frame buffer size does not fit image size");171 172 if ((nprocs != 1) && (nprocs != 2) && (nprocs != 4))173 giet_exit( "[CONVOL ERROR] NB_PROCS_MAX must 
be 1, 2 or 4\n");174 175 if ((X_SIZE!=1) && (X_SIZE!=2) && (X_SIZE!=4) && (X_SIZE!=8) && (X_SIZE!=16))176 giet_exit( "[CONVOL ERROR] X_SIZE must be 1, 2, 4, 8, 16\n");177 178 if ((Y_SIZE!=1) && (Y_SIZE!=2) && (Y_SIZE!=4) && (Y_SIZE!=8) && (Y_SIZE!=16))179 giet_exit( "[CONVOL ERROR] Y_SIZE must be 1, 2, 4, 8, 16\n");180 181 if ( NL % nclusters != 0 )182 giet_exit( "[CONVOL ERROR] NB_CLUSTERS must be a divider of NL");183 184 if ( NP % nclusters != 0 )185 giet_exit( "[CONVOL ERROR] NB_CLUSTERS must be a divider of NP");186 187 188 167 giet_shr_printf("\n[CONVOL] task[0,0,0] starts barrier init at cycle %d\n" 189 "- NB_CLUSTERS= %d\n"190 "- NB_PROCS_MAX= %d\n"191 "- NB_TASKS= %d\n"192 "- NB_BLOCKS = %x\n",168 "- CLUSTERS = %d\n" 169 "- PROCS = %d\n" 170 "- TASKS = %d\n" 171 "- BLOCKS = %d\n", 193 172 giet_proctime(), nclusters, nprocs, ntasks, nblocks ); 194 #if USE_S BT_BARRIER195 s bt_barrier_init( &barrier, nclusters, nprocs );173 #if USE_SQT_BARRIER 174 sqt_barrier_init( &barrier, x_size , y_size , nprocs ); 196 175 #else 197 176 barrier_init( &barrier, ntasks ); … … 216 195 217 196 #if VERBOSE 218 giet_shr_printf( "\n[CONVOL] task[%d,%d,%d] enters malloc at cycle %d\n", x,y,lpid, date ); 197 giet_shr_printf( "\n[CONVOL] task[%d,%d,%d] enters malloc at cycle %d\n", 198 x,y,lpid, date ); 219 199 #endif 220 200 … … 242 222 243 223 /////////////////////////////// 244 #if USE_S BT_BARRIER245 s bt_barrier_wait( &barrier );224 #if USE_SQT_BARRIER 225 sqt_barrier_wait( &barrier ); 246 226 #else 247 227 barrier_wait( &barrier ); … … 253 233 /////////////////////////////////////////////////////////////////// 254 234 255 unsigned short * A[ NB_CLUSTERS];256 int * B[NB_CLUSTERS];257 int * C[NB_CLUSTERS];258 int * D[NB_CLUSTERS];259 unsigned char * Z[NB_CLUSTERS];235 unsigned short * A[CLUSTERS_MAX]; 236 int * B[CLUSTERS_MAX]; 237 int * C[CLUSTERS_MAX]; 238 int * D[CLUSTERS_MAX]; 239 unsigned char * Z[CLUSTERS_MAX]; 260 240 261 241 for (c = 0; c < nclusters; c++) … … 283 263 
" at cycle %d\n", giet_proctime() ); 284 264 285 for ( c = 0 ; c < NB_CLUSTERS; c++ )265 for ( c = 0 ; c < nclusters ; c++ ) 286 266 { 287 267 giet_shr_printf( "\n[CONVOL] task[0,0,0] starts load " … … 341 321 342 322 //////////////////////////// 343 #if USE_S BT_BARRIER344 s bt_barrier_wait( &barrier );323 #if USE_SQT_BARRIER 324 sqt_barrier_wait( &barrier ); 345 325 #else 346 326 barrier_wait( &barrier ); … … 447 427 448 428 ///////////////////////////// 449 #if USE_S BT_BARRIER450 s bt_barrier_wait( &barrier );429 #if USE_SQT_BARRIER 430 sqt_barrier_wait( &barrier ); 451 431 #else 452 432 barrier_wait( &barrier ); … … 567 547 568 548 //////////////////////////// 569 #if USE_S BT_BARRIER570 s bt_barrier_wait( &barrier );549 #if USE_SQT_BARRIER 550 sqt_barrier_wait( &barrier ); 571 551 #else 572 552 barrier_wait( &barrier ); … … 626 606 627 607 ////////////////////////////// 628 #if USE_S BT_BARRIER629 s bt_barrier_wait( &barrier );608 #if USE_SQT_BARRIER 609 sqt_barrier_wait( &barrier ); 630 610 #else 631 611 barrier_wait( &barrier );
Note: See TracChangeset for help on using the changeset viewer.