Changeset 657 for trunk/user
- Timestamp:
- Mar 18, 2020, 11:16:59 PM (5 years ago)
- Location:
- trunk/user
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/user/display/display.c
r656 r657 15 15 #include <almosmkh.h> 16 16 17 #define FILENAME "misc/images_128.raw" 17 #define PATH_MAX_LENGHT 128 18 18 19 #define FBF_TYPE 420 19 20 #define NPIXELS 128 … … 56 57 } 57 58 } 59 58 60 //////////////// 59 61 int main( void ) … … 64 66 unsigned int fbf_type; 65 67 unsigned int nbytes; 66 unsigned int image = 0;67 68 int val; 68 69 int error; 69 70 unsigned long long start_cycle; 71 72 // get start cycle 73 get_cycle( &start_cycle ); 74 75 printf("\n[display] starts at cycle %d\n", (unsigned int)start_cycle ); 70 char pathname[PATH_MAX_LENGHT]; 76 71 77 72 // check frame buffer size … … 92 87 } 93 88 89 // get pathname for input file 90 while( 1 ) 91 { 92 printf("\n[display] path = "); 93 94 error = get_string( pathname , PATH_MAX_LENGHT ); 95 96 if ( error ) printf("\n[display error] cannot get path for input file\n" ); 97 else break; 98 } 99 94 100 // open file 101 int fd = open( pathname , O_RDONLY , 0 ); 95 102 96 FILE * f = fopen( FILENAME , NULL ); 97 // int fd = open( FILENAME, O_RDONLY , 0 ); 98 99 if( f == NULL ) 103 if( fd < 0 ) 100 104 { 101 printf("\n[display error] Cannot open file <%s>\n", FILENAME);105 printf("\n[display error] Cannot open file <%s>\n", pathname ); 102 106 exit( 0 ); 103 107 } 104 108 105 printf("\n[display] open file <%s>\n", FILENAME);109 printf("\n[display] open file <%s>\n", pathname ); 106 110 107 // loop on images 108 while ( 1 ) 111 // load buffer from file 112 nbytes = read( fd , buffer , NPIXELS * NLINES ); 113 114 if( nbytes != NPIXELS * NLINES ) 109 115 { 110 // load image from file 111 nbytes = fread( buffer , 1 , NPIXELS * NLINES , f ); 112 // nbytes = read( fd , buffer , NPIXELS * NLINES ); 113 if( nbytes != NPIXELS * NLINES ) 114 { 115 printf("\n[display error] Cannot load image %d\n", image ); 116 exit( 0 ); 117 } 118 119 printf("\n[display] load image %d in buffer %x\n", image, buffer ); 120 121 // display image 122 error = fbf_write( buffer , NPIXELS * NLINES , 0 ); 123 124 if( error ) 125 { 126 printf("\n[display error] Cannot access frame buffer\n"); 127 exit( 0 ); 128 } 129 130 printf("\n[display] display image %d\n", image ); 131 132 image++; 133 134 // interactive behaviour 135 val = getchar(); 116 printf("\n[display error] Cannot load image \n" ); 117 exit( 0 ); 136 118 } 137 119 120 printf("\n[display] load image in buffer %x\n", buffer ); 121 122 // display image 123 error = fbf_write( buffer , NPIXELS * NLINES , 0 ); 124 125 if( error ) 126 { 127 printf("\n[display error] Cannot access frame buffer\n"); 128 exit( 0 ); 129 } 130 131 printf("\n[display] display image\n"); 132 138 133 // close file 139 fclose( f ); 140 // close( fd ); 134 close( fd ); 141 135 142 136 exit(0); -
trunk/user/ksh/ksh.c
r656 r657 413 413 " display barrier pid\n" 414 414 " display mapper path page nbytes\n" 415 " display fat page entries\n"416 " display fat cxy 0\n" );415 " display fat min nslots\n" 416 " display fat cxy 0\n" ); 417 417 } 418 418 //////////////////////////////////// … … 581 581 if( argc != 4 ) 582 582 { 583 printf(" usage: display fat page_id nb_entries\n");583 printf(" usage: display fat min_slot nb_slots\n"); 584 584 } 585 585 else 586 586 { 587 unsigned int page_id= atoi(argv[2]);588 unsigned int nb_ entries = atoi(argv[3]);589 590 if( display_fat( page_id, nb_entries ) )587 unsigned int min_slot = atoi(argv[2]); 588 unsigned int nb_slots = atoi(argv[3]); 589 590 if( display_fat( min_slot, nb_slots ) ) 591 591 { 592 printf(" error: cannot display page %d of fat\n", page_id);592 printf(" error: cannot display fat\n"); 593 593 } 594 594 } … … 1234 1234 else 1235 1235 { 1236 strcpy( cmd , "load bin/user/ kleenex.elf" );1236 strcpy( cmd , "load bin/user/transpose.elf" ); 1237 1237 printf("[ksh] %s\n", cmd ); 1238 1238 execute( cmd ); -
trunk/user/pgcd/pgcd.c
r637 r657 18 18 void main( void ) 19 19 { 20 intopx;21 intopy;22 intx;23 inty;20 unsigned int opx; 21 unsigned int opy; 22 unsigned int x; 23 unsigned int y; 24 24 unsigned long long cycle; 25 25 unsigned int cxy; … … 34 34 // get operand X 35 35 printf("operand X = "); 36 opx = get_uint32();36 get_uint32( &opx ); 37 37 printf("\n"); 38 38 39 39 // get operand Y 40 40 printf("operand Y = "); 41 opy = get_uint32();41 get_uint32( &opy ); 42 42 printf("\n"); 43 43 -
trunk/user/transpose/transpose.c
r656 r657 15 15 // A core is identified by two indexes [cxy,lid] : cxy is the cluster identifier, 16 16 // (that is NOT required to be a continuous index), and lid is the local core index, 17 // (that must be in the [ Ø,NCORES-1] range).17 // (that must be in the [0,NCORES-1] range). 18 18 // 19 19 // The main() function can run on any core in any cluster. This main thread 20 // makes the initialisations, uses the pthread_create() syscall to launch (NTHREADS-1) 21 // other threads in "attached" mode running in parallel the execute() function, calls 22 // himself the execute() function, wait completion of the (NTHREADS-1) other threads 23 // with a pthread_join(), and finally calls the instrument() function to display 24 // and register the instrumentation results when execution is completed. 25 // All threads run the execute() function, but each thread transposes only 26 // (NLINES / NTHREADS) lines. This requires that NLINES == k * NTHREADS. 20 // makes the initialisations, load the input file to the "image_in" buffer, 21 // launches the working threads, calls the instrument() function when all working 22 // threads complete, and saves the result "image_out" buffer to the output file. 27 23 // 28 // The number Nof working threads is always defined by the number of cores availables24 // The number of working threads is always defined by the number of cores availables 29 25 // in the architecture, but this application supports three placement modes. 30 26 // In all modes, the working threads are identified by the [tid] continuous index … … 51 47 // per core, and the same relation between the thread[tid] and the core[cxy][lpid]. 52 48 // 53 // The buf_in[x,y] and buf_out[put buffers containing the direct and transposed images 54 // are distributed in clusters: each thread[cid][0] allocate a local input buffer 55 // and load in this buffer all lines that must be handled by the threads sharing the 56 // same cid, from the mapper of the input image file. 57 // In the execute function, all threads in the group defined by the cid index read pixels 58 // from the local buf_in[cid] buffer, and write pixels to all remote buf_out[cid] buffers. 59 // Finally, each thread displays a part of the transposed image to the frame buffer. 49 // Each working thread[cid][lid] run the "execute" function, that uses the "buf_in" and 50 // "buf_out" local buffers, containing the direct and transposed images: 51 // Each thread[cid][0] allocates two buf_in[cid] and buf_out[cid] buffers, load from 52 // "image_in" to buf_in[cid] all lines that must be handled by the threads sharing the 53 // same cid, and finally save from buf_out[cid] to "image_out" all lines that have been 54 // transposed to buf_out[cid]. 55 // Each thread[cid][lid] in the group defined by the cid index read pixels from the 56 // local buf_in[cid] buffer, and write pixels to all remote // buf_out[cid] buffers. 60 57 // 61 58 // - The image must fit the frame buffer size, that must be power of 2. … … 82 79 #define THREADS_MAX (X_MAX * Y_MAX * CORES_MAX) // max number of threads 83 80 84 #define IMAGE_SIZE 512 // image size85 81 #define IMAGE_TYPE 420 // pixel encoding type 86 #define INPUT_FILE_PATH "/misc/couple_512.raw" // input file pathname 87 #define OUTPUT_FILE_PATH "/misc/transposed_512.raw" // output file pathname 82 83 //#define IMAGE_SIZE 128 // image size 84 //#define INPUT_FILE_PATH "/misc/images_128.raw" // input file pathname 85 //#define OUTPUT_FILE_PATH "/misc/transposed_128.raw" // output file pathname 86 87 //#define IMAGE_SIZE 256 // image size 88 //#define INPUT_FILE_PATH "/misc/lena_256.raw" // input file pathname 89 #//define OUTPUT_FILE_PATH "/misc/transposed_256.raw" // output file pathname 90 91 //#define IMAGE_SIZE 512 // image size 92 //#define INPUT_FILE_PATH "/misc/couple_512.raw" // input file pathname 93 //#define OUTPUT_FILE_PATH "/misc/transposed_512.raw" // output file pathname 94 95 #define IMAGE_SIZE 1024 // image size 96 #define INPUT_FILE_PATH "/misc/philips_1024.raw" // input file pathname 97 #define OUTPUT_FILE_PATH "/misc/transposed_1024.raw" // output file pathname 88 98 89 99 #define SAVE_RESULT_FILE 0 // save result image on disk 90 #define USE_DQT_BARRIER 1// quad-tree barrier if non zero100 #define USE_DQT_BARRIER 0 // quad-tree barrier if non zero 91 101 92 102 #define NO_PLACEMENT 0 // uncontrolefdthread placement 93 #define EXPLICIT_PLACEMENT 0// explicit threads placement94 #define PARALLEL_PLACEMENT 1// parallel threads placement95 96 #define VERBOSE_MAIN 0// main function print comments97 #define VERBOSE_ EXEC 0 // execfunction print comments98 #define VERBOSE_ INSTRU 0 // instrufunction print comments103 #define EXPLICIT_PLACEMENT 1 // explicit threads placement 104 #define PARALLEL_PLACEMENT 0 // parallel threads placement 105 106 #define VERBOSE_MAIN 1 // main function print comments 107 #define VERBOSE_MAIN_DETAILED 0 // main function print comments 108 #define VERBOSE_EXEC 1 // exec function print comments 99 109 100 110 … … 109 119 // instrumentation counters for each thread in each cluster 110 120 // indexed by [cid][lid] : cluster continuous index / thread local index 121 unsigned int ALOC_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 122 unsigned int ALOC_END [CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 111 123 unsigned int LOAD_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 112 124 unsigned int LOAD_END [CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 113 125 unsigned int TRSP_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 114 126 unsigned int TRSP_END [CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 115 unsigned int DISP_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 116 unsigned int DISP_END [CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 117 118 // pointer on buffer containing the input image, maped by the main to the input file 119 unsigned char * image_in; 120 121 // pointer on buffer containing the output image, maped by the main to the output file 122 unsigned char * image_out; 123 124 // arrays of pointers on distributed buffers indexed by [cid] : cluster continuous index 125 unsigned char * buf_in_ptr [CLUSTERS_MAX]; 126 unsigned char * buf_out_ptr[CLUSTERS_MAX]; 127 unsigned int SAVE_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 128 unsigned int SAVE_END [CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 129 unsigned int FREE_START[CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 130 unsigned int FREE_END [CLUSTERS_MAX][CORES_MAX] = {{ 0 }}; 131 132 // buffer containing the input image, loaded by the main from input file 133 unsigned char image_in[IMAGE_SIZE * IMAGE_SIZE]; 134 135 // buffer containing the output image, saved by the main to output file 136 unsigned char image_out[IMAGE_SIZE * IMAGE_SIZE]; 137 138 // arrays of pointers on distributed buffers indexed by [cid] 139 unsigned char * buf_in [CLUSTERS_MAX]; 140 unsigned char * buf_out[CLUSTERS_MAX]; 141 142 // pointer and identifier for dynamically allocated FBF window 143 void * win_buf; 144 int wid; 127 145 128 146 // synchronisation barrier (all working threads) … … 205 223 } 206 224 207 // main threadget identifiers for core executing main225 // get identifiers for core executing main 208 226 unsigned int cxy_main; 209 227 unsigned int lid_main; … … 214 232 unsigned int nthreads = nclusters * ncores; 215 233 216 // main thread get FBF size and type 234 if( nthreads > IMAGE_SIZE ) 235 { 236 printf("\n[transpose error] number of threads larger than number of lines\n"); 237 exit( 0 ); 238 } 239 240 // get FBF size and type 217 241 unsigned int fbf_width; 218 242 unsigned int fbf_height; … … 220 244 fbf_get_config( &fbf_width , &fbf_height , &fbf_type ); 221 245 222 if( (fbf_width != IMAGE_SIZE) || (fbf_height !=IMAGE_SIZE) || (fbf_type != IMAGE_TYPE) )246 if( (fbf_width < IMAGE_SIZE) || (fbf_height < IMAGE_SIZE) || (fbf_type != IMAGE_TYPE) ) 223 247 { 224 248 printf("\n[transpose error] image does not fit FBF size or type\n"); … … 226 250 } 227 251 228 if( nthreads > IMAGE_SIZE ) 229 { 230 printf("\n[transpose error] number of threads larger than number of lines\n"); 231 exit( 0 ); 232 } 233 234 unsigned int npixels = IMAGE_SIZE * IMAGE_SIZE; 252 // define total number of pixels 253 int npixels = IMAGE_SIZE * IMAGE_SIZE; 235 254 236 255 // define instrumentation file name 237 256 if( NO_PLACEMENT ) 238 257 { 239 printf("\n[transpose] %d cluster(s) / %d core(s) / FBF[%d*%d]/ PID %x / NO_PLACE\n",240 nclusters, ncores, fbf_width, fbf_height, getpid() );258 printf("\n[transpose] %d cluster(s) / %d core(s) / <%s> / PID %x / NO_PLACE\n", 259 nclusters, ncores, INPUT_FILE_PATH , getpid() ); 241 260 242 261 // build instrumentation file name … … 251 270 if( EXPLICIT_PLACEMENT ) 252 271 { 253 printf("\n[transpose] %d cluster(s) / %d core(s) / FBF[%d*%d]/ PID %x / EXPLICIT\n",254 nclusters, ncores, fbf_width, fbf_height, getpid() );272 printf("\n[transpose] %d cluster(s) / %d core(s) / <%s> / PID %x / EXPLICIT\n", 273 nclusters, ncores, INPUT_FILE_PATH , getpid() ); 255 274 256 275 // build instrumentation file name … … 265 284 if( PARALLEL_PLACEMENT ) 266 285 { 267 printf("\n[transpose] %d cluster(s) / %d core(s) / FBF[%d*%d]/ PID %x / PARALLEL\n",268 nclusters, ncores, fbf_width, fbf_height, getpid() );286 printf("\n[transpose] %d cluster(s) / %d core(s) / <%s> / PID %x / PARALLEL\n", 287 nclusters, ncores, INPUT_FILE_PATH , getpid() ); 269 288 270 289 // build instrumentation file name … … 277 296 } 278 297 298 // open a window in FBF 299 wid = fbf_create_window( 0, // l_zero 300 0, // p_zero 301 IMAGE_SIZE, // lines 302 IMAGE_SIZE, // pixels 303 &win_buf ); 304 if( wid < 0) 305 { 306 printf("\n[transpose error] cannot open FBF window\n"); 307 exit( 0 ); 308 } 309 310 #if VERBOSE_MAIN 311 printf("\n[transpose] main on core[%x,%d] created FBF window %d / buffer %x\n", 312 cxy_main, lid_main, wid , win_buf ); 313 #endif 314 279 315 // open instrumentation file 280 316 snprintf( pathname , 64 , "/home/%s", filename ); 281 317 FILE * f = fopen( pathname , NULL ); 318 282 319 if ( f == NULL ) 283 320 { 284 printf("\n[transpose error] cannot open instru mentationfile %s\n", pathname );321 printf("\n[transpose error] cannot open instru file %s\n", pathname ); 285 322 exit( 0 ); 286 323 } … … 312 349 313 350 #if VERBOSE_MAIN 314 printf("\n[transpose] main on core[%x,%d] complete sbarrier initialisation\n",351 printf("\n[transpose] main on core[%x,%d] completed barrier initialisation\n", 315 352 cxy_main, lid_main ); 316 353 #endif 317 354 318 // main threadopen input file355 // open input file 319 356 int fd_in = open( INPUT_FILE_PATH , O_RDONLY , 0 ); 320 357 … … 329 366 #endif 330 367 331 // main thread map image_in buffer to input image file 332 image_in = (unsigned char *)mmap( NULL, 333 npixels, 334 PROT_READ, 335 MAP_FILE | MAP_SHARED, 336 fd_in, 337 0 ); // offset 338 if ( image_in == NULL ) 339 { 340 printf("\n[transpose error] main cannot map buffer to file %s\n", INPUT_FILE_PATH ); 341 exit( 0 ); 342 } 343 344 #if VERBOSE_MAIN 345 printf("\n[transpose] main map buffer to file <%s>\n", INPUT_FILE_PATH ); 346 #endif 347 348 // main thread display input image on FBF 349 if( fbf_write( image_in, 350 npixels, 351 0 ) ) 352 { 353 printf("\n[transpose error] main cannot access FBF\n"); 354 exit( 0 ); 355 } 356 357 #if SAVE_RESULT_IMAGE 358 359 // main thread open output file 368 // open output file 360 369 int fd_out = open( OUTPUT_FILE_PATH , O_CREAT , 0 ); 361 370 … … 366 375 } 367 376 377 // move input image to input buffer 378 if( read( fd_in , image_in , npixels ) != npixels ) 379 { 380 printf("\n[transpose error] main cannot read input image\n"); 381 exit( 0 ); 382 } 383 368 384 #if VERBOSE_MAIN 369 printf("\n[transpose] main open file <%s> / fd = %d\n", OUTPUT_FILE_PATH , fd_out ); 370 #endif 371 372 // main thread map image_out buffer to output image file 373 image_out = (unsigned char *)mmap( NULL, 374 npixels, 375 PROT_WRITE, 376 MAP_FILE | MAP_SHARED, 377 fd_out, 378 0 ); // offset 379 if ( image_out == NULL ) 380 { 381 printf("\n[transpose error] main cannot map buf_out to file %s\n", OUTPUT_FILE_PATH ); 382 exit( 0 ); 383 } 384 385 #if VERBOSE_MAIN 386 printf("\n[transpose] main map buffer to file <%s>\n", OUTPUT_FILE_PATH ); 387 #endif 388 389 #endif // SAVE_RESULT_IMAGE 385 printf("\n[transpose] main moved file <%s> to buf_in\n", INPUT_FILE_PATH ); 386 #endif 390 387 391 388 ///////////////////////////////////////////////////////////////////////////////////// … … 417 414 } 418 415 419 #if VERBOSE_MAIN 416 #if VERBOSE_MAIN_DETAILED 420 417 printf("\n[transpose] main created thread %d\n", tid ); 421 418 #endif … … 450 447 } 451 448 452 #if VERBOSE_MAIN 453 printf("\n[transpose] main successfullyjoined thread %x\n", tid );449 #if VERBOSE_MAIN_DETAILED 450 printf("\n[transpose] main joined thread %x\n", tid ); 454 451 #endif 455 452 … … 500 497 exit( 0 ); 501 498 } 502 #if VERBOSE_MAIN 499 500 #if VERBOSE_MAIN_DETAILED 503 501 printf("\n[transpose] main created thread[%d] on core[%x,%d]\n", tid, cxy, l ); 504 502 #endif … … 536 534 exit( 0 ); 537 535 } 538 #if VERBOSE_MAIN 539 printf("\n[transpose] main joined thread %d on core[%x,%d]\n", tid , cxy , l ); 536 537 #if VERBOSE_MAIN_DETAILED 538 printf("\n[transpose] main joined thread %d\n", tid ); 540 539 #endif 541 540 } … … 567 566 ///////////////////////////////////////////////////////////////////////////// 568 567 569 // main threadregister instrumentation results568 // register instrumentation results 570 569 instrument( f , filename ); 571 570 572 // main thread close input file 571 #if VERBOSE_MAIN 572 printf("\n[transpose] main completed instrumentation\n"); 573 #endif 574 575 /* 576 printf("\n> "); 577 getchar(); 578 579 // move window 580 if( fbf_move_window( wid , 100 , 100 ) ) 581 { 582 printf("\n[transpose error] main cannot move FBF window\n"); 583 exit( 0 ); 584 } 585 586 printf("\n> "); 587 getchar(); 588 */ 589 // save image_out to output file 590 if( write( fd_out , image_out , npixels ) != npixels ) 591 { 592 printf("\n[transpose error] main cannot write output image\n"); 593 exit( 0 ); 594 } 595 596 #if VERBOSE_MAIN 597 printf("\n[transpose] main saved buf_out to output file\n"); 598 #endif 599 600 // close input file 573 601 close( fd_in ); 574 602 575 #if SAVE_RESULT_IMAGE 576 577 // main thread close output file 603 #if VERBOSE_MAIN 604 printf("\n[transpose] main closed input file\n"); 605 #endif 606 607 // close output file 578 608 close( fd_out ); 579 609 580 #endif 581 582 // main close instrumentation file 610 #if VERBOSE_MAIN 611 printf("\n[transpose] main closed output file\n"); 612 #endif 613 614 // close instrumentation file 583 615 fclose( f ); 616 617 #if VERBOSE_MAIN 618 printf("\n[transpose] main closed instrumentation file\n"); 619 #endif 620 621 // delete FBF window 622 if( fbf_delete_window( wid ) ) 623 { 624 printf("\n[transpose error] main cannot delete FBF window\n"); 625 exit( 0 ); 626 } 584 627 585 628 // main thread suicide … … 597 640 { 598 641 unsigned long long date; 642 unsigned int l; // line index for loop 643 unsigned int p; // pixel index for loop 644 int error; 645 646 unsigned char * wbuf = win_buf; 599 647 600 unsigned int l; // line index for loop601 unsigned int p; // pixel index for loop602 603 648 pthread_parallel_work_args_t * args = (pthread_parallel_work_args_t *)arguments; 604 649 … … 613 658 // get thread abstract identifiers 614 659 unsigned int tid = args->tid; 615 unsigned int cid = tid / ncores; 616 unsigned int lid = tid % ncores; 660 unsigned int cid = tid / ncores; // abstract cluster index 661 unsigned int lid = tid % ncores; // local thread index 617 662 618 663 #if VERBOSE_EXEC … … 620 665 unsigned int lpid; 621 666 get_core_id( &cxy , &lpid ); // get core physical identifiers 667 #endif 668 669 #if VERBOSE_EXEC 622 670 printf("\n[transpose] exec[%d] on core[%x,%d] enters parallel exec\n", 623 671 tid , cxy , lpid ); … … 625 673 626 674 get_cycle( &date ); 627 LOAD_START[cid][lid] = (unsigned int)date;628 629 // buildtotal number of pixels per image675 ALOC_START[cid][lid] = (unsigned int)date; 676 677 // compute total number of pixels per image 630 678 unsigned int npixels = IMAGE_SIZE * IMAGE_SIZE; 631 679 632 // buildtotal number of threads and clusters680 // compute total number of threads and clusters 633 681 unsigned int nclusters = x_size * y_size; 634 682 unsigned int nthreads = nclusters * ncores; 635 683 636 unsigned int buf_size = npixels / nclusters; // number of bytes in buf_in & buf_out 637 unsigned int offset = cid * buf_size; // offset in file (bytes) 638 639 unsigned char * buf_in = NULL; // private pointer on local input buffer 640 unsigned char * buf_out = NULL; // private pointer on local output buffer 641 642 // Each thread[cid,0] allocate a local buffer buf_in, and register 643 // the base adress in the global variable buf_in_ptr[cid] 644 // this local buffer is shared by all threads with the same cid 684 // compute number of pixels per cid & per thread 685 unsigned int pixels_per_cid = npixels / nclusters; 686 unsigned int pixels_per_lid = pixels_per_cid / ncores; 687 688 // compute first and last line per thread 689 unsigned int lines_per_cid = pixels_per_cid / IMAGE_SIZE; 690 unsigned int lines_per_lid = pixels_per_lid / IMAGE_SIZE; 691 692 unsigned int line_first = (cid * lines_per_cid) + (lid * lines_per_lid); 693 unsigned int line_last = line_first + lines_per_lid; 694 695 // Each thread[cid,0] allocates two local buffers, and register the base 696 // adresses in the global variable buf_in_ptr[cid] & buf_out_ptr[cid]. 697 645 698 if( lid == 0 ) 646 699 { 647 700 // allocate buf_in 648 buf_in = (unsigned char *)malloc( buf_size);649 650 if( buf_in == NULL )701 buf_in[cid] = (unsigned char *)malloc( pixels_per_cid ); 702 703 if( buf_in[cid] == NULL ) 651 704 { 652 705 printf("\n[transpose error] thread[%d] cannot allocate buf_in\n", tid ); … … 654 707 } 655 708 656 // register buf_in buffer in global array of pointers657 buf_in_ptr[cid] = buf_in;658 659 709 #if VERBOSE_EXEC 660 710 printf("\n[transpose] exec[%d] on core[%x,%d] allocated buf_in = %x\n", … … 662 712 #endif 663 713 664 }665 666 // Each thread[cid,0] copy relevant part of the image_in to buf_in667 if( lid == 0 )668 {669 memcpy( buf_in,670 image_in + offset,671 buf_size );672 }673 674 #if VERBOSE_EXEC675 printf("\n[transpose] exec[%d] on core[%x,%d] loaded buf_in[%d]\n",676 tid , cxy , lpid , cid );677 #endif678 679 // Each thread[cid,0] allocate a local buffer buf_out, and register680 // the base adress in the global variable buf_out_ptr[cid]681 if( lid == 0 )682 {683 714 // allocate buf_out 684 buf_out = (unsigned char *)malloc( buf_size);685 686 if( buf_out == NULL )715 buf_out[cid] = (unsigned char *)malloc( pixels_per_cid ); 716 717 if( buf_out[cid] == NULL ) 687 718 { 688 719 printf("\n[transpose error] thread[%d] cannot allocate buf_in\n", tid ); … … 690 721 } 691 722 692 // register buf_in buffer in global array of pointers693 buf_out_ptr[cid] = buf_out;694 695 723 #if VERBOSE_EXEC 696 724 printf("\n[transpose] exec[%d] on core[%x,%d] allocated buf_out = %x\n", … … 699 727 700 728 } 701 729 730 get_cycle( &date ); 731 ALOC_END[cid][lid] = (unsigned int)date; 732 733 ///////////////////////////////// 734 pthread_barrier_wait( &barrier ); 735 ///////////////////////////////// 736 737 get_cycle( &date ); 738 LOAD_START[cid][lid] = (unsigned int)date; 739 740 // all threads copy relevant part of the image_in to buf_in[cid] 741 memcpy( buf_in[cid] + (lid * pixels_per_lid), 742 image_in + (cid * pixels_per_cid) + (lid * pixels_per_lid), 743 pixels_per_lid ); 744 745 #if VERBOSE_EXEC 746 printf("\n[transpose] exec[%d] on core[%x,%d] loaded buf_in[%d]\n", 747 tid , cxy , lpid , cid ); 748 #endif 749 750 // all local threads copy part of buf_in[cid] to FBF window for display 751 memcpy( wbuf + (cid * pixels_per_cid) + (lid * pixels_per_lid), 752 buf_in[cid] + (lid * pixels_per_lid), 753 pixels_per_lid ); 754 755 #if VERBOSE_EXEC 756 printf("\n[transpose] exec[%d] on core[%x,%d] loaded buf_in to FBF (first %d / last %d)\n", 757 tid , cxy , lpid , line_first , line_last ); 758 #endif 759 760 // retresh window 761 error = fbf_refresh_window( wid , line_first , line_last ); 762 763 if( error ) 764 { 765 printf("\n[transpose error] exec[%d] cannot refresh FBF window\n", tid ); 766 exit( 0 ); 767 } 768 702 769 get_cycle( &date ); 703 770 LOAD_END[cid][lid] = (unsigned int)date; … … 705 772 ///////////////////////////////// 706 773 pthread_barrier_wait( &barrier ); 774 ///////////////////////////////// 707 775 708 776 get_cycle( &date ); 709 777 TRSP_START[cid][lid] = (unsigned int)date; 710 778 711 // All threads contribute to parallel transpose from buf_in to buf_out 779 // All threads contribute to parallel transpose from buf_in to buf_out: 712 780 // each thread makes the transposition for nlt lines (nlt = npixels/nthreads) 713 781 // from line [tid*nlt] to line [(tid + 1)*nlt - 1] 714 782 // (p,l) are the absolute pixel coordinates in the source image 715 // (l,p) are the absolute pixel coordinates in the source image 716 // (p,l) are the absolute pixel coordinates in the dest image 717 718 get_cycle( &date ); 719 TRSP_START[cid][lid] = (unsigned int)date; 783 // (l,p) are the absolute pixel coordinates in the dest image 720 784 721 785 unsigned int nlt = IMAGE_SIZE / nthreads; // number of lines per thread … … 729 793 unsigned char byte; 730 794 731 unsigned int first = tid * nlt; // first line index for a given thread795 unsigned int first = tid * nlt; // first line index for a given thread 732 796 unsigned int last = first + nlt; // last line index for a given thread 733 797 … … 742 806 src_index = (l % nlc) * IMAGE_SIZE + p; 743 807 744 byte = buf_in_ptr[src_cid][src_index];808 byte = buf_in[src_cid][src_index]; 745 809 746 810 // write one byte to remote buf_out … … 748 812 dst_index = (p % nlc) * IMAGE_SIZE + l; 749 813 750 buf_out _ptr[dst_cid][dst_index] = byte;814 buf_out[dst_cid][dst_index] = byte; 751 815 } 752 816 } … … 762 826 ///////////////////////////////// 763 827 pthread_barrier_wait( &barrier ); 828 ///////////////////////////////// 764 829 765 830 get_cycle( &date ); 766 DISP_START[cid][lid] = (unsigned int)date; 767 768 // All threads contribute to parallel display 769 // from local buf_out to frame buffer 770 unsigned int npt = npixels / nthreads; // number of pixels per thread 771 772 if( fbf_write( &buf_out_ptr[cid][lid * npt], 773 npt, 774 npt * tid ) ) 775 { 776 printf("\n[transpose error] thread[%d] cannot access FBF\n", tid ); 777 pthread_exit( &THREAD_EXIT_FAILURE ); 778 } 779 780 #if VERBOSE_EXEC 781 printf("\n[transpose] exec[%d] on core [%x,%d] completes display\n", 782 tid, cxy , lpid ); 783 #endif 784 785 get_cycle( &date ); 786 DISP_END[cid][lid] = (unsigned int)date; 787 788 ///////////////////////////////// 789 pthread_barrier_wait( &barrier ); 790 791 #if SAVE_RESULT_IMAGE 792 793 // Each thread[cid,0] copy buf_out to relevant part of image_out 794 if( lid == 0 ) 795 { 796 memcpy( image_out + offset, 797 buf_out, 798 buf_size ); 799 } 831 SAVE_START[cid][lid] = (unsigned int)date; 832 833 // each local threads copy part of buf_out[cid] to FBF window for display 834 memcpy( wbuf + (cid * pixels_per_cid) + (lid * pixels_per_lid), 835 buf_out[cid] + (lid * pixels_per_lid), 836 pixels_per_lid ); 837 838 #if VERBOSE_EXEC 839 printf("\n[transpose] exec[%d] on core[%x,%d] loaded buf_out to FBF (first %d / last %d)\n", 840 tid , cxy , lpid , line_first , line_last ); 841 #endif 842 843 // refresh window 844 error = fbf_refresh_window( wid , line_first , line_last ); 845 846 if( error ) 847 { 848 printf("\n[transpose error] exec[%d] cannot refresh FBF window\n", tid ); 849 exit( 0 ); 850 } 851 852 // each local thread copy relevant part of buf_out to image_out 853 memcpy( image_out + (cid * pixels_per_cid) + (lid * pixels_per_lid), 854 buf_out[cid] + (lid * pixels_per_lid), 855 pixels_per_lid ); 800 856 801 857 #if VERBOSE_EXEC … … 804 860 #endif 805 861 806 #endif 807 808 // Each thread[cid,0] releases local buffer buf_out 862 get_cycle( &date ); 863 SAVE_END[cid][lid] = (unsigned int)date; 864 865 ///////////////////////////////// 866 pthread_barrier_wait( &barrier ); 867 ///////////////////////////////// 868 869 get_cycle( &date ); 870 FREE_START[cid][lid] = (unsigned int)date; 871 872 // Each thread[cid,0] release local buffers buf_in & buf_out 873 809 874 if( lid == 0 ) 810 875 { 811 // release buf_out 812 free( buf_in ); 813 free( buf_out ); 814 } 876 // release local buffers 877 free( buf_in[cid] ); 878 free( buf_out[cid] ); 879 880 #if VERBOSE_EXEC 881 printf("\n[transpose] exec[%d] on core[%x,%d] released buf_in & buf_out\n", 882 tid , cxy , lpid ); 883 #endif 884 885 } 886 887 get_cycle( &date ); 888 FREE_END[cid][lid] = (unsigned int)date; 889 890 ///////////////////////////////// 891 pthread_barrier_wait( &barrier ); 892 ///////////////////////////////// 815 893 816 894 // thread termination depends on the placement policy … … 829 907 // <work> threads are running in attached mode 830 908 // each thread, but de main, simply exit 831 if ( tid != tid_main ) pthread_exit( &THREAD_EXIT_SUCCESS ); 909 if ( tid != tid_main ) 910 { 911 912 #if VERBOSE_EXEC 913 printf("\n[transpose] exec[%d] on core[%x,%d] exit\n", 914 tid , cxy , lpid ); 915 #endif 916 pthread_exit( &THREAD_EXIT_SUCCESS ); 917 } 832 918 } 833 919 … … 838 924 839 925 840 ////////////////////////// /926 ////////////////////////// 841 927 void instrument( FILE * f, 842 928 char * filename ) 843 929 { 844 unsigned int x, y, l; 845 846 #if VERBOSE_EXEC 847 printf("\n[transpose] main enters instrument\n" ); 848 #endif 849 930 unsigned int cid; 931 unsigned int l; 932 933 unsigned int min_aloc_start = 0xFFFFFFFF; 934 unsigned int max_aloc_start = 0; 935 unsigned int min_aloc_ended = 0xFFFFFFFF; 936 unsigned int max_aloc_ended = 0; 850 937 unsigned int min_load_start = 0xFFFFFFFF; 851 938 unsigned int max_load_start = 0; … … 856 943 unsigned int min_trsp_ended = 0xFFFFFFFF; 857 944 unsigned int max_trsp_ended = 0; 858 unsigned int min_disp_start = 0xFFFFFFFF; 859 unsigned int max_disp_start = 0; 860 unsigned int min_disp_ended = 0xFFFFFFFF; 861 unsigned int max_disp_ended = 0; 945 unsigned int min_save_start = 0xFFFFFFFF; 946 unsigned int max_save_start = 0; 947 unsigned int min_save_ended = 0xFFFFFFFF; 948 unsigned int max_save_ended = 0; 949 unsigned int min_free_start = 0xFFFFFFFF; 950 unsigned int max_free_start = 0; 951 unsigned int min_free_ended = 0xFFFFFFFF; 952 unsigned int max_free_ended = 0; 862 953 863 for ( x = 0; x < x_size; x++)864 { 865 for ( y = 0; y < y_size; y++)954 for (cid = 0; cid < (x_size * y_size) ; cid++) 955 { 956 for ( l = 0 ; l < ncores ; l++ ) 866 957 { 867 unsigned int cid = y_size * x + y; 868 869 for ( l = 0 ; l < ncores ; l++ ) 870 { 871 if (LOAD_START[cid][l] < min_load_start) min_load_start = LOAD_START[cid][l]; 872 if (LOAD_START[cid][l] > max_load_start) max_load_start = LOAD_START[cid][l]; 873 if (LOAD_END[cid][l] < min_load_ended) min_load_ended = LOAD_END[cid][l]; 874 if (LOAD_END[cid][l] > max_load_ended) max_load_ended = LOAD_END[cid][l]; 875 if (TRSP_START[cid][l] < min_trsp_start) min_trsp_start = TRSP_START[cid][l]; 876 if (TRSP_START[cid][l] > max_trsp_start) max_trsp_start = TRSP_START[cid][l]; 877 if (TRSP_END[cid][l] < min_trsp_ended) min_trsp_ended = TRSP_END[cid][l]; 878 if (TRSP_END[cid][l] > max_trsp_ended) max_trsp_ended = TRSP_END[cid][l]; 879 if (DISP_START[cid][l] < min_disp_start) min_disp_start = DISP_START[cid][l]; 880 if (DISP_START[cid][l] > max_disp_start) max_disp_start = DISP_START[cid][l]; 881 if (DISP_END[cid][l] < min_disp_ended) min_disp_ended = DISP_END[cid][l]; 882 if (DISP_END[cid][l] > max_disp_ended) max_disp_ended = DISP_END[cid][l]; 883 } 958 if (ALOC_START[cid][l] < min_aloc_start) min_aloc_start = ALOC_START[cid][l]; 959 if (ALOC_START[cid][l] > max_aloc_start) max_aloc_start = ALOC_START[cid][l]; 960 if (ALOC_END[cid][l] < min_aloc_ended) min_aloc_ended = ALOC_END[cid][l]; 961 if (ALOC_END[cid][l] > max_aloc_ended) max_aloc_ended = ALOC_END[cid][l]; 962 if (LOAD_START[cid][l] < min_load_start) min_load_start = LOAD_START[cid][l]; 963 if (LOAD_START[cid][l] > max_load_start) max_load_start = LOAD_START[cid][l]; 964 if (LOAD_END[cid][l] < min_load_ended) min_load_ended = LOAD_END[cid][l]; 965 if (LOAD_END[cid][l] > max_load_ended) max_load_ended = LOAD_END[cid][l]; 966 if (TRSP_START[cid][l] < min_trsp_start) min_trsp_start = TRSP_START[cid][l]; 967 if (TRSP_START[cid][l] > max_trsp_start) max_trsp_start = TRSP_START[cid][l]; 968 if (TRSP_END[cid][l] < min_trsp_ended) min_trsp_ended = TRSP_END[cid][l]; 969 if (TRSP_END[cid][l] > max_trsp_ended) max_trsp_ended = TRSP_END[cid][l]; 970 if (SAVE_START[cid][l] < min_save_start) min_save_start = SAVE_START[cid][l]; 971 if (SAVE_START[cid][l] > max_save_start) max_save_start = SAVE_START[cid][l]; 972 if (SAVE_END[cid][l] < min_save_ended) min_save_ended = SAVE_END[cid][l]; 973 if (SAVE_END[cid][l] > max_save_ended) max_save_ended = SAVE_END[cid][l]; 974 if (FREE_START[cid][l] < min_free_start) min_free_start = FREE_START[cid][l]; 975 if (FREE_START[cid][l] > max_free_start) max_free_start = FREE_START[cid][l]; 976 if (FREE_END[cid][l] < min_free_ended) min_free_ended = FREE_END[cid][l]; 977 if (FREE_END[cid][l] > max_free_ended) max_free_ended = FREE_END[cid][l]; 884 978 } 885 979 } … … 887 981 printf( "\n ------ %s ------\n" , filename ); 888 982 fprintf( f , "\n ------ %s ------\n" , filename ); 983 984 printf( " - ALOC_START : min = %d / max = %d / delta = %d\n", 985 min_aloc_start, max_aloc_start, max_aloc_start-min_aloc_start ); 986 fprintf( f , " - ALOC_START : min = %d / max = %d / delta = %d\n", 987 min_aloc_start, max_aloc_start, max_aloc_start-min_aloc_start ); 988 989 printf( " - ALOC_END : min = %d / max = %d / delta = %d\n", 990 min_aloc_start, max_aloc_start, max_aloc_start-min_aloc_start ); 991 fprintf( f , " - ALOC_END : min = %d / max = %d / delta = %d\n", 992 min_aloc_start, max_aloc_start, max_aloc_start-min_aloc_start ); 889 993 890 994 printf( " - LOAD_START : min = %d / max = %d / delta = %d\n", … … 908 1012 min_trsp_ended, max_trsp_ended, max_trsp_ended-min_trsp_ended ); 909 1013 910 printf( " - DISP_START : min = %d / max = %d / delta = %d\n", 911 min_disp_start, max_disp_start, max_disp_start-min_disp_start ); 912 fprintf( f , " - DISP_START : min = %d / max = %d / delta = %d\n", 913 min_disp_start, max_disp_start, max_disp_start-min_disp_start ); 914 915 printf( " - DISP_END : min = %d / max = %d / delta = %d\n", 916 min_disp_ended, max_disp_ended, max_disp_ended-min_disp_ended ); 917 fprintf( f , " - DISP_END : min = %d / max = %d / delta = %d\n", 918 min_disp_ended, max_disp_ended, max_disp_ended-min_disp_ended ); 919 920 printf( "\n Sequencial = %d / Parallel = %d\n", SEQUENCIAL_TIME, PARALLEL_TIME ); 921 fprintf( f , "\n Sequencial = %d / Parallel = %d\n", SEQUENCIAL_TIME, PARALLEL_TIME ); 922 1014 printf( " - SAVE_START : min = %d / max = %d / delta = %d\n", 1015 min_save_start, max_save_start, max_save_start-min_save_start ); 1016 fprintf( f , " - SAVE_START : min = %d / max = %d / delta = %d\n", 1017 min_save_start, max_save_start, max_save_start-min_save_start ); 1018 1019 printf( " - SAVE_END : min = %d / max = %d / delta = %d\n", 1020 min_save_ended, max_save_ended, max_save_ended-min_save_ended ); 1021 fprintf( f , " - SAVE_END : min = %d / max = %d / delta = %d\n", 1022 min_save_ended, max_save_ended, max_save_ended-min_save_ended ); 1023 1024 printf( " - FREE_START : min = %d / max = %d / delta = %d\n", 1025 min_free_start, max_free_start, max_free_start-min_free_start ); 1026 fprintf( f , " - FREE_START : min = %d / max = %d / delta = %d\n", 1027 min_free_start, max_free_start, max_free_start-min_free_start ); 1028 1029 printf( " - FREE_END : min = %d / max = %d / delta = %d\n", 1030 min_free_start, max_free_start, max_free_start-min_free_start ); 1031 fprintf( f , " - FREE_END : min = %d / max = %d / delta = %d\n", 1032 min_free_start, max_free_start, max_free_start-min_free_start ); 1033 1034 1035 printf( "\n Sequencial %d" 1036 "\n Parallel %d" 1037 "\n Alloc %d" 1038 "\n Load %d" 1039 "\n Transpose %d" 1040 "\n Save %d" 1041 "\n Free %d\n" , 1042 SEQUENCIAL_TIME / 1000 , 1043 PARALLEL_TIME / 1000 , 1044 (max_aloc_ended - min_aloc_start) / 1000 , 1045 (max_load_ended - min_load_start) / 1000 , 1046 (max_trsp_ended - min_trsp_start) / 1000 , 1047 (max_save_ended - min_save_start) / 1000 , 1048 (max_free_ended - min_free_start) / 1000 ); 1049 1050 fprintf( f , "\n Sequencial %d" 1051 "\n Parallel %d" 1052 "\n Alloc %d" 1053 "\n Load %d" 1054 "\n Transpose %d" 1055 "\n Save %d" 1056 "\n Free %d\n" , 1057 SEQUENCIAL_TIME / 1000 , 1058 PARALLEL_TIME / 1000 , 1059 (max_aloc_ended - min_aloc_start) / 1000 , 1060 (max_load_ended - min_load_start) / 1000 , 1061 (max_trsp_ended - min_trsp_start) / 1000 , 1062 (max_save_ended - min_save_start) / 1000 , 1063 (max_free_ended - min_free_start) / 1000 ); 923 1064 } // end instrument() 924 1065
Note: See TracChangeset
for help on using the changeset viewer.