Changeset 825
- Timestamp: Jan 5, 2017, 9:46:21 PM (8 years ago)
- Location: soft/giet_vm/applications
- Files: 6 edited
Legend:
- Unmodified
- Added
- Removed
-
soft/giet_vm/applications/classif/classif.c
r720 r825 4 4 // author : Alain Greiner 5 5 /////////////////////////////////////////////////////////////////////////////////////// 6 // This multi-threaded application takes a stream of Gigabit Ethernet packets, 7 // and makes packet analysis and classification, based on the source MAC address. 8 // It uses the NIC peripheral, and the distributed kernel chbufs accessed by the CMA 9 // component to receive and send packets on the Gigabit Ethernet port. 10 // 11 // It can run on architectures containing up to 16 * 16 clusters, 12 // and from 3 to 8 processors per cluster. 13 // 6 // This multi-threaded application takes a stream of ETH/IP/UDP packets, and makes 7 // packets classification, based on the SRC_IP (IP header) and SRC_PORT (UDP header). 8 // 9 // It uses the VciMasterNic peripheral, that can have up to 4 channels. 10 // Each channel implement a private TX-QUEUE, and a private RX queue. 11 // 12 // There is one analyse thread per core. All threads behave as one single server 13 // (i.e. all threads use the same local port number). After each packet analysis, 14 // the SRC and DST IP addresses and port numbers are exchanged and a response 15 // packet is sent to the remote client. 16 // 17 // This application can run on architectures containing up to 16 * 16 clusters, 14 18 // It requires one shared TTY terminal. 15 19 // 16 // This application is described as a TCG (Thread and Communication Graph) 17 // containing (N+2) threads per cluster. 18 // - one "load" thread 19 // - one "store" thread 20 // - N "analyse" threads 21 // The containers are distributed (N+2 containers per cluster): 22 // - one RX container (part of the kernel rx_chbuf), in the kernel heap. 23 // - one TX container (part of the kernel tx-chbuf), in the kernel heap. 24 // - N working containers (one per analysis thread), in the user heap. 
25 // In each cluster, the "load", analysis" and "store" threads communicates through 26 // three local MWMR fifos: 27 // - fifo_l2a : tranfer a full container from "load" to "analyse" thread. 28 // - fifo_a2s : transfer a full container from "analyse" to "store" thread. 29 // - fifo_s2l : transfer an empty container from "store" to "load" thread. 30 // For each fifo, one item is a 32 bits word defining the index of an 31 // available working container. 32 // The pointers on the working containers, and the pointers on the MWMR fifos 33 // are global arrays stored in cluster[0][0]. 34 // 35 // The main thread exit after global initialisation, and launching the other threads: 36 // It does not use the pthread_join() construct. It is executed on P[0,0,1], 37 // toavoid overload on P[0,0,0]. 38 // 39 // Initialisation is made in two steps: 40 // 41 // 1) The global, shared, variables are initialised by the main thread: 42 // - shared TTY 43 // - distributed heap (one heap per cluster) 44 // - distributed rx_barrier (between all "load" threads) 45 // - distributed tx_barrier (between all "store" threads) 46 // - RX kernel chbufs (used by "load" threads) 47 // - TX kernel chbufs (used by "store" threads) 48 // Then the main thread exit, after launching the "load, "store", and "analyse" 49 // threads in all clusters. 50 // 51 // 2) Each "load" thread allocates containers[x][y][n] from local heap, 52 // and register containers pointers in the local stack. 53 // Each "load" thread allocates data buffers & mwmr fifo descriptors 54 // from local heap, and register pointers in global arrays. 55 // Each "load" thread initialises the containers as empty in fifo_s2l. 56 // Then each "load" thread signals mwmr fifos initialisation completion 57 // to other threads in same cluster, using the local_sync[x][y] variables. 
58 // 59 // When initialisation is completed, all threads are running in parallel: 60 // 61 // 1) The "load" thread get an empty working container from the fifo_s2l, 62 // transfer one container from the kernel rx_chbuf to this user container, 63 // and transfer ownership of this container to one "analysis" thread by writing 64 // into the fifo_l2a. 65 // 2) The "analyse" thread get one working container from the fifo_l2a, analyse 66 // each packet header, compute the packet type (depending on the SRC MAC address), 67 // increment the correspondint classification counter, and transpose the SRC 68 // and the DST MAC addresses fot TX tranmission. 69 // 3) The "store" thread transfer get a full working container from the fifo_a2s, 70 // transfer this user container content to the the kernel tx_chbuf, 71 // and transfer ownership of this empty container to the "load" thread by writing 72 // into the fifo_s2l. 73 // 74 // Instrumentation results display is done by the "store" thread in cluster[0][0] 75 // when all "store" threads completed the number of clusters specified by the 76 // CONTAINERS_MAX parameter. 20 // The main thread exit after launching analyse threads. 21 // It does not use the pthread_join() construct. 
77 22 /////////////////////////////////////////////////////////////////////////////////////// 78 23 79 24 #include "stdio.h" 25 #include "user_lock.h" 80 26 #include "user_barrier.h" 81 #include "malloc.h"82 #include "user_lock.h"83 #include "mwmr_channel.h"84 27 85 28 #define X_SIZE_MAX 16 86 29 #define Y_SIZE_MAX 16 87 #define NPROCS_MAX 8 88 #define CONTAINERS_MAX 5000 89 #define VERBOSE_ANALYSE 0 30 #define NPROCS_MAX 4 31 #define NBYTES_MAX 2048 32 #define SERVER_IP 0x77777777 33 #define SERVER_PORT 0x8888 34 #define MAX_PACKETS 25 35 #define VERBOSE 1 90 36 91 37 // macro to use a shared TTY … … 97 43 /////////////////////////////////////////////////////////////////////////////////////// 98 44 // Global variables 99 // The MWMR channels (descriptors and buffers), as well as the working containers 100 // used by the "analysis" threads are distributed in clusters. 101 // But the pointers on these distributed structures are stored in cluster[0][0]. 102 /////////////////////////////////////////////////////////////////////////////////////// 103 104 // pointers on distributed containers 105 unsigned int* container[X_SIZE_MAX][Y_SIZE_MAX][NPROCS_MAX-2]; 106 107 // pointers on distributed mwmr fifos containing container descriptors 108 mwmr_channel_t* mwmr_l2a[X_SIZE_MAX][Y_SIZE_MAX]; 109 mwmr_channel_t* mwmr_a2s[X_SIZE_MAX][Y_SIZE_MAX]; 110 mwmr_channel_t* mwmr_s2l[X_SIZE_MAX][Y_SIZE_MAX]; 111 112 // local synchros signaling local MWMR fifos initialisation completion 113 volatile unsigned int local_sync[X_SIZE_MAX][Y_SIZE_MAX]; 45 /////////////////////////////////////////////////////////////////////////////////////// 114 46 115 47 // lock protecting shared TTY 116 user_lock_t tty_lock; 117 118 // distributed barrier between "load" threads 119 giet_sqt_barrier_t rx_barrier; 120 121 // distributed barrier between "store" threads 122 giet_sqt_barrier_t tx_barrier; 48 user_lock_t tty_lock; 49 50 // barrier for instrumentation 51 giet_sqt_barrier_t barrier; 123 52 124 53 
// instrumentation counters 125 unsigned int 54 unsigned int counter[16]; 126 55 127 56 // threads arguments array 128 57 unsigned int thread_arg[16][16][4]; 129 58 130 //////////////////////////////////////////////////////////// 131 __attribute__ ((constructor)) void load( unsigned int* arg )132 //////////////////////////////////////////////////////////// 59 ///////////////////////////////////////////////////////////////// 60 __attribute__ ((constructor)) void analyse( unsigned int * arg ) 61 ///////////////////////////////////////////////////////////////// 133 62 { 134 // get plat-form parameters 135 unsigned int x_size; // number of clusters in a row 136 unsigned int y_size; // number of clusters in a column 137 unsigned int nprocs; // number of processors per cluster 138 giet_procs_number( &x_size , &y_size , &nprocs ); 139 140 // each "load" thread get processor identifiers 141 unsigned int x; 142 unsigned int y; 143 unsigned int p; 144 giet_proc_xyp( &x, &y, &p ); 145 146 // each "load" thread allocates containers[x][y][n] (from local heap) 147 // and register pointers in the local stack 148 unsigned int n; 149 unsigned int* cont[NPROCS_MAX-2]; 150 151 for ( n = 0 ; n < (nprocs - 2) ; n++ ) 152 { 153 container[x][y][n] = malloc( 4096 ); 154 cont[n] = container[x][y][n]; 155 } 156 157 // each "load" thread allocates data buffers for mwmr fifos (from local heap) 158 unsigned int* data_l2a = malloc( (nprocs - 2)<<2 ); 159 unsigned int* data_a2s = malloc( (nprocs - 2)<<2 ); 160 unsigned int* data_s2l = malloc( (nprocs - 2)<<2 ); 161 162 // each "load" thread allocates mwmr fifos descriptors (from local heap) 163 mwmr_l2a[x][y] = malloc( sizeof(mwmr_channel_t) ); 164 mwmr_a2s[x][y] = malloc( sizeof(mwmr_channel_t) ); 165 mwmr_s2l[x][y] = malloc( sizeof(mwmr_channel_t) ); 166 167 // each "load" thread registers local pointers on mwmr fifos in local stack 168 mwmr_channel_t* fifo_l2a = mwmr_l2a[x][y]; 169 mwmr_channel_t* fifo_a2s = mwmr_a2s[x][y]; 170 
mwmr_channel_t* fifo_s2l = mwmr_s2l[x][y]; 171 172 // each "load" thread initialises local mwmr fifos descriptors 173 // ( width = 4 bytes / depth = number of analysis threads ) 174 mwmr_init( fifo_l2a , data_l2a , 1 , (nprocs - 2) ); 175 mwmr_init( fifo_a2s , data_a2s , 1 , (nprocs - 2) ); 176 mwmr_init( fifo_s2l , data_s2l , 1 , (nprocs - 2) ); 177 178 179 // each "load" thread initialises local containers as empty in fifo_s2l 180 for ( n = 0 ; n < (nprocs - 2) ; n++ ) mwmr_write( fifo_s2l , &n , 1 ); 181 182 // each "load" thread signals mwmr fifos initialisation completion 183 // to other threads in same cluster. 184 local_sync[x][y] = 1; 185 186 // only "load" thread[0][0] displays status 187 if ( (x==0) && (y==0) ) 188 { 189 printf("\n[CLASSIF] load on P[%d,%d,%d] enters main loop at cycle %d\n" 190 " &mwmr_l2a = %x / &data_l2a = %x\n" 191 " &mwmr_a2s = %x / &data_a2s = %x\n" 192 " &mwmr_s2l = %x / &data_s2l = %x\n" 193 " &cont[0] = %x\n" 194 " x_size = %d / y_size = %d / nprocs = %d\n", 195 x , y , p , giet_proctime(), 196 (unsigned int)fifo_l2a, (unsigned int)data_l2a, 197 (unsigned int)fifo_a2s, (unsigned int)data_a2s, 198 (unsigned int)fifo_s2l, (unsigned int)data_s2l, 199 (unsigned int)cont[0], 200 x_size, y_size, nprocs ); 201 } 202 203 ///////////////////////////////////////////////////////////// 204 // "load" thread enters the main loop (on containers) 205 unsigned int count = 0; // loaded containers count 206 unsigned int index; // available container index 207 unsigned int* temp; // pointer on available container 208 209 while ( count < CONTAINERS_MAX ) 210 { 211 // get one empty container index from fifo_s2l 212 mwmr_read( fifo_s2l , &index , 1 ); 213 temp = cont[index]; 214 215 // get one container from kernel rx_chbuf 216 giet_nic_rx_move( temp ); 217 218 // get packets number 219 unsigned int npackets = temp[0] & 0x0000FFFF; 220 unsigned int nwords = temp[0] >> 16; 221 222 if ( (x==0) && (y==0) ) 223 { 224 printf("\n[CLASSIF] load on P[%d,%d,%d] 
get container %d at cycle %d" 225 " : %d packets / %d words\n", 226 x, y, p, index, giet_proctime(), npackets, nwords ); 227 } 228 229 // put the full container index to fifo_l2a 230 mwmr_write( fifo_l2a, &index , 1 ); 231 232 count++; 233 } 234 235 // all "load" threads synchronise before stats 236 sqt_barrier_wait( &rx_barrier ); 237 238 // "load" thread[0][0] displays stats 239 if ( (x==0) && (y==0) ) giet_nic_rx_stats(); 240 241 // all "load" thread exit 242 giet_pthread_exit("completed"); 243 244 } // end load() 245 246 247 ////////////////////////////////////////////////////////////// 248 __attribute__ ((constructor)) void store( unsigned int * arg ) 249 ////////////////////////////////////////////////////////////// 250 { 251 // get plat-form parameters 252 unsigned int x_size; // number of clusters in a row 253 unsigned int y_size; // number of clusters in a column 254 unsigned int nprocs; // number of processors per cluster 255 giet_procs_number( &x_size , &y_size , &nprocs ); 256 257 // get processor identifiers 258 unsigned int x; 259 unsigned int y; 260 unsigned int p; 261 giet_proc_xyp( &x, &y, &p ); 262 263 // each "store" thread wait mwmr channels initialisation 264 while ( local_sync[x][y] == 0 ) asm volatile ("nop"); 265 266 // each "store" thread registers pointers on working containers in local stack 267 unsigned int n; 268 unsigned int* cont[NPROCS_MAX-2]; 269 270 for ( n = 0 ; n < (nprocs - 2) ; n++ ) 271 { 272 cont[n] = container[x][y][n]; 273 } 274 275 // each "store" thread registers pointers on mwmr fifos in local stack 276 mwmr_channel_t* fifo_l2a = mwmr_l2a[x][y]; 277 mwmr_channel_t* fifo_a2s = mwmr_a2s[x][y]; 278 mwmr_channel_t* fifo_s2l = mwmr_s2l[x][y]; 279 280 // only "store" thread[0][0] displays status 281 if ( (x==0) && (y==0) ) 282 { 283 printf("\n[CLASSIF] store on P[%d,%d,%d] enters main loop at cycle %d\n" 284 " &mwmr_l2a = %x\n" 285 " &mwmr_a2s = %x\n" 286 " &mwmr_s2l = %x\n" 287 " &cont[0] = %x\n", 288 x , y , p , 
giet_proctime(), 289 (unsigned int)fifo_l2a, 290 (unsigned int)fifo_a2s, 291 (unsigned int)fifo_s2l, 292 (unsigned int)cont[0] ); 293 } 294 295 ///////////////////////////////////////////////////////////// 296 // "store" thread enter the main loop (on containers) 297 unsigned int count = 0; // stored containers count 298 unsigned int index; // empty container index 299 unsigned int* temp; // pointer on empty container 300 301 while ( count < CONTAINERS_MAX ) 302 { 303 // get one working container index from fifo_a2s 304 mwmr_read( fifo_a2s , &index , 1 ); 305 temp = cont[index]; 306 307 // put one container to kernel tx_chbuf 308 giet_nic_tx_move( temp ); 309 310 // get packets number 311 unsigned int npackets = temp[0] & 0x0000FFFF; 312 unsigned int nwords = temp[0] >> 16; 313 314 if ( (x==0) && (y==0) ) 315 { 316 printf("\n[CLASSIF] store on P[%d,%d,%d] get container %d at cycle %d" 317 " : %d packets / %d words\n", 318 x, y, p, index, giet_proctime(), npackets, nwords ); 319 } 320 321 // put the working container index to fifo_s2l 322 mwmr_write( fifo_s2l, &index , 1 ); 323 324 count++; 325 } 326 327 // all "store" threads synchronise before result display 328 sqt_barrier_wait( &tx_barrier ); 329 330 // "store" thread[0,0] and displays results 331 if ( (x==0) && (y==0) ) 332 { 63 unsigned char buffer[NBYTES_MAX]; // buffer for one raw packet 64 sockaddr_t server_addr; // local socket address 65 sockaddr_t client_addr; // remote socket address 66 int length; // received packet length 67 int count; // packets counter 68 int error; 69 70 unsigned int tid = *arg; 71 72 printf("\n[CLASSIF] analyse thread %x starts at cycle %d\n", 73 tid , giet_proctime() ); 74 75 // create socket 76 int socket = giet_nic_socket( AF_INET , SOCK_DGRAM , 0 ); 77 78 if( socket == -1 ) 79 { 80 printf("\n[CLASSIF ERROR] thread %x cannot create socket\n", tid ); 81 giet_pthread_exit( NULL ); 82 } 83 84 // bind socket 85 server_addr.sin_family = AF_INET; 86 server_addr.sin_addr = HTONL( 
SERVER_IP ); 87 server_addr.sin_port = HTONS( SERVER_PORT ); 88 89 error = giet_nic_bind( socket , &server_addr , sizeof(server_addr) ); 90 91 if( error ) 92 { 93 printf("\n[CLASSIF ERROR] thread %x cannot bind socket\n", tid ); 94 giet_pthread_exit( NULL ); 95 } 96 97 printf("\n[CLASSIF] socket %x created by thread %x\n", socket , tid ); 98 99 // reset NIC counters 100 giet_nic_clear_stats(); 101 102 ///////// loop to receive, analyse, and send packets /////////// 103 for( count = 0 ; count < MAX_PACKETS ; count++ ) 104 { 105 length = sizeof(sockaddr_t); 106 107 // get one packet from client 108 error = giet_nic_recvfrom( socket, 109 buffer, 110 NBYTES_MAX, 111 0, 112 &client_addr, 113 &length ); 114 if( error ) 115 { 116 printf("\n[CLASSIF ERROR] thread %x cannot receive packet\n", tid ); 117 giet_pthread_exit( NULL ); 118 } 119 120 // get type & pktid 121 unsigned int client_ip = client_addr.sin_addr; 122 unsigned short client_port = client_addr.sin_port; 123 unsigned int type = ((client_ip & 0x3) << 2) + (client_port & 0x3); 124 unsigned int pktid = (((unsigned int )buffer[0]) << 24) | 125 (((unsigned int )buffer[1]) << 16) | 126 (((unsigned int )buffer[2]) << 8) | 127 (((unsigned int )buffer[3]) ) ; 128 if( VERBOSE ) 129 { 130 printf("\n[CLASSIF] thread %x receive packet at cycle %d\n" 131 " type = %x / length = %d / pktid = %d\n", 132 tid , giet_proctime() , type , length , pktid ); 133 } 134 135 atomic_increment( &counter[type], 1 ); 136 137 // send response packet 138 error = giet_nic_sendto( socket, 139 buffer, 140 length, 141 0, 142 &client_addr, 143 sizeof(sockaddr_t) ); 144 if( error ) 145 { 146 printf("\n[CLASSIF ERROR] thread %x cannot send packet\n", tid ); 147 giet_pthread_exit( NULL ); 148 } 149 150 if( VERBOSE ) 151 { 152 printf("\n[CLASSIF] thread %x sent packet at cycle %d\n" 153 " type = %x / length = %d / pktid = %d\n", 154 tid , giet_proctime() , type , length , pktid ); 155 } 156 157 } // end for 158 159 // synchro before stats 160 
sqt_barrier_wait( &barrier ); 161 162 if ( tid == 0 ) 163 { 164 // give time to flush the TX pipe-line 165 char byte; 166 printf("\n ###### enter any key to get stats ######\n"); 167 giet_tty_getc( &byte ); 168 169 // display classification results 333 170 printf("\nClassification Results\n" 334 171 " - TYPE 0 : %d packets\n" … … 348 185 " - TYPE E : %d packets\n" 349 186 " - TYPE F : %d packets\n" 350 " TOTAL= %d packets\n",187 " TOTAL = %d packets\n", 351 188 counter[0x0], counter[0x1], counter[0x2], counter[0x3], 352 189 counter[0x4], counter[0x5], counter[0x6], counter[0x7], … … 358 195 counter[0xC]+ counter[0xD]+ counter[0xE]+ counter[0xF] ); 359 196 360 giet_nic_tx_stats(); 197 // display NIC instrumentation counters 198 giet_nic_print_stats(); 361 199 } 362 200 363 // all "store" thread exit 364 giet_pthread_exit("Thread completed"); 365 366 } // end store() 367 368 369 /////////////////////////////////////////////////////////////// 370 __attribute__ ((constructor)) void analyse( unsigned int* arg ) 371 /////////////////////////////////////////////////////////////// 372 { 373 // get platform parameters 374 unsigned int x_size; // number of clusters in row 375 unsigned int y_size; // number of clusters in a column 376 unsigned int nprocs; // number of processors per cluster 377 giet_procs_number( &x_size, &y_size, &nprocs ); 378 379 // get processor identifiers 380 unsigned int x; 381 unsigned int y; 382 unsigned int p; 383 giet_proc_xyp( &x, &y, &p ); 384 385 // each "analyse" thread wait mwmr channels initialisation 386 while ( local_sync[x][y] == 0 ) asm volatile ("nop"); 387 388 // each "analyse" threads register pointers on working containers in local stack 389 unsigned int n; 390 unsigned int* cont[NPROCS_MAX-2]; 391 for ( n = 0 ; n < (nprocs - 2) ; n++ ) 392 { 393 cont[n] = container[x][y][n]; 394 } 395 396 // each "analyse" threads register pointers on mwmr fifos in local stack 397 mwmr_channel_t* fifo_l2a = mwmr_l2a[x][y]; 398 mwmr_channel_t* 
fifo_a2s = mwmr_a2s[x][y]; 399 400 // only "analyse" thread[0][0] display status 401 if ( (x==0) && (y==0) ) 402 { 403 printf("\n[CLASSIF] analyse on P[%d,%d,%d] enters main loop at cycle %d\n" 404 " &mwmr_l2a = %x\n" 405 " &mwmr_a2s = %x\n" 406 " &cont[0] = %x\n", 407 x, y, p, giet_proctime(), 408 (unsigned int)fifo_l2a, 409 (unsigned int)fifo_a2s, 410 (unsigned int)cont[0] ); 411 } 412 413 ////////////////////////////////////////////////////////////////////// 414 // all "analyse" threads enter the main infinite loop (on containers) 415 unsigned int index; // available container index 416 unsigned int* temp; // pointer on available container 417 unsigned int nwords; // number of words in container 418 unsigned int npackets; // number of packets in container 419 unsigned int length; // number of bytes in current packet 420 unsigned int first; // current packet first word in container 421 unsigned int type; // current packet type 422 unsigned int pid; // current packet index 423 424 #if VERBOSE_ANALYSE 425 unsigned int verbose_len[10]; // save length for 10 packets in one container 426 unsigned long long verbose_dst[10]; // save dest for 10 packets in one container 427 unsigned long long verbose_src[10]; // save source for 10 packets in one container 428 #endif 429 430 while ( 1 ) 431 { 432 433 #if VERBOSE_ANALYSE 434 for( pid = 0 ; pid < 10 ; pid++ ) 435 { 436 verbose_len[pid] = 0; 437 verbose_dst[pid] = 0; 438 verbose_src[pid] = 0; 439 } 440 #endif 441 // get one working container index from fifo_l2a 442 mwmr_read( fifo_l2a , &index , 1 ); 443 temp = cont[index]; 444 445 // get packets number and words number 446 npackets = temp[0] & 0x0000FFFF; 447 nwords = temp[0] >> 16; 448 449 if ( (x==0) && (y==0) ) 450 { 451 printf("\n[CLASSIF] analyse on P[%d,%d,%d] get container at cycle %d" 452 " : %d packets / %d words\n", 453 x, y, p, giet_proctime(), npackets, nwords ); 454 } 455 456 // initialize word index in container 457 first = 34; 458 459 // loop on packets 460 
for( pid = 0 ; pid < npackets ; pid++ ) 461 { 462 // get packet length from container header 463 if ( (pid & 0x1) == 0 ) length = temp[1+(pid>>1)] >> 16; 464 else length = temp[1+(pid>>1)] & 0x0000FFFF; 465 466 // compute packet DST and SRC MAC addresses 467 unsigned int word0 = temp[first]; 468 unsigned int word1 = temp[first + 1]; 469 unsigned int word2 = temp[first + 2]; 470 471 #if VERBOSE_ANALYSE 472 unsigned long long dst = ((unsigned long long)(word1 & 0xFFFF0000)>>16) | 473 (((unsigned long long)word0)<<16); 474 unsigned long long src = ((unsigned long long)(word1 & 0x0000FFFF)<<32) | 475 ((unsigned long long)word2); 476 if ( pid < 10 ) 477 { 478 verbose_len[pid] = length; 479 verbose_dst[pid] = dst; 480 verbose_src[pid] = src; 481 } 482 #endif 483 // compute type from SRC MAC address and increment counter 484 type = word1 & 0x0000000F; 485 atomic_increment( &counter[type], 1 ); 486 487 // exchange SRC & DST MAC addresses for TX 488 temp[first] = ((word1 & 0x0000FFFF)<<16) | ((word2 & 0xFFFF0000)>>16); 489 temp[first + 1] = ((word2 & 0x0000FFFF)<<16) | ((word0 & 0xFFFF0000)>>16); 490 temp[first + 2] = ((word0 & 0x0000FFFF)<<16) | ((word1 & 0xFFFF0000)>>16); 491 492 // update first word index 493 if ( length & 0x3 ) first += (length>>2)+1; 494 else first += (length>>2); 495 } 496 497 #if VERBOSE_ANALYSE 498 if ( (x==0) && (y==0) ) 499 { 500 printf("\n*** Thread analyse on P[%d,%d,%d] / container %d at cycle %d\n" 501 " - Packet 0 : plen = %d / dst_mac = %l / src_mac = %l\n" 502 " - Packet 1 : plen = %d / dst_mac = %l / src_mac = %l\n" 503 " - Packet 2 : plen = %d / dst_mac = %l / src_mac = %l\n" 504 " - Packet 3 : plen = %d / dst_mac = %l / src_mac = %l\n" 505 " - Packet 4 : plen = %d / dst_mac = %l / src_mac = %l\n" 506 " - Packet 5 : plen = %d / dst_mac = %l / src_mac = %l\n" 507 " - Packet 6 : plen = %d / dst_mac = %l / src_mac = %l\n" 508 " - Packet 7 : plen = %d / dst_mac = %l / src_mac = %l\n" 509 " - Packet 8 : plen = %d / dst_mac = %l / src_mac = 
%l\n" 510 " - Packet 9 : plen = %d / dst_mac = %l / src_mac = %l\n", 511 x , y , p , index , giet_proctime() , 512 verbose_len[0] , verbose_dst[0] , verbose_src[0] , 513 verbose_len[1] , verbose_dst[1] , verbose_src[1] , 514 verbose_len[2] , verbose_dst[2] , verbose_src[2] , 515 verbose_len[3] , verbose_dst[3] , verbose_src[3] , 516 verbose_len[4] , verbose_dst[4] , verbose_src[4] , 517 verbose_len[5] , verbose_dst[5] , verbose_src[5] , 518 verbose_len[6] , verbose_dst[6] , verbose_src[6] , 519 verbose_len[7] , verbose_dst[7] , verbose_src[7] , 520 verbose_len[8] , verbose_dst[8] , verbose_src[8] , 521 verbose_len[9] , verbose_dst[9] , verbose_src[9] ); 522 } 523 #endif 524 525 // pseudo-random delay 526 unsigned int delay = giet_rand()>>3; 527 unsigned int time; 528 for( time = 0 ; time < delay ; time++ ) asm volatile ("nop"); 529 530 // put the working container index to fifo_a2s 531 mwmr_write( fifo_a2s , &index , 1 ); 532 } 201 giet_pthread_exit( "completed" ); 202 533 203 } // end analyse() 204 205 534 206 535 207 ////////////////////////////////////////// … … 537 209 ////////////////////////////////////////// 538 210 { 539 // indexes for loops540 211 unsigned int x , y , n; 541 542 // get identifiers for proc executing main 543 unsigned int x_id; // x cluster coordinate 544 unsigned int y_id; // y cluster coordinate 545 unsigned int p_id; // local processor index 546 giet_proc_xyp( &x_id , &y_id , &p_id ); 212 unsigned int error; 213 pthread_t trdid; // thread index required by pthread_create() 547 214 548 215 // get plat-form parameters … … 556 223 lock_init( &tty_lock); 557 224 558 // check plat-form parameters559 giet_pthread_assert( ((nprocs >= 3) && (nprocs <= 8)),560 "[CLASSIF ERROR] number of procs per cluster must in [3...8]");561 562 225 giet_pthread_assert( ((x_size >= 1) && (x_size <= 16)), 563 226 "[CLASSIF ERROR] x_size must be in [1...16]"); … … 566 229 "[CLASSIF ERROR] y_size must be in [1...16]"); 567 230 568 // distributed heap initialisation 
569 for ( x = 0 ; x < x_size ; x++ ) 570 { 571 for ( y = 0 ; y < y_size ; y++ ) 231 printf("\n[CLASSIF] main thread starts at cycle %d\n", giet_proctime() ); 232 233 // distributed heap[x,y] initialisation 234 for ( x = 0 ; x < x_size ; x++ ) 235 { 236 for ( y = 0 ; y < y_size ; y++ ) 572 237 { 573 238 heap_init( x , y ); … … 575 240 } 576 241 577 printf("\n[CLASSIF] start at cycle %d on %d cores\n", 578 giet_proctime(), (x_size * y_size * nprocs) ); 579 580 // thread index 581 // required by pthread_create() 582 // unused in this appli because no pthread_join() 583 pthread_t trdid; 584 585 // rx_barrier initialisation 586 sqt_barrier_init( &rx_barrier, x_size , y_size , 1 ); 587 588 // tx_barrier initialisation 589 sqt_barrier_init( &tx_barrier, x_size , y_size , 1 ); 590 591 // allocate and start RX NIC and CMA channels 592 giet_nic_rx_alloc( x_size , y_size ); 593 giet_nic_rx_start(); 594 595 // allocate and start TX NIC and CMA channels 596 giet_nic_tx_alloc( x_size , y_size ); 597 giet_nic_tx_start(); 598 599 // Initialisation completed 600 printf("\n[CLASSIF] initialisation completed at cycle %d\n", giet_proctime() ); 601 602 // launch load, store and analyse threads 242 printf("\n[CLASSIF] heap initialized at cycle %d\n", giet_proctime() ); 243 244 // barrier initialisation 245 sqt_barrier_init( &barrier, x_size , y_size , nprocs ); 246 247 printf("\n[CLASSIF] barrier initialized at cycle %d\n", giet_proctime() ); 248 249 // lauch analyse threads 603 250 for ( x = 0 ; x < x_size ; x++ ) 604 251 { … … 607 254 for ( n = 0 ; n < nprocs ; n++ ) 608 255 { 609 // compute argument value 610 thread_arg[x][y][n] = (x<<8) | (y<<4) | n; 611 612 if ( n == 0 ) // "load" thread 256 thread_arg[x][y][n] = (x << 16) | (y << 8) | n; 257 258 error = giet_pthread_create( &trdid, 259 NULL, // no attribute 260 &analyse, 261 &thread_arg[x][y][n] ); 262 if( error ) 613 263 { 614 if ( giet_pthread_create( &trdid, 615 NULL, // no attribute 616 &load, 617 &thread_arg[x][y][n] ) ) 618 
{ 619 printf("\n[CLASSIF ERROR] launching thread load\n" ); 620 giet_pthread_exit( NULL ); 621 } 622 else 623 { 624 printf("\n[CLASSIF] thread load activated : trdid = %x\n", trdid ); 625 } 626 } 627 else if ( n == 1 ) // "store" thread 628 { 629 if ( giet_pthread_create( &trdid, 630 NULL, // no attribute 631 &store, 632 &thread_arg[x][y][n] ) ) 633 { 634 printf("\n[CLASSIF ERROR] launching thread store\n" ); 635 giet_pthread_exit( NULL ); 636 } 637 else 638 { 639 printf("\n[CLASSIF] thread store activated : trdid = %x\n", trdid ); 640 } 641 } 642 else // "analyse" threads 643 { 644 if ( giet_pthread_create( &trdid, 645 NULL, // no attribute 646 &analyse, 647 &thread_arg[x][y][n] ) ) 648 { 649 printf("\n[CLASSIF ERROR] launching thread analyse\n" ); 650 giet_pthread_exit( NULL ); 651 } 652 else 653 { 654 printf("\n[CLASSIF] thread analyse activated : trdid = %x\n", trdid ); 655 } 264 printf("\n[CLASSIF ERROR] cannot create thread on core[%d,%d,%d]\n", 265 x, y, n ); 266 giet_pthread_exit( NULL ); 656 267 } 657 268 } -
soft/giet_vm/applications/classif/classif.py
r720 r825 12 12 # The mapping of threads on processors is the following: 13 13 # - the "main" on cluster[0][0] 14 # - one "load" thread per cluster containing processors, 15 # - one "store" thread per cluster containing processors, 16 # - (nprocs-2) "analyse" thread per cluster containing processors. 14 # - one "analyse" thread per processor, 17 15 # The mapping of virtual segments is the following: 18 16 # - There is one shared data vseg in cluster[0][0] … … 26 24 # - y_width : number of bits for y field 27 25 # - nprocs : number of processors per cluster 28 #29 # WARNING: The target architecture cannot contain less30 # than 3 processors per cluster.31 26 ################################################################################## 32 27 … … 40 35 y_width = mapping.y_width 41 36 42 assert (nprocs >= 3) and (nprocs <= 8)43 44 37 # define vsegs base & size 45 38 code_base = 0x10000000 … … 49 42 data_size = 0x00010000 # 64 Kbytes (non replicated) 50 43 51 heap_base = 0x3000000052 heap_size = 0x00200000 # 2M bytes (per cluster)44 stack_base = 0x30000000 45 stack_size = 0x00010000 # 64 Kbytes (per thread) 53 46 54 stack_base = 0x4000000055 stack_size = 0x00010000 # 64 Kbytes (per thread)47 heap_base = 0x40000000 48 heap_size = 0x00200000 # 2 Mbytes (per cluster) 56 49 57 50 # create vspace 58 51 vspace = mapping.addVspace( name = 'classif', 59 52 startname = 'classif_data', 60 active = False )53 active = True ) 61 54 62 55 # data vseg : shared / cluster[0][0] … … 65 58 binpath = 'bin/classif/appli.elf', 66 59 local = False ) 67 68 # heap vsegs : shared (one per cluster)69 for x in xrange (x_size):70 for y in xrange (y_size):71 cluster_id = (x * y_size) + y72 if ( mapping.clusters[cluster_id].procs ):73 size = heap_size74 base = heap_base + (cluster_id * size)75 76 mapping.addVseg( vspace, 'classif_heap_%d_%d' %(x,y), base , size,77 'C_WU', vtype = 'HEAP', x = x, y = y, pseg = 'RAM',78 local = False, big = True )79 60 80 61 # code vsegs : local (one copy per 
cluster) … … 110 91 local = True ) 111 92 93 # heap vsegs : distributed but non local (any heap can be accessed by any thread) 94 for x in xrange (x_size): 95 for y in xrange (y_size): 96 cluster_id = (x * y_size) + y 97 if ( mapping.clusters[cluster_id].procs ): 98 base = heap_base + (cluster_id * heap_size) 99 100 mapping.addVseg( vspace, 'classif_heap_%d_%d' % (x,y), base, heap_size, 101 'C_WU' , vtype = 'HEAP' , x = x , y = y , pseg = 'RAM', 102 local = False, big = True ) 103 112 104 # distributed threads / one thread per processor 113 105 # ... plus main on P[0][0][0] 114 mapping.addThread( vspace, 'main', True, 0, 0, 1, 115 'main_stack', 116 'classif_heap_0_0', 106 mapping.addThread( vspace, 'main', True, 0, 0, 0, 107 'main_stack', '' , 117 108 0 ) # index in start_vector 118 109 … … 122 113 if ( mapping.clusters[cluster_id].procs ): 123 114 for p in xrange( nprocs ): 124 if ( p== 0 ): # thread load 125 start_index = 3 126 thread_name = 'load_%d_%d_%d' %(x,y,p) 127 elif ( p== 1 ): # thread store 128 start_index = 2 129 thread_name = 'stor_%d_%d_%d' %(x,y,p) 130 else : # thread analyse 131 start_index = 1 132 thread_name = 'anal_%d_%d_%d' % (x,y,p) 115 start_index = 1 116 thread_name = 'analyse_%d_%d_%d' % (x,y,p) 133 117 134 118 mapping.addThread( vspace, thread_name, False , x, y, p, 135 'classif_stack_%d_%d_%d' % (x,y,p), 119 'classif_stack_%d_%d_%d' % (x,y,p), 136 120 'classif_heap_%d_%d' % (x,y), 137 start_index )121 1 ) # index in start_vector 138 122 139 123 # extend mapping name -
soft/giet_vm/applications/convol/convol.py
r708 r825 3 3 from mapping import * 4 4 5 ##################################################################################### #5 ##################################################################################### 6 6 # file : convol.py 7 7 # date : may 2014 8 8 # author : Alain Greiner 9 ##################################################################################### ##9 ##################################################################################### 10 10 # This file describes the mapping of the multi-threaded "convol" 11 11 # application on a multi-clusters, multi-processors architecture. … … 94 94 95 95 mapping.addVseg( vspace, 'conv_heap_%d_%d' % (x,y), base, size, 96 'C_WU', vtype = ' BUFFER', x = x , y = y , pseg = 'RAM',96 'C_WU', vtype = 'HEAP', x = x , y = y , pseg = 'RAM', 97 97 local = False, big = True ) 98 98 -
soft/giet_vm/applications/fft/fft.py
r812 r825 89 89 90 90 mapping.addVseg( vspace, 'fft_heap_%d_%d' % (x,y), base, size, 91 'C_WU', vtype = ' BUFFER', x = x , y = y , pseg = 'RAM',91 'C_WU', vtype = 'HEAP', x = x , y = y , pseg = 'RAM', 92 92 local = False, big = True ) 93 93 -
soft/giet_vm/applications/shell/shell.py
r782 r825 59 59 # heap vseg 60 60 mapping.addVseg( vspace, 'shell_heap', heap_base, heap_size, 61 'C_WU', vtype = ' BUFFER', x = xmap , y = ymap , pseg = 'RAM',61 'C_WU', vtype = 'HEAP', x = xmap , y = ymap , pseg = 'RAM', 62 62 local = False ) 63 63 -
soft/giet_vm/applications/sort/sort.py
r718 r825 88 88 89 89 mapping.addVseg( vspace, 'sort_heap_%d_%d' % (x,y), base, size, 90 'C_WU', vtype = ' BUFFER', x = x, y = y, pseg = 'RAM',90 'C_WU', vtype = 'HEAP', x = x, y = y, pseg = 'RAM', 91 91 local = False, big = True ) 92 92
Note: See TracChangeset for help on using the changeset viewer.