/////////////////////////////////////////////////////////////////////////////////////////////
// File   : main.c  (for classif application)
// Date   : november 2014
// author : Alain Greiner
/////////////////////////////////////////////////////////////////////////////////////////////
// This multi-threaded application takes a stream of Gigabit Ethernet packets,
// and makes packet analysis and classification, based on the source MAC address.
// It uses the NIC peripheral, and the distributed kernel chbuf filled by the CMA
// component to consume packets on the Gigabit Ethernet port.
//
// This application is described as a TCG (Task and Communication Graph) containing
// - one "load" task per cluster.
// - from one to three "analyse" tasks per cluster.
// In each cluster, the "load" task communicates with the local "analyse" tasks through
// a local MWMR fifo containing NB_PROCS_MAX containers (one item = one container).
// The MWMR fifo descriptors array is defined as a global variable in cluster[0][0].
//
// Initialisation is done in two steps by the "load" tasks:
// - Task "load" in cluster[0][0] initialises NIC & CMA channel, and initialises
//   the barrier between all "load" tasks. Other "load" tasks are waiting on the
//   global_sync synchronisation variable.
// - In each cluster[x][y], the "load" task allocates the MWMR fifo descriptor and
//   the data buffer in the local heap, and initializes the MWMR descriptor.
//   The "analyse" tasks are waiting on the sync[x][y] variables.
//
// When initialisation is completed, all "load" and "analyse" tasks loop on containers:
// 1) The "load" task transfers containers from the kernel chbuf associated to the
//    NIC_RX channel (in cluster[0][0]), to the local MWMR fifo (in cluster[x][y]),
//    after an intermediate copy in a private stack buffer.
// Each "load" task loads CONTAINERS_MAX containers before exit, and the // task in cluster[0,0] displays the results stored in global counters filled // by the "analyse" tasks when all "load" tasks reach the barrier. // // 2) The "analyse" task transfer one container from the local MWMR fifo to a private // local buffer. It analyse each packet contained in the container, compute the // packet type, depending on the source MAC address, and increment the counters. // // It uses the he following hardware parameters, defined in the hard_config.h file: // - X_SIZE : number of clusters in a row // - Y_SIZE : number of clusters in a column // - NB_PROCS_MAX : number of processors per cluster ///////////////////////////////////////////////////////////////////////////////////////////// #include "stdio.h" #include "barrier.h" #include "malloc.h" #include "user_lock.h" #include "mwmr_channel.h" #include "hard_config.h" #define CONTAINERS_MAX 10 /////////////////////////////////////////////////////////////////////////////////////////// // Global variables // The MWMR channels (descriptors and buffers) are distributed in the clusters, // but the pointers array is stored in cluster[0][0] /////////////////////////////////////////////////////////////////////////////////////////// mwmr_channel_t* mwmr[X_SIZE][Y_SIZE]; // distributed MWMR fifos pointers unsigned int local_sync[X_SIZE][Y_SIZE]; // distributed synchros "load" / "analyse" unsigned int global_sync = 0; // global synchro between "load" tasks unsigned int count[16]; // instrumentation counters giet_barrier_t barrier; // barrier between "load" (instrumentation) unsigned int nic_channel; // allocated NIC channel index ///////////////////////////////////////// __attribute__ ((constructor)) void load() ///////////////////////////////////////// { // get processor identifiers unsigned int x; unsigned int y; unsigned int l; giet_proc_xyp( &x, &y, &l ); // local buffer to store one container in private stack unsigned int 
temp[1024]; // allocates data buffer for MWMR fifo in local heap unsigned int* data = malloc( NB_PROCS_MAX<<12 ); // allocates MWMR fifo descriptor in local heap mwmr[x][y] = malloc( sizeof(mwmr_channel_t) ); mwmr_channel_t* fifo = mwmr[x][y]; // initialises local MWMR fifo : width = 4kbytes / depth = NB_PROCS_MAX mwmr_init( fifo , data , 1024 , NB_PROCS_MAX ); // display for cluster[X_SIZE-1][Y_SIZE-1] if ( (x==X_SIZE-1) && (y==Y_SIZE-1) ) giet_shr_printf("\n*** Task load starts on P[%d,%d,%d] at cycle %d\n" " &mwmr = %x / &data = %x / &sync = %x\n" " x_size = %d / y_size = %d / nprocs = %d\n", x , y , l , giet_proctime() , (unsigned int)fifo, (unsigned int)data, (unsigned int)(&local_sync[x][y]), X_SIZE, Y_SIZE, NB_PROCS_MAX ); // Task load on cluster[0,0] makes global initialisation: // - NIC & CMA channels allocation & initialisation. // - barrier for load tasks initialisation. // Other load task wait completion. if ( (x==0) && (y==0) ) { // get NIC_RX channel nic_channel = giet_nic_rx_alloc(); // start CMA transfer giet_nic_rx_start(); // barrier init barrier_init( &barrier, X_SIZE * Y_SIZE ); global_sync = 1; } else { while ( global_sync == 0 ) asm volatile ("nop"); } // "load" task signals MWMR initialisation to "analyse" tasks local_sync[x][y] = 1; // main loop (on containers) unsigned int container = 0; while ( container < CONTAINERS_MAX ) { // get one container from kernel chbuf giet_nic_rx_move( nic_channel, temp ); // get packets number unsigned int npackets = temp[0] & 0x0000FFFF; unsigned int nwords = temp[0] >> 16; if ( (x==X_SIZE-1) && (y==Y_SIZE-1) ) giet_shr_printf("\nTask load on P[%d,%d,%d] get container %d at cycle %d" " : %d packets / %d words\n", x, y, l, container, giet_proctime(), npackets, nwords ); // put container to MWMR channel mwmr_write( fifo, temp, 1 ); container++; } // all load tasks synchronise before result display barrier_wait( &barrier ); // Task load in cluster[0,0] stops NIC and displays results if ( (x==0) && (y==0) ) { 
giet_nic_rx_stop(); giet_shr_printf("\n@@@@ Classification Results @@@\n" " - TYPE 0 : %d packets\n" " - TYPE 1 : %d packets\n" " - TYPE 2 : %d packets\n" " - TYPE 3 : %d packets\n" " - TYPE 4 : %d packets\n" " - TYPE 5 : %d packets\n" " - TYPE 6 : %d packets\n" " - TYPE 7 : %d packets\n" " - TYPE 8 : %d packets\n" " - TYPE 9 : %d packets\n" " - TYPE A : %d packets\n" " - TYPE B : %d packets\n" " - TYPE C : %d packets\n" " - TYPE D : %d packets\n" " - TYPE E : %d packets\n" " - TYPE F : %d packets\n", count[0x0], count[0x1], count[0x2], count[0x3], count[0x4], count[0x5], count[0x6], count[0x7], count[0x8], count[0x9], count[0xA], count[0xB], count[0xC], count[0xD], count[0xE], count[0xF] ); giet_nic_rx_stats(); } // all load tasks synchronise before exit barrier_wait( &barrier ); giet_exit("Task completed"); } // end load() //////////////////////////////////////////// __attribute__ ((constructor)) void analyse() //////////////////////////////////////////// { // get processor identifiers unsigned int x; unsigned int y; unsigned int l; giet_proc_xyp( &x, &y, &l ); // local buffer to store one container unsigned int temp[1024]; // wait MWMR channel initialisation (done by task load) while ( local_sync[x][y] == 0 ) asm volatile ("nop"); // get pointer on MWMR channel descriptor mwmr_channel_t* fifo = mwmr[x][y]; // display status for cluster[X_SIZE-1][Y_SIZE-1] if ( (x==X_SIZE-1) && (y==Y_SIZE-1) ) giet_shr_printf("\n*** Task analyse starts on P[%d,%d,%d] at cycle %d\n" " &mwmr = %x / &sync = %x\n", x, y, l, giet_proctime(), (unsigned int)fifo, (unsigned int)(&local_sync[x][y]) ); // main loop (on containers) unsigned int nwords; // number of words in container unsigned int npackets; // number of packets in container unsigned int length; // number of bytes in current packet unsigned int word; // current packet first word in container unsigned int type; // current packet type unsigned int p; // current packet index while ( 1 ) { // get one container from MWMR fifo 
mwmr_read( fifo, temp, 1 ); // get packets number npackets = temp[0] & 0x0000FFFF; nwords = temp[0] >> 16; if ( (x==X_SIZE-1) && (y==Y_SIZE-1) ) giet_shr_printf("\nTask analyse on P[%d,%d,%d] get container at cycle %d" " : %d packets / %d words\n", x, y, l, giet_proctime(), npackets, nwords ); // initialize word index in container word = 34; // loop on packets for( p = 0 ; p < npackets ; p++ ) { // get packet length from container header if ( (p & 0x1) == 0 ) length = temp[1+(p>>1)] >> 16; else length = temp[1+(p>>1)] & 0x0000FFFF; // get packet type (source mac address) type = (temp[word+1] & 0x0000000F); // increment counter atomic_increment( &count[type], 1 ); // update word index if ( length & 0x3 ) word += (length>>2)+1; else word += (length>>2); } } } // end analyse()