/////////////////////////////////////////////////////////////////////////////////////////////
// File   : main.c   (for classif application)
// Date   : november 2014
// author : Alain Greiner
/////////////////////////////////////////////////////////////////////////////////////////////
// This multi-threaded application takes a stream of Gigabit Ethernet packets,
// and makes packet analysis and classification, based on the source MAC address.
// It uses the NIC peripheral, and the distributed kernel chbuf filled by the CMA 
// component to consume packets on the Gigabit Ethernet port. 
//
// This application is described as a TCG (Task and Communication Graph) containing
// - one "load" task per cluster.
// - from one to three "analyse" tasks per cluster.
// In each cluster, the "load" task communicates with the local "analyse" tasks through 
// a local MWMR fifo containing NB_PROCS_MAX containers (one item = one container).
// The MWMR fifo descriptors array is defined as a global variable in cluster[0][0]. 
//
// Initialisation is done in two steps by the "load" tasks:
// - Task "load" in cluster[0][0] initialises NIC & CMA channel, and initialises 
//   the barrier between all "load" tasks. Other "load" tasks are waiting on the
//   global_sync synchronisation variable.
// - In each cluster[x][y], the "load" task allocates the MWMR fifo descriptor and
//   the data buffer in the local heap, and initializes the MWMR descriptor.
//   The "analyse" tasks are waiting on the sync[x][y] variables.
//     
// When initialisation is completed, all "load" and "analyse" tasks loop on containers:
// 1) The "load" task transfers containers from the kernel chbuf associated to the
//    NIC_RX channel (in cluster[0][0]), to the local MWMR fifo (in cluster[x][y]),
//    after an intermediate copy in a private stack buffer. 
//    Each "load" task loads CONTAINERS_MAX containers before exit, and the
//    task in cluster[0,0] displays the results stored in global counters filled
//    by the "analyse" tasks when all "load" tasks reach the barrier.
//
// 2) The "analyse" task transfers one container from the local MWMR fifo to a private
//    local buffer. It analyses each packet contained in the container, computes the
//    packet type, depending on the source MAC address, and increments the counters.
//
// It uses the following hardware parameters, defined in the hard_config.h file:
// - X_SIZE       : number of clusters in a row
// - Y_SIZE       : number of clusters in a column
// - NB_PROCS_MAX : number of processors per cluster
/////////////////////////////////////////////////////////////////////////////////////////////

#include "stdio.h"
#include "barrier.h"
#include "malloc.h"
#include "user_lock.h"
#include "mwmr_channel.h"
#include "hard_config.h"

// number of containers moved by each "load" task before it reaches the barrier
#define CONTAINERS_MAX   10

///////////////////////////////////////////////////////////////////////////////////////////
//    Global variables
// The MWMR channels (descriptors and buffers) are distributed in the clusters, 
// but the pointers array is stored in cluster[0][0]
//
// The local_sync[][] and global_sync flags are written by one task and polled in
// busy-waiting loops by other tasks: they are declared volatile, so that the
// compiler reloads them from memory at each iteration of the polling loops.
///////////////////////////////////////////////////////////////////////////////////////////

mwmr_channel_t*        mwmr[X_SIZE][Y_SIZE];        // distributed MWMR fifos pointers

volatile unsigned int  local_sync[X_SIZE][Y_SIZE];  // distributed synchros "load" / "analyse"

volatile unsigned int  global_sync = 0;             // global synchro between "load" tasks

unsigned int           count[16];                   // instrumentation counters (one per type)

giet_barrier_t         barrier;                     // barrier between "load" (instrumentation)

unsigned int           nic_channel;                 // allocated NIC channel index

/////////////////////////////////////////
__attribute__ ((constructor)) void load()
/////////////////////////////////////////
{
    // get processor identifiers
    unsigned int    x;
    unsigned int    y;
    unsigned int    l;
    giet_proc_xyp( &x, &y, &l );

    // local buffer to store one container in private stack
    unsigned int  temp[1024];

    // allocates data buffer for MWMR fifo in local heap
    unsigned int*  data = malloc( NB_PROCS_MAX<<12 );

    // allocates MWMR fifo descriptor in local heap
    mwmr[x][y] = malloc( sizeof(mwmr_channel_t) );
    mwmr_channel_t* fifo = mwmr[x][y];

    // initialises local MWMR fifo : width = 4kbytes / depth = NB_PROCS_MAX
    mwmr_init( fifo , data , 1024 , NB_PROCS_MAX );

    // display for cluster[X_SIZE-1][Y_SIZE-1]
    if ( (x==X_SIZE-1) && (y==Y_SIZE-1) )
    giet_shr_printf("\n*** Task load starts on P[%d,%d,%d] at cycle %d\n"
                    "      &mwmr  = %x / &data  = %x / &sync  = %x\n"
                    "      x_size = %d / y_size = %d / nprocs = %d\n",
                    x , y , l , giet_proctime() , 
                    (unsigned int)fifo, (unsigned int)data, (unsigned int)(&local_sync[x][y]),
                    X_SIZE, Y_SIZE, NB_PROCS_MAX ); 

    // Task load on cluster[0,0] makes global initialisation:
    // - NIC & CMA channels allocation & initialisation.
    // - barrier for load tasks initialisation.
    // Other load task wait completion.
    if ( (x==0) && (y==0) )
    {
        // get NIC_RX channel 
        nic_channel = giet_nic_rx_alloc();

        // start CMA transfer
        giet_nic_rx_start();

        // barrier init
        barrier_init( &barrier, X_SIZE * Y_SIZE );

        global_sync = 1;
    }
    else
    {
        while ( global_sync == 0 ) asm volatile ("nop");
    }    

    // "load" task signals MWMR initialisation to "analyse" tasks
    local_sync[x][y] = 1;

    // main loop (on containers)
    unsigned int container = 0;
    while ( container < CONTAINERS_MAX ) 
    { 
        // get one container from kernel chbuf
        giet_nic_rx_move( nic_channel, temp );

        // get packets number
        unsigned int npackets = temp[0] & 0x0000FFFF;
        unsigned int nwords   = temp[0] >> 16;

        if ( (x==X_SIZE-1) && (y==Y_SIZE-1) )
        giet_shr_printf("\nTask load on P[%d,%d,%d] get container %d at cycle %d"
                        " : %d packets / %d words\n",
                        x, y, l, container, giet_proctime(), npackets, nwords );

        // put container to MWMR channel
        mwmr_write( fifo, temp, 1 );

        container++;
    }

    // all load tasks synchronise before result display
    barrier_wait( &barrier );

    // Task load in cluster[0,0] stops NIC and displays results 
    if ( (x==0) && (y==0) )
    {
        giet_nic_rx_stop();

        giet_shr_printf("\n@@@@ Classification Results @@@\n"
                        " - TYPE 0 : %d packets\n"
                        " - TYPE 1 : %d packets\n"
                        " - TYPE 2 : %d packets\n"
                        " - TYPE 3 : %d packets\n"
                        " - TYPE 4 : %d packets\n"
                        " - TYPE 5 : %d packets\n"
                        " - TYPE 6 : %d packets\n"
                        " - TYPE 7 : %d packets\n"
                        " - TYPE 8 : %d packets\n"
                        " - TYPE 9 : %d packets\n"
                        " - TYPE A : %d packets\n"
                        " - TYPE B : %d packets\n"
                        " - TYPE C : %d packets\n"
                        " - TYPE D : %d packets\n"
                        " - TYPE E : %d packets\n"
                        " - TYPE F : %d packets\n",
                        count[0x0], count[0x1], count[0x2], count[0x3],
                        count[0x4], count[0x5], count[0x6], count[0x7],
                        count[0x8], count[0x9], count[0xA], count[0xB],
                        count[0xC], count[0xD], count[0xE], count[0xF] );

        giet_nic_rx_stats();

    }

    // all load tasks synchronise before exit
    barrier_wait( &barrier );

    giet_exit("Task completed");
 
} // end load()

///////////////////////////////////////////////////////////////////////////////////////////
// Entry point of the "analyse" task.
// - It polls local_sync[x][y] until the local "load" task has set it, then gets
//   the local MWMR fifo descriptor pointer from mwmr[x][y].
// - It loops forever on containers : each container is read from the MWMR fifo to
//   a buffer allocated in the stack, and for each packet in the container, the
//   counter selected by the packet type is atomically incremented.
//
// Container format, as decoded by this function:
// - word[0]      : number of packets (16 LSB) / number of words (16 MSB)
// - word[1..33]  : packet lengths in bytes, two per word (even index in 16 MSB)
// - word[34...]  : packets, each one starting on a word boundary
// As the header content comes from the received stream, the packet number is
// clamped to the header capacity, and the analysis of a container is stopped as
// soon as a packet would be read outside the local buffer.
///////////////////////////////////////////////////////////////////////////////////////////
__attribute__ ((constructor)) void analyse()
{
    enum
    {
        CONTAINER_WORDS = 1024,                  // words in one container (4 Kbytes)
        HEADER_WORDS    = 34,                    // index of first packet word
        PACKETS_MAX     = 2 * (HEADER_WORDS - 1) // two lengths per header word
    };

    // get processor identifiers
    unsigned int    x;
    unsigned int    y;
    unsigned int    l;
    giet_proc_xyp( &x, &y, &l );

    // only the task running in cluster[X_SIZE-1][Y_SIZE-1] displays its activity
    const unsigned int verbose = ( (x == X_SIZE-1) && (y == Y_SIZE-1) );

    // local buffer to store one container
    unsigned int  temp[CONTAINER_WORDS];

    // wait MWMR channel initialisation (done by task load) : the flag is read
    // through a volatile access, to force a new memory read at each iteration
    while ( *(volatile unsigned int*)&local_sync[x][y] == 0 )
    {
        asm volatile ("nop" ::: "memory");
    }

    // compiler barrier : mwmr[x][y] must be read after the flag has been seen set
    asm volatile ("" ::: "memory");

    // get pointer on MWMR channel descriptor 
    mwmr_channel_t* fifo = mwmr[x][y];

    // display status for cluster[X_SIZE-1][Y_SIZE-1]
    if ( verbose )
    {
        giet_shr_printf("\n*** Task analyse starts on P[%d,%d,%d] at cycle %d\n"
                        "       &mwmr = %x / &sync = %x\n",
                        x, y, l, giet_proctime(), 
                        (unsigned int)fifo, (unsigned int)(&local_sync[x][y]) );
    }
    
    // main loop (on containers)
    for ( ;; )
    { 
        // get one container from MWMR fifo
        mwmr_read( fifo, temp, 1 );

        // get packets number and words number from first word
        unsigned int npackets = temp[0] & 0x0000FFFF;
        unsigned int nwords   = temp[0] >> 16;

        if ( verbose )
        {
            giet_shr_printf("\nTask analyse on P[%d,%d,%d] get container at cycle %d"
                            " : %d packets / %d words\n",
                            x, y, l, giet_proctime(), npackets, nwords );
        }

        // the header cannot describe more than PACKETS_MAX packets
        if ( npackets > PACKETS_MAX ) npackets = PACKETS_MAX;

        // index of current packet first word in container
        unsigned int word = HEADER_WORDS;

        // loop on packets
        unsigned int p;
        for ( p = 0 ; p < npackets ; p++ )
        {
            // stop on inconsistent lengths : the type word must be in the buffer
            if ( word + 1 >= CONTAINER_WORDS ) break;

            // get packet length (bytes) from container header
            unsigned int lengths = temp[1 + (p>>1)];
            unsigned int length  = (p & 0x1) ? (lengths & 0x0000FFFF) : (lengths >> 16);

            // get packet type : 4 LSB bits of second packet word (source mac address)
            unsigned int type = temp[word+1] & 0x0000000F;

            // increment counter
            atomic_increment( &count[type], 1 );

            // update word index : packet length rounded up to a number of words
            word += (length + 3) >> 2;
        }
    }
} // end analyse()

