/////////////////////////////////////////////////////////////////////////
// File: top.cpp 
// Author: Alain Greiner 
// Copyright: UPMC/LIP6
// Date : august 2012
// This program is released under the GNU public license
/////////////////////////////////////////////////////////////////////////
// This file defines a generic TSAR architecture with virtual memory.
// The physical address space is 32 bits.
// The number of clusters cannot be larger than 256.
// The number of processors per cluster cannot be larger than 8.
// 
// - It uses four dspin_local_crossbar per cluster as local interconnect 
// - It uses two virtual_dspin routers per cluster as global interconnect
// - It uses the vci_cc_vcache_wrapper 
// - It uses the vci_mem_cache
// - It contains one vci_xicu and one vci_multi_dma per cluster.
//
// All clusters are identical, but the cluster containing address
// 0xBFC00000 (called io_cluster), contains 5 extra components:
// - the boot rom (BROM)
// - the disk controller (BDEV)
// - the multi-channel network controller (MNIC)
// - the multi-channel tty controller (MTTY)
// - the frame buffer controller (FBUF)
//
// It is built with one single component implementing a cluster:
// The TsarV5ClusterMmu component is defined in files
// tsarv5_cluster_mmu.* (with * = cpp, h, sd)
//
// The IRQs are connected to XICUs as follows:
// - The IRQ_IN[0] to IRQ_IN[7] ports are not used in all clusters.
// - The DMA IRQs are connected to IRQ_IN[8] to IRQ_IN[15] in all clusters.
// - The TTY IRQs are connected to IRQ_IN[16] to IRQ_IN[30] in I/O cluster.
// - The BDEV IRQ is connected to IRQ_IN[31] in I/O cluster.
// 
// The main hardware parameters must be defined in the hard_config.h file :
// - CLUSTER_X        : number of clusters in a row (power of 2)
// - CLUSTER_Y        : number of clusters in a column (power of 2)
// - CLUSTER_SIZE     : size of the segment allocated to a cluster
// - NB_PROCS_MAX     : number of processors per cluster (power of 2)
// - NB_DMAS_MAX      : number of DMA channels per cluster (< 9)
// - NB_TTYS          : number of TTY channels in I/O cluster (< 16)
// - NB_NICS          : number of NIC channels in I/O cluster (< 9)
// 
// Some secondary hardware parameters must be defined in this top.cpp file:
// - XRAM_LATENCY     : external ram latency 
// - MEMC_WAYS        : L2 cache number of ways
// - MEMC_SETS        : L2 cache number of sets
// - L1_IWAYS     
// - L1_ISETS    
// - L1_DWAYS   
// - L1_DSETS  
// - FBUF_X_SIZE      : width of frame buffer (pixels)
// - FBUF_Y_SIZE      : height of frame buffer (lines)
// - BDEV_SECTOR_SIZE : block size for block device
// - BDEV_IMAGE_NAME  : file pathname for block device 
// - NIC_RX_NAME      : file pathname for NIC received packets
// - NIC_TX_NAME      : file pathname for NIC transmitted packets
// - NIC_TIMEOUT      : max number of cycles before closing a container
//
// General policy for 32 bits physical address decoding:
// All segments base addresses are multiple of 64 Kbytes
// Therefore the 16 address MSB bits completely define the target: 
// The (x_width + y_width) MSB bits (left aligned) define
// the cluster index, and the 8 LSB bits define the local index:
//      | X_ID  | Y_ID  |---| LADR |     OFFSET          |
//      |x_width|y_width|---|  8   |       16            |
//
// General policy for hardware component indexing:
// Each component is identified by (x_id,y_id,l_id) tuple.
//      | X_ID  | Y_ID  | L_ID |
//      |x_width|y_width|  4   |
/////////////////////////////////////////////////////////////////////////

#include <systemc>
#include <sys/time.h>
#include <iostream>
#include <sstream>
#include <cassert>
#include <cstdlib>
#include <cstdarg>
#include <cstring>
#include <stdint.h>

#include "gdbserver.h"
#include "mapping_table.h"
#include "tsarv5_cluster_mmu.h"
#include "alloc_elems.h"

///////////////////////////////////////////////////
//      OS
///////////////////////////////////////////////////
// Selects the software handed to the loader: when non-zero the loader is built
// from the three ALMOS binaries named below; when zero it is built from the
// single binary designated by soft_name (BOOT_SOFT_NAME unless -SOFT is given).
#define USE_ALMOS 0

// Pathnames of the ALMOS binaries passed to the loader when USE_ALMOS is set.
// NOTE(review): the "@address:D" suffix presumably tells the loader where the
// raw binary must be placed - confirm against the soclib Loader documentation.
#define almos_bootloader_pathname "bootloader.bin"
#define almos_kernel_pathname     "kernel-soclib.bin@0xbfc10000:D"
#define almos_archinfo_pathname   "arch-info.bin@0xBFC08000:D"

///////////////////////////////////////////////////
//               Parallelisation
///////////////////////////////////////////////////
// When non-zero, <omp.h> is included, the OpenMP runtime is configured with the
// thread count given by the -THREADS argument, and the clusters construction
// loop is annotated with OpenMP pragmas.
#define USE_OPENMP               0

#if USE_OPENMP
#include <omp.h>
#endif

//  cluster index (computed from x,y coordinates): clusters are numbered
//  column by column, i.e. index = y + CLUSTER_Y * x.
//  Both arguments and CLUSTER_Y are parenthesized so that expression
//  arguments such as cluster(x+1, y) expand with the intended precedence.
#define cluster(x,y)   ((y) + (CLUSTER_Y)*(x))

///////////////////////////////////////////////////////////
//          DSPIN parameters           
///////////////////////////////////////////////////////////
// Template arguments of the DspinSignals<> channels connecting the clusters
// and of the cluster component itself.
// NOTE(review): presumably the flit widths (in bits) of the command and
// response networks - confirm against the DSPIN components.

#define cmd_width            40
#define rsp_width            33

///////////////////////////////////////////////////////////
//          VCI parameters           
///////////////////////////////////////////////////////////
// These values are the template arguments of soclib::caba::VciParams<>
// (see the vci_param typedef in _main). address_width and srcid_width are
// also used to build the mapping tables and the per-cluster address offset.

#define cell_width            4
#define address_width         32
#define plen_width            8
#define error_width           2
#define clen_width            1
#define rflag_width           1
#define srcid_width           14
#define pktid_width           4
#define trdid_width           4
#define wrplen_width          1

////////////////////////////////////////////////////////////
//    Main Hardware Parameters values         
////////////////////////////////////////////////////////////
// CLUSTER_X, CLUSTER_Y, NB_PROCS_MAX, NB_DMAS_MAX, NB_TTYS and NB_NICS
// are expected to be provided by this header.

#include "hard_config.h"

////////////////////////////////////////////////////////////
//    Secondary Hardware Parameters values         
////////////////////////////////////////////////////////////

// external ram latency, forwarded to every cluster
// NOTE(review): presumably expressed in cycles - confirm in the cluster component
#define XRAM_LATENCY          0

// L2 cache (memory cache) geometry
#define MEMC_WAYS             16
#define MEMC_SETS             256

// L1 instruction cache geometry
#define L1_IWAYS              4
#define L1_ISETS              64

// L1 data cache geometry
#define L1_DWAYS              4
#define L1_DSETS              64

// frame buffer width (pixels) and height (lines)
#define FBUF_X_SIZE           128
#define FBUF_Y_SIZE           128

// block device: block size and default disk image (overridden by -DISK)
#define BDEV_SECTOR_SIZE      512
#define BDEV_IMAGE_NAME       "giet_vm/display/images.raw"

// network controller: files for received / transmitted packets, and
// max number of cycles before closing a container
#define NIC_RX_NAME           "giet_vm/nic/rx_data.txt"
#define NIC_TX_NAME           "giet_vm/nic/tx_data.txt"
#define NIC_TIMEOUT           10000

////////////////////////////////////////////////////////////
//    Software to be loaded in ROM & RAM         
////////////////////////////////////////////////////////////

// default binary given to the loader (overridden by -SOFT)
#define BOOT_SOFT_NAME        "giet_vm/soft.elf"

////////////////////////////////////////////////////////////
//     DEBUG Parameters default values         
////////////////////////////////////////////////////////////

// default value of frozen_cycles (overridden by -FROZEN)
#define MAX_FROZEN_CYCLES     10000

// default values of debug_memc_id / debug_proc_id (overridden by -MEMCID /
// -PROCID): they are larger than any valid index accepted by _main, so that
// no memory cache and no processor is traced unless explicitly selected.
#define TRACE_MEMC_ID         1000000
#define TRACE_PROC_ID         1000000

/////////////////////////////////////////////////////////
//    Physical segments definition
/////////////////////////////////////////////////////////
// There are 3 segments replicated in all clusters
// and 5 specific segments in the "IO" cluster 
// (containing address 0xBF000000)
//
// Sizes that are computed from another parameter are fully
// parenthesized, so that they can safely be used as operands
// of a larger expression.
/////////////////////////////////////////////////////////

// specific segments in "IO" cluster : absolute physical address

#define BROM_BASE               0xBFC00000      
#define BROM_SIZE               0x00100000   // 1 Mbytes

#define FBUF_BASE               0xBFD00000      
#define FBUF_SIZE               0x00200000   // 2 Mbytes

#define BDEV_BASE               0xBFF10000      
#define BDEV_SIZE               0x00001000   // 4 Kbytes

#define MTTY_BASE               0xBFF20000      
#define MTTY_SIZE               0x00001000   // 4 Kbytes

#define MNIC_BASE               0xBFF80000      
#define MNIC_SIZE               (0x00002000 * ((NB_NICS) + 1))  // 8 Kbytes per channel + 8 Kbytes

// replicated segments : address is incremented by a cluster offset 
//     offset  = cluster(x,y) << (address_width-x_width-y_width);

#define MEMC_BASE               0x00000000      
#define MEMC_SIZE               0x00C00000   // 12 Mbytes

#define XICU_BASE               0x00F00000      
#define XICU_SIZE               0x00001000   // 4 Kbytes

#define CDMA_BASE               0x00F30000      
#define CDMA_SIZE               (0x00001000 * (NB_DMAS_MAX))  // 4 Kbytes per channel  

////////////////////////////////////////////////////////////////////
//     TGTID definition in direct space
// For all components:  global TGTID = global SRCID = cluster_index
//
// The values below are the local target indexes inside a cluster:
// each one is used as the second field of the IntTab(cluster, TGTID)
// identifying a segment in the direct mapping table, and is passed
// to the cluster constructor.
////////////////////////////////////////////////////////////////////

#define MEMC_TGTID               0
#define XICU_TGTID               1
#define CDMA_TGTID               2
#define MTTY_TGTID               3
#define FBUF_TGTID               4
#define BROM_TGTID               5
#define BDEV_TGTID               6
#define MNIC_TGTID               7

/////////////////////////////////
// Copies the pathname 'src', given on the command line for 'option', into the
// fixed-size buffer 'dst' of 'dst_size' bytes.
// A pathname that does not fit (terminating NUL included) is reported on
// std::cerr and terminates the program with EXIT_FAILURE, instead of
// overflowing the buffer as an unchecked strcpy() would do.
static void copy_pathname(char* dst, size_t dst_size, const char* src, const char* option)
{
   const size_t length = strlen(src);
   if (length >= dst_size)
   {
      std::cerr << "Error: pathname given to " << option << " is too long ("
                << length << " characters, maximum is " << (dst_size - 1) << ")"
                << std::endl;
      exit(EXIT_FAILURE);
   }
   memcpy(dst, src, length + 1);
}

// Builds and simulates the platform:
// - parses the (key,value) command line arguments,
// - checks the hardware parameters,
// - builds the direct and external mapping tables,
// - allocates the inter-cluster signals, constructs the clusters and
//   connects them as a 2D mesh,
// - runs the simulation for ncycles cycles, with optional traces.
// Returns EXIT_SUCCESS when the requested number of cycles has been simulated.
// The program exits directly when the arguments are not recognized.
int _main(int argc, char *argv[])
{
   using namespace sc_core;
   using namespace soclib::caba;
   using namespace soclib::common;


   char     soft_name[256]   = BOOT_SOFT_NAME;     // pathname to binary code
   size_t   ncycles          = 1000000000;         // simulated cycles
   char     disk_name[256]   = BDEV_IMAGE_NAME;    // pathname to the disk image
   char     nic_rx_name[256] = NIC_RX_NAME;        // pathname to the rx packets file
   char     nic_tx_name[256] = NIC_TX_NAME;        // pathname to the tx packets file
   ssize_t  threads_nr       = 1;                  // simulator's threads number
   bool     debug_ok         = false;              // trace activated
   size_t   debug_period     = 1;                  // trace period
   size_t   debug_memc_id    = TRACE_MEMC_ID;      // index of memc to be traced (cluster_id)  
   size_t   debug_proc_id    = TRACE_PROC_ID;      // index of proc to be traced (global index)
   uint32_t debug_from       = 0;                  // trace start cycle
   uint32_t frozen_cycles    = MAX_FROZEN_CYCLES;  // monitoring frozen processor

   ////////////// command line arguments //////////////////////
   // Arguments are consumed two by two: argv[n] is the key, argv[n+1] the value.
   if (argc > 1)
   {
      for (int n = 1; n < argc; n = n + 2)
      {
         if ((strcmp(argv[n],"-NCYCLES") == 0) && (n+1<argc))
         {
            ncycles = atoi(argv[n+1]);
         }
         else if ((strcmp(argv[n],"-SOFT") == 0) && (n+1<argc) )
         {
            copy_pathname(soft_name, sizeof(soft_name), argv[n+1], "-SOFT");
         }
         else if ((strcmp(argv[n],"-DISK") == 0) && (n+1<argc) )
         {
            copy_pathname(disk_name, sizeof(disk_name), argv[n+1], "-DISK");
         }
         else if ((strcmp(argv[n],"-DEBUG") == 0) && (n+1<argc) )
         {
            debug_ok = true;
            debug_from = atoi(argv[n+1]);
         }
         else if ((strcmp(argv[n],"-MEMCID") == 0) && (n+1<argc) )
         {
            debug_memc_id = atoi(argv[n+1]);
            assert( (debug_memc_id < (CLUSTER_X*CLUSTER_Y) ) && 
                   "debug_memc_id larger than XMAX * YMAX" );
         }
         else if ((strcmp(argv[n],"-PROCID") == 0) && (n+1<argc) )
         {
            debug_proc_id = atoi(argv[n+1]);
            assert( (debug_proc_id < (CLUSTER_X * CLUSTER_Y * NB_PROCS_MAX) ) && 
                   "debug_proc_id larger than XMAX * YMAX * NB_PROCS" );
         }
         else if ((strcmp(argv[n], "-THREADS") == 0) && ((n+1) < argc))
         {
            threads_nr = atoi(argv[n+1]);
            threads_nr = (threads_nr < 1) ? 1 : threads_nr;
         }
         else if ((strcmp(argv[n], "-FROZEN") == 0) && (n+1 < argc))
         {
            frozen_cycles = atoi(argv[n+1]);
         }
         else if ((strcmp(argv[n], "-PERIOD") == 0) && (n+1 < argc))
         {
            debug_period = atoi(argv[n+1]);
         }
         else
         {
            // unknown key, or key without value: print the usage and stop
            std::cout << "   Arguments are (key,value) couples." << std::endl;
            std::cout << "   The order is not important." << std::endl;
            std::cout << "   Accepted arguments are :" << std::endl << std::endl;
            std::cout << "     -SOFT pathname_for_embedded_soft" << std::endl;
            std::cout << "     -DISK pathname_for_disk_image" << std::endl;
            std::cout << "     -NCYCLES number_of_simulated_cycles" << std::endl;
            std::cout << "     -DEBUG debug_start_cycle" << std::endl;
            std::cout << "     -THREADS simulator's threads number" << std::endl;
            std::cout << "     -FROZEN max_number_of_frozen_cycles" << std::endl;
            std::cout << "     -PERIOD number_of_cycles between trace" << std::endl;
            std::cout << "     -MEMCID index_memc_to_be_traced" << std::endl;
            std::cout << "     -PROCID index_proc_to_be_traced" << std::endl;
            exit(0);
         }
      }
   }

   // checking hardware parameters
   assert( ( (CLUSTER_X == 1) or (CLUSTER_X == 2) or (CLUSTER_X == 4) or
             (CLUSTER_X == 8) or (CLUSTER_X == 16) ) and
           "The CLUSTER_X parameter must be 1, 2, 4, 8 or 16" );

   assert( ( (CLUSTER_Y == 1) or (CLUSTER_Y == 2) or (CLUSTER_Y == 4) or
             (CLUSTER_Y == 8) or (CLUSTER_Y == 16) ) and
           "The CLUSTER_Y parameter must be 1, 2, 4, 8 or 16" );

   assert( ( (NB_PROCS_MAX == 1) or (NB_PROCS_MAX == 2) or
             (NB_PROCS_MAX == 4) or (NB_PROCS_MAX == 8) ) and
           "The NB_PROCS_MAX parameter must be 1, 2, 4 or 8" );

   assert( (NB_DMAS_MAX < 9) and
           "The NB_DMAS_MAX parameter must be smaller than 9" );

   // The TTY IRQs use IRQ_IN[16] to IRQ_IN[30] of the I/O cluster XICU,
   // which leaves room for 15 channels (see the file header).
   assert( (NB_TTYS < 16) and
           "The NB_TTYS parameter must be smaller than 16" );

   assert( (NB_NICS < 9) and
           "The NB_NICS parameter must be smaller than 9" );

   std::cout << std::endl;
   std::cout << " - CLUSTER_X    = " << CLUSTER_X << std::endl;
   std::cout << " - CLUSTER_Y    = " << CLUSTER_Y << std::endl;
   std::cout << " - NB_PROCS_MAX = " << NB_PROCS_MAX <<  std::endl;
   std::cout << " - NB_DMAS_MAX  = " << NB_DMAS_MAX <<  std::endl;
   std::cout << " - NB_TTYS      = " << NB_TTYS <<  std::endl;
   std::cout << " - NB_NICS      = " << NB_NICS <<  std::endl;
   std::cout << " - MEMC_WAYS    = " << MEMC_WAYS << std::endl;
   std::cout << " - MEMC_SETS    = " << MEMC_SETS << std::endl;
   std::cout << " - RAM_LATENCY  = " << XRAM_LATENCY << std::endl;
   std::cout << " - MAX_FROZEN   = " << frozen_cycles << std::endl;

   std::cout << std::endl;

#if USE_OPENMP
   omp_set_dynamic(false);
   omp_set_num_threads(threads_nr);
   std::cerr << "Built with openmp version " << _OPENMP << std::endl;
#endif

   // Define VCI parameters
   typedef soclib::caba::VciParams<cell_width,
           plen_width,
           address_width,
           error_width,                                   
           clen_width,
           rflag_width,
           srcid_width,
           pktid_width,
           trdid_width,
           wrplen_width> vci_param;

   // Define parameters depending on mesh size
   size_t   cluster_io_id;   // index of the cluster containing the I/O segments
   size_t   x_width;         // number of bits encoding the x coordinate
   size_t   y_width;         // number of bits encoding the y coordinate

   if      (CLUSTER_X == 1) x_width = 0;
   else if (CLUSTER_X == 2) x_width = 1;
   else if (CLUSTER_X <= 4) x_width = 2;
   else if (CLUSTER_X <= 8) x_width = 3;
   else                        x_width = 4;

   if      (CLUSTER_Y == 1) y_width = 0;
   else if (CLUSTER_Y == 2) y_width = 1;
   else if (CLUSTER_Y <= 4) y_width = 2;
   else if (CLUSTER_Y <= 8) y_width = 3;
   else                        y_width = 4;

   // 0xBF is the address MSB byte of all the I/O specific segments:
   // its (x_width + y_width) upper bits are the index of the I/O cluster.
   cluster_io_id = 0xBF >> (8 - x_width - y_width);

   /////////////////////
   //  Mapping Tables
   /////////////////////

   // direct network
   MappingTable maptabd(address_width, 
         IntTab(x_width + y_width, 16 - x_width - y_width), 
         IntTab(x_width + y_width, srcid_width - x_width - y_width), 
         0x00FF0000);

   for (size_t x = 0; x < CLUSTER_X; x++)
   {
      for (size_t y = 0; y < CLUSTER_Y; y++)
      {
         // base address of the cluster: cluster index in the address MSB bits
         sc_uint<address_width> offset  = cluster(x,y) << (address_width-x_width-y_width);

         std::ostringstream    sh;
         sh << "d_seg_memc_" << x << "_" << y;
         maptabd.add(Segment(sh.str(), MEMC_BASE+offset, MEMC_SIZE, IntTab(cluster(x,y),MEMC_TGTID), true));

         std::ostringstream    si;
         si << "d_seg_xicu_" << x << "_" << y;
         maptabd.add(Segment(si.str(), XICU_BASE+offset, XICU_SIZE, IntTab(cluster(x,y),XICU_TGTID), false));

         std::ostringstream    sd;
         sd << "d_seg_mdma_" << x << "_" << y;
         maptabd.add(Segment(sd.str(), CDMA_BASE+offset, CDMA_SIZE, IntTab(cluster(x,y),CDMA_TGTID), false));

         // segments existing only in the I/O cluster: absolute addresses
         if ( cluster(x,y) == cluster_io_id )
         {
            maptabd.add(Segment("d_seg_mtty", MTTY_BASE, MTTY_SIZE, IntTab(cluster(x,y),MTTY_TGTID), false));
            maptabd.add(Segment("d_seg_fbuf", FBUF_BASE, FBUF_SIZE, IntTab(cluster(x,y),FBUF_TGTID), false));
            maptabd.add(Segment("d_seg_bdev", BDEV_BASE, BDEV_SIZE, IntTab(cluster(x,y),BDEV_TGTID), false));
            maptabd.add(Segment("d_seg_mnic", MNIC_BASE, MNIC_SIZE, IntTab(cluster(x,y),MNIC_TGTID), false));
            maptabd.add(Segment("d_seg_brom", BROM_BASE, BROM_SIZE, IntTab(cluster(x,y),BROM_TGTID), true));
         }
      }
   }
   std::cout << maptabd << std::endl;

/*
   WE DONT NEED any COHERENCE MAPPING TABLE, AS THE DIRECT NETWORK
   USES DIRECT ADRESSING (XID,YID,LID)

   // coherence network
   // - tgtid_c_proc = srcid_c_proc = local procid
   // - tgtid_c_memc = srcid_c_memc = NB_PROCS_MAX
   MappingTable maptabc(address_width, 
         IntTab(x_width + y_width, srcid_width - x_width - y_width), 
         IntTab(x_width + y_width, srcid_width - x_width - y_width), 
         0x00FF0000);

   for (size_t x = 0; x < CLUSTER_X; x++)
   {
      for (size_t y = 0; y < CLUSTER_Y; y++)
      {
         sc_uint<address_width> offset  = cluster(x,y) << (address_width-x_width-y_width);

         // cleanup requests must be routed to the memory cache
         std::ostringstream sh;
         sh << "c_seg_memc_" << x << "_" << y;
         maptabc.add(Segment(sh.str(), (NB_PROCS_MAX << (address_width - srcid_width)) + offset, 
                     0x10, IntTab(cluster(x,y), NB_PROCS_MAX), false));

         // update & invalidate requests must be routed to the proper processor
         for ( size_t p = 0 ; p < NB_PROCS_MAX ; p++) 
         {
            std::ostringstream sp;
            sp << "c_seg_proc_" << x << "_" << y << "_" << p;
            maptabc.add( Segment( sp.str() , (p << (address_width - srcid_width)) + offset , 
                         0x10 , IntTab(cluster(x,y), p) , false)); 
         }
      }
   }
   std::cout << maptabc << std::endl;

*/

   // external network
   MappingTable maptabx(address_width, IntTab(1), IntTab(x_width+y_width), 0xF0000000);

   for (size_t x = 0; x < CLUSTER_X; x++)
   {
      for (size_t y = 0; y < CLUSTER_Y ; y++)
      { 
         sc_uint<address_width> offset  = cluster(x,y) << (address_width-x_width-y_width);
         std::ostringstream sh;
         sh << "x_seg_memc_" << x << "_" << y;
         maptabx.add(Segment(sh.str(), MEMC_BASE+offset, 
                     MEMC_SIZE, IntTab(cluster(x,y)), false));
      }
   }
   std::cout << maptabx << std::endl;

   ////////////////////
   // Signals
   ///////////////////

   sc_clock      signal_clk("clk");
   sc_signal<bool>    signal_resetn("resetn");

   // Horizontal inter-clusters DSPIN signals
   // indexed by [x][y][k]: link between clusters (x,y) and (x+1,y), network k
   DspinSignals<cmd_width>*** signal_dspin_h_cmd_inc =
      alloc_elems<DspinSignals<cmd_width> >("signal_dspin_h_cmd_inc", CLUSTER_X-1, CLUSTER_Y, 2);
   DspinSignals<cmd_width>*** signal_dspin_h_cmd_dec =
      alloc_elems<DspinSignals<cmd_width> >("signal_dspin_h_cmd_dec", CLUSTER_X-1, CLUSTER_Y, 2);
   DspinSignals<rsp_width>*** signal_dspin_h_rsp_inc =
      alloc_elems<DspinSignals<rsp_width> >("signal_dspin_h_rsp_inc", CLUSTER_X-1, CLUSTER_Y, 2);
   DspinSignals<rsp_width>*** signal_dspin_h_rsp_dec =
      alloc_elems<DspinSignals<rsp_width> >("signal_dspin_h_rsp_dec", CLUSTER_X-1, CLUSTER_Y, 2);

   // Vertical inter-clusters DSPIN signals
   // indexed by [x][y][k]: link between clusters (x,y) and (x,y+1), network k
   DspinSignals<cmd_width>*** signal_dspin_v_cmd_inc =
      alloc_elems<DspinSignals<cmd_width> >("signal_dspin_v_cmd_inc", CLUSTER_X, CLUSTER_Y-1, 2);
   DspinSignals<cmd_width>*** signal_dspin_v_cmd_dec =
      alloc_elems<DspinSignals<cmd_width> >("signal_dspin_v_cmd_dec", CLUSTER_X, CLUSTER_Y-1, 2);
   DspinSignals<rsp_width>*** signal_dspin_v_rsp_inc =
      alloc_elems<DspinSignals<rsp_width> >("signal_dspin_v_rsp_inc", CLUSTER_X, CLUSTER_Y-1, 2);
   DspinSignals<rsp_width>*** signal_dspin_v_rsp_dec =
      alloc_elems<DspinSignals<rsp_width> >("signal_dspin_v_rsp_dec", CLUSTER_X, CLUSTER_Y-1, 2);

   // Mesh boundaries DSPIN signals
   // indexed by [x][y][k][direction]: only the entries located on the
   // mesh borders are actually bound to a cluster port
   DspinSignals<cmd_width>**** signal_dspin_false_cmd_in =
      alloc_elems<DspinSignals<cmd_width> >("signal_dspin_false_cmd_in", CLUSTER_X, CLUSTER_Y, 2, 4);
   DspinSignals<cmd_width>**** signal_dspin_false_cmd_out =
      alloc_elems<DspinSignals<cmd_width> >("signal_dspin_false_cmd_out", CLUSTER_X, CLUSTER_Y, 2, 4);
   DspinSignals<rsp_width>**** signal_dspin_false_rsp_in =
      alloc_elems<DspinSignals<rsp_width> >("signal_dspin_false_rsp_in", CLUSTER_X, CLUSTER_Y, 2, 4);
   DspinSignals<rsp_width>**** signal_dspin_false_rsp_out =
      alloc_elems<DspinSignals<rsp_width> >("signal_dspin_false_rsp_out", CLUSTER_X, CLUSTER_Y, 2, 4);


   ////////////////////////////
   //      Loader    
   ////////////////////////////

#if USE_ALMOS
   soclib::common::Loader loader(almos_bootloader_pathname,
                                 almos_archinfo_pathname,
                                 almos_kernel_pathname);
#else
   soclib::common::Loader loader(soft_name);
#endif

   typedef soclib::common::GdbServer<soclib::common::Mips32ElIss> proc_iss;
   proc_iss::set_loader(loader);

   ////////////////////////////
   // Clusters construction
   ////////////////////////////

   TsarV5ClusterMmu<vci_param, proc_iss, cmd_width, rsp_width>* clusters[CLUSTER_X][CLUSTER_Y];

#if USE_OPENMP
#pragma omp parallel
    {
#pragma omp for
#endif
        for(size_t i = 0; i  < (CLUSTER_X * CLUSTER_Y); i++)
        {
            // i is the cluster index: cluster(x,y) == i
            size_t x = i / CLUSTER_Y;
            size_t y = i % CLUSTER_Y;

#if USE_OPENMP
#pragma omp critical
            {
#endif
            std::ostringstream sc;
            sc << "cluster_" << x << "_" << y;
            clusters[x][y] = new TsarV5ClusterMmu<vci_param, proc_iss, cmd_width, rsp_width>
            (
                sc.str().c_str(),
                NB_PROCS_MAX,
                NB_TTYS,  
                NB_DMAS_MAX, 
                x,
                y,
                cluster(x,y),
                maptabd,
                maptabx,
                x_width,
                y_width,
                4,            // l_id width,
                MEMC_TGTID,
                XICU_TGTID,
                CDMA_TGTID,
                FBUF_TGTID,
                MTTY_TGTID,
                BROM_TGTID,
                MNIC_TGTID,
                BDEV_TGTID,
                MEMC_WAYS,
                MEMC_SETS,
                L1_IWAYS,
                L1_ISETS,
                L1_DWAYS,
                L1_DSETS,
                XRAM_LATENCY,
                (cluster(x,y) == cluster_io_id),
                FBUF_X_SIZE,
                FBUF_Y_SIZE,
                disk_name,
                BDEV_SECTOR_SIZE,
                NB_NICS,
                nic_rx_name,
                nic_tx_name,
                NIC_TIMEOUT,
                loader,
                frozen_cycles,
                debug_from,
                debug_ok and (cluster(x,y) == debug_memc_id),
                // debug_proc_id is a global processor index: the traced
                // processor belongs to cluster (debug_proc_id / NB_PROCS_MAX)
                debug_ok and (cluster(x,y) == (debug_proc_id / NB_PROCS_MAX)) 
            );

            std::cout << "cluster_" << x << "_" << y << " constructed" << std::endl;
#if USE_OPENMP
            } // end critical
#endif
        } // end for
#if USE_OPENMP
    }
#endif

   ///////////////////////////////////////////////////////////////
   //     Net-list 
   ///////////////////////////////////////////////////////////////

   // Clock & RESET
   for (size_t x = 0; x < (CLUSTER_X); x++){
      for (size_t y = 0; y < CLUSTER_Y; y++){
         clusters[x][y]->p_clk     (signal_clk);
         clusters[x][y]->p_resetn  (signal_resetn);
      }
   }

   // Inter Clusters horizontal connections
   if (CLUSTER_X > 1){
      for (size_t x = 0; x < (CLUSTER_X-1); x++){
         for (size_t y = 0; y < CLUSTER_Y; y++){
            for (size_t k = 0; k < 2; k++){
               clusters[x][y]->p_cmd_out[k][EAST]      (signal_dspin_h_cmd_inc[x][y][k]);
               clusters[x+1][y]->p_cmd_in[k][WEST]     (signal_dspin_h_cmd_inc[x][y][k]);
               clusters[x][y]->p_cmd_in[k][EAST]       (signal_dspin_h_cmd_dec[x][y][k]);
               clusters[x+1][y]->p_cmd_out[k][WEST]    (signal_dspin_h_cmd_dec[x][y][k]);
               clusters[x][y]->p_rsp_out[k][EAST]      (signal_dspin_h_rsp_inc[x][y][k]);
               clusters[x+1][y]->p_rsp_in[k][WEST]     (signal_dspin_h_rsp_inc[x][y][k]);
               clusters[x][y]->p_rsp_in[k][EAST]       (signal_dspin_h_rsp_dec[x][y][k]);
               clusters[x+1][y]->p_rsp_out[k][WEST]    (signal_dspin_h_rsp_dec[x][y][k]);
            }
         }
      }
   }
   std::cout << std::endl << "Horizontal connections established" << std::endl;   

   // Inter Clusters vertical connections
   if (CLUSTER_Y > 1) {
      for (size_t y = 0; y < (CLUSTER_Y-1); y++){
         for (size_t x = 0; x < CLUSTER_X; x++){
            for (size_t k = 0; k < 2; k++){
               clusters[x][y]->p_cmd_out[k][NORTH]     (signal_dspin_v_cmd_inc[x][y][k]);
               clusters[x][y+1]->p_cmd_in[k][SOUTH]    (signal_dspin_v_cmd_inc[x][y][k]);
               clusters[x][y]->p_cmd_in[k][NORTH]      (signal_dspin_v_cmd_dec[x][y][k]);
               clusters[x][y+1]->p_cmd_out[k][SOUTH]   (signal_dspin_v_cmd_dec[x][y][k]);
               clusters[x][y]->p_rsp_out[k][NORTH]     (signal_dspin_v_rsp_inc[x][y][k]);
               clusters[x][y+1]->p_rsp_in[k][SOUTH]    (signal_dspin_v_rsp_inc[x][y][k]);
               clusters[x][y]->p_rsp_in[k][NORTH]      (signal_dspin_v_rsp_dec[x][y][k]);
               clusters[x][y+1]->p_rsp_out[k][SOUTH]   (signal_dspin_v_rsp_dec[x][y][k]);
            }
         }
      }
   }
   std::cout << "Vertical connections established" << std::endl;

   // East & West boundary cluster connections
   for (size_t y = 0; y < CLUSTER_Y; y++)
   {
      for (size_t k = 0; k < 2; k++)
      {
         clusters[0][y]->p_cmd_in[k][WEST]          (signal_dspin_false_cmd_in[0][y][k][WEST]);
         clusters[0][y]->p_cmd_out[k][WEST]         (signal_dspin_false_cmd_out[0][y][k][WEST]);
         clusters[0][y]->p_rsp_in[k][WEST]          (signal_dspin_false_rsp_in[0][y][k][WEST]);
         clusters[0][y]->p_rsp_out[k][WEST]         (signal_dspin_false_rsp_out[0][y][k][WEST]);

         clusters[CLUSTER_X-1][y]->p_cmd_in[k][EAST]     (signal_dspin_false_cmd_in[CLUSTER_X-1][y][k][EAST]);
         clusters[CLUSTER_X-1][y]->p_cmd_out[k][EAST]    (signal_dspin_false_cmd_out[CLUSTER_X-1][y][k][EAST]);
         clusters[CLUSTER_X-1][y]->p_rsp_in[k][EAST]     (signal_dspin_false_rsp_in[CLUSTER_X-1][y][k][EAST]);
         clusters[CLUSTER_X-1][y]->p_rsp_out[k][EAST]    (signal_dspin_false_rsp_out[CLUSTER_X-1][y][k][EAST]);
      }
   }

   // North & South boundary clusters connections
   for (size_t x = 0; x < CLUSTER_X; x++)
   {
      for (size_t k = 0; k < 2; k++)
      {
         clusters[x][0]->p_cmd_in[k][SOUTH]         (signal_dspin_false_cmd_in[x][0][k][SOUTH]);
         clusters[x][0]->p_cmd_out[k][SOUTH]        (signal_dspin_false_cmd_out[x][0][k][SOUTH]);
         clusters[x][0]->p_rsp_in[k][SOUTH]         (signal_dspin_false_rsp_in[x][0][k][SOUTH]);
         clusters[x][0]->p_rsp_out[k][SOUTH]        (signal_dspin_false_rsp_out[x][0][k][SOUTH]);

         clusters[x][CLUSTER_Y-1]->p_cmd_in[k][NORTH]    (signal_dspin_false_cmd_in[x][CLUSTER_Y-1][k][NORTH]);
         clusters[x][CLUSTER_Y-1]->p_cmd_out[k][NORTH]   (signal_dspin_false_cmd_out[x][CLUSTER_Y-1][k][NORTH]);
         clusters[x][CLUSTER_Y-1]->p_rsp_in[k][NORTH]    (signal_dspin_false_rsp_in[x][CLUSTER_Y-1][k][NORTH]);
         clusters[x][CLUSTER_Y-1]->p_rsp_out[k][NORTH]   (signal_dspin_false_rsp_out[x][CLUSTER_Y-1][k][NORTH]);
      }
   }


   ////////////////////////////////////////////////////////
   //   Simulation
   ///////////////////////////////////////////////////////

   sc_start(sc_core::sc_time(0, SC_NS));
   signal_resetn = false;

   // network boundaries signals
   // the unconnected mesh borders never send (write = false)
   // and always accept (read = true)
   for (size_t x = 0; x < CLUSTER_X ; x++){
      for (size_t y = 0; y < CLUSTER_Y ; y++){
         for (size_t k = 0; k < 2; k++){
            for (size_t a = 0; a < 4; a++){
               signal_dspin_false_cmd_in[x][y][k][a].write = false;
               signal_dspin_false_cmd_in[x][y][k][a].read = true;
               signal_dspin_false_cmd_out[x][y][k][a].write = false;
               signal_dspin_false_cmd_out[x][y][k][a].read = true;

               signal_dspin_false_rsp_in[x][y][k][a].write = false;
               signal_dspin_false_rsp_in[x][y][k][a].read = true;
               signal_dspin_false_rsp_out[x][y][k][a].write = false;
               signal_dspin_false_rsp_out[x][y][k][a].read = true;
            }
         }
      }
   }

   // reset is released after the first simulated nanosecond
   sc_start(sc_core::sc_time(1, SC_NS));
   signal_resetn = true;

   // each iteration simulates 1 ns
   for (size_t n = 1; n < ncycles; n++)
   {

      if (debug_ok and (n > debug_from) and (n % debug_period == 0))
      {
         std::cout << "****************** cycle " << std::dec << n ;
         std::cout << " ************************************************" << std::endl;

         // trace proc[debug_proc_id] 
         // debug_proc_id is a global processor index, as checked when parsing
         // -PROCID: it is split into a cluster index and a local index.
         if ( debug_proc_id < (CLUSTER_X * CLUSTER_Y * NB_PROCS_MAX) )
         {
             size_t proc_lid     = debug_proc_id % NB_PROCS_MAX;
             size_t proc_cluster = debug_proc_id / NB_PROCS_MAX;
             size_t proc_x       = proc_cluster / CLUSTER_Y;
             size_t proc_y       = proc_cluster % CLUSTER_Y;

             std::ostringstream proc_signame;
             proc_signame << "proc_" << proc_lid;

             clusters[proc_x][proc_y]->proc[proc_lid]->print_trace();
             clusters[proc_x][proc_y]->signal_vci_ini_proc[proc_lid].print_trace(proc_signame.str().c_str());
         }

         // trace memc[debug_memc_id] 
         if ( debug_memc_id < (CLUSTER_X * CLUSTER_Y) )
         {
             size_t memc_x = debug_memc_id / CLUSTER_Y;
             size_t memc_y = debug_memc_id % CLUSTER_Y;

             clusters[memc_x][memc_y]->memc->print_trace();
             clusters[memc_x][memc_y]->signal_vci_tgt_memc.print_trace("memc");
         }

// clusters[0][0]->signal_vci_tgt_xicu.print_trace("xicu_0_0");
// clusters[0][1]->signal_vci_tgt_xicu.print_trace("xicu_0_1");
// clusters[1][0]->signal_vci_tgt_xicu.print_trace("xicu_1_0");
// clusters[1][1]->signal_vci_tgt_xicu.print_trace("xicu_1_1");

// if ( clusters[1][1]->signal_irq_mdma[0].read() ) 
//    std::cout << std::endl << " IRQ_DMA_1_1 activated" << std::endl;
// if ( clusters[1][1]->signal_proc_it[0].read() )
//    std::cout <<  " IRQ_PROC_1_1 activated" << std::endl << std::endl;

// trace ioc component 
// size_t io_x   = cluster_io_id / CLUSTER_Y;
// size_t io_y   = cluster_io_id % CLUSTER_Y;
// clusters[io_x][io_y]->bdev->print_trace();
// clusters[io_x][io_y]->signal_vci_tgt_bdev.print_trace("bdev_tgt  ");
// clusters[io_x][io_y]->signal_vci_ini_bdev.print_trace("bdev_ini  ");

// clusters[1][1]->mdma->print_trace();
// clusters[1][1]->signal_vci_tgt_mdma.print_trace("mdma_1_1_tgt  ");
// clusters[1][1]->signal_vci_ini_mdma.print_trace("mdma_1_1_ini  ");

      }

      sc_start(sc_core::sc_time(1, SC_NS));
   }
   return EXIT_SUCCESS;
}

// SystemC entry point: runs _main() and reports escaping exceptions.
// - returns the value returned by _main() when it completes,
// - returns 1 after printing what() when a std::exception is caught,
// - prints a message and re-throws any other exception.
int sc_main (int argc, char *argv[])
{
   int status = 1;   // value returned when a std::exception is caught

   try
   {
      status = _main(argc, argv);
   }
   catch (std::exception &error)
   {
      std::cout << error.what() << std::endl;
   }
   catch (...)
   {
      std::cout << "Unknown exception occured" << std::endl;
      throw;
   }

   return status;
}


// Local Variables:
// tab-width: 3
// c-basic-offset: 3
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=3:tabstop=3:softtabstop=3




