///////////////////////////////////////////////////////////////////////////////
// File: top.cpp
// Author: Alain Greiner
// Copyright: UPMC/LIP6
// Date : august 2013
// This program is released under the GNU public license
//
// Modified by: Cesar Fuguet
// Modified on: mars 2014
///////////////////////////////////////////////////////////////////////////////
// This file defines a generic TSAR architecture with an IO network emulating
// an external bus (i.e. Hypertransport) to access external peripherals:
//
// - BROM : boot ROM
// - FBUF : Frame Buffer
// - MTTY : multi TTY (up to 15 channels)
// - MNIC : Network controller (up to 2 channels)
// - CDMA : Chained Buffer DMA controller (up to 4 channels)
// - BDEV : Block Device controller (1 channel)
//
// The internal physical address space is 40 bits.
//
// It contains a 2D mesh of XMAX*YMAX clusters, and the cluster index
// is encoded on 8 bits (X_WIDTH = 4 / Y_WIDTH = 4) whatever the mesh size.
//
// It contains 3 networks:
//
// 1) the INT network supports Read/Write transactions
//    between processors and L2 caches or peripherals.
//    (VCI ADDRESS = 40 bits / VCI DATA width = 32 bits)
//    It also supports coherence transactions between L1 & L2 caches.
// 2) the RAM network is emulating the 3D network between L2 caches
//    and L3 caches, and is implemented as a 2D mesh between the L2 caches,
//    the two IO bridges and the physical RAMs distributed in all clusters.
//    (VCI ADDRESS = 40 bits / VCI DATA = 64 bits)
// 3) the IOX network connects the two IO bridge components to the
//    6 external peripheral controllers.
//    (VCI ADDRESS = 40 bits / VCI DATA width = 64 bits)
//
// The external peripherals IRQs are connected to the XPIC component
// in IOX interconnect.
//
// All clusters are identical, but cluster(0,0) and cluster(XMAX-1,YMAX-1)
// contain an extra IO bridge component. These IOB0 & IOB1 components are
// connected to the three networks (INT, RAM, IOX).
// The number of clusters cannot be larger than 256.
// The number of processors per cluster cannot be larger than 4.
//
// - It uses two dspin_local_crossbar per cluster to implement the
//   local interconnect corresponding to the INT network.
// - It uses three dspin_local_crossbar per cluster to implement the
//   local interconnect corresponding to the coherence INT network.
// - It uses two virtual_dspin_router per cluster to implement
//   the INT network (routing both the direct and coherence traffic).
// - It uses two dspin_router per cluster to implement the RAM network.
// - It uses the vci_cc_vcache_wrapper.
// - It uses the vci_mem_cache.
// - It contains one vci_xicu and one vci_multi_dma per cluster.
// - It contains one vci_simple_ram per cluster to model the L3 cache.
// - It contains one vci_simple_rom per cluster which can be used for a
//   distributed boot.
// - It contains one vci_multi_tty per cluster for debug purposes (number
//   of channels can be 0). This TTY is mostly useful when using distributed
//   boot.
//
// General policy for 40 bits physical address decoding:
// All physical segments base addresses are multiple of 16 Mbytes
// (=> the 24 LSB bits = 0, and the 16 MSB bits define the target)
// The (x_width + y_width) MSB bits define the cluster index, and the
// LADR bits define the local index:
//      | X_ID  | Y_ID  |---| LADR |     OFFSET          |
//      |x_width|y_width|---|  8   |       24            |
//
// General policy for 14 bits SRCID decoding:
// Each component is identified by (x_id, y_id, l_id) tuple.
//      | X_ID  | Y_ID  |---| L_ID |
//      |x_width|y_width|---|  6   |
/////////////////////////////////////////////////////////////////////////

#include <systemc>
#include <sys/time.h>
#include <iostream>
#include <sstream>
#include <cstdlib>
#include <cstdarg>
#include <stdint.h>
#include <string>

#include "gdbserver.h"
#include "mapping_table.h"

#include "tsar_iob_cluster.h"
#include "vci_chbuf_dma.h"
#include "vci_multi_tty.h"
#include "vci_multi_nic.h"
#include "vci_block_device_tsar.h"
#include "vci_framebuffer.h"
#include "vci_iopic.h"
#include "vci_iox_network.h"

#include "alloc_elems.h"
#include "hard_config.h"

////////////////////////////////////////////////////////////////////////
//               Parallelization
////////////////////////////////////////////////////////////////////////

// Compile-time switch for the OpenMP build: when set to a non-zero value,
// <omp.h> is included here and the OpenMP runtime is configured in _main()
// (dynamic adjustment disabled, thread count taken from -THREADS).
// With the default value 0, no OpenMP code is compiled.
#define USE_OPENMP 0

#if USE_OPENMP
#include <omp.h>
#endif

////////////////////////////////////////////////////////////////////////
//          DSPIN parameters
////////////////////////////////////////////////////////////////////////

// Width parameters of the INT network DSPIN channels (command / response).
// They are passed as template arguments to TsarIobCluster<> and
// DspinSignals<>.
// NOTE(review): presumably flit widths in bits -- confirm against the DSPIN
// component documentation.
#define dspin_int_cmd_width 39
#define dspin_int_rsp_width 32

// Width parameters of the RAM network DSPIN channels (command / response).
#define dspin_ram_cmd_width 64
#define dspin_ram_rsp_width 64

////////////////////////////////////////////////////////////////////////
//         VCI fields width  for the 3 VCI networks
////////////////////////////////////////////////////////////////////////

// First VciParams<> template argument. vci_cell_width_int is used for the
// vci_param_int parameter set and vci_cell_width_ext for vci_param_ext
// (the latter is the type of the IOX network VCI signals).
// NOTE(review): presumably expressed in bytes (4 => 32-bit data, 8 => 64-bit
// data, which would match the data widths given in the file header) --
// confirm against the VciParams documentation.
#define vci_cell_width_int 4
#define vci_cell_width_ext 8

// Remaining VciParams<> template arguments; identical for both parameter
// sets. vci_address_width is also the address width given to the three
// mapping tables, and vci_srcid_width is the total SRCID width that the
// mapping tables split into a cluster field and a local field.
#define vci_plen_width     8
#define vci_address_width  40
#define vci_rerror_width   1
#define vci_clen_width     1
#define vci_rflag_width    1
#define vci_srcid_width    14
#define vci_pktid_width    4
#define vci_trdid_width    4
#define vci_wrplen_width   1

////////////////////////////////////////////////////////////////////////
//    Secondary Hardware Parameters values
////////////////////////////////////////////////////////////////////////

// External RAM latency; printed as "RAM_LATENCY" in the startup banner.
// NOTE(review): unit (presumably cycles) is not visible here -- confirm.
#define XRAM_LATENCY     0

// L2 cache (memory cache) geometry; printed in the startup banner.
#define MEMC_WAYS        16
#define MEMC_SETS        256

// L1 instruction cache geometry.
#define L1_IWAYS         4
#define L1_ISETS         64

// L1 data cache geometry.
#define L1_DWAYS         4
#define L1_DSETS         64

// Frame buffer dimensions.
// NOTE(review): presumably in pixels -- confirm against vci_framebuffer.
#define FBUF_X_SIZE      128
#define FBUF_Y_SIZE      128

// Block device: sector size (copied into block_size in _main) and default
// disk image path (can be overridden with the -DISK argument).
#define BDEV_SECTOR_SIZE 512
#define BDEV_IMAGE_NAME  "/dev/null"

// Network controller: default RX / TX packet file paths and timeout.
// NOTE(review): the unit of NIC_TIMEOUT is not visible here -- confirm
// against vci_multi_nic.
#define NIC_RX_NAME      "/dev/null"
#define NIC_TX_NAME      "/dev/null"
#define NIC_TIMEOUT      10000

// Shorthand for the cluster index of mesh coordinates (x,y).
// It expands to a call on TsarIobClusterType, which is a typedef declared
// locally inside _main(): the macro can therefore only be used at a point
// where that typedef is in scope.
#define cluster(x,y)     TsarIobClusterType::clusterId((x),(y))

////////////////////////////////////////////////////////////////////////
//    Software to be loaded in ROM & RAM
////////////////////////////////////////////////////////////////////////

// Default path of the binary code to load; used to initialize soft_name in
// _main() and can be overridden with the -SOFT argument.
#define BOOT_SOFT_NAME "/dev/null"

////////////////////////////////////////////////////////////////////////
//     DEBUG Parameters default values
////////////////////////////////////////////////////////////////////////

// Default value of frozen_cycles (frozen processors monitoring) in _main();
// can be overridden with the -FROZEN argument.
#define MAX_FROZEN_CYCLES 10000

////////////////////////////////////////////////////////////////////////
//          SRCID definition
////////////////////////////////////////////////////////////////////////
// All initiators are in the same indexing space (14 bits).
// The SRCID is structured in two fields:
// - The (X_WIDTH + Y_WIDTH) = 8 MSB bits define the cluster index
// - The 6 LSB bits define the local index.
// Two different initiators cannot have the same SRCID, but a given
// initiator can have two alias SRCIDs:
// - Internal initiators (procs, mdma) are replicated in all clusters,
//   and each initiator has one single SRCID.
// - External initiators (bdev, cdma) are not replicated, but can be
//   accessed in 2 clusters : cluster_iob0 and cluster_iob1.
//   They have the same local index, but two different cluster indexes.
// As cluster_iob0 and cluster_iob1 contain both internal initiators
// and external initiators, they must have different local indexes.
// Consequence: For a local interconnect, the INI_ID port index
// is NOT equal to the SRCID local index, and the local interconnect
// must make a translation: SRCID => INI_ID (port index)
////////////////////////////////////////////////////////////////////////

// Local index field of the SRCID for each kind of initiator.
// Processor p uses PROC_LOCAL_SRCID + p. These values are translated to
// local interconnect port indexes through the srcid_map() calls in _main().
#define PROC_LOCAL_SRCID 0x0 // from 0 to 7
#define MDMA_LOCAL_SRCID 0x8
#define IOBX_LOCAL_SRCID 0x9
#define CDMA_LOCAL_SRCID 0xA
#define BDEV_LOCAL_SRCID 0xB
#define XPIC_LOCAL_SRCID 0xC

///////////////////////////////////////////////////////////////////////
//     TGT_ID and INI_ID port indexing for IOX local interconnect
///////////////////////////////////////////////////////////////////////

// Target port indexes on the IOX interconnect; used as the local index of
// the IntTab attached to each IOX segment in maptab_iox.
#define IOX_FBUF_TGT_ID 0
#define IOX_BDEV_TGT_ID 1
#define IOX_MNIC_TGT_ID 2
#define IOX_CDMA_TGT_ID 3
#define IOX_MTTY_TGT_ID 4
#define IOX_XPIC_TGT_ID 5
#define IOX_IOB0_TGT_ID 6
#define IOX_IOB1_TGT_ID 7

// Initiator port indexes on the IOX interconnect; used as the destination
// of the maptab_iox.srcid_map() translations. For IOB0 and IOB1 the same
// value is also used as the SRCID local index.
#define IOX_BDEV_INI_ID 0
#define IOX_CDMA_INI_ID 1
#define IOX_XPIC_INI_ID 2
#define IOX_IOB0_INI_ID 3
#define IOX_IOB1_INI_ID 4

////////////////////////////////////////////////////////////////////////
int _main(int argc, char *argv[]) {
   using namespace sc_core;
   using namespace soclib::caba;
   using namespace soclib::common;

   char     soft_name[256]   = BOOT_SOFT_NAME;    // path: binary code
   uint64_t ncycles          = 1000000000;        // simulated cycles
   char     disk_name[256]   = BDEV_IMAGE_NAME;   // path: disk image
   char     nic_rx_name[256] = NIC_RX_NAME;       // path: rx packets file
   char     nic_tx_name[256] = NIC_TX_NAME;       // path: tx packets file
   ssize_t  threads_nr       = 1;                 // simulator's threads
   bool     debug_ok         = false;
   size_t   debug_period     = 1;                 // trace period
   size_t   debug_memc_id    = 0xFFFFFFFF;        // idx of traced memc
   size_t   debug_proc_id    = 0xFFFFFFFF;        // idx of traced proc
   bool     debug_iob        = false;             // trace iobs when true
   uint32_t debug_from       = 0;                 // trace start cycle
   uint32_t frozen_cycles    = MAX_FROZEN_CYCLES; // monitoring frozen procs
   bool     distboot         = false;             // distributed boot
   const size_t block_size   = BDEV_SECTOR_SIZE;  // disk block size
   const size_t x_size       = X_SIZE;
   const size_t y_size       = Y_SIZE;

   assert((X_WIDTH == 4) and (Y_WIDTH == 4));

   ////////////// command line arguments //////////////////////
   if (argc > 1) {
      for (int n = 1; n < argc; n = n + 2) {
         if ((strcmp(argv[n],"-NCYCLES") == 0) && ((n+1) < argc)) {
            ncycles = strtoll(argv[n+1], NULL, 0);
            continue;
         }
         if ((strcmp(argv[n],"-SOFT") == 0) && ((n+1) < argc) ) {
            strcpy(soft_name, argv[n+1]);
            continue;
         }
         if ((strcmp(argv[n],"-DISK") == 0) && ((n+1) < argc) ) {
            strcpy(disk_name, argv[n+1]);
            continue;
         }
         if ((strcmp(argv[n],"-DEBUG") == 0) && ((n+1) < argc) ) {
            debug_ok = true;
            debug_from = strtol(argv[n+1], NULL, 0);
            continue;
         }
         if ((strcmp(argv[n],"-MEMCID") == 0) && ((n+1) < argc) ) {
            debug_memc_id = strtol(argv[n+1], NULL, 0);
            size_t x = debug_memc_id >> Y_WIDTH;
            size_t y = debug_memc_id  & ((1 << Y_WIDTH) - 1);
            assert((x < x_size) && (y < y_size));
            continue;
         }
         if ((strcmp(argv[n],"-IOB") == 0) && ((n+1) < argc) ) {
            debug_iob = (strtol(argv[n+1], NULL, 0) != 0) ? 1 : 0;
            continue;
         }
         if ((strcmp(argv[n],"-PROCID") == 0) && ((n+1) < argc) ) {
            debug_proc_id     = strtol(argv[n+1], NULL, 0);
            size_t cluster_xy = debug_proc_id / NB_PROCS ;
            size_t x          = cluster_xy >> Y_WIDTH;
            size_t y          = cluster_xy  & ((1 << Y_WIDTH) - 1);
            assert((x < x_size) && (y < y_size));
            continue;
         }
         if ((strcmp(argv[n], "-THREADS") == 0) && ((n+1) < argc)) {
            threads_nr = strtol(argv[n+1], NULL, 0);
            assert(threads_nr > 0);
            continue;
         }
         if ((strcmp(argv[n], "-FROZEN") == 0) && ((n+1) < argc)) {
            frozen_cycles = strtol(argv[n+1], NULL, 0);
            assert(frozen_cycles > 0);
            continue;
         }
         if ((strcmp(argv[n], "-PERIOD") == 0) && ((n+1) < argc)) {
            debug_period = strtol(argv[n+1], NULL, 0);
            assert(debug_period > 0);
            continue;
         }
         if ((strcmp(argv[n], "-DISTBOOT") == 0)) {
            distboot = true;
            continue;
         }

         std::cout
            << "\nArguments are (key,value) couples."
            << "\nThe order is not important."
            << "\nAccepted arguments are :\n"
            << "\n    -NCYCLES   number of simulated_cycles"
            << "\n    -SOFT      pathname for embedded soft"
            << "\n    -DISK      pathname for disk image"
            << "\n    -DEBUG     debug start cycle"
            << "\n    -MEMCID    index of memc to trace"
            << "\n    -IOB       debug IOBs if non_zero_value"
            << "\n    -PROCID    index of proc to trace"
            << "\n    -THREADS   simulator's threads number"
            << "\n    -FROZEN    max number of frozen cycles"
            << "\n    -PERIOD    number of cycles between trace"
            << "\n    -DISTBOOT  use distributed boot ROM"
            << "\n               (processors physical address extention is"
            << "\n               initialized with local cluster id)"
            << std::endl;

         exit(0);
      }
   }

   assert( (NB_TTY_CHANNELS < 16) and
         "The NB_TTY_CHANNELS parameter must be smaller than 16" );

   assert( (NB_NIC_CHANNELS == 1) and
         "The NB_NIC_CHANNELS parameter must be 1" );

   assert( (x_size > 0) and (y_size > 0) and
         "Number of clusters on X and Y must be at least 1" );

   std::cout << "\n- X_SIZE          = " << x_size
             << "\n- Y_SIZE          = " << y_size
             << "\n- NB_PROCS        = " << NB_PROCS
             << "\n- NB_DMA_CHANNELS = " << NB_DMA_CHANNELS
             << "\n- NB_TTY_CHANNELS = " << NB_TTY_CHANNELS
             << "\n- NB_NIC_CHANNELS = " << NB_NIC_CHANNELS
             << "\n- MEMC_WAYS       = " << MEMC_WAYS
             << "\n- MEMC_SETS       = " << MEMC_SETS
             << "\n- RAM_LATENCY     = " << XRAM_LATENCY
             << "\n- MAX_FROZEN      = " << frozen_cycles
             << "\n- DISTBOOT        = " << distboot
             << std::endl;

   std::cout << std::endl;

#if USE_OPENMP
   omp_set_dynamic(false);
   omp_set_num_threads(threads_nr);
   std::cerr << "Built with openmp version " << _OPENMP << std::endl;
#endif

   // Define VciParams objects
   typedef soclib::caba::VciParams<vci_cell_width_int,
           vci_plen_width,
           vci_address_width,
           vci_rerror_width,
           vci_clen_width,
           vci_rflag_width,
           vci_srcid_width,
           vci_pktid_width,
           vci_trdid_width,
           vci_wrplen_width> vci_param_int;

   typedef soclib::caba::VciParams<vci_cell_width_ext,
           vci_plen_width,
           vci_address_width,
           vci_rerror_width,
           vci_clen_width,
           vci_rflag_width,
           vci_srcid_width,
           vci_pktid_width,
           vci_trdid_width,
           vci_wrplen_width> vci_param_ext;

   // Clusters
   typedef TsarIobCluster<vci_param_int, vci_param_ext, dspin_int_cmd_width,
           dspin_int_rsp_width, dspin_ram_cmd_width, dspin_ram_rsp_width>
              TsarIobClusterType;

   // clusters containing IOB0 and IOB1
   size_t cluster_iob0 = cluster(0, 0);
   size_t cluster_iob1 = cluster(x_size - 1, y_size - 1);

   // using mono cluster configuration (only one IO bridge) ?
   bool is_mono_cluster = ((x_size == 1) && (y_size == 1));

   /////////////////////////////////////////////////////////////////////
   // INT network mapping table
   // - two levels address decoding for commands
   // - two levels srcid decoding for responses
   // - NB_PROCS + 2 (MDMA, IOBX) local initiators per cluster
   // - up to 6 local targets (MEMC, XICU, BROM, MTTY, MDMA, IOBX) per cluster
   //   (IOBX segments only in cluster_iob0 and cluster_iob1)
   /////////////////////////////////////////////////////////////////////
   MappingTable maptab_int(
         vci_address_width,
         IntTab(X_WIDTH + Y_WIDTH, 16 - X_WIDTH - Y_WIDTH),
         IntTab(X_WIDTH + Y_WIDTH, vci_param_int::S - X_WIDTH - Y_WIDTH),
         0x00FF000000);

   for (size_t x = 0; x < x_size; x++) {
      for (size_t y = 0; y < y_size; y++) {
         uint64_t offset = ((uint64_t)cluster(x,y))
            << (vci_address_width - X_WIDTH - Y_WIDTH);
         const bool config    = true;
         const bool cacheable = true;

         // the six following segments are defined in all clusters

         std::ostringstream smemc_conf;
         smemc_conf << "int_seg_memc_conf_" << x << "_" << y;
         maptab_int.add(Segment(smemc_conf.str(), MEMC_BASE+offset, MEMC_SIZE,
                  IntTab(cluster(x,y),INT_MEMC_TGT_ID),
                  not cacheable, config ));

         std::ostringstream smemc_xram;
         smemc_xram << "int_seg_memc_xram_" << x << "_" << y;
         maptab_int.add(Segment(smemc_xram.str(), XRAM_BASE+offset, XRAM_SIZE,
                  IntTab(cluster(x,y),INT_MEMC_TGT_ID),
                  cacheable));

         std::ostringstream sxicu;
         sxicu << "int_seg_xicu_" << x << "_" << y;
         maptab_int.add(Segment(sxicu.str(), XICU_BASE+offset, XICU_SIZE,
                  IntTab(cluster(x,y),INT_XICU_TGT_ID),
                  not cacheable));

         std::ostringstream sbrom;
         sbrom << "int_seg_brom_" << x << "_" << y;
         maptab_int.add(Segment(sbrom.str(), BROM_BASE+offset, BROM_SIZE,
                  IntTab(cluster(x,y),INT_BROM_TGT_ID),
                  cacheable));

         std::ostringstream smtty;
         smtty << "int_seg_mtty_" << x << "_" << y;
         maptab_int.add(Segment(smtty.str(), MTTY_BASE+offset, MTTY_SIZE,
                  IntTab(cluster(x,y),INT_MTTY_TGT_ID),
                  not cacheable));

         std::ostringstream smdma;
         smdma << "int_seg_mdma_" << x << "_" << y;
         maptab_int.add(Segment(smdma.str(), MDMA_BASE+offset, MDMA_SIZE,
                  IntTab(cluster(x,y),INT_MDMA_TGT_ID),
                  not cacheable));

         // the following segments are only defined in cluster_iob0 or in
         // cluster_iob1
         if ((cluster(x,y) == cluster_iob0) ||
             (cluster(x,y) == cluster_iob1)) {
            std::ostringstream siobx;
            siobx << "int_seg_iobx_" << x << "_" << y;
            maptab_int.add(Segment(siobx.str(), IOBX_BASE+offset, IOBX_SIZE,
                     IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                     not cacheable, config ));

            std::ostringstream stty;
            stty << "int_seg_mtty_" << x << "_" << y;
            maptab_int.add(Segment(stty.str(), XTTY_BASE+offset, XTTY_SIZE,
                     IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                     not cacheable));

            std::ostringstream sfbf;
            sfbf << "int_seg_fbuf_" << x << "_" << y;
            maptab_int.add(Segment(sfbf.str(), FBUF_BASE+offset, FBUF_SIZE,
                     IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                     not cacheable));

            std::ostringstream sbdv;
            sbdv << "int_seg_bdev_" << x << "_" << y;
            maptab_int.add(Segment(sbdv.str(), BDEV_BASE+offset, BDEV_SIZE,
                     IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                     not cacheable));

            std::ostringstream snic;
            snic << "int_seg_mnic_" << x << "_" << y;
            maptab_int.add(Segment(snic.str(), MNIC_BASE+offset, MNIC_SIZE,
                     IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                     not cacheable));

            std::ostringstream sdma;
            sdma << "int_seg_cdma_" << x << "_" << y;
            maptab_int.add(Segment(sdma.str(), CDMA_BASE+offset, CDMA_SIZE,
                     IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                     not cacheable));

            std::ostringstream spic;
            sdma << "int_seg_xpic_" << x << "_" << y;
            maptab_int.add(Segment(spic.str(), XPIC_BASE+offset, XPIC_SIZE,
                     IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                     not cacheable));
         }

         // This defines the mapping between the SRCIDs
         // and the port index on the local interconnect.

         maptab_int.srcid_map(IntTab(cluster(x,y), MDMA_LOCAL_SRCID),
               IntTab(cluster(x,y), INT_MDMA_INI_ID));
         maptab_int.srcid_map(IntTab(cluster(x,y), IOBX_LOCAL_SRCID),
               IntTab(cluster(x,y), INT_IOBX_INI_ID));
         maptab_int.srcid_map(IntTab(cluster(x,y), XPIC_LOCAL_SRCID),
               IntTab(cluster(x,y), INT_IOBX_INI_ID));

         for ( size_t p = 0 ; p < NB_PROCS ; p++ ) {
            maptab_int.srcid_map(IntTab(cluster(x,y), PROC_LOCAL_SRCID + p),
                  IntTab(cluster(x,y), INT_PROC_INI_ID  + p));
         }
      }
   }
   std::cout << "INT network " << maptab_int << std::endl;

   /////////////////////////////////////////////////////////////////////////
   // RAM network mapping table
   // - two levels address decoding for commands
   // - two levels srcid decoding for responses
   // - 2 local initiators (MEMC, IOBX) per cluster
   //   (IOBX component only in cluster_iob0 and cluster_iob1)
   // - 1 local target (XRAM) per cluster
   ////////////////////////////////////////////////////////////////////////
   MappingTable maptab_ram(
         vci_address_width,
         IntTab(X_WIDTH + Y_WIDTH, 0),
         IntTab(X_WIDTH + Y_WIDTH, vci_param_int::S - X_WIDTH - Y_WIDTH),
         0x00FF000000);

   for (size_t x = 0; x < x_size; x++) {
      for (size_t y = 0; y < y_size ; y++) {
         uint64_t offset = ((uint64_t)cluster(x,y))
            << (vci_address_width - X_WIDTH - Y_WIDTH);

         std::ostringstream sxram;
         sxram << "ext_seg_xram_" << x << "_" << y;
         maptab_ram.add(Segment(sxram.str(), XRAM_BASE+offset, XRAM_SIZE,
                  IntTab(cluster(x,y), RAM_XRAM_TGT_ID), false));
      }
   }

   // This defines the mapping between the initiators' SRCIDs
   // and the port index on the RAM local interconnect.
   // External initiators have two alias SRCIDs (iob0 / iob1)

   maptab_ram.srcid_map(IntTab(cluster_iob0, CDMA_LOCAL_SRCID),
         IntTab(cluster_iob0, RAM_IOBX_INI_ID));
   maptab_ram.srcid_map(IntTab(cluster_iob0, BDEV_LOCAL_SRCID),
         IntTab(cluster_iob0, RAM_IOBX_INI_ID));
   maptab_ram.srcid_map(IntTab(cluster_iob0, XPIC_LOCAL_SRCID),
         IntTab(cluster_iob0, RAM_IOBX_INI_ID));
   maptab_ram.srcid_map(IntTab(cluster_iob0, RAM_MEMC_INI_ID),
         IntTab(cluster_iob0, RAM_MEMC_INI_ID));

   if (not is_mono_cluster) {
      maptab_ram.srcid_map(IntTab(cluster_iob1, CDMA_LOCAL_SRCID),
            IntTab(cluster_iob1, RAM_IOBX_INI_ID));
      maptab_ram.srcid_map(IntTab(cluster_iob1, BDEV_LOCAL_SRCID),
            IntTab(cluster_iob1, RAM_IOBX_INI_ID));
      maptab_ram.srcid_map(IntTab(cluster_iob1, XPIC_LOCAL_SRCID),
            IntTab(cluster_iob1, RAM_IOBX_INI_ID));
      maptab_ram.srcid_map(IntTab(cluster_iob1, RAM_MEMC_INI_ID),
            IntTab(cluster_iob1, RAM_MEMC_INI_ID));
   }

   std::cout << "RAM network " << maptab_ram << std::endl;

   ///////////////////////////////////////////////////////////////////////
   // IOX network mapping table
   // - two levels address decoding for commands
   // - two levels srcid decoding for responses
   // - 5 initiators (IOB0, IOB1, BDEV, CDMA, XPIC)
   // - 8 targets (IOB0, IOB1, BDEV, CDMA, MTTY, FBUF, XPIC, MNIC)
   ///////////////////////////////////////////////////////////////////////
   
   const size_t iox_addr_drop_bits = X_WIDTH + Y_WIDTH - 1;
   const size_t iox_addr_decd_bits = 16 - X_WIDTH - Y_WIDTH + 1;
   MappingTable maptab_iox(
         vci_address_width,
         IntTab(iox_addr_drop_bits, iox_addr_decd_bits),
         IntTab(X_WIDTH + Y_WIDTH , vci_param_ext::S - X_WIDTH - Y_WIDTH),
         0x00FF000000);


   // Each peripheral can be accessed through two segments,
   // depending on the used IOB (IOB0 or IOB1).

   uint64_t iob0_base = ((uint64_t)cluster_iob0)
      << (vci_address_width - X_WIDTH - Y_WIDTH);

   maptab_iox.add(Segment("iox_seg_mtty_0", XTTY_BASE + iob0_base, XTTY_SIZE,
            IntTab(0, IOX_MTTY_TGT_ID), false));
   maptab_iox.add(Segment("iox_seg_fbuf_0", FBUF_BASE + iob0_base, FBUF_SIZE,
            IntTab(0, IOX_FBUF_TGT_ID), false));
   maptab_iox.add(Segment("iox_seg_bdev_0", BDEV_BASE + iob0_base, BDEV_SIZE,
            IntTab(0, IOX_BDEV_TGT_ID), false));
   maptab_iox.add(Segment("iox_seg_mnic_0", MNIC_BASE + iob0_base, MNIC_SIZE,
            IntTab(0, IOX_MNIC_TGT_ID), false));
   maptab_iox.add(Segment("iox_seg_cdma_0", CDMA_BASE + iob0_base, CDMA_SIZE,
            IntTab(0, IOX_CDMA_TGT_ID), false));
   maptab_iox.add(Segment("iox_seg_xpic_0", XPIC_BASE + iob0_base, XPIC_SIZE,
            IntTab(0, IOX_XPIC_TGT_ID), false));

   if (not is_mono_cluster) {
      uint64_t iob1_base = ((uint64_t)cluster_iob1)
         << (vci_address_width - X_WIDTH - Y_WIDTH);

      maptab_iox.add(Segment("iox_seg_mtty_1", XTTY_BASE + iob1_base,
               XTTY_SIZE, IntTab(0, IOX_MTTY_TGT_ID), false));
      maptab_iox.add(Segment("iox_seg_fbuf_1", FBUF_BASE + iob1_base,
               FBUF_SIZE, IntTab(0, IOX_FBUF_TGT_ID), false));
      maptab_iox.add(Segment("iox_seg_bdev_1", BDEV_BASE + iob1_base,
               BDEV_SIZE, IntTab(0, IOX_BDEV_TGT_ID), false));
      maptab_iox.add(Segment("iox_seg_mnic_1", MNIC_BASE + iob1_base,
               MNIC_SIZE, IntTab(0, IOX_MNIC_TGT_ID), false));
      maptab_iox.add(Segment("iox_seg_cdma_1", CDMA_BASE + iob1_base,
               CDMA_SIZE, IntTab(0, IOX_CDMA_TGT_ID), false));
      maptab_iox.add(Segment("iox_seg_xpic_1", XPIC_BASE + iob1_base,
               XPIC_SIZE, IntTab(0, IOX_XPIC_TGT_ID), false));
   }

   ///////////////////////////////////////////////////////////////////////////
   // - For external DMA peripherals, each physical RAM and replicated
   //   XICU can be accessed through IOB0, or through IOB1 depending on address
   //   bit A[32] (0 => IOB0, 1 => IOB1).
   //
   // NOTE: the special attribute in the XICU segments is used by the IOB to
   //       route commands through the INT network. The commands on not special
   //       segments (RAM) are routed by the IOB through the RAM network
   //
   // NOTE: The IOX interconnect is implemented as a local interconnect because
   //       the global bits need to be dropped, but no locality check is
   //       performed
   ///////////////////////////////////////////////////////////////////////////

   for (size_t x = 0; x < x_size; x++) {
      for (size_t y = 0; y < y_size ; y++) {
         const bool special   = true;
         const bool cacheable = true;

         const uint64_t offset = static_cast<uint64_t>(cluster(x,y))
            << (vci_address_width - X_WIDTH - Y_WIDTH);

         const uint64_t xicu_base = XICU_BASE + offset;
         if ( (y & 0x1) == 0 ) {
            // segments mapped to IOB0
            std::ostringstream sxcu0;
            sxcu0 << "iox_seg_xcu0_" << x << "_" << y;
            maptab_iox.add(Segment(sxcu0.str(), xicu_base, XICU_SIZE,
                           IntTab(0, IOX_IOB0_TGT_ID), not cacheable, special));

            std::ostringstream sram0;
            sram0 << "iox_seg_ram0_" << x << "_" << y;
            maptab_iox.add(Segment(sram0.str(), offset, XICU_BASE,
                           IntTab(0, IOX_IOB0_TGT_ID), not cacheable, not special));
         } else {
            // segments mapped to IOB1
            std::ostringstream sxcu1;
            sxcu1 << "iox_seg_xcu1_" << x << "_" << y;
            maptab_iox.add(Segment(sxcu1.str(), xicu_base | (1ULL<<32), XICU_SIZE,
                           IntTab(0, IOX_IOB1_TGT_ID), not cacheable, special));

            std::ostringstream sram1;
            sram1 << "iox_seg_ram1_" << x << "_" << y;
            maptab_iox.add(Segment(sram1.str(), offset | (1ULL<<32), XICU_BASE,
                           IntTab(0, IOX_IOB1_TGT_ID), not cacheable, not special));
         }
      }
   }

   // This defines the mapping between the initiators (identified by the SRCID)
   // and the port index on the IOX local interconnect.

   maptab_iox.srcid_map(IntTab(0, CDMA_LOCAL_SRCID),
                        IntTab(0, IOX_CDMA_INI_ID));
   maptab_iox.srcid_map(IntTab(0, BDEV_LOCAL_SRCID),
                        IntTab(0, IOX_BDEV_INI_ID));
   maptab_iox.srcid_map(IntTab(0, XPIC_LOCAL_SRCID),
                        IntTab(0, IOX_XPIC_INI_ID));
   maptab_iox.srcid_map(IntTab(0, IOX_IOB0_INI_ID),
                        IntTab(0, IOX_IOB0_INI_ID));

   if (not is_mono_cluster) {
      maptab_iox.srcid_map(IntTab(0, IOX_IOB1_INI_ID),
                           IntTab(0, IOX_IOB1_INI_ID));
   }

   std::cout << "IOX network " << maptab_iox << std::endl;

   ////////////////////
   // Signals
   ////////////////////

   sc_clock        signal_clk("clk");
   sc_signal<bool> signal_resetn("resetn");

   sc_signal<bool> signal_irq_false;
   sc_signal<bool> signal_irq_bdev;
   sc_signal<bool> signal_irq_mnic_rx[NB_NIC_CHANNELS];
   sc_signal<bool> signal_irq_mnic_tx[NB_NIC_CHANNELS];
   sc_signal<bool> signal_irq_mtty[NB_TTY_CHANNELS];
   sc_signal<bool> signal_irq_cdma[NB_NIC_CHANNELS*2];

   // DSPIN signals for loopback in cluster_iob0 & cluster_iob1
   DspinSignals<dspin_ram_cmd_width> signal_dspin_cmd_iob0_loopback;
   DspinSignals<dspin_ram_rsp_width> signal_dspin_rsp_iob0_loopback;
   DspinSignals<dspin_ram_cmd_width> signal_dspin_cmd_iob1_loopback;
   DspinSignals<dspin_ram_rsp_width> signal_dspin_rsp_iob1_loopback;

   // VCI signals for IOX network
   VciSignals<vci_param_ext> signal_vci_ini_iob0("signal_vci_ini_iob0");
   VciSignals<vci_param_ext> signal_vci_ini_iob1("signal_vci_ini_iob1");
   VciSignals<vci_param_ext> signal_vci_ini_bdev("signal_vci_ini_bdev");
   VciSignals<vci_param_ext> signal_vci_ini_cdma("signal_vci_ini_cdma");
   VciSignals<vci_param_ext> signal_vci_ini_xpic("signal_vci_ini_xpic");

   VciSignals<vci_param_ext> signal_vci_tgt_iob0("signal_vci_tgt_iob0");
   VciSignals<vci_param_ext> signal_vci_tgt_iob1("signal_vci_tgt_iob1");
   VciSignals<vci_param_ext> signal_vci_tgt_mtty("signal_vci_tgt_mtty");
   VciSignals<vci_param_ext> signal_vci_tgt_fbuf("signal_vci_tgt_fbuf");
   VciSignals<vci_param_ext> signal_vci_tgt_mnic("signal_vci_tgt_mnic");
   VciSignals<vci_param_ext> signal_vci_tgt_bdev("signal_vci_tgt_bdev");
   VciSignals<vci_param_ext> signal_vci_tgt_cdma("signal_vci_tgt_cdma");
   VciSignals<vci_param_ext> signal_vci_tgt_xpic("signal_vci_tgt_xpic");

   // Horizontal inter-clusters INT network DSPIN
   DspinSignals<dspin_int_cmd_width>*** signal_dspin_int_cmd_h_inc =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_int_cmd_h_inc", x_size-1, y_size, 3);
   DspinSignals<dspin_int_cmd_width>*** signal_dspin_int_cmd_h_dec =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_int_cmd_h_dec", x_size-1, y_size, 3);
   DspinSignals<dspin_int_rsp_width>*** signal_dspin_int_rsp_h_inc =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_int_rsp_h_inc", x_size-1, y_size, 2);
   DspinSignals<dspin_int_rsp_width>*** signal_dspin_int_rsp_h_dec =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_int_rsp_h_dec", x_size-1, y_size, 2);

   // Vertical inter-clusters INT network DSPIN
   DspinSignals<dspin_int_cmd_width>*** signal_dspin_int_cmd_v_inc =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_int_cmd_v_inc", x_size, y_size-1, 3);
   DspinSignals<dspin_int_cmd_width>*** signal_dspin_int_cmd_v_dec =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_int_cmd_v_dec", x_size, y_size-1, 3);
   DspinSignals<dspin_int_rsp_width>*** signal_dspin_int_rsp_v_inc =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_int_rsp_v_inc", x_size, y_size-1, 2);
   DspinSignals<dspin_int_rsp_width>*** signal_dspin_int_rsp_v_dec =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_int_rsp_v_dec", x_size, y_size-1, 2);

   // Mesh boundaries INT network DSPIN
   DspinSignals<dspin_int_cmd_width>**** signal_dspin_false_int_cmd_in =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_false_int_cmd_in", x_size, y_size, 4, 3);
   DspinSignals<dspin_int_cmd_width>**** signal_dspin_false_int_cmd_out =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_false_int_cmd_out", x_size, y_size, 4, 3);
   DspinSignals<dspin_int_rsp_width>**** signal_dspin_false_int_rsp_in =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_false_int_rsp_in", x_size, y_size, 4, 2);
   DspinSignals<dspin_int_rsp_width>**** signal_dspin_false_int_rsp_out =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_false_int_rsp_out", x_size, y_size, 4, 2);


   // Horizontal inter-clusters RAM network DSPIN
   DspinSignals<dspin_ram_cmd_width>** signal_dspin_ram_cmd_h_inc =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_ram_cmd_h_inc", x_size-1, y_size);
   DspinSignals<dspin_ram_cmd_width>** signal_dspin_ram_cmd_h_dec =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_ram_cmd_h_dec", x_size-1, y_size);
   DspinSignals<dspin_ram_rsp_width>** signal_dspin_ram_rsp_h_inc =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_ram_rsp_h_inc", x_size-1, y_size);
   DspinSignals<dspin_ram_rsp_width>** signal_dspin_ram_rsp_h_dec =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_ram_rsp_h_dec", x_size-1, y_size);

   // Vertical inter-clusters RAM network DSPIN
   DspinSignals<dspin_ram_cmd_width>** signal_dspin_ram_cmd_v_inc =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_ram_cmd_v_inc", x_size, y_size-1);
   DspinSignals<dspin_ram_cmd_width>** signal_dspin_ram_cmd_v_dec =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_ram_cmd_v_dec", x_size, y_size-1);
   DspinSignals<dspin_ram_rsp_width>** signal_dspin_ram_rsp_v_inc =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_ram_rsp_v_inc", x_size, y_size-1);
   DspinSignals<dspin_ram_rsp_width>** signal_dspin_ram_rsp_v_dec =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_ram_rsp_v_dec", x_size, y_size-1);

   // Mesh boundaries RAM network DSPIN
   DspinSignals<dspin_ram_cmd_width>*** signal_dspin_false_ram_cmd_in =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_false_ram_cmd_in", x_size, y_size, 4);
   DspinSignals<dspin_ram_cmd_width>*** signal_dspin_false_ram_cmd_out =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_false_ram_cmd_out", x_size, y_size, 4);
   DspinSignals<dspin_ram_rsp_width>*** signal_dspin_false_ram_rsp_in =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_false_ram_rsp_in", x_size, y_size, 4);
   DspinSignals<dspin_ram_rsp_width>*** signal_dspin_false_ram_rsp_out =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_false_ram_rsp_out", x_size, y_size, 4);

   ////////////////////////////
   //      Loader
   ////////////////////////////

   soclib::common::Loader loader(soft_name);

   typedef soclib::common::GdbServer<soclib::common::Mips32ElIss> proc_iss;
   proc_iss::set_loader(loader);

   ////////////////////////////////////////
   //  Instantiated Hardware Components
   ////////////////////////////////////////

   std::cout << std::endl << "External Bus and Peripherals" << std::endl
      << std::endl;

   const size_t nb_iox_initiators = (not is_mono_cluster) ? 5 : 4;
   const size_t nb_iox_targets = (not is_mono_cluster) ? 8 : 7;

   // IOX network
   VciIoxNetwork<vci_param_ext>* iox_network;
   iox_network = new VciIoxNetwork<vci_param_ext>("iox_network",
         maptab_iox,
         nb_iox_targets,      // number of targets
         nb_iox_initiators ); // number of initiators

   // Network Controller
   VciMultiNic<vci_param_ext>*  mnic;
   mnic = new VciMultiNic<vci_param_ext>("mnic",
         IntTab(0, IOX_MNIC_TGT_ID),
         maptab_iox,
         NB_NIC_CHANNELS,
         0,           // mac_4 address
         0,           // mac_2 address
         nic_rx_name,
         nic_tx_name);

   // Frame Buffer
   VciFrameBuffer<vci_param_ext>*  fbuf;
   fbuf = new VciFrameBuffer<vci_param_ext>("fbuf",
         IntTab(0, IOX_FBUF_TGT_ID),
         maptab_iox,
         FBUF_X_SIZE, FBUF_Y_SIZE );

   // Block Device
   // for AHCI
   // std::vector<std::string> filenames;
   // filenames.push_back(disk_name); // one single disk
   VciBlockDeviceTsar<vci_param_ext>*  bdev;
   bdev = new VciBlockDeviceTsar<vci_param_ext>("bdev",
         maptab_iox,
         IntTab(0, BDEV_LOCAL_SRCID),
         IntTab(0, IOX_BDEV_TGT_ID),
         disk_name,
         block_size,
         64,  // burst size (bytes)
         0 ); // disk latency

   // Chained Buffer DMA controller
   VciChbufDma<vci_param_ext>*  cdma;
   cdma = new VciChbufDma<vci_param_ext>("cdma",
         maptab_iox,
         IntTab(0, CDMA_LOCAL_SRCID),
         IntTab(0, IOX_CDMA_TGT_ID),
         64,  // burst size (bytes)
         NB_CMA_CHANNELS);

   // Multi-TTY controller
   std::vector<std::string> vect_names;
   for( size_t tid = 0 ; tid < NB_TTY_CHANNELS ; tid++ )
   {
      std::ostringstream term_name;
      term_name <<  "mtty_iox_" << tid;
      vect_names.push_back(term_name.str().c_str());
   }
   VciMultiTty<vci_param_ext>*  mtty;
   mtty = new VciMultiTty<vci_param_ext>("mtty_iox",
         IntTab(0, IOX_MTTY_TGT_ID),
         maptab_iox,
         vect_names);

   // IOPIC
   VciIopic<vci_param_ext>* xpic;
   xpic = new VciIopic<vci_param_ext>( "xpic",
                                       maptab_iox,
                                       IntTab(0, XPIC_LOCAL_SRCID),
                                       IntTab(0, IOX_XPIC_TGT_ID),
                                       32 );        // number of input HWI


   TsarIobClusterType* clusters[x_size][y_size];

#if USE_OPENMP
#pragma omp parallel
   {
#pragma omp for
#endif

      for(size_t i = 0; i  < (x_size * y_size); i++) {
         size_t x = i / y_size;
         size_t y = i % y_size;

#if USE_OPENMP
#pragma omp critical
         {
#endif
            std::cout << std::endl;
            std::cout << "Cluster_" << std::dec << x << "_" << y << std::endl;
            std::cout << std::endl;

            std::ostringstream sc;
            sc << "cluster_" << x << "_" << y;

            bool memc_debug = (cluster(x,y) == debug_memc_id);
            bool proc_debug = (cluster(x,y) == (debug_proc_id / NB_PROCS));

            bool is_io0 = (cluster(x,y) == cluster_iob0);
            bool is_io1 = (cluster(x,y) == cluster_iob1);
            bool is_io = is_io0 || is_io1;

            IntTab iox_iob_tgtid =
               IntTab(0, is_io0 ? IOX_IOB0_TGT_ID : IOX_IOB1_TGT_ID);
            IntTab iox_iob_srcid =
               IntTab(0, is_io0 ? IOX_IOB0_INI_ID : IOX_IOB1_INI_ID);

            TsarIobClusterType::ClusterParams params = {
               .insname           = sc.str().c_str(),

               .x_id              = x,
               .y_id              = y,

               .mt_int            = maptab_int,
               .mt_ext            = maptab_ram,
               .mt_iox            = maptab_iox,

               .is_io             = is_io,
               .iox_iob_tgtid     = iox_iob_tgtid,
               .iox_iob_srcid     = iox_iob_srcid,

               .memc_ways         = MEMC_WAYS,
               .memc_sets         = MEMC_SETS,
               .l1_i_ways         = L1_IWAYS,
               .l1_i_sets         = L1_ISETS,
               .l1_d_ways         = L1_DWAYS,
               .l1_d_sets         = L1_DSETS,
               .xram_latency      = XRAM_LATENCY,

               .loader            = loader,

               .distboot          = distboot,

               .frozen_cycles     = frozen_cycles,
               .debug_start_cycle = debug_from,
               .memc_debug_ok     = memc_debug,
               .proc_debug_ok     = proc_debug,
               .iob_debug_ok      = debug_iob
            };

            clusters[x][y] = new TsarIobClusterType(params);

#if USE_OPENMP
         } // end critical
#endif
      } // end for
#if USE_OPENMP
   }
#endif

   std::cout << std::endl;

   ///////////////////////////////////////////////////////////////////////////
   //     Net-list
   ///////////////////////////////////////////////////////////////////////////

   // IOX network connexion
   iox_network->p_clk                     (signal_clk);
   iox_network->p_resetn                  (signal_resetn);
   iox_network->p_to_ini[IOX_BDEV_INI_ID] (signal_vci_ini_bdev);
   iox_network->p_to_ini[IOX_CDMA_INI_ID] (signal_vci_ini_cdma);
   iox_network->p_to_ini[IOX_XPIC_INI_ID] (signal_vci_ini_xpic);
   iox_network->p_to_ini[IOX_IOB0_INI_ID] (signal_vci_ini_iob0);
   iox_network->p_to_tgt[IOX_MTTY_TGT_ID] (signal_vci_tgt_mtty);
   iox_network->p_to_tgt[IOX_FBUF_TGT_ID] (signal_vci_tgt_fbuf);
   iox_network->p_to_tgt[IOX_MNIC_TGT_ID] (signal_vci_tgt_mnic);
   iox_network->p_to_tgt[IOX_BDEV_TGT_ID] (signal_vci_tgt_bdev);
   iox_network->p_to_tgt[IOX_CDMA_TGT_ID] (signal_vci_tgt_cdma);
   iox_network->p_to_tgt[IOX_XPIC_TGT_ID] (signal_vci_tgt_xpic);
   iox_network->p_to_tgt[IOX_IOB0_TGT_ID] (signal_vci_tgt_iob0);

   if (not is_mono_cluster) {
      iox_network->p_to_ini[IOX_IOB1_INI_ID] (signal_vci_ini_iob1);
      iox_network->p_to_tgt[IOX_IOB1_TGT_ID] (signal_vci_tgt_iob1);
   }

   // BDEV connexion
   bdev->p_clk           (signal_clk);
   bdev->p_resetn        (signal_resetn);
   bdev->p_irq           (signal_irq_bdev);
   bdev->p_vci_target    (signal_vci_tgt_bdev);
   bdev->p_vci_initiator (signal_vci_ini_bdev);

   std::cout << "  - BDEV connected" << std::endl;

   // FBUF connexion
   fbuf->p_clk    (signal_clk);
   fbuf->p_resetn (signal_resetn);
   fbuf->p_vci    (signal_vci_tgt_fbuf);

   std::cout << "  - FBUF connected" << std::endl;

   // MNIC connexion
   mnic->p_clk    (signal_clk);
   mnic->p_resetn (signal_resetn);
   mnic->p_vci    (signal_vci_tgt_mnic);
   for ( size_t i=0 ; i<NB_NIC_CHANNELS ; i++ )
   {
      mnic->p_rx_irq[i] (signal_irq_mnic_rx[i]);
      mnic->p_tx_irq[i] (signal_irq_mnic_tx[i]);
   }

   std::cout << "  - MNIC connected" << std::endl;

   // MTTY connexion
   mtty->p_clk        (signal_clk);
   mtty->p_resetn     (signal_resetn);
   mtty->p_vci        (signal_vci_tgt_mtty);
   for ( size_t i=0 ; i<NB_TTY_CHANNELS ; i++ ) {
      mtty->p_irq[i] (signal_irq_mtty[i]);
   }

   std::cout << "  - MTTY connected" << std::endl;

   // CDMA connexion
   cdma->p_clk           (signal_clk);
   cdma->p_resetn        (signal_resetn);
   cdma->p_vci_target    (signal_vci_tgt_cdma);
   cdma->p_vci_initiator (signal_vci_ini_cdma);
   for ( size_t i=0 ; i<NB_CMA_CHANNELS ; i++) {
      cdma->p_irq[i]    (signal_irq_cdma[i]);
   }

   std::cout << "  - CDMA connected" << std::endl;

   // XPIC connexion
   xpic->p_clk           (signal_clk);
   xpic->p_resetn        (signal_resetn);
   xpic->p_vci_target    (signal_vci_tgt_xpic);
   xpic->p_vci_initiator (signal_vci_ini_xpic);
   for ( size_t i=0 ; i<32 ; i++)
   {
      if      (i < NB_NIC_CHANNELS)   xpic->p_hwi[i] (signal_irq_mnic_rx[i]);
      else if (i < 2)                 xpic->p_hwi[i] (signal_irq_false);
      else if (i < 2+NB_NIC_CHANNELS) xpic->p_hwi[i] (signal_irq_mnic_tx[i-2]);
      else if (i < 4)                 xpic->p_hwi[i] (signal_irq_false);
      else if (i < 4+NB_CMA_CHANNELS) xpic->p_hwi[i] (signal_irq_cdma[i-4]);
      else if (i < 8)                 xpic->p_hwi[i] (signal_irq_false);
      else if (i < 9)                 xpic->p_hwi[i] (signal_irq_bdev);
      else if (i < 9+NB_TTY_CHANNELS) xpic->p_hwi[i] (signal_irq_mtty[i-9]);
      else                            xpic->p_hwi[i] (signal_irq_false);
   }

   std::cout << "  - XPIC connected" << std::endl;

   // IOB0 cluster connexion to IOX network
   (*clusters[0][0]->p_vci_iob_iox_ini) (signal_vci_ini_iob0);
   (*clusters[0][0]->p_vci_iob_iox_tgt) (signal_vci_tgt_iob0);

   // IOB1 cluster connexion to IOX network
   if (not is_mono_cluster) {
      (*clusters[x_size-1][y_size-1]->p_vci_iob_iox_ini) (signal_vci_ini_iob1);
      (*clusters[x_size-1][y_size-1]->p_vci_iob_iox_tgt) (signal_vci_tgt_iob1);
   }

   // All clusters Clock & RESET connexions
   for ( size_t x = 0; x < (x_size); x++ ) {
      for (size_t y = 0; y < y_size; y++) {
         clusters[x][y]->p_clk    (signal_clk);
         clusters[x][y]->p_resetn (signal_resetn);
      }
   }

   const int& NORTH = VirtualDspinRouter<dspin_int_cmd_width>::NORTH;
   const int& SOUTH = VirtualDspinRouter<dspin_int_cmd_width>::SOUTH;
   const int& EAST  = VirtualDspinRouter<dspin_int_cmd_width>::EAST;
   const int& WEST  = VirtualDspinRouter<dspin_int_cmd_width>::WEST;

   // Inter Clusters horizontal connections
   if (x_size > 1) {
      for (size_t x = 0; x < (x_size-1); x++) {
         for (size_t y = 0; y < y_size; y++) {
            for (size_t k = 0; k < 3; k++) {
               clusters[x][y]->p_dspin_int_cmd_out[EAST][k](
                     signal_dspin_int_cmd_h_inc[x][y][k]);
               clusters[x+1][y]->p_dspin_int_cmd_in[WEST][k](
                     signal_dspin_int_cmd_h_inc[x][y][k]);
               clusters[x][y]->p_dspin_int_cmd_in[EAST][k](
                     signal_dspin_int_cmd_h_dec[x][y][k]);
               clusters[x+1][y]->p_dspin_int_cmd_out[WEST][k](
                     signal_dspin_int_cmd_h_dec[x][y][k]);
            }

            for (size_t k = 0; k < 2; k++) {
               clusters[x][y]->p_dspin_int_rsp_out[EAST][k](
                     signal_dspin_int_rsp_h_inc[x][y][k]);
               clusters[x+1][y]->p_dspin_int_rsp_in[WEST][k](
                     signal_dspin_int_rsp_h_inc[x][y][k]);
               clusters[x][y]->p_dspin_int_rsp_in[EAST][k](
                     signal_dspin_int_rsp_h_dec[x][y][k]);
               clusters[x+1][y]->p_dspin_int_rsp_out[WEST][k](
                     signal_dspin_int_rsp_h_dec[x][y][k]);
            }

            clusters[x][y]->p_dspin_ram_cmd_out[EAST](
                  signal_dspin_ram_cmd_h_inc[x][y]);
            clusters[x+1][y]->p_dspin_ram_cmd_in[WEST](
                  signal_dspin_ram_cmd_h_inc[x][y]);
            clusters[x][y]->p_dspin_ram_cmd_in[EAST](
                  signal_dspin_ram_cmd_h_dec[x][y]);
            clusters[x+1][y]->p_dspin_ram_cmd_out[WEST](
                  signal_dspin_ram_cmd_h_dec[x][y]);
            clusters[x][y]->p_dspin_ram_rsp_out[EAST](
                  signal_dspin_ram_rsp_h_inc[x][y]);
            clusters[x+1][y]->p_dspin_ram_rsp_in[WEST](
                  signal_dspin_ram_rsp_h_inc[x][y]);
            clusters[x][y]->p_dspin_ram_rsp_in[EAST](
                  signal_dspin_ram_rsp_h_dec[x][y]);
            clusters[x+1][y]->p_dspin_ram_rsp_out[WEST](
                  signal_dspin_ram_rsp_h_dec[x][y]);
         }
      }
   }

   std::cout << std::endl << "Horizontal connections established"
      << std::endl;

   // Inter Clusters vertical connections
   if (y_size > 1) {
      for (size_t y = 0; y < (y_size-1); y++) {
         for (size_t x = 0; x < x_size; x++) {
            for (size_t k = 0; k < 3; k++) {
               clusters[x][y]->p_dspin_int_cmd_out[NORTH][k](
                     signal_dspin_int_cmd_v_inc[x][y][k]);
               clusters[x][y+1]->p_dspin_int_cmd_in[SOUTH][k](
                     signal_dspin_int_cmd_v_inc[x][y][k]);
               clusters[x][y]->p_dspin_int_cmd_in[NORTH][k](
                     signal_dspin_int_cmd_v_dec[x][y][k]);
               clusters[x][y+1]->p_dspin_int_cmd_out[SOUTH][k](
                     signal_dspin_int_cmd_v_dec[x][y][k]);
            }

            for (size_t k = 0; k < 2; k++) {
               clusters[x][y]->p_dspin_int_rsp_out[NORTH][k](
                     signal_dspin_int_rsp_v_inc[x][y][k]);
               clusters[x][y+1]->p_dspin_int_rsp_in[SOUTH][k](
                     signal_dspin_int_rsp_v_inc[x][y][k]);
               clusters[x][y]->p_dspin_int_rsp_in[NORTH][k](
                     signal_dspin_int_rsp_v_dec[x][y][k]);
               clusters[x][y+1]->p_dspin_int_rsp_out[SOUTH][k](
                     signal_dspin_int_rsp_v_dec[x][y][k]);
            }

            clusters[x][y]->p_dspin_ram_cmd_out[NORTH](
                  signal_dspin_ram_cmd_v_inc[x][y]);
            clusters[x][y+1]->p_dspin_ram_cmd_in[SOUTH](
                  signal_dspin_ram_cmd_v_inc[x][y]);
            clusters[x][y]->p_dspin_ram_cmd_in[NORTH](
                  signal_dspin_ram_cmd_v_dec[x][y]);
            clusters[x][y+1]->p_dspin_ram_cmd_out[SOUTH](
                  signal_dspin_ram_cmd_v_dec[x][y]);
            clusters[x][y]->p_dspin_ram_rsp_out[NORTH](
                  signal_dspin_ram_rsp_v_inc[x][y]);
            clusters[x][y+1]->p_dspin_ram_rsp_in[SOUTH](
                  signal_dspin_ram_rsp_v_inc[x][y]);
            clusters[x][y]->p_dspin_ram_rsp_in[NORTH](
                  signal_dspin_ram_rsp_v_dec[x][y]);
            clusters[x][y+1]->p_dspin_ram_rsp_out[SOUTH](
                  signal_dspin_ram_rsp_v_dec[x][y]);
         }
      }
   }

   std::cout << "Vertical connections established" << std::endl;

   // East & West boundary cluster connections
   for (size_t y = 0; y < y_size; y++) {
      // L1-L2 cmd network boundary connections
      for (size_t k = 0; k < 3; k++) {
         clusters[0][y]->p_dspin_int_cmd_in[WEST][k](
               signal_dspin_false_int_cmd_in[0][y][WEST][k]);
         clusters[0][y]->p_dspin_int_cmd_out[WEST][k](
               signal_dspin_false_int_cmd_out[0][y][WEST][k]);
         clusters[x_size-1][y]->p_dspin_int_cmd_in[EAST][k](
               signal_dspin_false_int_cmd_in[x_size-1][y][EAST][k]);
         clusters[x_size-1][y]->p_dspin_int_cmd_out[EAST][k](
               signal_dspin_false_int_cmd_out[x_size-1][y][EAST][k]);
      }

      // L1-L2 rsp network boundary connections
      for (size_t k = 0; k < 2; k++) {
         clusters[0][y]->p_dspin_int_rsp_in[WEST][k](
               signal_dspin_false_int_rsp_in[0][y][WEST][k]);
         clusters[0][y]->p_dspin_int_rsp_out[WEST][k](
               signal_dspin_false_int_rsp_out[0][y][WEST][k]);
         clusters[x_size-1][y]->p_dspin_int_rsp_in[EAST][k](
               signal_dspin_false_int_rsp_in[x_size-1][y][EAST][k]);
         clusters[x_size-1][y]->p_dspin_int_rsp_out[EAST][k](
               signal_dspin_false_int_rsp_out[x_size-1][y][EAST][k]);
      }

      // L2-XRAM cmd network boundary connections
      clusters[0][y]->p_dspin_ram_cmd_in[WEST](
            signal_dspin_false_ram_cmd_in[0][y][WEST]);
      clusters[0][y]->p_dspin_ram_cmd_out[WEST](
            signal_dspin_false_ram_cmd_out[0][y][WEST]);
      clusters[x_size-1][y]->p_dspin_ram_cmd_in[EAST](
            signal_dspin_false_ram_cmd_in[x_size-1][y][EAST]);
      clusters[x_size-1][y]->p_dspin_ram_cmd_out[EAST](
            signal_dspin_false_ram_cmd_out[x_size-1][y][EAST]);

      // L2-XRAM rsp network boundary connections
      clusters[0][y]->p_dspin_ram_rsp_in[WEST](
            signal_dspin_false_ram_rsp_in[0][y][WEST]);
      clusters[0][y]->p_dspin_ram_rsp_out[WEST](
            signal_dspin_false_ram_rsp_out[0][y][WEST]);
      clusters[x_size-1][y]->p_dspin_ram_rsp_in[EAST](
            signal_dspin_false_ram_rsp_in[x_size-1][y][EAST]);
      clusters[x_size-1][y]->p_dspin_ram_rsp_out[EAST](
            signal_dspin_false_ram_rsp_out[x_size-1][y][EAST]);
   }

   std::cout << "East & West boundaries established" << std::endl;

   // North & South boundary clusters connections
   for (size_t x = 0; x < x_size; x++) {
      for (size_t k = 0; k < 3; k++) {
         clusters[x][0]->p_dspin_int_cmd_in[SOUTH][k](
               signal_dspin_false_int_cmd_in[x][0][SOUTH][k]);
         clusters[x][0]->p_dspin_int_cmd_out[SOUTH][k](
               signal_dspin_false_int_cmd_out[x][0][SOUTH][k]);
         clusters[x][y_size-1]->p_dspin_int_cmd_in[NORTH][k](
               signal_dspin_false_int_cmd_in[x][y_size-1][NORTH][k]);
         clusters[x][y_size-1]->p_dspin_int_cmd_out[NORTH][k](
               signal_dspin_false_int_cmd_out[x][y_size-1][NORTH][k]);
      }

      for (size_t k = 0; k < 2; k++) {
         clusters[x][0]->p_dspin_int_rsp_in[SOUTH][k](
               signal_dspin_false_int_rsp_in[x][0][SOUTH][k]);
         clusters[x][0]->p_dspin_int_rsp_out[SOUTH][k](
               signal_dspin_false_int_rsp_out[x][0][SOUTH][k]);
         clusters[x][y_size-1]->p_dspin_int_rsp_in[NORTH][k](
               signal_dspin_false_int_rsp_in[x][y_size-1][NORTH][k]);
         clusters[x][y_size-1]->p_dspin_int_rsp_out[NORTH][k](
               signal_dspin_false_int_rsp_out[x][y_size-1][NORTH][k]);
      }

      clusters[x][0]->p_dspin_ram_cmd_in[SOUTH](
            signal_dspin_false_ram_cmd_in[x][0][SOUTH]);
      clusters[x][0]->p_dspin_ram_cmd_out[SOUTH](
            signal_dspin_false_ram_cmd_out[x][0][SOUTH]);
      clusters[x][0]->p_dspin_ram_rsp_in[SOUTH](
            signal_dspin_false_ram_rsp_in[x][0][SOUTH]);
      clusters[x][0]->p_dspin_ram_rsp_out[SOUTH](
            signal_dspin_false_ram_rsp_out[x][0][SOUTH]);

      clusters[x][y_size-1]->p_dspin_ram_cmd_in[NORTH](
            signal_dspin_false_ram_cmd_in[x][y_size-1][NORTH]);
      clusters[x][y_size-1]->p_dspin_ram_cmd_out[NORTH](
            signal_dspin_false_ram_cmd_out[x][y_size-1][NORTH]);
      clusters[x][y_size-1]->p_dspin_ram_rsp_in[NORTH](
            signal_dspin_false_ram_rsp_in[x][y_size-1][NORTH]);
      clusters[x][y_size-1]->p_dspin_ram_rsp_out[NORTH](
            signal_dspin_false_ram_rsp_out[x][y_size-1][NORTH]);
   }

   std::cout << "North & South boundaries established" << std::endl
      << std::endl;

   ////////////////////////////////////////////////////////
   //   Simulation
   ///////////////////////////////////////////////////////

   sc_start(sc_core::sc_time(0, SC_NS));

   signal_resetn = false;
   signal_irq_false = false;

   // network boundaries signals
   for (size_t x = 0; x < x_size ; x++) {
      for (size_t y = 0; y < y_size ; y++) {
         for (size_t a = 0; a < 4; a++) {
            for (size_t k = 0; k < 3; k++) {
               signal_dspin_false_int_cmd_in[x][y][a][k].write  = false;
               signal_dspin_false_int_cmd_in[x][y][a][k].read   = true;
               signal_dspin_false_int_cmd_out[x][y][a][k].write = false;
               signal_dspin_false_int_cmd_out[x][y][a][k].read  = true;
            }

            for (size_t k = 0; k < 2; k++) {
               signal_dspin_false_int_rsp_in[x][y][a][k].write  = false;
               signal_dspin_false_int_rsp_in[x][y][a][k].read   = true;
               signal_dspin_false_int_rsp_out[x][y][a][k].write = false;
               signal_dspin_false_int_rsp_out[x][y][a][k].read  = true;
            }

            signal_dspin_false_ram_cmd_in[x][y][a].write  = false;
            signal_dspin_false_ram_cmd_in[x][y][a].read   = true;
            signal_dspin_false_ram_cmd_out[x][y][a].write = false;
            signal_dspin_false_ram_cmd_out[x][y][a].read  = true;

            signal_dspin_false_ram_rsp_in[x][y][a].write  = false;
            signal_dspin_false_ram_rsp_in[x][y][a].read   = true;
            signal_dspin_false_ram_rsp_out[x][y][a].write = false;
            signal_dspin_false_ram_rsp_out[x][y][a].read  = true;
         }
      }
   }

   sc_start(sc_core::sc_time(1, SC_NS));
   signal_resetn = true;

   struct timeval t1, t2;
   const uint64_t stats_period = 100000;
   gettimeofday(&t1, NULL);
   for (uint64_t n = 1; n < ncycles; n++) {
      // stats display
      if((n % stats_period) == 0) {
         gettimeofday(&t2, NULL);

         uint64_t ms1 = (uint64_t) t1.tv_sec  * 1000 +
            (uint64_t) t1.tv_usec / 1000;
         uint64_t ms2 = (uint64_t) t2.tv_sec  * 1000 +
            (uint64_t) t2.tv_usec / 1000;
         double freq  = (double) stats_period / (ms2 - ms1);

         std::cerr << "Platform Clock Frequency: " << freq << " Khz"
            << std::endl;

         gettimeofday(&t1, NULL);
      }

      if (debug_ok and (n > debug_from) and ((n % debug_period) == 0)) {
         std::cout << " ***********************"
            << " cycle " << std::dec << n
            << " ***********************"
            << std::endl;

         // trace proc[debug_proc_id]
         if ( debug_proc_id != 0xFFFFFFFF ) {
            size_t l          = debug_proc_id % NB_PROCS ;
            size_t cluster_xy = debug_proc_id / NB_PROCS ;
            size_t x          = cluster_xy >> Y_WIDTH;
            size_t y          = cluster_xy  & ((1 << Y_WIDTH) - 1);

            clusters[x][y]->proc[l]->print_trace(1);

            std::ostringstream proc_signame;
            proc_signame << "[SIG]PROC_" << x << "_" << y << "_" << l ;
            clusters[x][y]->signal_int_vci_ini_proc[l].print_trace(
                  proc_signame.str());

            clusters[x][y]->xicu->print_trace(l);

            std::ostringstream xicu_signame;
            xicu_signame << "[SIG]XICU_" << x << "_" << y;
            clusters[x][y]->signal_int_vci_tgt_xicu.print_trace(
                  xicu_signame.str());

            if( clusters[x][y]->signal_proc_it[l].read() ) {
               std::cout << "### IRQ_PROC_" << std::dec
                  << x << "_" << y << "_" << l
                  << " ACTIVE" << std::endl;
            }
         }

         // trace RAM xbar (between MEMC and IOB)
         // clusters[0][0]->ram_xbar_cmd->print_trace();
         // clusters[0][0]->ram_xbar_rsp->print_trace();
         // clusters[x_size-1][y_size-1]->ram_xbar_cmd->print_trace();
         // clusters[x_size-1][y_size-1]->ram_xbar_rsp->print_trace();

         // trace INT network
         // clusters[0][0]->int_xbar_d->print_trace();

         // clusters[0][0]->signal_int_dspin_cmd_l2g_d.print_trace(
         //    "[SIG] INT_CMD_L2G_D_0_0");
         // clusters[0][0]->signal_int_dspin_rsp_g2l_d.print_trace(
         //    "[SIG] INT_RSP_G2L_D_0_0");

         // clusters[0][0]->int_router_cmd->print_trace(0);
         // clusters[0][0]->int_router_rsp->print_trace(0);

         // trace INT_CMD_D xbar and router in cluster 0_1
         // clusters[0][1]->int_router_cmd->print_trace(0);
         // clusters[0][1]->int_router_rsp->print_trace(0);

         // clusters[0][1]->signal_int_dspin_cmd_g2l_d.print_trace(
         //    "[SIG] INT_CMD_G2L_D_0_0");
         // clusters[0][1]->signal_int_dspin_rsp_l2g_d.print_trace(
         //    "[SIG] INT_RSP_L2G_D_0_0");

         // clusters[0][1]->int_xbar_cmd_d->print_trace();

         // trace memc[debug_memc_id]
         if ( debug_memc_id != 0xFFFFFFFF ) {
            size_t x = debug_memc_id >> Y_WIDTH;
            size_t y = debug_memc_id  & ((1 << Y_WIDTH) - 1);

            clusters[x][y]->memc->print_trace(0);
            std::ostringstream smemc_tgt;
            smemc_tgt << "[SIG]MEMC_TGT_" << x << "_" << y;
            clusters[x][y]->signal_int_vci_tgt_memc.print_trace(
                  smemc_tgt.str());
            std::ostringstream smemc_ini;
            smemc_ini << "[SIG]MEMC_INI_" << x << "_" << y;
            clusters[x][y]->signal_ram_vci_ini_memc.print_trace(
                  smemc_ini.str());
            clusters[x][y]->xram->print_trace();
            std::ostringstream sxram_tgt;
            sxram_tgt << "[SIG]XRAM_TGT_" << x << "_" << y;
            clusters[x][y]->signal_ram_vci_tgt_xram.print_trace(
                  sxram_tgt.str());
         }

         // trace iob, iox and external peripherals
         //if ( debug_iob ) {
         //   clusters[0][0]->iob->print_trace();
         //   clusters[0][0]->signal_int_vci_tgt_iobx.print_trace(
         //         "[SIG]IOB0_INT_TGT");
         //   clusters[0][0]->signal_int_vci_ini_iobx.print_trace(
         //         "[SIG]IOB0_INT_INI");
         //   clusters[0][0]->signal_ram_vci_ini_iobx.print_trace(
         //         "[SIG]IOB0_RAM_INI");

         //   signal_vci_ini_iob0.print_trace("[SIG]IOB0_IOX_INI");
         //   signal_vci_tgt_iob0.print_trace("[SIG]IOB0_IOX_TGT");

         //   cdma->print_trace();
         //   signal_vci_tgt_cdma.print_trace("[SIG]IOX_CDMA_TGT");
         //   signal_vci_ini_cdma.print_trace("[SIG]IOX_CDMA_INI");

         //   iox_network->print_trace();

         //   // interrupts
         //   if (signal_irq_bdev) std::cout << "### IRQ_BDEV ACTIVATED"
         //      << std::endl;
         //}
      }
      sc_start(sc_core::sc_time(1, SC_NS));
   }

   delete iox_network;
   delete mnic;
   delete fbuf;
   delete bdev;
   delete cdma;
   delete mtty;
   delete xpic;

   for(size_t x = 0; x < x_size; x++) {
      for(size_t y = 0; y < y_size; y++) {
         delete clusters[x][y];
      }
   }
   return EXIT_SUCCESS;
}

// SystemC entry point: forwards the command line to _main() and reports any
// exception that escapes it on std::cout.
//
// argc / argv : command-line arguments, passed unchanged to _main().
//
// Returns the value returned by _main() on normal completion, or 1 when an
// exception derived from std::exception was caught. Any other exception is
// logged and then rethrown to the caller.
int sc_main (int argc, char *argv[]) {
   try {
      return _main(argc, argv);
   } catch (const std::exception &e) {
      // Caught by const reference: the handler only reads the message.
      std::cout << e.what() << std::endl;
   } catch (...) {
      // Unknown exception type: log it, then let it propagate.
      std::cout << "Unknown exception occured" << std::endl;
      throw;
   }
   return 1;
}


// Local Variables:
// tab-width: 3
// c-basic-offset: 3
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=3:tabstop=3:softtabstop=3

