///////////////////////////////////////////////////////////////////////////////
// File: top.cpp
// Author: Alain Greiner
// Copyright: UPMC/LIP6
// Date : august 2013
// This program is released under the GNU public license
//
// Modified by: Cesar Fuguet
// Modified on: mars 2014
///////////////////////////////////////////////////////////////////////////////
// This file defines a generic TSAR architecture with an IO network emulating
// an external bus (i.e. Hypertransport) to access external peripherals:
//
// - BROM : boot ROM
// - FBUF : Frame Buffer
// - MTTY : multi TTY (up to 15 channels)
// - MNIC : Network controller (up to 2 channels)
// - CDMA : Chained Buffer DMA controller (up to 4 channels)
// - BDEV : Block Device controller (1 channel)
//
// The internal physical address space is 40 bits.
//
// It contains a 2D mesh of XMAX*YMAX clusters, and the cluster index
// is encoded on 8 bits (X_WIDTH = 4 / Y_WIDTH = 4) whatever the mesh size.
//
// It contains 3 networks:
//
// 1) the INT network supports Read/Write transactions
//    between processors and L2 caches or peripherals.
//    (VCI ADDRESS = 40 bits / VCI DATA width = 32 bits)
//    It supports also coherence transactions between L1 & L2 caches.
// 2) the RAM network is emulating the 3D network between L2 caches
//    and L3 caches, and is implemented as a 2D mesh between the L2 caches,
//    the two IO bridges and the physical RAMs distributed in all clusters.
//    (VCI ADDRESS = 40 bits / VCI DATA = 64 bits)
// 3) the IOX network connects the two IO bridge components to the
//    6 external peripheral controllers.
//    (VCI ADDRESS = 40 bits / VCI DATA width = 64 bits)
//
// The external peripherals IRQs are connected to the XICU component
// in cluster(0,0): therefore, the number of channels for the external
// peripherals (MTTY, MNIC, CDMA) is limited by the number of IRQ ports...
//
// In cluster(0,0), the XICU HWI input ports are connected as follow:
// - IRQ_IN[0]  to IRQ_IN[7]  grounded (reserved for PTI or SWI)
// - IRQ_IN[8]  to IRQ_IN[9]  are connected to 2 NIC_RX channels.
// - IRQ_IN[10] to IRQ_IN[11] are connected to 2 NIC_TX channels.
// - IRQ_IN[12] to IRQ_IN[15] are connected to 4 CDMA channels
// - IRQ_IN[16] to IRQ_IN[30] are connected to 15 TTY channels
// - IRQ_IN[31]               is connected to BDEV
// In other clusters, the XICU HWI input ports are grounded.
//
// All clusters are identical, but cluster(0,0) and cluster(XMAX-1,YMAX-1)
// contain an extra IO bridge component. These IOB0 & IOB1 components are
// connected to the three networks (INT, RAM, IOX).
// The number of clusters cannot be larger than 256.
// The number of processors per cluster cannot be larger than 4.
//
// - It uses two dspin_local_crossbar per cluster to implement the
//   local interconnect corresponding to the INT network.
// - It uses two dspin_local_crossbar per cluster to implement the
//   local interconnect corresponding to the coherence INT network.
// - It uses two virtual_dspin_router per cluster to implement
//   the INT network (routing both the direct and coherence traffic).
// - It uses two dspin_router per cluster to implement the RAM network.
// - It uses the vci_cc_vcache_wrapper.
// - It uses the vci_mem_cache.
// - It contains one vci_xicu and one vci_multi_dma per cluster.
// - It contains one vci_simple ram per cluster to model the L3 cache.
//
// The TsarIobCluster component is defined in files
// tsar_iob_cluster.* (with * = cpp, h, sd)
//
// The main hardware parameters must be defined in the hard_config.h file :
// - XMAX        : number of clusters in a row (power of 2)
// - YMAX        : number of clusters in a column (power of 2)
// - CLUSTER_SIZE     : size of the segment allocated to a cluster
// - NB_PROCS_MAX     : number of processors per cluster (power of 2)
// - NB_DMA_CHANNELS  : number of DMA channels per cluster (< 9)
// - NB_TTY_CHANNELS  : number of TTY channels in I/O network (< 16)
// - NB_NIC_CHANNELS  : number of NIC channels in I/O network (< 9)
//
// Some secondary hardware parameters must be defined in this top.cpp file:
// - XRAM_LATENCY     : external ram latency
// - MEMC_WAYS        : L2 cache number of ways
// - MEMC_SETS        : L2 cache number of sets
// - L1_IWAYS
// - L1_ISETS
// - L1_DWAYS
// - L1_DSETS
// - FBUF_X_SIZE      : width of frame buffer (pixels)
// - FBUF_Y_SIZE      : height of frame buffer (lines)
// - BDEV_SECTOR_SIZE : block size for block device
// - BDEV_IMAGE_NAME  : file pathname for block device
// - NIC_RX_NAME      : file pathname for NIC received packets
// - NIC_TX_NAME      : file pathname for NIC transmitted packets
// - NIC_TIMEOUT      : max number of cycles before closing a container
//
// General policy for 40 bits physical address decoding:
// All physical segments base addresses are multiple of 1 Mbytes
// (=> the 24 LSB bits = 0, and the 16 MSB bits define the target)
// The (x_width + y_width) MSB bits (left aligned) define
// the cluster index, and the LADR bits define the local index:
//      | X_ID  | Y_ID  |---| LADR |     OFFSET          |
//      |x_width|y_width|---|  8   |       24            |
//
// General policy for 14 bits SRCID decoding:
// Each component is identified by (x_id, y_id, l_id) tuple.
//      | X_ID  | Y_ID  |---| L_ID |
//      |x_width|y_width|---|  6   |
/////////////////////////////////////////////////////////////////////////

#include <systemc>
#include <sys/time.h>
#include <iostream>
#include <sstream>
#include <cstdlib>
#include <cstdarg>
#include <stdint.h>

#include "gdbserver.h"
#include "mapping_table.h"

#include "tsar_iob_cluster.h"
#include "vci_chbuf_dma.h"
#include "vci_multi_tty.h"
#include "vci_multi_nic.h"
#include "vci_block_device_tsar.h"
#include "vci_framebuffer.h"
#include "vci_iox_network.h"

#include "alloc_elems.h"

///////////////////////////////////////////////////
//      OS
///////////////////////////////////////////////////
// OS selection switch, set to 0 in this configuration.
// NOTE(review): the code testing USE_ALMOS is not in the visible part of
// this file -- confirm how the flag is consumed before changing it.
#define USE_ALMOS 0

// Pathnames of the ALMOS binaries.
// NOTE(review): the "@0x...:D" suffixes presumably give a load address and
// loader flags -- confirm against the loader that consumes these strings.
#define almos_bootloader_pathname "bootloader.bin"
#define almos_kernel_pathname     "kernel-soclib.bin@0xbfc10000:D"
#define almos_archinfo_pathname   "arch-info.bin@0xBFC08000:D"

///////////////////////////////////////////////////
//               Parallelisation
///////////////////////////////////////////////////
// Set to 1 to build the simulator with OpenMP: <omp.h> is then included and
// _main() disables dynamic teams and calls omp_set_num_threads() with the
// value of the -THREADS command line argument.
#define USE_OPENMP 0

#if USE_OPENMP
#include <omp.h>
#endif

///////////////////////////////////////////////////////////
//          DSPIN parameters
///////////////////////////////////////////////////////////

// Widths used as template arguments of the DSPIN components/signals.
// INT network: command / response widths.
#define dspin_int_cmd_width   39
#define dspin_int_rsp_width   32

// RAM network: command / response widths (e.g. the DspinSignals<> used for
// the IOB0 / IOB1 loopback signals in _main()).
#define dspin_ram_cmd_width   64
#define dspin_ram_rsp_width   64

///////////////////////////////////////////////////////////
//         VCI fields width  for the 3 VCI networks
///////////////////////////////////////////////////////////

// Data cell width: first template argument of VciParams, "int" flavour for
// vci_param_int and "ext" flavour for vci_param_ext.
// NOTE(review): presumably expressed in bytes (4 -> 32-bit data, 8 -> 64-bit
// data, as announced in the file header) -- confirm against VciParams.
#define vci_cell_width_int 4
#define vci_cell_width_ext 8

// Remaining VCI field widths, shared by both VciParams instantiations.
// vci_address_width (40) and vci_srcid_width (14) are also used to build the
// mapping tables and to compute the per-cluster address offset.
#define vci_plen_width     8
#define vci_address_width  40
#define vci_rerror_width   1
#define vci_clen_width     1
#define vci_rflag_width    1
#define vci_srcid_width    14
#define vci_pktid_width    4
#define vci_trdid_width    4
#define vci_wrplen_width   1

////////////////////////////////////////////////////////////
//    Main Hardware Parameters values
////////////////////////////////////////////////////////////

// Number of bits encoding the X and Y cluster coordinates.
// _main() asserts that both are equal to 4.
#define X_WIDTH   4
#define Y_WIDTH   4
// Upper bounds (exclusive) checked against the -XSIZE / -YSIZE arguments.
#define X_MAX     (1<<X_WIDTH)
#define Y_MAX     (1<<Y_WIDTH)

////////////////////////////////////////////////////////////
//    Secondary Hardware Parameters values
////////////////////////////////////////////////////////////

// External RAM latency (printed as RAM_LATENCY at start-up).
#define XRAM_LATENCY       0

// L2 cache (MEMC) geometry: number of ways / number of sets.
#define MEMC_WAYS          16
#define MEMC_SETS          256

// L1 instruction cache geometry: number of ways / number of sets.
#define L1_IWAYS           4
#define L1_ISETS           64

// L1 data cache geometry: number of ways / number of sets.
#define L1_DWAYS           4
#define L1_DSETS           64

// Frame buffer width (pixels) and height (lines).
#define FBUF_X_SIZE        128
#define FBUF_Y_SIZE        128

// Block device: default block size and default disk image pathname
// (the pathname can be overridden with the -DISK argument).
#define BDEV_SECTOR_SIZE   512
#define BDEV_IMAGE_NAME    "../../../giet_vm/hdd/virt_hdd.dmg"

// Network controller: files holding received / transmitted packets, and
// maximum number of cycles before closing a container.
#define NIC_RX_NAME        "giet_vm/nic/rx_packets.txt"
#define NIC_TX_NAME        "giet_vm/nic/tx_packets.txt"
#define NIC_TIMEOUT        10000

// Builds the cluster index from the (x,y) mesh coordinates: y occupies the
// Y_WIDTH low-order bits and x is placed just above them.
// The shift amount is Y_WIDTH rather than a literal 4 so that the encoding
// stays consistent with the decoding done elsewhere in this file
// (index >> Y_WIDTH and index & ((1 << Y_WIDTH) - 1)).
#define cluster(x,y)       ((y) + ((x)<<Y_WIDTH))

////////////////////////////////////////////////////////////
//    Software to be loaded in ROM & RAM
////////////////////////////////////////////////////////////

// Default pathname of the boot code; can be overridden with the -SOFT
// command line argument.
#define BOOT_SOFT_NAME     "../../softs/tsar_boot/preloader.elf"

////////////////////////////////////////////////////////////
//     DEBUG Parameters default values
////////////////////////////////////////////////////////////

// Default value of the frozen-processor monitoring threshold; can be
// overridden with the -FROZEN command line argument.
#define MAX_FROZEN_CYCLES  10000

/////////////////////////////////////////////////////////
//    Physical segments definition
/////////////////////////////////////////////////////////

// Non replicated peripherals (must be in cluster 0)

// Boot ROM
#define BROM_BASE 0x00BFC00000
#define BROM_SIZE 0x0000010000 // 64 Kbytes

// IO bridge configuration segment
#define IOBX_BASE 0x00BE000000
#define IOBX_SIZE 0x0000001000 // 4  Kbytes

// Block device
#define BDEV_BASE 0x00B3000000
#define BDEV_SIZE 0x0000008000 // 32 Kbytes

// Multi TTY
#define MTTY_BASE 0x00B4000000
#define MTTY_SIZE (0x0000001000 * 16)  // 16 * 4 Kbytes = 64 Kbytes

// Network controller
#define MNIC_BASE 0x00B5000000
#define MNIC_SIZE 0x0000080000 // 512 Kbytes

// Chained buffer DMA
#define CDMA_BASE 0x00B6000000
#define CDMA_SIZE (0x0000001000 * 2)  // 4 Kbytes per channel

// Frame buffer.
// NOTE(review): the size is computed from 800 * 600 * 2 and not from
// FBUF_X_SIZE / FBUF_Y_SIZE (128 x 128) -- confirm this is intended.
#define FBUF_BASE 0x00B7000000
#define FBUF_SIZE (800 * 600 * 2)

// Replicated peripherals : address is incremented by a cluster offset
//    offset  = cluster(x,y) << (address_width-x_width-y_width);

// Physical RAM segment of a cluster
#define XRAM_BASE 0x0000000000
#define XRAM_SIZE 0x0010000000 // 256 Mbytes

// Interrupt controller (XICU)
#define XICU_BASE 0x00B0000000
#define XICU_SIZE 0x0000001000 // 4 Kbytes

// Multi-channel DMA.
// The size is parenthesized so that the product remains one single operand
// wherever the macro is expanded (division, modulo, unary operators, ...).
#define MDMA_BASE 0x00B1000000
#define MDMA_SIZE (0x0000001000 * 4) // 4 Kbytes per channel

// Replicated MEMC configuration segment : address is incremented by a
// cluster offset
//    offset = cluster(x,y) << (address_width-x_width-y_width);

#define MEMC_BASE 0x00B2000000
#define MEMC_SIZE 0x0000001000 // 4 Kbytes

////////////////////////////////////////////////////////////////////////
//          SRCID definition
////////////////////////////////////////////////////////////////////////
// All initiators are in the same indexing space (14 bits).
// The SRCID is structured in two fields:
// - The 8 MSB bits (X_WIDTH + Y_WIDTH) define the cluster index (left aligned)
// - The 6 LSB bits define the local index.
// Two different initiators cannot have the same SRCID, but a given
// initiator can have two alias SRCIDs:
// - Internal initiators (procs, mdma) are replicated in all clusters,
//   and each initiator has one single SRCID.
// - External initiators (bdev, cdma) are not replicated, but can be
//   accessed in 2 clusters : cluster_iob0 and cluster_iob1.
//   They have the same local index, but two different cluster indexes.
// As cluster_iob0 and cluster_iob1 contain both internal initiators
// and external initiators, they must have different local indexes.
// Consequence: For a local interconnect, the INI_ID port index
// is NOT equal to the SRCID local index, and the local interconnect
// must make a translation: SRCID => INI_ID (port index)
////////////////////////////////////////////////////////////////////////

// Local part of the SRCID of each initiator. These values are combined with
// a cluster index in IntTab(cluster, local_srcid) when calling srcid_map()
// on the mapping tables.
#define PROC_LOCAL_SRCID   0x0 // from 0 to 7 (processor p uses PROC_LOCAL_SRCID + p)
#define MDMA_LOCAL_SRCID   0x8
#define IOBX_LOCAL_SRCID   0x9
#define MEMC_LOCAL_SRCID   0xA
#define CDMA_LOCAL_SRCID   0xE // hard-coded in dspin_tsar
#define BDEV_LOCAL_SRCID   0xF // hard-coded in dspin_tsar

///////////////////////////////////////////////////////////////////////
//     TGT_ID and INI_ID port indexing for INT local interconnect
///////////////////////////////////////////////////////////////////////

// Target port indexes on the INT local interconnect.
#define INT_MEMC_TGT_ID 0
#define INT_XICU_TGT_ID 1
#define INT_BROM_TGT_ID 2
#define INT_MDMA_TGT_ID 3
#define INT_IOBX_TGT_ID 4

// Initiator port indexes on the INT local interconnect.
// WARNING: INT_MDMA_INI_ID and INT_IOBX_INI_ID are not constants: they
// expand to expressions on a variable named nb_procs, which must therefore
// be in scope wherever these macros are used (it is a local of _main()).
#define INT_PROC_INI_ID 0 // from 0 to 7
#define INT_MDMA_INI_ID nb_procs
#define INT_IOBX_INI_ID (nb_procs + 1)

///////////////////////////////////////////////////////////////////////
//     TGT_ID and INI_ID port indexing for RAM local interconnect
///////////////////////////////////////////////////////////////////////

// Target port index on the RAM local interconnect (single XRAM target).
#define RAM_XRAM_TGT_ID 0

// Initiator port indexes on the RAM local interconnect.
// RAM_IOBX_INI_ID is the port shared by the external initiators (CDMA, BDEV)
// in the srcid_map() calls on the RAM mapping table.
#define RAM_MEMC_INI_ID 0
#define RAM_IOBX_INI_ID 1

///////////////////////////////////////////////////////////////////////
//     TGT_ID and INI_ID port indexing for IOX local interconnect
///////////////////////////////////////////////////////////////////////

// Target port indexes on the IOX network.
#define IOX_IOB0_TGT_ID 0 // don't change this value
#define IOX_IOB1_TGT_ID 1 // don't change this value
#define IOX_FBUF_TGT_ID 2
#define IOX_BDEV_TGT_ID 3
#define IOX_MNIC_TGT_ID 4
#define IOX_CDMA_TGT_ID 5
#define IOX_MTTY_TGT_ID 6

// Initiator port indexes on the IOX network.
#define IOX_IOB0_INI_ID 0 // Don't change this value
#define IOX_IOB1_INI_ID 1 // Don't change this value
#define IOX_BDEV_INI_ID 2
#define IOX_CDMA_INI_ID 3

////////////////////////////////////////////////////////////////////////
int _main(int argc, char *argv[])
////////////////////////////////////////////////////////////////////////
{
   using namespace sc_core;
   using namespace soclib::caba;
   using namespace soclib::common;

   char     soft_name[256]   = BOOT_SOFT_NAME;    // pathname: binary code
   size_t   ncycles          = 1000000000;        // simulated cycles
   char     disk_name[256]   = BDEV_IMAGE_NAME;   // pathname: disk image
   char     nic_rx_name[256] = NIC_RX_NAME;       // pathname: rx packets file
   char     nic_tx_name[256] = NIC_TX_NAME;       // pathname: tx packets file
   ssize_t  threads_nr       = 1;                 // simulator's threads number
   bool     debug_ok         = false;             // trace activated
   size_t   debug_period     = 1;                 // trace period
   size_t   debug_memc_id    = 0xFFFFFFFF;        // idx of traced memc
   size_t   debug_proc_id    = 0xFFFFFFFF;        // idx of traced proc
   bool     debug_iob        = false;             // trace iobs when true
   uint32_t debug_from       = 0;                 // trace start cycle
   uint32_t frozen_cycles    = MAX_FROZEN_CYCLES; // monitoring frozen procs
   size_t   block_size       = BDEV_SECTOR_SIZE;  // disk block size
   size_t   nb_procs         = 1;
   size_t   x_size           = 2;
   size_t   y_size           = 2;
   size_t   nb_tty_channels  = 1;
   size_t   nb_nic_channels  = 1;

   assert((X_WIDTH == 4) and (Y_WIDTH == 4));
      
   ////////////// command line arguments //////////////////////
   if (argc > 1)
   {
      for (int n = 1; n < argc; n = n + 2)
      {
         if ((strcmp(argv[n],"-NCYCLES") == 0) && (n+1<argc))
         {
            ncycles = atoi(argv[n+1]);
            continue;
         }
         if ((strcmp(argv[n],"-SOFT") == 0) && (n+1<argc) )
         {
            strcpy(soft_name, argv[n+1]);
            continue;
         }
         if ((strcmp(argv[n],"-DISK") == 0) && (n+1<argc) )
         {
            strcpy(disk_name, argv[n+1]);
            continue;
         }
         if ((strcmp(argv[n],"-NPROCS") == 0) && (n+1<argc))
         {
            nb_procs = atoi(argv[n+1]);
            assert((nb_procs > 0) && (nb_procs < 5));
            continue;
         }
         if ((strcmp(argv[n],"-XSIZE") == 0) && (n+1<argc))
         {
            x_size = atoi(argv[n+1]);
            assert((x_size > 0) && (x_size < X_MAX));
            continue;
         }
         if ((strcmp(argv[n],"-YSIZE") == 0) && (n+1<argc))
         {
            y_size = atoi(argv[n+1]);
            assert((y_size > 0) && (y_size < Y_MAX));
            continue;
         }
         if ((strcmp(argv[n],"-DEBUG") == 0) && (n+1<argc) )
         {
            debug_ok   = true;
            debug_from = atoi(argv[n+1]);
            continue;
         }
         if ((strcmp(argv[n],"-MEMCID") == 0) && (n+1<argc) )
         {
            debug_memc_id = atoi(argv[n+1]);
            size_t x = debug_memc_id >> Y_WIDTH;
            size_t y = debug_memc_id  & ((1 << Y_WIDTH) - 1);
            assert((x < x_size) && (y < y_size));
            continue;
         }
         if ((strcmp(argv[n],"-IOB") == 0) && (n+1<argc) )
         {
            debug_iob = (atoi(argv[n+1]) != 0) ? 1 : 0;
            continue;
         }
         if ((strcmp(argv[n],"-PROCID") == 0) && (n+1<argc) )
         {
            debug_proc_id     = atoi(argv[n+1]);
            size_t cluster_xy = debug_proc_id / nb_procs ;
            size_t x          = cluster_xy >> Y_WIDTH;
            size_t y          = cluster_xy  & ((1 << Y_WIDTH) - 1);
            assert((x < x_size) && (y < y_size));
            continue;
         }
         if ((strcmp(argv[n], "-THREADS") == 0) && ((n+1) < argc))
         {
            threads_nr = atoi(argv[n+1]);
            assert(threads_nr > 0);
            continue;
         }
         if ((strcmp(argv[n], "-FROZEN") == 0) && (n+1 < argc))
         {
            frozen_cycles = atoi(argv[n+1]);
            assert(frozen_cycles > 0);
            continue;
         }
         if ((strcmp(argv[n], "-PERIOD") == 0) && (n+1 < argc))
         {
            debug_period = atoi(argv[n+1]);
            assert(debug_period > 0);
            continue;
         }

         std::cout << "   Arguments are (key,value) couples.\n"
                   << "   The order is not important.\n"
                   << "   Accepted arguments are :\n\n"
                   << "     -NCYCLES number of simulated_cycles\n"
                   << "     -SOFT    pathname for embedded soft\n"
                   << "     -DISK    pathname for disk image\n"
                   << "     -NPROCS  number of processors per cluster\n"
                   << "     -XSIZE   number of clusters on X\n"
                   << "     -YSIZE   number of clusters on Y\n"
                   << "     -DEBUG   debug start cycle\n"
                   << "     -MEMCID  index of memc to trace\n"
                   << "     -IOB     debug IOBs if non_zero_value\n\n"
                   << "     -PROCID  index of proc to trace\n"
                   << "     -THREADS simulator's threads number\n"
                   << "     -FROZEN  max number of frozen cycles\n"
                   << "     -PERIOD  number of cycles between trace\n\n";
         exit(0);
      }
   }

   // one DMA channel per proc
   size_t nb_dma_channels = nb_procs;

   // clusters containing IOB0 and IOB1
   size_t cluster_iob0 = cluster(0,0);
   size_t cluster_iob1 = cluster(x_size - 1, y_size - 1);

   assert( (nb_tty_channels < 16) and
           "The NB_TTY_CHANNELS parameter must be smaller than 16" );

   assert( (nb_nic_channels == 1) and
           "The NB_NIC_CHANNELS parameter must be 1" );

   std::cout << std::endl;
   std::cout << " - X_SIZE          = " << x_size          << std::endl;
   std::cout << " - Y_SIZE          = " << y_size          << std::endl;
   std::cout << " - NB_PROCS        = " << nb_procs        << std::endl;
   std::cout << " - NB_DMA_CHANNELS = " << nb_dma_channels << std::endl;
   std::cout << " - NB_TTY_CHANNELS = " << nb_tty_channels << std::endl;
   std::cout << " - NB_NIC_CHANNELS = " << nb_nic_channels << std::endl;
   std::cout << " - MEMC_WAYS       = " << MEMC_WAYS       << std::endl;
   std::cout << " - MEMC_SETS       = " << MEMC_SETS       << std::endl;
   std::cout << " - RAM_LATENCY     = " << XRAM_LATENCY    << std::endl;
   std::cout << " - MAX_FROZEN      = " << frozen_cycles   << std::endl;

   std::cout << std::endl;

#if USE_OPENMP
   omp_set_dynamic(false);
   omp_set_num_threads(threads_nr);
   std::cerr << "Built with openmp version " << _OPENMP << std::endl;
#endif

   // Define VciParams objects
   typedef soclib::caba::VciParams<vci_cell_width_int,
                                   vci_plen_width,
                                   vci_address_width,
                                   vci_rerror_width,
                                   vci_clen_width,
                                   vci_rflag_width,
                                   vci_srcid_width,
                                   vci_pktid_width,
                                   vci_trdid_width,
                                   vci_wrplen_width> vci_param_int;

   typedef soclib::caba::VciParams<vci_cell_width_ext,
                                   vci_plen_width,
                                   vci_address_width,
                                   vci_rerror_width,
                                   vci_clen_width,
                                   vci_rflag_width,
                                   vci_srcid_width,
                                   vci_pktid_width,
                                   vci_trdid_width,
                                   vci_wrplen_width> vci_param_ext;

   /////////////////////////////////////////////////////////////////////
   // INT network mapping table
   // - two levels address decoding for commands
   // - two levels srcid decoding for responses
   // - NB_PROCS_MAX + 2 (MDMA, IOBX) local initiators per cluster
   // - 4 local targets (MEMC, XICU, MDMA, IOBX) per cluster
   /////////////////////////////////////////////////////////////////////
   MappingTable maptab_int(
         vci_address_width,
         IntTab(X_WIDTH + Y_WIDTH, 16 - X_WIDTH - Y_WIDTH),
         IntTab(X_WIDTH + Y_WIDTH, vci_srcid_width - X_WIDTH - Y_WIDTH),
         0x00FF000000);

   for (size_t x = 0; x < x_size; x++)
   {
      for (size_t y = 0; y < y_size; y++)
      {
         uint64_t offset = ((uint64_t)cluster(x,y))
            << (vci_address_width - X_WIDTH - Y_WIDTH);
         bool config    = true;
         bool cacheable = true;

         // the five following segments are defined in all clusters

         std::ostringstream smemc_conf;
         smemc_conf << "int_seg_memc_conf_" << x << "_" << y;
         maptab_int.add(Segment(smemc_conf.str(), MEMC_BASE+offset, MEMC_SIZE,
                                IntTab(cluster(x,y),INT_MEMC_TGT_ID),
                                not cacheable, config ));

         std::ostringstream smemc_xram;
         smemc_xram << "int_seg_memc_xram_" << x << "_" << y;
         maptab_int.add(Segment(smemc_xram.str(), XRAM_BASE+offset, XRAM_SIZE,
                                IntTab(cluster(x,y),INT_MEMC_TGT_ID),
                                cacheable));

         std::ostringstream sxicu;
         sxicu << "int_seg_xicu_" << x << "_" << y;
         maptab_int.add(Segment(sxicu.str(), XICU_BASE+offset, XICU_SIZE,
                                IntTab(cluster(x,y),INT_XICU_TGT_ID),
                                not cacheable));

         std::ostringstream sbrom;
         sbrom << "int_seg_brom_" << x << "_" << y;
         maptab_int.add(Segment(sbrom.str(), BROM_BASE+offset, BROM_SIZE,
                                IntTab(cluster(x,y),INT_BROM_TGT_ID),
                                cacheable));

         std::ostringstream smdma;
         smdma << "int_seg_mdma_" << x << "_" << y;
         maptab_int.add(Segment(smdma.str(), MDMA_BASE+offset, MDMA_SIZE,
                                IntTab(cluster(x,y),INT_MDMA_TGT_ID),
                                not cacheable));

         // the following segments are only defined in cluster_iob0 or in
         // cluster_iob1
         if ((cluster(x,y) == cluster_iob0) || (cluster(x,y) == cluster_iob1))
         {
            std::ostringstream siobx;
            siobx << "int_seg_iobx_" << x << "_" << y;
            maptab_int.add(Segment(siobx.str(), IOBX_BASE+offset, IOBX_SIZE,
                                   IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                                   not cacheable, config ));

            std::ostringstream stty;
            stty << "int_seg_mtty_" << x << "_" << y;
            maptab_int.add(Segment(stty.str(), MTTY_BASE+offset, MTTY_SIZE,
                                   IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                                   not cacheable));

            std::ostringstream sfbf;
            sfbf << "int_seg_fbuf_" << x << "_" << y;
            maptab_int.add(Segment(sfbf.str(), FBUF_BASE+offset, FBUF_SIZE,
                                   IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                                   not cacheable));

            std::ostringstream sbdv;
            sbdv << "int_seg_bdev_" << x << "_" << y;
            maptab_int.add(Segment(sbdv.str(), BDEV_BASE+offset, BDEV_SIZE,
                                   IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                                   not cacheable));

            std::ostringstream snic;
            snic << "int_seg_mnic_" << x << "_" << y;
            maptab_int.add(Segment(snic.str(), MNIC_BASE+offset, MNIC_SIZE,
                                   IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                                   not cacheable));

            std::ostringstream sdma;
            sdma << "int_seg_cdma_" << x << "_" << y;
            maptab_int.add(Segment(sdma.str(), CDMA_BASE+offset, CDMA_SIZE,
                                   IntTab(cluster(x,y), INT_IOBX_TGT_ID),
                                   not cacheable));
         }

         // This define the mapping between the SRCIDs
         // and the port index on the local interconnect.

         maptab_int.srcid_map(IntTab(cluster(x,y), MDMA_LOCAL_SRCID),
                              IntTab(cluster(x,y), INT_MDMA_INI_ID));
         maptab_int.srcid_map(IntTab(cluster(x,y), IOBX_LOCAL_SRCID),
                              IntTab(cluster(x,y), INT_IOBX_INI_ID));

         for ( size_t p = 0 ; p < nb_procs ; p++ )
         {
            maptab_int.srcid_map(IntTab(cluster(x,y), PROC_LOCAL_SRCID + p),
                                 IntTab(cluster(x,y), INT_PROC_INI_ID  + p));
         }
      }
   }
   std::cout << "INT network " << maptab_int << std::endl;

    /////////////////////////////////////////////////////////////////////////
    // RAM network mapping table
    // - two levels address decoding for commands
    // - two levels srcid decoding for responses
    // - 2 local initiators (MEMC, IOBX) per cluster
    //   (IOBX component only in cluster_iob0 and cluster_iob1)
    // - 1 local target (XRAM) per cluster
    ////////////////////////////////////////////////////////////////////////
    MappingTable maptab_ram(
          vci_address_width,
          IntTab(X_WIDTH + Y_WIDTH, 16 - X_WIDTH - Y_WIDTH),
          IntTab(X_WIDTH + Y_WIDTH, vci_srcid_width - X_WIDTH - Y_WIDTH),
          0x00FF000000);

    for (size_t x = 0; x < x_size; x++)
    {
        for (size_t y = 0; y < y_size ; y++)
        {
           uint64_t offset = ((uint64_t)cluster(x,y))
              << (vci_address_width - X_WIDTH - Y_WIDTH);

            std::ostringstream sxram;
            sxram << "ext_seg_xram_" << x << "_" << y;
            maptab_ram.add(Segment(sxram.str(), XRAM_BASE+offset,
                                   XRAM_SIZE, IntTab(cluster(x,y), 0), false));
        }
    }

    // This define the mapping between the initiators SRCID
    // and the port index on the RAM local interconnect.
    // External initiator have two alias SRCID (iob0 / iob1)

    maptab_ram.srcid_map(IntTab(cluster_iob0, CDMA_LOCAL_SRCID),
                         IntTab(cluster_iob0, RAM_IOBX_INI_ID));
    maptab_ram.srcid_map(IntTab(cluster_iob1, CDMA_LOCAL_SRCID),
                         IntTab(cluster_iob1, RAM_IOBX_INI_ID));
    maptab_ram.srcid_map(IntTab(cluster_iob0, BDEV_LOCAL_SRCID),
                         IntTab(cluster_iob0, RAM_IOBX_INI_ID));
    maptab_ram.srcid_map(IntTab(cluster_iob1, BDEV_LOCAL_SRCID),
                         IntTab(cluster_iob1, RAM_IOBX_INI_ID));
    maptab_ram.srcid_map(IntTab(cluster_iob1, MEMC_LOCAL_SRCID),
                         IntTab(cluster_iob1, RAM_MEMC_INI_ID));

    std::cout << "RAM network " << maptab_ram << std::endl;

    ///////////////////////////////////////////////////////////////////////
    // IOX network mapping table
    // - two levels address decoding for commands
    // - two levels srcid decoding for responses
    // - 4 initiators (IOB0, IOB1, BDEV, CDMA)
    // - 8 targets (IOB0, IOB1, BDEV, CDMA, MTTY, FBUF, BROM, MNIC)
    ///////////////////////////////////////////////////////////////////////
    MappingTable maptab_iox(
          vci_address_width,
          IntTab(X_WIDTH + Y_WIDTH, 16 - X_WIDTH - Y_WIDTH),
          IntTab(X_WIDTH + Y_WIDTH, vci_srcid_width - X_WIDTH - Y_WIDTH),
          0x00FF000000);

    // compute base addresses for cluster_iob0 and cluster_iob1
    uint64_t iob0_base = ((uint64_t)cluster_iob0)
       << (vci_address_width - X_WIDTH - Y_WIDTH);
    uint64_t iob1_base = ((uint64_t)cluster_iob1)
       << (vci_address_width - X_WIDTH - Y_WIDTH);

    // Each peripheral can be accessed through two segments,
    // depending on the used IOB (IOB0 or IOB1).
    maptab_iox.add(Segment("iox_seg_mtty_0", MTTY_BASE + iob0_base, MTTY_SIZE,
                           IntTab(cluster_iob0, IOX_MTTY_TGT_ID), false));
    maptab_iox.add(Segment("iox_seg_mtty_1", MTTY_BASE + iob1_base, MTTY_SIZE,
                           IntTab(cluster_iob1, IOX_MTTY_TGT_ID), false));
    maptab_iox.add(Segment("iox_seg_fbuf_0", FBUF_BASE + iob0_base, FBUF_SIZE,
                           IntTab(cluster_iob0, IOX_FBUF_TGT_ID), false));
    maptab_iox.add(Segment("iox_seg_fbuf_1", FBUF_BASE + iob1_base, FBUF_SIZE,
                           IntTab(cluster_iob1, IOX_FBUF_TGT_ID), false));
    maptab_iox.add(Segment("iox_seg_bdev_0", BDEV_BASE + iob0_base, BDEV_SIZE,
                           IntTab(cluster_iob0, IOX_BDEV_TGT_ID), false));
    maptab_iox.add(Segment("iox_seg_bdev_1", BDEV_BASE + iob1_base, BDEV_SIZE,
                           IntTab(cluster_iob1, IOX_BDEV_TGT_ID), false));
    maptab_iox.add(Segment("iox_seg_mnic_0", MNIC_BASE + iob0_base, MNIC_SIZE,
                           IntTab(cluster_iob0, IOX_MNIC_TGT_ID), false));
    maptab_iox.add(Segment("iox_seg_mnic_1", MNIC_BASE + iob1_base, MNIC_SIZE,
                           IntTab(cluster_iob1, IOX_MNIC_TGT_ID), false));
    maptab_iox.add(Segment("iox_seg_cdma_0", CDMA_BASE + iob0_base, CDMA_SIZE,
                           IntTab(cluster_iob0, IOX_CDMA_TGT_ID), false));
    maptab_iox.add(Segment("iox_seg_cdma_1", CDMA_BASE + iob1_base, CDMA_SIZE,
                           IntTab(cluster_iob1, IOX_CDMA_TGT_ID), false));

    // Each physical RAM can be accessed through IOB0, or through IOB1.
    // if IOMMU is not activated, addresses are 40 bits (physical addresses),
    // and the choice depends on on address bit A[39].
    // if IOMMU is activated the addresses use only 32 bits (virtual
    // addresses), and the choice depends on address bit A[31].
    for (size_t x = 0; x < x_size; x++)
    {
        for (size_t y = 0; y < y_size ; y++)
        {
            uint64_t offset = ((uint64_t)cluster(x,y))
               << (vci_address_width - X_WIDTH - Y_WIDTH);

            // send command to XRAM through IOB0
            if ( x < (x_size/2) )
            {
                std::ostringstream siob0;
                siob0 << "iox_seg_xram_" << x << "_" << y;
                maptab_iox.add(Segment(siob0.str(), offset, 0x80000000,
                                       IntTab(cluster_iob0,IOX_IOB0_TGT_ID),
                                       false));
            }
            // send command to XRAM through IOB1
            else
            {
                std::ostringstream siob1;
                siob1 << "iox_seg_xram_" << x << "_" << y;
                maptab_iox.add(Segment(siob1.str(), offset, 0x80000000,
                                       IntTab(cluster_iob1,IOX_IOB1_TGT_ID),
                                       false));
            }
        }
    }
    // useful when IOMMU activated
    maptab_iox.add(Segment("iox_seg_xram", 0xc0000000, 0x40000000,
                           IntTab(cluster_iob1,IOX_IOB1_TGT_ID), false));

    // This define the mapping between the initiators (identified by the SRCID)
    // and the port index on the IOX local interconnect.
    // External initiator have two alias SRCID (iob0 / iob1 access)

    maptab_iox.srcid_map(IntTab(cluster_iob0, CDMA_LOCAL_SRCID),
                         IntTab(cluster_iob0, IOX_CDMA_INI_ID));
    maptab_iox.srcid_map(IntTab(cluster_iob1, CDMA_LOCAL_SRCID),
                         IntTab(cluster_iob1, IOX_CDMA_INI_ID));
    maptab_iox.srcid_map(IntTab(cluster_iob0, BDEV_LOCAL_SRCID),
                         IntTab(cluster_iob0, IOX_BDEV_INI_ID));
    maptab_iox.srcid_map(IntTab(cluster_iob1, BDEV_LOCAL_SRCID),
                         IntTab(cluster_iob0, IOX_BDEV_INI_ID));

    for (size_t x = 0; x < x_size; x++)
    {
        for (size_t y = 0; y < y_size ; y++)
        {
            size_t iob = (x < (x_size / 2)) ? IOX_IOB0_INI_ID
                                            : IOX_IOB1_INI_ID;

            for (size_t p = 0 ; p < nb_procs ; p++)
            {
               maptab_iox.srcid_map(IntTab(cluster(x,y), PROC_LOCAL_SRCID + p),
                                    IntTab(cluster(x,y), iob));
            }
            maptab_iox.srcid_map(IntTab( cluster(x,y), MDMA_LOCAL_SRCID),
                                 IntTab( cluster(x,y), IOX_IOB0_INI_ID));
        }
    }

    std::cout << "IOX network " << maptab_iox << std::endl;

    ////////////////////
    // Signals
    ////////////////////

    sc_clock        signal_clk("clk");
    sc_signal<bool> signal_resetn("resetn");

    sc_signal<bool> signal_irq_false;
    sc_signal<bool> signal_irq_bdev;
    sc_signal<bool> signal_irq_mnic_rx[1];
    sc_signal<bool> signal_irq_mnic_tx[1];
    sc_signal<bool> signal_irq_mtty[16];
    sc_signal<bool> signal_irq_cdma[1*2];

    // DSPIN signals for loopback in cluster_iob0 & cluster_iob1
    DspinSignals<dspin_ram_cmd_width> signal_dspin_cmd_iob0_loopback;
    DspinSignals<dspin_ram_rsp_width> signal_dspin_rsp_iob0_loopback;
    DspinSignals<dspin_ram_cmd_width> signal_dspin_cmd_iob1_loopback;
    DspinSignals<dspin_ram_rsp_width> signal_dspin_rsp_iob1_loopback;

    // VCI signals for IOX network
    VciSignals<vci_param_ext> signal_vci_ini_iob0("signal_vci_ini_iob0");
    VciSignals<vci_param_ext> signal_vci_ini_iob1("signal_vci_ini_iob1");
    VciSignals<vci_param_ext> signal_vci_ini_bdev("signal_vci_ini_bdev");
    VciSignals<vci_param_ext> signal_vci_ini_cdma("signal_vci_ini_cdma");

    VciSignals<vci_param_ext> signal_vci_tgt_iob0("signal_vci_tgt_iob0");
    VciSignals<vci_param_ext> signal_vci_tgt_iob1("signal_vci_tgt_iob1");
    VciSignals<vci_param_ext> signal_vci_tgt_mtty("signal_vci_tgt_mtty");
    VciSignals<vci_param_ext> signal_vci_tgt_fbuf("signal_vci_tgt_fbuf");
    VciSignals<vci_param_ext> signal_vci_tgt_mnic("signal_vci_tgt_mnic");
    VciSignals<vci_param_ext> signal_vci_tgt_bdev("signal_vci_tgt_bdev");
    VciSignals<vci_param_ext> signal_vci_tgt_cdma("signal_vci_tgt_cdma");

   // Horizontal inter-clusters INT network DSPIN
   DspinSignals<dspin_int_cmd_width>*** signal_dspin_int_cmd_h_inc =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_int_cmd_h_inc", x_size-1, y_size, 3);
   DspinSignals<dspin_int_cmd_width>*** signal_dspin_int_cmd_h_dec =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_int_cmd_h_dec", x_size-1, y_size, 3);
   DspinSignals<dspin_int_rsp_width>*** signal_dspin_int_rsp_h_inc =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_int_rsp_h_inc", x_size-1, y_size, 2);
   DspinSignals<dspin_int_rsp_width>*** signal_dspin_int_rsp_h_dec =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_int_rsp_h_dec", x_size-1, y_size, 2);

   // Vertical inter-clusters INT network DSPIN
   DspinSignals<dspin_int_cmd_width>*** signal_dspin_int_cmd_v_inc =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_int_cmd_v_inc", x_size, y_size-1, 3);
   DspinSignals<dspin_int_cmd_width>*** signal_dspin_int_cmd_v_dec =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_int_cmd_v_dec", x_size, y_size-1, 3);
   DspinSignals<dspin_int_rsp_width>*** signal_dspin_int_rsp_v_inc =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_int_rsp_v_inc", x_size, y_size-1, 2);
   DspinSignals<dspin_int_rsp_width>*** signal_dspin_int_rsp_v_dec =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_int_rsp_v_dec", x_size, y_size-1, 2);

   // Mesh boundaries INT network DSPIN
   DspinSignals<dspin_int_cmd_width>**** signal_dspin_false_int_cmd_in =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_false_int_cmd_in", x_size, y_size, 4, 3);
   DspinSignals<dspin_int_cmd_width>**** signal_dspin_false_int_cmd_out =
      alloc_elems<DspinSignals<dspin_int_cmd_width> >(
            "signal_dspin_false_int_cmd_out", x_size, y_size, 4, 3);
   DspinSignals<dspin_int_rsp_width>**** signal_dspin_false_int_rsp_in =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_false_int_rsp_in", x_size, y_size, 4, 2);
   DspinSignals<dspin_int_rsp_width>**** signal_dspin_false_int_rsp_out =
      alloc_elems<DspinSignals<dspin_int_rsp_width> >(
            "signal_dspin_false_int_rsp_out", x_size, y_size, 4, 2);


   // Horizontal inter-clusters RAM network DSPIN
   DspinSignals<dspin_ram_cmd_width>** signal_dspin_ram_cmd_h_inc =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_ram_cmd_h_inc", x_size-1, y_size);
   DspinSignals<dspin_ram_cmd_width>** signal_dspin_ram_cmd_h_dec =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_ram_cmd_h_dec", x_size-1, y_size);
   DspinSignals<dspin_ram_rsp_width>** signal_dspin_ram_rsp_h_inc =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_ram_rsp_h_inc", x_size-1, y_size);
   DspinSignals<dspin_ram_rsp_width>** signal_dspin_ram_rsp_h_dec =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_ram_rsp_h_dec", x_size-1, y_size);

   // Vertical inter-clusters RAM network DSPIN
   DspinSignals<dspin_ram_cmd_width>** signal_dspin_ram_cmd_v_inc =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_ram_cmd_v_inc", x_size, y_size-1);
   DspinSignals<dspin_ram_cmd_width>** signal_dspin_ram_cmd_v_dec =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_ram_cmd_v_dec", x_size, y_size-1);
   DspinSignals<dspin_ram_rsp_width>** signal_dspin_ram_rsp_v_inc =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_ram_rsp_v_inc", x_size, y_size-1);
   DspinSignals<dspin_ram_rsp_width>** signal_dspin_ram_rsp_v_dec =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_ram_rsp_v_dec", x_size, y_size-1);

   // Mesh boundaries RAM network DSPIN
   DspinSignals<dspin_ram_cmd_width>*** signal_dspin_false_ram_cmd_in =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_false_ram_cmd_in", x_size, y_size, 4);
   DspinSignals<dspin_ram_cmd_width>*** signal_dspin_false_ram_cmd_out =
      alloc_elems<DspinSignals<dspin_ram_cmd_width> >(
            "signal_dspin_false_ram_cmd_out", x_size, y_size, 4);
   DspinSignals<dspin_ram_rsp_width>*** signal_dspin_false_ram_rsp_in =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_false_ram_rsp_in", x_size, y_size, 4);
   DspinSignals<dspin_ram_rsp_width>*** signal_dspin_false_ram_rsp_out =
      alloc_elems<DspinSignals<dspin_ram_rsp_width> >(
            "signal_dspin_false_ram_rsp_out", x_size, y_size, 4);

   ////////////////////////////
   //      Loader
   ////////////////////////////

#if USE_ALMOS
   soclib::common::Loader loader(almos_bootloader_pathname,
                                 almos_archinfo_pathname,
                                 almos_kernel_pathname);
#else
   soclib::common::Loader loader(soft_name);
#endif

   typedef soclib::common::GdbServer<soclib::common::Mips32ElIss> proc_iss;
   proc_iss::set_loader(loader);

   ////////////////////////////////////////
   //  Instantiated Hardware Components
   ////////////////////////////////////////

   std::cout << std::endl << "External Bus and Peripherals" << std::endl
             << std::endl;

   // IOX network
   VciIoxNetwork<vci_param_ext>* iox_network;
   iox_network = new VciIoxNetwork<vci_param_ext>("iox_network",
                                                  maptab_iox,
                                                  7,   // number of targets
                                                  4 ); // number of initiators

   // Network Controller
   VciMultiNic<vci_param_ext>*  mnic;
   mnic = new VciMultiNic<vci_param_ext>("mnic",
                                         IntTab(0, IOX_MNIC_TGT_ID),
                                         maptab_iox,
                                         nb_nic_channels,
                                         0,           // mac_4 address
                                         0,           // mac_2 address
                                         nic_rx_name,
                                         nic_tx_name);

   // Frame Buffer
   VciFrameBuffer<vci_param_ext>*  fbuf;
   fbuf = new VciFrameBuffer<vci_param_ext>("fbuf",
                                            IntTab(0, IOX_FBUF_TGT_ID),
                                            maptab_iox,
                                            FBUF_X_SIZE, FBUF_Y_SIZE );

   // Block Device
   // for AHCI
   // std::vector<std::string> filenames;
   // filenames.push_back(disk_name); // one single disk
   VciBlockDeviceTsar<vci_param_ext>*  bdev;
   bdev = new VciBlockDeviceTsar<vci_param_ext>("bdev",
                                                maptab_iox,
                                                IntTab(0, BDEV_LOCAL_SRCID),
                                                IntTab(0, IOX_BDEV_TGT_ID),
                                                disk_name,
                                                block_size,
                                                64,  // burst size (bytes)
                                                0 ); // disk latency

   // Chained Buffer DMA controller
   VciChbufDma<vci_param_ext>*  cdma;
   cdma = new VciChbufDma<vci_param_ext>("cdma",
                                         maptab_iox,
                                         IntTab(0, CDMA_LOCAL_SRCID),
                                         IntTab(0, IOX_CDMA_TGT_ID),
                                         64,  // burst size (bytes)
                                         2 * nb_nic_channels);
   // Multi-TTY controller
   std::vector<std::string> vect_names;
   for( size_t tid = 0 ; tid < nb_tty_channels ; tid++ )
   {
      std::ostringstream term_name;
      term_name <<  "term" << tid;
      vect_names.push_back(term_name.str().c_str());
   }
   VciMultiTty<vci_param_ext>*  mtty;
   mtty = new VciMultiTty<vci_param_ext>("mtty_iox",
                                         IntTab(0, IOX_MTTY_TGT_ID),
                                         maptab_iox,
                                         vect_names);
   // Clusters
   typedef TsarIobCluster<vci_param_int, vci_param_ext, dspin_int_cmd_width,
           dspin_int_rsp_width, dspin_ram_cmd_width, dspin_ram_rsp_width>
           TsarIobClusterType;
   
   TsarIobClusterType* clusters[x_size][y_size];

#if USE_OPENMP
#pragma omp parallel
    {
#pragma omp for
#endif

        for(size_t i = 0; i  < (x_size * y_size); i++)
        {
            size_t x = i / y_size;
            size_t y = i % y_size;

#if USE_OPENMP
#pragma omp critical
            {
#endif
            std::cout << std::endl;
            std::cout << "Cluster_" << std::dec << x << "_" << y << std::endl;
            std::cout << std::endl;

            std::ostringstream sc;
            sc << "cluster_" << x << "_" << y;

            bool memc_debug =
               debug_ok && (cluster(x,y) == debug_memc_id);
            bool proc_debug = 
               debug_ok && (cluster(x,y) == (debug_proc_id / nb_procs));

            TsarIobClusterType::ClusterParams params = {
               .insname           = sc.str().c_str(),
               .nb_procs          = nb_procs,
               .nb_dmas           = nb_dma_channels,
               .x_id              = x,
               .y_id              = y,
               .x_size            = x_size,
               .y_size            = y_size,
               .mt_int            = maptab_int,
               .mt_ext            = maptab_ram,
               .mt_iox            = maptab_iox,
               .x_width           = X_WIDTH,
               .y_width           = Y_WIDTH,
               .l_width           = vci_srcid_width - X_WIDTH - Y_WIDTH,
               .int_memc_tgtid    = INT_MEMC_TGT_ID,
               .int_xicu_tgtid    = INT_XICU_TGT_ID,
               .int_mdma_tgtid    = INT_MDMA_TGT_ID,
               .int_iobx_tgtid    = INT_IOBX_TGT_ID,
               .int_brom_tgtid    = INT_BROM_TGT_ID,
               .int_proc_srcid    = INT_PROC_INI_ID,
               .int_mdma_srcid    = INT_MDMA_INI_ID,
               .int_iobx_srcid    = INT_IOBX_INI_ID,
               .ext_xram_tgtid    = RAM_XRAM_TGT_ID,
               .ext_memc_srcid    = RAM_MEMC_INI_ID,
               .ext_iobx_srcid    = RAM_IOBX_INI_ID,
               .memc_ways         = MEMC_WAYS,
               .memc_sets         = MEMC_SETS,
               .l1_i_ways         = L1_IWAYS,
               .l1_i_sets         = L1_ISETS,
               .l1_d_ways         = L1_DWAYS,
               .l1_d_sets         = L1_DSETS,
               .xram_latency      = XRAM_LATENCY,
               .loader            = loader,
               .frozen_cycles     = frozen_cycles,
               .debug_start_cycle = debug_from,
               .memc_debug_ok     = memc_debug, 
               .proc_debug_ok     = proc_debug, 
               .iob_debug_ok      = debug_ok and debug_iob
            };

            clusters[x][y] = new TsarIobClusterType(params);

#if USE_OPENMP
            } // end critical
#endif
        } // end for
#if USE_OPENMP
    }
#endif

    std::cout << std::endl;

    ///////////////////////////////////////////////////////////////////////////
    //     Net-list
    ///////////////////////////////////////////////////////////////////////////

    // IOX network connection
    iox_network->p_clk                     (signal_clk);
    iox_network->p_resetn                  (signal_resetn);
    iox_network->p_to_ini[IOX_IOB0_INI_ID] (signal_vci_ini_iob0);
    iox_network->p_to_ini[IOX_IOB1_INI_ID] (signal_vci_ini_iob1);
    iox_network->p_to_ini[IOX_BDEV_INI_ID] (signal_vci_ini_bdev);
    iox_network->p_to_ini[IOX_CDMA_INI_ID] (signal_vci_ini_cdma);
    iox_network->p_to_tgt[IOX_IOB0_TGT_ID] (signal_vci_tgt_iob0);
    iox_network->p_to_tgt[IOX_IOB1_TGT_ID] (signal_vci_tgt_iob1);
    iox_network->p_to_tgt[IOX_MTTY_TGT_ID] (signal_vci_tgt_mtty);
    iox_network->p_to_tgt[IOX_FBUF_TGT_ID] (signal_vci_tgt_fbuf);
    iox_network->p_to_tgt[IOX_MNIC_TGT_ID] (signal_vci_tgt_mnic);
    iox_network->p_to_tgt[IOX_BDEV_TGT_ID] (signal_vci_tgt_bdev);
    iox_network->p_to_tgt[IOX_CDMA_TGT_ID] (signal_vci_tgt_cdma);

    // BDEV connection
    bdev->p_clk    (signal_clk);
    bdev->p_resetn (signal_resetn);
    bdev->p_irq    (signal_irq_bdev);

    // For AHCI
    // bdev->p_channel_irq[0]                             (signal_irq_bdev);

    bdev->p_vci_target    (signal_vci_tgt_bdev);
    bdev->p_vci_initiator (signal_vci_ini_bdev);

    std::cout << "  - BDEV connected" << std::endl;

    // FBUF connection
    fbuf->p_clk    (signal_clk);
    fbuf->p_resetn (signal_resetn);
    fbuf->p_vci    (signal_vci_tgt_fbuf);

    std::cout << "  - FBUF connected" << std::endl;

    // MNIC connection
    mnic->p_clk    (signal_clk);
    mnic->p_resetn (signal_resetn);
    mnic->p_vci    (signal_vci_tgt_mnic);
    for ( size_t i=0 ; i<nb_nic_channels ; i++ )
    {
         mnic->p_rx_irq[i] (signal_irq_mnic_rx[i]);
         mnic->p_tx_irq[i] (signal_irq_mnic_tx[i]);
    }

    std::cout << "  - MNIC connected" << std::endl;

    // MTTY connection
    mtty->p_clk        (signal_clk);
    mtty->p_resetn     (signal_resetn);
    mtty->p_vci        (signal_vci_tgt_mtty);
    for ( size_t i=0 ; i<nb_tty_channels ; i++ )
    {
        mtty->p_irq[i] (signal_irq_mtty[i]);
    }

    std::cout << "  - MTTY connected" << std::endl;

    // CDMA connection
    cdma->p_clk           (signal_clk);
    cdma->p_resetn        (signal_resetn);
    cdma->p_vci_target    (signal_vci_tgt_cdma);
    cdma->p_vci_initiator (signal_vci_ini_cdma);
    for ( size_t i=0 ; i<(nb_nic_channels*2) ; i++)
    {
        cdma->p_irq[i]    (signal_irq_cdma[i]);
    }

    std::cout << "  - CDMA connected" << std::endl;

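    // IRQ connections from external peripherals (cluster_iob0 only)
    // IRQ allocation on the p_irq inputs of cluster(0,0):
    // IRQ_MNIC_RX  -> IRQ[08] to IRQ[09]
    // IRQ_MNIC_TX  -> IRQ[10] to IRQ[11]
    // IRQ_CDMA     -> IRQ[12] to IRQ[15]
    // IRQ_MTTY     -> IRQ[16] to IRQ[30]
    // IRQ_BDEV     -> IRQ[31]
    // As wired by the loop below, only the MTTY and BDEV IRQs are actually
    // connected: IRQ[0] to IRQ[15], and the MTTY slots beyond
    // nb_tty_channels, are all tied to signal_irq_false.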

    size_t mx = 16 + nb_tty_channels;
    for ( size_t n=0 ; n<32 ; n++ )
    {
        if      ( n < 8  ) (*clusters[0][0]->p_irq[n]) (signal_irq_false);
        else if ( n < 10 ) (*clusters[0][0]->p_irq[n]) (signal_irq_false);
        else if ( n < 12 ) (*clusters[0][0]->p_irq[n]) (signal_irq_false);
        else if ( n < 16 ) (*clusters[0][0]->p_irq[n]) (signal_irq_false);
        else if ( n < mx ) (*clusters[0][0]->p_irq[n]) (signal_irq_mtty[n-16]);
        else if ( n < 31 ) (*clusters[0][0]->p_irq[n]) (signal_irq_false);
        else               (*clusters[0][0]->p_irq[n]) (signal_irq_bdev);
    }

    // IOB0 cluster connection to IOX network
    (*clusters[0][0]->p_vci_iob_iox_ini) (signal_vci_ini_iob0);
    (*clusters[0][0]->p_vci_iob_iox_tgt) (signal_vci_tgt_iob0);

    // IOB1 cluster connection to IOX network
    (*clusters[x_size-1][y_size-1]->p_vci_iob_iox_ini) (signal_vci_ini_iob1);
    (*clusters[x_size-1][y_size-1]->p_vci_iob_iox_tgt) (signal_vci_tgt_iob1);

    // Clock & RESET connections for all clusters
    for ( size_t x = 0; x < (x_size); x++ )
    {
        for (size_t y = 0; y < y_size; y++)
        {
            clusters[x][y]->p_clk    (signal_clk);
            clusters[x][y]->p_resetn (signal_resetn);
        }
    }

   const int& NORTH = VirtualDspinRouter<dspin_int_cmd_width>::NORTH;
   const int& SOUTH = VirtualDspinRouter<dspin_int_cmd_width>::SOUTH;
   const int& EAST  = VirtualDspinRouter<dspin_int_cmd_width>::EAST;
   const int& WEST  = VirtualDspinRouter<dspin_int_cmd_width>::WEST;

   // Inter Clusters horizontal connections
   if (x_size > 1)
   {
      for (size_t x = 0; x < (x_size-1); x++)
      {
         for (size_t y = 0; y < y_size; y++)
         {
            for (size_t k = 0; k < 3; k++)
            {
               clusters[x][y]->p_dspin_int_cmd_out[EAST][k](
                     signal_dspin_int_cmd_h_inc[x][y][k]);
               clusters[x+1][y]->p_dspin_int_cmd_in[WEST][k](
                     signal_dspin_int_cmd_h_inc[x][y][k]);
               clusters[x][y]->p_dspin_int_cmd_in[EAST][k](
                     signal_dspin_int_cmd_h_dec[x][y][k]);
               clusters[x+1][y]->p_dspin_int_cmd_out[WEST][k](
                     signal_dspin_int_cmd_h_dec[x][y][k]);
            }

            for (size_t k = 0; k < 2; k++)
            {
               clusters[x][y]->p_dspin_int_rsp_out[EAST][k](
                     signal_dspin_int_rsp_h_inc[x][y][k]);
               clusters[x+1][y]->p_dspin_int_rsp_in[WEST][k](
                     signal_dspin_int_rsp_h_inc[x][y][k]);
               clusters[x][y]->p_dspin_int_rsp_in[EAST][k](
                     signal_dspin_int_rsp_h_dec[x][y][k]);
               clusters[x+1][y]->p_dspin_int_rsp_out[WEST][k](
                     signal_dspin_int_rsp_h_dec[x][y][k]);
            }

            clusters[x][y]->p_dspin_ram_cmd_out[EAST](
                  signal_dspin_ram_cmd_h_inc[x][y]);
            clusters[x+1][y]->p_dspin_ram_cmd_in[WEST](
                  signal_dspin_ram_cmd_h_inc[x][y]);
            clusters[x][y]->p_dspin_ram_cmd_in[EAST](
                  signal_dspin_ram_cmd_h_dec[x][y]);
            clusters[x+1][y]->p_dspin_ram_cmd_out[WEST](
                  signal_dspin_ram_cmd_h_dec[x][y]);
            clusters[x][y]->p_dspin_ram_rsp_out[EAST](
                  signal_dspin_ram_rsp_h_inc[x][y]);
            clusters[x+1][y]->p_dspin_ram_rsp_in[WEST](
                  signal_dspin_ram_rsp_h_inc[x][y]);
            clusters[x][y]->p_dspin_ram_rsp_in[EAST](
                  signal_dspin_ram_rsp_h_dec[x][y]);
            clusters[x+1][y]->p_dspin_ram_rsp_out[WEST](
                  signal_dspin_ram_rsp_h_dec[x][y]);
         }
      }
   }

   std::cout << std::endl << "Horizontal connections established"
             << std::endl;

   // Inter Clusters vertical connections
   if (y_size > 1)
   {
      for (size_t y = 0; y < (y_size-1); y++)
      {
         for (size_t x = 0; x < x_size; x++)
         {
            for (size_t k = 0; k < 3; k++)
            {
               clusters[x][y]->p_dspin_int_cmd_out[NORTH][k](
                     signal_dspin_int_cmd_v_inc[x][y][k]);
               clusters[x][y+1]->p_dspin_int_cmd_in[SOUTH][k](
                     signal_dspin_int_cmd_v_inc[x][y][k]);
               clusters[x][y]->p_dspin_int_cmd_in[NORTH][k](
                     signal_dspin_int_cmd_v_dec[x][y][k]);
               clusters[x][y+1]->p_dspin_int_cmd_out[SOUTH][k](
                     signal_dspin_int_cmd_v_dec[x][y][k]);
            }

            for (size_t k = 0; k < 2; k++)
            {
               clusters[x][y]->p_dspin_int_rsp_out[NORTH][k](
                     signal_dspin_int_rsp_v_inc[x][y][k]);
               clusters[x][y+1]->p_dspin_int_rsp_in[SOUTH][k](
                     signal_dspin_int_rsp_v_inc[x][y][k]);
               clusters[x][y]->p_dspin_int_rsp_in[NORTH][k](
                     signal_dspin_int_rsp_v_dec[x][y][k]);
               clusters[x][y+1]->p_dspin_int_rsp_out[SOUTH][k](
                     signal_dspin_int_rsp_v_dec[x][y][k]);
            }

            clusters[x][y]->p_dspin_ram_cmd_out[NORTH](
                  signal_dspin_ram_cmd_v_inc[x][y]);
            clusters[x][y+1]->p_dspin_ram_cmd_in[SOUTH](
                  signal_dspin_ram_cmd_v_inc[x][y]);
            clusters[x][y]->p_dspin_ram_cmd_in[NORTH](
                  signal_dspin_ram_cmd_v_dec[x][y]);
            clusters[x][y+1]->p_dspin_ram_cmd_out[SOUTH](
                  signal_dspin_ram_cmd_v_dec[x][y]);
            clusters[x][y]->p_dspin_ram_rsp_out[NORTH](
                  signal_dspin_ram_rsp_v_inc[x][y]);
            clusters[x][y+1]->p_dspin_ram_rsp_in[SOUTH](
                  signal_dspin_ram_rsp_v_inc[x][y]);
            clusters[x][y]->p_dspin_ram_rsp_in[NORTH](
                  signal_dspin_ram_rsp_v_dec[x][y]);
            clusters[x][y+1]->p_dspin_ram_rsp_out[SOUTH](
                  signal_dspin_ram_rsp_v_dec[x][y]);
         }
      }
   }

   std::cout << "Vertical connections established" << std::endl;

   // East & West boundary cluster connections
   for (size_t y = 0; y < y_size; y++)
   {
      for (size_t k = 0; k < 3; k++)
      {
         clusters[0][y]->p_dspin_int_cmd_in[WEST][k](
               signal_dspin_false_int_cmd_in[0][y][WEST][k]);
         clusters[0][y]->p_dspin_int_cmd_out[WEST][k](
               signal_dspin_false_int_cmd_out[0][y][WEST][k]);
         clusters[x_size-1][y]->p_dspin_int_cmd_in[EAST][k](
               signal_dspin_false_int_cmd_in[x_size-1][y][EAST][k]);
         clusters[x_size-1][y]->p_dspin_int_cmd_out[EAST][k](
               signal_dspin_false_int_cmd_out[x_size-1][y][EAST][k]);
      }

      for (size_t k = 0; k < 2; k++)
      {
         clusters[0][y]->p_dspin_int_rsp_in[WEST][k](
               signal_dspin_false_int_rsp_in[0][y][WEST][k]);
         clusters[0][y]->p_dspin_int_rsp_out[WEST][k](
               signal_dspin_false_int_rsp_out[0][y][WEST][k]);
         clusters[x_size-1][y]->p_dspin_int_rsp_in[EAST][k](
               signal_dspin_false_int_rsp_in[x_size-1][y][EAST][k]);
         clusters[x_size-1][y]->p_dspin_int_rsp_out[EAST][k](
               signal_dspin_false_int_rsp_out[x_size-1][y][EAST][k]);
      }

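      // handling IOB to RAM network connection in cluster_iob0:
      // the IOB command output is looped back to the WEST RAM command input,
      // and the WEST RAM response output is looped back to the IOB response
      // input; the two remaining WEST RAM ports use boundary (false) signals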
      if( y == 0 )
      {
         (*clusters[0][0]->p_dspin_iob_cmd_out)(
               signal_dspin_cmd_iob0_loopback);
         clusters[0][0]->p_dspin_ram_cmd_in[WEST](
               signal_dspin_cmd_iob0_loopback);
         clusters[0][0]->p_dspin_ram_cmd_out[WEST](
               signal_dspin_false_ram_cmd_out[0][0][WEST]);
         clusters[0][0]->p_dspin_ram_rsp_in[WEST](
               signal_dspin_false_ram_rsp_in[0][0][WEST]);
         clusters[0][0]->p_dspin_ram_rsp_out[WEST](
               signal_dspin_rsp_iob0_loopback);
         (*clusters[0][0]->p_dspin_iob_rsp_in)(
               signal_dspin_rsp_iob0_loopback);
      }
      else
      {
         clusters[0][y]->p_dspin_ram_cmd_in[WEST](
               signal_dspin_false_ram_cmd_in[0][y][WEST]);
         clusters[0][y]->p_dspin_ram_cmd_out[WEST](
               signal_dspin_false_ram_cmd_out[0][y][WEST]);
         clusters[0][y]->p_dspin_ram_rsp_in[WEST](
               signal_dspin_false_ram_rsp_in[0][y][WEST]);
         clusters[0][y]->p_dspin_ram_rsp_out[WEST](
               signal_dspin_false_ram_rsp_out[0][y][WEST]);
      }

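      // handling IOB to RAM network connection in cluster_iob1:
      // the IOB command output is looped back to the EAST RAM command input,
      // and the EAST RAM response output is looped back to the IOB response
      // input; the two remaining EAST RAM ports use boundary (false) signals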
      if( y == y_size-1 )
      {
         (*clusters[x_size-1][y_size-1]->p_dspin_iob_cmd_out)(
               signal_dspin_cmd_iob1_loopback);
         clusters[x_size-1][y_size-1]->p_dspin_ram_cmd_in[EAST](
               signal_dspin_cmd_iob1_loopback);

         clusters[x_size-1][y_size-1]->p_dspin_ram_cmd_out[EAST](
               signal_dspin_false_ram_cmd_out[x_size-1][y_size-1][EAST]);
         clusters[x_size-1][y_size-1]->p_dspin_ram_rsp_in[EAST](
               signal_dspin_false_ram_rsp_in[x_size-1][y_size-1][EAST]);

         clusters[x_size-1][y_size-1]->p_dspin_ram_rsp_out[EAST](
               signal_dspin_rsp_iob1_loopback);
         (*clusters[x_size-1][y_size-1]->p_dspin_iob_rsp_in)(
               signal_dspin_rsp_iob1_loopback);
      }
      else
      {
         clusters[x_size-1][y]->p_dspin_ram_cmd_in[EAST](
               signal_dspin_false_ram_cmd_in[x_size-1][y][EAST]);
         clusters[x_size-1][y]->p_dspin_ram_cmd_out[EAST](
               signal_dspin_false_ram_cmd_out[x_size-1][y][EAST]);
         clusters[x_size-1][y]->p_dspin_ram_rsp_in[EAST](
               signal_dspin_false_ram_rsp_in[x_size-1][y][EAST]);
         clusters[x_size-1][y]->p_dspin_ram_rsp_out[EAST](
               signal_dspin_false_ram_rsp_out[x_size-1][y][EAST]);
      }
   }

   std::cout << "East & West boundaries established" << std::endl;

   // North & South boundary clusters connections
   for (size_t x = 0; x < x_size; x++)
   {
      for (size_t k = 0; k < 3; k++)
      {
         clusters[x][0]->p_dspin_int_cmd_in[SOUTH][k](
               signal_dspin_false_int_cmd_in[x][0][SOUTH][k]);
         clusters[x][0]->p_dspin_int_cmd_out[SOUTH][k](
               signal_dspin_false_int_cmd_out[x][0][SOUTH][k]);
         clusters[x][y_size-1]->p_dspin_int_cmd_in[NORTH][k](
               signal_dspin_false_int_cmd_in[x][y_size-1][NORTH][k]);
         clusters[x][y_size-1]->p_dspin_int_cmd_out[NORTH][k](
               signal_dspin_false_int_cmd_out[x][y_size-1][NORTH][k]);
      }

      for (size_t k = 0; k < 2; k++)
      {
         clusters[x][0]->p_dspin_int_rsp_in[SOUTH][k](
               signal_dspin_false_int_rsp_in[x][0][SOUTH][k]);
         clusters[x][0]->p_dspin_int_rsp_out[SOUTH][k](
               signal_dspin_false_int_rsp_out[x][0][SOUTH][k]);
         clusters[x][y_size-1]->p_dspin_int_rsp_in[NORTH][k](
               signal_dspin_false_int_rsp_in[x][y_size-1][NORTH][k]);
         clusters[x][y_size-1]->p_dspin_int_rsp_out[NORTH][k](
               signal_dspin_false_int_rsp_out[x][y_size-1][NORTH][k]);
      }

      clusters[x][0]->p_dspin_ram_cmd_in[SOUTH](
            signal_dspin_false_ram_cmd_in[x][0][SOUTH]);
      clusters[x][0]->p_dspin_ram_cmd_out[SOUTH](
            signal_dspin_false_ram_cmd_out[x][0][SOUTH]);
      clusters[x][0]->p_dspin_ram_rsp_in[SOUTH](
            signal_dspin_false_ram_rsp_in[x][0][SOUTH]);
      clusters[x][0]->p_dspin_ram_rsp_out[SOUTH](
            signal_dspin_false_ram_rsp_out[x][0][SOUTH]);

      clusters[x][y_size-1]->p_dspin_ram_cmd_in[NORTH](
            signal_dspin_false_ram_cmd_in[x][y_size-1][NORTH]);
      clusters[x][y_size-1]->p_dspin_ram_cmd_out[NORTH](
            signal_dspin_false_ram_cmd_out[x][y_size-1][NORTH]);
      clusters[x][y_size-1]->p_dspin_ram_rsp_in[NORTH](
            signal_dspin_false_ram_rsp_in[x][y_size-1][NORTH]);
      clusters[x][y_size-1]->p_dspin_ram_rsp_out[NORTH](
            signal_dspin_false_ram_rsp_out[x][y_size-1][NORTH]);
   }

   std::cout << "North & South boundaries established" << std::endl
             << std::endl;

   ////////////////////////////////////////////////////////
   //   Simulation
   ///////////////////////////////////////////////////////

   sc_start(sc_core::sc_time(0, SC_NS));

   signal_resetn = false;

   signal_irq_false = false;

   // initialize all mesh boundary (false) signals: write = false, read = true
   for (size_t x = 0; x < x_size ; x++)
   {
      for (size_t y = 0; y < y_size ; y++)
      {
         for (size_t a = 0; a < 4; a++)
         {
            for (size_t k = 0; k < 3; k++)
            {
               signal_dspin_false_int_cmd_in[x][y][a][k].write  = false;
               signal_dspin_false_int_cmd_in[x][y][a][k].read   = true;
               signal_dspin_false_int_cmd_out[x][y][a][k].write = false;
               signal_dspin_false_int_cmd_out[x][y][a][k].read  = true;
            }

            for (size_t k = 0; k < 2; k++)
            {
               signal_dspin_false_int_rsp_in[x][y][a][k].write  = false;
               signal_dspin_false_int_rsp_in[x][y][a][k].read   = true;
               signal_dspin_false_int_rsp_out[x][y][a][k].write = false;
               signal_dspin_false_int_rsp_out[x][y][a][k].read  = true;
            }

            signal_dspin_false_ram_cmd_in[x][y][a].write  = false;
            signal_dspin_false_ram_cmd_in[x][y][a].read   = true;
            signal_dspin_false_ram_cmd_out[x][y][a].write = false;
            signal_dspin_false_ram_cmd_out[x][y][a].read  = true;

            signal_dspin_false_ram_rsp_in[x][y][a].write  = false;
            signal_dspin_false_ram_rsp_in[x][y][a].read   = true;
            signal_dspin_false_ram_rsp_out[x][y][a].write = false;
            signal_dspin_false_ram_rsp_out[x][y][a].read  = true;
         }
      }
   }

    sc_start(sc_core::sc_time(1, SC_NS));
    signal_resetn = true;

    for (size_t n = 1; n < ncycles; n++)
    {
        // Monitor a specific address for one L1 cache
        // clusters[1][1]->proc[0]->cache_monitor(0x50090ULL);

        // Monitor a specific address for one L2 cache
        // clusters[0][0]->memc->cache_monitor( 0x170000ULL);

        // Monitor a specific address for one XRAM
        // if (n == 3000000)
        //     clusters[0][0]->xram->start_monitor( 0x170000ULL , 64);

        if (debug_ok and (n > debug_from) and (n % debug_period == 0))
        {
            std::cout << " ***********************"
                      << " cycle " << std::dec << n
                      << " ***********************"
                      << std::endl;

            // trace proc[debug_proc_id]
            if ( debug_proc_id != 0xFFFFFFFF )
            {
                size_t l          = debug_proc_id % nb_procs ;
                size_t cluster_xy = debug_proc_id / nb_procs ;
                size_t x          = cluster_xy >> Y_WIDTH;
                size_t y          = cluster_xy  & ((1 << Y_WIDTH) - 1);

                clusters[x][y]->proc[l]->print_trace(1);

                std::ostringstream proc_signame;
                proc_signame << "[SIG]PROC_" << x << "_" << y << "_" << l ;
                clusters[x][y]->signal_int_vci_ini_proc[l].print_trace(
                      proc_signame.str());

                clusters[x][y]->xicu->print_trace(l);

                std::ostringstream xicu_signame;
                xicu_signame << "[SIG]XICU_" << x << "_" << y;
                clusters[x][y]->signal_int_vci_tgt_xicu.print_trace(
                      xicu_signame.str());

                if( clusters[x][y]->signal_proc_it[l].read() )
                    std::cout << "### IRQ_PROC_" << std::dec
                              << x << "_" << y << "_" << l
                              << " ACTIVE" << std::endl;
            }

            // trace INT network
//          clusters[0][0]->int_xbar_cmd_d->print_trace();
//          clusters[0][0]->int_xbar_rsp_d->print_trace();

//          clusters[0][0]->signal_int_dspin_cmd_l2g_d.print_trace(
//             "[SIG] INT_CMD_L2G_D_0_0");
//          clusters[0][0]->signal_int_dspin_rsp_g2l_d.print_trace(
//             "[SIG] INT_RSP_G2L_D_0_0");

//          clusters[0][0]->int_router_cmd->print_trace(0);
//          clusters[0][0]->int_router_rsp->print_trace(0);

            // trace INT_CMD_D xbar and router in cluster 0_1
//          clusters[0][1]->int_router_cmd->print_trace(0);
//          clusters[0][1]->int_router_rsp->print_trace(0);

//          clusters[0][1]->signal_int_dspin_cmd_g2l_d.print_trace(
//             "[SIG] INT_CMD_G2L_D_0_0");
//          clusters[0][1]->signal_int_dspin_rsp_l2g_d.print_trace(
//             "[SIG] INT_RSP_L2G_D_0_0");

//          clusters[0][1]->int_xbar_cmd_d->print_trace();
//          clusters[0][1]->int_xbar_rsp_d->print_trace();

            // trace memc[debug_memc_id]
            if ( debug_memc_id != 0xFFFFFFFF )
            {
                size_t x = debug_memc_id >> Y_WIDTH;
                size_t y = debug_memc_id  & ((1 << Y_WIDTH) - 1);

                clusters[x][y]->memc->print_trace(0);
                std::ostringstream smemc_tgt;
                smemc_tgt << "[SIG]MEMC_TGT_" << x << "_" << y;
                clusters[x][y]->signal_int_vci_tgt_memc.print_trace(
                      smemc_tgt.str());
                std::ostringstream smemc_ini;
                smemc_ini << "[SIG]MEMC_INI_" << x << "_" << y;
                clusters[x][y]->signal_ram_vci_ini_memc.print_trace(
                      smemc_ini.str());
                clusters[x][y]->xram->print_trace();
                std::ostringstream sxram_tgt;
                sxram_tgt << "[SIG]XRAM_TGT_" << x << "_" << y;
                clusters[x][y]->signal_ram_vci_tgt_xram.print_trace(
                      sxram_tgt.str());
            }

            // trace RAM network routers
//          for( size_t cluster = 0 ; cluster < XMAX*YMAX ; cluster++ )
//          {
//              size_t x = cluster / YMAX;
//              size_t y = cluster % YMAX;
//              clusters[x][y]->ram_router_cmd->print_trace();
//              clusters[x][y]->ram_router_rsp->print_trace();
//          }

            // trace iob, iox and external peripherals
            if ( debug_iob )
            {
                clusters[0][0]->iob->print_trace();
                clusters[0][0]->signal_int_vci_tgt_iobx.print_trace(
                      "[SIG]IOB0_INT_TGT");
                clusters[0][0]->signal_int_vci_ini_iobx.print_trace(
                      "[SIG]IOB0_INT_INI");
                clusters[0][0]->signal_ram_vci_ini_iobx.print_trace(
                      "[SIG]IOB0_RAM_INI");

                signal_vci_ini_iob0.print_trace("[SIG]IOB0_IOX_INI");
                signal_vci_tgt_iob0.print_trace("[SIG]IOB0_IOX_TGT");

//              signal_dspin_cmd_iob0_loopback.print_trace(
//                    "[SIG]IOB0_CMD_LOOPBACK");
//              signal_dspin_rsp_iob0_loopback.print_trace(
//                    "[SIG]IOB0_RSP_LOOPBACK");

                cdma->print_trace();
                signal_vci_tgt_cdma.print_trace("[SIG]IOX_CDMA_TGT");
                signal_vci_ini_cdma.print_trace("[SIG]IOX_CDMA_INI");

//              mtty->print_trace();
//              signal_vci_tgt_mtty.print_trace("[SIG]IOX_MTTY_TGT");

//              bdev->print_trace();
//              signal_vci_tgt_bdev.print_trace("[SIG]IOX_BDEV_TGT");
//              signal_vci_ini_bdev.print_trace("[SIG]IOX_BDEV_INI");

//              fbuf->print_trace();
//              signal_vci_tgt_fbuf.print_trace("[SIG]FBUF");

                iox_network->print_trace();

                // interrupts
                if (signal_irq_bdev) std::cout << "### IRQ_BDEV ACTIVATED"
                                               << std::endl;
            }
        }

        sc_start(sc_core::sc_time(1, SC_NS));
    }

   delete iox_network;
   delete mnic;
   delete fbuf;
   delete bdev;
   delete cdma;
   delete mtty;

   for(size_t x = 0; x < x_size; x++)
   {
      for(size_t y = 0; y < y_size; y++)
      {
         delete clusters[x][y];
      }
   }

   return EXIT_SUCCESS;
}

// Simulation entry point: delegates to _main() and reports any exception
// that escapes from it on the standard output.
//
// Parameters:
// - argc : number of command-line arguments, forwarded to _main()
// - argv : command-line argument vector, forwarded to _main()
//
// Returns the value returned by _main(), or 1 when a std::exception has been
// caught. Any other exception is logged and then rethrown to the caller.
int sc_main (int argc, char *argv[])
{
   try {
      return _main(argc, argv);
   } catch (const std::exception &e) {
      // caught by const reference: the handler only reads the message
      std::cout << e.what() << std::endl;
   } catch (...) {
      // unidentified exception: log it, then let it propagate
      std::cout << "Unknown exception occured" << std::endl;
      throw;
   }
   // reached only after a std::exception has been reported
   return 1;
}


// Local Variables:
// tab-width: 3
// c-basic-offset: 3
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=3:tabstop=3:softtabstop=3

