/////////////////////////////////////////////////////////////////////////
// File: tsarv4_vgmn_generic_32_top.cpp
// Author: Alain Greiner
// Copyright: UPMC/LIP6
// Date : november 5 2010
// This program is released under the GNU public license
/////////////////////////////////////////////////////////////////////////
// This file defines a generic TSAR architecture without virtual memory.
// - It uses the vci_vgmn as global interconnect 
// - It uses the vci_local_crossbar  as local interconnect 
// - It uses the vci_cc_xcache (No MMU) 
// The physical address space is 32 bits.
// The number of clusters cannot be larger than 256.
// The three parameters are 
// - xmax : number of clusters in a row
// - ymax : number of clusters in a column
// - nprocs : number of processor per cluster
//
// Each cluster contains nprocs processors, one Memory Cache,
// and one XICU component.
// The peripherals BDEV, CDMA, FBUF, MTTY and the boot BROM
// are in the cluster containing address 0xBFC00000.
// - The bdev_irq is connected to IRQ_IN[0]
// - The cdma_irq is connected to IRQ_IN[1]
// - The tty_irq[i] is connected to IRQ_IN[i+2]
// For all clusters, the XICU component contains nprocs timers.
// 
// As we target up to 256 clusters, each cluster can contain 
// at most 16 Mbytes (in a 4Gbytes address space).
// There is one MEMC segment and one XICU segment per cluster.
// - Each memory cache contains 8 Mbytes.
// - The Frame buffer contains 4 Mbytes.
// - The Boot ROM contains 1 Mbytes
//
// General policy for 32 bits address decoding:
// To simplify, all segments base addresses are aligned
// on 64 Kbytes addresses. Therefore the 16 address MSB bits
// define the target in the direct address space.
// In these 16 bits, the (x_width + y_width) MSB bits define
// the cluster index, and the 8 LSB bits define the local index:
// 
//      | X_ID  | Y_ID  |---| L_ID |     OFFSET          |
//	|x_width|y_width|---|  8   |       16            |
/////////////////////////////////////////////////////////////////////////

#include <systemc>
#include <sys/time.h>
#include <iostream>
#include <sstream>
#include <cstdlib>
#include <cstdarg>
#include <stdint.h>

#include "mapping_table.h"
#include "tsarv4_cluster_xbar.h"
#include "mips32.h"
#include "vci_simple_ram.h"

#include "alloc_elems.h"
#include "config.h"

#if USE_OPENMP
#include <omp.h>
#endif

#if USE_VCI_PROFILER
#include "vci_profiler.h"
#endif

#if USE_GDBSERVER
#include "gdbserver.h"
#endif


#if USE_ALMOS
// Addresses appended (through TO_STR) to the image names given to the loader
// when the platform is built for ALMOS.
#define BOOT_INFO_BLOCK 0xbfc08000
#define KERNEL_BIN_IMG  0xbfc10000
#endif

//  cluster index (computed from x,y coordinates)
//  The expansion reads a variable named `ymax` that must be in scope at the
//  point of use. Both arguments are parenthesized so that compound
//  expressions such as cluster(x+1, y) are evaluated as intended.
#define cluster(x,y)	((y) + ymax*(x))

// Two-level stringification: TO_STR(M) yields the expansion of macro M
// as a string literal (a single level would yield "M").
#define _TO_STR(_str) #_str
#define  TO_STR(_str) _TO_STR(_str)

// flit widths for the DSPIN network
#define cmd_width	         40
#define rsp_width	         33

// VCI format (template arguments of soclib::caba::VciParams)
#define cell_width	         4
#define address_width	         32
#define plen_width	         8
#define error_width	         1
#define clen_width	         1
#define rflag_width	         1
#define srcid_width	         14
#define pktid_width	         4
#define trdid_width	         4
#define wrplen_width	         1


/////////////////////////////////////////////////////////////////////////
// Copies a pathname given on the command line into a fixed-size buffer.
// - dst / dst_size : destination buffer and its capacity in bytes
// - src            : NUL-terminated pathname to copy
// - option         : name of the command line key (used in the message)
// A pathname that does not fit (terminator included) is reported on
// std::cerr and the program exits with EXIT_FAILURE, instead of writing
// past the end of the buffer.
/////////////////////////////////////////////////////////////////////////
static void copy_pathname(char* dst, size_t dst_size, const char* src, const char* option)
{
    if (strlen(src) >= dst_size)
    {
        std::cerr << "Error: the " << option << " pathname is too long (at most "
                  << (dst_size - 1) << " characters)" << std::endl;
        exit(EXIT_FAILURE);
    }
    strcpy(dst, src);
}

/////////////////////////////////////////////////////////////////////////
// Returns the base offset of a cluster in the 32 bits address space:
// the cluster index is placed in the (x_width + y_width) address MSB bits.
// When there are no cluster index bits (single cluster) the offset is 0;
// this case is handled explicitly because shifting by the full address
// width would be undefined.
/////////////////////////////////////////////////////////////////////////
static uint32_t cluster_base(size_t cluster_id, size_t x_width, size_t y_width)
{
    const size_t id_bits = x_width + y_width;
    if (id_bits == 0) return 0;
    return static_cast<uint32_t>(cluster_id) << (address_width - id_bits);
}

/////////////////////////////////////////////////////////////////////////
// Builds the platform and runs the simulation:
// 1. parses the (key,value) command line arguments,
// 2. prints the simulation parameters,
// 3. builds the three mapping tables (direct, coherence, external),
// 4. allocates the DSPIN signals and instantiates one cluster per (x,y),
// 5. connects clock, reset, inter-cluster and mesh boundary ports,
// 6. applies the reset and simulates by slices of 100000000 ns, printing
//    a simulation speed figure on std::cerr after each slice.
// Returns EXIT_FAILURE if gettimeofday() fails. The simulation loop has
// no exit condition, so the final return is never reached.
// An unknown key (or a key without value) prints the usage and exits.
/////////////////////////////////////////////////////////////////////////
int _main(int argc, char *argv[])
{
    using namespace sc_core;
    using namespace soclib::caba;
    using namespace soclib::common;

    uint64_t       ms1, ms2;
    struct timeval t1, t2;

    char    soft_name[BDEV_NAME_LEN] = "to_be_defined";   // pathname to binary code
    char    disk_name[BDEV_NAME_LEN] = BDEV_IMAGE_NAME;   // pathname to the disk image
    // NOTE(review): ncycles is parsed below but never read afterwards in this function.
    size_t  ncycles        = 1000000000;       // simulated cycles
    size_t  xmax           = 2;                // number of clusters in a row
    size_t  ymax           = 2;                // number of clusters in a column
    size_t  nprocs         = 1;                // number of processors per cluster
    size_t  xfb            = 512;              // frameBuffer column number
    size_t  yfb            = 512;              // frameBuffer lines number
    size_t  fb_mode        = 420;              // frameBuffer subsampling mode (see -FBMODE in the usage)
    #define DEBUG_OK       no
    // NOTE(review): from_cycle is parsed below but never read afterwards in this function.
    size_t  from_cycle     = 0;                // debug start cycle
    size_t  memc_size      = MEMC_SIZE;
    size_t  blk_size       = SECTOR_SIZE;
    size_t  l1_i_ways      = L1_IWAYS;
    size_t  l1_d_ways      = L1_DWAYS;
    size_t  l1_i_sets      = L1_ISETS;
    size_t  l1_d_sets      = L1_DSETS;
    size_t  memc_sets      = MEMC_SETS;
    size_t  memc_ways      = MEMC_WAYS;
    // The four TLB parameters are only printed: they are not passed to the clusters.
    size_t  itlb_ways      = TLB_IWAYS;
    size_t  itlb_sets      = TLB_ISETS;
    size_t  dtlb_ways      = TLB_DWAYS;
    size_t  dtlb_sets      = TLB_DSETS;
    size_t  xram_latency   = CONFIG_XRAM_LATENCY;
    size_t  omp_threads    = 1;

    ////////////// command line arguments //////////////////////
    // Arguments are consumed two by two: argv[n] is the key, argv[n+1] the value.
    if (argc > 1)
    {
        for( int n=1 ; n<argc ; n=n+2 )
        {
            if( (strcmp(argv[n],"-NCYCLES") == 0) && (n+1<argc) )
            {
                ncycles = atoi(argv[n+1]);
            }
            else if( (strcmp(argv[n],"-NPROCS") == 0) && (n+1<argc) )
            {
                nprocs = atoi(argv[n+1]);
                assert( ((nprocs == 1) || (nprocs == 2) || (nprocs == 4)) &&
                        "NPROCS must be equal to 1, 2, or 4");
            }
            else if( (strcmp(argv[n],"-THREADS") == 0) && (n+1<argc) )
            {
                omp_threads = atoi(argv[n+1]);
            }
            else if( (strcmp(argv[n],"-XMAX") == 0) && (n+1<argc) )
            {
                xmax = atoi(argv[n+1]);
                assert( ((xmax == 1) || (xmax == 2) || (xmax == 4) || (xmax == 8) || (xmax == 16))
                         && "The XMAX parameter must be 1, 2, 4, 8, or 16" );
            }
            else if( (strcmp(argv[n],"-YMAX") == 0) && (n+1<argc) )
            {
                ymax = atoi(argv[n+1]);
                assert( ((ymax == 1) || (ymax == 2) || (ymax == 4) || (ymax == 8) || (ymax == 16))
                         && "The YMAX parameter must be 1, 2, 4, 8, or 16" );
            }
            else if( (strcmp(argv[n],"-XFB") == 0) && (n+1<argc) )
            {
                xfb = atoi(argv[n+1]);
            }
            else if( (strcmp(argv[n],"-YFB") == 0) && (n+1<argc) )
            {
                yfb = atoi(argv[n+1]);
            }
            else if( (strcmp(argv[n], "-FBMODE") == 0) && (n+1 < argc))
            {
                fb_mode = atoi(argv[n+1]);
            }
            else if( (strcmp(argv[n],"-SOFT") == 0) && (n+1<argc) )
            {
                // bounded copy: the value comes from the command line
                copy_pathname(soft_name, sizeof(soft_name), argv[n+1], "-SOFT");
            }
            else if( (strcmp(argv[n],"-DISK") == 0) && (n+1<argc) )
            {
                // bounded copy: the value comes from the command line
                copy_pathname(disk_name, sizeof(disk_name), argv[n+1], "-DISK");
            }
            else if( (strcmp(argv[n],"-BLKSZ") == 0) && (n+1<argc) )
            {
                blk_size = atoi(argv[n+1]);
                assert(((blk_size % 512) == 0) && "BDEV: Block size must be multiple of 512 bytes");
            }
            else if( (strcmp(argv[n],"-TRACE") == 0) && (n+1<argc) )
            {
                from_cycle = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-MEMSZ") == 0) && (n+1 < argc))
            {
                memc_size = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-MCWAYS") == 0) && (n+1 < argc))
            {
                memc_ways = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-MCSETS") == 0) && (n+1 < argc))
            {
                memc_sets = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-L1_IWAYS") == 0) && (n+1 < argc))
            {
                l1_i_ways = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-L1_ISETS") == 0) && (n+1 < argc))
            {
                l1_i_sets = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-L1_DWAYS") == 0) && (n+1 < argc))
            {
                l1_d_ways = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-L1_DSETS") == 0) && (n+1 < argc))
            {
                l1_d_sets = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-ITLB_WAYS") == 0) && (n+1 < argc))
            {
                itlb_ways = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-ITLB_SETS") == 0) && (n+1 < argc))
            {
                itlb_sets = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-DTLB_WAYS") == 0) && (n+1 < argc))
            {
                dtlb_ways = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-DTLB_SETS") == 0) && (n+1 < argc))
            {
                dtlb_sets = atoi(argv[n+1]);
            }
            else if((strcmp(argv[n], "-XLATENCY") == 0) && (n+1 < argc))
            {
                xram_latency = atoi(argv[n+1]);
            }
            else
            {
                // unknown key, or known key without a value: print the usage and stop
                std::cout << "   Arguments on the command line are (key,value) couples." << std::endl;
                std::cout << "   The order is not important." << std::endl;
                std::cout << "   Accepted arguments are :" << std::endl << std::endl;
                std::cout << "     -SOFT pathname_for_embedded_soft" << std::endl;
                std::cout << "     -DISK pathname_for_disk_image" << std::endl;
                std::cout << "     -BLKSZ sector size in bytes ( must be multiple of 512 bytes )" << std::endl;
                std::cout << "     -NCYCLES number_of_simulated_cycles" << std::endl;
                std::cout << "     -NPROCS number_of_processors_per_cluster" << std::endl;
                std::cout << "     -XMAX number_of_clusters_in_a_row" << std::endl;
                std::cout << "     -YMAX number_of_clusters_in_a_column" << std::endl;
                std::cout << "     -TRACE debug_start_cycle" << std::endl;
                std::cout << "     -MCWAYS memory_cache_number_of_ways" << std::endl;
                std::cout << "     -MCSETS memory_cache_number_of_sets" << std::endl;
                std::cout << "     -L1_IWAYS L1_instruction_cache_number_of_ways" << std::endl;
                std::cout << "     -L1_ISETS L1_instruction_cache_number_of_sets" << std::endl;
                std::cout << "     -L1_DWAYS L1_data_cache_number_of_ways" << std::endl;
                std::cout << "     -L1_DSETS L1_data_cache_number_of_sets" << std::endl;
                std::cout << "     -XLATENCY external_ram_latency_value" << std::endl;
                std::cout << "     -XFB fram_buffer_number_of_pixels" << std::endl;
                std::cout << "     -YFB fram_buffer_number_of_lines" << std::endl;
                std::cout << "     -FBMODE fram buffer subsampling integer value (YUV:420,YUV:422,RGB:0,RGB:16,RGB:32,RGBPAL:256)" << std::endl;
                std::cout << "     -MEMSZ per-cluster memory size ( <= 12 MB )" << std::endl;
                std::cout << "     -THREADS number_of_cores_for_OpenMP" << std::endl;
                exit(0);
            }
        }
    }


    std::cout << "Simulation Parameters:" << std::endl;
    std::cout << "          OMP_THREADS_NR        = " << omp_threads << std::endl;
    std::cout << "          XFB                   = " << xfb << std::endl;
    std::cout << "          YFB                   = " << yfb << std::endl;
    std::cout << "          FB_MODE               = " << fb_mode << std::endl;
    std::cout << "          SECTOR_SIZE  (Bytes)  = " << blk_size << std::endl;
    std::cout << "          RAM/CLSTR    (Bytes)  = " << memc_size << std::endl;
    std::cout << "          XRAM_LATENCY (cycles) = " << xram_latency << std::endl;
    std::cout << "          MEMC_UPD_TBL          = " << CONFIG_MEMC_UPDATE_TAB_LINES << std::endl;
    std::cout << "          MEMC_TRN_TLB          = " << CONFIG_MEMC_TRANSACTION_TAB_LINES <<std::endl;
    std::cout << "          MEMC_WAYS             = " << memc_ways << std::endl;
    std::cout << "          MEMC_SETS             = " << memc_sets << std::endl;
    std::cout << "          L1_IWAYS              = " << l1_i_ways << std::endl;
    std::cout << "          L1_ISETS              = " << l1_i_sets << std::endl;
    std::cout << "          L1_DWAYS              = " << l1_d_ways << std::endl;
    std::cout << "          L1_DSETS              = " << l1_d_sets << std::endl;
    std::cout << "          ITLB_WAYS             = " << itlb_ways << std::endl;
    std::cout << "          ITLB_SETS             = " << itlb_sets << std::endl;
    std::cout << "          DTLB_WAYS             = " << dtlb_ways << std::endl;
    std::cout << "          DTLB_SETS             = " << dtlb_sets << std::endl;

// NOTE(review): the macros below are (re)defined after their uses above;
// presumably they are already provided by config.h - to be confirmed.
#define CONFIG_MEMC_TRANSACTION_TAB_LINES     8

#define MEMC_WAYS                             16
#define MEMC_SETS                             256

#define L1_IWAYS                              4
#define L1_ISETS                              64

#define L1_DWAYS                              4
#define L1_DSETS                              64

#define TLB_IWAYS                             4
#define TLB_ISETS                             16

#define TLB_DWAYS                             4
#define TLB_DSETS                             16

#define CONFIG_XRAM_LATENCY                   0



#if USE_OPENMP
    omp_set_dynamic(false);
    omp_set_num_threads(omp_threads);
    std::cerr << "Built with openmp version " << _OPENMP << std::endl;
#endif


    // Define VCI parameters
    typedef soclib::caba::VciParams<cell_width,
                                    plen_width,
                                    address_width,
                                    error_width,
                                    clen_width,
                                    rflag_width,
                                    srcid_width,
                                    pktid_width,
                                    trdid_width,
                                    wrplen_width> vci_param;

    size_t      cluster_io_index;   // index of the cluster containing address 0xBFC00000
    size_t      x_width;            // number of address bits encoding the x coordinate
    size_t      y_width;            // number of address bits encoding the y coordinate

    // A single row (or column) needs no coordinate bit: without this case
    // cluster_io_index would designate a cluster that does not exist.
    if      (xmax == 1) x_width = 0;
    else if (xmax == 2) x_width = 1;
    else if (xmax <= 4) x_width = 2;
    else if (xmax <= 8) x_width = 3;
    else                x_width = 4;

    if      (ymax == 1) y_width = 0;
    else if (ymax == 2) y_width = 1;
    else if (ymax <= 4) y_width = 2;
    else if (ymax <= 8) y_width = 3;
    else                y_width = 4;

    // the (x_width + y_width) MSB bits of the address MSB byte 0xBF
    cluster_io_index = 0xBF >> (8 - x_width - y_width);

    /////////////////////
    //  Mapping Tables
    /////////////////////

    // direct network
    MappingTable maptabd(address_width,
                         IntTab(x_width + y_width, 16 - x_width - y_width),
                         IntTab(x_width + y_width, srcid_width - x_width - y_width),
                         0x00F00000);

    for ( size_t x = 0 ; x < xmax ; x++)
    {
        for ( size_t y = 0 ; y < ymax ; y++)
        {
            sc_uint<address_width> offset  = cluster_base(cluster(x,y), x_width, y_width);
            std::ostringstream  sm;
            sm << "d_seg_memc_" << x << "_" << y;
            maptabd.add(Segment(sm.str(), MEMC_BASE+offset, memc_size, IntTab(cluster(x,y),MEMC_TGTID), true));
            std::ostringstream  si;
            si << "d_seg_xicu_" << x << "_" << y;
            maptabd.add(Segment(si.str(), XICU_BASE+offset, XICU_SIZE, IntTab(cluster(x,y),XICU_TGTID), false));
            // the peripherals and the boot ROM only exist in the I/O cluster
            if ( cluster(x,y) == cluster_io_index )
            {
                maptabd.add(Segment("d_seg_fbuf", FBUF_BASE, FBUF_SIZE, IntTab(cluster(x,y),FBUF_TGTID), false));
                maptabd.add(Segment("d_seg_bdev", BDEV_BASE, BDEV_SIZE, IntTab(cluster(x,y),BDEV_TGTID), false));
                maptabd.add(Segment("d_seg_mtty", MTTY_BASE, MTTY_SIZE, IntTab(cluster(x,y),MTTY_TGTID), false));
                maptabd.add(Segment("d_seg_brom", BROM_BASE, BROM_SIZE, IntTab(cluster(x,y),BROM_TGTID), true));
                maptabd.add(Segment("d_seg_cdma", CDMA_BASE, CDMA_SIZE, IntTab(cluster(x,y),CDMA_TGTID), false));
            }
        }
    }
    std::cout << maptabd << std::endl;

    // coherence network
    MappingTable maptabc(address_width,
                         IntTab(x_width + y_width, 12 - x_width - y_width),
                         IntTab(x_width + y_width, srcid_width - x_width - y_width),
                         0xF0000000);

    for ( size_t x = 0 ; x < xmax ; x++)
    {
        for ( size_t y = 0 ; y < ymax ; y++)
        {
            sc_uint<address_width> offset  = cluster_base(cluster(x,y), x_width, y_width);

            std::ostringstream sm;
            sm << "c_seg_memc_" << x << "_" << y;
            maptabc.add(Segment(sm.str(), MEMC_BASE+offset, memc_size, IntTab(cluster(x,y), nprocs), false));
            // the segment base and size will be modified
            // when the segmentation of the coherence space will be simplified

            if ( cluster(x,y) == cluster_io_index )
            {
                std::ostringstream sr;
                sr << "c_seg_brom_" << x << "_" << y;
                maptabc.add(Segment(sr.str(), BROM_BASE, BROM_SIZE, IntTab(cluster(x,y), nprocs), false));
            }

            sc_uint<address_width> avoid_collision  = 0;
            for ( size_t p = 0 ; p < nprocs ; p++)
            {
                sc_uint<address_width> base = memc_size + (p*0x100000) + offset;
                // the following test is to avoid a collision between the c_seg_brom segment
                // and a c_seg_proc segment (all segments base addresses being multiple of 1Mbytes)
                // once set, the shift also applies to the following processors of the cluster
                if ( base == BROM_BASE ) avoid_collision = 0x100000;
                std::ostringstream sp;
                sp << "c_seg_proc_" << x << "_" << y << "_" << p;
                maptabc.add(Segment(sp.str(), base + avoid_collision, 0x20, IntTab(cluster(x,y), p), false,
                                    true, IntTab(cluster(x,y), p)));
                // the two last arguments will be removed
                // when the segmentation of the coherence space will be simplified
            }
        }
    }
    std::cout << maptabc << std::endl;

    // external network
    MappingTable maptabx(address_width, IntTab(1), IntTab(10), 0xF0000000);

    for ( size_t x = 0 ; x < xmax ; x++)
    {
        for ( size_t y = 0 ; y < ymax ; y++)
        {
            sc_uint<address_width> offset  = cluster_base(cluster(x,y), x_width, y_width);
            std::ostringstream sx;

            sx << "seg_xram_" << x << "_" << y;
            maptabx.add(Segment(sx.str(), MEMC_BASE + offset, memc_size, IntTab(cluster(x,y)), false));
        }
    }
    std::cout << maptabx << std::endl;

    ////////////////////
    // Signals
    ///////////////////

    sc_clock            signal_clk("clk");
    sc_signal<bool>     signal_resetn("resetn");
    sc_signal<bool>     signal_false;


    // Horizontal inter-clusters DSPIN signals
    // indexed by [x][y][k] : link between clusters (x,y) and (x+1,y), k in {0,1}
    DspinSignals<cmd_width>*** signal_dspin_h_cmd_inc =
        alloc_elems<DspinSignals<cmd_width> >("signal_dspin_h_cmd_inc", xmax-1, ymax, 2);
    DspinSignals<cmd_width>*** signal_dspin_h_cmd_dec =
        alloc_elems<DspinSignals<cmd_width> >("signal_dspin_h_cmd_dec", xmax-1, ymax, 2);
    DspinSignals<rsp_width>*** signal_dspin_h_rsp_inc =
        alloc_elems<DspinSignals<rsp_width> >("signal_dspin_h_rsp_inc", xmax-1, ymax, 2);
    DspinSignals<rsp_width>*** signal_dspin_h_rsp_dec =
        alloc_elems<DspinSignals<rsp_width> >("signal_dspin_h_rsp_dec", xmax-1, ymax, 2);

    // Vertical inter-clusters DSPIN signals
    // indexed by [x][y][k] : link between clusters (x,y) and (x,y+1), k in {0,1}
    DspinSignals<cmd_width>*** signal_dspin_v_cmd_inc =
        alloc_elems<DspinSignals<cmd_width> >("signal_dspin_v_cmd_inc", xmax, ymax-1, 2);
    DspinSignals<cmd_width>*** signal_dspin_v_cmd_dec =
        alloc_elems<DspinSignals<cmd_width> >("signal_dspin_v_cmd_dec", xmax, ymax-1, 2);
    DspinSignals<rsp_width>*** signal_dspin_v_rsp_inc =
        alloc_elems<DspinSignals<rsp_width> >("signal_dspin_v_rsp_inc", xmax, ymax-1, 2);
    DspinSignals<rsp_width>*** signal_dspin_v_rsp_dec =
        alloc_elems<DspinSignals<rsp_width> >("signal_dspin_v_rsp_dec", xmax, ymax-1, 2);

    // Mesh boundaries DSPIN signals
    // indexed by [x][y][k][direction]
    DspinSignals<cmd_width>**** signal_dspin_false_cmd_in =
        alloc_elems<DspinSignals<cmd_width> >("signal_dspin_false_cmd_in", xmax,  ymax, 2, 4);
    DspinSignals<cmd_width>**** signal_dspin_false_cmd_out =
        alloc_elems<DspinSignals<cmd_width> >("signal_dspin_false_cmd_out", xmax, ymax, 2, 4);
    DspinSignals<rsp_width>**** signal_dspin_false_rsp_in =
        alloc_elems<DspinSignals<rsp_width> >("signal_dspin_false_rsp_in", xmax, ymax, 2, 4);
    DspinSignals<rsp_width>**** signal_dspin_false_rsp_out =
        alloc_elems<DspinSignals<rsp_width> >("signal_dspin_false_rsp_out", xmax, ymax, 2, 4);

    // Xternal network VCI signals
    VciSignals<vci_param> signal_vci_tgt_x_xram("signal_vci_tgt_x_xram");

    ////////////////////////////
    //      Components
    ////////////////////////////

#if USE_ALMOS
    // The blanks around TO_STR(...) are required: since C++11 a string literal
    // immediately followed by an identifier is parsed as a user-defined literal.
    soclib::common::Loader loader("bootloader.bin",
                                  "arch-info.bin@" TO_STR(BOOT_INFO_BLOCK) ":D",
                                  "kernel-soclib.bin@" TO_STR(KERNEL_BIN_IMG) ":D");
#else
    soclib::common::Loader loader(soft_name);
#endif

#if USE_GDBSERVER
    typedef soclib::common::GdbServer<soclib::common::Mips32ElIss> proc_iss;
    proc_iss::set_loader(loader);
#else
    typedef soclib::common::Mips32ElIss proc_iss;
#endif


    // NOTE(review): the array bounds are run-time values, so this is a
    // variable length array, which is a compiler extension in C++.
    TsarV4ClusterXbar<vci_param, proc_iss, cmd_width, rsp_width>* clusters[xmax][ymax];

#if USE_OPENMP

    // The loop is distributed over the OpenMP threads, but each cluster
    // construction is done inside a critical section.
#pragma omp parallel
    {
#pragma omp for
        for( int i = 0 ; i  < (xmax * ymax); i++)
        {
            size_t x = i / ymax;
            size_t y = i % ymax;
#pragma omp critical
            {
                std::ostringstream sc;
                sc << "cluster_" << x << "_" << y;
                clusters[x][y] = new TsarV4ClusterXbar<vci_param, proc_iss, cmd_width, rsp_width>
                    (sc.str().c_str(),
                     nprocs,
                     x,
                     y,
                     cluster(x,y),
                     maptabd,
                     maptabc,
                     maptabx,
                     x_width,
                     y_width,
                     MEMC_TGTID,
                     XICU_TGTID,
                     FBUF_TGTID,
                     MTTY_TGTID,
                     BROM_TGTID,
                     BDEV_TGTID,
                     CDMA_TGTID,
                     memc_ways,
                     memc_sets,
                     l1_i_ways,
                     l1_i_sets,
                     l1_d_ways,
                     l1_d_sets,
                     xram_latency,
                     (cluster(x,y) == cluster_io_index),
                     xfb,
                     yfb,
                     fb_mode,
                     disk_name,
                     blk_size,
                     loader);
            }
        }
    }

#else  // USE_OPENMP
    for( size_t x = 0 ; x  < xmax ; x++)
    {
        for( size_t y = 0 ; y < ymax ; y++ )
        {
            std::ostringstream sc;
            sc << "cluster_" << x << "_" << y;

            clusters[x][y] = new TsarV4ClusterXbar<vci_param, proc_iss, cmd_width, rsp_width>
                (sc.str().c_str(),
                 nprocs,
                 x,
                 y,
                 cluster(x,y),
                 maptabd,
                 maptabc,
                 maptabx,
                 x_width,
                 y_width,
                 MEMC_TGTID,
                 XICU_TGTID,
                 FBUF_TGTID,
                 MTTY_TGTID,
                 BROM_TGTID,
                 BDEV_TGTID,
                 CDMA_TGTID,
                 memc_ways,
                 memc_sets,
                 l1_i_ways,
                 l1_i_sets,
                 l1_d_ways,
                 l1_d_sets,
                 xram_latency,
                 (cluster(x,y) == cluster_io_index),
                 xfb,
                 yfb,
                 fb_mode,
                 disk_name,
                 blk_size,
                 loader);
        }
    }
#endif  // USE_OPENMP

    ///////////////////////////////////////////////////////////////
    //     Net-list
    ///////////////////////////////////////////////////////////////

    // Clock & RESET
    for ( size_t x = 0 ; x < (xmax) ; x++ )
    {
        for ( size_t y = 0 ; y < ymax ; y++ )
        {
            clusters[x][y]->p_clk                       (signal_clk);
            clusters[x][y]->p_resetn                    (signal_resetn);
        }
    }

    // Inter Clusters horizontal connections
    // each signal links the EAST port of cluster (x,y) to the WEST port of cluster (x+1,y)
    if ( xmax > 1 )
    {
        for ( size_t x = 0 ; x < (xmax-1) ; x++ )
        {
            for ( size_t y = 0 ; y < ymax ; y++ )
            {
                for ( size_t k = 0 ; k < 2 ; k++ )
                {
                    clusters[x][y]->p_cmd_out[k][EAST]      (signal_dspin_h_cmd_inc[x][y][k]);
                    clusters[x+1][y]->p_cmd_in[k][WEST]     (signal_dspin_h_cmd_inc[x][y][k]);
                    clusters[x][y]->p_cmd_in[k][EAST]       (signal_dspin_h_cmd_dec[x][y][k]);
                    clusters[x+1][y]->p_cmd_out[k][WEST]    (signal_dspin_h_cmd_dec[x][y][k]);
                    clusters[x][y]->p_rsp_out[k][EAST]      (signal_dspin_h_rsp_inc[x][y][k]);
                    clusters[x+1][y]->p_rsp_in[k][WEST]     (signal_dspin_h_rsp_inc[x][y][k]);
                    clusters[x][y]->p_rsp_in[k][EAST]       (signal_dspin_h_rsp_dec[x][y][k]);
                    clusters[x+1][y]->p_rsp_out[k][WEST]    (signal_dspin_h_rsp_dec[x][y][k]);
                }
            }
        }
    }
    std::cout << "Horizontal connections established" << std::endl;

    // Inter Clusters vertical connections
    // each signal links the NORTH port of cluster (x,y) to the SOUTH port of cluster (x,y+1)
    if ( ymax > 1 )
    {
        for ( size_t y = 0 ; y < (ymax-1) ; y++ )
        {
            for ( size_t x = 0 ; x < xmax ; x++ )
            {
                for ( size_t k = 0 ; k < 2 ; k++ )
                {
                    clusters[x][y]->p_cmd_out[k][NORTH]     (signal_dspin_v_cmd_inc[x][y][k]);
                    clusters[x][y+1]->p_cmd_in[k][SOUTH]    (signal_dspin_v_cmd_inc[x][y][k]);
                    clusters[x][y]->p_cmd_in[k][NORTH]      (signal_dspin_v_cmd_dec[x][y][k]);
                    clusters[x][y+1]->p_cmd_out[k][SOUTH]   (signal_dspin_v_cmd_dec[x][y][k]);
                    clusters[x][y]->p_rsp_out[k][NORTH]     (signal_dspin_v_rsp_inc[x][y][k]);
                    clusters[x][y+1]->p_rsp_in[k][SOUTH]    (signal_dspin_v_rsp_inc[x][y][k]);
                    clusters[x][y]->p_rsp_in[k][NORTH]      (signal_dspin_v_rsp_dec[x][y][k]);
                    clusters[x][y+1]->p_rsp_out[k][SOUTH]   (signal_dspin_v_rsp_dec[x][y][k]);
                }
            }
        }
    }

    std::cout << "Vertical connections established" << std::endl;

    // East & West boundary cluster connections
    for ( size_t y = 0 ; y < ymax ; y++ )
    {
        for ( size_t k = 0 ; k < 2 ; k++ )
        {
            clusters[0][y]->p_cmd_in[k][WEST]           (signal_dspin_false_cmd_in[0][y][k][WEST]);
            clusters[0][y]->p_cmd_out[k][WEST]          (signal_dspin_false_cmd_out[0][y][k][WEST]);
            clusters[0][y]->p_rsp_in[k][WEST]           (signal_dspin_false_rsp_in[0][y][k][WEST]);
            clusters[0][y]->p_rsp_out[k][WEST]          (signal_dspin_false_rsp_out[0][y][k][WEST]);

            clusters[xmax-1][y]->p_cmd_in[k][EAST]      (signal_dspin_false_cmd_in[xmax-1][y][k][EAST]);
            clusters[xmax-1][y]->p_cmd_out[k][EAST]     (signal_dspin_false_cmd_out[xmax-1][y][k][EAST]);
            clusters[xmax-1][y]->p_rsp_in[k][EAST]      (signal_dspin_false_rsp_in[xmax-1][y][k][EAST]);
            clusters[xmax-1][y]->p_rsp_out[k][EAST]     (signal_dspin_false_rsp_out[xmax-1][y][k][EAST]);
        }
    }

    // North & South boundary clusters connections
    for ( size_t x = 0 ; x < xmax ; x++ )
    {
        for ( size_t k = 0 ; k < 2 ; k++ )
        {
            clusters[x][0]->p_cmd_in[k][SOUTH]          (signal_dspin_false_cmd_in[x][0][k][SOUTH]);
            clusters[x][0]->p_cmd_out[k][SOUTH]         (signal_dspin_false_cmd_out[x][0][k][SOUTH]);
            clusters[x][0]->p_rsp_in[k][SOUTH]          (signal_dspin_false_rsp_in[x][0][k][SOUTH]);
            clusters[x][0]->p_rsp_out[k][SOUTH]         (signal_dspin_false_rsp_out[x][0][k][SOUTH]);

            clusters[x][ymax-1]->p_cmd_in[k][NORTH]     (signal_dspin_false_cmd_in[x][ymax-1][k][NORTH]);
            clusters[x][ymax-1]->p_cmd_out[k][NORTH]    (signal_dspin_false_cmd_out[x][ymax-1][k][NORTH]);
            clusters[x][ymax-1]->p_rsp_in[k][NORTH]     (signal_dspin_false_rsp_in[x][ymax-1][k][NORTH]);
            clusters[x][ymax-1]->p_rsp_out[k][NORTH]    (signal_dspin_false_rsp_out[x][ymax-1][k][NORTH]);
        }
    }

    ////////////////////////////////////////////////////////
    //   Simulation
    ///////////////////////////////////////////////////////

    sc_start(sc_core::sc_time(0, SC_NS));
    signal_resetn = false;

    // network boundaries signals
    // every boundary signal is forced to "no write request / always ready to read"
    for(size_t x=0; x<xmax ; x++)
    {
        for(size_t y=0 ; y<ymax ; y++)
        {
            for (size_t k=0; k<2; k++)
            {
                for(size_t a=0; a<4; a++)
                {
                    signal_dspin_false_cmd_in[x][y][k][a].write = false;
                    signal_dspin_false_cmd_in[x][y][k][a].read = true;
                    signal_dspin_false_cmd_out[x][y][k][a].write = false;
                    signal_dspin_false_cmd_out[x][y][k][a].read = true;

                    signal_dspin_false_rsp_in[x][y][k][a].write = false;
                    signal_dspin_false_rsp_in[x][y][k][a].read = true;
                    signal_dspin_false_rsp_out[x][y][k][a].write = false;
                    signal_dspin_false_rsp_out[x][y][k][a].read = true;
                }
            }
        }
    }

    // the reset is released after 1 ns of simulated time
    sc_start(sc_core::sc_time(1, SC_NS));
    signal_resetn = true;

    if (gettimeofday(&t1, NULL) != 0)
    {
        perror("gettimeofday");
        return EXIT_FAILURE;
    }

    // Endless simulation loop: each iteration simulates 100000000 ns and
    // prints the ratio (simulated ns) / (elapsed wall-clock ms).
    // NOTE(review): this ratio is a clock frequency in kHz only if the clock
    // period is 1 ns - presumably the sc_clock default, to be confirmed.
    while(1)
    {
        sc_start(sc_core::sc_time(100000000, SC_NS));

        if (gettimeofday(&t2, NULL) != 0)
        {
            perror("gettimeofday");
            return EXIT_FAILURE;
        }

        ms1 = (uint64_t)t1.tv_sec * 1000ULL + (uint64_t)t1.tv_usec / 1000;
        ms2 = (uint64_t)t2.tv_sec * 1000ULL + (uint64_t)t2.tv_usec / 1000;

        std::cerr << "platform clock frequency " << (double)100000000ULL / (double)(ms2 - ms1) << "Khz" << std::endl;

        if (gettimeofday(&t1, NULL) != 0)
        {
            perror("gettimeofday");
            return EXIT_FAILURE;
        }
    }

    return EXIT_SUCCESS;
}

/////////////////////////////////////////////////////////////////////////
// SystemC entry point: runs _main() and reports escaping exceptions.
// - returns the value returned by _main() when no exception escapes,
// - returns 1 after printing what() when a std::exception is caught,
// - prints a message and rethrows any other exception.
/////////////////////////////////////////////////////////////////////////
int sc_main (int argc, char *argv[])
{
	try {
		return _main(argc, argv);
	} catch (const std::exception &e) {
		// caught by const reference: the exception is only read
		std::cout << e.what() << std::endl;
	} catch (...) {
		std::cout << "Unknown exception occured" << std::endl;
		throw;
	}
	return 1;
}
