///////////////////////////////////////////////////////////////////////////////
// File: tsar_super_cluster.cpp
// Author: Manuel Bouyer
// Copyright: UPMC/LIP6
// Date : march 2016
// This program is released under the GNU public license
//////////////////////////////////////////////////////////////////////////////
// This file defines a TSAR super-cluster architecture with virtual memory.
// It uses a 2d-mesh of tsar_cluster_3d clusters, interconnected by their
// X and Y ports. The Z ports are used to connect to other super-clusters
// or to a 3rd level mesh.
//////////////////////////////////////////////////////////////////////////////////

#include <systemc>
#include <sys/time.h>
#include <iostream>
#include <sstream>
#include <cstdlib>
#include <cstdarg>
#include <stdint.h>
#include "tsar_super_cluster.h"

#define _NORTH                 0
#define _SOUTH                 1
#define _EAST                  2
#define _WEST                  3
#define _UP                    4
#define _DOWN                  5

#define cluster(x,y,z)   ((x << (m_y_width + m_z_width)) + (y << m_z_width) + z)

namespace soclib {
namespace caba  {

////////////////////////////////////////////////////////////////////////////////////
template<size_t dspin_cmd_width,
         size_t dspin_rsp_width,
         typename vci_param_int,
         typename vci_param_ext> TsarSuperCluster<dspin_cmd_width,
                                                 dspin_rsp_width,
                                                 vci_param_int,
                                                 vci_param_ext>::TsarSuperCluster(
////////////////////////////////////////////////////////////////////////////////////
         sc_module_name                     insname,
         size_t                             nb_procs,
         size_t                             nb_ttys,
         size_t                             nb_dmas,
         size_t                             x_size,
         size_t                             y_size,
         size_t                             z_id,
	 size_t				    elevator_x,
	 size_t				    elevator_y,
         const soclib::common::MappingTable &mtd,
         const soclib::common::MappingTable &mtx,
         size_t                             x_width,
         size_t                             y_width,
         size_t                             z_width,
         size_t                             p_width,
         size_t                             vci_srcid_width,
         size_t                             tgtid_memc,
         size_t                             tgtid_xicu,
         size_t                             tgtid_mdma,
         size_t                             tgtid_fbuf,
         size_t                             tgtid_mtty,
         size_t                             tgtid_brom,
         size_t                             tgtid_mnic,
         size_t                             tgtid_chbuf,
         size_t                             tgtid_bdev,
         size_t                             tgtid_simh,
         size_t                             memc_ways,
         size_t                             memc_sets,
         size_t                             l1_i_ways,
         size_t                             l1_i_sets,
         size_t                             l1_d_ways,
         size_t                             l1_d_sets,
         size_t                             irq_per_processor,
         size_t                             xram_latency,
         size_t				    x_io,
	 size_t				    y_io,
	 size_t				    z_io,
         size_t                             xfb,
         size_t                             yfb,
         char*                              disk_name,
         size_t                             block_size,
         size_t                             nic_channels,
         char*                              nic_rx_name,
         char*                              nic_tx_name,
         uint32_t                           nic_timeout,
         size_t                             chbufdma_channels,
         const Loader &                     loader,
         uint32_t                           frozen_cycles,
         uint32_t                           debug_start_cycle,
         bool                               memc_debug_ok,
         bool                               proc_debug_ok)
            : soclib::caba::BaseModule(insname),
            p_clk("clk"),
            p_resetn("resetn"),
	    m_x_size(x_size),
	    m_y_size(y_size),
	    m_z_id(z_id),
	    m_x_width(x_width),
	    m_y_width(y_width),
	    m_z_width(z_width) {

    assert(x_size <= 32 && "x_size should be less than 32");
    assert(y_size <= 32 && "y_size should be less than 32");

    /////////////////////////////////////////////////////////////////////////////
    // Vectors of ports definition and allocation
    /////////////////////////////////////////////////////////////////////////////

    p_cmd_in  = alloc_elems<DspinInput<dspin_cmd_width> >  ("p_cmd_in",  2);
    p_cmd_out = alloc_elems<DspinOutput<dspin_cmd_width> > ("p_cmd_out", 2);

    p_rsp_in  = alloc_elems<DspinInput<dspin_rsp_width> >  ("p_rsp_in",  2);
    p_rsp_out = alloc_elems<DspinOutput<dspin_rsp_width> > ("p_rsp_out", 2);

    p_m2p_in  = alloc_elems<DspinInput<dspin_cmd_width> >  ("p_m2p_in",  2);
    p_m2p_out = alloc_elems<DspinOutput<dspin_cmd_width> > ("p_m2p_out", 2);

    p_p2m_in  = alloc_elems<DspinInput<dspin_rsp_width> >  ("p_p2m_in",  2);
    p_p2m_out = alloc_elems<DspinOutput<dspin_rsp_width> > ("p_p2m_out", 2);

    p_cla_in  = alloc_elems<DspinInput<dspin_cmd_width> >  ("p_cla_in",  2);
    p_cla_out = alloc_elems<DspinOutput<dspin_cmd_width> > ("p_cla_out", 2);

    /////////////////////////////////////////////////////////////////////////////
    // Allocate array of clusters
    /////////////////////////////////////////////////////////////////////////////
#ifdef _OPENMP
#pragma omp parallel
    {
#pragma omp for
#endif
    for (size_t i = 0; i  < (x_size * y_size); i++) {
	size_t x = i / y_size;
	size_t y = i % y_size;
#ifdef _OPENMP
#pragma omp critical
        {
#endif
	std::ostringstream sc;
	sc << "cluster_" << x << "_" << y << "_" << z_id;
	std::cout << sc << std::endl;
	clusters[x][y] = new TsarXbarCluster<dspin_cmd_width,
					     dspin_rsp_width,
					     vci_param_int,
					     vci_param_ext>
       (
                sc.str().c_str(),
                nb_procs,
                nb_ttys,
                nb_dmas,
                x,
                y,
                z_id,
                cluster(x,y, z_id),
                elevator_x,
                elevator_y,
                mtd,
                mtx,
                x_width,
                y_width,
                z_width,
                vci_srcid_width - x_width - y_width - z_width,   // l_id width,
		p_width,
		tgtid_memc,
		tgtid_xicu,
		tgtid_mdma,
		tgtid_fbuf,
		tgtid_mtty,
		tgtid_brom,
		tgtid_mnic,
		tgtid_chbuf,
		tgtid_bdev,
		tgtid_simh,
		memc_ways,
		memc_sets,
		l1_i_ways,
		l1_i_sets,
		l1_d_ways,
		l1_d_sets,
		irq_per_processor,
		xram_latency,
		x == x_io && y == y_io && z_id == z_io,
		xfb,
		yfb,
		disk_name,
		block_size,
		nic_channels,
		nic_rx_name,
		nic_tx_name,
		nic_timeout,
		chbufdma_channels,
		loader,
		frozen_cycles,
		debug_start_cycle,
		memc_debug_ok,
		proc_debug_ok);
#ifdef _OPENMP
        } // critical
#endif
    }
#ifdef _OPENMP
    } // for
#endif
    
    signal_dspin_h_cmd_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_h_cmd_inc", x_size + 1, y_size);
    signal_dspin_h_cmd_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_h_cmd_dec", x_size + 1, y_size);
    signal_dspin_h_rsp_inc =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_h_rsp_inc", x_size + 1, y_size);
    signal_dspin_h_rsp_dec =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_h_rsp_dec", x_size + 1, y_size);
    signal_dspin_h_m2p_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_h_m2p_inc", x_size+ 1 , y_size);
    signal_dspin_h_m2p_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_h_m2p_dec", x_size + 1, y_size);
    signal_dspin_h_p2m_inc =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_h_p2m_inc", x_size + 1, y_size);
    signal_dspin_h_p2m_dec =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_h_p2m_dec", x_size + 1, y_size);
    signal_dspin_h_cla_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_h_cla_inc", x_size + 1, y_size);
    signal_dspin_h_cla_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_h_cla_dec", x_size + 1, y_size);

    signal_dspin_v_cmd_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_v_cmd_inc", x_size, y_size + 1);
    signal_dspin_v_cmd_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_v_cmd_dec", x_size, y_size + 1);
    signal_dspin_v_rsp_inc =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_v_rsp_inc", x_size, y_size + 1);
    signal_dspin_v_rsp_dec =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_v_rsp_dec", x_size, y_size + 1);
    signal_dspin_v_m2p_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_v_m2p_inc", x_size, y_size + 1);
    signal_dspin_v_m2p_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_v_m2p_dec", x_size, y_size + 1);
    signal_dspin_v_p2m_inc =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_v_p2m_inc", x_size, y_size + 1);
    signal_dspin_v_p2m_dec =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_v_p2m_dec", x_size, y_size + 1);
    signal_dspin_v_cla_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_v_cla_inc", x_size, y_size + 1);
    signal_dspin_v_cla_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_v_cla_dec", x_size, y_size + 1);

    signal_dspin_zu_cmd_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zu_cmd_inc", x_size, y_size);
    signal_dspin_zu_cmd_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zu_cmd_dec", x_size, y_size);
    signal_dspin_zu_rsp_inc =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_zu_rsp_inc", x_size, y_size);
    signal_dspin_zu_rsp_dec =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_zu_rsp_dec", x_size, y_size);
    signal_dspin_zu_m2p_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zu_m2p_inc", x_size, y_size);
    signal_dspin_zu_m2p_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zu_m2p_dec", x_size, y_size);
    signal_dspin_zu_p2m_inc =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_zu_p2m_inc", x_size, y_size);
    signal_dspin_zu_p2m_dec =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_zu_p2m_dec", x_size, y_size);
    signal_dspin_zu_cla_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zu_cla_inc", x_size, y_size);
    signal_dspin_zu_cla_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zu_cla_dec", x_size, y_size);

    signal_dspin_zd_cmd_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zd_cmd_inc", x_size, y_size);
    signal_dspin_zd_cmd_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zd_cmd_dec", x_size, y_size);
    signal_dspin_zd_rsp_inc =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_zd_rsp_inc", x_size, y_size);
    signal_dspin_zd_rsp_dec =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_zd_rsp_dec", x_size, y_size);
    signal_dspin_zd_m2p_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zd_m2p_inc", x_size, y_size);
    signal_dspin_zd_m2p_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zd_m2p_dec", x_size, y_size);
    signal_dspin_zd_p2m_inc =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_zd_p2m_inc", x_size, y_size);
    signal_dspin_zd_p2m_dec =
      alloc_elems<DspinSignals<dspin_rsp_width> >("signal_dspin_zd_p2m_dec", x_size, y_size);
    signal_dspin_zd_cla_inc =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zd_cla_inc", x_size, y_size);
    signal_dspin_zd_cla_dec =
      alloc_elems<DspinSignals<dspin_cmd_width> >("signal_dspin_zd_cla_dec", x_size, y_size);

    ////////////////////////////////////////////////////////////////////////
    // Net-list
    ////////////////////////////////////////////////////////////////////////
    // Clock & RESET
    for (int x = 0; x < x_size; x++) {
      for (int y = 0; y < y_size; y++) {
         clusters[x][y]->p_clk                      (p_clk);
         clusters[x][y]->p_resetn                   (p_resetn);
      }
    }

    // Inter Clusters horizontal connections
    // This also includes signals on E/W boundaries
    for (int x = 0; x < x_size; x++) {
      for (int y = 0; y < y_size; y++) {
         clusters[x][y]->p_cmd_out[_EAST]  (signal_dspin_h_cmd_inc[x + 1][y]);
         clusters[x][y]->p_cmd_in[_WEST]   (signal_dspin_h_cmd_inc[x    ][y]);
         clusters[x][y]->p_cmd_in[_EAST]   (signal_dspin_h_cmd_dec[x + 1][y]);
         clusters[x][y]->p_cmd_out[_WEST]  (signal_dspin_h_cmd_dec[x    ][y]);

         clusters[x][y]->p_rsp_out[_EAST]  (signal_dspin_h_rsp_inc[x + 1][y]);
         clusters[x][y]->p_rsp_in[_WEST]   (signal_dspin_h_rsp_inc[x    ][y]);
         clusters[x][y]->p_rsp_in[_EAST]   (signal_dspin_h_rsp_dec[x + 1][y]);
         clusters[x][y]->p_rsp_out[_WEST]  (signal_dspin_h_rsp_dec[x    ][y]);

         clusters[x][y]->p_m2p_out[_EAST]  (signal_dspin_h_m2p_inc[x + 1][y]);
         clusters[x][y]->p_m2p_in[_WEST]   (signal_dspin_h_m2p_inc[x    ][y]);
         clusters[x][y]->p_m2p_in[_EAST]   (signal_dspin_h_m2p_dec[x + 1][y]);
         clusters[x][y]->p_m2p_out[_WEST]  (signal_dspin_h_m2p_dec[x    ][y]);

         clusters[x][y]->p_p2m_out[_EAST]  (signal_dspin_h_p2m_inc[x + 1][y]);
         clusters[x][y]->p_p2m_in[_WEST]   (signal_dspin_h_p2m_inc[x    ][y]);
         clusters[x][y]->p_p2m_in[_EAST]   (signal_dspin_h_p2m_dec[x + 1][y]);
         clusters[x][y]->p_p2m_out[_WEST]  (signal_dspin_h_p2m_dec[x    ][y]);

         clusters[x][y]->p_cla_out[_EAST]  (signal_dspin_h_cla_inc[x + 1][y]);
         clusters[x][y]->p_cla_in[_WEST]   (signal_dspin_h_cla_inc[x    ][y]);
         clusters[x][y]->p_cla_in[_EAST]   (signal_dspin_h_cla_dec[x + 1][y]);
         clusters[x][y]->p_cla_out[_WEST]  (signal_dspin_h_cla_dec[x    ][y]);
      }
    }
    // Inter Clusters vertical connections
    // This also includes signals on N/S boundaries
    for (int y = 0; y < y_size; y++) {
      for (int x = 0; x < x_size; x++) {
         clusters[x][y]->p_cmd_out[_NORTH] (signal_dspin_v_cmd_inc[x][y + 1]);
         clusters[x][y]->p_cmd_in[_SOUTH]  (signal_dspin_v_cmd_inc[x][y    ]);
         clusters[x][y]->p_cmd_in[_NORTH]  (signal_dspin_v_cmd_dec[x][y + 1]);
         clusters[x][y]->p_cmd_out[_SOUTH] (signal_dspin_v_cmd_dec[x][y    ]);

         clusters[x][y]->p_rsp_out[_NORTH] (signal_dspin_v_rsp_inc[x][y + 1]);
         clusters[x][y]->p_rsp_in[_SOUTH]  (signal_dspin_v_rsp_inc[x][y    ]);
         clusters[x][y]->p_rsp_in[_NORTH]  (signal_dspin_v_rsp_dec[x][y + 1]);
         clusters[x][y]->p_rsp_out[_SOUTH] (signal_dspin_v_rsp_dec[x][y    ]);

         clusters[x][y]->p_m2p_out[_NORTH] (signal_dspin_v_m2p_inc[x][y + 1]);
         clusters[x][y]->p_m2p_in[_SOUTH]  (signal_dspin_v_m2p_inc[x][y    ]);
         clusters[x][y]->p_m2p_in[_NORTH]  (signal_dspin_v_m2p_dec[x][y + 1]);
         clusters[x][y]->p_m2p_out[_SOUTH] (signal_dspin_v_m2p_dec[x][y    ]);

         clusters[x][y]->p_p2m_out[_NORTH] (signal_dspin_v_p2m_inc[x][y + 1]);
         clusters[x][y]->p_p2m_in[_SOUTH]  (signal_dspin_v_p2m_inc[x][y    ]);
         clusters[x][y]->p_p2m_in[_NORTH]  (signal_dspin_v_p2m_dec[x][y + 1]);
         clusters[x][y]->p_p2m_out[_SOUTH] (signal_dspin_v_p2m_dec[x][y    ]);

         clusters[x][y]->p_cla_out[_NORTH] (signal_dspin_v_cla_inc[x][y + 1]);
         clusters[x][y]->p_cla_in[_SOUTH]  (signal_dspin_v_cla_inc[x][y    ]);
         clusters[x][y]->p_cla_in[_NORTH]  (signal_dspin_v_cla_dec[x][y + 1]);
         clusters[x][y]->p_cla_out[_SOUTH] (signal_dspin_v_cla_dec[x][y    ]);
      }
    }

    // z-signals connections.
    // the elevator is connected to the interface signals, others are
    // connected to Null sources/sinks
    for (size_t y = 0; y < y_size; y++) {
      for (size_t x = 0; x < x_size; x++) {
	if (x == elevator_x && y == elevator_y) {
         clusters[x][y]->p_cmd_out[_UP]     (p_cmd_out[0]);
         clusters[x][y]->p_cmd_out[_DOWN]   (p_cmd_out[1]);
         clusters[x][y]->p_cmd_in[_UP]      (p_cmd_in[0]);
         clusters[x][y]->p_cmd_in[_DOWN]    (p_cmd_in[1]);

         clusters[x][y]->p_rsp_in[_UP]      (p_rsp_in[0]);
         clusters[x][y]->p_rsp_in[_DOWN]    (p_rsp_in[1]);
         clusters[x][y]->p_rsp_out[_UP]     (p_rsp_out[0]);
         clusters[x][y]->p_rsp_out[_DOWN]   (p_rsp_out[1]);

         clusters[x][y]->p_m2p_out[_UP]     (p_m2p_out[0]);
         clusters[x][y]->p_m2p_out[_DOWN]   (p_m2p_out[1]);
         clusters[x][y]->p_m2p_in[_UP]      (p_m2p_in[0]);
         clusters[x][y]->p_m2p_in[_DOWN]    (p_m2p_in[1]);

         clusters[x][y]->p_p2m_out[_UP]     (p_p2m_out[0]);
         clusters[x][y]->p_p2m_out[_DOWN]   (p_p2m_out[1]);
         clusters[x][y]->p_p2m_in[_UP]      (p_p2m_in[0]);
         clusters[x][y]->p_p2m_in[_DOWN]    (p_p2m_in[1]);

         clusters[x][y]->p_cla_out[_UP]     (p_cla_out[0]);
         clusters[x][y]->p_cla_out[_DOWN]   (p_cla_out[1]);
         clusters[x][y]->p_cla_in[_UP]      (p_cla_in[0]);
         clusters[x][y]->p_cla_in[_DOWN]    (p_cla_in[1]);
	  
        } else {
         clusters[x][y]->p_cmd_out[_UP]     (signal_dspin_zu_cmd_inc[x][y]);
         clusters[x][y]->p_cmd_in[_UP]      (signal_dspin_zu_cmd_dec[x][y]);
         clusters[x][y]->p_cmd_in[_DOWN]    (signal_dspin_zd_cmd_inc[x][y]);
         clusters[x][y]->p_cmd_out[_DOWN]   (signal_dspin_zd_cmd_dec[x][y]);

         clusters[x][y]->p_rsp_out[_UP]     (signal_dspin_zu_rsp_inc[x][y]);
         clusters[x][y]->p_rsp_in[_UP]      (signal_dspin_zu_rsp_dec[x][y]);
         clusters[x][y]->p_rsp_in[_DOWN]    (signal_dspin_zd_rsp_inc[x][y]);
         clusters[x][y]->p_rsp_out[_DOWN]   (signal_dspin_zd_rsp_dec[x][y]);

         clusters[x][y]->p_m2p_out[_UP]     (signal_dspin_zu_m2p_inc[x][y]);
         clusters[x][y]->p_m2p_in[_UP]      (signal_dspin_zu_m2p_dec[x][y]);
         clusters[x][y]->p_m2p_in[_DOWN]    (signal_dspin_zd_m2p_inc[x][y]);
         clusters[x][y]->p_m2p_out[_DOWN]   (signal_dspin_zd_m2p_dec[x][y]);

         clusters[x][y]->p_p2m_out[_UP]     (signal_dspin_zu_p2m_inc[x][y]);
         clusters[x][y]->p_p2m_in[_UP]      (signal_dspin_zu_p2m_dec[x][y]);
         clusters[x][y]->p_p2m_in[_DOWN]    (signal_dspin_zd_p2m_inc[x][y]);
         clusters[x][y]->p_p2m_out[_DOWN]   (signal_dspin_zd_p2m_dec[x][y]);

         clusters[x][y]->p_cla_out[_UP]     (signal_dspin_zu_cla_inc[x][y]);
         clusters[x][y]->p_cla_in[_UP]      (signal_dspin_zu_cla_dec[x][y]);
         clusters[x][y]->p_cla_in[_DOWN]    (signal_dspin_zd_cla_inc[x][y]);
         clusters[x][y]->p_cla_out[_DOWN]   (signal_dspin_zd_cla_dec[x][y]);
        }
      }
    }

} // end constructor



template<size_t dspin_cmd_width,
         size_t dspin_rsp_width,
         typename vci_param_int,
         typename vci_param_ext> TsarSuperCluster<dspin_cmd_width,
                                                 dspin_rsp_width,
                                                 vci_param_int,
                                                 vci_param_ext>::~TsarSuperCluster() {

    dealloc_elems<DspinInput<dspin_cmd_width> > (p_cmd_in, 2);
    dealloc_elems<DspinOutput<dspin_cmd_width> >(p_cmd_out, 2);

    dealloc_elems<DspinInput<dspin_rsp_width> > (p_rsp_in, 2);
    dealloc_elems<DspinOutput<dspin_rsp_width> >(p_rsp_out, 2);

    dealloc_elems<DspinInput<dspin_cmd_width> > (p_m2p_in, 2);
    dealloc_elems<DspinOutput<dspin_cmd_width> >(p_m2p_out, 2);

    dealloc_elems<DspinInput<dspin_rsp_width> > (p_p2m_in, 2);
    dealloc_elems<DspinOutput<dspin_rsp_width> >(p_p2m_out, 2);

    dealloc_elems<DspinInput<dspin_cmd_width> > (p_cla_in, 2);
    dealloc_elems<DspinOutput<dspin_cmd_width> >(p_cla_out, 2);

    for (size_t i = 0; i  < (m_x_size * m_y_size); i++) {
	size_t x = i / m_y_size;
	size_t y = i % m_y_size;
	delete clusters[x][y];
    }
}


template<size_t dspin_cmd_width,
         size_t dspin_rsp_width,
         typename vci_param_int,
         typename vci_param_ext>
void TsarSuperCluster<dspin_cmd_width,
                     dspin_rsp_width,
                     vci_param_int,
                     vci_param_ext>::trace(sc_core::sc_trace_file * tf) {

    for (size_t i = 0; i  < (m_x_size * m_y_size); i++) {
	std::ostringstream signame;
	size_t x = i / m_y_size;
	size_t y = i % m_y_size;

	signame << "cluster" << x << "_" << y << "_" << m_z_id;
	std::cout << "trace " << signame.str() << std::endl;
        clusters[x][y]->trace(tf, signame.str());
	sc_core::sc_trace(tf, p_resetn, "resetn");
    }
    for (size_t x = 0; x < m_x_size + 1; x++) {
      for (size_t y = 0; y < m_y_size + 1; y++) {
	if (x < m_x_size || y < m_y_size) {
	  std::ostringstream signame;
	  signame <<  "l[" << x << "][" << y << "]";
#define __trace(s) s[x][y].trace(tf, signame.str() + "_" + #s); std::cout << "trace " << signame.str() + "_" + #s << std::endl;
          if (y < m_y_size) {
	    __trace(signal_dspin_h_cmd_inc);
	    __trace(signal_dspin_h_cmd_dec);
	  }
          if (x < m_x_size) {
	    __trace(signal_dspin_v_cmd_inc);
	    __trace(signal_dspin_v_cmd_dec);
	  }
	  if (x < m_x_size && y < m_y_size) {
	    __trace(signal_dspin_zu_cmd_inc);
	    __trace(signal_dspin_zu_cmd_dec);
	    __trace(signal_dspin_zd_cmd_inc);
	    __trace(signal_dspin_zd_cmd_dec);
	  }
          if (y < m_y_size) {
	    __trace(signal_dspin_h_rsp_inc);
	    __trace(signal_dspin_h_rsp_dec);
	  }
          if (x < m_x_size) {
	    __trace(signal_dspin_v_rsp_inc);
	    __trace(signal_dspin_v_rsp_dec);
	  }
	  if (x < m_x_size && y < m_y_size) {
	    __trace(signal_dspin_zu_rsp_inc);
	    __trace(signal_dspin_zu_rsp_dec);
	    __trace(signal_dspin_zd_rsp_inc);
	    __trace(signal_dspin_zd_rsp_dec);
	  }
          if (y < m_y_size) {
	    __trace(signal_dspin_h_m2p_inc);
	    __trace(signal_dspin_h_m2p_dec);
	  }
          if (x < m_x_size) {
	    __trace(signal_dspin_v_m2p_inc);
	    __trace(signal_dspin_v_m2p_dec);
	  }
	  if (x < m_x_size && y < m_y_size) {
	    __trace(signal_dspin_zu_m2p_inc);
	    __trace(signal_dspin_zu_m2p_dec);
	    __trace(signal_dspin_zd_m2p_inc);
	    __trace(signal_dspin_zd_m2p_dec);
	  }
          if (y < m_y_size) {
	    __trace(signal_dspin_h_p2m_inc);
	    __trace(signal_dspin_h_p2m_dec);
	  }
          if (x < m_x_size) {
	    __trace(signal_dspin_v_p2m_inc);
	    __trace(signal_dspin_v_p2m_dec);
	  }
	  if (x < m_x_size && y < m_y_size) {
	    __trace(signal_dspin_zu_p2m_inc);
	    __trace(signal_dspin_zu_p2m_dec);
	    __trace(signal_dspin_zd_p2m_inc);
	    __trace(signal_dspin_zd_p2m_dec);
	  }
          if (y < m_y_size) {
	    __trace(signal_dspin_h_cla_inc);
	    __trace(signal_dspin_h_cla_dec);
	  }
          if (x < m_x_size) {
	    __trace(signal_dspin_v_cla_inc);
	    __trace(signal_dspin_v_cla_dec);
	  }
	  if (x < m_x_size && y < m_y_size) {
	    __trace(signal_dspin_zu_cla_inc);
	    __trace(signal_dspin_zu_cla_dec);
	    __trace(signal_dspin_zd_cla_inc);
	    __trace(signal_dspin_zd_cla_dec);
	  }
	}
      }
    }
}                        

template<size_t dspin_cmd_width,
         size_t dspin_rsp_width,
         typename vci_param_int,
         typename vci_param_ext>
void TsarSuperCluster<dspin_cmd_width,
                     dspin_rsp_width,
                     vci_param_int,
                     vci_param_ext>::reset() {
    // setup boundaries signals
    for (size_t y = 0; y < m_y_size; y++) {
        signal_dspin_h_cmd_inc[0][y].write = false;
        signal_dspin_h_cmd_inc[0][y].read = true;
        signal_dspin_h_cmd_dec[0][y].write = false;
        signal_dspin_h_cmd_dec[0][y].read = true;
        signal_dspin_h_cmd_inc[m_x_size][y].write = false;
        signal_dspin_h_cmd_inc[m_x_size][y].read = true;
        signal_dspin_h_cmd_dec[m_x_size][y].write = false;
        signal_dspin_h_cmd_dec[m_x_size][y].read = true;

        signal_dspin_h_rsp_inc[0][y].write = false;
        signal_dspin_h_rsp_inc[0][y].read = true;
        signal_dspin_h_rsp_dec[0][y].write = false;
        signal_dspin_h_rsp_dec[0][y].read = true;
        signal_dspin_h_rsp_inc[m_x_size][y].write = false;
        signal_dspin_h_rsp_inc[m_x_size][y].read = true;
        signal_dspin_h_rsp_dec[m_x_size][y].write = false;
        signal_dspin_h_rsp_dec[m_x_size][y].read = true;

        signal_dspin_h_m2p_inc[0][y].write = false;
        signal_dspin_h_m2p_inc[0][y].read = true;
        signal_dspin_h_m2p_dec[0][y].write = false;
        signal_dspin_h_m2p_dec[0][y].read = true;
        signal_dspin_h_m2p_inc[m_x_size][y].write = false;
        signal_dspin_h_m2p_inc[m_x_size][y].read = true;
        signal_dspin_h_m2p_dec[m_x_size][y].write = false;
        signal_dspin_h_m2p_dec[m_x_size][y].read = true;

        signal_dspin_h_p2m_inc[0][y].write = false;
        signal_dspin_h_p2m_inc[0][y].read = true;
        signal_dspin_h_p2m_dec[0][y].write = false;
        signal_dspin_h_p2m_dec[0][y].read = true;
        signal_dspin_h_p2m_inc[m_x_size][y].write = false;
        signal_dspin_h_p2m_inc[m_x_size][y].read = true;
        signal_dspin_h_p2m_dec[m_x_size][y].write = false;
        signal_dspin_h_p2m_dec[m_x_size][y].read = true;

        signal_dspin_h_cla_inc[0][y].write = false;
        signal_dspin_h_cla_inc[0][y].read = true;
        signal_dspin_h_cla_dec[0][y].write = false;
        signal_dspin_h_cla_dec[0][y].read = true;
        signal_dspin_h_cla_inc[m_x_size][y].write = false;
        signal_dspin_h_cla_inc[m_x_size][y].read = true;
        signal_dspin_h_cla_dec[m_x_size][y].write = false;
        signal_dspin_h_cla_dec[m_x_size][y].read = true;
    }

    for (size_t x = 0; x < m_x_size; x++) {
        signal_dspin_v_cmd_inc[x][0].write = false;
        signal_dspin_v_cmd_inc[x][0].read = true;
        signal_dspin_v_cmd_dec[x][0].write = false;
        signal_dspin_v_cmd_dec[x][0].read = true;
        signal_dspin_v_cmd_inc[x][m_y_size].write = false;
        signal_dspin_v_cmd_inc[x][m_y_size].read = true;
        signal_dspin_v_cmd_dec[x][m_y_size].write = false;
        signal_dspin_v_cmd_dec[x][m_y_size].read = true;

        signal_dspin_v_rsp_inc[x][0].write = false;
        signal_dspin_v_rsp_inc[x][0].read = true;
        signal_dspin_v_rsp_dec[x][0].write = false;
        signal_dspin_v_rsp_dec[x][0].read = true;
        signal_dspin_v_rsp_inc[x][m_y_size].write = false;
        signal_dspin_v_rsp_inc[x][m_y_size].read = true;
        signal_dspin_v_rsp_dec[x][m_y_size].write = false;
        signal_dspin_v_rsp_dec[x][m_y_size].read = true;

        signal_dspin_v_m2p_inc[x][0].write = false;
        signal_dspin_v_m2p_inc[x][0].read = true;
        signal_dspin_v_m2p_dec[x][0].write = false;
        signal_dspin_v_m2p_dec[x][0].read = true;
        signal_dspin_v_m2p_inc[x][m_y_size].write = false;
        signal_dspin_v_m2p_inc[x][m_y_size].read = true;
        signal_dspin_v_m2p_dec[x][m_y_size].write = false;
        signal_dspin_v_m2p_dec[x][m_y_size].read = true;

        signal_dspin_v_p2m_inc[x][0].write = false;
        signal_dspin_v_p2m_inc[x][0].read = true;
        signal_dspin_v_p2m_dec[x][0].write = false;
        signal_dspin_v_p2m_dec[x][0].read = true;
        signal_dspin_v_p2m_inc[x][m_y_size].write = false;
        signal_dspin_v_p2m_inc[x][m_y_size].read = true;
        signal_dspin_v_p2m_dec[x][m_y_size].write = false;
        signal_dspin_v_p2m_dec[x][m_y_size].read = true;

        signal_dspin_v_cla_inc[x][0].write = false;
        signal_dspin_v_cla_inc[x][0].read = true;
        signal_dspin_v_cla_dec[x][0].write = false;
        signal_dspin_v_cla_dec[x][0].read = true;
        signal_dspin_v_cla_inc[x][m_y_size].write = false;
        signal_dspin_v_cla_inc[x][m_y_size].read = true;
        signal_dspin_v_cla_dec[x][m_y_size].write = false;
        signal_dspin_v_cla_dec[x][m_y_size].read = true;
    }
    for (size_t x = 0; x < m_x_size; x++) {
      for (size_t y = 0; y < m_y_size; y++) {
        signal_dspin_zu_cmd_inc[x][y].write = false;
        signal_dspin_zu_cmd_inc[x][y].read = true;
        signal_dspin_zu_cmd_dec[x][y].write = false;
        signal_dspin_zu_cmd_dec[x][y].read = true;
        signal_dspin_zd_cmd_inc[x][y].write = false;
        signal_dspin_zd_cmd_inc[x][y].read = true;
        signal_dspin_zd_cmd_dec[x][y].write = false;
        signal_dspin_zd_cmd_dec[x][y].read = true;

        signal_dspin_zu_rsp_inc[x][y].write = false;
        signal_dspin_zu_rsp_inc[x][y].read = true;
        signal_dspin_zu_rsp_dec[x][y].write = false;
        signal_dspin_zu_rsp_dec[x][y].read = true;
        signal_dspin_zd_rsp_inc[x][y].write = false;
        signal_dspin_zd_rsp_inc[x][y].read = true;
        signal_dspin_zd_rsp_dec[x][y].write = false;
        signal_dspin_zd_rsp_dec[x][y].read = true;

        signal_dspin_zu_m2p_inc[x][y].write = false;
        signal_dspin_zu_m2p_inc[x][y].read = true;
        signal_dspin_zu_m2p_dec[x][y].write = false;
        signal_dspin_zu_m2p_dec[x][y].read = true;
        signal_dspin_zd_m2p_inc[x][y].write = false;
        signal_dspin_zd_m2p_inc[x][y].read = true;
        signal_dspin_zd_m2p_dec[x][y].write = false;
        signal_dspin_zd_m2p_dec[x][y].read = true;

        signal_dspin_zu_p2m_inc[x][y].write = false;
        signal_dspin_zu_p2m_inc[x][y].read = true;
        signal_dspin_zu_p2m_dec[x][y].write = false;
        signal_dspin_zu_p2m_dec[x][y].read = true;
        signal_dspin_zd_p2m_inc[x][y].write = false;
        signal_dspin_zd_p2m_inc[x][y].read = true;
        signal_dspin_zd_p2m_dec[x][y].write = false;
        signal_dspin_zd_p2m_dec[x][y].read = true;

        signal_dspin_zu_cla_inc[x][y].write = false;
        signal_dspin_zu_cla_inc[x][y].read = true;
        signal_dspin_zu_cla_dec[x][y].write = false;
        signal_dspin_zu_cla_dec[x][y].read = true;
        signal_dspin_zd_cla_inc[x][y].write = false;
        signal_dspin_zd_cla_inc[x][y].read = true;
        signal_dspin_zd_cla_dec[x][y].write = false;
        signal_dspin_zd_cla_dec[x][y].read = true;
      }
    }
}


}}

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4

