//////////////////////////////////////////////////////////////////////////////
// File: tsar_cluster_mmu.c
// Author: Alain Greiner 
// Copyright: UPMC/LIP6
// Date : march 2011
// This program is released under the GNU public license
//////////////////////////////////////////////////////////////////////////////
// This file defines a TSAR cluster architecture with virtual memory:
// - It uses the virtual_dspin_router as distributed global interconnect 
// - It uses the vci_local_crossbar as local interconnect 
// - It uses the vci_cc_vcache_wrapper
// - It uses the vci_mem_cache
// - It contains a private RAM with a variable latency to emulate the L3 cache
// - It can contain 1, 2 or 4 processors
// - Each processor has a private dma channel (vci_multi_dma)
// - It uses the vci_xicu interrupt controller
// - The peripherals MTTY, BDEV, FBUF, and the boot BROM are in the cluster 
//   containing address 0xBFC00000.
// - The Multi-TTY component controls up to 15 terminals. 
// - Each Multi-DMA component controls up to 8 DMA channels.
// - The DMA IRQs are connected to IRQ_IN[8]...IRQ_IN[15]
// - The TTY IRQs are connected to IRQ_IN[16]...IRQ_IN[30]
// - The BDEV IRQ is connected to IRQ_IN[31]
////////////////////////////////////////////////////////////////////////////////// 

#include "../include/tsar_cluster_mmu.h"

namespace soclib {
namespace caba  {

//////////////////////////////////////////////////////////////////////////
//                 Constructor
//////////////////////////////////////////////////////////////////////////
template<typename vci_param, typename iss_t, int cmd_width, int rsp_width>
TsarClusterMmu<vci_param, iss_t, cmd_width, rsp_width>::TsarClusterMmu(
         sc_module_name                     insname,
         size_t                             nb_procs,
         size_t                             nb_ttys,
         size_t                             nb_dmas,
         size_t                             x_id,
         size_t                             y_id,
         size_t                             cluster_id,
         const soclib::common::MappingTable &mtd,
         const soclib::common::MappingTable &mtc, 
         const soclib::common::MappingTable &mtx, 
         size_t                             x_width,
         size_t                             y_width,
         size_t                             tgtid_memc,
         size_t                             tgtid_xicu,
         size_t                             tgtid_mdma,
         size_t                             tgtid_fbuf,
         size_t                             tgtid_mtty,
         size_t                             tgtid_brom,
         size_t                             tgtid_mnic,
         size_t                             tgtid_bdev,
         size_t                             memc_ways,
         size_t                             memc_sets,
         size_t                             l1_i_ways,
         size_t                             l1_i_sets,
         size_t                             l1_d_ways,
         size_t                             l1_d_sets,
         size_t                             xram_latency,
         bool                               io,
         size_t                             xfb,
         size_t                             yfb,
         char*                              disk_name,
         size_t                             block_size,
         size_t                             nic_channels,
         char*                              nic_rx_name,
         char*                              nic_tx_name,
         uint32_t                           nic_timeout,
         const Loader                      &loader,
         uint32_t                           frozen_cycles,
         uint32_t                           debug_start_cycle,
         bool                               memc_debug_ok,
         bool                               proc_debug_ok)
            : soclib::caba::BaseModule(insname),
            p_clk("clk"),
            p_resetn("resetn")

{
    // Vectors of ports definition

    p_cmd_in        = alloc_elems<DspinInput<cmd_width> >("p_cmd_in", 2, 4);
    p_cmd_out       = alloc_elems<DspinOutput<cmd_width> >("p_cmd_out", 2, 4);
    p_rsp_in        = alloc_elems<DspinInput<rsp_width> >("p_rsp_in", 2, 4);
    p_rsp_out       = alloc_elems<DspinOutput<rsp_width> >("p_rsp_out", 2, 4);

    // Components definition 

    // on direct network : local srcid[proc] in [0..nb_procs-1]
    // on direct network : local srcid[mdma] = nb_procs
    // on direct network : local srcid[bdev] = nb_procs + 1

    // on coherence network : local srcid[proc] in [0...nb_procs-1]
    // on coherence network : local srcid[memc] = nb_procs

    std::cout << "  - building proc_" << x_id << "_" << y_id << "-*" << std::endl;

    for (size_t p = 0; p < nb_procs; p++)
    { 
        std::ostringstream sproc;
        sproc << "proc_" << x_id << "_" << y_id << "_" << p;
        proc[p] = new VciCcVCacheWrapper<vci_param, iss_t>(
                      sproc.str().c_str(),
                      cluster_id*nb_procs + p,
                      mtd,                            // Mapping Table Direct
                      mtc,                            // Mapping Table Coherence
                      IntTab(cluster_id,p),           // SRCID_D
                      IntTab(cluster_id,p),           // SRCID_C
                      IntTab(cluster_id,p),           // TGTID_C
                      8,                              // ITLB ways
                      8,                              // ITLB sets
                      8,                              // DTLB ways
                      8,                              // DTLB sets
                      l1_i_ways,l1_i_sets,16,         // ICACHE size
                      l1_d_ways,l1_d_sets,16,         // DCACHE size
                      4,                              // WBUF nlines
                      4,                              // WBUF nwords
                      x_width,
                      y_width,
                      nb_procs,                       // MEMC local index
                      frozen_cycles,                  // max frozen cycles
                      debug_start_cycle,
                      proc_debug_ok);
    }

    std::cout << "  - building memc_" << x_id << "_" << y_id << std::endl;

    std::ostringstream smemc;
    smemc << "memc_" << x_id << "_" << y_id;
    memc = new VciMemCache<vci_param>(
                     smemc.str().c_str(),
                     mtd, mtc, mtx,
                     IntTab(cluster_id),              // SRCID_X
                     IntTab(cluster_id, nb_procs),    // SRCID_C
                     IntTab(cluster_id, tgtid_memc),  // TGTID_D
                     IntTab(cluster_id, nb_procs),    // TGTID_C
                     memc_ways, memc_sets, 16,        // CACHE SIZE
                     //4096,                            // HEAP SIZE
                     256,                            // HEAP SIZE
                     8,                               // TRANSACTION TABLE DEPTH
                     8,                               // UPDATE TABLE DEPTH
                     debug_start_cycle,
                     memc_debug_ok);

    std::cout << "  - building xram_" << x_id << "_" << y_id << std::endl;

    std::ostringstream sxram;
    sxram << "xram_" << x_id << "_" << y_id;
    xram = new VciSimpleRam<vci_param>(
                     sxram.str().c_str(),
                     IntTab(cluster_id),
                     mtx,
                     loader,
                     xram_latency);

    std::cout << "  - building xicu_" << x_id << "_" << y_id << std::endl;

    std::ostringstream sicu;
    sicu << "xicu_" << x_id << "_" << y_id;
    xicu = new VciXicu<vci_param>(
                     sicu.str().c_str(),
                     mtd,                               // mapping table
                     IntTab(cluster_id, tgtid_xicu),    // TGTID_D
                     nb_procs,                          // number of timer IRQs
                     32,                                // number of hard IRQs
                     0,                                 // number of soft IRQs
                     nb_procs);                         // number of output IRQs

    std::cout << "  - building dma_" << x_id << "_" << y_id << std::endl;

    std::ostringstream sdma;
    sdma << "dma_" << x_id << "_" << y_id;
    mdma = new VciMultiDma<vci_param>(
                     sdma.str().c_str(),
                     mtd,
                     IntTab(cluster_id, nb_procs),        // SRCID
                     IntTab(cluster_id, tgtid_mdma),      // TGTID
                     64,                                  // burst size
                     nb_dmas);                           // number of IRQs

    std::cout << "  - building xbard_" << x_id << "_" << y_id << std::endl;

    size_t nb_direct_initiators      = nb_procs + 1;
    size_t nb_direct_targets         = 3;
    if ( io )
    {
        nb_direct_initiators         = nb_procs + 2;
        nb_direct_targets            = 8;
    }
    std::ostringstream sd;
    sd << "xbard_" << x_id << "_" << y_id;
    xbard = new VciLocalCrossbar<vci_param>(
                     sd.str().c_str(),
                     mtd,
                     IntTab(cluster_id),           // cluster initiator index
                     IntTab(cluster_id),           // cluster target index
                     nb_direct_initiators,         // number of initiators
                     nb_direct_targets);           // number of targets      

    std::cout << "  - building xbarc_" << x_id << "_" << y_id << std::endl;

    std::ostringstream sc;
    sc << "xbarc_" << x_id << "_" << y_id;
    xbarc = new VciLocalCrossbar<vci_param>(
                     sc.str().c_str(),
                     mtc,
                     IntTab(cluster_id),           // cluster initiator index
                     IntTab(cluster_id),           // cluster target index
                     nb_procs + 1,                 // number of initiators
                     nb_procs + 1);                // number of targets

    std::cout << "  - building wrappers in cluster_" << x_id << "_" << y_id << std::endl;

    std::ostringstream wid;
    wid << "iniwrapperd_" << x_id << "_" << y_id;
    iniwrapperd = new VciVdspinInitiatorWrapper<vci_param,cmd_width,rsp_width>(
                     wid.str().c_str(),
                     4,                            // cmd fifo depth
                     4);                           // rsp fifo depth

    std::ostringstream wtd;
    wtd << "tgtwrapperd_" << x_id << "_" << y_id;
    tgtwrapperd = new VciVdspinTargetWrapper<vci_param,cmd_width,rsp_width>(
                     wtd.str().c_str(),
                     4,                            // cmd fifo depth
                     4);                           // rsp fifo depth

    std::ostringstream wic;
    wic << "iniwrapperc_" << x_id << "_" << y_id;
    iniwrapperc = new VciVdspinInitiatorWrapper<vci_param,cmd_width,rsp_width>(
                     wic.str().c_str(),
                     4,                            // cmd fifo depth
                     4);                           // rsp fifo depth

    std::ostringstream wtc;
    wtc << "tgtwrapperc_" << x_id << "_" << y_id;
    tgtwrapperc = new VciVdspinTargetWrapper<vci_param,cmd_width,rsp_width>(
    wtc.str().c_str(),
                     4,                            // cmd fifo depth
                     4);                           // rsp fifo depth

    std::cout << "  - building cmdrouter_" << x_id << "_" << y_id << std::endl;

    std::ostringstream scmd;
    scmd << "cmdrouter_" << x_id << "_" << y_id;
    cmdrouter = new VirtualDspinRouter<cmd_width>(
                     scmd.str().c_str(),
                     x_id,y_id,                    // coordinate in the mesh
                     x_width, y_width,             // x & y fields width
                     4,4);                         // input & output fifo depths

    std::cout << "  - building rsprouter_" << x_id << "_" << y_id << std::endl;

    std::ostringstream srsp;
    srsp << "rsprouter_" << x_id << "_" << y_id;
    rsprouter = new VirtualDspinRouter<rsp_width>(
                     srsp.str().c_str(),
                     x_id,y_id,                    // coordinates in mesh
                     x_width, y_width,             // x & y fields width
                     4,4);                         // input & output fifo depths

    // IO cluster components
    if ( io )
    {
        std::cout << "  - building brom" << std::endl;

        brom = new VciSimpleRam<vci_param>(
                        "brom",
                        IntTab(cluster_id, tgtid_brom),
                        mtd,
                        loader);

        std::cout << "  - building fbuf" << std::endl;

        fbuf = new VciFrameBuffer<vci_param>(
                        "fbuf",
                        IntTab(cluster_id, tgtid_fbuf),
                        mtd,
                        xfb, yfb); 

        std::cout << "  - building fbuf" << std::endl;

        bdev = new VciBlockDeviceTsarV4<vci_param>(
                        "bdev",
                        mtd,
                        IntTab(cluster_id, nb_procs+1),
                        IntTab(cluster_id, tgtid_bdev),
                        disk_name,
                        block_size,
                        64);            // burst size

        std::cout << "  - building mnic" << std::endl;

        mnic = new VciMultiNic<vci_param>(
                        "mnic",
                        IntTab(cluster_id, tgtid_mnic),
                        mtd,
                        nic_channels,
                        nic_rx_name,
                        nic_tx_name,
                        0,   // default mac address MAC4
                        0 ); // default mac address MAC2

        std::cout << "  - building mtty" << std::endl;

        std::vector<std::string> vect_names;
        for( size_t tid = 0 ; tid < (nb_ttys) ; tid++ )
        {
            std::ostringstream term_name;
            term_name <<  "term" << tid;
            vect_names.push_back(term_name.str().c_str());
        }
        mtty = new VciMultiTty<vci_param>(
                        "mtty",
                        IntTab(cluster_id, tgtid_mtty),
                        mtd, 
                        vect_names);
    }

    std::cout << std::endl;

    ////////////////////////////////////
    // Connections are defined here
    ////////////////////////////////////

    // CMDROUTER and RSPROUTER
    cmdrouter->p_clk                        (this->p_clk);
    cmdrouter->p_resetn                     (this->p_resetn);
    rsprouter->p_clk                        (this->p_clk);
    rsprouter->p_resetn                     (this->p_resetn);
    for (int x = 0; x < 2; x++)
    {
        for(int y = 0; y < 4; y++)
        {
            cmdrouter->p_out[x][y]          (this->p_cmd_out[x][y]);
            cmdrouter->p_in[x][y]           (this->p_cmd_in[x][y]);
            rsprouter->p_out[x][y]          (this->p_rsp_out[x][y]);
            rsprouter->p_in[x][y]           (this->p_rsp_in[x][y]);
        }
    }

    cmdrouter->p_out[0][4]                  (signal_dspin_cmd_g2l_d);
    cmdrouter->p_out[1][4]                  (signal_dspin_cmd_g2l_c);
    cmdrouter->p_in[0][4]                   (signal_dspin_cmd_l2g_d);
    cmdrouter->p_in[1][4]                   (signal_dspin_cmd_l2g_c);

    rsprouter->p_out[0][4]                  (signal_dspin_rsp_g2l_d);
    rsprouter->p_out[1][4]                  (signal_dspin_rsp_g2l_c);
    rsprouter->p_in[0][4]                   (signal_dspin_rsp_l2g_d);
    rsprouter->p_in[1][4]                   (signal_dspin_rsp_l2g_c);

    std::cout << "  - CMD & RSP routers connected" << std::endl;

    // VCI/DSPIN WRAPPERS
    iniwrapperd->p_clk                      (this->p_clk);
    iniwrapperd->p_resetn                   (this->p_resetn);
    iniwrapperd->p_vci                      (signal_vci_l2g_d);
    iniwrapperd->p_dspin_out                (signal_dspin_cmd_l2g_d);
    iniwrapperd->p_dspin_in                 (signal_dspin_rsp_g2l_d);

    tgtwrapperd->p_clk                      (this->p_clk);
    tgtwrapperd->p_resetn                   (this->p_resetn);
    tgtwrapperd->p_vci                      (signal_vci_g2l_d);
    tgtwrapperd->p_dspin_out                (signal_dspin_rsp_l2g_d);
    tgtwrapperd->p_dspin_in                 (signal_dspin_cmd_g2l_d);

    iniwrapperc->p_clk                      (this->p_clk);
    iniwrapperc->p_resetn                   (this->p_resetn);
    iniwrapperc->p_vci                      (signal_vci_l2g_c);
    iniwrapperc->p_dspin_out                (signal_dspin_cmd_l2g_c);
    iniwrapperc->p_dspin_in                 (signal_dspin_rsp_g2l_c);

    tgtwrapperc->p_clk                      (this->p_clk);
    tgtwrapperc->p_resetn                   (this->p_resetn);
    tgtwrapperc->p_vci                      (signal_vci_g2l_c);
    tgtwrapperc->p_dspin_out                (signal_dspin_rsp_l2g_c);
    tgtwrapperc->p_dspin_in                 (signal_dspin_cmd_g2l_c);

    std::cout << "  - VCI/DSPIN wrappers connected" << std::endl;

    // CROSSBAR direct
    xbard->p_clk                            (this->p_clk);
    xbard->p_resetn                         (this->p_resetn);
    xbard->p_initiator_to_up                (signal_vci_l2g_d);
    xbard->p_target_to_up                   (signal_vci_g2l_d);

    xbard->p_to_target[tgtid_memc]          (signal_vci_tgt_d_memc);
    xbard->p_to_target[tgtid_xicu]          (signal_vci_tgt_d_xicu);
    xbard->p_to_target[tgtid_mdma]          (signal_vci_tgt_d_mdma);

    xbard->p_to_initiator[nb_procs]         (signal_vci_ini_d_mdma);

    for (size_t p = 0; p < nb_procs; p++)
    {
        xbard->p_to_initiator[p]            (signal_vci_ini_d_proc[p]);
    }

    if ( io )
    {
        xbard->p_to_target[tgtid_mtty]      (signal_vci_tgt_d_mtty);
        xbard->p_to_target[tgtid_brom]      (signal_vci_tgt_d_brom);
        xbard->p_to_target[tgtid_bdev]      (signal_vci_tgt_d_bdev);
        xbard->p_to_target[tgtid_fbuf]      (signal_vci_tgt_d_fbuf);
        xbard->p_to_target[tgtid_mnic]      (signal_vci_tgt_d_mnic);

        xbard->p_to_initiator[nb_procs+1]   (signal_vci_ini_d_bdev);
    }

    std::cout << "  - Direct crossbar connected" << std::endl;

    // CROSSBAR coherence
    xbarc->p_clk                            (this->p_clk);
    xbarc->p_resetn                         (this->p_resetn);
    xbarc->p_initiator_to_up                (signal_vci_l2g_c);
    xbarc->p_target_to_up                   (signal_vci_g2l_c);
    xbarc->p_to_initiator[nb_procs]         (signal_vci_ini_c_memc);
    xbarc->p_to_target[nb_procs]            (signal_vci_tgt_c_memc);
    for (size_t p = 0; p < nb_procs; p++) 
    {
        xbarc->p_to_target[p]               (signal_vci_tgt_c_proc[p]);
        xbarc->p_to_initiator[p]            (signal_vci_ini_c_proc[p]);
    }

    std::cout << "  - Coherence crossbar connected" << std::endl;

    // Processors
    for (size_t p = 0; p < nb_procs; p++)
    {
        proc[p]->p_clk                      (this->p_clk);
        proc[p]->p_resetn                   (this->p_resetn);
        proc[p]->p_vci_ini_d                (signal_vci_ini_d_proc[p]);
        proc[p]->p_vci_ini_c                (signal_vci_ini_c_proc[p]);
        proc[p]->p_vci_tgt_c                (signal_vci_tgt_c_proc[p]);
        proc[p]->p_irq[0]                   (signal_proc_it[p]);
        for ( size_t j = 1 ; j < 6 ; j++)
        {
            proc[p]->p_irq[j]               (signal_false);
        }
    }

    std::cout << "  - Processors connected" << std::endl;

    // XICU
    xicu->p_clk                     	(this->p_clk);
    xicu->p_resetn                  	(this->p_resetn);
    xicu->p_vci                     	(signal_vci_tgt_d_xicu);
    for ( size_t p=0 ; p<nb_procs ; p++)
    {
        xicu->p_irq[p]              	(signal_proc_it[p]);
    }
    for ( size_t i=0 ; i<32 ; i++)
    {
        if ( io ) // I/O cluster
        {
            if      (i < 8)                  xicu->p_hwi[i] (signal_false);
            else if (i < (8 + nb_dmas))      xicu->p_hwi[i]	(signal_irq_mdma[i-8]);
            else if (i < 16)                 xicu->p_hwi[i] (signal_false);
            else if (i < (16 + nb_ttys))     xicu->p_hwi[i] (signal_irq_mtty[i-16]);
            else if (i < 31)                 xicu->p_hwi[i]	(signal_false);
            else                             xicu->p_hwi[i] (signal_irq_bdev);
        }
        else      // other clusters
        {
            if      (i < 8)                  xicu->p_hwi[i] (signal_false);
            else if (i < (8 + nb_dmas))      xicu->p_hwi[i]	(signal_irq_mdma[i-8]);
            else                             xicu->p_hwi[i]	(signal_false);
        }
    }

    std::cout << "  - XICU connected" << std::endl;

    // MEMC
    memc->p_clk                     	(this->p_clk);
    memc->p_resetn                  	(this->p_resetn);
    memc->p_vci_ixr                 	(signal_vci_xram);
    memc->p_vci_tgt                 	(signal_vci_tgt_d_memc);
    memc->p_vci_ini                 	(signal_vci_ini_c_memc);
    memc->p_vci_tgt_cleanup         	(signal_vci_tgt_c_memc);

    std::cout << "  - MEMC connected" << std::endl;

    // XRAM
    xram->p_clk                     	(this->p_clk);
    xram->p_resetn                  	(this->p_resetn);
    xram->p_vci                 	      (signal_vci_xram);

    std::cout << "  - XRAM connected" << std::endl;

    // CDMA
    mdma->p_clk                       	(this->p_clk);
    mdma->p_resetn                    	(this->p_resetn);
    mdma->p_vci_target                	(signal_vci_tgt_d_mdma);
    mdma->p_vci_initiator             	(signal_vci_ini_d_mdma);
    for (size_t i=0 ; i<nb_dmas ; i++)
    {
        mdma->p_irq[i]                  (signal_irq_mdma[i]);
    }

    std::cout << "  - MDMA connected" << std::endl;

	 // Components in I/O cluster

	 if ( io )
	 {
        // BDEV            
	     bdev->p_clk                    (this->p_clk);
        bdev->p_resetn                 (this->p_resetn);
        bdev->p_irq                    (signal_irq_bdev);
        bdev->p_vci_target             (signal_vci_tgt_d_bdev);
        bdev->p_vci_initiator          (signal_vci_ini_d_bdev);

        std::cout << "  - BDEV connected" << std::endl;

        // FBUF
        fbuf->p_clk                    (this->p_clk);
        fbuf->p_resetn                 (this->p_resetn);
        fbuf->p_vci                    (signal_vci_tgt_d_fbuf);

        std::cout << "  - FBUF connected" << std::endl;

        // MNIC
        mnic->p_clk                    (this->p_clk);
        mnic->p_resetn                 (this->p_resetn);
        mnic->p_vci                    (signal_vci_tgt_d_mnic);
        for ( size_t i=0 ; i<nic_channels ; i++ )
        {
            mnic->p_rx_irq[i]          (signal_irq_mnic_rx[i]);
            mnic->p_tx_irq[i]          (signal_irq_mnic_tx[i]);
        }

        std::cout << "  - MNIC connected" << std::endl;

        // BROM
        brom->p_clk                    (this->p_clk);
        brom->p_resetn                 (this->p_resetn);
        brom->p_vci                    (signal_vci_tgt_d_brom);

        std::cout << "  - BROM connected" << std::endl;

        // MTTY
        mtty->p_clk                    (this->p_clk);
        mtty->p_resetn                 (this->p_resetn);
        mtty->p_vci                    (signal_vci_tgt_d_mtty);
        for ( size_t i=0 ; i<nb_ttys ; i++ )
        {
            mtty->p_irq[i]           	(signal_irq_mtty[i]);
        }

        std::cout << "  - MTTY connected" << std::endl;
   }
} // end constructor

///////////////////////////////////////////////////////////////////////////
//    destructor
///////////////////////////////////////////////////////////////////////////
// The destructor body is empty: it does not explicitly release the components
// created with `new` nor the port arrays obtained from alloc_elems() in the
// constructor.
// NOTE(review): presumably acceptable because a cluster lives until the end
// of the simulation — confirm before creating/destroying clusters dynamically.
template<typename vci_param, typename iss_t, int cmd_width, int rsp_width>
TsarClusterMmu<vci_param, iss_t, cmd_width, rsp_width>::~TsarClusterMmu() {}

}
}


// Local Variables:
// tab-width: 3
// c-basic-offset: 3
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=3:tabstop=3:softtabstop=3



