/* -*- c++ -*-
 *
 * SOCLIB_LGPL_HEADER_BEGIN
 * 
 * This file is part of SoCLib, GNU LGPLv2.1.
 * 
 * SoCLib is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation; version 2.1 of the License.
 * 
 * SoCLib is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with SoCLib; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 * 
 * SOCLIB_LGPL_HEADER_END
 *
 * Copyright (c) UPMC, Lip6, SoC
 *         Alain Greiner <alain.greiner@lip6.fr>, 2008
 *
 * Maintainers: alain
 */
 
#ifndef SOCLIB_CABA_VCI_CC_XCACHE_WRAPPER_V4_H
#define SOCLIB_CABA_VCI_CC_XCACHE_WRAPPER_V4_H

#include <inttypes.h>
#include <fstream>
#include <systemc>
#include <queue>
#include "caba_base_module.h"
#include "multi_write_buffer.h"
#include "generic_cache.h"
#include "generic_fifo.h"
#include "vci_initiator.h"
#include "vci_target.h"
#include "mapping_table.h"
#include "static_assert.h"

/*
 * ----------------------------------------------------------
 * Implementation
 * ----------------------------------------------------------
 *
 * CC_XCACHE_WRAPPER_MULTI_CACHE
 *     1    - icache static partitioning
 *     2    - icache dedicated
 *
 * ----------------------------------------------------------
 * Debug
 * ----------------------------------------------------------
 *
 * CC_XCACHE_WRAPPER_STOP_SIMULATION
 *   stop the simulation if a processor stays stalled for a long time
 *   (configurable with "stop_simulation" function)
 *
 * CC_XCACHE_WRAPPER_DEBUG
 *   Add log messages to help debugging
 *
 * CC_XCACHE_WRAPPER_DEBUG_CYCLE_MIN
 *   Number of cycles before debug messages start being printed
 *
 * CC_XCACHE_WRAPPER_DEBUG_FILE_TRANSACTION
 *   Print transaction between :
 *     - the cpu and the cache (icache and dcache)
 *     - vci
 *     - cleanup
 *     - coherency
 *
 * MWBUF_VHDL_TESTBENCH
 *   generate a vhdl testbench for multi write buffer
 */

// ----------------------------------------------------------
// Compile-time configuration.
// Every macro below is only a default: it can be overridden by defining
// the macro before this header is included (or on the compiler command line).
// ----------------------------------------------------------

// implementation
// 1 : icache static partitioning, 2 : icache dedicated (see header comment above).
#ifndef CC_XCACHE_WRAPPER_MULTI_CACHE
#define CC_XCACHE_WRAPPER_MULTI_CACHE                 2
// if multi_cache :
// <tsar toplevel>/modules/vci_mem_cache_v4/caba/source/include/mem_cache_directory_v4.h : L1_MULTI_CACHE 1
#endif

// debug
#ifndef CC_XCACHE_WRAPPER_STOP_SIMULATION
#define CC_XCACHE_WRAPPER_STOP_SIMULATION             1
#endif
#ifndef CC_XCACHE_WRAPPER_DEBUG
#define CC_XCACHE_WRAPPER_DEBUG                       0
#endif
#ifndef CC_XCACHE_WRAPPER_DEBUG_CYCLE_MIN
#define CC_XCACHE_WRAPPER_DEBUG_CYCLE_MIN             4725000
#endif
#ifndef CC_XCACHE_WRAPPER_DEBUG_FILE_TRANSACTION
#define CC_XCACHE_WRAPPER_DEBUG_FILE_TRANSACTION      0
#endif
// The path has its own guard: it must exist even when the switch above is
// predefined by the build, otherwise enabling transaction logging from the
// command line would leave the path macro undefined.
#ifndef CC_XCACHE_WRAPPER_DEBUG_FILE_TRANSACTION_PATH
#define CC_XCACHE_WRAPPER_DEBUG_FILE_TRANSACTION_PATH "log"
#endif
#ifndef MWBUF_VHDL_TESTBENCH
#define MWBUF_VHDL_TESTBENCH                          0
#endif

namespace soclib {
namespace caba {

using namespace sc_core;

////////////////////////////////////////////
// VciCcXCacheWrapperV4
//
// SoCLib CABA module wrapping one or more processor ISS instances (m_iss)
// together with instruction caches (r_icache), data caches (r_dcache) and
// write buffers (r_wbuf). It exposes two VCI initiator ports (p_vci_ini_rw,
// p_vci_ini_c) and one VCI target port (p_vci_tgt).
//
// Template parameters:
//   vci_param : VCI parameter set; must provide fast_addr_t and the
//               STORE_COND_ATOMIC / STORE_COND_NOT_ATOMIC constants.
//   iss_t     : processor ISS type; must provide the InstructionRequest,
//               InstructionResponse, DataRequest and DataResponse types and
//               the SC_ATOMIC / SC_NOT_ATOMIC constants.
//
// This header only declares the class; all member functions are defined
// elsewhere.
template<typename vci_param, typename iss_t>
class VciCcXCacheWrapperV4
    : public soclib::caba::BaseModule
{
public:

    // Write policy accepted by the constructor. It is public so that callers
    // can actually name RELEASE_WRITE_THROUGH when constructing the module
    // (the constructor takes a write_policy_t argument).
    typedef enum
        {
            WRITE_THROUGH,
            RELEASE_WRITE_THROUGH
        } write_policy_t;

private:

    typedef uint64_t            vhdl_tb_t;
    typedef sc_dt::sc_uint<40>  addr_40;   // 40-bit address
    typedef uint32_t            data_t;
    typedef uint32_t            tag_t;
    typedef uint32_t            be_t;      // byte-enable mask
    typedef typename vci_param::fast_addr_t vci_addr_t;

    // States of the data cache FSM
    // (presumably the values held by r_dcache_fsm / r_dcache_fsm_save).
    enum dcache_fsm_state_e {
        DCACHE_IDLE,
        DCACHE_WRITE_UPDT,
        DCACHE_MISS_VICTIM,
        DCACHE_MISS_WAIT,
        DCACHE_MISS_UPDT,
        DCACHE_UNC_WAIT,
        DCACHE_SC_WAIT,
        DCACHE_INVAL,
        DCACHE_SYNC,
        DCACHE_ERROR,
        DCACHE_CC_CHECK,
        DCACHE_CC_INVAL,
        DCACHE_CC_UPDT,
        DCACHE_CC_CLEANUP
    };

    // States of the instruction cache FSM
    // (presumably the values held by r_icache_fsm / r_icache_fsm_save).
    enum icache_fsm_state_e {
        ICACHE_IDLE,
        ICACHE_MISS_VICTIM,
        ICACHE_MISS_WAIT,
        ICACHE_MISS_UPDT,
        ICACHE_UNC_WAIT,
        ICACHE_ERROR,
        ICACHE_CC_CLEANUP,
        ICACHE_CC_CHECK,
        ICACHE_CC_INVAL,
        ICACHE_CC_UPDT
    };

    // States of the VCI command FSM (presumably held by r_vci_cmd_fsm).
    enum cmd_fsm_state_e {
        CMD_IDLE,
        CMD_INS_MISS,
        CMD_INS_UNC,
        CMD_DATA_MISS,
        CMD_DATA_UNC,
        CMD_DATA_WRITE,
        CMD_DATA_SC
    };

    // States of the VCI response FSM (presumably held by r_vci_rsp_fsm).
    enum rsp_fsm_state_e {
        RSP_IDLE,
        RSP_INS_MISS,
        RSP_INS_UNC,
        RSP_DATA_MISS,
        RSP_DATA_UNC,
        RSP_DATA_WRITE,
        RSP_DATA_SC
    };

    // States of the VCI target FSM (presumably held by r_vci_tgt_fsm).
    enum tgt_fsm_state_e {
        TGT_IDLE,
        TGT_UPDT_WORD,
        TGT_UPDT_DATA,
        TGT_REQ_BROADCAST,
        TGT_REQ_ICACHE,
        TGT_REQ_DCACHE,
        TGT_RSP_BROADCAST,
        TGT_RSP_ICACHE,
        TGT_RSP_DCACHE
    };

    // States of the cleanup FSM (presumably held by r_cleanup_fsm).
    enum cleanup_fsm_state_e {
        CLEANUP_IDLE,
        CLEANUP_REQ,
        CLEANUP_RSP_DCACHE,
        CLEANUP_RSP_ICACHE
    };

    // Cleanup transaction types.
    enum transaction_type_c_e {
        // convention with memcache
        TYPE_DATA_CLEANUP = 0x0,
        TYPE_INS_CLEANUP  = 0x1
    };

    // Read/write transaction types.
    enum transaction_type_rw_e {
        // convention with memcache
        // b0 : 1 if cached
        // b1 : 1 if instruction
        // b2 : 1 if sc
        TYPE_DATA_UNC     = 0x0,
        TYPE_DATA_MISS    = 0x1,
        TYPE_INS_UNC      = 0x2,
        TYPE_INS_MISS     = 0x3,
        TYPE_DATA_SC      = 0x4  // sc is data and not cached
    };

public:

    // PORTS
    sc_in<bool>                             p_clk;
    sc_in<bool>                             p_resetn;
    sc_in<bool>                          ** p_irq;        //[m_nb_cpu][iss_t::n_irq]
    soclib::caba::VciInitiator<vci_param>   p_vci_ini_rw;
    soclib::caba::VciInitiator<vci_param>   p_vci_ini_c;
    soclib::caba::VciTarget<vci_param>      p_vci_tgt;

private:

    // STRUCTURAL PARAMETERS
    // NOTE: the declaration order of data members is also their
    // initialisation order; do not reorder without checking the constructor.
    const soclib::common::AddressDecodingTable<vci_addr_t, bool>    m_cacheability_table;
    const soclib::common::Segment                                   m_segment;
    iss_t            ** m_iss;        //[m_nb_cpu]
    const uint32_t      m_srcid_rw;
    const uint32_t      m_srcid_c;

    const size_t         m_nb_cpu;
    const size_t         m_nb_icache;
    const size_t         m_nb_dcache;
    const size_t         m_nb_cache;
    const size_t         m_dcache_ways;
    const size_t         m_dcache_words;
    const uint32_t       m_dcache_words_shift;
    const size_t         m_dcache_yzmask;
    const size_t         m_icache_ways;
    const size_t         m_icache_words;
    const uint32_t       m_icache_words_shift;
    const size_t         m_icache_yzmask;
    const write_policy_t m_write_policy;
    const size_t         m_cache_words; // max between m_dcache_words and m_icache_words

#if CC_XCACHE_WRAPPER_STOP_SIMULATION
    // State of the "stop simulation when a processor is stalled" debug feature.
    bool                m_stop_simulation;
    uint32_t            m_stop_simulation_nb_frz_cycles_max;
    uint32_t          * m_stop_simulation_nb_frz_cycles; //[m_nb_cpu]
#endif // CC_XCACHE_WRAPPER_STOP_SIMULATION

    // REGISTERS
    sc_signal<uint32_t>     r_cpu_prior;
    sc_signal<uint32_t>   * r_icache_lock;         //[m_nb_icache]
    sc_signal<uint32_t>   * r_dcache_lock;         //[m_nb_dcache]
    sc_signal<bool>       * r_dcache_sync;         //[m_nb_dcache]

    // Data cache registers
    sc_signal<int>        * r_dcache_fsm;          //[m_nb_dcache]
    sc_signal<int>        * r_dcache_fsm_save;     //[m_nb_dcache]
    sc_signal<addr_40>    * r_dcache_addr_save;    //[m_nb_dcache]
    sc_signal<data_t>     * r_dcache_wdata_save;   //[m_nb_dcache]
    sc_signal<data_t>     * r_dcache_rdata_save;   //[m_nb_dcache]
    sc_signal<int>        * r_dcache_type_save;    //[m_nb_dcache]
    sc_signal<be_t>       * r_dcache_be_save;      //[m_nb_dcache]
    sc_signal<bool>       * r_dcache_cached_save;  //[m_nb_dcache]
    sc_signal<uint32_t>   * r_dcache_num_cpu_save; //[m_nb_dcache]
    sc_signal<bool>       * r_dcache_cleanup_req;  //[m_nb_dcache]
    sc_signal<addr_40>    * r_dcache_cleanup_line; //[m_nb_dcache]
    sc_signal<bool>       * r_dcache_miss_req;     //[m_nb_dcache]
    sc_signal<size_t>     * r_dcache_miss_way;     //[m_nb_dcache]
    sc_signal<size_t>     * r_dcache_miss_set;     //[m_nb_dcache]
    sc_signal<bool>       * r_dcache_unc_req;      //[m_nb_dcache]
    sc_signal<bool>       * r_dcache_sc_req;       //[m_nb_dcache]
    sc_signal<bool>       * r_dcache_inval_rsp;    //[m_nb_dcache]
    sc_signal<size_t>     * r_dcache_update_addr;  //[m_nb_dcache]
    sc_signal<data_t>    ** r_dcache_ll_data;      //[m_nb_dcache][m_nb_cpu]
    sc_signal<addr_40>   ** r_dcache_ll_addr;      //[m_nb_dcache][m_nb_cpu]
    sc_signal<bool>      ** r_dcache_ll_valid;     //[m_nb_dcache][m_nb_cpu]
    sc_signal<bool>       * r_dcache_previous_unc; //[m_nb_dcache]

    // Instruction cache registers
    sc_signal<int>        * r_icache_fsm;          //[m_nb_icache]
    sc_signal<int>        * r_icache_fsm_save;     //[m_nb_icache]
    sc_signal<addr_40>    * r_icache_addr_save;    //[m_nb_icache]
    sc_signal<bool>       * r_icache_miss_req;     //[m_nb_icache]
    sc_signal<size_t>     * r_icache_miss_way;     //[m_nb_icache]
    sc_signal<size_t>     * r_icache_miss_set;     //[m_nb_icache]
    sc_signal<bool>       * r_icache_unc_req;      //[m_nb_icache]
    sc_signal<bool>       * r_icache_cleanup_req;  //[m_nb_icache]
    sc_signal<addr_40>    * r_icache_cleanup_line; //[m_nb_icache]
    sc_signal<bool>       * r_icache_inval_rsp;    //[m_nb_icache]
    sc_signal<size_t>     * r_icache_update_addr;  //[m_nb_icache]
    sc_signal<bool>       * r_icache_buf_unc_valid;//[m_nb_icache]

    // VCI command registers
    sc_signal<int>          r_vci_cmd_fsm;
    sc_signal<size_t>       r_vci_cmd_min;
    sc_signal<size_t>       r_vci_cmd_max;
    sc_signal<size_t>       r_vci_cmd_cpt;
    sc_signal<bool>         r_vci_cmd_dcache_prior;
    sc_signal<uint32_t>     r_vci_cmd_num_icache_prior;
    sc_signal<uint32_t>     r_vci_cmd_num_dcache_prior;
    sc_signal<uint32_t>     r_vci_cmd_num_cache;

    // VCI response registers
    sc_signal<int>          r_vci_rsp_fsm;
    sc_signal<size_t>       r_vci_rsp_cpt;
    sc_signal<uint32_t>     r_vci_rsp_num_cache;
    sc_signal<bool>       * r_vci_rsp_ins_error;   //[m_nb_icache]
    sc_signal<bool>       * r_vci_rsp_data_error;  //[m_nb_dcache]

    // Response FIFOs, one data/num_cache pair per cache kind
    GenericFifo<data_t>     r_vci_rsp_fifo_icache_data;
    GenericFifo<uint32_t>   r_vci_rsp_fifo_icache_num_cache;
    GenericFifo<data_t>     r_vci_rsp_fifo_dcache_data;
    GenericFifo<uint32_t>   r_vci_rsp_fifo_dcache_num_cache;

    data_t                * r_tgt_buf;             //[m_cache_words]
    be_t                  * r_tgt_be;              //[m_cache_words]
    sc_signal<uint32_t>     r_cache_word;

    // VCI target registers
    sc_signal<int>          r_vci_tgt_fsm;
    sc_signal<addr_40>      r_tgt_iaddr;
    sc_signal<addr_40>      r_tgt_daddr;
    sc_signal<size_t>       r_tgt_word;
    sc_signal<bool>         r_tgt_update;
    sc_signal<bool>         r_tgt_update_data;
  //sc_signal<bool>         r_tgt_brdcast;
    sc_signal<size_t>       r_tgt_srcid;
    sc_signal<size_t>       r_tgt_pktid;
    sc_signal<size_t>       r_tgt_trdid;
  //sc_signal<size_t>       r_tgt_plen;
    sc_signal<uint32_t>     r_tgt_num_cache;
    sc_signal<bool>       * r_tgt_icache_req;      //[m_nb_icache]
    sc_signal<bool>       * r_tgt_icache_rsp;      //[m_nb_icache]
    sc_signal<bool>       * r_tgt_dcache_req;      //[m_nb_dcache]
    sc_signal<bool>       * r_tgt_dcache_rsp;      //[m_nb_dcache]

    sc_signal<int>          r_cleanup_fsm;         // controls initiator port of the coherence network
    sc_signal<uint32_t>     r_cleanup_num_cache;
    sc_signal<bool>         r_cleanup_icache;

    // Write buffers and caches (arrays of pointers; sizes are not visible in
    // this header).
    MultiWriteBuffer<addr_40>** r_wbuf;
    GenericCache<vci_addr_t> ** r_icache;
    GenericCache<vci_addr_t> ** r_dcache;

#if CC_XCACHE_WRAPPER_DEBUG_FILE_TRANSACTION
    // Transaction logging: one enable flag and one output stream per source.
    bool                        generate_log_transaction_file_icache;
    bool                        generate_log_transaction_file_dcache;
    bool                        generate_log_transaction_file_cmd;
    bool                        generate_log_transaction_file_tgt;
    bool                        generate_log_transaction_file_cleanup;

    std::ofstream             * log_transaction_file_icache; //[m_nb_cpu]
    std::ofstream             * log_transaction_file_dcache; //[m_nb_cpu]
    std::ofstream               log_transaction_file_cmd;
    std::ofstream               log_transaction_file_tgt;
    std::ofstream               log_transaction_file_cleanup;
#endif

#if MWBUF_VHDL_TESTBENCH
    // VHDL testbench generation for the multi write buffer.
    bool                        simulation_started;
    bool                        generate_vhdl_testbench_mwbuf;
    std::ofstream             * vhdl_testbench_mwbuf; //[m_nb_dcache]
#endif

    // Activity counters
    uint32_t   m_cpt_dcache_data_read;             // * DCACHE DATA READ
    uint32_t   m_cpt_dcache_data_write;            // * DCACHE DATA WRITE
    uint32_t   m_cpt_dcache_dir_read;              // * DCACHE DIR READ
    uint32_t   m_cpt_dcache_dir_write;             // * DCACHE DIR WRITE

    uint32_t   m_cpt_icache_data_read;             // * ICACHE DATA READ
    uint32_t   m_cpt_icache_data_write;            // * ICACHE DATA WRITE
    uint32_t   m_cpt_icache_dir_read;              // * ICACHE DIR READ
    uint32_t   m_cpt_icache_dir_write;             // * ICACHE DIR WRITE

    uint32_t   m_cpt_cc_update_icache;             // number of coherence update packets (for icache)
    uint32_t   m_cpt_cc_update_dcache;             // number of coherence update packets (for dcache)
    uint32_t   m_cpt_cc_inval_broadcast;           // number of coherence inval packets
    uint32_t   m_cpt_cc_inval_icache;              // number of coherence inval packets
    uint32_t   m_cpt_cc_inval_dcache;              // number of coherence inval packets
    uint32_t   m_cpt_cc_update_icache_word_useful; // number of valid words in coherence update packets
    uint32_t   m_cpt_cc_update_dcache_word_useful; // number of valid words in coherence update packets

    uint32_t * m_cpt_frz_cycles;                   // * number of cycles where the cpu is frozen
    uint32_t   m_cpt_total_cycles;                 // total number of cycles

    uint32_t   m_cpt_data_read;                    //   number of data read
    uint32_t   m_cpt_data_read_miss;               //   number of data read miss
    uint32_t   m_cpt_data_read_uncached;           //   number of data read uncached
    uint32_t   m_cpt_data_write;                   //   number of data write
    uint32_t   m_cpt_data_write_miss;              //   number of data write miss
    uint32_t   m_cpt_data_write_uncached;          //   number of data write uncached
    uint32_t   m_cpt_ins_miss;                     // * number of instruction miss

    uint32_t   m_cost_write_frz;                   // * number of frozen cycles related to write buffer
    uint32_t   m_cost_data_miss_frz;               // * number of frozen cycles related to data miss
    uint32_t   m_cost_unc_read_frz;                // * number of frozen cycles related to uncached read
    uint32_t   m_cost_ins_miss_frz;                // * number of frozen cycles related to ins miss

    uint32_t   m_cpt_imiss_transaction;            // * number of VCI instruction miss transactions
    uint32_t   m_cpt_dmiss_transaction;            // * number of VCI data miss transactions
    uint32_t   m_cpt_unc_transaction;              // * number of VCI uncached read transactions
    uint32_t   m_cpt_data_write_transaction;       // * number of VCI write transactions

    uint32_t   m_cost_imiss_transaction;           // * cumulated duration for VCI IMISS transactions
    uint32_t   m_cost_dmiss_transaction;           // * cumulated duration for VCI DMISS transactions
    uint32_t   m_cost_unc_transaction;             // * cumulated duration for VCI UNC transactions
    uint32_t   m_cost_write_transaction;           // * cumulated duration for VCI WRITE transactions
    uint32_t   m_length_write_transaction;         // * cumulated length for VCI WRITE transactions

    // Per-cache counters
    uint32_t * m_cpt_icache_access;               //[m_nb_icache]
    uint32_t * m_cpt_dcache_access;               //[m_nb_dcache]
    uint32_t * m_cpt_dcache_hit_after_miss_read;  //[m_nb_dcache]
    uint32_t * m_cpt_dcache_hit_after_miss_write; //[m_nb_dcache]
    uint32_t * m_cpt_dcache_store_after_store;    //[m_nb_dcache]
    uint32_t * m_cpt_icache_miss_victim_wait;     //[m_nb_icache]
    uint32_t * m_cpt_dcache_miss_victim_wait;     //[m_nb_dcache]

    // Per-FSM counters (presumably one entry per FSM state — confirm against
    // the allocation in the constructor).
    uint32_t ** m_cpt_fsm_dcache;  //[m_nb_dcache]
    uint32_t ** m_cpt_fsm_icache;  //[m_nb_icache]
    uint32_t  * m_cpt_fsm_cmd;
    uint32_t  * m_cpt_fsm_rsp;
    uint32_t  * m_cpt_fsm_tgt;
    uint32_t  * m_cpt_fsm_cleanup;

    // Non blocking multi-cache
    typename iss_t::InstructionRequest  * ireq        ; //[m_nb_icache]
    typename iss_t::InstructionResponse * irsp        ; //[m_nb_icache]
    bool                                * ireq_cached ; //[m_nb_icache]
    // NOTE(review): the original comment sized this array with m_nb_dcache,
    // unlike the other instruction-side arrays above (m_nb_icache) — confirm
    // against the allocation in the constructor.
    uint32_t                            * ireq_num_cpu; //[m_nb_dcache]
    typename iss_t::DataRequest         * dreq        ; //[m_nb_dcache]
    typename iss_t::DataResponse        * drsp        ; //[m_nb_dcache]
    bool                                * dreq_cached ; //[m_nb_dcache]
    uint32_t                            * dreq_num_cpu; //[m_nb_dcache]

    // Position of the cache-number field and the associated masks
    // (exact use is defined by get_num_cache / set_num_cache, not visible here).
    const uint32_t m_num_cache_LSB;
    const uint32_t m_num_cache_MSB;
    addr_40        m_num_cache_LSB_mask;
    addr_40        m_num_cache_mask;

protected:
    SC_HAS_PROCESS(VciCcXCacheWrapperV4);

public:

    // Constructor.
    //   insname                       : SystemC module instance name
    //   proc_id                       : processor identifier
    //   mtp, mtc                      : mapping tables (presumably direct and
    //                                   coherence networks — confirm in the
    //                                   implementation)
    //   initiator_index_rw / _c       : initiator indexes (see m_srcid_rw / m_srcid_c)
    //   target_index                  : target index of this component
    //   nb_cpu, nb_dcache             : number of processors / data caches
    //   icache_ways/sets/words        : instruction cache geometry
    //   dcache_ways/sets/words        : data cache geometry
    //   wbuf_nwords/nlines/timeout    : write buffer configuration
    //   write_policy                  : write policy, WRITE_THROUGH by default
    VciCcXCacheWrapperV4(
                       sc_module_name insname,
                       int proc_id,
                       const soclib::common::MappingTable &mtp,
                       const soclib::common::MappingTable &mtc,
                       const soclib::common::IntTab &initiator_index_rw,
                       const soclib::common::IntTab &initiator_index_c,
                       const soclib::common::IntTab &target_index,
                       size_t nb_cpu,
                       size_t nb_dcache,
                       size_t icache_ways,
                       size_t icache_sets,
                       size_t icache_words,
                       size_t dcache_ways,
                       size_t dcache_sets,
                       size_t dcache_words,
                       size_t wbuf_nwords,
                       size_t wbuf_nlines,
                       size_t wbuf_timeout,
                       write_policy_t write_policy=WRITE_THROUGH
                         );

    ~VciCcXCacheWrapperV4();

    // Debug / statistics output.
    void print_trace(size_t mode = 0);
    void print_cpi();
    void print_stats(bool print_wbuf=true, bool print_fsm=true);

    // Configures the "stop simulation" debug feature (see
    // CC_XCACHE_WRAPPER_STOP_SIMULATION); the argument is presumably the
    // maximum number of frozen cycles — confirm in the implementation.
    void stop_simulation (uint32_t);

    // Selects which transaction log files are generated
    // (see CC_XCACHE_WRAPPER_DEBUG_FILE_TRANSACTION).
    void log_transaction ( bool generate_file_icache
                          ,bool generate_file_dcache
                          ,bool generate_file_cmd
                          ,bool generate_file_tgt
                          ,bool generate_file_cleanup);

    // Enables VHDL testbench generation for the multi write buffer
    // (see MWBUF_VHDL_TESTBENCH).
    void vhdl_testbench (bool generate_file_mwbuf);

private:

    // SystemC processes.
    void transition();
    void genMoore();

    // Cache-number helpers. The non-"_only" variants take the address by
    // non-const reference, so the implementation may modify it; the "_only"
    // variants work on a copy.
    uint32_t get_num_cache     (addr_40 & addr);
    uint32_t get_num_cache_only(addr_40   addr);
    void     set_num_cache     (addr_40 & addr, uint32_t num_cache);
    addr_40  set_num_cache_only(addr_40   addr, uint32_t num_cache);

    // The ISS and VCI store-conditional result codes must have the same values.
    soclib_static_assert((int)iss_t::SC_ATOMIC     == (int)vci_param::STORE_COND_ATOMIC);
    soclib_static_assert((int)iss_t::SC_NOT_ATOMIC == (int)vci_param::STORE_COND_NOT_ATOMIC);
};

}}

#endif /* SOCLIB_CABA_VCI_CC_XCACHE_WRAPPER_V4_H */

// Local Variables:
// tab-width: 4
// c-basic-offset: 4
// c-file-offsets:((innamespace . 0)(inline-open . 0))
// indent-tabs-mode: nil
// End:

// vim: filetype=cpp:expandtab:shiftwidth=4:tabstop=4:softtabstop=4
