Changeset 645 for branches


Ignore:
Timestamp:
Feb 27, 2014, 12:32:14 PM (11 years ago)
Author:
devigne
Message:

An invalidation from the memcache on a non-coherent line can trigger a cleanup_data.
While this cleanup is being sent, the processor can issue a write to the same line.
However, there is no guarantee that the memcache will receive the two transactions
in the correct order, i.e. it can process the write before the cleanup, in which
case the cleanup overwrites the newly written value, which is more recent than the
data contained in the cleanup.
To solve this problem, we block writes to ZOMBI lines; they are unblocked
when the line becomes EMPTY.

Location:
branches/RWT/modules/vci_cc_vcache_wrapper/caba/source
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • branches/RWT/modules/vci_cc_vcache_wrapper/caba/source/include/vci_cc_vcache_wrapper.h

    r615 r645  
    541541    //bool                    *r_dcache_in_tlb;           // copy exist in dtlb or itlb
    542542    //bool                    *r_dcache_contains_ptd;     // cache line contains a PTD
    543     int                     *r_dcache_content_state;    // content state of one cache line
    544     int                     *r_dcache_dirty_word;    // content state of one cache line
     543    int                     *r_dcache_content_state; // content state of one cache line
     544    int                     *r_dcache_dirty_word;   
     545    bool                    *r_dcache_zombi_ncc;     
    545546    //////////////////////////////////////////////////////////////////////////////////////
    546547
     
    644645    // Activity counters
    645646    ////////////////////////////////
    646     uint32_t m_cpt_dcache_data_read;        // DCACHE DATA READ
    647     uint32_t m_cpt_dcache_data_write;       // DCACHE DATA WRITE
    648     uint32_t m_cpt_dcache_dir_read;         // DCACHE DIR READ
    649     uint32_t m_cpt_dcache_dir_write;        // DCACHE DIR WRITE
    650 
    651     uint32_t m_cpt_icache_data_read;        // ICACHE DATA READ
    652     uint32_t m_cpt_icache_data_write;       // ICACHE DATA WRITE
    653     uint32_t m_cpt_icache_dir_read;         // ICACHE DIR READ
    654     uint32_t m_cpt_icache_dir_write;        // ICACHE DIR WRITE
    655 
    656     uint32_t m_cpt_frz_cycles;              // number of cycles where the cpu is frozen
    657     uint32_t m_cpt_total_cycles;                // total number of cycles
     647    uint32_t m_cpt_dcache_data_read;           // DCACHE DATA READ
     648    uint32_t m_cpt_dcache_data_write;          // DCACHE DATA WRITE
     649    uint32_t m_cpt_dcache_dir_read;            // DCACHE DIR READ
     650    uint32_t m_cpt_dcache_dir_write;           // DCACHE DIR WRITE
     651
     652    uint32_t m_cpt_icache_data_read;           // ICACHE DATA READ
     653    uint32_t m_cpt_icache_data_write;          // ICACHE DATA WRITE
     654    uint32_t m_cpt_icache_dir_read;            // ICACHE DIR READ
     655    uint32_t m_cpt_icache_dir_write;           // ICACHE DIR WRITE
     656
     657    uint32_t m_cpt_frz_cycles;                 // number of cycles where the cpu is frozen
     658    uint32_t m_cpt_total_cycles;                   // total number of cycles
    658659
    659660    // Cache activity counters
    660     uint32_t m_cpt_data_read;               // total number of read data
    661     uint32_t m_cpt_data_write;              // total number of write data
     661    uint32_t m_cpt_data_read;                  // total number of read data
     662    uint32_t m_cpt_data_write;                 // total number of write data
    662663    uint32_t m_cpt_data_write_back;
    663664    uint32_t m_cpt_data_cleanup;
    664665    uint32_t m_cpt_data_sc;
    665     uint32_t m_cpt_data_miss;               // number of read miss
    666     uint32_t m_cpt_ins_miss;                // number of instruction miss
    667     uint32_t m_cpt_unc_read;                // number of read uncached
    668     uint32_t m_cpt_write_cached;            // number of cached write
    669     uint32_t m_cpt_ins_read;                // number of instruction read
    670     uint32_t m_cpt_ins_spc_miss;            // number of speculative instruction miss
    671 
    672     uint32_t m_cost_write_frz;              // number of frozen cycles related to write buffer
    673     uint32_t m_cost_data_miss_frz;          // number of frozen cycles related to data miss
    674     uint32_t m_cost_unc_read_frz;           // number of frozen cycles related to uncached read
    675     uint32_t m_cost_ins_miss_frz;           // number of frozen cycles related to ins miss
    676 
    677     uint32_t m_cpt_imiss_transaction;       // number of VCI instruction miss transactions
    678     uint32_t m_cpt_dmiss_transaction;       // number of VCI data miss transactions
    679     uint32_t m_cpt_unc_transaction;         // number of VCI uncached read transactions
    680     uint32_t m_cpt_dunc_transaction;         // number of VCI uncached read transactions
    681     uint32_t m_cpt_ll_transaction;         // number of VCI uncached read transactions
    682     uint32_t m_cpt_write_transaction;       // number of VCI write transactions
     666    uint32_t m_cpt_data_miss;                  // number of read miss
     667    uint32_t m_cpt_ins_miss;                   // number of instruction miss
     668    uint32_t m_cpt_unc_read;                   // number of read uncached
     669    uint32_t m_cpt_write_cached;               // number of cached write
     670    uint32_t m_cpt_ins_read;                   // number of instruction read
     671    uint32_t m_cpt_ins_spc_miss;               // number of speculative instruction miss
     672
     673    uint32_t m_cost_write_frz;                 // number of frozen cycles related to write buffer
     674    uint32_t m_cost_data_miss_frz;             // number of frozen cycles related to data miss
     675    uint32_t m_cost_unc_read_frz;              // number of frozen cycles related to uncached read
     676    uint32_t m_cost_ins_miss_frz;              // number of frozen cycles related to ins miss
     677
     678    uint32_t m_cpt_imiss_transaction;          // number of VCI instruction miss transactions
     679    uint32_t m_cpt_dmiss_transaction;          // number of VCI data miss transactions
     680    uint32_t m_cpt_unc_transaction;            // number of VCI uncached read transactions
     681    uint32_t m_cpt_dunc_transaction;           // number of VCI uncached read transactions
     682    uint32_t m_cpt_ll_transaction;             // number of VCI uncached read transactions
     683    uint32_t m_cpt_write_transaction;          // number of VCI write transactions
    683684    uint32_t m_cpt_icache_unc_transaction;
    684685
    685     uint32_t m_cost_imiss_transaction;      // cumulated duration for VCI IMISS transactions
    686     uint32_t m_cost_dmiss_transaction;      // cumulated duration for VCI DMISS transactions
    687     uint32_t m_cost_unc_transaction;        // cumulated duration for VCI UNC transactions
    688     uint32_t m_cost_write_transaction;      // cumulated duration for VCI WRITE transactions
    689     uint32_t m_cost_icache_unc_transaction; // cumulated duration for VCI IUNC transactions
    690     uint32_t m_length_write_transaction;    // cumulated length for VCI WRITE transactions
     686    uint32_t m_cost_imiss_transaction;         // cumulated duration for VCI IMISS transactions
     687    uint32_t m_cost_dmiss_transaction;         // cumulated duration for VCI DMISS transactions
     688    uint32_t m_cost_unc_transaction;           // cumulated duration for VCI UNC transactions
     689    uint32_t m_cost_write_transaction;         // cumulated duration for VCI WRITE transactions
     690    uint32_t m_cost_icache_unc_transaction;    // cumulated duration for VCI IUNC transactions
     691    uint32_t m_length_write_transaction;       // cumulated length for VCI WRITE transactions
    691692
    692693    // TLB activity counters
    693     uint32_t m_cpt_ins_tlb_read;            // number of instruction tlb read
    694     uint32_t m_cpt_ins_tlb_miss;            // number of instruction tlb miss
    695     uint32_t m_cpt_ins_tlb_update_acc;      // number of instruction tlb update
    696     uint32_t m_cpt_ins_tlb_occup_cache;     // number of instruction tlb occupy data cache line
    697     uint32_t m_cpt_ins_tlb_hit_dcache;      // number of instruction tlb hit in data cache
    698 
    699     uint32_t m_cpt_data_tlb_read;           // number of data tlb read
    700     uint32_t m_cpt_data_tlb_miss;           // number of data tlb miss
    701     uint32_t m_cpt_data_tlb_update_acc;     // number of data tlb update
    702     uint32_t m_cpt_data_tlb_update_dirty;   // number of data tlb update dirty
    703     uint32_t m_cpt_data_tlb_hit_dcache;     // number of data tlb hit in data cache
    704     uint32_t m_cpt_data_tlb_occup_cache;    // number of data tlb occupy data cache line
     694    uint32_t m_cpt_ins_tlb_read;               // number of instruction tlb read
     695    uint32_t m_cpt_ins_tlb_miss;               // number of instruction tlb miss
     696    uint32_t m_cpt_ins_tlb_update_acc;         // number of instruction tlb update
     697    uint32_t m_cpt_ins_tlb_occup_cache;        // number of instruction tlb occupy data cache line
     698    uint32_t m_cpt_ins_tlb_hit_dcache;         // number of instruction tlb hit in data cache
     699
     700    uint32_t m_cpt_data_tlb_read;              // number of data tlb read
     701    uint32_t m_cpt_data_tlb_miss;              // number of data tlb miss
     702    uint32_t m_cpt_data_tlb_update_acc;        // number of data tlb update
     703    uint32_t m_cpt_data_tlb_update_dirty;      // number of data tlb update dirty
     704    uint32_t m_cpt_data_tlb_hit_dcache;        // number of data tlb hit in data cache
     705    uint32_t m_cpt_data_tlb_occup_cache;       // number of data tlb occupy data cache line
    705706    uint32_t m_cpt_tlb_occup_dcache;
    706707
    707     uint32_t m_cost_ins_tlb_miss_frz;       // number of frozen cycles related to instruction tlb miss
    708     uint32_t m_cost_data_tlb_miss_frz;      // number of frozen cycles related to data tlb miss
     708    uint32_t m_cost_ins_tlb_miss_frz;          // number of frozen cycles related to instruction tlb miss
     709    uint32_t m_cost_data_tlb_miss_frz;         // number of frozen cycles related to data tlb miss
    709710    uint32_t m_cost_ins_tlb_update_acc_frz;    // number of frozen cycles related to instruction tlb update acc
    710711    uint32_t m_cost_data_tlb_update_acc_frz;   // number of frozen cycles related to data tlb update acc
     
    722723    uint32_t m_cpt_dtlb_sc_dirty_transaction;  // number of dtlb sc dirty transactions
    723724
    724     uint32_t m_cost_itlbmiss_transaction;       // cumulated duration for VCI instruction TLB miss transactions
    725     uint32_t m_cost_itlb_ll_transaction;        // cumulated duration for VCI instruction TLB ll acc transactions
    726     uint32_t m_cost_itlb_sc_transaction;        // cumulated duration for VCI instruction TLB sc acc transactions
    727     uint32_t m_cost_dtlbmiss_transaction;       // cumulated duration for VCI data TLB miss transactions
    728     uint32_t m_cost_dtlb_ll_transaction;        // cumulated duration for VCI data TLB ll acc transactions
    729     uint32_t m_cost_dtlb_sc_transaction;        // cumulated duration for VCI data TLB sc acc transactions
    730     uint32_t m_cost_dtlb_ll_dirty_transaction;  // cumulated duration for VCI data TLB ll dirty transactions
    731     uint32_t m_cost_dtlb_sc_dirty_transaction;  // cumulated duration for VCI data TLB sc dirty transactions
     725    uint32_t m_cost_itlbmiss_transaction;      // cumulated duration for VCI instruction TLB miss transactions
     726    uint32_t m_cost_itlb_ll_transaction;       // cumulated duration for VCI instruction TLB ll acc transactions
     727    uint32_t m_cost_itlb_sc_transaction;       // cumulated duration for VCI instruction TLB sc acc transactions
     728    uint32_t m_cost_dtlbmiss_transaction;      // cumulated duration for VCI data TLB miss transactions
     729    uint32_t m_cost_dtlb_ll_transaction;       // cumulated duration for VCI data TLB ll acc transactions
     730    uint32_t m_cost_dtlb_sc_transaction;       // cumulated duration for VCI data TLB sc acc transactions
     731    uint32_t m_cost_dtlb_ll_dirty_transaction; // cumulated duration for VCI data TLB ll dirty transactions
     732    uint32_t m_cost_dtlb_sc_dirty_transaction; // cumulated duration for VCI data TLB sc dirty transactions
    732733
    733734    // coherence activity counters
    734     uint32_t m_cpt_cc_update_icache;            // number of coherence update instruction commands
    735     uint32_t m_cpt_cc_update_dcache;            // number of coherence update data commands
    736     uint32_t m_cpt_cc_inval_icache;             // number of coherence inval instruction commands
    737     uint32_t m_cpt_cc_inval_dcache;             // number of coherence inval data commands
    738     uint32_t m_cpt_cc_broadcast;                // number of coherence broadcast commands
    739 
    740     uint32_t m_cost_updt_data_frz;              // number of frozen cycles related to coherence update data packets
    741     uint32_t m_cost_inval_ins_frz;              // number of frozen cycles related to coherence inval instruction packets
    742     uint32_t m_cost_inval_data_frz;             // number of frozen cycles related to coherence inval data packets
    743     uint32_t m_cost_broadcast_frz;              // number of frozen cycles related to coherence broadcast packets
    744 
    745     uint32_t m_cpt_cc_cleanup_ins;              // number of coherence cleanup packets
    746     uint32_t m_cpt_cc_cleanup_data;             // number of coherence cleanup packets
    747     uint32_t m_cpt_cleanup_data_not_dirty;
    748     uint32_t m_cpt_cleanup_data_dirty_word;
    749 
    750     uint32_t m_cpt_icleanup_transaction;        // number of instruction cleanup transactions
    751     uint32_t m_cpt_dcleanup_transaction;        // number of instructinumber of data cleanup transactions
    752     uint32_t m_cost_icleanup_transaction;       // cumulated duration for VCI instruction cleanup transactions
    753     uint32_t m_cost_dcleanup_transaction;       // cumulated duration for VCI data cleanup transactions
    754 
    755     uint32_t m_cost_ins_tlb_inval_frz;      // number of frozen cycles related to checking ins tlb invalidate
    756     uint32_t m_cpt_ins_tlb_inval;           // number of ins tlb invalidate
    757 
    758     uint32_t m_cost_data_tlb_inval_frz;     // number of frozen cycles related to checking data tlb invalidate
    759     uint32_t m_cpt_data_tlb_inval;          // number of data tlb invalidate
     735    uint32_t m_cpt_cc_update_icache;           // number of coherence update instruction commands
     736    uint32_t m_cpt_cc_update_dcache;           // number of coherence update data commands
     737    uint32_t m_cpt_cc_inval_icache;            // number of coherence inval instruction commands
     738    uint32_t m_cpt_cc_inval_dcache;            // number of coherence inval data commands
     739    uint32_t m_cpt_cc_broadcast;               // number of coherence broadcast commands
     740
     741    uint32_t m_cost_updt_data_frz;             // number of frozen cycles related to coherence update data packets
     742    uint32_t m_cost_inval_ins_frz;             // number of frozen cycles related to coherence inval instruction packets
     743    uint32_t m_cost_inval_data_frz;            // number of frozen cycles related to coherence inval data packets
     744    uint32_t m_cost_broadcast_frz;             // number of frozen cycles related to coherence broadcast packets
     745
     746    uint32_t m_cpt_cc_cleanup_ins;             // number of coherence cleanup packets
     747    uint32_t m_cpt_cc_cleanup_data;            // number of coherence cleanup packets
     748    uint32_t m_cpt_cleanup_data_not_dirty;     // number of total cleanup data without extra data flits
     749    uint32_t m_cpt_cleanup_data_dirty_word;    // number of total words dirty in cleanup data
     750    uint32_t m_cpt_data_write_miss;            // number of total write miss
     751    uint32_t m_cpt_data_write_on_zombi;        // number of frozen cycles related to blocked write on line NCC/CC ZOMBI
     752    uint32_t m_cpt_data_write_on_zombi_ncc;    // number of frozen cycles related to blocked write on line NCC ZOMBI
     753
     754    uint32_t m_cpt_icleanup_transaction;       // number of instruction cleanup transactions
     755    uint32_t m_cpt_dcleanup_transaction;       // number of instructinumber of data cleanup transactions
     756    uint32_t m_cost_icleanup_transaction;      // cumulated duration for VCI instruction cleanup transactions
     757    uint32_t m_cost_dcleanup_transaction;      // cumulated duration for VCI data cleanup transactions
     758
     759    uint32_t m_cost_ins_tlb_inval_frz;         // number of frozen cycles related to checking ins tlb invalidate
     760    uint32_t m_cpt_ins_tlb_inval;              // number of ins tlb invalidate
     761
     762    uint32_t m_cost_data_tlb_inval_frz;        // number of frozen cycles related to checking data tlb invalidate
     763    uint32_t m_cpt_data_tlb_inval;             // number of data tlb invalidate
    760764
    761765    // FSM activity counters
  • branches/RWT/modules/vci_cc_vcache_wrapper/caba/source/src/vci_cc_vcache_wrapper.cpp

    r615 r645  
    441441    //r_dcache_in_tlb        = new bool[dcache_ways*dcache_sets];
    442442    //r_dcache_contains_ptd  = new bool[dcache_ways*dcache_sets];
    443     r_dcache_content_state = new int [dcache_ways*dcache_sets];
    444     r_dcache_dirty_word    = new int [dcache_ways*dcache_sets*dcache_words];
     443    r_dcache_content_state = new int  [dcache_ways*dcache_sets];
     444    r_dcache_dirty_word    = new int  [dcache_ways*dcache_sets*dcache_words];
     445    r_dcache_zombi_ncc     = new bool [dcache_ways*dcache_sets];
    445446    ///////////////////////////////////////////////////////////
    446447
     
    474475    delete [] r_dcache_content_state;
    475476    delete [] r_dcache_dirty_word;
     477    delete [] r_dcache_zombi_ncc;
    476478    /////////////////////////////////
     479    print_stats();
    477480}
    478481
     
    642645        << "- DUNC TRANSACTION        = " << m_cpt_dunc_transaction << std::endl
    643646        << "- LL TRANSACTION          = " << m_cpt_ll_transaction << std::endl
     647        << "- WRITE DATA MISS         = " << m_cpt_data_write_miss << std::endl
     648        << "- WRITE DATA ON ZOMBI     = " << m_cpt_data_write_on_zombi << std::endl
     649        << "- WRITE DATA ON ZOMBI NCC = " << m_cpt_data_write_on_zombi_ncc << std::endl
    644650        << "- CLEANUP DATA NOT DIRTY  = " << m_cpt_cleanup_data_not_dirty << std::endl
    645651        << "- CLEANUP DATA DIRTY WORD = " << m_cpt_cleanup_data_dirty_word << std::endl;
     
    743749    m_cpt_cleanup_data_not_dirty  = 0;
    744750    m_cpt_cleanup_data_dirty_word = 0;
     751    m_cpt_data_write_miss = 0;
     752    m_cpt_data_write_on_zombi = 0;
     753    m_cpt_data_write_on_zombi_ncc = 0;
    745754
    746755}
     
    778787            //r_dcache_contains_ptd[i]  = false;
    779788            r_dcache_content_state[i] = LINE_CACHE_DATA_NOT_DIRTY;
    780             r_dcache_dirty_word[i] = 0;
     789            r_dcache_dirty_word[i]    = 0;
     790            r_dcache_zombi_ncc[i]     = false;
    781791        }
    782792
     
    943953        m_cpt_cleanup_data_not_dirty  = 0;
    944954        m_cpt_cleanup_data_dirty_word = 0;
     955        m_cpt_data_write_miss = 0;
     956        m_cpt_data_write_on_zombi = 0;
     957        m_cpt_data_write_on_zombi_ncc = 0;
    945958
    946959        m_cpt_itlbmiss_transaction      = 0;
     
    22932306
    22942307
    2295         // physical address computation : systematic DTLB access if activated)
     2308        // physical address computation : systematic DTLB access (if activated)
    22962309        if ( m_dreq.valid )
    22972310        {
    2298 
    22992311
    23002312            if ( r_mmu_mode.read() & DATA_TLB_MASK )  // DTLB activated
     
    29302942                            // response to processor
    29312943                            m_drsp.valid        = true;
     2944
     2945
    29322946                            // activating P1 stage
    2933                             if( (cache_state != CACHE_SLOT_STATE_ZOMBI )&&(cache_state != CACHE_SLOT_STATE_EMPTY )&&(cacheable) )
     2947                            if( (cache_state != CACHE_SLOT_STATE_ZOMBI) && (cache_state != CACHE_SLOT_STATE_EMPTY) && (cacheable) )
    29342948                            {
    29352949                                wbuf_request = (cache_state == CACHE_SLOT_STATE_VALID_CC); //write to L2 only if CC
    29362950                                updt_request = true;
    2937                                 if (cache_state == CACHE_SLOT_STATE_VALID_NCC)
     2951                                if ( cache_state == CACHE_SLOT_STATE_VALID_NCC )
    29382952                                {
    2939                                     if (r_dcache_content_state[cache_way*m_dcache_sets+cache_set] == LINE_CACHE_DATA_NOT_DIRTY)
     2953                                    if ( r_dcache_content_state[cache_way*m_dcache_sets+cache_set] == LINE_CACHE_DATA_NOT_DIRTY )
    29402954                                    {
    29412955                                        r_dcache_content_state[cache_way*m_dcache_sets+cache_set] = LINE_CACHE_DATA_DIRTY;
    29422956                                    }
    2943                                     r_dcache_dirty_word[(cache_way*m_dcache_sets +cache_set)*m_dcache_words+cache_word] = 1;//dirty bit with word granularity (only for stats)
     2957                                    //dirty bit with word granularity (only for stats)
     2958                                    r_dcache_dirty_word[(cache_way*m_dcache_sets +cache_set)*m_dcache_words+cache_word] = 1;
    29442959                                    m_cpt_data_write_back ++;
    29452960                                }
    29462961                            }
     2962                            // We stall proc if a write request is on line ZOMBI
     2963                            // Invalidation from memcache on a non-coherent
     2964                            // line can cause cleanup_data. While sending this
     2965                            // cleanup the processor can do a write on the same
     2966                            // line. However there is no guarantee that the
     2967                            // memcache will receive transactions in the
     2968                            // correct order, ie it can handle writing before
     2969                            // cleanup and therefore overwrite the new value of
     2970                            // writing that is more current than the data
     2971                            // contained in the cleanup .
     2972                            // TODO : MAYBE NEED TO OPTIMIZE
     2973                            else if ( cache_state == CACHE_SLOT_STATE_ZOMBI )
     2974                            {
     2975                                m_drsp.valid = false;
     2976                                r_dcache_fsm = DCACHE_IDLE;
     2977                                // STAT : WRITE ON ZOMBI NCC LINE
     2978                                if (r_dcache_zombi_ncc[cache_way*m_dcache_sets+cache_set] == true)
     2979                                {
     2980                                    m_cpt_data_write_on_zombi_ncc++;
     2981                                }
     2982
     2983                                m_cpt_data_write_on_zombi++;
     2984                            }
    29472985                            else
    29482986                            {
     2987                               if ( cacheable ) m_cpt_data_write_miss++;
     2988                               
    29492989                               wbuf_request = true;
    29502990                               updt_request = false;
     
    40864126m_cpt_dcache_dir_write++;
    40874127#endif
     4128
     4129
     4130
    40884131        r_dcache.write_dir( way,
    40894132                            set,
     
    48184861
    48194862                    r_dcache_content_state[way*m_dcache_sets+set] = LINE_CACHE_DATA_NOT_DIRTY;
     4863
    48204864                }
    48214865                else
     
    48304874                        r_dcache_dirty_word[(way*m_dcache_sets +set)*m_dcache_words+word] = 0;
    48314875                    }
     4876
     4877                    // STAT : WRITE ON ZOMBI NCC LINE
     4878                    r_dcache_zombi_ncc[r_dcache_miss_way.read()*m_dcache_sets+r_dcache_miss_set.read()] = true;
    48324879                }
    48334880
     
    51875234                                r_dcache_clack_set.read(),
    51885235                                CACHE_SLOT_STATE_EMPTY);
     5236
     5237            // STAT : WRITE ON ZOMBI NCC LINE
     5238            r_dcache_zombi_ncc[r_dcache_clack_way.read()*m_dcache_sets+r_dcache_clack_set.read()] = false;
    51895239
    51905240            if ( (r_dcache_miss_set.read() == r_dcache_clack_set.read()) and
Note: See TracChangeset for help on using the changeset viewer.