Changeset 206 for trunk/modules/vci_cc_vcache_wrapper_v4
- Timestamp:
- Mar 14, 2012, 10:22:45 PM (13 years ago)
- Location:
- trunk/modules/vci_cc_vcache_wrapper_v4/caba/source
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/modules/vci_cc_vcache_wrapper_v4/caba/source/include/vci_cc_vcache_wrapper_v4.h
r205 r206 116 116 DCACHE_XTN_DC_INVAL_GO, 117 117 DCACHE_XTN_DT_INVAL, 118 //handling long write (set dirty bit) 119 DCACHE_DIRTY_TLB_SET, 120 DCACHE_DIRTY_CACHE_SET, 118 //handling dirty bit update 119 DCACHE_DIRTY_GET_PTE, 121 120 DCACHE_DIRTY_SC_WAIT, 122 DCACHE_DIRTY_UNC_WAIT,123 121 // handling processor miss requests 124 122 DCACHE_MISS_VICTIM, 125 123 DCACHE_MISS_INVAL, 126 DCACHE_MISS_INVAL_WAIT,127 124 DCACHE_MISS_WAIT, 128 125 DCACHE_MISS_UPDT, … … 274 271 // debug variables (for each FSM) 275 272 ///////////////////////////////////////////// 276 uint32_t 277 bool 278 bool 279 bool 280 bool 281 bool 282 bool 283 bool 273 uint32_t m_debug_start_cycle; 274 bool m_debug_ok; 275 bool m_debug_previous_hit; 276 bool m_debug_dcache_fsm; 277 bool m_debug_icache_fsm; 278 bool m_debug_cleanup_fsm; 279 bool m_debug_inval_itlb_fsm; 280 bool m_debug_inval_dtlb_fsm; 284 281 285 282 /////////////////////////////// … … 344 341 sc_signal<paddr_t> r_dcache_p0_paddr; // physical address 345 342 sc_signal<bool> r_dcache_p0_cacheable; // address cacheable 346 sc_signal<size_t> r_dcache_p0_tlb_way; // selected way (from dtlb)347 sc_signal<size_t> r_dcache_p0_tlb_set; // selected set (from dtlb)348 sc_signal<paddr_t> r_dcache_p0_tlb_nline; // nline value (from dtlb)349 sc_signal<bool> r_dcache_p0_tlb_dirty; // dirty bit (from dtlb)350 sc_signal<bool> r_dcache_p0_tlb_big; // big page bit (from dtlb)351 343 // registers written in P1 stage (used in P2 stage) 352 344 sc_signal<bool> r_dcache_p1_valid; // P2 pipeline stage must be executed 353 sc_signal<bool> r_dcache_p1_updt_cache; // dcache must be updated354 sc_signal<bool> r_dcache_p1_set_dirty; // PTE dirty bit must be set355 sc_signal<uint32_t> r_dcache_p1_vaddr; // virtual address (from proc)356 345 sc_signal<uint32_t> r_dcache_p1_wdata; // write data (from proc) 357 346 sc_signal<vci_be_t> r_dcache_p1_be; // byte enable (from proc) … … 360 349 sc_signal<size_t> r_dcache_p1_cache_set; // selected set (from dcache) 361 350 
sc_signal<size_t> r_dcache_p1_cache_word; // selected word (from dcache) 362 sc_signal<size_t> r_dcache_p1_tlb_way; // selected way (from dtlb) 363 sc_signal<size_t> r_dcache_p1_tlb_set; // selected set (from dtlb) 364 sc_signal<paddr_t> r_dcache_p1_tlb_nline; // nline value (from dtlb) 365 sc_signal<bool> r_dcache_p1_tlb_big; // big page bit (from dtlb) 366 // registers written in P2 stage (used in long write) 367 sc_signal<size_t> r_dcache_p2_way; // selected way in dtlb or dcache 368 sc_signal<size_t> r_dcache_p2_set; // selected set in dtlb or dcache 369 sc_signal<size_t> r_dcache_p2_word; // selected word in dcache 370 sc_signal<paddr_t> r_dcache_p2_pte_paddr; // PTE physical address 371 sc_signal<size_t> r_dcache_p2_pte_value; // PTE value 372 sc_signal<bool> r_dcache_p2_type_sc; // request type (WRITE or SC) 373 sc_signal<bool> r_dcache_p2_sc_success; // successful SC request 374 351 // registers used by the Dirty bit sub-fsm 352 sc_signal<paddr_t> r_dcache_dirty_paddr; // PTE physical address 353 sc_signal<size_t> r_dcache_dirty_way; // way to invalidate in dcache 354 sc_signal<size_t> r_dcache_dirty_set; // set to invalidate in dcache 355 375 356 // communication between DCACHE FSM and VCI_CMD FSM 376 357 sc_signal<paddr_t> r_dcache_vci_paddr; // physical address for VCI command … … 422 403 423 404 // ITLB and DTLB invalidation 424 sc_signal<bool> r_dcache_itlb_inval_req; // inval request for itlb425 sc_signal<bool> r_dcache_dtlb_inval_req; // inval request for dtlb426 405 sc_signal<paddr_t> r_dcache_tlb_inval_line; // line index 427 406 sc_signal<size_t> r_dcache_tlb_inval_count; // tlb entry counter … … 436 415 437 416 // dcache directory extension 438 bool *r_dcache_in_ itlb; // copy of dcache line initlb439 bool *r_dcache_ in_dtlb; // copy of dcache line in dtlb417 bool *r_dcache_in_tlb; // copy exist in dtlb or itlb 418 bool *r_dcache_contains_ptd; // cache line contains a PTD 440 419 441 420 /////////////////////////////////// -
trunk/modules/vci_cc_vcache_wrapper_v4/caba/source/src/vci_cc_vcache_wrapper_v4.cpp
r205 r206 88 88 "DCACHE_XTN_DT_INVAL", 89 89 90 "DCACHE_DIRTY_TLB_SET", 91 "DCACHE_DIRTY_CACHE_SET", 90 "DCACHE_DIRTY_PTE_GET", 92 91 "DCACHE_DIRTY_SC_WAIT", 93 "DCACHE_DIRTY_UNC_WAIT",94 92 95 93 "DCACHE_MISS_VICTIM", 96 94 "DCACHE_MISS_INVAL", 97 "DCACHE_MISS_INVAL_WAIT",98 95 "DCACHE_MISS_WAIT", 99 96 "DCACHE_MISS_UPDT", … … 253 250 r_dcache_p0_paddr("r_dcache_p0_paddr"), 254 251 r_dcache_p0_cacheable("r_dcache_p0_cacheable"), 255 r_dcache_p0_tlb_way("r_dcache_p0_tlb_way"),256 r_dcache_p0_tlb_set("r_dcache_p0_tlb_set"),257 r_dcache_p0_tlb_nline("r_dcache_p0_tlb_nline"),258 r_dcache_p0_tlb_dirty("r_dcache_p0_tlb_dirty"),259 r_dcache_p0_tlb_big("r_dcache_p0_tlb_big"),260 252 261 253 r_dcache_p1_valid("r_dcache_p1_valid"), 262 r_dcache_p1_updt_cache("r_dcache_p1_updt_cache"),263 r_dcache_p1_set_dirty("r_dcache_p1_set_dirty"),264 r_dcache_p1_vaddr("r_dcache_p1_vaddr"),265 254 r_dcache_p1_wdata("r_dcache_p1_wdata"), 266 255 r_dcache_p1_be("r_dcache_p1_be"), … … 269 258 r_dcache_p1_cache_set("r_dcache_p1_cache_set"), 270 259 r_dcache_p1_cache_word("r_dcache_p1_word_save"), 271 r_dcache_p1_tlb_way("r_dcache_p1_tlb_way"), 272 r_dcache_p1_tlb_set("r_dcache_p1_tlb_set"), 273 r_dcache_p1_tlb_nline("r_dcache_p1_tlb_nline"), 274 r_dcache_p1_tlb_big("r_dcache_p1_tlb_big"), 275 276 r_dcache_p2_way("r_dcache_p2_way"), 277 r_dcache_p2_set("r_dcache_p2_set"), 278 r_dcache_p2_word("r_dcache_p2_word"), 279 r_dcache_p2_pte_paddr("r_dcache_p2_pte_paddr"), 280 r_dcache_p2_pte_value("r_dcache_p2_pte_value"), 281 r_dcache_p2_type_sc("r_dcache_p2_type_sc"), 282 r_dcache_p2_sc_success("r_dcache_p2_sc_success"), 260 261 r_dcache_dirty_paddr("r_dcache_dirty_paddr"), 262 r_dcache_dirty_way("r_dcache_dirty_way"), 263 r_dcache_dirty_set("r_dcache_dirty_set"), 283 264 284 265 r_dcache_vci_paddr("r_dcache_vci_paddr"), … … 321 302 r_dcache_ll_vaddr("r_dcache_ll_vaddr"), 322 303 323 r_dcache_itlb_inval_req("r_dcache_itlb_inval_req"),324 r_dcache_dtlb_inval_req("r_dcache_dtlb_inval_req"),325 304 
r_dcache_tlb_inval_line("r_dcache_tlb_inval_line"), 326 305 r_dcache_tlb_inval_count("r_dcache_tlb_inval_count"), … … 395 374 r_mmu_release = (uint32_t)(1 << 16) | 0x1; 396 375 397 r_tgt_buf = new uint32_t[dcache_words];398 r_tgt_be = new vci_be_t[dcache_words];399 r_dcache_in_ itlb = new bool[dcache_ways*dcache_sets];400 r_dcache_ in_dtlb= new bool[dcache_ways*dcache_sets];376 r_tgt_buf = new uint32_t[dcache_words]; 377 r_tgt_be = new vci_be_t[dcache_words]; 378 r_dcache_in_tlb = new bool[dcache_ways*dcache_sets]; 379 r_dcache_contains_ptd = new bool[dcache_ways*dcache_sets]; 401 380 402 381 SC_METHOD(transition); … … 425 404 delete [] r_tgt_be; 426 405 delete [] r_tgt_buf; 427 delete [] r_dcache_in_ itlb;428 delete [] r_dcache_ in_dtlb;406 delete [] r_dcache_in_tlb; 407 delete [] r_dcache_contains_ptd; 429 408 } 430 409 … … 682 661 r_cleanup_fsm = CLEANUP_DATA_IDLE; 683 662 684 std::memset(r_dcache_in_itlb, 0, sizeof(*r_dcache_in_itlb)*m_icache_ways*m_icache_sets); 685 std::memset(r_dcache_in_dtlb, 0, sizeof(*r_dcache_in_dtlb)*m_dcache_ways*m_dcache_sets); 686 687 663 // reset dcache directory extension 664 for (size_t i=0 ; i< m_dcache_ways*m_dcache_sets ; i++) 665 { 666 r_dcache_in_tlb[i] = false; 667 r_dcache_contains_ptd[i] = false; 668 } 669 688 670 // Response FIFOs and cleanup buffer 689 671 r_vci_rsp_fifo_icache.init(); … … 718 700 // No LL reservation 719 701 r_dcache_ll_valid = false; 720 721 // No TLB inval requestis722 r_dcache_itlb_inval_req = false;723 r_dcache_dtlb_inval_req = false;724 702 725 703 // No processor XTN request pending … … 1223 1201 //////////////////////////////////////////////////////////////////////////////////////// 1224 1202 1225 // default value for m_irsp.valid 1226 m_irsp.valid = false; 1203 // default value for m_irsp 1204 m_irsp.valid = false; 1205 m_irsp.error = false; 1206 m_irsp.instruction = 0; 1227 1207 1228 1208 switch( r_icache_fsm.read() ) … … 1898 1878 // - The registration in wbuf and the dcache hit are computed 
in stage P1. 1899 1879 // - The dcache update is done in stage P2. 1900 // A write operation can require a "long write" operation (if the PTE dirty bit 1901 // must be updated) handled by a dedicated sub-fsm (DCACHE_DIRTY_TLB_SET state). 1902 // If a PTE is modified, the both te itlb and dtlb are selectively, but sequencially 1880 // WRITE or SC requests can require a PTE Dirty bit update (in memory), 1881 // that is done (before handling the processor request) by a dedicated sub-fsm 1882 // (DCACHE_DIRTY_GET_PTE state). 1883 // If a PTE is modified, both the itlb and dtlb are selectively, but sequentially 1903 1884 // cleared by a dedicated sub_fsm (DCACHE_INVAL_TLB_SCAN state). 1904 1885 // If there is no write in the pipe, dcache and dtlb are accessed in parallel, 1905 1886 // (virtual address for itlb, and speculative physical address computed during 1906 // previous cycle for dcache) in order to return the data in one cycle for a read.1907 // We just pay an extra cycle when the speculative access is failing.1887 // previous cycle for dcache) in order to return the data in one cycle for a READ 1888 // request. We just pay an extra cycle when the speculative access is failing. 1908 1889 // 1909 1890 // 4/ Atomic instructions LL/SC … … 1919 1900 // The cache is not updated, as this is done in case of success by the 1920 1901 // coherence transaction. 
1921 // If rqired, the dirty bit is updated in PTE by a "long write".1922 1902 // 1923 1903 // 5/ Non cacheable access: … … 1944 1924 //////////////////////////////////////////////////////////////////////////////////////// 1945 1925 1946 // default value for m_drsp .valid1926 // default value for m_drsp 1947 1927 m_drsp.valid = false; 1928 m_drsp.error = false; 1929 m_drsp.rdata = 0; 1948 1930 1949 1931 switch ( r_dcache_fsm.read() ) 1950 1932 { 1951 1933 case DCACHE_IDLE: // There is 8 conditions to exit the IDLE state : 1952 // 1) Long write request (DCACHE FSM) => DCACHE_DIRTY_***1934 // 1) Dirty bit update (processor) => DCACHE_DIRTY_GET_PTE 1953 1935 // 2) Coherence request (TGT FSM) => DCACHE_CC_CHECK 1954 1936 // 3) ITLB miss request (ICACHE FSM) => DCACHE_TLB_MISS … … 1956 1938 // 5) DTLB miss (processor) => DCACHE_TLB_MISS 1957 1939 // 6) Cacheable read miss (processor) => DCACHE_MISS_VICTIM 1958 // 7) Uncacheable read (processor) => DCACHE_UNC_REQ 1959 // 8) SC access (processor) => DCACHE_SC 1940 // 7) Uncacheable read (processor) => DCACHE_UNC_WAIT 1941 // 8) SC access (processor) => DCACHE_SC_WAIT 1942 // 1943 // The dtlb is unconditionally accessed to translate the 1944 // virtual adress from processor. 1945 // 1960 1946 // There is 4 configurations to access the cache, 1961 1947 // depending on the pipe-line state, defined … … 1971 1957 // Handling P2 pipe-line stage 1972 1958 // Inputs are r_dcache_p1_* registers. 1973 // Three actions are executed in this P2 stage: 1974 // - If r_dcache_p1_updt_cache is true, we update the local copy in dcache. 
1975 // - If the modified cache line has copies in TLBs, we launch a TLB invalidate 1976 // operation, that is blocking for the processor, because we switch to 1977 // DCACHE_INVAL_TLB 1978 // - If the PTE dirty bit must be updated, we start a "long write", that is 1979 // blocking for the processor, because we switch to the DCACHE_DIRTY_SET_DIRTY 1980 1981 bool long_write_set_dirty = false; // default value 1982 bool tlb_inval_required = false; // default value 1959 // If r_dcache_p1_valid is true, we update the local copy in dcache. 1960 // If the modified cache line has copies in TLBs, we launch a TLB invalidate 1961 // operation, going to DCACHE_INVAL_TLB_SCAN state. 1962 1963 bool tlb_inval_required = false; 1983 1964 1984 1965 if ( r_dcache_p1_valid.read() ) // P2 stage activated 1985 1966 { 1986 bool cache_updt = r_dcache_p1_updt_cache.read();1987 1967 size_t way = r_dcache_p1_cache_way.read(); 1988 1968 size_t set = r_dcache_p1_cache_set.read(); … … 1991 1971 vci_be_t be = r_dcache_p1_be.read(); 1992 1972 1993 // update dcache if required 1994 if ( cache_updt ) 1995 { 1996 r_dcache.write( way, 1997 set, 1998 word, 1999 wdata, 2000 be ); 1973 r_dcache.write( way, 1974 set, 1975 word, 1976 wdata, 1977 be ); 2001 1978 #ifdef INSTRUMENTATION 2002 1979 m_cpt_dcache_data_write++; 2003 1980 #endif 2004 // cache update can require itlb & dtlb inval 2005 bool inval_itlb = false; 2006 bool inval_dtlb = false; 2007 2008 if ( (r_mmu_mode.read() & DATA_TLB_MASK) and r_dcache_in_dtlb[way*m_dcache_sets+set] ) 2009 { 2010 r_dcache_in_dtlb[way*m_dcache_sets+set] = false; 2011 inval_dtlb = true; 2012 } 2013 if ( (r_mmu_mode.read() & INS_TLB_MASK) and r_dcache_in_itlb[m_dcache_sets*way+set] ) 2014 { 2015 r_dcache_in_itlb[way*m_dcache_sets+set] = false; 2016 inval_itlb = true; 2017 } 2018 if ( inval_itlb or inval_dtlb ) 2019 { 2020 tlb_inval_required = true; 2021 r_dcache_itlb_inval_req = inval_itlb; 2022 r_dcache_dtlb_inval_req = inval_dtlb; 2023 r_dcache_tlb_inval_count = 0; 
2024 r_dcache_tlb_inval_line = r_dcache_p1_paddr.read()>>(uint32_log2(m_dcache_words<<2)); 2025 } 2026 } // end dcache update 2027 2028 // checking if dirty bit update is required 2029 if ( r_dcache_p1_set_dirty.read() ) 2030 { 2031 long_write_set_dirty = true; 2032 r_dcache_p2_way = r_dcache_p1_tlb_way.read(); 2033 r_dcache_p2_set = r_dcache_p1_tlb_set.read(); 2034 // The PTE physical address is the concatenation of the nline value (from dtlb), 2035 // with the word index (obtained from the proper bits of the virtual address) 2036 if ( r_dcache_p1_tlb_big.read() ) // PTE1 2037 { 2038 r_dcache_p2_pte_paddr = (paddr_t)(r_dcache_p1_tlb_nline.read()*(m_dcache_words<<2)) | 2039 (paddr_t)((r_dcache_p1_vaddr.read()>>19) & 0x3c); 2040 } 2041 else // PTE2 2042 { 2043 r_dcache_p2_pte_paddr = (paddr_t)(r_dcache_p1_tlb_nline.read()*(m_dcache_words<<2)) | 2044 (paddr_t)((r_dcache_p1_vaddr.read()>>9) & 0x38); 2045 } 1981 // cache update after a WRITE hit can require itlb & dtlb inval or flush 1982 if ( r_dcache_in_tlb[way*m_dcache_sets+set] ) 1983 { 1984 tlb_inval_required = true; 1985 r_dcache_tlb_inval_count = 0; 1986 r_dcache_tlb_inval_line = r_dcache_p1_paddr.read()>>(uint32_log2(m_dcache_words<<2)); 1987 r_dcache_in_tlb[way*m_dcache_sets+set] = false; 1988 } 1989 else if ( r_dcache_contains_ptd[way*m_dcache_sets+set] ) 1990 { 1991 r_itlb.reset(); 1992 r_dtlb.reset(); 1993 r_dcache_contains_ptd[way*m_dcache_sets+set] = false; 2046 1994 } 2047 1995 … … 2049 1997 if ( m_debug_dcache_fsm ) 2050 1998 { 2051 if ( cache_updt ) 2052 std::cout << " <PROC.DCACHE_IDLE> P2 stage: cache update" << std::dec 2053 << " / way = " << way 2054 << " / set = " << set 2055 << " / word = " << word << std::hex 2056 << " / wdata = " << wdata 2057 << " / be = " << be << std::endl; 2058 if ( long_write_set_dirty ) 2059 std::cout << " <PROC.DCACHE_IDLE> P2 stage: dirty bit update required" 2060 << " / pte_paddr = " << std::hex << r_dcache_p2_pte_paddr.read() << std::endl; 1999 std::cout << " 
<PROC.DCACHE_IDLE> Cache update in P2 stage" << std::dec 2000 << " / WAY = " << way 2001 << " / SET = " << set 2002 << " / WORD = " << word << std::hex 2003 << " / DATA = " << wdata 2004 << " / BE = " << be << std::endl; 2061 2005 } 2062 2006 #endif … … 2066 2010 // Handling P1 pipe-line stage 2067 2011 // Inputs are r_dcache_p0_* registers. 2068 // - We must write into wbuf and test the hit in dcache. 2069 // If the write request is not cacheable, and there is a pending 2070 // non cacheable write, or if the write buffer is full, we break: 2071 // The P0 and P1 pipe-line stages are frozen until the write 2072 // request registration is possible, but the P2 stage is not frozen. 2073 // - The r_dcache_p1_valid bit activating the P2 pipe-line stage 2074 // must be computed at all cycles. The P2 stage must be activated 2075 // if there is local copy in dcache, or if the PTE dirty bit must be set. 2076 2077 if ( r_dcache_p0_valid.read() and not tlb_inval_required ) // P1 stage activated 2012 // We must write into wbuf and test the hit in dcache. 2013 // If the write request is non cacheable, and there is a pending 2014 // non cacheable write, or if the write buffer is full, we break, 2015 // because the P0 and P1 pipe-line stages are frozen until the write 2016 // request registration is possible, but he P2 stage is not frozen. 2017 // The r_dcache_p1_valid bit must be computed at all cycles, and 2018 // the P2 stage must be activated if there is local copy in dcache. 
2019 2020 if ( r_dcache_p0_valid.read() ) // P1 stage activated 2078 2021 { 2079 2022 // write not cacheable, and previous non cacheable write registered … … 2101 2044 r_dcache_pending_unc_write = not r_dcache_p0_cacheable.read(); 2102 2045 2103 // read directory to detectlocal copy2046 // read directory to check local copy 2104 2047 size_t cache_way; 2105 2048 size_t cache_set; … … 2121 2064 } 2122 2065 2123 // dirty bit update requested 2124 bool dirty_req = (r_mmu_mode.read() & DATA_TLB_MASK) and not r_dcache_p0_tlb_dirty.read(); 2125 2126 // if there is a local copy or a dirty bit update requested 2127 if ( local_copy or dirty_req ) 2066 // store values for P2 pipe stage 2067 if ( local_copy ) 2128 2068 { 2129 2069 r_dcache_p1_valid = true; 2130 r_dcache_p1_set_dirty = dirty_req;2131 r_dcache_p1_updt_cache = local_copy;2132 r_dcache_p1_vaddr = r_dcache_p0_vaddr.read();2133 2070 r_dcache_p1_wdata = r_dcache_p0_wdata.read(); 2134 2071 r_dcache_p1_be = r_dcache_p0_be.read(); 2135 2072 r_dcache_p1_paddr = r_dcache_p0_paddr.read(); 2136 r_dcache_p1_tlb_way = r_dcache_p0_tlb_way;2137 r_dcache_p1_tlb_set = r_dcache_p0_tlb_set;2138 r_dcache_p1_tlb_nline = r_dcache_p0_tlb_nline;2139 r_dcache_p1_tlb_big = r_dcache_p0_tlb_big;2140 2073 r_dcache_p1_cache_way = cache_way; 2141 2074 r_dcache_p1_cache_set = cache_set; … … 2152 2085 } // end P1 stage 2153 2086 2154 ///////////////////////////////////////////////////////////////////////////// 2087 ///////////////////////////////////////////////////////////////////////////////// 2155 2088 // handling P0 pipe-line stage 2156 2089 // This stage is controlling r_dcache_fsm and r_dcache_p0_* registers. 2157 // The r_dcache_p0_valid flip-flop is only set in case of WRITE request.2090 // The r_dcache_p0_valid flip-flop is only set in case of a WRITE request. 
2158 2091 // - the TLB invalidate requests have the highest priority, 2159 // - then the long write requests,2160 2092 // - then the external coherence requests, 2161 2093 // - then the itlb miss requests, … … 2163 2095 // If dtlb is activated, there is an unconditional access to dtlb, 2164 2096 // for address translation. 2165 // 1) A processor WRITE request enters the three stage pipe-line (handled2166 // by the IDLE state), and can be completed by a "long write" if the2167 // PTE dirty bit must be updated in dtb, dcache and RAM.2168 // 2) A processor READ request generate a simultaneouss access to2097 // 1) A processor WRITE request is blocked if the Dirty bit must be set, or if 2098 // dtlb miss. If dtlb is OK, it enters the three stage pipe-line (fully 2099 // handled by the IDLE state), and the processor request is acknowledged. 2100 // 2) A processor READ or LL request generates a simultaneous access to 2169 2101 // both dcache data and dcache directory, using speculative PPN, but 2170 2102 // is delayed if the write pipe-line is not empty. 2171 2103 // In case of miss, we wait the VCI response in DCACHE_UNC_WAIT or 2172 2104 // DCACHE_MISS_WAIT states. 2173 // 3) A processor LL request is handled as a READ request. 2174 // 4) A processor SC request is delayed until the write pipe-line is empty. 2105 // 3) A processor SC request is delayed until the write pipe-line is empty. 2175 2106 // A VCI SC transaction is launched, and we wait the VCI response in 2176 2107 // DCACHE_SC_WAIT state. 
It can be completed by a "long write" if the … … 2186 2117 r_dcache_p0_valid = false; 2187 2118 } 2188 2189 // long write request2190 else if ( long_write_set_dirty )2191 {2192 r_dcache_fsm = DCACHE_DIRTY_TLB_SET;2193 r_dcache_p0_valid = false;2194 }2195 2196 2119 // external coherence request 2197 2120 else if ( r_tgt_dcache_req.read() ) … … 2245 2168 2246 2169 // systematic dtlb access using virtual address 2247 2248 2170 paddr_t tlb_paddr; 2249 2171 pte_info_t tlb_flags; … … 2253 2175 bool tlb_hit; 2254 2176 2255 if ( r_mmu_mode.read() & DATA_TLB_MASK ) // TLB activated2177 if ( r_mmu_mode.read() & DATA_TLB_MASK ) // DTLB activated 2256 2178 { 2257 2179 tlb_hit = r_dtlb.translate( m_dreq.addr, … … 2264 2186 m_cpt_dtlb_read++; 2265 2187 #endif 2266 // register dtlb outputs2267 r_dcache_p0_tlb_nline = tlb_nline;2268 r_dcache_p0_tlb_way = tlb_way;2269 r_dcache_p0_tlb_set = tlb_set;2270 r_dcache_p0_tlb_dirty = tlb_flags.d;2271 r_dcache_p0_tlb_big = tlb_flags.b;2272 2188 } 2273 2189 else … … 2524 2440 m_drsp.error = true; 2525 2441 m_drsp.rdata = 0; 2442 #if DEBUG_DCACHE 2443 if ( m_debug_dcache_fsm ) 2444 { 2445 std::cout << " <PROC.DCACHE_IDLE> HIT in dtlb, but privilege violation" << std::endl; 2446 } 2447 #endif 2526 2448 } 2527 2449 else if ( not tlb_flags.w and … … 2534 2456 m_drsp.error = true; 2535 2457 m_drsp.rdata = 0; 2458 #if DEBUG_DCACHE 2459 if ( m_debug_dcache_fsm ) 2460 { 2461 std::cout << " <PROC.DCACHE_IDLE> HIT in dtlb, but writable violation" << std::endl; 2462 } 2463 #endif 2536 2464 } 2537 2465 else … … 2541 2469 2542 2470 // physical address 2543 paddr 2471 paddr = tlb_paddr; 2544 2472 } 2545 2473 else // tlb miss … … 2599 2527 m_drsp.valid = true; 2600 2528 m_drsp.rdata = cache_rdata; 2529 #if DEBUG_DCACHE 2530 if ( m_debug_dcache_fsm ) 2531 { 2532 std::cout << " <PROC.DCACHE_IDLE> HIT in dcache" << std::endl; 2533 } 2534 #endif 2601 2535 } 2602 2536 } … … 2620 2554 2621 2555 // WRITE request: 2622 // The write request arguments have been 
registered in r_dcache_p0 registers. 2623 // The physical address has been computed and registered. 2624 // We acknowledge the processor request and activate the P1 pipeline stage. 2556 // If the TLB is activated and the PTE Dirty bit is not set, we stall 2557 // the processor and set the Dirty bit before handling the write request. 2558 // If we don't need to set the Dirty bit, we can acknowledge 2559 // the processor request, as the write arguments (including the 2560 // physical address) are registered in r_dcache_p0 registers: 2561 // We simply activate the P1 pipeline stage. 2625 2562 else if ( m_dreq.type == iss_t::DATA_WRITE ) 2626 2563 { 2627 2564 if ( (r_mmu_mode.read() & DATA_TLB_MASK ) 2565 and not tlb_flags.d ) // Dirty bit must be set 2566 { 2567 // The PTE physical address is obtained from the nline value (dtlb), 2568 // and the word index (proper bits of the virtual address) 2569 if ( tlb_flags.b ) // PTE1 2570 { 2571 r_dcache_dirty_paddr = (paddr_t)(tlb_nline*(m_dcache_words<<2)) | 2572 (paddr_t)((m_dreq.addr>>19) & 0x3c); 2573 } 2574 else // PTE2 2575 { 2576 r_dcache_dirty_paddr = (paddr_t)(tlb_nline*(m_dcache_words<<2)) | 2577 (paddr_t)((m_dreq.addr>>9) & 0x38); 2578 } 2579 r_dcache_fsm = DCACHE_DIRTY_GET_PTE; 2580 r_dcache_p0_valid = false; 2581 } 2582 else // Write request accepted 2583 { 2628 2584 #ifdef INSTRUMENTATION 2629 2585 m_cpt_data_write++; 2630 2586 #endif 2631 m_drsp.valid = true; 2632 m_drsp.rdata = 0; 2633 r_dcache_p0_valid = true; 2587 m_drsp.valid = true; 2588 m_drsp.rdata = 0; 2589 r_dcache_p0_valid = true; 2590 } 2634 2591 } // end WRITE 2635 2592 2636 2593 // SC request: 2637 2594 // The SC requests are taken only if the write pipe-line is empty. 2595 // - if there is no valid registered LL, we just return rdata = 1 2596 // (atomic access failed) and the SC transaction is completed. 
2638 2597 // - if a valid LL reservation (with the same address) is registered, 2639 // we request a SC transaction to CMD FSM and go to the DCACHE_SC_WAIT state 2640 // that will directly return the response to the processor, invalidate 2641 // the LL reservation, and set the Dirty bit if required. 2642 // We don't check a possible write hit in dcache, as the cache update 2643 // is done by the coherence transaction... 2644 // - if there is no valid registered LL, we just stay in IDLE state, 2645 // and return 1 (atomic access failed) 2598 // we test if a DIRTY bit update is required. 2599 // If the TLB is activated and the PTE Dirty bit is not set, we stall 2600 // the processor and set the Dirty bit before handling the write request. 2601 // If we don't need to set the Dirty bit, we request a SC transaction 2602 // to CMD FSM and go to DCACHE_SC_WAIT state, that will return 2603 // the response to the processor. 2604 // We don't check a possible write hit in dcache, as the cache update 2605 // is done by the coherence transaction induced by the SC... 
2646 2606 else if ( ( m_dreq.type == iss_t::DATA_SC ) 2647 2607 and not r_dcache_p0_valid.read() and not r_dcache_p1_valid.read() ) 2648 2608 { 2649 2609 if ( (r_dcache_ll_vaddr.read() != m_dreq.addr) 2610 or not r_dcache_ll_valid.read() ) // no valid registered LL 2611 { 2650 2612 #ifdef INSTRUMENTATION 2651 2613 m_cpt_data_sc++; 2652 2614 #endif 2653 // test if valid registered LL2654 if ( r_dcache_ll_valid.read() and (r_dcache_ll_vaddr.read() == m_dreq.addr))2655 {2656 r_dcache_vci_paddr = paddr;2657 r_dcache_vci_sc_req = true;2658 r_dcache_vci_sc_old = r_dcache_ll_data.read();2659 r_dcache_vci_sc_new = m_dreq.wdata;2660 r_dcache_fsm = DCACHE_SC_WAIT;2661 }2662 else // no registered LL2663 {2664 2665 2615 m_drsp.valid = true; 2666 2616 m_drsp.rdata = 1; 2667 2617 r_dcache_ll_valid = false; 2618 } 2619 else // valid registered LL 2620 { 2621 if ( (r_mmu_mode.read() & DATA_TLB_MASK ) 2622 and not tlb_flags.d ) // Dirty bit must be set 2623 { 2624 // The PTE physical address is obtained from the nline value (dtlb), 2625 // and the word index (proper bits of the virtual address) 2626 if ( tlb_flags.b ) // PTE1 2627 { 2628 r_dcache_dirty_paddr = (paddr_t)(tlb_nline*(m_dcache_words<<2)) | 2629 (paddr_t)((m_dreq.addr>>19) & 0x3c); 2630 } 2631 else // PTE2 2632 { 2633 r_dcache_dirty_paddr = (paddr_t)(tlb_nline*(m_dcache_words<<2)) | 2634 (paddr_t)((m_dreq.addr>>9) & 0x38); 2635 } 2636 r_dcache_fsm = DCACHE_DIRTY_GET_PTE; 2637 } 2638 else // SC request accepted 2639 { 2640 #ifdef INSTRUMENTATION 2641 m_cpt_data_sc++; 2642 #endif 2643 2644 r_dcache_vci_paddr = paddr; 2645 r_dcache_vci_sc_req = true; 2646 r_dcache_vci_sc_old = r_dcache_ll_data.read(); 2647 r_dcache_vci_sc_new = m_dreq.wdata; 2648 r_dcache_ll_valid = false; 2649 r_dcache_fsm = DCACHE_SC_WAIT; 2650 } 2668 2651 } 2669 2652 r_dcache_p0_valid = false; … … 2797 2780 2798 2781 } 2799 else if( entry & PTE_T_MASK ) // PTD : access PT2 2800 { 2782 else if( entry & PTE_T_MASK ) // PTD : me must access PT2 2783 { 2784 
// mark the cache line ac containing a PTD 2785 r_dcache_contains_ptd[m_dcache_sets*way+set] = true; 2786 2801 2787 // register bypass 2802 2788 if ( r_dcache_tlb_ins.read() ) // itlb … … 2828 2814 #endif 2829 2815 } 2830 else // PTE1 : update the TLB 2831 { 2832 if ( r_dcache_tlb_ins.read() ) r_dcache_in_itlb[m_icache_sets*way+set] = true; 2833 else r_dcache_in_dtlb[m_dcache_sets*way+set] = true; 2816 else // PTE1 : we must update the TLB 2817 { 2818 r_dcache_in_tlb[m_icache_sets*way+set] = true; 2834 2819 r_dcache_tlb_pte_flags = entry; 2835 2820 r_dcache_tlb_cache_way = way; … … 3050 3035 #endif 3051 3036 } 3052 else // mapped : update the TLB 3053 { 3054 if ( r_dcache_tlb_ins.read() ) r_dcache_in_itlb[m_icache_sets*way+set] = true; 3055 else r_dcache_in_dtlb[m_dcache_sets*way+set] = true; 3037 else // mapped : we must update the TLB 3038 { 3039 r_dcache_in_tlb[m_dcache_sets*way+set] = true; 3056 3040 r_dcache_tlb_pte_flags = pte_flags; 3057 3041 r_dcache_tlb_pte_ppn = pte_ppn; … … 3384 3368 if ( r_dcache_flush_count.read() == (m_dcache_sets*m_dcache_ways - 1) ) // last slot 3385 3369 { 3386 r_dtlb.reset(); // global entries are invalidated3387 r_itlb.reset(); // global entries are invalidated3370 r_dtlb.reset(); 3371 r_itlb.reset(); 3388 3372 for (size_t line = 0; line < m_dcache_ways*m_dcache_sets; line++) 3389 3373 { 3390 r_dcache_in_ itlb[line]= false;3391 r_dcache_ in_dtlb[line] = false;3374 r_dcache_in_tlb[line] = false; 3375 r_dcache_contains_ptd[line] = false; 3392 3376 } 3393 3377 r_dcache_fsm = DCACHE_IDLE; … … 3441 3425 r_dcache_fsm = DCACHE_TLB_MISS; 3442 3426 } 3427 3428 #if DEBUG_DCACHE 3429 if ( m_debug_dcache_fsm ) 3430 { 3431 std::cout << " <PROC.DCACHE_XTN_DC_INVAL_VA> Compute physical address" << std::hex 3432 << " / VADDR = " << r_dcache_p0_wdata.read() 3433 << " / PADDR = " << paddr << std::endl; 3434 } 3435 #endif 3436 3443 3437 break; 3444 3438 } … … 3472 3466 m_drsp.valid = true; 3473 3467 } 3468 3469 #if DEBUG_DCACHE 3470 if ( 
m_debug_dcache_fsm ) 3471 { 3472 std::cout << " <PROC.DCACHE_XTN_DC_INVAL_PA> Test hit in dcache" << std::hex 3473 << " / PADDR = " << r_dcache_p0_paddr.read() << std::dec 3474 << " / HIT = " << hit 3475 << " / SET = " << set 3476 << " / WAY = " << way << std::endl; 3477 } 3478 #endif 3474 3479 break; 3475 3480 } 3476 3481 //////////////////////////// 3477 case DCACHE_XTN_DC_INVAL_GO: // In this state, we invalidate the cache line & cleanup3482 case DCACHE_XTN_DC_INVAL_GO: // In this state, we invalidate the cache line 3478 3483 // Blocked if previous cleanup not completed 3479 3484 // Test if itlb or dtlb inval is required … … 3484 3489 size_t way = r_dcache_xtn_way.read(); 3485 3490 size_t set = r_dcache_xtn_set.read(); 3486 bool inval_itlb = false;3487 bool inval_dtlb = false;3488 3491 3489 r_ icache.inval( way,3492 r_dcache.inval( way, 3490 3493 set, 3491 3494 &nline ); … … 3495 3498 r_dcache_cleanup_line = nline; 3496 3499 3497 // possible itlb & dtlb invalidate requests 3498 3499 if ( (r_mmu_mode.read() & DATA_TLB_MASK) and r_dcache_in_dtlb[way*m_dcache_sets+set] ) 3500 { 3501 r_dcache_in_dtlb[way*m_dcache_sets+set] = false; 3502 inval_dtlb = true; 3503 } 3504 if ( (r_mmu_mode.read() & INS_TLB_MASK) and r_dcache_in_itlb[m_dcache_sets*way+set] ) 3505 { 3506 r_dcache_in_itlb[way*m_dcache_sets+set] = false; 3507 inval_itlb = true; 3508 } 3509 3510 // no valid response until itlb & dtlb invalidated 3511 if ( inval_itlb or inval_dtlb ) 3512 { 3513 r_dcache_itlb_inval_req = inval_itlb; 3514 r_dcache_dtlb_inval_req = inval_dtlb; 3500 // possible itlb & dtlb invalidate 3501 if ( r_dcache_in_tlb[way*m_dcache_sets+set] ) 3502 { 3515 3503 r_dcache_tlb_inval_line = nline; 3516 3504 r_dcache_tlb_inval_count = 0; 3517 3505 r_dcache_fsm_save = DCACHE_XTN_DC_INVAL_END; 3518 3506 r_dcache_fsm = DCACHE_INVAL_TLB_SCAN; 3507 r_dcache_in_tlb[way*m_dcache_sets+set] = false; 3519 3508 } 3520 else 3521 { 3509 else if ( r_dcache_contains_ptd[way*m_dcache_sets+set] ) 3510 { 3511 
r_itlb.reset(); 3512 r_dtlb.reset(); 3513 r_dcache_contains_ptd[way*m_dcache_sets+set] = false; 3522 3514 r_dcache_fsm = DCACHE_IDLE; 3523 3515 m_drsp.valid = true; 3524 3516 } 3517 else 3518 { 3519 r_dcache_fsm = DCACHE_IDLE; 3520 m_drsp.valid = true; 3521 } 3522 3523 #if DEBUG_DCACHE 3524 if ( m_debug_dcache_fsm ) 3525 { 3526 std::cout << " <PROC.DCACHE_XTN_DC_INVAL_GO> Actual dcache inval" << std::hex 3527 << " / NLINE = " << nline << std::endl; 3528 } 3529 #endif 3525 3530 } 3526 3531 break; 3527 3532 } 3528 3533 ////////////////////////////// 3529 case DCACHE_XTN_DC_INVAL_END: // waiting completion of itlb and dtlb invalidate3534 case DCACHE_XTN_DC_INVAL_END: // send response to processor XTN request 3530 3535 { 3531 3536 r_dcache_fsm = DCACHE_IDLE; … … 3578 3583 { 3579 3584 paddr_t nline; 3580 size_t way = r_dcache_miss_way.read();3581 size_t set = r_dcache_miss_set.read();3585 size_t way = r_dcache_miss_way.read(); 3586 size_t set = r_dcache_miss_set.read(); 3582 3587 3583 3588 r_dcache.inval( way, … … 3585 3590 &nline ); 3586 3591 3587 // if itlb & dtlb invalidate are required3592 // if selective itlb & dtlb invalidate are required 3588 3593 // the miss response is not handled before invalidate completed 3589 if ( (r_mmu_mode.read() & DATA_TLB_MASK) and 3590 ( r_dcache_in_itlb[way*m_dcache_sets+set] or 3591 r_dcache_in_dtlb[m_dcache_sets*way+set] ) ) 3592 { 3593 r_dcache_tlb_inval_line = r_dcache_vci_paddr.read() >> (uint32_log2(m_dcache_words)+2); 3594 r_dcache_itlb_inval_req = r_dcache_in_itlb[way*m_dcache_sets+set]; 3595 r_dcache_in_itlb[way*m_dcache_sets+set] = false; 3596 r_dcache_dtlb_inval_req = r_dcache_in_dtlb[way*m_dcache_sets+set]; 3597 r_dcache_in_dtlb[way*m_dcache_sets+set] = false; 3598 r_dcache_fsm = DCACHE_MISS_INVAL_WAIT; 3599 } 3594 if ( r_dcache_in_tlb[way*m_dcache_sets+set] ) 3595 { 3596 r_dcache_tlb_inval_line = nline; 3597 r_dcache_tlb_inval_count = 0; 3598 r_dcache_fsm = DCACHE_INVAL_TLB_SCAN; 3599 r_dcache_fsm_save = 
DCACHE_MISS_WAIT; 3600 r_dcache_in_tlb[way*m_dcache_sets+set] = false; 3601 } 3602 else if ( r_dcache_contains_ptd[way*m_dcache_sets+set] ) 3603 { 3604 r_itlb.reset(); 3605 r_dtlb.reset(); 3606 r_dcache_fsm = DCACHE_MISS_WAIT; 3607 } 3600 3608 else 3601 3609 { 3602 r_dcache_fsm = DCACHE_MISS_WAIT; 3603 } 3604 break; 3605 } 3606 //////////////////////////// 3607 case DCACHE_MISS_INVAL_WAIT: // waiting completion of itlb / dtlb invalidate 3608 { 3609 if ( (not r_dcache_itlb_inval_req.read()) or (not r_dcache_dtlb_inval_req.read()) ) 3610 { 3611 r_dcache_fsm = DCACHE_MISS_WAIT; 3610 r_dcache_fsm = DCACHE_MISS_WAIT; 3612 3611 } 3613 3612 break; … … 3719 3718 // pop the FIFO and update the cache 3720 3719 // update the directory at the last word 3721 // send a response to ICACHE FSM 3722 // in case of itlb miss 3723 { 3720 { 3721 size_t way = r_dcache_miss_way.read(); 3722 size_t set = r_dcache_miss_set.read(); 3723 size_t word = r_dcache_miss_word.read(); 3724 3724 3725 3725 #ifdef INSTRUMENTATION 3726 3726 m_cpt_dcache_data_write++; 3727 3727 #endif 3728 r_dcache.write( r_dcache_miss_way.read(),3729 r_dcache_miss_set.read(),3730 r_dcache_miss_word.read(),3728 r_dcache.write( way, 3729 set, 3730 word, 3731 3731 r_vci_rsp_fifo_dcache.read()); 3732 3732 … … 3734 3734 r_dcache_miss_word = r_dcache_miss_word.read() + 1; 3735 3735 3736 // if last word, update directory, set in_ itlb & in_dtlbbits3736 // if last word, update directory, set in_tlb & contains_ptd bits 3737 3737 if ( r_dcache_miss_word.read() == (m_dcache_words - 1) ) 3738 3738 { … … 3744 3744 r_dcache_miss_way.read(), 3745 3745 r_dcache_miss_set.read() ); 3746 3747 r_dcache_in_tlb[way*m_dcache_sets+set] = false; 3748 r_dcache_contains_ptd[way*m_dcache_sets+set] = false; 3746 3749 3747 3750 if (r_dcache_miss_type.read()==PTE1_MISS) r_dcache_fsm = DCACHE_TLB_PTE1_GET; … … 3818 3821 //////////////////// 3819 3822 case DCACHE_SC_WAIT: // waiting VCI response after a processor SC request 3820 // a long write is 
launched if dirty bit must be set3821 3823 { 3822 3824 // external coherence request … … 3840 3842 else if ( r_vci_rsp_fifo_dcache.rok() ) // response available 3841 3843 { 3842 bool sc_success = (r_vci_rsp_fifo_dcache.read() == 0);3843 3844 vci_rsp_fifo_dcache_get = true; 3844 3845 if ( sc_success and not r_dcache_p0_tlb_dirty.read() ) // Dirty bit must be set 3846 { 3847 // The PTE physical address is the concatenation of the nline value (from dtlb), 3848 // with the word index (obtained from the proper bits of the virtual address) 3849 if ( r_dcache_p0_tlb_big.read() ) // PTE1 3850 { 3851 r_dcache_p2_pte_paddr = (paddr_t)(r_dcache_p0_tlb_nline.read()*(m_dcache_words<<2)) | 3852 (paddr_t)((r_dcache_p0_vaddr.read()>>19) & 0x3c); 3853 } 3854 else // PTE2 3855 { 3856 r_dcache_p2_pte_paddr = (paddr_t)(r_dcache_p0_tlb_nline.read()*(m_dcache_words<<2)) | 3857 (paddr_t)((r_dcache_p0_vaddr.read()>>9) & 0x38); 3858 } 3859 r_dcache_p2_sc_success = sc_success; 3860 r_dcache_p2_way = r_dcache_p0_tlb_way.read(); 3861 r_dcache_p2_set = r_dcache_p0_tlb_set.read(); 3862 r_dcache_fsm = DCACHE_DIRTY_TLB_SET; 3863 3864 } 3865 else 3866 { 3867 m_drsp.valid = true; 3868 m_drsp.rdata = r_vci_rsp_fifo_dcache.read(); 3869 r_dcache_fsm = DCACHE_IDLE; 3870 } 3845 m_drsp.valid = true; 3846 m_drsp.rdata = r_vci_rsp_fifo_dcache.read(); 3847 r_dcache_fsm = DCACHE_IDLE; 3871 3848 } 3872 3849 break; 3873 3850 } 3874 3851 ////////////////////////// 3875 case DCACHE_DIRTY_TLB_SET: // Enter this sub_fsm in case of long write: 3876 // - in case of WRITE request (r_dcache_p2_type_sc == false) 3877 // - in case of SC request (r_dcache_p2_type_sc == true) 3878 // Inputs arguments are: 3879 // - r_dcache_p2_way, 3880 // - r_dcache_p2_set, 3881 // - r_dcache_p2_pte_paddr, 3882 // - r_dcache_p2_type_sc, 3883 // - r_dcache_p2_sc_success, 3884 // In this first state, we set PTE dirty bit in dtlb 3885 // and get PTE in dcache 3886 { 3887 // set dirty bit in dtlb 3888 r_dtlb.set_dirty( 
r_dcache_p2_way.read(), 3889 r_dcache_p2_set.read() ); 3890 3852 case DCACHE_DIRTY_GET_PTE: // This sub_fsm set the PTE Dirty bit in memory 3853 // before handling a processor WRITE or SC request 3854 // Input argument is r_dcache_dirty_paddr 3855 // In this first state, we get PTE value in dcache 3856 // and post a SC request to CMD FSM 3857 { 3891 3858 // get PTE in dcache 3892 uint32_t pte = 0;3859 uint32_t pte; 3893 3860 size_t way; 3894 3861 size_t set; 3895 size_t word; 3896 bool hit = r_dcache.read( r_dcache_ p2_pte_paddr.read(),3862 size_t word; // unused 3863 bool hit = r_dcache.read( r_dcache_dirty_paddr.read(), 3897 3864 &pte, 3898 3865 &way, … … 3903 3870 m_cpt_dcache_dir_read++; 3904 3871 #endif; 3872 assert( hit and "error in DCACHE_DIRTY_TLB_SET: the PTE should be in dcache" ); 3873 3874 // request sc transaction to CMD_FSM 3875 r_dcache_dirty_way = way; 3876 r_dcache_dirty_set = set; 3877 r_dcache_vci_sc_req = true; 3878 r_dcache_vci_paddr = r_dcache_dirty_paddr.read(); 3879 r_dcache_vci_sc_old = pte; 3880 r_dcache_vci_sc_new = pte | PTE_D_MASK; 3881 r_dcache_fsm = DCACHE_DIRTY_SC_WAIT; 3905 3882 3906 3883 #if DEBUG_DCACHE 3907 3884 if ( m_debug_dcache_fsm ) 3908 3885 { 3909 std::cout << " <PROC.DCACHE_DIRTY_TLB_SET> Set dirty bit in dtlb:" << std::dec 3910 << " / tlb_set = " << r_dcache_p2_set.read() 3911 << " / tlb_way = " << r_dcache_p2_way.read() << std::endl; 3912 r_dtlb.printTrace(); 3913 std::cout << " and get PTE in dcache" << std::hex 3914 << " / PADDR = " << r_dcache_p2_pte_paddr.read() 3915 << " / PTE = " << pte << std::dec 3916 << " / set = " << set 3917 << " / way = " << way << std::endl; 3886 std::cout << " <PROC.DCACHE_DIRTY_GET_PTE> Get PTE in dcache" << std::hex 3887 << " / PTE_PADDR = " << r_dcache_dirty_paddr.read() 3888 << " / PTE_VALUE = " << pte << std::dec 3889 << " / CACHE_SET = " << set 3890 << " / CACHE_WAY = " << way << std::endl; 3918 3891 } 3919 3892 #endif 3920 assert( hit and "error in DCACHE_DIRTY_TLB_SET: the PTE 
should be in dcache" ); 3921 3922 r_dcache_p2_way = way; // register pte way in dcache 3923 r_dcache_p2_set = set; // register pte set in dcache; 3924 r_dcache_p2_word = word; // register pte word in dcache; 3925 r_dcache_p2_pte_value = pte; // register pte value 3926 r_dcache_fsm = DCACHE_DIRTY_CACHE_SET; 3927 break; 3928 } 3929 //////////////////////////// 3930 case DCACHE_DIRTY_CACHE_SET: // set PTE dirty bit in dcache 3931 // request SC tranansaction to CMD FSM 3932 { 3933 // set PTE dirty bit in dcache 3934 r_dcache.write( r_dcache_p2_way.read(), 3935 r_dcache_p2_set.read(), 3936 r_dcache_p2_word.read(), 3937 r_dcache_p2_pte_value.read() | PTE_D_MASK, 3938 0xF ); 3939 3940 #ifdef INSTRUMENTATION 3941 m_cpt_dcache_data_write++; 3942 #endif 3943 // request sc transaction to CMD_FSM 3944 r_dcache_vci_sc_req = true; 3945 r_dcache_vci_paddr = r_dcache_p2_pte_paddr.read(); 3946 r_dcache_vci_sc_old = r_dcache_p2_pte_value.read(); 3947 r_dcache_vci_sc_new = r_dcache_p2_pte_value.read() | PTE_D_MASK; 3948 r_dcache_fsm = DCACHE_DIRTY_SC_WAIT; 3893 break; 3894 } 3895 ////////////////////////// 3896 case DCACHE_DIRTY_SC_WAIT: // wait completion of SC for PTE Dirty bit 3897 // If the PTE update is a success, return to IDLE state. 3898 // If the PTE update is a failure, invalidate the cache line 3899 // in DCACHE and invalidate the matching TLB entries. 
3900 { 3901 // external coherence request 3902 if ( r_tgt_dcache_req ) 3903 { 3904 r_dcache_fsm = DCACHE_CC_CHECK; 3905 r_dcache_fsm_save = r_dcache_fsm; 3906 break; 3907 } 3908 3909 if ( r_vci_rsp_data_error.read() ) // bus error 3910 { 3911 std::cout << "BUS ERROR in DCACHE_DIRTY_SC_WAIT state" << std::endl; 3912 std::cout << "This should not happen in this state" << std::endl; 3913 exit(0); 3914 } 3915 else if ( r_vci_rsp_fifo_dcache.rok() ) // response available 3916 { 3917 vci_rsp_fifo_dcache_get = true; 3918 if ( r_vci_rsp_fifo_dcache.read() == 0 ) // exit if dirty bit update atomic 3919 { 3920 r_dcache_fsm = DCACHE_IDLE; 3949 3921 3950 3922 #if DEBUG_DCACHE 3951 3923 if ( m_debug_dcache_fsm ) 3952 3924 { 3953 std::cout << " <PROC.DCACHE_DIRTY_CACHE_SET> Set PTE dirty bit in dcache" 3954 << " / way = " << r_dcache_p2_way.read() 3955 << " / set = " << r_dcache_p2_set.read() 3956 << " / word = " << r_dcache_p2_word.read() << std::endl; 3957 std::cout << " and request SC transaction for dirty bit update" 3958 << " / address = " << r_dcache_p2_pte_paddr.read() 3959 << " / old = " << r_dcache_p2_pte_value.read() 3960 << " / new = " << (r_dcache_p2_pte_value.read() | PTE_D_MASK) << std::endl; 3925 std::cout << " <PROC.DCACHE_DIRTY_SC_WAIT> Dirty bit successfully set" 3926 << std::endl; 3961 3927 } 3962 3928 #endif 3963 break; 3964 } 3965 ////////////////////////// 3966 case DCACHE_DIRTY_SC_WAIT: // wait completion of SC for PTE Dirty bit 3967 // if atomic, write completed : return to IDLE state 3968 // else, read the mofified PTE to retry the SC 3969 { 3970 // external coherence request 3971 if ( r_tgt_dcache_req ) 3972 { 3973 r_dcache_fsm = DCACHE_CC_CHECK; 3974 r_dcache_fsm_save = r_dcache_fsm; 3975 break; 3976 } 3977 3978 if ( r_vci_rsp_data_error.read() ) // bus error 3979 { 3980 std::cout << "BUS ERROR in DCACHE_DIRTY_SC_WAIT state" << std::endl; 3981 std::cout << "This should not happen in this state" << std::endl; 3982 exit(0); 3983 } 3984 else if ( 
r_vci_rsp_fifo_dcache.rok() ) // response available 3985 { 3986 vci_rsp_fifo_dcache_get = true; 3987 if ( r_vci_rsp_fifo_dcache.read() == 0 ) // exit if dirty bit update atomic 3988 { 3989 if ( r_dcache_p2_type_sc.read() ) // long write for SC request 3929 } 3930 else // invalidate the cache line and TLBs 3931 { 3932 paddr_t nline; 3933 size_t way = r_dcache_dirty_way.read(); 3934 size_t set = r_dcache_dirty_set.read(); 3935 3936 r_dcache.inval( r_dcache_dirty_way.read(), 3937 r_dcache_dirty_set.read(), 3938 &nline ); 3939 3940 if ( r_dcache_in_tlb[way*m_dcache_sets+set] ) // contains PTE 3990 3941 { 3991 m_drsp.valid = true; 3992 m_drsp.rdata = ( r_dcache_p2_sc_success.read() ? 0 : 1 ); 3942 r_dcache_tlb_inval_line = nline; 3943 r_dcache_tlb_inval_count = 0; 3944 r_dcache_fsm_save = DCACHE_IDLE; 3945 r_dcache_fsm = DCACHE_INVAL_TLB_SCAN; 3946 r_dcache_in_tlb[way*m_dcache_sets+set] = false; 3947 } 3948 if ( r_dcache_contains_ptd[way*m_dcache_sets+set] ) // contains PTD 3949 { 3950 r_itlb.reset(); 3951 r_dtlb.reset(); 3993 3952 r_dcache_fsm = DCACHE_IDLE; 3994 3953 } 3995 else // long write for WRITE request 3996 { 3997 r_dcache_fsm = DCACHE_IDLE; 3998 } 3999 } 4000 else // retry if dirty bit update failed 4001 { 4002 r_dcache_vci_paddr = r_dcache_p2_pte_paddr; 4003 r_dcache_vci_unc_req = true; 4004 r_dcache_vci_unc_be = 0xF; 4005 r_dcache_fsm = DCACHE_DIRTY_UNC_WAIT; 4006 } 4007 } 4008 break; 4009 } 4010 /////////////////////////// 4011 case DCACHE_DIRTY_UNC_WAIT: // wait completion of PTE read 4012 // and retry a SC request to 4013 // set the dirty bit in the PTE 4014 { 4015 // external coherence request 4016 if ( r_tgt_dcache_req ) 4017 { 4018 r_dcache_fsm = DCACHE_CC_CHECK; 4019 r_dcache_fsm_save = r_dcache_fsm; 4020 break; 4021 } 4022 4023 if ( r_vci_rsp_data_error.read() ) // bus error 4024 { 4025 std::cout << "BUS ERROR in DCACHE_DIRTY_UNC_WAIT state" << std::endl; 4026 std::cout << "This should not happen in this state" << std::endl; 4027 exit(0); 4028 } 
4029 if ( r_vci_rsp_fifo_dcache.rok() ) // PTE available 4030 { 4031 r_dcache_vci_sc_req = true; 4032 r_dcache_vci_sc_old = r_vci_rsp_fifo_dcache.read(); 4033 r_dcache_vci_sc_new = r_vci_rsp_fifo_dcache.read() | PTE_D_MASK; 4034 r_dcache_fsm = DCACHE_DIRTY_SC_WAIT; 3954 #if DEBUG_DCACHE 3955 if ( m_debug_dcache_fsm ) 3956 { 3957 std::cout << " <PROC.DCACHE_DIRTY_SC_WAIT> PTE modified : Inval cache line & TLBs" 3958 << std::endl; 3959 } 3960 #endif 3961 } 4035 3962 } 4036 3963 break; … … 4128 4055 size_t way = r_dcache_cc_way.read(); 4129 4056 size_t set = r_dcache_cc_set.read(); 4130 bool inval_itlb = false;4131 bool inval_dtlb = false;4132 4057 4133 4058 r_dcache.inval( way, … … 4135 4060 &nline ); 4136 4061 4137 // possible itlb & dtlb invalidate requests 4138 4139 if ( (r_mmu_mode.read() & DATA_TLB_MASK) and r_dcache_in_dtlb[way*m_dcache_sets+set] ) 4140 { 4141 r_dtlb.reset_bypass(nline); 4142 r_dcache_in_dtlb[way*m_dcache_sets+set] = false; 4143 inval_dtlb = true; 4144 } 4145 if ( (r_mmu_mode.read() & INS_TLB_MASK) and r_dcache_in_itlb[m_dcache_sets*way+set] ) 4146 { 4147 r_itlb.reset_bypass(nline); 4148 r_dcache_in_itlb[way*m_dcache_sets+set] = false; 4149 inval_itlb = true; 4150 } 4151 4152 if ( inval_itlb or inval_dtlb ) // no valid response until itlb / dtlb invalidated 4153 { 4154 r_dcache_itlb_inval_req = inval_itlb; 4155 r_dcache_dtlb_inval_req = inval_dtlb; 4062 // possible itlb & dtlb invalidate 4063 if ( r_dcache_in_tlb[way*m_dcache_sets+set] ) // selective inval 4064 { 4156 4065 r_dcache_tlb_inval_line = nline; 4157 4066 r_dcache_tlb_inval_count = 0; 4158 4067 r_dcache_fsm = DCACHE_INVAL_TLB_SCAN; 4159 } 4160 else 4161 { 4068 r_dcache_in_tlb[way*m_dcache_sets+set] = false; 4069 } 4070 else if ( r_dcache_contains_ptd[way*m_dcache_sets+set] ) // flush 4071 { 4072 r_itlb.reset(); 4073 r_dtlb.reset(); 4162 4074 r_tgt_dcache_rsp = true; 4163 4075 r_tgt_dcache_req = false; 4164 4076 r_dcache_fsm = r_dcache_fsm_save.read(); 4165 4077 } 4078 else // no inval 
4079 { 4080 r_tgt_dcache_rsp = true; 4081 r_tgt_dcache_req = false; 4082 r_dcache_fsm = r_dcache_fsm_save.read(); 4083 } 4166 4084 4167 4085 #if DEBUG_DCACHE 4168 4086 if ( m_debug_dcache_fsm ) 4169 4087 { 4170 std::cout << " <PROC.DCACHE_CC_INVAL> Invalidate cache line :" << std::dec 4171 << " way = " << way 4172 << " / set = " << set; 4173 if ( inval_itlb ) std::cout << " / itlb inval required"; 4174 if ( inval_dtlb ) std::cout << " / dtlb inval required"; 4175 std::cout << std::endl; 4088 std::cout << " <PROC.DCACHE_CC_INVAL> Invalidate cache line" << std::dec 4089 << " / WAY = " << way 4090 << " / SET = " << set << std::endl; 4176 4091 } 4177 4092 #endif … … 4186 4101 size_t way = r_dcache_cc_way.read(); 4187 4102 size_t set = r_dcache_cc_set.read(); 4188 bool inval_itlb = false;4189 bool inval_dtlb = false;4190 4103 paddr_t nline = r_tgt_paddr.read() >> (uint32_log2(m_dcache_words)+2); 4191 4104 … … 4202 4115 if ( word == r_tgt_word_max.read() ) // last word 4203 4116 { 4204 // possible itlb & dtlb invalidate requests 4205 4206 if ( (r_mmu_mode.read() & DATA_TLB_MASK) and r_dcache_in_dtlb[way*m_dcache_sets+set] ) 4207 { 4208 r_dtlb.reset_bypass(nline); 4209 r_dcache_in_dtlb[way*m_dcache_sets+set] = false; 4210 inval_dtlb = true; 4211 } 4212 if ( (r_mmu_mode.read() & INS_TLB_MASK) and r_dcache_in_itlb[way*m_dcache_sets+set] ) 4213 { 4214 r_itlb.reset_bypass(nline); 4215 r_dcache_in_itlb[way*m_dcache_sets+set] = false; 4216 inval_itlb = true; 4217 } 4218 4219 if ( inval_itlb or inval_dtlb ) // no valid response until itlb / dtlb invalidated 4220 { 4221 r_dcache_itlb_inval_req = inval_itlb; 4222 r_dcache_dtlb_inval_req = inval_dtlb; 4117 // possible itlb & dtlb invalidate 4118 if ( r_dcache_in_tlb[way*m_dcache_sets+set] ) // selective inval 4119 { 4223 4120 r_dcache_tlb_inval_line = nline; 4224 4121 r_dcache_tlb_inval_count = 0; 4225 4122 r_dcache_fsm = DCACHE_INVAL_TLB_SCAN; 4226 } 4227 else 4228 { 4123 r_dcache_in_tlb[way*m_dcache_sets+set] = false; 4124 } 4125 
else if ( r_dcache_contains_ptd[way*m_dcache_sets+set] ) // flush 4126 { 4127 r_itlb.reset(); 4128 r_dtlb.reset(); 4229 4129 r_tgt_dcache_rsp = true; 4230 4130 r_tgt_dcache_req = false; 4231 4131 r_dcache_fsm = r_dcache_fsm_save.read(); 4232 4132 } 4133 else // no inval 4134 { 4135 r_tgt_dcache_rsp = true; 4136 r_tgt_dcache_req = false; 4137 r_dcache_fsm = r_dcache_fsm_save.read(); 4138 } 4233 4139 } 4234 4140 … … 4236 4142 if ( m_debug_dcache_fsm ) 4237 4143 { 4238 std::cout << " <PROC.DCACHE_CC_UPDT> Update one word :" << std::dec 4239 << " way = " << way 4240 << " / set = " << set 4241 << " / word = " << word 4242 << " / value = " << std::hex << r_tgt_buf[word]; 4243 if ( inval_itlb ) std::cout << " / itlb inval required"; 4244 if ( inval_dtlb ) std::cout << " / dtlb inval required"; 4245 std::cout << std::endl; 4144 std::cout << " <PROC.DCACHE_CC_UPDT> Update one word" << std::dec 4145 << " / WAY = " << way 4146 << " / SET = " << set 4147 << " / WORD = " << word 4148 << " / VALUE = " << std::hex << r_tgt_buf[word] << std::endl; 4246 4149 } 4247 4150 #endif … … 4250 4153 } 4251 4154 /////////////////////////// 4252 case DCACHE_INVAL_TLB_SCAN: // scan sequencially all TLB entries for both ITLB & DTLB4155 case DCACHE_INVAL_TLB_SCAN: // Scan sequencially all TLB entries for both ITLB & DTLB 4253 4156 // It makes the assumption that (m_itlb_sets == m_dtlb_sets) 4254 4157 // and (m_itlb_ways == m_dtlb_ways) 4255 // Caution : we enter this state when a DCACHE line is modified,4158 // We enter this state when a DCACHE line is modified, 4256 4159 // and there is a copy in itlb or dtlb. 4257 // It can be caused by a coherence transaction, a XTN inval 4258 // or a WRITE hit. 
Input arguments are: 4160 // It can be caused by: 4161 // - a coherence inval or updt transaction, 4162 // - a line inval caused by a cache miss 4163 // - a processor XTN inval request, 4164 // - a WRITE hit, 4165 // - a Dirty bit update failure 4166 // Input arguments are: 4259 4167 // - r_dcache_tlb_inval_line 4260 4168 // - r_dcache_tlb_inval_count 4261 // - r_dcache_itlb_inval_req4262 // - r_dcache_dtlb_inval_req4263 4169 // - r_dcache_fsm_save 4264 4170 { … … 4266 4172 size_t way = r_dcache_tlb_inval_count.read()/m_itlb_sets; // way 4267 4173 size_t set = r_dcache_tlb_inval_count.read()%m_itlb_sets; // set 4268 4269 if ( r_dcache_itlb_inval_req.read() ) 4270 { 4271 bool ok = r_itlb.inval( line, 4272 way, 4273 set ); 4174 bool ok; 4175 4176 ok = r_itlb.inval( line, 4177 way, 4178 set ); 4274 4179 #if DEBUG_DCACHE 4275 4180 if ( m_debug_dcache_fsm and ok ) … … 4282 4187 } 4283 4188 #endif 4284 } 4285 4286 if ( r_dcache_dtlb_inval_req.read() ) 4287 { 4288 bool ok = r_dtlb.inval( line, 4289 way, 4290 set ); 4189 ok = r_dtlb.inval( line, 4190 way, 4191 set ); 4291 4192 #if DEBUG_DCACHE 4292 4193 if ( m_debug_dcache_fsm and ok ) … … 4299 4200 } 4300 4201 #endif 4301 }4302 4202 4303 4203 // return to the calling state when TLB inval completed 4304 4204 if ( r_dcache_tlb_inval_count.read() == (m_dtlb_sets*m_dtlb_ways-1) ) 4305 4205 { 4306 if ( r_ dcache_fsm_save.read() != DCACHE_XTN_DC_INVAL_END ) // not an XTN inval4206 if ( r_tgt_dcache_req.read() ) // It's a coherence request 4307 4207 { 4308 4208 r_tgt_dcache_rsp = true; 4309 4209 r_tgt_dcache_req = false; 4310 4210 } 4311 r_dcache_dtlb_inval_req = false;4312 r_dcache_itlb_inval_req = false;4313 4211 r_dcache_fsm = r_dcache_fsm_save.read(); 4314 4212 }
Note: See TracChangeset
for help on using the changeset viewer.