@comment{
  Computer-architecture bibliography, kept in roughly chronological order.

  Conventions used in this file:
    - one field per line; recurring venue names come from the string macros below;
    - conference papers are inproceedings entries with the venue in booktitle;
    - titles protect only acronyms and proper nouns with braces;
    - conference locations live in the venue field (ignored by classic styles),
      publisher cities in address.

  Same work kept under two keys on purpose, so existing citations keep resolving:
    1996_wallace / wallace1996srf      2000_sima / sima2000dsr

  Values supplied from the published record, not from the original export;
  re-verify before relying on them:
    1981_smith (booktitle), 1990_alverson (venue, pages, year, author order),
    gallagher1994dmd (last author), song1994prm (pages), 1995_edmondson (pages),
    adve1996smc, 2000_schlansker, keltcher2003aop, 2005_kumar,
    sangireddy2006rrl (volume, number), 1996_mudge (title, volume, number),
    1998a_hammond (title), mikhail1001dmo (howpublished),
    1999_schlansker (number), 2000_hammond (volume, number, pages),
    2000_sharangpani (second author), 2001_hinton (volume, number),
    darsch2003oop (institution), dehnert2003tcm (booktitle),
    cristal2004ooc, 2005_sinharoy, 2007_le (page ranges),
    mattsson:esc (type, year), 2004_wang (last author), 2005_ARM, 2007_ARM (year),
    yiannacouras2005mfb (booktitle), eisen:ipa (journal, volume, number, year),
    hardware6mom (author), sethumadhavan2007lbe (booktitle wording).
}

@string{computer = {Computer}}
@string{ieeemicro = {IEEE Micro}}
@string{ieeetc = {IEEE Transactions on Computers}}
@string{sigarch = {ACM SIGARCH Computer Architecture News}}
@string{ibmjrd = {IBM Journal of Research and Development}}
@string{itj = {Intel Technology Journal}}
@string{isca = {Annual International Symposium on Computer Architecture}}
@string{micro = {Annual {IEEE/ACM} International Symposium on Microarchitecture}}
@string{asplos = {International Conference on Architectural Support for Programming Languages and Operating Systems}}

@inproceedings{1981_smith,
  author       = {Smith, J. E.},
  title        = {A study of branch prediction strategies},
  booktitle    = {Proceedings of the 8th Annual Symposium on Computer Architecture},
  pages        = {135--148},
  year         = {1981},
  publisher    = {IEEE},
}

@inproceedings{1983_fisher,
  author       = {Fisher, J. A.},
  title        = {{Very Long Instruction Word} architectures and the {ELI-512}},
  booktitle    = "Proceedings of the 10th " # isca,
  pages        = {140--150},
  year         = {1983},
  publisher    = {IEEE Computer Society Press},
  address      = {Los Alamitos, CA, USA},
}

@techreport{1983_Lee,
  author       = {Lee, J. K. F. and Smith, A. J.},
  title        = {Analysis of Branch Prediction Strategies and Branch Target Buffer Design},
  institution  = {Computer Science Division (EECS), University of California},
  year         = {1983},
}

@article{lee1984bps,
  author       = {Lee, J. K. F. and Smith, A. J.},
  title        = {Branch Prediction Strategies and Branch Target Buffer Design},
  journal      = computer,
  volume       = {17},
  number       = {1},
  pages        = {6--22},
  year         = {1984},
}

@article{smith1984dae,
  author       = {Smith, J. E.},
  title        = {Decoupled Access/Execute Computer Architectures},
  journal      = {ACM Transactions on Computer Systems},
  volume       = {2},
  number       = {4},
  pages        = {289--308},
  year         = {1984},
}

@inproceedings{ditzel1987bfc,
  author       = {Ditzel, D. R. and McLellan, H. R.},
  title        = {Branch folding in the {CRISP} microprocessor: reducing branch delay to zero},
  booktitle    = "Proceedings of the 14th " # isca,
  pages        = {2--8},
  year         = {1987},
  publisher    = {ACM},
  address      = {New York, NY, USA},
}

@inproceedings{1990_alverson,
  author       = {Alverson, R. and Callahan, D. and Cummings, D. and Koblenz, B. and Porterfield, A. and Smith, B.},
  title        = {The {Tera} Computer System},
  booktitle    = {Proceedings of the 4th International Conference on Supercomputing},
  pages        = {1--6},
  year         = {1990},
}

@inproceedings{1991_kaeli,
  author       = {Kaeli, D. R. and Emma, P. G.},
  title        = {Branch history table prediction of moving target branches due to subroutine returns},
  booktitle    = "Proceedings of the 18th " # isca,
  pages        = {34--42},
  year         = {1991},
  publisher    = {ACM Press},
  address      = {New York, NY, USA},
}

@article{scherson1991ogc,
  author       = {Scherson, I. D.},
  title        = {Orthogonal graphs for the construction of a class of interconnection networks},
  journal      = {IEEE Transactions on Parallel and Distributed Systems},
  volume       = {2},
  number       = {1},
  pages        = {3--19},
  year         = {1991},
}

@inproceedings{1991_wall,
  author       = {Wall, David W.},
  title        = {Limits of instruction-level parallelism},
  booktitle    = "Proceedings of the Fourth " # asplos,
  pages        = {176--188},
  year         = {1991},
}

@inproceedings{1992_pan,
  author       = {Pan, S. T. and So, K. and Rahmeh, J. T.},
  title        = {Improving the accuracy of dynamic branch prediction using branch correlation},
  booktitle    = "Proceedings of the Fifth " # asplos,
  pages        = {76--84},
  year         = {1992},
  publisher    = {ACM Press},
  address      = {New York, NY, USA},
}

@inproceedings{1992_yeh,
  author       = {Yeh, T. Y. and Patt, Y. N.},
  title        = {Alternative Implementations of Two-Level Adaptive Branch Prediction},
  booktitle    = "Proceedings of the 19th " # isca,
  pages        = {124--134},
  year         = {1992},
}

@techreport{1993_mcfarling,
  author       = {McFarling, S.},
  title        = {Combining Branch Predictors},
  type         = {{WRL} Technical Note},
  number       = {TN-36},
  institution  = {Digital Equipment Corporation},
  year         = {1993},
  month        = jun,
}

@article{mclellan1993aaa,
  author       = {McLellan, E.},
  title        = {The {Alpha AXP} architecture and 21064 processor},
  journal      = ieeemicro,
  volume       = {13},
  number       = {3},
  pages        = {36--47},
  year         = {1993},
}

@inproceedings{moudgill1993rra,
  author       = {Moudgill, M. and Pingali, K. and Vassiliadis, S.},
  title        = {Register renaming and dynamic speculation: an alternative approach},
  booktitle    = "Proceedings of the 26th " # micro,
  pages        = {202--213},
  year         = {1993},
  publisher    = {IEEE Computer Society Press},
  address      = {Los Alamitos, CA, USA},
}

@article{1993_Perleberg,
  author       = {Perleberg, C. H. and Smith, A. J.},
  title        = {Branch target buffer design and optimization},
  journal      = ieeetc,
  volume       = {42},
  number       = {4},
  pages        = {396--412},
  year         = {1993},
}

@inproceedings{1993_yeh,
  author       = {Yeh, T. Y. and Patt, Y. N.},
  title        = {A comparison of dynamic branch predictors that use two levels of branch history},
  booktitle    = "Proceedings of the 20th " # isca,
  pages        = {257--266},
  year         = {1993},
  publisher    = {ACM Press},
  address      = {New York, NY, USA},
}

@inproceedings{gallagher1994dmd,
  author       = {Gallagher, D. M. and Chen, W. Y. and Mahlke, S. A. and Gyllenhaal, J. C. and Hwu, Wen-mei W.},
  title        = {Dynamic memory disambiguation using the memory conflict buffer},
  booktitle    = "Proceedings of the Sixth " # asplos,
  pages        = {183--193},
  year         = {1994},
  publisher    = {ACM Press},
  address      = {New York, NY, USA},
}

@article{song1994prm,
  author       = {Song, S. P. and Denman, M. and Chang, J.},
  title        = {The {PowerPC} 604 {RISC} microprocessor},
  journal      = ieeemicro,
  volume       = {14},
  number       = {5},
  pages        = {8--17},
  year         = {1994},
}

@article{diep1995pep,
  author       = {Diep, T. A. and Nelson, C. and Shen, J. P.},
  title        = {Performance evaluation of the {PowerPC} 620 microarchitecture},
  journal      = sigarch,
  volume       = {23},
  number       = {2},
  pages        = {163--174},
  year         = {1995},
  publisher    = {ACM},
  address      = {New York, NY, USA},
}

@article{1995_edmondson,
  author       = {Edmondson, J. H. and Rubinfeld, P. I. and Bannon, P. J. and Benschneider, B. J. and Bernstein, D. and Castelino, R. W. and Cooper, E. M. and Dever, D. E. and Donchin, D. R. and Fischer, T. C. and others},
  title        = {Internal Organization of the {Alpha} 21164, a 300-{MHz} 64-bit Quad-issue {CMOS} {RISC} Microprocessor},
  journal      = {Digital Technical Journal},
  volume       = {7},
  number       = {1},
  pages        = {119--135},
  year         = {1995},
}

@inproceedings{1995_sohi,
  author       = {Sohi, G. S. and Breach, S. E. and Vijaykumar, T. N.},
  title        = {Multiscalar processors},
  booktitle    = "Proceedings of the 22nd " # isca,
  pages        = {414--425},
  year         = {1995},
  month        = jun,
  venue        = {Santa Margherita Ligure, Italy},
}

@inproceedings{1995_tullsen,
  author       = {Tullsen, D. M. and Eggers, S. J. and Levy, H. M.},
  title        = {Simultaneous multithreading: maximizing on-chip parallelism},
  booktitle    = "Proceedings of the 22nd " # isca,
  pages        = {392--403},
  year         = {1995},
  publisher    = {ACM},
  address      = {New York, NY, USA},
}

@article{adve1996smc,
  author       = {Adve, S. V. and Gharachorloo, K.},
  title        = {Shared Memory Consistency Models: A Tutorial},
  journal      = computer,
  volume       = {29},
  number       = {12},
  pages        = {66--76},
  year         = {1996},
  publisher    = {IEEE Computer Society},
}

@inproceedings{farkas1996rfd,
  author       = {Farkas, K. I. and Jouppi, N. P. and Chow, P.},
  title        = {Register File Design Considerations in Dynamically Scheduled Processors},
  booktitle    = {Proceedings of the Second {IEEE} Symposium on High-Performance Computer Architecture},
  pages        = {40--51},
  year         = {1996},
}

@article{1996_mudge,
  author       = {Mudge, Trevor},
  title        = {Strategic directions in computer architecture},
  journal      = {ACM Computing Surveys},
  volume       = {28},
  number       = {4},
  pages        = {671--678},
  year         = {1996},
  month        = dec,
  publisher    = {ACM Press},
  address      = {New York, NY, USA},
  note         = {Special {ACM} 50th-anniversary issue: strategic directions in computing research},
}

@inproceedings{1996_olukotun,
  author       = {Olukotun, Kunle and Nayfeh, Basem A. and Hammond, Lance and Wilson, Ken and Chang, Kunyung},
  title        = {The case for a single-chip multiprocessor},
  booktitle    = "Proceedings of the Seventh " # asplos,
  pages        = {2--11},
  year         = {1996},
  publisher    = {ACM Press},
  venue        = {Cambridge, Massachusetts, United States},
  isbn         = {0-89791-767-7},
}

@inproceedings{1996_tullsen,
  author       = {Tullsen, D. M. and Eggers, S. J. and Emer, J. S. and Levy, H. M. and Lo, J. L. and Stamm, R. L.},
  title        = {Exploiting choice: instruction fetch and issue on an implementable simultaneous multithreading processor},
  booktitle    = "Proceedings of the 23rd " # isca,
  pages        = {191--202},
  year         = {1996},
  publisher    = {ACM Press},
  address      = {New York, NY, USA},
}

@inproceedings{1996_wallace,
  author       = {Wallace, S. and Bagherzadeh, N.},
  title        = {A scalable register file architecture for dynamically scheduled processors},
  booktitle    = {Proceedings of the 1996 Conference on Parallel Architectures and Compilation Techniques},
  pages        = {179--184},
  year         = {1996},
  month        = oct,
  venue        = {Boston, MA, USA},
}

@inproceedings{wallace1996srf,
  author       = {Wallace, S. and Bagherzadeh, N.},
  title        = {A scalable register file architecture for dynamically scheduled processors},
  booktitle    = {Proceedings of the 1996 Conference on Parallel Architectures and Compilation Techniques},
  pages        = {179--184},
  year         = {1996},
  month        = oct,
  venue        = {Boston, MA, USA},
  annote       = {Same work as 1996_wallace; both keys are kept for existing citations},
}

@article{1996_yeager,
  author       = {Yeager, K. C.},
  title        = {The {MIPS R10000} superscalar microprocessor},
  journal      = ieeemicro,
  volume       = {16},
  number       = {2},
  pages        = {28--41},
  year         = {1996},
}

@article{burger1997sts,
  author       = {Burger, D. and Austin, T. M.},
  title        = {The {SimpleScalar} tool set, version 2.0},
  journal      = sigarch,
  volume       = {25},
  number       = {3},
  pages        = {13--25},
  year         = {1997},
  publisher    = {ACM Press},
  address      = {New York, NY, USA},
}

@inproceedings{1997_palacharla,
  author       = {Palacharla, S. and Jouppi, N. P. and Smith, J. E.},
  title        = {Complexity-Effective Superscalar Processors},
  booktitle    = "Proceedings of the 24th " # isca,
  pages        = {206--218},
  year         = {1997},
  month        = jun,
}

@inproceedings{1998a_hammond,
  author       = {Hammond, Lance and Willey, Mark and Olukotun, Kunle},
  title        = {Data speculation support for a chip multiprocessor},
  booktitle    = "Proceedings of the Eighth " # asplos,
  pages        = {58--69},
  year         = {1998},
  publisher    = {ACM Press},
  venue        = {San Jose, California, United States},
  issn         = {0163-5980},
}

@techreport{1998b_hammond,
  author       = {Hammond, Lance and Olukotun, Kunle},
  title        = {Considerations in the design of {Hydra}: a multiprocessor-on-a-chip microarchitecture},
  institution  = {Stanford University},
  number       = {CSL-TR-98-749},
  year         = {1998},
  month        = feb,
}

@article{chrysos1998mdp,
  author       = {Chrysos, G. Z. and Emer, J. S.},
  title        = {Memory dependence prediction using store sets},
  journal      = sigarch,
  volume       = {26},
  number       = {3},
  pages        = {142--153},
  year         = {1998},
}

@inproceedings{1998_kessler,
  author       = {Kessler, R. E. and McLellan, E. J. and Webb, D. A.},
  title        = {The {Alpha} 21264 microprocessor architecture},
  booktitle    = {Proceedings of the International Conference on Computer Design: {VLSI} in Computers and Processors ({ICCD}'98)},
  pages        = {90--95},
  year         = {1998},
}

@inproceedings{1998_krishnan,
  author       = {Krishnan, V. and Torrellas, J.},
  title        = {A clustered approach to multithreaded processors},
  booktitle    = {Proceedings of the First Merged International Parallel Processing Symposium and Symposium on Parallel and Distributed Processing ({IPPS/SPDP} 1998)},
  pages        = {627--634},
  year         = {1998},
  month        = mar,
  venue        = {Orlando, FL, USA},
}

@misc{mikhail1001dmo,
  author       = {Mikhail, J. and Karl, E. and Dreslinski, R. and Davidson, G.},
  title        = {Design of a 290 {MHz} Out of Order Microprocessor with Register Renaming and Speculative Memory Access},
  howpublished = {University of Michigan, Ann Arbor, MI, USA},
  annote       = {Original export had journal Ann Arbor, volume 1001, pages 48109 (a mis-parsed postal address); institution inferred from that address and year unknown -- confirm},
}

@inproceedings{1998_skadron,
  author       = {Skadron, K. and Ahuja, P. S. and Martonosi, M. and Clark, D. W.},
  title        = {Improving Prediction for Procedure Returns with Return-Address-Stack Repair Mechanisms},
  booktitle    = "Proceedings of the 31st " # micro,
  pages        = {259--271},
  year         = {1998},
  publisher    = {Association for Computing Machinery},
}

@article{farkas1999mar,
  author       = {Farkas, K. I. and Chow, P. and Jouppi, N. P. and Vranesic, Z.},
  title        = {The Multicluster Architecture: Reducing Processor Cycle Time Through Partitioning},
  journal      = {International Journal of Parallel Programming},
  volume       = {27},
  number       = {5},
  pages        = {327--356},
  year         = {1999},
  publisher    = {Springer},
}

@techreport{1999_schlansker,
  author       = {Schlansker, M. S. and Rau, B. R.},
  title        = {{EPIC}: An Architecture for Instruction-Level Parallel Processors},
  type         = {{HP} Laboratories Technical Report},
  number       = {HPL-1999-111},
  institution  = {Hewlett-Packard Laboratories},
  year         = {1999},
}

@article{skadron1999bpi,
  author       = {Skadron, K. and Ahuja, P. S. and Martonosi, M. and Clark, D. W.},
  title        = {Branch prediction, instruction-window size, and cache size: performance trade-offs and simulation techniques},
  journal      = ieeetc,
  volume       = {48},
  number       = {11},
  pages        = {1260--1281},
  year         = {1999},
}

@inproceedings{2000_barroso,
  author       = {Barroso, L. A. and Gharachorloo, K. and McNamara, R. and Nowatzyk, A. and Qadeer, S. and Sano, B. and Smith, S. and Stets, R. and Verghese, B.},
  title        = {Piranha: a scalable architecture based on single-chip multiprocessing},
  booktitle    = "Proceedings of the 27th " # isca,
  pages        = {282--293},
  year         = {2000},
  publisher    = {ACM Press},
  address      = {New York, NY, USA},
}

@article{borkenhagen2000mpp,
  author       = {Borkenhagen, J. M. and Eickemeyer, R. J. and Kalla, R. N. and Kunkel, S. R.},
  title        = {A multithreaded {PowerPC} processor for commercial servers},
  journal      = ibmjrd,
  volume       = {44},
  number       = {6},
  pages        = {885--898},
  year         = {2000},
}

@article{cruz2000mbr,
  author       = {Cruz, J. L. and Gonzalez, A. and Valero, M. and Topham, N. P.},
  title        = {Multiple-banked register file architectures},
  journal      = sigarch,
  volume       = {28},
  number       = {2},
  pages        = {316--325},
  year         = {2000},
}

@article{2000_cvetanovic,
  author       = {Cvetanovic, Z. and Kessler, R. E.},
  title        = {Performance analysis of the {Alpha} 21264-based {Compaq ES40} system},
  journal      = sigarch,
  volume       = {28},
  number       = {2},
  pages        = {192--202},
  year         = {2000},
  publisher    = {ACM},
  address      = {New York, NY, USA},
}

@article{2000_hammond,
  author       = {Hammond, L. and Hubbert, B. A. and Siu, M. and Prabhu, M. K. and Chen, M. and Olukotun, K.},
  title        = {The {Stanford Hydra CMP}},
  journal      = ieeemicro,
  volume       = {20},
  number       = {2},
  pages        = {71--84},
  year         = {2000},
}

@techreport{klaiber2000tbc,
  author       = {Klaiber, A. and others},
  title        = {The Technology Behind {Crusoe} Processors},
  type         = {Technical Brief},
  institution  = {Transmeta Corporation},
  year         = {2000},
}

@article{2000_schlansker,
  author       = {Schlansker, M. S. and Rau, B. R.},
  title        = {{EPIC}: Explicitly Parallel Instruction Computing},
  journal      = computer,
  volume       = {33},
  number       = {2},
  pages        = {37--45},
  year         = {2000},
  publisher    = {IEEE Computer Society},
}

@article{2000_sharangpani,
  author       = {Sharangpani, H. and Arora, K.},
  title        = {{Itanium} processor microarchitecture},
  journal      = ieeemicro,
  volume       = {20},
  number       = {5},
  pages        = {24--43},
  year         = {2000},
}

@article{2000_sima,
  author       = {Sima, D.},
  title        = {The design space of register renaming techniques},
  journal      = ieeemicro,
  volume       = {20},
  number       = {5},
  pages        = {70--83},
  year         = {2000},
}

@article{sima2000dsr,
  author       = {Sima, D.},
  title        = {The design space of register renaming techniques},
  journal      = ieeemicro,
  volume       = {20},
  number       = {5},
  pages        = {70--83},
  year         = {2000},
  annote       = {Same work as 2000_sima; both keys are kept for existing citations},
}

@inproceedings{balasubramonian2001rcr,
  author       = {Balasubramonian, R. and Dwarkadas, S. and Albonesi, D. H.},
  title        = {Reducing the complexity of the register file in dynamic superscalar processors},
  booktitle    = "Proceedings of the 34th " # micro,
  pages        = {237--248},
  year         = {2001},
  publisher    = {IEEE Computer Society},
  address      = {Washington, DC, USA},
}

@inproceedings{2001_burns,
  author       = {Burns, J. and Gaudiot, J. L.},
  title        = {Area and system clock effects on {SMT/CMP} processors},
  booktitle    = {Proceedings of the 2001 International Conference on Parallel Architectures and Compilation Techniques},
  pages        = {211--218},
  year         = {2001},
}

@inproceedings{guthaus2001mfc,
  author       = {Guthaus, M. R. and Ringenberg, J. S. and Ernst, D. and Austin, T. M. and Mudge, T. and Brown, R. B.},
  title        = {{MiBench}: A free, commercially representative embedded benchmark suite},
  booktitle    = {Proceedings of the 2001 {IEEE} International Workshop on Workload Characterization ({WWC-4})},
  pages        = {3--14},
  year         = {2001},
}

@article{2001_hinton,
  author       = {Hinton, G. and Sager, D. and Upton, M. and Boggs, D. and Carmean, D. and Kyker, A. and Roussel, P.},
  title        = {The microarchitecture of the {Pentium} 4 processor},
  journal      = itj,
  volume       = {5},
  number       = {1},
  year         = {2001},
  annote       = {Original export had volume 1 and pages 2001, apparently a mis-parsed Q1 2001 issue label -- confirm volume and number},
}

@inproceedings{2001_nagarajan,
  author       = {Nagarajan, Ramadass and Sankaralingam, Karthikeyan and Burger, Doug and Keckler, Stephen W.},
  title        = {A design space evaluation of grid processor architectures},
  booktitle    = "Proceedings of the 34th " # micro,
  pages        = {40--51},
  year         = {2001},
  publisher    = {IEEE Computer Society},
  venue        = {Austin, Texas},
  isbn         = {0-7695-1369-7},
  issn         = {1072-4451},
}

@inproceedings{ernst2002eds,
  author       = {Ernst, D. and Austin, T.},
  title        = {Efficient dynamic scheduling through tag elimination},
  booktitle    = "Proceedings of the 29th " # isca,
  pages        = {37--46},
  year         = {2002},
}

@inproceedings{akkary2003cpa,
  author       = {Akkary, H. and Rajwar, R. and Srinivasan, S. T.},
  title        = {Checkpoint processing and recovery: towards scalable large instruction window processors},
  booktitle    = "Proceedings of the 36th " # micro,
  pages        = {423--434},
  year         = {2003},
}

@article{2002_mukherjee,
  author       = {Mukherjee, S. S. and Bannon, P. and Lang, S. and Spink, A. and Webb, D.},
  title        = {The {Alpha} 21364 network architecture},
  journal      = ieeemicro,
  volume       = {22},
  number       = {1},
  pages        = {26--35},
  year         = {2002},
}

@inproceedings{2002_sprangle,
  author       = {Sprangle, E. and Carmean, D.},
  title        = {Increasing processor performance by implementing deeper pipelines},
  booktitle    = "Proceedings of the 29th " # isca,
  pages        = {25--34},
  year         = {2002},
}

@article{2002_tendler,
  author       = {Tendler, J. M. and Dodson, J. S. and Fields, Jr., J. S. and Le, H. and Sinharoy, B.},
  title        = {{POWER4} system microarchitecture},
  journal      = ibmjrd,
  volume       = {46},
  number       = {1},
  pages        = {5--25},
  year         = {2002},
}

@article{2002_ungerer,
  author       = {Ungerer, T. and Robic, B. and Silc, J.},
  title        = {Multithreaded processors},
  journal      = {The Computer Journal},
  volume       = {45},
  number       = {3},
  pages        = {320--348},
  year         = {2002},
}

@techreport{darsch2003oop,
  author       = {Darsch, A. and Seznec, A.},
  title        = {Out-of-order Predicated Execution with Translation Register Buffer},
  type         = {Rapport technique},
  number       = {RR-1573},
  institution  = {IRISA},
  year         = {2003},
  annote       = {Institution is not in the original export (inferred from the report series) -- confirm},
}

@inproceedings{dehnert2003tcm,
  author       = {Dehnert, J. C. and Grant, B. K. and Banning, J. P. and Johnson, R. and Kistler, T. and Klaiber, A. and Mattson, J.},
  title        = {The {Transmeta Code Morphing Software}: using speculation, recovery, and adaptive retranslation to address real-life challenges},
  booktitle    = {Proceedings of the International Symposium on Code Generation and Optimization ({CGO})},
  series       = {ACM International Conference Proceeding Series},
  volume       = {37},
  pages        = {15--24},
  year         = {2003},
}

@misc{heinrich:smt,
  author       = {Heinrich, M.},
  title        = {Scalable Multi-threaded Multiprocessor Architectures},
  annote       = {No venue or year in the original export; locate the source before citing},
}

@inproceedings{2003_jeong,
  author       = {Jeong, J. and Dubois, M.},
  title        = {Cost-sensitive cache replacement algorithms},
  booktitle    = {Proceedings of the Ninth International Symposium on High-Performance Computer Architecture ({HPCA-9})},
  pages        = {327--337},
  year         = {2003},
}

@article{keltcher2003aop,
  author       = {Keltcher, C. N. and McGrath, K. J. and Ahmed, A. and Conway, P.},
  title        = {The {AMD Opteron} Processor for Multiprocessor Servers},
  journal      = ieeemicro,
  volume       = {23},
  number       = {2},
  pages        = {66--76},
  year         = {2003},
  publisher    = {IEEE Computer Society},
}

@article{2003_koufaty,
  author       = {Koufaty, D. and Marr, D. T.},
  title        = {Hyperthreading technology in the {NetBurst} microarchitecture},
  journal      = ieeemicro,
  volume       = {23},
  number       = {2},
  pages        = {56--65},
  year         = {2003},
}

@inproceedings{kumar2003sih,
  author       = {Kumar, R. and Farkas, K. I. and Jouppi, N. P. and Ranganathan, P. and Tullsen, D. M.},
  title        = {Single-{ISA} heterogeneous multi-core architectures: the potential for processor power reduction},
  booktitle    = "Proceedings of the 36th " # micro,
  pages        = {81--92},
  year         = {2003},
}

@article{2003_mcnairy,
  author       = {McNairy, C. and Soltis, D.},
  title        = {{Itanium} 2 processor microarchitecture},
  journal      = ieeemicro,
  volume       = {23},
  number       = {2},
  pages        = {44--55},
  year         = {2003},
}

@inproceedings{park2003rdc,
  author       = {Park, I. and Ooi, C. L. and Vijaykumar, T. N.},
  title        = {Reducing design complexity of the load/store queue},
  booktitle    = "Proceedings of the 36th " # micro,
  pages        = {411--422},
  year         = {2003},
  publisher    = {IEEE Computer Society},
  address      = {Washington, DC, USA},
}

@inproceedings{2003_sankaralingam,
  author       = {Sankaralingam, Karthikeyan and Nagarajan, Ramadass and Liu, Haiming and Kim, Changkyu and Huh, Jaehyuk and Burger, Doug and Keckler, Stephen W. and Moore, Charles R.},
  title        = {Exploiting {ILP}, {TLP}, and {DLP} with the polymorphous {TRIPS} architecture},
  booktitle    = "Proceedings of the 30th " # isca,
  pages        = {422--433},
  year         = {2003},
  month        = may,
  publisher    = {ACM Press},
  venue        = {San Diego, California},
  isbn         = {0-7695-1945-8},
  annote       = {Original export also recorded: Volume 31 Issue 2},
}

@inproceedings{sethumadhavan2003shm,
  author       = {Sethumadhavan, S. and Desikan, R. and Burger, D. and Moore, C. R. and Keckler, S. W.},
  title        = {Scalable hardware memory disambiguation for high {ILP} processors},
  booktitle    = "Proceedings of the 36th " # micro,
  pages        = {399--410},
  year         = {2003},
}

@inproceedings{2004_chaudhuri,
  author       = {Chaudhuri, Mainak and Heinrich, Mark},
  title        = {{SMTp}: An Architecture for Next-generation Scalable Multi-threading},
  booktitle    = "Proceedings of the 31st " # isca,
  pages        = {124--136},
  year         = {2004},
  publisher    = {IEEE Computer Society},
  venue        = {M{\"u}nchen, Germany},
}

@inproceedings{2004_collins,
  author       = {Collins, J. D. and Tullsen, D. M.},
  title        = {Clustered multithreaded architectures -- pursuing both {IPC} and cycle time},
  booktitle    = {Proceedings of the 18th International Parallel and Distributed Processing Symposium},
  year         = {2004},
}

@inproceedings{cristal2004ooc,
  author       = {Cristal, A. and Ortega, D. and Llosa, J. and Valero, M.},
  title        = {Out-of-Order Commit Processors},
  booktitle    = {Proceedings of the 10th International Symposium on High Performance Computer Architecture},
  pages        = {48--59},
  year         = {2004},
  publisher    = {IEEE Computer Society},
  address      = {Washington, DC, USA},
}

@article{2004_dolbeau,
  author       = {Dolbeau, R. and Seznec, A.},
  title        = {{CASH}: Revisiting hardware sharing in single-chip parallel processor},
  journal      = {Journal of Instruction-Level Parallelism},
  volume       = {6},
  pages        = {1--16},
  year         = {2004},
}

@article{2004_kalla,
  author       = {Kalla, R. and Sinharoy, B. and Tendler, J. M.},
  title        = {{IBM Power5} chip: a dual-core multithreaded processor},
  journal      = ieeemicro,
  volume       = {24},
  number       = {2},
  pages        = {40--47},
  year         = {2004},
}

@inproceedings{2004_kumar,
  author       = {Kumar, R. and Jouppi, N. P. and Tullsen, D. M.},
  title        = {Conjoined-Core Chip Multiprocessing},
  booktitle    = "Proceedings of the 37th " # micro,
  pages        = {195--206},
  year         = {2004},
  publisher    = {IEEE Computer Society},
  address      = {Washington, DC, USA},
}

@article{kumar2004sih,
  author       = {Kumar, R. and Tullsen, D. M. and Ranganathan, P. and Jouppi, N. P. and Farkas, K. I.},
  title        = {Single-{ISA} Heterogeneous Multi-Core Architectures for Multithreaded Workload Performance},
  journal      = sigarch,
  volume       = {32},
  number       = {2},
  year         = {2004},
  publisher    = {ACM},
  address      = {New York, NY, USA},
}

@mastersthesis{mattsson:esc,
  author       = {Mattsson, D. and Christensson, M.},
  title        = {Evaluation of synthesizable {CPU} cores},
  school       = {Chalmers tekniska h{\"o}gskola},
  year         = {2004},
  annote       = {Original export was a book entry without a year; thesis type and year are not in the export -- confirm},
}

@inproceedings{tune2004bmi,
  author       = {Tune, E. and Kumar, R. and Tullsen, D. M. and Calder, B.},
  title        = {Balanced Multithreading: Increasing Throughput via a Low Cost Multithreading Hierarchy},
  booktitle    = "Proceedings of the 37th " # micro,
  year         = {2004},
  publisher    = {IEEE},
}

@inproceedings{2004_wang,
  author       = {Wang, Nicholas J. and Quek, Justin and Rafacz, Todd M. and Patel, Sanjay J.},
  title        = {Characterizing the Effects of Transient Faults on a High-Performance Processor Pipeline},
  booktitle    = {Proceedings of the 2004 International Conference on Dependable Systems and Networks},
  year         = {2004},
  month        = jun,
  venue        = {Florence, Italy},
}

@techreport{2005_ARM,
  author       = {{ARM}},
  title        = {Architecture and Implementation of the {ARM Cortex-A8} Microprocessor},
  institution  = {ARM},
  year         = {2005},
}

@article{constantinou2005pis,
  author       = {Constantinou, T. and Sazeides, Y. and Michaud, P. and Fetis, D. and Seznec, A.},
  title        = {Performance implications of single thread migration on a chip multi-core},
  journal      = sigarch,
  volume       = {33},
  number       = {4},
  pages        = {80--91},
  year         = {2005},
  publisher    = {ACM},
  address      = {New York, NY, USA},
}

@inproceedings{dimond2005cct,
  author       = {Dimond, R. and Mencer, O. and Luk, W.},
  title        = {{CUSTARD} -- A Customisable Threaded {FPGA} Soft Processor and Tools},
  booktitle    = {International Conference on Field Programmable Logic ({FPL})},
  year         = {2005},
}

@book{2005_fisher,
  author       = {Fisher, Joseph A. and Faraboschi, Paolo and Young, Cliff},
  title        = {Embedded Computing: A {VLIW} Approach to Architecture, Compilers and Tools},
  publisher    = {Morgan Kaufmann Publishers},
  year         = {2005},
}

@article{2005_kahle,
  author       = {Kahle, J. and others},
  title        = {Introduction to the {Cell} multiprocessor},
  journal      = ibmjrd,
  volume       = {49},
  number       = {4},
  pages        = {589--604},
  year         = {2005},
}

@article{2005_kongetira,
  author       = {Kongetira, P. and Aingaran, K. and Olukotun, K.},
  title        = {{Niagara}: a 32-way multithreaded {Sparc} processor},
  journal      = ieeemicro,
  volume       = {25},
  number       = {2},
  pages        = {21--29},
  year         = {2005},
}

@article{2005_kumar,
  author       = {Kumar, R. and Tullsen, D. M. and Jouppi, N. P. and Ranganathan, P.},
  title        = {Heterogeneous Chip Multiprocessors},
  journal      = computer,
  volume       = {38},
  number       = {11},
  pages        = {32--38},
  year         = {2005},
  publisher    = {IEEE Computer Society},
}

@article{2005_mcnairy,
  author       = {McNairy, C. and Bhatia, R.},
  title        = {{Montecito}: a dual-core, dual-thread {Itanium} processor},
  journal      = ieeemicro,
  volume       = {25},
  number       = {2},
  pages        = {10--20},
  year         = {2005},
}

@inproceedings{percival2005cmf,
  author       = {Percival, C.},
  title        = {Cache missing for fun and profit},
  booktitle    = {{BSDCan} 2005},
  year         = {2005},
}

@misc{siddiqui:pap,
  author       = {Siddiqui, W. and VanBuren, B. G.},
  title        = {{POWER4} and {POWER5} Scalability},
  annote       = {No venue or year in the original export; locate the source before citing},
}

@article{2005_sinharoy,
  author       = {Sinharoy, B. and Kalla, R. N. and Tendler, J. M. and Eickemeyer, R. J. and Joyner, J. B.},
  title        = {{POWER5} system microarchitecture},
  journal      = ibmjrd,
  volume       = {49},
  number       = {4/5},
  pages        = {505--521},
  year         = {2005},
  publisher    = {IBM Corporation},
}

@inproceedings{yiannacouras2005mfb,
  author       = {Yiannacouras, P. and Rose, J. and Steffan, J. G.},
  title        = {The microarchitecture of {FPGA}-based soft processors},
  booktitle    = {Proceedings of the 2005 International Conference on Compilers, Architectures and Synthesis for Embedded Systems ({CASES})},
  year         = {2005},
  publisher    = {ACM},
  address      = {New York, NY, USA},
}

@inproceedings{2006_ghasemzadeh,
  author       = {Ghasemzadeh, H. and Mazrouee, S. S. and Kakoee, M. R.},
  title        = {Modified Pseudo {LRU} Replacement Algorithm},
  booktitle    = {Proceedings of the 13th Annual {IEEE} International Symposium and Workshop on Engineering of Computer Based Systems ({ECBS} 2006)},
  pages        = {368--376},
  year         = {2006},
}

@article{2006_gochman,
  author       = {Gochman, S. and Mendelson, A. and Naveh, A. and Rotem, E.},
  title        = {Introduction to {Intel Core Duo} processor architecture},
  journal      = itj,
  volume       = {10},
  number       = {2},
  pages        = {89--97},
  year         = {2006},
}

@inproceedings{kumar2006cao,
  author       = {Kumar, R. and Tullsen, D. M. and Jouppi, N. P.},
  title        = {Core architecture optimization for heterogeneous chip multiprocessors},
  booktitle    = {Proceedings of the 15th International Conference on Parallel Architectures and Compilation Techniques},
  pages        = {23--32},
  year         = {2006},
  publisher    = {ACM},
  address      = {New York, NY, USA},
}

@article{mcghan2006nof,
  author       = {McGhan, H.},
  title        = {{Niagara} 2 Opens the Floodgates},
  journal      = {Microprocessor Report},
  volume       = {20},
  number       = {11},
  year         = {2006},
}

@article{mendelson2006cis,
  author       = {Mendelson, A. and others},
  title        = {{CMP} Implementation in Systems Based on the {Intel Core Duo} Processor},
  journal      = itj,
  volume       = {10},
  number       = {2},
  year         = {2006},
}

@article{sangireddy2006rrl,
  author       = {Sangireddy, R.},
  title        = {Reducing Rename Logic Complexity for High-Speed and Low-Power Front-End Architectures},
  journal      = ieeetc,
  volume       = {55},
  number       = {6},
  pages        = {672--685},
  year         = {2006},
  publisher    = {IEEE Computer Society},
}

@manual{microsystems2006otm,
  author       = {{Sun Microsystems}},
  title        = {{OpenSPARC T1} Microarchitecture Specification},
  organization = {Sun Microsystems},
  year         = {2006},
  month        = aug,
}

@inproceedings{yiannacouras2006asc,
  author       = {Yiannacouras, P. and Steffan, J. G. and Rose, J.},
  title        = {Application-specific customization of soft processor microarchitecture},
  booktitle    = {Proceedings of the 2006 {ACM/SIGDA} 14th International Symposium on Field Programmable Gate Arrays},
  pages        = {201--210},
  year         = {2006},
  publisher    = {ACM},
  address      = {New York, NY, USA},
}

@techreport{2007_ARM,
  author       = {{ARM}},
  title        = {The {ARM Cortex-A9} Processor},
  institution  = {ARM},
  year         = {2007},
}

@phdthesis{bingham2007mrl,
  author       = {Bingham, S. T.},
  title        = {A {MIPS R10000}-Like Out-Of-Order Microprocessor Implementation in {Verilog HDL}},
  school       = {Cornell University},
  year         = {2007},
}

@article{eisen:ipa,
  author       = {Eisen, L. and Ward, III, J. W. and Tast, H. W. and M{\"a}ding, N. and Leenstra, J. and Mueller, S. M. and Jacobi, C. and Preiss, J. and Schwarz, E. M. and Carlough, S. R.},
  title        = {{IBM POWER6} accelerators: {VMX} and {DFU}},
  journal      = ibmjrd,
  volume       = {51},
  number       = {6},
  year         = {2007},
  annote       = {Original export had no journal, volume or year, and the title carried a scraped -References suffix -- confirm the added fields},
}

@article{2007_le,
  author       = {Le, H. Q. and Starke, W. J. and Fields, J. S. and O'Connell, F. P. and Nguyen, D. Q. and Ronchetti, B. J. and Sauer, W. M. and Schwarz, E. M. and Vaden, M. T.},
  title        = {{IBM POWER6} microarchitecture},
  journal      = ibmjrd,
  volume       = {51},
  number       = {6},
  pages        = {639--662},
  year         = {2007},
  publisher    = {IBM Corporation},
}

@inproceedings{li2007map,
  author       = {Li, Z. and Xu, X. and Hu, W. and Tang, Z.},
  title        = {Microarchitecture and Performance Analysis of {Godson-2 SMT} Processor},
  booktitle    = {Proceedings of the International Conference on Computer Design ({ICCD} 2006)},
  pages        = {485--490},
  year         = {2007},
}

@misc{hardware6mom,
  author       = {McKenney, Paul E.},
  title        = {Memory Ordering in Modern Microprocessors},
  annote       = {Original export had authors Hardware, S. and Accesses, W.R.M., journal Interface, volume 6, pages 6 (scraped section text); author, venue and year need confirming},
}

@inproceedings{sethumadhavan2007lbe,
  author       = {Sethumadhavan, S. and Roesner, F. and Emer, J. S. and Burger, D. and Keckler, S. W.},
  title        = {Late-binding: enabling unordered load-store queues},
  booktitle    = "Proceedings of the 34th " # isca,
  pages        = {347--357},
  year         = {2007},
  publisher    = {ACM Press},
  address      = {New York, NY, USA},
}

@inproceedings{williams2008lbs,
  author       = {Williams, S. and Carter, J. and Oliker, L. and Shalf, J. and Yelick, K.},
  title        = {Lattice {Boltzmann} simulation optimization on leading multicore platforms},
  booktitle    = {Proceedings of the {IEEE} International Symposium on Parallel and Distributed Processing ({IPDPS} 2008)},
  pages        = {1--14},
  year         = {2008},
}