@inproceedings{1981_smith,
  author       = {Smith, J. E.},
  title        = {A Study of Branch Prediction Strategies},
  booktitle    = {Proceedings of the 8th Annual Symposium on Computer Architecture},
  pages        = {135--148},
  year         = {1981},
  organization = {IEEE},
}
---|
| 9 | @conference{1983_fisher, |
---|
| 10 | title={{Very Long Instruction Word architectures and the ELI-512}}, |
---|
| 11 | author={Fisher, J.A.}, |
---|
| 12 | booktitle={Proceedings of the 10th annual international symposium on Computer architecture}, |
---|
| 13 | pages={140--150}, |
---|
| 14 | year={1983}, |
---|
| 15 | organization={IEEE Computer Society Press Los Alamitos, CA, USA} |
---|
| 16 | } |
---|
@techreport{1983_Lee,
  author      = {Lee, J. K. F. and Smith, A. J.},
  title       = {Analysis of Branch Prediction Strategies and Branch Target Buffer Design},
  institution = {Computer Science Division (EECS), University of California},
  year        = {1983},
}
---|
| 23 | @article{lee1984bps, |
---|
| 24 | title={{Branch Prediction Strategies and Branch Target Buffer Design}}, |
---|
| 25 | author={Lee, JKF and Smith, AJ}, |
---|
| 26 | journal={Computer}, |
---|
| 27 | volume={17}, |
---|
| 28 | number={1}, |
---|
| 29 | pages={6--22}, |
---|
| 30 | year={1984} |
---|
| 31 | } |
---|
| 32 | @article{smith1984dae, |
---|
| 33 | title={{Decoupled Access/Execute Computer Architectures}}, |
---|
| 34 | author={SMITH, J.E.}, |
---|
| 35 | journal={ACM Transactions on Computer Systems}, |
---|
| 36 | volume={2}, |
---|
| 37 | number={4}, |
---|
| 38 | pages={289--308}, |
---|
| 39 | year={1984} |
---|
| 40 | } |
---|
| 41 | @conference{ditzel1987bfc, |
---|
| 42 | title={{Branch folding in the CRISP microprocessor: reducing branch delay to zero}}, |
---|
| 43 | author={Ditzel, DR and McLellan, HR}, |
---|
| 44 | booktitle={Proceedings of the 14th annual international symposium on Computer architecture}, |
---|
| 45 | pages={2--8}, |
---|
| 46 | year={1987}, |
---|
| 47 | organization={ACM New York, NY, USA} |
---|
| 48 | } |
---|
@inproceedings{1990_alverson,
  author    = {Alverson, R. and Callahan, D. and Cummings, D. and Porterfield, A. and Smith, B. and Koblenz, B.},
  title     = {The {Tera} Computer System},
  booktitle = {Proceedings of the 4th International Conference on Supercomputing},
  pages     = {1--6},
  year      = {1990},
}
---|
@inproceedings{1991_kaeli,
  author    = {Kaeli, D. R. and Emma, P. G.},
  title     = {Branch History Table Prediction of Moving Target Branches Due to Subroutine Returns},
  booktitle = {Proceedings of the 18th Annual International Symposium on Computer Architecture},
  pages     = {34--42},
  year      = {1991},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
---|
| 61 | @article{scherson1991ogc, |
---|
| 62 | title={{Orthogonal graphs for the construction of a class ofinterconnection networks}}, |
---|
| 63 | author={Scherson, ID}, |
---|
| 64 | journal={Parallel and Distributed Systems, IEEE Transactions on}, |
---|
| 65 | volume={2}, |
---|
| 66 | number={1}, |
---|
| 67 | pages={3--19}, |
---|
| 68 | year={1991} |
---|
| 69 | } |
---|
@inproceedings{1991_wall,
  author    = {Wall, David W.},
  title     = {Limits of Instruction-Level Parallelism},
  booktitle = {Proceedings of the Fourth International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages     = {176--188},
  year      = {1991},
}
---|
@inproceedings{1992_pan,
  author    = {Pan, S. T. and So, K. and Rahmeh, J. T.},
  title     = {Improving the Accuracy of Dynamic Branch Prediction Using Branch Correlation},
  booktitle = {Proceedings of the Fifth International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages     = {76--84},
  year      = {1992},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
---|
@inproceedings{1992_yeh,
  author    = {Yeh, T. Y. and Patt, Y. N.},
  title     = {Alternative Implementations of Two-Level Adaptive Branch Prediction},
  booktitle = {Proceedings of the 19th Annual International Symposium on Computer Architecture},
  pages     = {124--134},
  year      = {1992},
}
---|
@techreport{1993_mcfarling,
  author      = {McFarling, S.},
  title       = {Combining Branch Predictors},
  institution = {Digital Equipment Corporation, Western Research Laboratory},
  type        = {WRL Technical Note},
  number      = {TN-36},
  month       = jun,
  year        = {1993},
}
---|
| 97 | @article{mclellan1993aaa, |
---|
| 98 | title={{The Alpha AXP architecture and 21064 processor}}, |
---|
| 99 | author={McLellan, E.}, |
---|
| 100 | journal={IEEE Micro}, |
---|
| 101 | volume={13}, |
---|
| 102 | number={3}, |
---|
| 103 | pages={36--47}, |
---|
| 104 | year={1993} |
---|
| 105 | } |
---|
| 106 | @conference{moudgill1993rra, |
---|
| 107 | title={{Register renaming and dynamic speculation: an alternative approach}}, |
---|
| 108 | author={Moudgill, M. and Pingali, K. and Vassiliadis, S.}, |
---|
| 109 | booktitle={Proceedings of the 26th annual international symposium on Microarchitecture}, |
---|
| 110 | pages={202--213}, |
---|
| 111 | year={1993}, |
---|
| 112 | organization={IEEE Computer Society Press Los Alamitos, CA, USA} |
---|
| 113 | } |
---|
| 114 | @article{1993_Perleberg, |
---|
| 115 | title={{Branch target buffer design and optimization}}, |
---|
| 116 | author={Perleberg, CH and Smith, AJ}, |
---|
| 117 | journal={Computers, IEEE Transactions on}, |
---|
| 118 | volume={42}, |
---|
| 119 | number={4}, |
---|
| 120 | pages={396--412}, |
---|
| 121 | year={1993} |
---|
| 122 | } |
---|
@inproceedings{1993_yeh,
  author    = {Yeh, T. Y. and Patt, Y. N.},
  title     = {A Comparison of Dynamic Branch Predictors That Use Two Levels of Branch History},
  booktitle = {Proceedings of the 20th Annual International Symposium on Computer Architecture},
  pages     = {257--266},
  year      = {1993},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
---|
@inproceedings{gallagher1994dmd,
  author    = {Gallagher, D. M. and Chen, W. Y. and Mahlke, S. A. and Gyllenhaal, J. C. and Hwu, Wen-mei W.},
  title     = {Dynamic Memory Disambiguation Using the Memory Conflict Buffer},
  booktitle = {Proceedings of the Sixth International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages     = {183--193},
  year      = {1994},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
---|
@article{song1994prm,
  author  = {Song, S. P. and Denman, M. and Chang, J.},
  title   = {The {PowerPC} 604 {RISC} Microprocessor},
  journal = {IEEE Micro},
  volume  = {14},
  number  = {5},
  pages   = {8--17},
  year    = {1994},
}
---|
| 147 | @article{diep1995pep, |
---|
| 148 | title={{Performance evaluation of the PowerPC 620 microarchitecture}}, |
---|
| 149 | author={Diep, T.A. and Nelson, C. and Shen, J.P.}, |
---|
| 150 | journal={ACM SIGARCH Computer Architecture News}, |
---|
| 151 | volume={23}, |
---|
| 152 | number={2}, |
---|
| 153 | pages={163--174}, |
---|
| 154 | year={1995}, |
---|
| 155 | publisher={ACM New York, NY, USA} |
---|
| 156 | } |
---|
@article{1995_edmondson,
  author  = {Edmondson, J. H. and Rubinfeld, P. I. and Bannon, P. J. and Benschneider, B. J. and Bernstein, D. and Castelino, R. W. and Cooper, E. M. and Dever, D. E. and Donchin, D. R. and Fischer, T. C. and others},
  title   = {Internal Organization of the {Alpha} 21164, a {300-MHz} 64-bit Quad-issue {CMOS} {RISC} Microprocessor},
  journal = {Digital Technical Journal},
  volume  = {7},
  number  = {1},
  pages   = {119--135},
  year    = {1995},
}
---|
@inproceedings{1995_sohi,
  author    = {Sohi, G. S. and Breach, S. E. and Vijaykumar, T. N.},
  title     = {Multiscalar Processors},
  booktitle = {Proceedings of the 22nd Annual International Symposium on Computer Architecture},
  pages     = {414--425},
  month     = jun,
  year      = {1995},
  venue     = {Santa Margherita Ligure, Italy},
}
---|
| 187 | @conference{1995_tullsen, |
---|
| 188 | title={{Simultaneous multithreading: maximizing on-chip parallelism}}, |
---|
| 189 | author={Tullsen, D.M. and Eggers, S.J. and Levy, H.M.}, |
---|
| 190 | booktitle={Proceedings of the 22nd annual international symposium on Computer architecture}, |
---|
| 191 | pages={392--403}, |
---|
| 192 | year={1995}, |
---|
| 193 | organization={ACM New York, NY, USA} |
---|
| 194 | } |
---|
@article{adve1996smc,
  author    = {Adve, S. V. and Gharachorloo, K.},
  title     = {Shared Memory Consistency Models: A Tutorial},
  journal   = {Computer},
  volume    = {29},
  number    = {12},
  pages     = {66--76},
  year      = {1996},
  publisher = {IEEE Computer Society},
}
---|
| 203 | @conference{farkas1996rfd, |
---|
| 204 | title={{Register File Design Considerations in Dynamically Scheduled Processors}}, |
---|
| 205 | author={Farkas, K.I. and Jouppi, N.P. and Chow, P.}, |
---|
| 206 | booktitle={Proceedings of the Second IEEE Symposium on High-Performance Computer Architecture}, |
---|
| 207 | pages={40--51}, |
---|
| 208 | year={1996} |
---|
| 209 | } |
---|
@article{1996_mudge,
  author    = {Mudge, Trevor},
  title     = {Strategic Directions in Computer Architecture},
  journal   = {ACM Computing Surveys},
  volume    = {28},
  number    = {4},
  pages     = {671--678},
  month     = dec,
  year      = {1996},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  note      = {Special ACM 50th-anniversary issue: strategic directions in computing research},
}
---|
@inproceedings{1996_olukotun,
  author    = {Olukotun, Kunle and Nayfeh, Basem A. and Hammond, Lance and Wilson, Ken and Chang, Kunyung},
  title     = {The Case for a Single-Chip Multiprocessor},
  booktitle = {Proceedings of the Seventh International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages     = {2--11},
  year      = {1996},
  publisher = {ACM Press},
  isbn      = {0-89791-767-7},
  venue     = {Cambridge, Massachusetts, United States},
}
---|
@inproceedings{1996_tullsen,
  author    = {Tullsen, D. M. and Eggers, S. J. and Emer, J. S. and Levy, H. M. and Lo, J. L. and Stamm, R. L.},
  title     = {Exploiting Choice: Instruction Fetch and Issue on an Implementable Simultaneous Multithreading Processor},
  booktitle = {Proceedings of the 23rd Annual International Symposium on Computer Architecture},
  pages     = {191--202},
  year      = {1996},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
---|
% Same paper as wallace1996srf; both keys are kept because either may be cited.
@inproceedings{1996_wallace,
  author    = {Wallace, S. and Bagherzadeh, N.},
  title     = {A Scalable Register File Architecture for Dynamically Scheduled Processors},
  booktitle = {Proceedings of the 1996 Conference on Parallel Architectures and Compilation Techniques},
  pages     = {179--184},
  month     = oct,
  year      = {1996},
  venue     = {Boston, MA, USA},
}
---|
| 281 | @conference{wallace1996srf, |
---|
| 282 | title={{A Scalable Register File Architecture for Dynamically Scheduled Processors}}, |
---|
| 283 | author={Wallace, S. and Bagherzadeh, N.}, |
---|
| 284 | booktitle={Proceedings of PACT}, |
---|
| 285 | year={1996} |
---|
| 286 | } |
---|
| 287 | @article{1996_yeager, |
---|
| 288 | title={{The Mips R10000 superscalar microprocessor}}, |
---|
| 289 | author={Yeager, KC}, |
---|
| 290 | journal={Micro, IEEE}, |
---|
| 291 | volume={16}, |
---|
| 292 | number={2}, |
---|
| 293 | pages={28--41}, |
---|
| 294 | year={1996} |
---|
| 295 | } |
---|
| 296 | @article{burger1997sts, |
---|
| 297 | title={{The SimpleScalar tool set, version 2.0}}, |
---|
| 298 | author={Burger, D. and Austin, T.M.}, |
---|
| 299 | journal={ACM SIGARCH Computer Architecture News}, |
---|
| 300 | volume={25}, |
---|
| 301 | number={3}, |
---|
| 302 | pages={13--25}, |
---|
| 303 | year={1997}, |
---|
| 304 | publisher={ACM Press New York, NY, USA} |
---|
| 305 | } |
---|
@inproceedings{1997_palacharla,
  author    = {Palacharla, S. and Jouppi, N. P. and Smith, J. E.},
  title     = {Complexity-Effective Superscalar Processors},
  booktitle = {Proceedings of the 24th Annual International Symposium on Computer Architecture},
  pages     = {206--218},
  month     = jun,
  year      = {1997},
}
---|
@inproceedings{1998a_hammond,
  author    = {Hammond, Lance and Willey, Mark and Olukotun, Kunle},
  title     = {Data Speculation Support for a Chip Multiprocessor},
  booktitle = {Proceedings of the Eighth International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages     = {58--69},
  year      = {1998},
  publisher = {ACM Press},
  issn      = {0163-5980},
  venue     = {San Jose, California, United States},
}
---|
@techreport{1998b_hammond,
  author      = {Hammond, Lance and Olukotun, Kunle},
  title       = {Considerations in the Design of {Hydra}: A Multiprocessor-on-a-Chip Microarchitecture},
  institution = {Stanford University},
  number      = {CSL-TR-98-749},
  month       = feb,
  year        = {1998},
}
---|
| 362 | @article{chrysos1998mdp, |
---|
| 363 | title={{Memory dependence prediction using store sets}}, |
---|
| 364 | author={Chrysos, G.Z. and Emer, J.S.}, |
---|
| 365 | journal={ACM SIGARCH Computer Architecture News}, |
---|
| 366 | volume={26}, |
---|
| 367 | number={3}, |
---|
| 368 | pages={142--153}, |
---|
| 369 | year={1998} |
---|
| 370 | } |
---|
@inproceedings{1998_kessler,
  author    = {Kessler, R. E. and McLellan, E. J. and Webb, D. A.},
  title     = {The {Alpha} 21264 Microprocessor Architecture},
  booktitle = {Proceedings of the International Conference on Computer Design: {VLSI} in Computers and Processors ({ICCD}'98)},
  pages     = {90--95},
  year      = {1998},
}
---|
@inproceedings{1998_krishnan,
  author    = {Krishnan, V. and Torrellas, J.},
  title     = {A Clustered Approach to Multithreaded Processors},
  booktitle = {Proceedings of the First Merged International Parallel Processing Symposium and Symposium on Parallel and Distributed Processing ({IPPS/SPDP})},
  pages     = {627--634},
  month     = mar # "--" # apr,
  year      = {1998},
  venue     = {Orlando, FL, USA},
}
---|
% No venue or year is known for this item; the note holds the only location
% information available (a postal address). TODO confirm the actual source.
@unpublished{mikhail1001dmo,
  author = {Mikhail, J. and Karl, E. and Dreslinski, R. and Davidson, G.},
  title  = {Design of a 290 {MHz} Out of Order Microprocessor with Register Renaming and Speculative Memory Access},
  note   = {Ann Arbor, MI 48109},
}
---|
@inproceedings{1998_skadron,
  author       = {Skadron, K. and Ahuja, P. S. and Martonosi, M. and Clark, D. W.},
  title        = {Improving Prediction for Procedure Returns with Return-Address-Stack Repair Mechanisms},
  booktitle    = {Proceedings of the 31st Annual {ACM/IEEE} International Symposium on Microarchitecture},
  pages        = {259--271},
  year         = {1998},
  organization = {Association for Computing Machinery},
}
---|
| 414 | @article{farkas1999mar, |
---|
| 415 | title={{The Multicluster Architecture: Reducing Processor Cycle Time Through Partitioning}}, |
---|
| 416 | author={Farkas, K.I. and Chow, P. and Jouppi, N.P. and Vranesic, Z.}, |
---|
| 417 | journal={International Journal of Parallel Programming}, |
---|
| 418 | volume={27}, |
---|
| 419 | number={5}, |
---|
| 420 | pages={327--356}, |
---|
| 421 | year={1999}, |
---|
| 422 | publisher={Springer} |
---|
| 423 | } |
---|
@techreport{1999_schlansker,
  author      = {Schlansker, M. S. and Rau, B. R.},
  title       = {{EPIC}: An Architecture for Instruction-Level Parallel Processors},
  institution = {Hewlett-Packard Laboratories},
  type        = {HP Laboratories Technical Report},
  year        = {1999},
}
---|
| 431 | @article{skadron1999bpi, |
---|
| 432 | title={{Branch prediction, instruction-window size, and cache size: performance trade-offs and simulation techniques}}, |
---|
| 433 | author={Skadron, K. and Ahuja, PS and Martonosi, M. and Clark, DW}, |
---|
| 434 | journal={Transactions on Computers}, |
---|
| 435 | volume={48}, |
---|
| 436 | number={11}, |
---|
| 437 | pages={1260--1281}, |
---|
| 438 | year={1999} |
---|
| 439 | } |
---|
| 440 | |
---|
@inproceedings{2000_barroso,
  author    = {Barroso, L. A. and Gharachorloo, K. and McNamara, R. and Nowatzyk, A. and Qadeer, S. and Sano, B. and Smith, S. and Stets, R. and Verghese, B.},
  title     = {Piranha: A Scalable Architecture Based on Single-Chip Multiprocessing},
  booktitle = {Proceedings of the 27th Annual International Symposium on Computer Architecture},
  pages     = {282--293},
  year      = {2000},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
---|
@article{borkenhagen2000mpp,
  author  = {Borkenhagen, J. M. and Eickemeyer, R. J. and Kalla, R. N. and Kunkel, S. R.},
  title   = {A Multithreaded {PowerPC} Processor for Commercial Servers},
  journal = {IBM Journal of Research and Development},
  volume  = {44},
  number  = {6},
  pages   = {885--898},
  year    = {2000},
}
---|
| 458 | @article{cruz2000mbr, |
---|
| 459 | title={{Multiple-banked register file architectures}}, |
---|
| 460 | author={Cruz, J.L. and Gonzalez, A. and Valero, M. and Topham, N.P.}, |
---|
| 461 | journal={ACM SIGARCH Computer Architecture News}, |
---|
| 462 | volume={28}, |
---|
| 463 | number={2}, |
---|
| 464 | pages={316--325}, |
---|
| 465 | year={2000} |
---|
| 466 | } |
---|
| 467 | @article{2000_cvetanovic, |
---|
| 468 | title={{Performance analysis of the Alpha 21264-based Compaq ES40 system}}, |
---|
| 469 | author={Cvetanovic, Z. and Kessler, RE}, |
---|
| 470 | journal={ACM SIGARCH Computer Architecture News}, |
---|
| 471 | volume={28}, |
---|
| 472 | number={2}, |
---|
| 473 | pages={192--202}, |
---|
| 474 | year={2000}, |
---|
| 475 | publisher={ACM New York, NY, USA} |
---|
| 476 | } |
---|
@article{2000_hammond,
  author  = {Hammond, L. and Hubbert, B. A. and Siu, M. and Prabhu, M. K. and Chen, M. and Olukotun, K.},
  title   = {The {Stanford} {Hydra} {CMP}},
  journal = {IEEE Micro},
  volume  = {20},
  number  = {2},
  pages   = {71--84},
  year    = {2000},
}
---|
| 483 | @article{klaiber2000tbc, |
---|
| 484 | title={{The Technology Behind Crusoe Processors}}, |
---|
| 485 | author={Klaiber, A. and others}, |
---|
| 486 | journal={Transmeta Technical Brief}, |
---|
| 487 | year={2000} |
---|
| 488 | } |
---|
@article{2000_schlansker,
  author    = {Schlansker, M. S. and Rau, B. R.},
  title     = {{EPIC}: Explicitly Parallel Instruction Computing},
  journal   = {Computer},
  volume    = {33},
  number    = {2},
  pages     = {37--45},
  year      = {2000},
  publisher = {IEEE Computer Society},
}
---|
| 497 | @article{2000_sharangpani, |
---|
| 498 | title={{Itanium processor microarchitecture}}, |
---|
| 499 | author={Sharangpani, H. and Arora, H.}, |
---|
| 500 | journal={Micro, IEEE}, |
---|
| 501 | volume={20}, |
---|
| 502 | number={5}, |
---|
| 503 | pages={24--43}, |
---|
| 504 | year={2000} |
---|
| 505 | } |
---|
% Same article as sima2000dsr; both keys are kept because either may be cited.
@article{2000_sima,
  author  = {Sima, D.},
  title   = {The Design Space of Register Renaming Techniques},
  journal = {IEEE Micro},
  volume  = {20},
  number  = {5},
  pages   = {70--83},
  year    = {2000},
}
---|
% Same article as 2000_sima; both keys are kept because either may be cited.
@article{sima2000dsr,
  author  = {Sima, D.},
  title   = {The Design Space of Register Renaming Techniques},
  journal = {IEEE Micro},
  volume  = {20},
  number  = {5},
  pages   = {70--83},
  year    = {2000},
}
---|
| 524 | @conference{balasubramonian2001rcr, |
---|
| 525 | title={{Reducing the complexity of the register file in dynamic superscalar processors}}, |
---|
| 526 | author={Balasubramonian, R. and Dwarkadas, S. and Albonesi, D.H.}, |
---|
| 527 | booktitle={Proceedings of the 34th annual ACM/IEEE international symposium on Microarchitecture}, |
---|
| 528 | pages={237--248}, |
---|
| 529 | year={2001}, |
---|
| 530 | organization={IEEE Computer Society Washington, DC, USA} |
---|
| 531 | } |
---|
@inproceedings{2001_burns,
  author    = {Burns, J. and Gaudiot, J. L.},
  title     = {Area and System Clock Effects on {SMT/CMP} Processors},
  booktitle = {Proceedings of the 2001 International Conference on Parallel Architectures and Compilation Techniques},
  pages     = {211--218},
  year      = {2001},
}
---|
@inproceedings{guthaus2001mfc,
  author    = {Guthaus, M. R. and Ringenberg, J. S. and Ernst, D. and Austin, T. M. and Mudge, T. and Brown, R. B.},
  title     = {{MiBench}: A Free, Commercially Representative Embedded Benchmark Suite},
  booktitle = {Proceedings of the 2001 {IEEE} International Workshop on Workload Characterization ({WWC-4})},
  pages     = {3--14},
  year      = {2001},
}
---|
@article{2001_hinton,
  author  = {Hinton, G. and Sager, D. and Upton, M. and Boggs, D. and Carmean, D. and Kyker, A. and Roussel, P.},
  title   = {The Microarchitecture of the {Pentium} 4 Processor},
  journal = {Intel Technology Journal},
  volume  = {5},
  number  = {1},
  year    = {2001},
}
---|
@inproceedings{2001_nagarajan,
  author    = {Nagarajan, Ramadass and Sankaralingam, Karthikeyan and Burger, Doug and Keckler, Stephen W.},
  title     = {A Design Space Evaluation of Grid Processor Architectures},
  booktitle = {Proceedings of the 34th Annual {ACM/IEEE} International Symposium on Microarchitecture},
  pages     = {40--51},
  year      = {2001},
  publisher = {IEEE Computer Society},
  isbn      = {0-7695-1369-7},
  issn      = {1072-4451},
  venue     = {Austin, Texas},
}
---|
@inproceedings{ernst2002eds,
  author    = {Ernst, D. and Austin, T.},
  title     = {Efficient Dynamic Scheduling Through Tag Elimination},
  booktitle = {Proceedings of the 29th Annual International Symposium on Computer Architecture},
  pages     = {37--46},
  year      = {2002},
}
---|
@inproceedings{akkary2003cpa,
  author    = {Akkary, H. and Rajwar, R. and Srinivasan, S. T.},
  title     = {Checkpoint Processing and Recovery: Towards Scalable Large Instruction Window Processors},
  booktitle = {Proceedings of the 36th Annual {IEEE/ACM} International Symposium on Microarchitecture ({MICRO-36})},
  pages     = {423--434},
  year      = {2003},
}
---|
| 590 | @article{2002_mukherjee, |
---|
| 591 | title={{The Alpha 21364 network architecture}}, |
---|
| 592 | author={Mukherjee, SS and Bannon, P. and Lang, S. and Spink, A. and Webb, D.}, |
---|
| 593 | journal={Micro, IEEE}, |
---|
| 594 | volume={22}, |
---|
| 595 | number={1}, |
---|
| 596 | pages={26--35}, |
---|
| 597 | year={2002} |
---|
| 598 | } |
---|
@inproceedings{2002_sprangle,
  author    = {Sprangle, E. and Carmean, D.},
  title     = {Increasing Processor Performance by Implementing Deeper Pipelines},
  booktitle = {Proceedings of the 29th Annual International Symposium on Computer Architecture},
  pages     = {25--34},
  year      = {2002},
}
---|
| 606 | @article{2002_tendler, |
---|
| 607 | title={{POWER4 system microarchitecture}}, |
---|
| 608 | author={Tendler, J.M. and Dodson, J.S. and Fields Jr, J.S. and Le, H. and Sinharoy, B.}, |
---|
| 609 | journal={IBM Journal of Research and Development}, |
---|
| 610 | volume={46}, |
---|
| 611 | number={1}, |
---|
| 612 | pages={5--25}, |
---|
| 613 | year={2002} |
---|
| 614 | } |
---|
@article{2002_ungerer,
  author  = {Ungerer, T. and Robic, B. and Silc, J.},
  title   = {Multithreaded Processors},
  journal = {The Computer Journal},
  volume  = {45},
  number  = {3},
  pages   = {320--348},
  year    = {2002},
}
---|
| 630 | @article{darsch2003oop, |
---|
| 631 | title={{Out-of-order Predicated Execution with Translation Register Buffer}}, |
---|
| 632 | author={Darsch, A. and Seznec, A.}, |
---|
| 633 | journal={Rapport technique RR-1573}, |
---|
| 634 | year={2003} |
---|
| 635 | } |
---|
| 636 | @conference{dehnert2003tcm, |
---|
| 637 | title={{The Transmeta Code Morphing Software: using speculation, recovery, and adaptive retranslation to address real-life challenges}}, |
---|
| 638 | author={Dehnert, J.C. and Grant, B.K. and Banning, J.P. and Johnson, R. and Kistler, T. and Klaiber, A. and Mattson, J.}, |
---|
| 639 | booktitle={ACM International Conference Proceeding Series}, |
---|
| 640 | volume={37}, |
---|
| 641 | pages={15--24}, |
---|
| 642 | year={2003} |
---|
| 643 | } |
---|
| 644 | @article{heinrich:smt, |
---|
| 645 | title={{Scalable Multi-threaded Multiprocessor Architectures}}, |
---|
| 646 | author={Heinrich, M.} |
---|
| 647 | } |
---|
@inproceedings{2003_jeong,
  author    = {Jeong, J. and Dubois, M.},
  title     = {Cost-Sensitive Cache Replacement Algorithms},
  booktitle = {Proceedings of the Ninth International Symposium on High-Performance Computer Architecture ({HPCA-9})},
  pages     = {327--337},
  year      = {2003},
}
---|
@article{keltcher2003aop,
  author    = {Keltcher, C. N. and McGrath, K. J. and Ahmed, A. and Conway, P.},
  title     = {The {AMD} {Opteron} Processor for Multiprocessor Servers},
  journal   = {IEEE Micro},
  volume    = {23},
  number    = {2},
  pages     = {66--76},
  year      = {2003},
  publisher = {IEEE Computer Society},
}
---|
| 663 | @article{2003_koufaty, |
---|
| 664 | title={{Hyperthreading technology in the netburst microarchitecture}}, |
---|
| 665 | author={Koufaty, D. and Marr, DT}, |
---|
| 666 | journal={Micro, IEEE}, |
---|
| 667 | volume={23}, |
---|
| 668 | number={2}, |
---|
| 669 | pages={56--65}, |
---|
| 670 | year={2003} |
---|
| 671 | } |
---|
| 672 | @conference{kumar2003sih, |
---|
| 673 | title={{Single-ISA heterogeneous multi-core architectures: the potential for processor power reduction}}, |
---|
| 674 | author={Kumar, R. and Farkas, KI and Jouppi, NP and Ranganathan, P. and Tullsen, DM}, |
---|
| 675 | booktitle={Microarchitecture, 2003. MICRO-36. Proceedings. 36th Annual IEEE/ACM International Symposium on}, |
---|
| 676 | pages={81--92}, |
---|
| 677 | year={2003} |
---|
| 678 | } |
---|
| 679 | @article{2003_mcnairy, |
---|
| 680 | title={{Itanium 2 processor microarchitecture}}, |
---|
| 681 | author={McNairy, C. and Soltis, D.}, |
---|
| 682 | journal={Micro, IEEE}, |
---|
| 683 | volume={23}, |
---|
| 684 | number={2}, |
---|
| 685 | pages={44--55}, |
---|
| 686 | year={2003} |
---|
| 687 | } |
---|
% Single entry for this paper: the key may be defined only once per database.
@inproceedings{park2003rdc,
  author    = {Park, I. and Ooi, C. L. and Vijaykumar, T. N.},
  title     = {Reducing Design Complexity of the Load/Store Queue},
  booktitle = {Proceedings of the 36th Annual {IEEE/ACM} International Symposium on Microarchitecture ({MICRO-36})},
  pages     = {411--422},
  year      = {2003},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
}
---|
| 702 | @inproceedings{2003_sankaralingam, |
---|
| 703 |   author = {Karthikeyan Sankaralingam and |
---|
| 704 |             Ramadass Nagarajan and |
---|
| 705 |             Haiming Liu and |
---|
| 706 |             Changkyu Kim and |
---|
| 707 |             Jaehyuk Huh and |
---|
| 708 |             Doug Burger and |
---|
| 709 |             Stephen W. Keckler and |
---|
| 710 |             Charles R. Moore}, |
---|
| 711 |   title = {Exploiting {ILP}, {TLP}, and {DLP} with the polymorphous {TRIPS} architecture}, |
---|
| 712 |   isbn = {0-7695-1945-8}, |
---|
| 713 |   OPTkey = {}, |
---|
| 714 |   booktitle = {Proceedings of the 30th annual international symposium on Computer architecture}, |
---|
| 715 |   pages = {422--433}, |
---|
| 716 |   year = {2003}, |
---|
| 717 |   OPTeditor = {}, |
---|
| 718 |   OPTvolume = {Volume 31 Issue 2}, |
---|
| 719 |   OPTnumber = {}, |
---|
| 720 |   OPTseries = {}, |
---|
| 721 |   OPTaddress = {San Diego, California}, |
---|
| 722 |   month = may, |
---|
| 723 |   OPTorganization = {}, |
---|
| 724 |   publisher = {ACM Press}, |
---|
| 725 |   OPTnote = {}, |
---|
| 726 |   OPTannote = {} |
---|
| 727 | } |
---|
| 728 | @inproceedings{sethumadhavan2003shm, |
---|
| 729 |   author    = {Sethumadhavan, S. and Desikan, R. and Burger, D. and Moore, C. R. and Keckler, S. W.}, |
---|
| 730 |   title     = {{Scalable hardware memory disambiguation for high ILP processors}}, |
---|
| 731 |   booktitle = {Microarchitecture, 2003. MICRO-36. Proceedings. 36th Annual IEEE/ACM International Symposium on}, |
---|
| 732 |   pages     = {399--410}, |
---|
| 733 |   year      = {2003} |
---|
| 734 | } |
---|
| 735 | @inproceedings{2004_chaudhuri, |
---|
| 736 |   author = {Mainak Chaudhuri and |
---|
| 737 |             Mark Heinrich}, |
---|
| 738 |   title = {{SMTp}: An Architecture for Next-generation Scalable Multi-threading}, |
---|
| 739 |   OPTcrossref = {}, |
---|
| 740 |   OPTkey = {}, |
---|
| 741 |   booktitle = {Proceedings of the 31st annual international symposium on Computer architecture}, |
---|
| 742 |   pages = {124--136}, |
---|
| 743 |   year = {2004}, |
---|
| 744 |   OPTeditor = {}, |
---|
| 745 |   OPTvolume = {0}, |
---|
| 746 |   OPTnumber = {}, |
---|
| 747 |   OPTseries = {}, |
---|
| 748 |   OPTaddress = {M{\"u}nchen, Germany}, |
---|
| 749 |   OPTmonth = {}, |
---|
| 750 |   OPTorganization = {}, |
---|
| 751 |   publisher = {IEEE Computer Society}, |
---|
| 752 |   OPTnote = {}, |
---|
| 753 |   OPTannote = {} |
---|
| 754 | } |
---|
| 755 | @inproceedings{2004_collins, |
---|
| 756 |   author    = {Collins, J. D. and Tullsen, D. M.}, |
---|
| 757 |   title     = {{Clustered multithreaded architectures -- pursuing both IPC and cycle time}}, |
---|
| 758 |   booktitle = {Parallel and Distributed Processing Symposium, 2004. Proceedings. 18th International}, |
---|
| 759 |   year      = {2004} |
---|
| 760 | } |
---|
| 761 | @inproceedings{cristal2004ooc, |
---|
| 762 |   author       = {Cristal, A. and Ortega, D. and Llosa, J. and Valero, M.}, |
---|
| 763 |   title        = {{Out-of-Order Commit Processors}}, |
---|
| 764 |   booktitle    = {Proceedings of the 10th International Symposium on High Performance Computer Architecture}, |
---|
| 765 |   year         = {2004}, |
---|
| 766 |   pages        = {48}, |
---|
| 767 |   organization = {IEEE Computer Society Washington, DC, USA} |
---|
| 768 | } |
---|
| 769 | @article{2004_dolbeau, |
---|
| 770 |   author  = {Dolbeau, R. and Seznec, A.}, |
---|
| 771 |   title   = {{CASH: Revisiting hardware sharing in single-chip parallel processor}}, |
---|
| 772 |   journal = {Journal of Instruction-Level Parallelism}, |
---|
| 773 |   year    = {2004}, |
---|
| 774 |   volume  = {6}, |
---|
| 775 |   pages   = {1--16} |
---|
| 776 | } |
---|
| 777 | @article{2004_kalla, |
---|
| 778 |   author  = {Kalla, R. and Sinharoy, B. and Tendler, J. M.}, |
---|
| 779 |   title   = {{IBM Power5 chip: a dual-core multithreaded processor}}, |
---|
| 780 |   journal = {IEEE Micro}, |
---|
| 781 |   volume  = {24}, |
---|
| 782 |   number  = {2}, |
---|
| 783 |   pages   = {40--47}, |
---|
| 784 |   year    = {2004} |
---|
| 785 | } |
---|
| 786 | @inproceedings{2004_kumar, |
---|
| 787 |   author    = {Kumar, R. and Jouppi, N. P. and Tullsen, D. M.}, |
---|
| 788 |   title     = {{Conjoined-Core Chip Multiprocessing}}, |
---|
| 789 |   booktitle = {Proceedings of the 37th annual IEEE/ACM International Symposium on Microarchitecture}, |
---|
| 790 |   pages     = {195--206}, |
---|
| 791 |   year      = {2004}, |
---|
| 792 |   publisher = {IEEE Computer Society Washington, DC, USA} |
---|
| 793 | } |
---|
| 794 | @article{kumar2004sih, |
---|
| 795 |   author    = {Kumar, R. and Tullsen, D. M. and Ranganathan, P. and Jouppi, N. P. and Farkas, K. I.}, |
---|
| 796 |   title     = {{Single-ISA Heterogeneous Multi-Core Architectures for Multithreaded Workload Performance}}, |
---|
| 797 |   journal   = {ACM SIGARCH Computer Architecture News}, |
---|
| 798 |   volume    = {32}, |
---|
| 799 |   number    = {2}, |
---|
| 800 |   year      = {2004}, |
---|
| 801 |   publisher = {ACM New York, NY, USA} |
---|
| 802 | } |
---|
| 803 | @book{mattsson:esc, |
---|
| 804 |   author    = {Mattsson, D. and Christensson, M.}, |
---|
| 805 |   title     = {{Evaluation of synthesizable CPU cores}}, |
---|
| 806 |   publisher = {Chalmers tekniska h{\"o}gskola}, year = {2004} |
---|
| 807 | } |
---|
| 808 | @inproceedings{tune2004bmi, |
---|
| 809 |   author    = {Tune, E. and Kumar, R. and Tullsen, D. M. and Calder, B.}, |
---|
| 810 |   title     = {{Balanced Multithreading: Increasing Throughput via a Low Cost Multithreading Hierarchy}}, |
---|
| 811 |   booktitle = {Proceedings of the 37th International Symposium on Microarchitecture. IEEE}, |
---|
| 812 |   year      = {2004} |
---|
| 813 | } |
---|
| 814 | |
---|
| 815 | @inproceedings{2004_wang, |
---|
| 816 |   author = {Nicholas J. Wang and |
---|
| 817 |             Justin Quek and |
---|
| 818 |             Todd M. Rafacz and |
---|
| 819 |             Sanjay J. Patel}, |
---|
| 820 |   title = {Characterizing the Effects of Transient Faults on a High-Performance Processor Pipeline}, |
---|
| 821 |   OPTcrossref = {}, |
---|
| 822 |   OPTkey = {}, |
---|
| 823 |   booktitle = {Proceedings of the 2004 International Conference on Dependable Systems and Networks}, |
---|
| 824 |   OPTpages = {}, |
---|
| 825 |   year = {2004}, |
---|
| 826 |   OPTeditor = {}, |
---|
| 827 |   OPTvolume = {}, |
---|
| 828 |   OPTnumber = {}, |
---|
| 829 |   OPTseries = {}, |
---|
| 830 |   OPTaddress = {Florence, Italy}, |
---|
| 831 |   month = jun, |
---|
| 832 |   OPTorganization = {}, |
---|
| 833 |   OPTpublisher = {}, |
---|
| 834 |   OPTnote = {}, |
---|
| 835 |   OPTannote = {} |
---|
| 836 | } |
---|
| 837 | @techreport{2005_ARM, |
---|
| 838 |   author      = {{ARM}}, |
---|
| 839 |   title       = {{Architecture and Implementation of the ARM Cortex-A8 Microprocessor}}, |
---|
| 840 |   institution = {ARM}, year = {2005} |
---|
| 841 | } |
---|
| 842 | @article{constantinou2005pis, |
---|
| 843 |   author    = {Constantinou, T. and Sazeides, Y. and Michaud, P. and Fetis, D. and Seznec, A.}, |
---|
| 844 |   title     = {{Performance implications of single thread migration on a chip multi-core}}, |
---|
| 845 |   journal   = {ACM SIGARCH Computer Architecture News}, |
---|
| 846 |   year      = {2005}, |
---|
| 847 |   volume    = {33}, |
---|
| 848 |   number    = {4}, |
---|
| 849 |   pages     = {80--91}, |
---|
| 850 |   publisher = {ACM New York, NY, USA} |
---|
| 851 | } |
---|
| 852 | @inproceedings{dimond2005cct, |
---|
| 853 |   author    = {Dimond, R. and Mencer, O. and Luk, W.}, |
---|
| 854 |   title     = {{CUSTARD-A Customisable Threaded FPGA Soft Processor and Tools}}, |
---|
| 855 |   booktitle = {International Conference on Field Programmable Logic (FPL)}, |
---|
| 856 |   year      = {2005} |
---|
| 857 | } |
---|
| 858 | @book{2005_fisher, |
---|
| 859 |   author    = {Fisher, Joseph A. and Faraboschi, Paolo and Young, Cliff}, |
---|
| 860 |   title     = {{Embedded Computing: A VLIW Approach To Architecture, Compilers And Tools}}, |
---|
| 861 |   year      = {2005}, |
---|
| 862 |   publisher = {Morgan Kaufmann Publishers} |
---|
| 863 | } |
---|
| 864 | @article{2005_kahle, |
---|
| 865 |   author  = {Kahle, J. and others}, |
---|
| 866 |   title   = {{Introduction to the Cell multiprocessor}}, |
---|
| 867 |   journal = {IBM Journal of Research and Development}, |
---|
| 868 |   year    = {2005}, |
---|
| 869 |   volume  = {49}, |
---|
| 870 |   number  = {4}, |
---|
| 871 |   pages   = {589--604} |
---|
| 872 | } |
---|
| 873 | @article{2005_kongetira, |
---|
| 874 |   author  = {Kongetira, P. and Aingaran, K. and Olukotun, K.}, |
---|
| 875 |   title   = {{Niagara: a 32-way multithreaded Sparc processor}}, |
---|
| 876 |   journal = {IEEE Micro}, |
---|
| 877 |   year    = {2005}, |
---|
| 878 |   volume  = {25}, |
---|
| 879 |   number  = {2}, |
---|
| 880 |   pages   = {21--29} |
---|
| 881 | } |
---|
| 882 | @article{2005_kumar, |
---|
| 883 |   author    = {Kumar, R. and Tullsen, D. M. and Jouppi, N. P. and Ranganathan, P.}, |
---|
| 884 |   title     = {{Heterogeneous Chip Multiprocessors}}, |
---|
| 885 |   journal   = {Computer}, |
---|
| 886 |   pages     = {32--38}, |
---|
| 887 |   year      = {2005}, |
---|
| 888 |   publisher = {IEEE Computer Society} |
---|
| 889 | } |
---|
| 890 | @article{2005_mcnairy, |
---|
| 891 |   author  = {McNairy, C. and Bhatia, R.}, |
---|
| 892 |   title   = {{Montecito: a dual-core, dual-thread Itanium processor}}, |
---|
| 893 |   journal = {IEEE Micro}, |
---|
| 894 |   volume  = {25}, |
---|
| 895 |   number  = {2}, |
---|
| 896 |   pages   = {10--20}, |
---|
| 897 |   year    = {2005} |
---|
| 898 | } |
---|
| 899 | @inproceedings{percival2005cmf, |
---|
| 900 |   author    = {Percival, C.}, |
---|
| 901 |   title     = {{Cache missing for fun and profit}}, |
---|
| 902 |   booktitle = {BSDCan 2005}, |
---|
| 903 |   year      = {2005} |
---|
| 904 | } |
---|
| 905 | @misc{siddiqui:pap, |
---|
| 906 |   author = {Siddiqui, W. and VanBuren, B. G.}, |
---|
| 907 |   title  = {{POWER4 and POWER5 Scalability}} |
---|
| 908 | } |
---|
| 909 | @article{2005_sinharoy, |
---|
| 910 |   author    = {Sinharoy, B. and Kalla, R. N. and Tendler, J. M. and Eickemeyer, R. J. and Joyner, J. B.}, |
---|
| 911 |   title     = {{POWER5 system microarchitecture}}, |
---|
| 912 |   journal   = {IBM Journal of Research and Development}, |
---|
| 913 |   volume    = {49}, |
---|
| 914 |   number    = {4/5}, |
---|
| 915 |   pages     = {505}, |
---|
| 916 |   year      = {2005}, |
---|
| 917 |   publisher = {IBM Corporation} |
---|
| 918 | } |
---|
| 919 | @inproceedings{yiannacouras2005mfb, |
---|
| 920 |   author    = {Yiannacouras, P. and Rose, J. and Steffan, J. G.}, |
---|
| 921 |   title     = {{The microarchitecture of FPGA-based soft processors}}, |
---|
| 922 |   booktitle = {Proceedings of the 2005 international conference on Compilers, architectures and synthesis for embedded systems}, |
---|
| 923 |   year      = {2005}, publisher = {ACM New York, NY, USA} |
---|
| 924 | } |
---|
| 925 | @comment{yiannacouras2005mfb: disabled duplicate record. The preceding entry already defines this key for the same work, and a repeated key is a BibTeX error. Original fields kept for reference: |
---|
| 926 | title={{The microarchitecture of FPGA-based soft processors}}, |
---|
| 927 | author={Yiannacouras, P. and Rose, J. and Steffan, J.G.}, |
---|
| 928 | year={2005}, |
---|
| 929 | publisher={ACM New York, NY, USA} |
---|
| 930 | } |
---|
| 931 | @inproceedings{2006_ghasemzadeh, |
---|
| 932 |   author    = {Ghasemzadeh, H. and Mazrouee, S. S. and Kakoee, M. R.}, |
---|
| 933 |   title     = {{Modified Pseudo LRU Replacement Algorithm}}, |
---|
| 934 |   booktitle = {Engineering of Computer Based Systems, 2006. ECBS 2006. 13th Annual IEEE International Symposium and Workshop on}, |
---|
| 935 |   pages     = {368--376}, |
---|
| 936 |   year      = {2006} |
---|
| 937 | } |
---|
| 938 | @article{2006_gochman, |
---|
| 939 |   author  = {Gochman, S. and Mendelson, A. and Naveh, A. and Rotem, E.}, |
---|
| 940 |   title   = {{Introduction to Intel Core Duo processor architecture}}, |
---|
| 941 |   journal = {Intel Technology Journal}, |
---|
| 942 |   year    = {2006}, |
---|
| 943 |   volume  = {10}, |
---|
| 944 |   number  = {2}, |
---|
| 945 |   pages   = {89--97} |
---|
| 946 | } |
---|
| 947 | @inproceedings{kumar2006cao, |
---|
| 948 |   author       = {Kumar, R. and Tullsen, D. M. and Jouppi, N. P.}, |
---|
| 949 |   title        = {{Core architecture optimization for heterogeneous chip multiprocessors}}, |
---|
| 950 |   booktitle    = {Proceedings of the 15th international conference on Parallel architectures and compilation techniques}, |
---|
| 951 |   pages        = {23--32}, |
---|
| 952 |   year         = {2006}, |
---|
| 953 |   organization = {ACM New York, NY, USA} |
---|
| 954 | } |
---|
| 955 | @article{mcghan2006nof, |
---|
| 956 |   author  = {McGhan, H.}, |
---|
| 957 |   title   = {{Niagara 2 Opens the Floodgates}}, |
---|
| 958 |   journal = {Microprocessor Report}, |
---|
| 959 |   year    = {2006}, |
---|
| 960 |   volume  = {20}, |
---|
| 961 |   number  = {11} |
---|
| 962 | } |
---|
| 963 | @article{mendelson2006cis, |
---|
| 964 |   author  = {Mendelson, A. and others}, |
---|
| 965 |   title   = {{CMP Implementation in Systems Based on the Intel Core Duo Processor}}, |
---|
| 966 |   journal = {Intel Technology Journal}, |
---|
| 967 |   volume  = {10}, |
---|
| 968 |   number  = {2}, |
---|
| 969 |   year    = {2006} |
---|
| 970 | } |
---|
| 971 | @article{sangireddy2006rrl, |
---|
| 972 |   author    = {Sangireddy, R.}, |
---|
| 973 |   title     = {{Reducing Rename Logic Complexity for High-Speed and Low-Power Front-End Architectures}}, |
---|
| 974 |   journal   = {IEEE Transactions on Computers}, |
---|
| 975 |   pages     = {672--685}, |
---|
| 976 |   year      = {2006}, |
---|
| 977 |   publisher = {IEEE Computer Society} |
---|
| 978 | } |
---|
| 979 | @misc{microsystems2006otm, |
---|
| 980 |   author = {{Sun Microsystems}}, |
---|
| 981 |   title  = {{OpenSPARC T1 Microarchitecture Specification}}, |
---|
| 982 |   month  = aug, |
---|
| 983 |   year   = {2006} |
---|
| 984 | } |
---|
| 985 | @inproceedings{yiannacouras2006asc, |
---|
| 986 |   author    = {Yiannacouras, P. and Steffan, J. G. and Rose, J.}, |
---|
| 987 |   title     = {{Application-specific customization of soft processor microarchitecture}}, |
---|
| 988 |   booktitle = {Proceedings of the 2006 ACM/SIGDA 14th international symposium on Field programmable gate arrays}, |
---|
| 989 |   pages     = {201--210}, |
---|
| 990 |   year      = {2006}, |
---|
| 991 |   publisher = {ACM New York, NY, USA} |
---|
| 992 | } |
---|
| 993 | @techreport{2007_ARM, |
---|
| 994 |   author      = {{ARM}}, |
---|
| 995 |   title       = {{The ARM Cortex-A9 Processor}}, |
---|
| 996 |   institution = {ARM}, year = {2007} |
---|
| 997 | } |
---|
| 998 | @phdthesis{bingham2007mrl, |
---|
| 999 |   author = {Bingham, S. T.}, |
---|
| 1000 |   title  = {{A MIPS R10000-Like Out-Of-Order Microprocessor Implementation in Verilog HDL}}, |
---|
| 1001 |   year   = {2007}, |
---|
| 1002 |   school = {Cornell University} |
---|
| 1003 | } |
---|
| 1004 | @article{eisen:ipa, |
---|
| 1005 |   author  = {Eisen, L. and Ward, III, J. W. and Tast, H. W. and M{\"a}ding, N. and Leenstra, J. and Mueller, S. M. and Jacobi, C. and Preiss, J. and Schwarz, E. M. and Carlough, S. R.}, |
---|
| 1006 |   title   = {{IBM POWER6 accelerators: VMX and DFU}}, journal = {IBM Journal of Research and Development}, volume = {51}, number = {6}, year = {2007} |
---|
| 1007 | } |
---|
| 1008 | @article{2007_le, |
---|
| 1009 |   author    = {Le, H. Q. and Starke, W. J. and Fields, J. S. and O'Connell, F. P. and Nguyen, D. Q. and Ronchetti, B. J. and Sauer, W. M. and Schwarz, E. M. and Vaden, M. T.}, |
---|
| 1010 |   title     = {{IBM POWER6 microarchitecture}}, |
---|
| 1011 |   journal   = {IBM Journal of Research and Development}, |
---|
| 1012 |   volume    = {51}, |
---|
| 1013 |   number    = {6}, |
---|
| 1014 |   pages     = {639}, |
---|
| 1015 |   year      = {2007}, |
---|
| 1016 |   publisher = {IBM Corporation} |
---|
| 1017 | } |
---|
| 1018 | @inproceedings{li2007map, |
---|
| 1019 |   author    = {Li, Z. and Xu, X. and Hu, W. and Tang, Z.}, |
---|
| 1020 |   title     = {{Microarchitecture and Performance Analysis of Godson-2 SMT Processor}}, |
---|
| 1021 |   booktitle = {Computer Design, 2006. ICCD 2006. International Conference on}, |
---|
| 1022 |   year      = {2007}, |
---|
| 1023 |   pages     = {485--490} |
---|
| 1024 | } |
---|
| 1025 | @article{hardware6mom, |
---|
| 1026 |   author  = {McKenney, Paul E.}, |
---|
| 1027 |   title   = {{Memory Ordering in Modern Microprocessors}}, |
---|
| 1028 |   journal = {Interface}, |
---|
| 1029 |   volume  = {6}, pages = {6}, |
---|
| 1030 |   internal-note = {NOTE(review): the author field was scraper residue (Hardware, S. and Accesses, W.R.M.) and has been replaced; journal, volume and pages look auto-generated as well and the year is missing -- verify against the published version} |
---|
| 1031 | } |
---|
| 1032 | @inproceedings{sethumadhavan2007lbe, |
---|
| 1033 |   author    = {Sethumadhavan, S. and Roesner, F. and Emer, J. S. and Burger, D. and Keckler, S. W.}, |
---|
| 1034 |   title     = {{Late-binding: enabling unordered load-store queues}}, |
---|
| 1035 |   booktitle = {Proceedings of the 34th annual international conference on Computer architecture}, |
---|
| 1036 |   pages     = {347--357}, |
---|
| 1037 |   year      = {2007}, |
---|
| 1038 |   publisher = {ACM Press New York, NY, USA} |
---|
| 1039 | } |
---|
| 1040 | @inproceedings{williams2008lbs, |
---|
| 1041 |   author    = {Williams, S. and Carter, J. and Oliker, L. and Shalf, J. and Yelick, K.}, |
---|
| 1042 |   title     = {{Lattice Boltzmann simulation optimization on leading multicore platforms}}, |
---|
| 1043 |   booktitle = {Parallel and Distributed Processing, 2008. IPDPS 2008. IEEE International Symposium on}, |
---|
| 1044 |   year      = {2008}, |
---|
| 1045 |   pages     = {1--14} |
---|
| 1046 | } |
---|