% NOTE(review): booktitle expanded from "Proc. Computer Architecture" -- confirm the edition.
@inproceedings{1981_smith,
  author    = {Smith, J. E.},
  title     = {A study of branch prediction strategies},
  booktitle = {Proceedings of the 8th Annual Symposium on Computer Architecture},
  pages     = {135--148},
  year      = {1981},
  publisher = {IEEE},
}
@inproceedings{1983_fisher,
  author    = {Fisher, J. A.},
  title     = {Very Long Instruction Word architectures and the {ELI-512}},
  booktitle = {Proceedings of the 10th Annual International Symposium on Computer Architecture},
  pages     = {140--150},
  year      = {1983},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
}
% Issued by a university division rather than a commercial publisher, so it is
% recorded as a technical report; the report number is not in the record.
@techreport{1983_Lee,
  author      = {Lee, J. K. F. and Smith, A. J.},
  title       = {Analysis of Branch Prediction Strategies and Branch Target Buffer Design},
  institution = {Computer Science Division (EECS), University of California},
  year        = {1983},
}
@article{lee1984bps,
  author  = {Lee, J. K. F. and Smith, A. J.},
  title   = {Branch Prediction Strategies and Branch Target Buffer Design},
  journal = {Computer},
  volume  = {17},
  number  = {1},
  pages   = {6--22},
  year    = {1984},
}
@article{smith1984dae,
  author  = {Smith, J. E.},
  title   = {Decoupled Access/Execute Computer Architectures},
  journal = {ACM Transactions on Computer Systems},
  volume  = {2},
  number  = {4},
  pages   = {289--308},
  year    = {1984},
}
@inproceedings{ditzel1987bfc,
  author    = {Ditzel, D. R. and McLellan, H. R.},
  title     = {Branch folding in the {CRISP} microprocessor: reducing branch delay to zero},
  booktitle = {Proceedings of the 14th Annual International Symposium on Computer Architecture},
  pages     = {2--8},
  year      = {1987},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% NOTE(review): the original record had only title and author. The year comes
% from the citation key; booktitle, pages and publisher were filled in from
% memory of the ICS 1990 proceedings -- confirm against the ACM Digital Library.
@inproceedings{1990_alverson,
  author    = {Alverson, R. and Callahan, D. and Cummings, D. and Porterfield, A. and Smith, B. and Koblenz, B.},
  title     = {The {Tera} Computer System},
  booktitle = {Proceedings of the 4th International Conference on Supercomputing},
  pages     = {1--6},
  year      = {1990},
  publisher = {ACM},
}
@inproceedings{1991_kaeli,
  author    = {Kaeli, D. R. and Emma, P. G.},
  title     = {Branch history table prediction of moving target branches due to subroutine returns},
  booktitle = {Proceedings of the 18th Annual International Symposium on Computer Architecture},
  pages     = {34--42},
  year      = {1991},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
@article{scherson1991ogc,
  author  = {Scherson, I. D.},
  title   = {Orthogonal graphs for the construction of a class of interconnection networks},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  volume  = {2},
  number  = {1},
  pages   = {3--19},
  year    = {1991},
}
@inproceedings{1991_wall,
  author    = {Wall, David W.},
  title     = {Limits of instruction-level parallelism},
  booktitle = {Proceedings of the Fourth International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages     = {176--188},
  year      = {1991},
}
@inproceedings{1992_pan,
  author    = {Pan, S. T. and So, K. and Rahmeh, J. T.},
  title     = {Improving the accuracy of dynamic branch prediction using branch correlation},
  booktitle = {Proceedings of the Fifth International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages     = {76--84},
  year      = {1992},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
@inproceedings{1992_yeh,
  author    = {Yeh, T. Y. and Patt, Y. N.},
  title     = {Alternative Implementations of Two-Level Adaptive Branch Prediction},
  booktitle = {Proceedings of the 19th Annual International Symposium on Computer Architecture},
  pages     = {124--134},
  year      = {1992},
}
% The original packed report type, number, company and date into the
% institution field; they are split into their own fields here.
% NOTE(review): "Western Research Laboratory" is an expansion of WRL -- confirm.
@techreport{1993_mcfarling,
  author      = {McFarling, S.},
  title       = {Combining Branch Predictors},
  institution = {Digital Equipment Corporation, Western Research Laboratory},
  type        = {{WRL} Technical Note},
  number      = {TN-36},
  month       = jun,
  year        = {1993},
}
@article{mclellan1993aaa,
  author  = {McLellan, E.},
  title   = {The {Alpha} {AXP} architecture and 21064 processor},
  journal = {IEEE Micro},
  volume  = {13},
  number  = {3},
  pages   = {36--47},
  year    = {1993},
}
@inproceedings{moudgill1993rra,
  author    = {Moudgill, M. and Pingali, K. and Vassiliadis, S.},
  title     = {Register renaming and dynamic speculation: an alternative approach},
  booktitle = {Proceedings of the 26th Annual International Symposium on Microarchitecture},
  pages     = {202--213},
  year      = {1993},
  publisher = {IEEE Computer Society Press},
  address   = {Los Alamitos, CA, USA},
}
@article{1993_Perleberg,
  author  = {Perleberg, C. H. and Smith, A. J.},
  title   = {Branch target buffer design and optimization},
  journal = {IEEE Transactions on Computers},
  volume  = {42},
  number  = {4},
  pages   = {396--412},
  year    = {1993},
}
@inproceedings{1993_yeh,
  author    = {Yeh, T. Y. and Patt, Y. N.},
  title     = {A comparison of dynamic branch predictors that use two levels of branch history},
  booktitle = {Proceedings of the 20th Annual International Symposium on Computer Architecture},
  pages     = {257--266},
  year      = {1993},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
% NOTE(review): the last author was exported as "Wen-mei, W.H.", which splits
% the given name "Wen-mei W." from the surname "Hwu"; confirm the spelling.
@inproceedings{gallagher1994dmd,
  author    = {Gallagher, D. M. and Chen, W. Y. and Mahlke, S. A. and Gyllenhaal, J. C. and Hwu, Wen-mei W.},
  title     = {Dynamic memory disambiguation using the memory conflict buffer},
  booktitle = {Proceedings of the Sixth International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages     = {183--193},
  year      = {1994},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
@article{song1994prm,
  author  = {Song, S. P. and Denman, M. and Chang, J.},
  title   = {The {PowerPC} 604 {RISC} microprocessor},
  journal = {IEEE Micro},
  volume  = {14},
  number  = {5},
  year    = {1994},
}
@article{diep1995pep,
  author    = {Diep, T. A. and Nelson, C. and Shen, J. P.},
  title     = {Performance evaluation of the {PowerPC} 620 microarchitecture},
  journal   = {ACM SIGARCH Computer Architecture News},
  volume    = {23},
  number    = {2},
  pages     = {163--174},
  year      = {1995},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% The exported placeholder page value 0 was dropped; the real page range is
% not in the record.
@article{1995_edmondson,
  author  = {Edmondson, J. H. and Rubinfeld, P. I. and Bannon, P. J. and Benschneider, B. J. and Bernstein, D. and Castelino, R. W. and Cooper, E. M. and Dever, D. E. and Donchin, D. R. and Fischer, T. C. and others},
  title   = {Internal Organization of the {Alpha} 21164, a {300-MHz} 64-bit Quad-issue {CMOS} {RISC} Microprocessor},
  journal = {Digital Technical Journal},
  volume  = {7},
  number  = {1},
  year    = {1995},
}
% Fields that carried the editor's OPT prefix were ignored by BibTeX; the
% filled ones are activated and the empty ones dropped. The conference
% location is kept in "venue", and the day range of the month was dropped.
@inproceedings{1995_sohi,
  author    = {Sohi, G. S. and Breach, S. E. and Vijaykumar, T. N.},
  title     = {Multiscalar processors},
  booktitle = {Proceedings of the 22nd Annual International Symposium on Computer Architecture},
  pages     = {414--425},
  month     = jun,
  year      = {1995},
  venue     = {Santa Margherita Ligure, Italy},
}
@inproceedings{1995_tullsen,
  author    = {Tullsen, D. M. and Eggers, S. J. and Levy, H. M.},
  title     = {Simultaneous multithreading: maximizing on-chip parallelism},
  booktitle = {Proceedings of the 22nd Annual International Symposium on Computer Architecture},
  pages     = {392--403},
  year      = {1995},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% NOTE(review): volume and number were absent from the original record and
% were added from memory -- confirm against IEEE Xplore.
@article{adve1996smc,
  author    = {Adve, S. V. and Gharachorloo, K.},
  title     = {Shared Memory Consistency Models: A Tutorial},
  journal   = {Computer},
  volume    = {29},
  number    = {12},
  pages     = {66--76},
  year      = {1996},
  publisher = {IEEE Computer Society},
}
@inproceedings{farkas1996rfd,
  author    = {Farkas, K. I. and Jouppi, N. P. and Chow, P.},
  title     = {Register File Design Considerations in Dynamically Scheduled Processors},
  booktitle = {Proceedings of the Second IEEE Symposium on High-Performance Computer Architecture},
  pages     = {40--51},
  year      = {1996},
}
% NOTE(review): the original was an inbook record whose title held the journal
% name, whose chapter held the special-issue name, and whose author sat in an
% ignored ALTauthor field. It is recast as a journal article. The article
% title, volume and number are NOT in the original record and were added from
% memory -- confirm against the ACM Digital Library before relying on them.
@article{1996_mudge,
  author    = {Mudge, Trevor},
  title     = {Strategic Directions in Computer Architecture},
  journal   = {ACM Computing Surveys},
  volume    = {28},
  number    = {4},
  pages     = {671--678},
  month     = dec,
  year      = {1996},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
  note      = {Special ACM 50th-anniversary issue: strategic directions in computing research},
}
% OPT-prefixed fields are activated and empty ones dropped. The ISBN that was
% parked in the crossref slot moves to "isbn"; the conference location is kept
% in "venue".
@inproceedings{1996_olukotun,
  author    = {Olukotun, Kunle and Nayfeh, Basem A. and Hammond, Lance and Wilson, Ken and Chang, Kunyung},
  title     = {The Case for a Single-Chip Multiprocessor},
  booktitle = {Proceedings of the Seventh International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages     = {2--11},
  year      = {1996},
  publisher = {ACM Press},
  isbn      = {0-89791-767-7},
  venue     = {Cambridge, Massachusetts, United States},
}
@inproceedings{1996_tullsen,
  author    = {Tullsen, D. M. and Eggers, S. J. and Emer, J. S. and Levy, H. M. and Lo, J. L. and Stamm, R. L.},
  title     = {Exploiting choice: instruction fetch and issue on an implementable simultaneous multithreading processor},
  booktitle = {Proceedings of the 23rd Annual International Symposium on Computer Architecture},
  pages     = {191--202},
  year      = {1996},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
% Same paper as the record keyed wallace1996srf; both keys are kept so that
% existing citations continue to resolve.
@inproceedings{1996_wallace,
  author    = {Wallace, S. and Bagherzadeh, N.},
  title     = {A scalable register file architecture for dynamically scheduled processors},
  booktitle = {Proceedings of the 1996 Conference on Parallel Architectures and Compilation Techniques},
  pages     = {179--184},
  month     = oct,
  year      = {1996},
  venue     = {Boston, MA, USA},
}
% Same paper as the record keyed 1996_wallace; the booktitle expansion and the
% page range are taken from that sibling record. Both keys are kept so that
% existing citations continue to resolve.
@inproceedings{wallace1996srf,
  author    = {Wallace, S. and Bagherzadeh, N.},
  title     = {A Scalable Register File Architecture for Dynamically Scheduled Processors},
  booktitle = {Proceedings of the 1996 Conference on Parallel Architectures and Compilation Techniques},
  pages     = {179--184},
  year      = {1996},
}
@article{1996_yeager,
  author  = {Yeager, K. C.},
  title   = {The {MIPS} {R10000} superscalar microprocessor},
  journal = {IEEE Micro},
  volume  = {16},
  number  = {2},
  pages   = {28--41},
  year    = {1996},
}
@article{burger1997sts,
  author    = {Burger, D. and Austin, T. M.},
  title     = {The {SimpleScalar} tool set, version 2.0},
  journal   = {ACM SIGARCH Computer Architecture News},
  volume    = {25},
  number    = {3},
  pages     = {13--25},
  year      = {1997},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
% OPT-prefixed fields are activated and empty ones dropped.
@inproceedings{1997_palacharla,
  author    = {Palacharla, S. and Jouppi, N. P. and Smith, J. E.},
  title     = {Complexity-Effective Superscalar Processors},
  booktitle = {Proceedings of the 24th Annual International Symposium on Computer Architecture},
  pages     = {206--218},
  month     = jun,
  year      = {1997},
}
% NOTE(review): the original record had an EMPTY title; the title below was
% added from memory of the ASPLOS 1998 proceedings (pages 58--69) -- confirm
% against the ACM Digital Library. The value "SSN:0163-5980" found in the
% crossref slot is read as a truncated ISSN.
@inproceedings{1998a_hammond,
  author    = {Hammond, Lance and Willey, Mark and Olukotun, Kunle},
  title     = {Data Speculation Support for a Chip Multiprocessor},
  booktitle = {Proceedings of the Eighth International Conference on Architectural Support for Programming Languages and Operating Systems},
  pages     = {58--69},
  year      = {1998},
  publisher = {ACM Press},
  issn      = {0163-5980},
  venue     = {San Jose, California, United States},
}
% The report identifier was stored in the ignored OPTkey slot; it is the
% report number and is recorded as such.
@techreport{1998b_hammond,
  author      = {Hammond, Lance and Olukotun, Kunle},
  title       = {Considerations in the design of {Hydra}: a multiprocessor-on-a-chip microarchitecture},
  institution = {Stanford University},
  number      = {CSL-TR-98-749},
  month       = feb,
  year        = {1998},
}
@article{chrysos1998mdp,
  author  = {Chrysos, G. Z. and Emer, J. S.},
  title   = {Memory dependence prediction using store sets},
  journal = {ACM SIGARCH Computer Architecture News},
  volume  = {26},
  number  = {3},
  pages   = {142--153},
  year    = {1998},
}
@inproceedings{1998_kessler,
  author    = {Kessler, R. E. and McLellan, E. J. and Webb, D. A.},
  title     = {The {Alpha} 21264 microprocessor architecture},
  booktitle = {Proceedings of the International Conference on Computer Design: VLSI in Computers and Processors (ICCD'98)},
  pages     = {90--95},
  year      = {1998},
}
% OPT-prefixed fields are activated and empty ones dropped. The exported
% booktitle was inverted and elided with "..."; it is rebuilt from its own
% fragments. The conference ran 30 Mar to 3 Apr; only the start month is kept.
@inproceedings{1998_krishnan,
  author    = {Krishnan, V. and Torrellas, J.},
  title     = {A clustered approach to multithreaded processors},
  booktitle = {Proceedings of the First Merged International Parallel Processing Symposium and Symposium on Parallel and Distributed Processing (IPPS/SPDP 1998)},
  pages     = {627--634},
  month     = mar,
  year      = {1998},
  venue     = {Orlando, FL, USA},
}
% NOTE(review): the original record listed "Ann Arbor" as the journal, 1001 as
% the volume and 48109 as the pages, which looks like a mis-parsed postal
% address rather than a journal citation. The venue and year are unknown, so
% the record is kept as misc with the address in a note -- confirm the source.
@misc{mikhail1001dmo,
  author = {Mikhail, J. and Karl, E. and Dreslinski, R. and Davidson, G.},
  title  = {Design of a 290 {MHz} Out of Order Microprocessor with Register Renaming and Speculative Memory Access},
  note   = {Ann Arbor, MI 48109},
}
% NOTE(review): the exported booktitle was an all-caps catalogue placeholder
% with the edition in the volume field (31); it is rewritten as the 31st MICRO
% proceedings -- confirm the exact proceedings title.
@inproceedings{1998_skadron,
  author       = {Skadron, K. and Ahuja, P. S. and Martonosi, M. and Clark, D. W.},
  title        = {Improving Prediction for Procedure Returns with Return-Address-Stack Repair Mechanisms},
  booktitle    = {Proceedings of the 31st Annual ACM/IEEE International Symposium on Microarchitecture},
  pages        = {259--271},
  year         = {1998},
  organization = {Association for Computing Machinery},
}
@article{farkas1999mar,
  author    = {Farkas, K. I. and Chow, P. and Jouppi, N. P. and Vranesic, Z.},
  title     = {The Multicluster Architecture: Reducing Processor Cycle Time Through Partitioning},
  journal   = {International Journal of Parallel Programming},
  volume    = {27},
  number    = {5},
  pages     = {327--356},
  year      = {1999},
  publisher = {Springer},
}
% The issuing laboratory had been exported as a third author and the report
% series as a journal; it is recorded as a technical report instead. The
% report number is not in the record.
@techreport{1999_schlansker,
  author      = {Schlansker, M. S. and Rau, B. R.},
  title       = {{EPIC}: An Architecture for Instruction-Level Parallel Processors},
  institution = {Hewlett-Packard Laboratories},
  type        = {{HP} Laboratories Technical Report},
  year        = {1999},
}
@article{skadron1999bpi,
  author  = {Skadron, K. and Ahuja, P. S. and Martonosi, M. and Clark, D. W.},
  title   = {Branch prediction, instruction-window size, and cache size: performance trade-offs and simulation techniques},
  journal = {IEEE Transactions on Computers},
  volume  = {48},
  number  = {11},
  pages   = {1260--1281},
  year    = {1999},
}

@inproceedings{2000_barroso,
  author    = {Barroso, L. A. and Gharachorloo, K. and McNamara, R. and Nowatzyk, A. and Qadeer, S. and Sano, B. and Smith, S. and Stets, R. and Verghese, B.},
  title     = {{Piranha}: a scalable architecture based on single-chip multiprocessing},
  booktitle = {Proceedings of the 27th Annual International Symposium on Computer Architecture},
  pages     = {282--293},
  year      = {2000},
  publisher = {ACM Press},
  address   = {New York, NY, USA},
}
@article{borkenhagen2000mpp,
  author  = {Borkenhagen, J. M. and Eickemeyer, R. J. and Kalla, R. N. and Kunkel, S. R.},
  title   = {A multithreaded {PowerPC} processor for commercial servers},
  journal = {IBM Journal of Research and Development},
  volume  = {44},
  number  = {6},
  pages   = {885--898},
  year    = {2000},
}
@article{cruz2000mbr,
  author  = {Cruz, J. L. and Gonzalez, A. and Valero, M. and Topham, N. P.},
  title   = {Multiple-banked register file architectures},
  journal = {ACM SIGARCH Computer Architecture News},
  volume  = {28},
  number  = {2},
  pages   = {316--325},
  year    = {2000},
}
@article{2000_cvetanovic,
  author    = {Cvetanovic, Z. and Kessler, R. E.},
  title     = {Performance analysis of the {Alpha} 21264-based {Compaq} {ES40} system},
  journal   = {ACM SIGARCH Computer Architecture News},
  volume    = {28},
  number    = {2},
  pages     = {192--202},
  year      = {2000},
  publisher = {ACM},
  address   = {New York, NY, USA},
}
% NOTE(review): volume, number and pages were absent from the original record
% and were added from memory -- confirm against IEEE Xplore.
@article{2000_hammond,
  author  = {Hammond, L. and Hubbert, B. A. and Siu, M. and Prabhu, M. K. and Chen, M. and Olukotun, K.},
  title   = {The {Stanford} {Hydra} {CMP}},
  journal = {IEEE Micro},
  volume  = {20},
  number  = {2},
  pages   = {71--84},
  year    = {2000},
}
% A company technical brief, not a journal article; the former journal string
% "Transmeta Technical Brief" is split into institution and type.
@techreport{klaiber2000tbc,
  author      = {Klaiber, A. and others},
  title       = {The Technology Behind {Crusoe} Processors},
  institution = {Transmeta Corporation},
  type        = {Technical Brief},
  year        = {2000},
}
% NOTE(review): volume and number were absent from the original record and
% were added from memory -- confirm against IEEE Xplore.
@article{2000_schlansker,
  author    = {Schlansker, M. S. and Rau, B. R.},
  title     = {{EPIC}: Explicitly Parallel Instruction Computing},
  journal   = {Computer},
  volume    = {33},
  number    = {2},
  pages     = {37--45},
  year      = {2000},
  publisher = {IEEE Computer Society},
}
@article{2000_sharangpani,
  author  = {Sharangpani, H. and Arora, H.},
  title   = {{Itanium} processor microarchitecture},
  journal = {IEEE Micro},
  volume  = {20},
  number  = {5},
  pages   = {24--43},
  year    = {2000},
}
% NOTE(review): the exported second author "Polytech, B." looks like the first
% author's affiliation (a polytechnic) mis-parsed as a person, so it was
% removed -- confirm. Same paper as the record keyed sima2000dsr; both keys are
% kept so that existing citations continue to resolve.
@article{2000_sima,
  author  = {Sima, D.},
  title   = {The design space of register renaming techniques},
  journal = {IEEE Micro},
  volume  = {20},
  number  = {5},
  pages   = {70--83},
  year    = {2000},
}
% NOTE(review): the exported second author "Polytech, B." looks like the first
% author's affiliation (a polytechnic) mis-parsed as a person, so it was
% removed -- confirm. Same paper as the record keyed 2000_sima; both keys are
% kept so that existing citations continue to resolve.
@article{sima2000dsr,
  author  = {Sima, D.},
  title   = {The design space of register renaming techniques},
  journal = {IEEE Micro},
  volume  = {20},
  number  = {5},
  pages   = {70--83},
  year    = {2000},
}
@inproceedings{balasubramonian2001rcr,
  author    = {Balasubramonian, R. and Dwarkadas, S. and Albonesi, D. H.},
  title     = {Reducing the complexity of the register file in dynamic superscalar processors},
  booktitle = {Proceedings of the 34th Annual ACM/IEEE International Symposium on Microarchitecture},
  pages     = {237--248},
  year      = {2001},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
}
@inproceedings{2001_burns,
  author    = {Burns, J. and Gaudiot, J. L.},
  title     = {Area and system clock effects on {SMT/CMP} processors},
  booktitle = {Proceedings of the 2001 International Conference on Parallel Architectures and Compilation Techniques},
  pages     = {211--218},
  year      = {2001},
}
@inproceedings{guthaus2001mfc,
  author    = {Guthaus, M. R. and Ringenberg, J. S. and Ernst, D. and Austin, T. M. and Mudge, T. and Brown, R. B.},
  title     = {{MiBench}: A free, commercially representative embedded benchmark suite},
  booktitle = {Proceedings of the 2001 IEEE International Workshop on Workload Characterization (WWC-4)},
  pages     = {3--14},
  year      = {2001},
}
% NOTE(review): the original record gave volume 1 and pages 2001. The page
% value merely repeats the year and was dropped; the 1 is read as the
% quarterly issue (Q1) rather than a volume -- confirm the volume and issue.
@article{2001_hinton,
  author  = {Hinton, G. and Sager, D. and Upton, M. and Boggs, D. and Carmean, D. and Kyker, A. and Roussel, P.},
  title   = {The microarchitecture of the {Pentium} 4 processor},
  journal = {Intel Technology Journal},
  number  = {Q1},
  year    = {2001},
}
% OPT-prefixed fields are activated and empty ones dropped. The identifiers
% that were parked in the crossref slot ("SBN ~ ISSN:1072-4451 , 0-7695-1369-7")
% are read as an ISSN followed by an ISBN; the location is kept in "venue".
@inproceedings{2001_nagarajan,
  author    = {Nagarajan, Ramadass and Sankaralingam, Karthikeyan and Burger, Doug and Keckler, Stephen W.},
  title     = {A design space evaluation of grid processor architectures},
  booktitle = {Proceedings of the 34th Annual ACM/IEEE International Symposium on Microarchitecture},
  pages     = {40--51},
  year      = {2001},
  publisher = {IEEE Computer Society},
  isbn      = {0-7695-1369-7},
  issn      = {1072-4451},
  venue     = {Austin, Texas},
}
@inproceedings{ernst2002eds,
  author    = {Ernst, D. and Austin, T.},
  title     = {Efficient dynamic scheduling through tag elimination},
  booktitle = {Proceedings of the 29th Annual International Symposium on Computer Architecture},
  pages     = {37--46},
  year      = {2002},
}
@inproceedings{akkary2003cpa,
  author    = {Akkary, H. and Rajwar, R. and Srinivasan, S. T.},
  title     = {Checkpoint processing and recovery: towards scalable large instruction window processors},
  booktitle = {Proceedings of the 36th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO-36)},
  pages     = {423--434},
  year      = {2003},
}
@article{2002_mukherjee,
  author  = {Mukherjee, S. S. and Bannon, P. and Lang, S. and Spink, A. and Webb, D.},
  title   = {The {Alpha} 21364 network architecture},
  journal = {IEEE Micro},
  volume  = {22},
  number  = {1},
  pages   = {26--35},
  year    = {2002},
}
@inproceedings{2002_sprangle,
  author    = {Sprangle, E. and Carmean, D.},
  title     = {Increasing processor performance by implementing deeper pipelines},
  booktitle = {Proceedings of the 29th Annual International Symposium on Computer Architecture},
  pages     = {25--34},
  year      = {2002},
}
@article{2002_tendler,
  author  = {Tendler, J. M. and Dodson, J. S. and Fields, Jr., J. S. and Le, H. and Sinharoy, B.},
  title   = {{POWER4} system microarchitecture},
  journal = {IBM Journal of Research and Development},
  volume  = {46},
  number  = {1},
  pages   = {5--25},
  year    = {2002},
}
% Volume, number and pages carried the editor's OPT prefix and were ignored by
% BibTeX; they are activated here and the empty placeholders dropped.
@article{2002_ungerer,
  author  = {Ungerer, T. and Robic, B. and Silc, J.},
  title   = {Multithreaded processors},
  journal = {The Computer Journal},
  volume  = {45},
  number  = {3},
  pages   = {320--348},
  year    = {2002},
}
% NOTE(review): the former journal string "Rapport technique RR-1573" is a
% report series and number, so the record is recast as a technical report. The
% institution is NOT in the original record and is a guess -- confirm both the
% institution and the report number.
@techreport{darsch2003oop,
  author      = {Darsch, A. and Seznec, A.},
  title       = {Out-of-order Predicated Execution with Translation Register Buffer},
  institution = {IRISA},
  type        = {Rapport technique},
  number      = {RR-1573},
  year        = {2003},
}
% NOTE(review): the exported booktitle was the ACM series name, which is moved
% to "series". The conference name below is NOT in the original record and was
% added from memory -- confirm against the ACM Digital Library.
@inproceedings{dehnert2003tcm,
  author    = {Dehnert, J. C. and Grant, B. K. and Banning, J. P. and Johnson, R. and Kistler, T. and Klaiber, A. and Mattson, J.},
  title     = {The {Transmeta} {Code} {Morphing} {Software}: using speculation, recovery, and adaptive retranslation to address real-life challenges},
  booktitle = {Proceedings of the International Symposium on Code Generation and Optimization},
  series    = {ACM International Conference Proceeding Series},
  volume    = {37},
  pages     = {15--24},
  year      = {2003},
}
% NOTE(review): the record has neither journal nor year, so it cannot be an
% article entry; it is kept as misc until the venue and date are identified.
@misc{heinrich:smt,
  author = {Heinrich, M.},
  title  = {Scalable Multi-threaded Multiprocessor Architectures},
}
@inproceedings{2003_jeong,
  author    = {Jeong, J. and Dubois, M.},
  title     = {Cost-sensitive cache replacement algorithms},
  booktitle = {Proceedings of the Ninth International Symposium on High-Performance Computer Architecture (HPCA-9)},
  pages     = {327--337},
  year      = {2003},
}
% NOTE(review): volume and number were absent from the original record. They
% are inferred from the sibling IEEE Micro records for 2003 (volume 23, number
% 2), whose page ranges 44--55 and 56--65 directly precede this one -- confirm.
@article{keltcher2003aop,
  author    = {Keltcher, C. N. and McGrath, K. J. and Ahmed, A. and Conway, P.},
  title     = {The {AMD} {Opteron} Processor for Multiprocessor Servers},
  journal   = {IEEE Micro},
  volume    = {23},
  number    = {2},
  pages     = {66--76},
  year      = {2003},
  publisher = {IEEE Computer Society},
}
@article{2003_koufaty,
  author  = {Koufaty, D. and Marr, D. T.},
  title   = {Hyperthreading technology in the {NetBurst} microarchitecture},
  journal = {IEEE Micro},
  volume  = {23},
  number  = {2},
  pages   = {56--65},
  year    = {2003},
}
@inproceedings{kumar2003sih,
  author    = {Kumar, R. and Farkas, K. I. and Jouppi, N. P. and Ranganathan, P. and Tullsen, D. M.},
  title     = {{Single-ISA} heterogeneous multi-core architectures: the potential for processor power reduction},
  booktitle = {Proceedings of the 36th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO-36)},
  pages     = {81--92},
  year      = {2003},
}
@article{2003_mcnairy,
  author  = {McNairy, C. and Soltis, D.},
  title   = {{Itanium} 2 processor microarchitecture},
  journal = {IEEE Micro},
  volume  = {23},
  number  = {2},
  pages   = {44--55},
  year    = {2003},
}
% The file held two records under this same key (one conference-style, one
% article-style) for the same MICRO-36 paper, which BibTeX reports as a
% repeated entry. They are merged: the page range comes from the article-style
% record, the publisher from the conference-style one.
@inproceedings{park2003rdc,
  author    = {Park, I. and Ooi, C. L. and Vijaykumar, T. N.},
  title     = {Reducing Design Complexity of the Load/Store Queue},
  booktitle = {Proceedings of the 36th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO-36)},
  pages     = {411--422},
  year      = {2003},
  publisher = {IEEE Computer Society},
  address   = {Washington, DC, USA},
}
% OPT-prefixed fields are activated and empty ones dropped. The ISBN that was
% parked in the crossref slot moves to "isbn"; the location is kept in "venue".
% NOTE(review): the original OPTvolume text "Volume 31 Issue 2" was not carried
% over because it is not a proceedings volume number -- restore it as a note
% if the newsletter issue matters.
@inproceedings{2003_sankaralingam,
  author    = {Sankaralingam, Karthikeyan and Nagarajan, Ramadass and Liu, Haiming and Kim, Changkyu and Huh, Jaehyuk and Burger, Doug and Keckler, Stephen W. and Moore, Charles R.},
  title     = {Exploiting {ILP}, {TLP}, and {DLP} with the polymorphous {TRIPS} architecture},
  booktitle = {Proceedings of the 30th Annual International Symposium on Computer Architecture},
  pages     = {422--433},
  month     = may,
  year      = {2003},
  publisher = {ACM Press},
  isbn      = {0-7695-1945-8},
  venue     = {San Diego, California},
}
@inproceedings{sethumadhavan2003shm,
  author    = {Sethumadhavan, S. and Desikan, R. and Burger, D. and Moore, C. R. and Keckler, S. W.},
  title     = {Scalable hardware memory disambiguation for high {ILP} processors},
  booktitle = {Proceedings of the 36th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO-36)},
  pages     = {399--410},
  year      = {2003},
}
735 | @InProceedings{2004_chaudhuri, |
---|
736 | author = {Mainak Chaudhuri and |
---|
737 | Mark Heinrich}, |
---|
738 | title = {{SMTp}: An Architecture for Next-generation Scalable Multi-threading}, |
---|
739 | OPTcrossref = {}, |
---|
740 | OPTkey = {}, |
---|
741 | booktitle = {Proceedings of the 31st annual international symposium on Computer architecture}, |
---|
742 | pages = {124--136}, |
---|
743 | year = {2004}, |
---|
744 | OPTeditor = {}, |
---|
745 | OPTvolume = {0}, |
---|
746 | OPTnumber = {}, |
---|
747 | OPTseries = {}, |
---|
748 | OPTaddress = {M{\"u}nchen, Germany}, |
---|
749 | OPTmonth = {}, |
---|
750 | OPTorganization = {}, |
---|
751 | publisher = {IEEE Computer Society}, |
---|
752 | OPTnote = {}, |
---|
753 | OPTannote = {} |
---|
754 | } |
---|
755 | @inproceedings{2004_collins, |
---|
756 | title={Clustered multithreaded architectures -- pursuing both {IPC} and cycle time}, |
---|
757 | author={Collins, J. D. and Tullsen, D. M.}, |
---|
758 | booktitle={Proceedings of the 18th International Parallel and Distributed Processing Symposium}, |
---|
759 | year={2004} |
---|
760 | } |
---|
761 | @inproceedings{cristal2004ooc, |
---|
762 | title={Out-of-Order Commit Processors}, |
---|
763 | author={Cristal, A. and Ortega, D. and Llosa, J. and Valero, M.}, |
---|
764 | booktitle={Proceedings of the 10th International Symposium on High Performance Computer Architecture}, |
---|
765 | pages={48}, |
---|
766 | year={2004}, |
---|
767 | organization={IEEE Computer Society Washington, DC, USA} |
---|
768 | } |
---|
769 | @article{2004_dolbeau, |
---|
770 | title={{CASH}: Revisiting hardware sharing in single-chip parallel processor}, |
---|
771 | author={Dolbeau, R. and Seznec, A.}, |
---|
772 | journal={Journal of Instruction-Level Parallelism}, |
---|
773 | volume={6}, |
---|
774 | pages={1--16}, |
---|
775 | year={2004} |
---|
776 | } |
---|
777 | @article{2004_kalla, |
---|
778 | title={{IBM} {Power5} chip: a dual-core multithreaded processor}, |
---|
779 | author={Kalla, R. and Sinharoy, B. and Tendler, J. M.}, |
---|
780 | journal={IEEE Micro}, |
---|
781 | volume={24}, |
---|
782 | number={2}, |
---|
783 | pages={40--47}, |
---|
784 | year={2004} |
---|
785 | } |
---|
786 | @inproceedings{2004_kumar, |
---|
787 | title={Conjoined-Core Chip Multiprocessing}, |
---|
788 | author={Kumar, R. and Jouppi, N. P. and Tullsen, D. M.}, |
---|
789 | booktitle={Proceedings of the 37th annual IEEE/ACM International Symposium on Microarchitecture}, |
---|
790 | pages={195--206}, |
---|
791 | year={2004}, |
---|
792 | publisher={IEEE Computer Society Washington, DC, USA} |
---|
793 | } |
---|
794 | @article{kumar2004sih, |
---|
795 | title={{Single-ISA} Heterogeneous Multi-Core Architectures for Multithreaded Workload Performance}, |
---|
796 | author={Kumar, R. and Tullsen, D. M. and Ranganathan, P. and Jouppi, N. P. and Farkas, K. I.}, |
---|
797 | journal={ACM SIGARCH Computer Architecture News}, |
---|
798 | volume={32}, |
---|
799 | number={2}, |
---|
800 | year={2004}, |
---|
801 | publisher={ACM New York, NY, USA} |
---|
802 | } |
---|
803 | @book{mattsson:esc, |
---|
804 | title={Evaluation of synthesizable {CPU} cores}, |
---|
805 | author={Mattsson, D. and Christensson, M.}, |
---|
806 | publisher={Chalmers tekniska h{\"o}gskola} |
---|
807 | } |
---|
808 | @inproceedings{tune2004bmi, |
---|
809 | title={Balanced Multithreading: Increasing Throughput via a Low Cost Multithreading Hierarchy}, |
---|
810 | author={Tune, E. and Kumar, R. and Tullsen, D. M. and Calder, B.}, |
---|
811 | booktitle={Proceedings of the 37th International Symposium on Microarchitecture}, |
---|
812 | year={2004}, |
---|
813 | publisher={IEEE} |
---|
814 | } |
---|
815 | @InProceedings{2004_wang, |
---|
816 | author = {Nicholas J. Wang and |
---|
817 | Justin Quek and |
---|
818 | Todd M. Rafacz and |
---|
819 | Sanjay J. Patel}, |
---|
820 | title = {Characterizing the Effects of Transient Faults on a High-Performance Processor Pipeline}, |
---|
821 | booktitle = {Proceedings of the 2004 International Conference on Dependable Systems and Networks}, |
---|
822 | OPTkey = {}, |
---|
823 | OPTcrossref = {}, |
---|
824 | OPTpages = {}, |
---|
825 | year = {2004}, |
---|
826 | OPTeditor = {}, |
---|
827 | OPTvolume = {}, |
---|
828 | OPTnumber = {}, |
---|
829 | OPTseries = {}, |
---|
830 | OPTaddress = {Florence, Italy}, |
---|
831 | month = jun, |
---|
832 | OPTorganization = {}, |
---|
833 | OPTpublisher = {}, |
---|
834 | OPTnote = {}, |
---|
835 | OPTannote = {} |
---|
836 | } |
---|
837 | @techreport{2005_ARM, |
---|
838 | title={Architecture and Implementation of the {ARM} {Cortex-A8} Microprocessor}, |
---|
839 | author={{ARM}}, |
---|
840 | institution={ARM} |
---|
841 | } |
---|
842 | @article{constantinou2005pis, |
---|
843 | title={Performance implications of single thread migration on a chip multi-core}, |
---|
844 | author={Constantinou, T. and Sazeides, Y. and Michaud, P. and Fetis, D. and Seznec, A.}, |
---|
845 | journal={ACM SIGARCH Computer Architecture News}, |
---|
846 | volume={33}, |
---|
847 | number={4}, |
---|
848 | pages={80--91}, |
---|
849 | year={2005}, |
---|
850 | publisher={ACM New York, NY, USA} |
---|
851 | } |
---|
852 | @inproceedings{dimond2005cct, |
---|
853 | title={{CUSTARD} -- A Customisable Threaded {FPGA} Soft Processor and Tools}, |
---|
854 | author={Dimond, R. and Mencer, O. and Luk, W.}, |
---|
855 | booktitle={International Conference on Field Programmable Logic (FPL)}, |
---|
856 | year={2005} |
---|
857 | } |
---|
858 | @book{2005_fisher, |
---|
859 | title={Embedded Computing: A {VLIW} Approach to Architecture, Compilers and Tools}, |
---|
860 | author={Fisher, Joseph A. and Faraboschi, Paolo and Young, Cliff}, |
---|
861 | year={2005}, |
---|
862 | publisher={Morgan Kaufmann Publishers} |
---|
863 | } |
---|
864 | @article{2005_kahle, |
---|
865 | title={Introduction to the {Cell} multiprocessor}, |
---|
866 | author={Kahle, J. and others}, |
---|
867 | journal={IBM Journal of Research and Development}, |
---|
868 | volume={49}, |
---|
869 | number={4}, |
---|
870 | pages={589--604}, |
---|
871 | year={2005} |
---|
872 | } |
---|
873 | @article{2005_kongetira, |
---|
874 | title={{Niagara}: a 32-way multithreaded {Sparc} processor}, |
---|
875 | author={Kongetira, P. and Aingaran, K. and Olukotun, K.}, |
---|
876 | journal={IEEE Micro}, |
---|
877 | volume={25}, |
---|
878 | number={2}, |
---|
879 | pages={21--29}, |
---|
880 | year={2005} |
---|
881 | } |
---|
882 | @article{2005_kumar, |
---|
883 | title={Heterogeneous Chip Multiprocessors}, |
---|
884 | author={Kumar, R. and Tullsen, D. M. and Jouppi, N. P. and Ranganathan, P.}, |
---|
885 | journal={Computer}, |
---|
886 | pages={32--38}, |
---|
887 | year={2005}, |
---|
888 | publisher={IEEE Computer Society} |
---|
889 | } |
---|
890 | @article{2005_mcnairy, |
---|
891 | title={{Montecito}: a dual-core, dual-thread {Itanium} processor}, |
---|
892 | author={McNairy, C. and Bhatia, R.}, |
---|
893 | journal={IEEE Micro}, |
---|
894 | volume={25}, |
---|
895 | number={2}, |
---|
896 | pages={10--20}, |
---|
897 | year={2005} |
---|
898 | } |
---|
899 | @inproceedings{percival2005cmf, |
---|
900 | title={Cache missing for fun and profit}, |
---|
901 | author={Percival, C.}, |
---|
902 | booktitle={BSDCan 2005}, |
---|
903 | year={2005} |
---|
904 | } |
---|
905 | @misc{siddiqui:pap, |
---|
906 | title={{POWER4} and {POWER5} Scalability}, |
---|
907 | author={Siddiqui, W. and VanBuren, B. G.} |
---|
908 | } |
---|
909 | @article{2005_sinharoy, |
---|
910 | title={{POWER5} system microarchitecture}, |
---|
911 | author={Sinharoy, B. and Kalla, R. N. and Tendler, J. M. and Eickemeyer, R. J. and Joyner, J. B.}, |
---|
912 | journal={IBM Journal of Research and Development}, |
---|
913 | volume={49}, |
---|
914 | number={4/5}, |
---|
915 | pages={505}, |
---|
916 | year={2005}, |
---|
917 | publisher={IBM Corporation} |
---|
918 | } |
---|
919 | @book{yiannacouras2005mfb, |
---|
920 | title={The microarchitecture of {FPGA-based} soft processors}, |
---|
921 | author={Yiannacouras, P. and Rose, J. and Steffan, J. G.}, |
---|
922 | year={2005}, |
---|
923 | publisher={ACM New York, NY, USA} |
---|
924 | } |
---|
931 | @inproceedings{2006_ghasemzadeh, |
---|
932 | title={Modified Pseudo {LRU} Replacement Algorithm}, |
---|
933 | author={Ghasemzadeh, H. and Mazrouee, S. S. and Kakoee, M. R.}, |
---|
934 | booktitle={Proceedings of the 13th Annual IEEE International Symposium and Workshop on Engineering of Computer Based Systems (ECBS 2006)}, |
---|
935 | pages={368--376}, |
---|
936 | year={2006} |
---|
937 | } |
---|
938 | @article{2006_gochman, |
---|
939 | title={Introduction to {Intel Core Duo} processor architecture}, |
---|
940 | author={Gochman, S. and Mendelson, A. and Naveh, A. and Rotem, E.}, |
---|
941 | journal={Intel Technology Journal}, |
---|
942 | volume={10}, |
---|
943 | number={2}, |
---|
944 | pages={89--97}, |
---|
945 | year={2006} |
---|
946 | } |
---|
947 | @inproceedings{kumar2006cao, |
---|
948 | title={Core architecture optimization for heterogeneous chip multiprocessors}, |
---|
949 | author={Kumar, R. and Tullsen, D. M. and Jouppi, N. P.}, |
---|
950 | booktitle={Proceedings of the 15th international conference on Parallel architectures and compilation techniques}, |
---|
951 | pages={23--32}, |
---|
952 | year={2006}, |
---|
953 | organization={ACM New York, NY, USA} |
---|
954 | } |
---|
955 | @article{mcghan2006nof, |
---|
956 | title={{Niagara} 2 Opens the Floodgates}, |
---|
957 | author={McGhan, H.}, |
---|
958 | journal={Microprocessor Report}, |
---|
959 | volume={20}, |
---|
960 | number={11}, |
---|
961 | year={2006} |
---|
962 | } |
---|
963 | @article{mendelson2006cis, |
---|
964 | title={{CMP} Implementation in Systems Based on the {Intel Core Duo} Processor}, |
---|
965 | author={Mendelson, A. and others}, |
---|
966 | journal={Intel Technology Journal}, |
---|
967 | volume={10}, |
---|
968 | number={2}, |
---|
969 | year={2006} |
---|
970 | } |
---|
971 | @article{sangireddy2006rrl, |
---|
972 | title={Reducing Rename Logic Complexity for High-Speed and Low-Power Front-End Architectures}, |
---|
973 | author={Sangireddy, R.}, |
---|
974 | journal={IEEE Transactions on Computers}, |
---|
975 | pages={672--685}, |
---|
976 | year={2006}, |
---|
977 | publisher={IEEE Computer Society} |
---|
978 | } |
---|
979 | @misc{microsystems2006otm, |
---|
980 | title={{OpenSPARC} {T1} Microarchitecture Specification}, |
---|
981 | author={{Sun Microsystems}}, |
---|
982 | year={2006}, |
---|
983 | month=aug |
---|
984 | } |
---|
985 | @inproceedings{yiannacouras2006asc, |
---|
986 | title={Application-specific customization of soft processor microarchitecture}, |
---|
987 | author={Yiannacouras, P. and Steffan, J. G. and Rose, J.}, |
---|
988 | booktitle={Proceedings of the 2006 ACM/SIGDA 14th international symposium on Field programmable gate arrays}, |
---|
989 | pages={201--210}, |
---|
990 | year={2006}, |
---|
991 | publisher={ACM New York, NY, USA} |
---|
992 | } |
---|
993 | @techreport{2007_ARM, |
---|
994 | title={The {ARM} {Cortex-A9} Processor}, |
---|
995 | author={{ARM}}, |
---|
996 | institution={ARM} |
---|
997 | } |
---|
998 | @phdthesis{bingham2007mrl, |
---|
999 | title={A {MIPS} {R10000-Like} Out-Of-Order Microprocessor Implementation in {Verilog} {HDL}}, |
---|
1000 | author={Bingham, S. T.}, |
---|
1001 | year={2007}, |
---|
1002 | school={Cornell University} |
---|
1003 | } |
---|
1004 | @article{eisen:ipa, |
---|
1005 | title={{IBM} {POWER6} accelerators: {VMX} and {DFU}}, |
---|
1006 | author={Eisen, L. and Ward, III, J. W. and Tast, H. W. and M{\"a}ding, N. and Leenstra, J. and Mueller, S. M. and Jacobi, C. and Preiss, J. and Schwarz, E. M. and Carlough, S. R.} |
---|
1007 | } |
---|
1008 | @article{2007_le, |
---|
1009 | title={{IBM} {POWER6} microarchitecture}, |
---|
1010 | author={Le, H. Q. and Starke, W. J. and Fields, J. S. and O'Connell, F. P. and Nguyen, D. Q. and Ronchetti, B. J. and Sauer, W. M. and Schwarz, E. M. and Vaden, M. T.}, |
---|
1011 | journal={IBM Journal of Research and Development}, |
---|
1012 | volume={51}, |
---|
1013 | number={6}, |
---|
1014 | pages={639}, |
---|
1015 | year={2007}, |
---|
1016 | publisher={IBM Corporation} |
---|
1017 | } |
---|
1018 | @inproceedings{li2007map, |
---|
1019 | title={Microarchitecture and Performance Analysis of {Godson-2} {SMT} Processor}, |
---|
1020 | author={Li, Z. and Xu, X. and Hu, W. and Tang, Z.}, |
---|
1021 | booktitle={Proceedings of the 2006 International Conference on Computer Design (ICCD 2006)}, |
---|
1022 | pages={485--490}, |
---|
1023 | year={2007} |
---|
1024 | } |
---|
1025 | @article{hardware6mom, |
---|
1026 | title={Memory Ordering in Modern Microprocessors}, |
---|
1027 | author={McKenney, Paul E.}, |
---|
1028 | journal={Linux Journal}, |
---|
1029 | year={2005} |
---|
1030 | } |
---|
1032 | @inproceedings{sethumadhavan2007lbe, |
---|
1033 | title={Late-binding: enabling unordered load-store queues}, |
---|
1034 | author={Sethumadhavan, S. and Roesner, F. and Emer, J. S. and Burger, D. and Keckler, S. W.}, |
---|
1035 | booktitle={Proceedings of the 34th annual international conference on Computer architecture}, |
---|
1036 | pages={347--357}, |
---|
1037 | year={2007}, |
---|
1038 | publisher={ACM Press New York, NY, USA} |
---|
1039 | } |
---|
1040 | @inproceedings{williams2008lbs, |
---|
1041 | title={{Lattice} {Boltzmann} simulation optimization on leading multicore platforms}, |
---|
1042 | author={Williams, S. and Carter, J. and Oliker, L. and Shalf, J. and Yelick, K.}, |
---|
1043 | booktitle={Proceedings of the 2008 IEEE International Symposium on Parallel and Distributed Processing (IPDPS 2008)}, |
---|
1044 | pages={1--14}, |
---|
1045 | year={2008} |
---|
1046 | } |
---|