source: trunk/libs/newlib/src/libgloss/arm/cpu-init/rdimon-aem.S @ 535

Last change on this file since 535 was 444, checked in by satin@…, 7 years ago

add newlib,libalmos-mkh, restructure shared_syscalls.h and mini-libc

File size: 18.6 KB
Line 
1/* Copyright (c) 2005-2013 ARM Ltd.  All rights reserved.
2
3 Redistribution and use in source and binary forms, with or without
4 modification, are permitted provided that the following conditions
5 are met:
6 1. Redistributions of source code must retain the above copyright
7    notice, this list of conditions and the following disclaimer.
8 2. Redistributions in binary form must reproduce the above copyright
9    notice, this list of conditions and the following disclaimer in the
10    documentation and/or other materials provided with the distribution.
11 3. The name of the company may not be used to endorse or promote
12    products derived from this software without specific prior written
13    permission.
14
15 THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
16 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
17 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
19 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
20 TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
25
26/* This file gives a basic initialisation of a Cortex-A series core.  It is
27   the bare minimum required to get Cortex-A core running with a semihosting
28   interface.
29
30   It sets up a basic 1:1 physical address to virtual address mapping;
31   turns the MMU on; enables branch prediction; activates any integrated
32   caches; enables the Advanced SIMD and VFP co-processors; and installs
33   basic exception handlers.
34
35   It does not handle peripherals, and assumes all memory is Normal.
36
37   It does not change processor state from the startup privilege and security
38   level.
39
40   This has only been tested to work in ARM state.
41
42   By default it assumes exception vectors are located from address 0.
43   However, if this is not true they can be moved by defining the
44   _rdimon_vector_base symbol.  For example if you have HIVECS enabled you
45   may pass --defsym _rdimon_vector_base=0xffff0000 on the linker command
46   line.  */
47
48   /* __ARM_ARCH_PROFILE is defined from GCC 4.8 onwards, however __ARM_ARCH_7A
49        has been defined since 4.2 onwards, which is when v7-a support was added
50        and hence 'A' profile support was added in the compiler.  Allow for this
51        file to be built with older compilers.  */
52#if defined(__ARM_ARCH_7A__) || (__ARM_ARCH_PROFILE == 'A')
53    .syntax     unified
54    .arch       armv7-a
55    .arm
56
57    @ CPU Initialisation
    @ _rdimon_hw_init_hook performs, in order: parking of every CPU other
    @ than CPU 0, data endianness selection, ACTLR setup on Cortex-A15/A7,
    @ exception-state selection, cache reset, translation register setup,
    @ the vector-code copy, MMU enable, VFP/Advanced SIMD enable and finally
    @ cache enable.
    @ The incoming lr is kept in r10 and used for the final return.  r0-r10
    @ and ip may be overwritten here or in the helpers called from here;
    @ neither this routine nor those helpers save anything on the stack.
58    .globl      _rdimon_hw_init_hook
59    .type       _rdimon_hw_init_hook, %function
60
61_rdimon_hw_init_hook:
62    @ Only run the code on CPU 0 - otherwise spin
63    mrc         15, 0, r4, cr0, cr0, 5  @ Read MPIDR
64    ands        r4, r4, #15             @ Z is clear if the low 4 bits are non-zero
65spin:
66    bne spin                            @ Flags are still those from the ands above
67
68    mov         r10, lr                 @ Save LR for final return
69
70#ifdef __ARMEB__
71    @ Setup for Big Endian
72    setend      be
73    mrc         15, 0, r4, cr1, cr0, 0  @ Read SCTLR
74    orr         r4, r4, #(1<<25)        @ Switch to Big Endian (Set SCTLR.EE)
75    mcr         15, 0, r4, cr1, cr0, 0  @ Write SCTLR
76#else
77    @ Setup for Little Endian
78    setend      le
79    mrc         15, 0, r4, cr1, cr0, 0  @ Read SCTLR
80    bic         r4, r4, #(1<<25)        @ Switch to LE (unset SCTLR.EE)
81    mcr         15, 0, r4, cr1, cr0, 0  @ Write SCTLR
82#endif
83
84    bl          is_a15_a7               @ Returns Z set for Cortex-A15/A7; uses r8, r9
85
86    @ For Cortex-A15 and Cortex-A7 only:
87    @ Write zero into the ACTLR to turn everything on.
88    itt         eq
89    moveq       r4, #0
90    mcreq       15, 0, r4, c1, c0, 1
91    isb
92
93    @ For Cortex-A15 and Cortex-A7 only:
94    @ Set ACTLR:SMP bit before enabling the caches and MMU,
95    @ or performing any cache and TLB maintenance operations.
    @ The Z flag from is_a15_a7 is still valid here: nothing in between
    @ writes the flags.
96    ittt        eq
97    mrceq       15, 0, r4, c1, c0, 1    @ Read ACTLR
98    orreq       r4, r4, #(1<<6)         @ Enable ACTLR:SMP
99    mcreq       15, 0, r4, c1, c0, 1    @ Write ACTLR
100    isb
101
102    @ Setup for exceptions being taken to Thumb/ARM state
103    mrc         15, 0, r4, cr1, cr0, 0  @ Read SCTLR
104#if defined(__thumb__)
105    orr         r4, r4, #(1 << 30)      @ Enable SCTLR.TE
106#else
107    bic         r4, r4, #(1 << 30)      @ Disable SCTLR.TE
108#endif
109    mcr         15, 0, r4, cr1, cr0, 0  @ Write SCTLR
110
111    bl          __reset_caches          @ May overwrite r0-r7, ip and lr
112
113    mrc         15, 0, r4, cr1, cr0, 0  @ Read SCTLR
114    orr         r4, r4, #(1<<22)        @ Enable unaligned mode
115    bic         r4, r4, #2              @ Disable alignment faults
116    bic         r4, r4, #1              @ Disable MMU
117    mcr         15, 0, r4, cr1, cr0, 0  @ Write SCTLR
118
119    mov         r4, #0
120    mcr         15, 0, r4, cr8, cr7, 0  @ Write TLBIALL - Invalidate unified
121                                        @ TLB
122    @ Setup MMU Primary table P=V mapping.
123    mvn         r4, #0                  @ r4 = 0xffffffff
124    mcr         15, 0, r4, cr3, cr0, 0  @ Write DACR
125
126    mov         r4, #0                  @ Always use TTBR0, no LPAE
127    mcr         15, 0, r4, cr2, cr0, 2  @ Write TTBCR
128    adr         r4, page_table_addr     @ Load the base for vectors
129    ldr         r4, [r4]                @ r4 = address of page_tables
130    mrc         p15, 0, r0, c0, c0, 5   @ read MPIDR
131    tst         r0, #0x80000000         @ bit[31]
132    @ Set page table flags - there are two page table flag formats for the
133    @ architecture.  For systems without multiprocessor extensions we use 0x1
134    @ which is Inner cacheable/Outer non-cacheable.  For systems with
135    @ multiprocessor extensions we use 0x59 which is Inner/Outer write-back,
136    @ no write-allocate, and cacheable.  See the ARMARM-v7AR for more details.
137    it          ne
138    addne       r4, r4, #0x58           @ Only when MPIDR bit[31] is set
139    add         r4, r4, #1              @ Flags total 0x59 or 0x1
140
141    mcr         15, 0, r4, cr2, cr0, 0  @ Write TTBR0
142
143    mov         r0, #34 @ 0x22          @ TR0 and TR1 - normal memory
144    orr         r0, r0, #(1 << 19)      @ Shareable
145    mcr         15, 0, r0, cr10, cr2, 0 @ Write PRRR
146    movw        r0, #0x33
147    movt        r0, #0x33               @ r0 = 0x00330033
148    mcr         15, 0, r0, cr10, cr2, 1 @ Write NMRR
149    mrc         15, 0, r0, cr1, cr0, 0  @ Read SCTLR
150    bic         r0, r0, #(1 << 28)      @ Clear TRE bit
151    mcr         15, 0, r0, cr1, cr0, 0  @ Write SCTLR
152
153    @ Now install the vector code - we move the Vector code from where it is
154    @ in the image to be based at _rdimon_vector_base.  We have to do this copy
155    @ as the code is all PC-relative.  We actually cheat and do a BX <reg> so
156    @ that we are at a known address relatively quickly and have to move as
157    @ little code as possible.
158    mov         r7, #(VectorCode_Limit - VectorCode)  @ Bytes to copy
159    adr         r5, VectorCode          @ r5 = source pointer
160    adr         r6, vector_base_addr    @ Load the base for vectors
161    ldr         r6, [r6]                @ r6 = destination pointer
162
163copy_loop:                              @ Do the copy
164    ldr         r4, [r5], #4
165    str         r4, [r6], #4
166    subs        r7, r7, #4              @ One word copied per pass
167    bne         copy_loop               @ Ends only when r7 reaches exactly 0, so
                                        @ the byte count must be a non-zero
                                        @ multiple of 4
168
169    mrc         15, 0, r4, cr1, cr0, 0  @ Read SCTLR
170    bic         r4, r4, #0x1000         @ Disable I Cache
171    bic         r4, r4, #4              @ Disable D Cache
172    orr         r4, r4, #1              @ Enable MMU
173    bic         r4, r4, #(1 << 28)      @ Clear TRE bit
174    mcr         15, 0, r4, cr1, cr0, 0  @ Write SCTLR
175    mrc         15, 0, r4, cr1, cr0, 2  @ Read CPACR
176    orr         r4, r4, #0x00f00000     @ Turn on VFP Co-procs
177    bic         r4, r4, #0x80000000     @ Clear ASEDIS bit
178    mcr         15, 0, r4, cr1, cr0, 2  @ Write CPACR
179    isb
180    mov         r4, #0
181    mcr         15, 0, r4, cr7, cr5, 4  @ Flush prefetch buffer
182    mrc         15, 0, r4, cr1, cr0, 2  @ Read CPACR
183    ubfx        r4, r4, #20, #4         @ Extract bits [23:20] (4 bits)
184    cmp         r4, #0xf                @ If not all set then the CPU does not
185    itt         eq                      @ have FP or Advanced SIMD.
186    moveq       r4, #0x40000000         @ Enable FP and Advanced SIMD
187    mcreq       10, 7, r4, cr8, cr0, 0  @ vmsr  fpexc, r4
188skip_vfp_enable:                        @ No branch to this label in the visible source
189    bl          __enable_caches         @ Turn caches on
190    bx          r10                     @ Return to CRT startup routine
191
192    @ This enables us to be more precise about which caches we want
    @ Both labels share one body that returns r0 = 1 and touches no other
    @ register.  __enable_caches and __reset_caches compare the returned r0
    @ with 0 to decide what to enable.
193init_cpu_client_enable_dcache:
194init_cpu_client_enable_icache:
195    mov         r0, #1
196    bx          lr
197
198vector_base_addr:                       @ Literal read with adr + ldr by the init hook
199    .word       _rdimon_vector_base     @ Destination address of the vector-code copy
200    .weak       _rdimon_vector_base     @ Weak reference: may be left undefined at link time
201page_table_addr:                        @ Literal read with adr + ldr by the init hook
202    .word       page_tables             @ Table base that is combined into TTBR0
203
204    @ Vector code - must be PIC and in ARM state.
    @ Everything from VectorCode up to VectorCode_Limit is copied to
    @ _rdimon_vector_base by the init hook, so only PC-relative references
    @ (b, adr) are used between those two labels.
    @ Each vector_* stub points sp at vector_sp_base (discarding whatever sp
    @ held before), pushes r0-r9, sl, fp, ip and lr, puts the vector number
    @ (0-7) in r4 and branches to vector_common.
205VectorCode:
206    b           vector_reset
207    b           vector_undef
208    b           vector_swi
209    b           vector_prefetch
210    b           vector_dataabt
211    b           vector_reserved
212    b           vector_irq
213    b           vector_fiq
214
215vector_reset:
216    adr         sp, vector_sp_base
217    push        {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
218    mov         r4, #0
219    b           vector_common
220vector_undef:
221    adr         sp, vector_sp_base
222    push        {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
223    mov         r4, #1
224    b           vector_common
225vector_swi:
226    adr         sp, vector_sp_base
227    push        {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
228    mov         r4, #2
229    b           vector_common
230vector_prefetch:
231    adr         sp, vector_sp_base
232    push        {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
233    mov         r4, #3
234    b           vector_common
235vector_dataabt:
236    adr         sp, vector_sp_base
237    push        {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
238    mov         r4, #4
239    b           vector_common
240vector_reserved:
241    adr         sp, vector_sp_base
242    push        {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
243    mov         r4, #5
244    b           vector_common
245vector_irq:
246    adr         sp, vector_sp_base
247    push        {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
248    mov         r4, #6
249    b           vector_common
250vector_fiq:
251    adr         sp, vector_sp_base
252    push        {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, sl, fp, ip, lr}
253    mov         r4, #7
254    b           vector_common
255vector_common:
256    adr         r1, vector_common_adr   @ Find where we're going to
257    ldr         r1, [r1]                @ Link-time address stored in the literal below
258    bx          r1                      @ And branch there
259vector_common_adr:
260   .word        vector_common_2         @ Common handling code
261
262                                        @ Vector stack
263   .p2align       3                       @ Align to 8 byte boundary to
264                                        @ keep ABI compatibility
265   .fill        32, 4, 0                @ 32-entry stack is enough for vector
266                                        @ handlers.
267vector_sp_base:                         @ Initial sp for every stub; the fill above lies below it
268VectorCode_Limit:
269    @ End of PIC code for vectors
270
271    @ Common Handling of vectors
    @ Reached by bx from vector_common.  On entry r4 holds the vector number
    @ (0-7) and sp points at the 14 words pushed by the vector stub
    @ (r0-r9, sl, fp, ip, lr).  After the PSRs are pushed the stack holds 16
    @ words in the same order as the 16 entries of register_names.
    @ Prints the vector name and the register dump through out_string,
    @ out_word and out_nl, then issues semihosting operation 24 with
    @ r1 = 0x20000 | vector number.
272    .type       vector_common_2, %function
273vector_common_2:
274    mrs         r1, APSR
275    mrs         r2, SPSR
276    push        {r1, r2}                @ Save PSRs
277
278    @ Output the vector we have caught
279    bl          out_nl
280    adr         r0, which_vector
281    bl          out_string
282    adr         r0, vector_names
283    mov         r1, #11                 @ Size of one vector_names entry (10 chars + NUL)
284    mla         r0, r4, r1, r0          @ r0 = vector_names + r4 * 11
285    bl          out_string
286    bl          out_nl
287
288    @ Dump the registers
289    adrl        r6, register_names      @ r6 = pointer to the current name
290    mov         r7, #0                  @ r7 = index of the saved word, 0..15
291dump_r_loop:
292    mov         r0, r6
293    bl          out_string
294    add         r6, r6, #6              @ Each register_names entry is 6 bytes (5 chars + NUL)
295    ldr         r0, [sp, r7, lsl #2]    @ r0 = saved word number r7
296    bl          out_word
297    bl          out_nl
298    add         r7, r7, #1
299    cmp         r7, #16
300    blt         dump_r_loop
301    adr         r0, end
302    bl          out_string
303
304    @ And exit
305    mov         r0, #24                 @ Semihosting operation number
306    orr         r1, r4, #0x20000        @ Parameter: 0x20000 combined with the vector number
307    svc         0x00123456              @ No branch follows: if this call returns,
                                        @ execution continues into out_string
308
309    @ Output the string in r0
    @ In: r0 = address of the string (the callers pass .asciz data).
    @ Issues semihosting operation 4 with r1 = that address.  r0 and r1 are
    @ overwritten; lr is saved on the stack and popped into pc to return.
310out_string:
311    push        {lr}
312    mov         r1, r0                  @ r1 = string address
313    mov         r0, #4                  @ Semihosting operation number
314    svc         0x00123456
315    pop         {pc}
316
317    @ Output a New-line
    @ out_nl loads r0 with 10 and continues straight into out_char.
318out_nl:
319    mov r0, #10
320    @ Fallthrough
321
322    @ Output the character in r0
    @ The low byte of r0 is stored in a temporary 4-byte stack slot and
    @ semihosting operation 3 is issued with r1 pointing at that byte.
    @ r0 and r1 are overwritten; sp is restored before returning.
323out_char:
324    push        {lr}
325    strb        r0, [sp, #-4]!          @ Reserve 4 bytes and store the character at sp
326    mov         r0, #3                  @ Semihosting operation number
327    mov         r1, sp                  @ r1 = address of the character
328    svc         0x00123456
329    add         sp, sp, #4              @ Release the temporary slot
330    pop         {pc}
331
332    @ Output the value of r0 as a hex-word
    @ Prints eight lowercase hex digits, most significant nibble first, by
    @ calling out_char once per nibble.  r4-r6 are saved and restored; r0
    @ and r1 are overwritten.
333out_word:
334    push        {r4, r5, r6, lr}
335    mov         r4, r0                  @ r4 = value to print
336    mov         r5, #28                 @ r5 = shift for the current nibble: 28, 24, ..., 0
337    adr         r6, hexchars            @ r6 = digit lookup table
338word_loop:
339    lsr         r0, r4, r5
340    and         r0, r0, #15             @ r0 = current nibble
341    ldrb        r0, [r6, r0]            @ r0 = its ASCII digit
342    bl          out_char
343    subs        r5, r5, #4
344    bpl         word_loop               @ Loop while the shift is still >= 0
345    pop         {r4, r5, r6, pc}
346
347hexchars:                               @ Digit table indexed by nibble in out_word
348    .ascii      "0123456789abcdef"
349
350which_vector:                           @ Prefix printed by vector_common_2
351    .asciz      "Hit vector:"
352end:                                    @ Printed after the register dump
353    .asciz      "End.\n"
354
    @ vector_common_2 indexes this table as vector_names + 11 * vector number,
    @ so every entry must stay exactly 10 characters plus the NUL.
355vector_names:
356    .asciz      "reset     "
357    .asciz      "undef     "
358    .asciz      "swi       "
359    .asciz      "prefetch  "
360    .asciz      "data abort"
361    .asciz      "reserved  "
362    .asciz      "irq       "
363    .asciz      "fiq       "
364
    @ vector_common_2 steps through this table 6 bytes at a time, so every
    @ entry must stay exactly 5 characters plus the NUL.  The order matches
    @ the 16 words on the vector stack: the two PSRs, then the registers
    @ pushed by the vector stub.
365register_names:
366    .asciz      "apsr "
367    .asciz      "spsr "
368    .asciz      "r0   "
369    .asciz      "r1   "
370    .asciz      "r2   "
371    .asciz      "r3   "
372    .asciz      "r4   "
373    .asciz      "r5   "
374    .asciz      "r6   "
375    .asciz      "r7   "
376    .asciz      "r8   "
377    .asciz      "r9   "
378    .asciz      "r10  "
379    .asciz      "r11  "
380    .asciz      "r12  "
381    .asciz      "r14  "
382
383    .p2align      3                     @ Realign to 8 bytes after the string data
384
385
386    @ Enable the caches
    @ Invalidates the unified TLB and the branch predictor, turns the branch
    @ predictor on, then sets the I-Cache and D-Cache enable bits in SCTLR
    @ when the matching init_cpu_client_enable_* hook returns non-zero.
    @ Overwrites r0, r4 and r5; lr is kept in r5 across the bl instructions.
387__enable_caches:
388    mov         r0, #0
389    mcr         15, 0, r0, cr8, cr7, 0  @ Invalidate all unified-TLB
390    mov         r0, #0
391    mcr         15, 0, r0, cr7, cr5, 6  @ Invalidate branch predictor
392    mrc         15, 0, r4, cr1, cr0, 0  @ Read SCTLR
393    orr         r4, r4, #0x800          @ Enable branch predictor
394    mcr         15, 0, r4, cr1, cr0, 0  @ Set SCTLR
395    mov         r5, lr                  @ Save LR as we're going to BL
396    mrc         15, 0, r4, cr1, cr0, 0  @ Read SCTLR
397    bl          init_cpu_client_enable_icache
398    cmp         r0, #0
399    it          ne
400    orrne       r4, r4, #0x1000         @ Enable I-Cache
401    bl          init_cpu_client_enable_dcache
402    cmp         r0, #0
403    it          ne
404    orrne       r4, r4, #4              @ Enable D-Cache
405    mcr         15, 0, r4, cr1, cr0, 0  @ Write SCTLR with the selected cache bits
406    bx          r5                      @ Return
407
408__reset_caches:
    @ Invalidates the branch predictor and, when CLIDR reports one, the
    @ instruction cache, then walks every cache level listed in CLIDR and
    @ invalidates (or cleans and invalidates) each data/unified cache by
    @ set/way.  Finishes by clearing the D-Cache enable bit and invalidating
    @ the branch predictor again.
    @ The return address is kept in ip because lr is reused as the level
    @ counter.  r0-r7, ip and lr may be overwritten.
409    mov         ip, lr                  @ Save LR
410    mov         r0, #0
411    mcr         15, 0, r0, cr7, cr5, 6  @ Invalidate branch predictor
412    mrc         15, 0, r6, cr1, cr0, 0  @ Read SCTLR; r6 keeps this entry value for Loop31
413    mrc         15, 0, r0, cr1, cr0, 0  @ Read SCTLR!
414    bic         r0, r0, #0x1000         @ Disable I cache
415    mcr         15, 0, r0, cr1, cr0, 0  @ Write SCTLR
416    mrc         15, 1, r0, cr0, cr0, 1  @ Read CLIDR
417    tst         r0, #3                  @ Harvard Cache?
418    mov         r0, #0
419    it          ne
420    mcrne       15, 0, r0, cr7, cr5, 0  @ Invalidate Instruction Cache?
421
422    mrc         15, 0, r1, cr1, cr0, 0  @ Read SCTLR (again!)
423    orr         r1, r1, #0x800          @ Enable branch predictor
424
425                                        @ If we're not enabling caches we have
426                                        @ no more work to do.
427    bl          init_cpu_client_enable_icache
428    cmp         r0, #0
429    it          ne
430    orrne       r1, r1, #0x1000         @ Enable I-Cache now -
431                                        @ We actually only do this if we have a
432                                        @ Harvard style cache.
433    it          eq
434    bleq        init_cpu_client_enable_dcache  @ Only asked when the I-Cache answer was 0
435    itt         eq
436    cmpeq       r0, #0
437    beq         Finished1               @ Taken only when both hooks returned 0
438
439    mcr         15, 0, r1, cr1, cr0, 0  @ Write SCTLR (turn on Branch predictor & I-cache)
440
441    mrc         15, 1, r0, cr0, cr0, 1  @ Read CLIDR
442    ands        r3, r0, #0x7000000
443    lsr         r3, r3, #23             @ Total cache levels << 1
444    beq         Finished1               @ Flags come from the ands; the lsr leaves them alone
445
446    mov         lr, #0                  @ lr = cache level << 1
447Loop11:
448    mrc         15, 1, r0, cr0, cr0, 1  @ Read CLIDR
449    add         r2, lr, lr, lsr #1      @ r2 holds cache 'set' position
450    lsr         r1, r0, r2              @ Bottom 3-bits are Ctype for this level
451    and         r1, r1, #7              @ Get those 3-bits alone
452    cmp         r1, #2
453    blt         Skip1                   @ No cache or only I-Cache at this level
454    mcr         15, 2, lr, cr0, cr0, 0  @ Write CSSELR
455    mov         r1, #0
456    isb         sy
457    mrc         15, 1, r1, cr0, cr0, 0  @ Read CCSIDR
458    and         r2, r1, #7              @ Extract line length field
459    add         r2, r2, #4              @ Add 4 for the line length offset (log2 16 bytes)
460    movw        r0, #0x3ff
461    ands        r0, r0, r1, lsr #3      @ r0 is the max number on the way size
462    clz         r4, r0                  @ r4 is the bit position of the way size increment
463    movw        r5, #0x7fff
464    ands        r5, r5, r1, lsr #13     @ r5 is the max number of the index size (right aligned)
465Loop21:
466    mov r7, r0                          @ r7 working copy of max way size
467Loop31:
468    orr         r1, lr, r7, lsl r4      @ factor in way number and cache number
469    orr         r1, r1, r5, lsl r2      @ factor in set number
470    tst         r6, #4                  @ D-Cache on?
471    ite         eq
472    mcreq       15, 0, r1, cr7, cr6, 2  @ No - invalidate by set/way
473    mcrne       15, 0, r1, cr7, cr14, 2 @ yes - clean + invalidate by set/way
474    subs        r7, r7, #1              @ Decrement way number
475    bge         Loop31
476    subs        r5, r5, #1              @ Decrement set number
477    bge         Loop21
478Skip1:
479    add         lr, lr, #2              @ increment cache number
480    cmp         r3, lr
481    bgt         Loop11
482Finished1:
483    @ Now we know the caches are clean we can:
484    mrc         15, 0, r4, cr1, cr0, 0  @ Read SCTLR
485    bic         r4, r4, #4              @ Disable D-Cache
486    mcr         15, 0, r4, cr1, cr0, 0  @ Write SCTLR
487    mov         r4, #0
488    mcr         15, 0, r4, cr7, cr5, 6  @ Write BPIALL
489
490    bx          ip                      @ Return
491
492    @ Set Z if this is a Cortex-A15 or Cortex_A7
493    @ Other flags corrupted
    @ Reads the ID register at c0, c0, 0, masks it with 0xff0ffff0 and
    @ compares the result with 0x410fc0f0 and then, if that did not match,
    @ with 0x410fc070.  Overwrites r8 and r9.
    @ NOTE(review): 0xc0f / 0xc07 are presumably the Cortex-A15 / Cortex-A7
    @ part numbers - confirm against the processor manuals.
494is_a15_a7:
495    mrc         15, 0, r8, c0, c0, 0
496    movw        r9, #0xfff0
497    movt        r9, #0xff0f             @ r9 = 0xff0ffff0 mask
498    and         r8, r8, r9
499    movw        r9, #0xc0f0
500    movt        r9, #0x410f             @ r9 = 0x410fc0f0
501    cmp         r8, r9
502    movw        r9, #0xc070
503    movt        r9, #0x410f             @ r9 = 0x410fc070; movw/movt leave the flags alone
504    it          ne
505    cmpne       r8, r9                  @ Second compare only if the first did not match
506    bx          lr
507
508    @ Descriptor type: Section
509    @ Bufferable: True
510    @ Cacheable: True
511    @ Execute Never: False
512    @ Domain: 0
513    @ Impl. Defined: 0
514    @ Access: 0/11 Full access
515    @ TEX: 001
516    @ Shareable: False
517    @ Not Global: False
518    @ Supersection: False
    @ The macros below build the table by repeated four-way expansion: PT
    @ emits one word, and each higher-numbered macro emits four copies of the
    @ one before it with the argument stepped by a quarter of its own span.
    @ PT2 steps by 0x100000 per word, so PT7 emits 4096 consecutive words
    @ whose values rise by 0x100000 from the starting value, i.e. 16KB of
    @ table, which matches the 2^14 alignment requested below.
519#define PT(X) \
520    .word       X;
521#define PT2(X) \
522    PT(X)  PT(X + 0x100000)    PT(X + 0x200000)    PT(X + 0x300000)
523#define PT3(X) \
524    PT2(X) PT2(X + 0x400000)   PT2(X + 0x800000)   PT2(X + 0xc00000)
525#define PT4(X) \
526    PT3(X) PT3(X + 0x1000000)  PT3(X + 0x2000000)  PT3(X + 0x3000000)
527#define PT5(X) \
528    PT4(X) PT4(X + 0x4000000)  PT4(X + 0x8000000)  PT4(X + 0xc000000)
529#define PT6(X) \
530    PT5(X) PT5(X + 0x10000000) PT5(X + 0x20000000) PT5(X + 0x30000000)
531#define PT7(X) \
532    PT6(X) PT6(X + 0x40000000) PT6(X + 0x80000000) PT6(X + 0xc0000000)
533
534    .section    page_tables_section, "aw", %progbits
535    .p2align    14                      @ Align the table to 2^14 = 16KB
536page_tables:
537     PT7(0x1c0e)
538
539#endif //#if defined(__ARM_ARCH_7A__) || __ARM_ARCH_PROFILE == 'A'
Note: See TracBrowser for help on using the repository browser.