/* SPDX-License-Identifier: GPL-2.0-only */

/*
 * Optimized assembly for low-level CPU operations on ARM64 processors.
 */

#include <arch/asm.h>		/* for ENTRY()/ENDPROC() */
#include <arch/cache.h>
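
/*
 * Walk every data/unified cache level up to the Level of Coherency and apply
 * the requested DC operation to each line by set/way. The DC xSW operand
 * encodes the cache level in bits [3:1], the set number shifted left by
 * log2(line length in bytes), and the way number left-aligned in the top
 * bits (starting at bit 32 - log2(associativity)). Note that set/way
 * operations only affect the caches of the core executing them; they are
 * intended for init/power-down sequences like this one, not for maintaining
 * coherency with other observers.
 */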
.macro dcache_apply_all crm
	mrs	x0, clidr_el1		// read CLIDR
	and	w3, w0, #0x07000000	// narrow to LoC field, CLIDR[26:24]
	lsr	w3, w3, #23		// w3 = LoC * 2, in the low 4 bits
	cbz	w3, 5f			// LoC == 0, nothing to do

	mov	w10, #0			// w10 = 2 * cache level
	mov	w8, #1			// w8 = constant 0b1
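
	/*
	 * The level counter is kept pre-multiplied by two because both
	 * CSSELR_EL1 and the DC set/way operand expect the cache level in
	 * bits [3:1].
	 */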
	mrs	x12, id_aa64mmfr2_el1	// read ID_AA64MMFR2_EL1
	ubfx	x12, x12, #20, #4	// [23:20] - CCIDX support
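
	/*
	 * If ID_AA64MMFR2_EL1.CCIDX is nonzero, FEAT_CCIDX is implemented and
	 * CCSIDR_EL1 uses the extended layout (Associativity in [23:3],
	 * NumSets in [55:32]); otherwise the original layout applies
	 * (Associativity in [12:3], NumSets in [27:13]). x12 selects which
	 * field extraction is used below.
	 */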

1:	// next_level
	add	w2, w10, w10, lsr #1	// w2 = 3 * cache level
	lsr	w1, w0, w2		// extract 3-bit cache type for this level
	and	w1, w1, #0x7		// w1 = cache type
	cmp	w1, #2			// is it data or i&d?
	b.lt	4f			// no data cache at this level, skip
	msr	csselr_el1, x10		// select current cache level
	isb				// sync change of csselr
	mrs	x1, ccsidr_el1		// w1 = read ccsidr
	and	w2, w1, #7		// w2 = log2(linelen_bytes) - 4
	add	w2, w2, #4		// w2 = log2(linelen_bytes)
	cbz	x12, 11f		// check FEAT_CCIDX for associativity
					// branch to 11 if FEAT_CCIDX is not implemented
	ubfx	x4, x1, #3, #21		// x4 = associativity CCSIDR_EL1[23:3]
	b	12f
11:
	ubfx	x4, x1, #3, #10		// x4 = associativity CCSIDR_EL1[12:3]
12:
	clz	w5, w4			// w5 = 32 - log2(ways)
					// (bit position of way in DC operand)
	lsl	w9, w4, w5		// w9 = max way number, aligned for DC
	lsl	w16, w8, w5		// w16 = amount to decrement (way
					// number per iteration)
2:	// next_way
	cbz	x12, 21f		// check FEAT_CCIDX for numsets
					// branch to 21 if FEAT_CCIDX is not implemented
	ubfx	x7, x1, #32, #24	// x7(w7) = numsets CCSIDR_EL1[55:32]
	b	22f
21:
	ubfx	w7, w1, #13, #15	// w7 = numsets CCSIDR_EL1[27:13]
22:
	lsl	w7, w7, w2		// w7 = max set #, DC aligned
	lsl	w17, w8, w2		// w17 = amount to decrement (set
					// number per iteration)
3:	// next_set
	orr	w11, w10, w9		// w11 = combine way # & cache level
	orr	w11, w11, w7		// ... and set #
	dc	\crm, x11		// clean and/or invalidate line
	subs	w7, w7, w17		// decrement set number
	b.ge	3b			// loop over all sets in this way
	subs	x9, x9, x16		// decrement way number
	b.ge	2b			// loop over all ways at this level
4:	// skip
	add	w10, w10, #2		// increment 2 * cache level
	cmp	w3, w10			// went beyond LoC?
	b.gt	1b			// no, process the next level
5:	// done
	dsb	sy			// ensure completion of the maintenance
	isb
	ret
.endm
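
/*
 * Entry points: the crm argument selects the DC operation applied to every
 * line, i.e. invalidate (isw), clean (csw) or clean and invalidate (cisw),
 * all by set/way.
 */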
ENTRY(dcache_invalidate_all)
	dcache_apply_all crm=isw
ENDPROC(dcache_invalidate_all)

ENTRY(dcache_clean_all)
	dcache_apply_all crm=csw
ENDPROC(dcache_clean_all)

ENTRY(dcache_clean_invalidate_all)
	dcache_apply_all crm=cisw
ENDPROC(dcache_clean_invalidate_all)
/* This must be implemented in assembly to ensure there are no accesses to
   memory (e.g. the stack) in between disabling and flushing the cache. */
ENTRY(mmu_disable)
	str	x30, [sp, #-0x8]	// stash return address (bl below clobbers x30)
	mrs	x0, CURRENT_EL(sctlr)
	mov	x1, #~(SCTLR_C | SCTLR_M)
	and	x0, x0, x1		// clear dcache enable and MMU enable
	msr	CURRENT_EL(sctlr), x0
	isb
	bl	dcache_clean_invalidate_all
	ldr	x30, [sp, #-0x8]
	ret
ENDPROC(mmu_disable)
/*
 * Bring an ARMv8 processor we just gained control of (e.g. from IROM) into a
 * known state regarding caches/SCTLR/SCR/PSTATE. Completely invalidates
 * icache/dcache, disables MMU and dcache (if active), and enables unaligned
 * accesses and the icache. Seeds the stack and initializes SP_EL0. Clobbers
 * x22 and x23.
 */
ENTRY(arm64_init_cpu)
	/* Initialize PSTATE (mask all exceptions, select SP_EL0). */
	msr	SPSel, #0
	msr	DAIFSet, #0xf

	/* TODO: This is where we'd put non-boot CPUs into WFI if needed. */
	/* x22: SCTLR, return address: x23 (callee-saved by subroutine) */
	mov	x23, x30
	mrs	x22, CURRENT_EL(sctlr)

	/* Activate ICache already for speed during cache flush below. */
	orr	x22, x22, #SCTLR_I
	msr	CURRENT_EL(sctlr), x22
	isb
	/* Invalidate dcache */
	bl	dcache_invalidate_all

	/* Reinitialize SCTLR from scratch to known-good state.
	   This may disable MMU or DCache. */
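	/*
	 * SCTLR_RES1 keeps the reserved-1 bits set, SCTLR_I enables the
	 * icache and SCTLR_SA enables SP alignment checking. M (MMU), C
	 * (dcache) and A (alignment checking) stay clear, so translation and
	 * the dcache are off and unaligned accesses are permitted.
	 */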
	ldr	w22, =(SCTLR_RES1 | SCTLR_I | SCTLR_SA)
	msr	CURRENT_EL(sctlr), x22
#if CONFIG_ARM64_CURRENT_EL == EL3
	/* Initialize SCR to unmask all interrupts (so that if we get a spurious
	   IRQ/SError we'll see it when it happens, not hang in BL31). This will
	   only have an effect after we DAIFClr in exception_init(). */
	mov	x22, #SCR_RES1 | SCR_IRQ | SCR_FIQ | SCR_EA
	msr	scr_el3, x22
#endif
	/* Invalidate icache and TLB for good measure */
	ic	iallu			// invalidate all icache lines to PoU
#if CONFIG_ARM64_CURRENT_EL == EL3
	tlbi	alle3			// invalidate all EL3 TLB entries
#elif CONFIG_ARM64_CURRENT_EL == EL2
	tlbi	alle2			// invalidate all EL2 TLB entries
#else
	tlbi	vmalle1			// invalidate all stage 1 EL1&0 TLB entries
#endif
	dsb	sy
	isb
	/* Initialize stack with sentinel value to later check overflow. */
	ldr	x2, =0xdeadbeefdeadbeef
	ldr	x0, =_stack		// bottom of the stack (from memlayout)
	ldr	x1, =_estack		// top of the stack (from memlayout)
1:
	stp	x2, x2, [x0], #16
	cmp	x0, x1
	bne	1b
	/* Leave a line of beef dead for easier visibility in stack dumps. */
	sub	sp, x0, #16

	ret	x23
ENDPROC(arm64_init_cpu)