/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/include/asm/assembler.h, arch/arm/mm/proc-macros.S
 *
 * Copyright (C) 1996-2000 Russell King
 * Copyright (C) 2012 ARM Ltd.
 */
#ifndef __ASSEMBLY__
#error "Only include this from assembly code"
#endif

#ifndef __ASM_ASSEMBLER_H
#define __ASM_ASSEMBLER_H

#include <asm-generic/export.h>

#include <asm/asm-offsets.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/debug-monitors.h>
#include <asm/pgtable-hwdef.h>
#include <asm/ptrace.h>
#include <asm/thread_info.h>

	.macro	save_and_disable_daif, flags
	mrs	\flags, daif
	msr	daifset, #0xf
	.endm

	.macro	restore_daif, flags:req
	msr	daif, \flags
	.endm

	/* Only on aarch64 pstate, PSR_D_BIT is different for aarch32 */
	.macro	inherit_daif, pstate:req, tmp:req
	and	\tmp, \pstate, #(PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT)
	msr	daif, \tmp
	.endm

	/* IRQ is the lowest priority flag, unconditionally unmask the rest. */
	.macro	enable_da_f
	msr	daifclr, #(8 | 4 | 1)
	.endm

/*
 * Save/restore interrupts.
 */
	.macro	save_and_disable_irq, flags
	mrs	\flags, daif
	msr	daifset, #2
	.endm

	.macro	restore_irq, flags
	msr	daif, \flags
	.endm

	.macro	disable_step_tsk, flgs, tmp
	tbz	\flgs, #TIF_SINGLESTEP, 9990f
	mrs	\tmp, mdscr_el1
	bic	\tmp, \tmp, #DBG_MDSCR_SS
	msr	mdscr_el1, \tmp
	isb	// Synchronise with enable_dbg
9990:
	.endm

	/* call with daif masked */
	.macro	enable_step_tsk, flgs, tmp
	tbz	\flgs, #TIF_SINGLESTEP, 9990f
	mrs	\tmp, mdscr_el1
	orr	\tmp, \tmp, #DBG_MDSCR_SS
	msr	mdscr_el1, \tmp
9990:
	.endm
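
	/*
	 * Usage sketch (hypothetical call site, not part of this header):
	 * assuming x2 holds the current task's thread flags and x3 is free
	 * as scratch, single-step can be toggled around a code region with:
	 *
	 *	disable_step_tsk x2, x3
	 *	// ... region that must not be single-stepped ...
	 *	enable_step_tsk x2, x3		// only with daif masked
	 */
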
/*
 * SMP data memory barrier
 */

/*
 * RAS Error Synchronization barrier
 */
	.macro	esb
#ifdef CONFIG_ARM64_RAS_EXTN
	hint	#16
#else
	nop
#endif
	.endm

/*
 * Value prediction barrier
 */

/*
 * Speculation barrier
 */
	.macro	sb
alternative_if_not ARM64_HAS_SB
	dsb	nsh
	isb
alternative_else
	SB_BARRIER_INSN
	nop
alternative_endif
	.endm

/*
 * Sanitise a 64-bit bounded index wrt speculation, returning zero if out
 * of bounds.
 */
	.macro	mask_nospec64, idx, limit, tmp
	sub	\tmp, \idx, \limit
	bic	\tmp, \tmp, \idx
	and	\idx, \idx, \tmp, asr #63
	csdb
	.endm
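
	/*
	 * Usage sketch (hypothetical call site; registers are illustrative):
	 * clamp an attacker-controlled index in x0 against a limit in x1,
	 * using x2 as scratch, before using it under speculation:
	 *
	 *	mask_nospec64 x0, x1, x2	// x0 becomes 0 if x0 >= x1
	 */
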
/*
 * Emit an entry into the exception table
 */
	.macro		_asm_extable, from, to
	.pushsection	__ex_table, "a"
	.align		3
	.long		(\from - .), (\to - .)
	.popsection
	.endm

#define USER(l, x...)				\
9999:	x;					\
	_asm_extable	9999b, l

lr	.req	x30		// link register

/*
 * Select code when configured for BE.
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
#define CPU_BE(code...) code
#else
#define CPU_BE(code...)
#endif

/*
 * Select code when configured for LE.
 */
#ifdef CONFIG_CPU_BIG_ENDIAN
#define CPU_LE(code...)
#else
#define CPU_LE(code...) code
#endif

/*
 * Define a macro that constructs a 64-bit value by concatenating two
 * 32-bit registers. Note that on big endian systems the order of the
 * registers is swapped.
 */
#ifndef CONFIG_CPU_BIG_ENDIAN
	.macro	regs_to_64, rd, lbits, hbits
#else
	.macro	regs_to_64, rd, hbits, lbits
#endif
	orr	\rd, \lbits, \hbits, lsl #32
	.endm
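
	/*
	 * Usage sketch (hypothetical, registers are illustrative): on a
	 * little-endian kernel, combine the low half in x0 and the high half
	 * in x1 (upper 32 bits of both assumed clear) into one 64-bit value;
	 * on big-endian the same invocation treats x0 as the high half:
	 *
	 *	regs_to_64 x2, x0, x1
	 */
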
/*
 * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
 * <symbol> is within the range +/- 4 GB of the PC.
 */
	/*
	 * @dst: destination register (64 bit wide)
	 * @sym: name of the symbol
	 */
	.macro	adr_l, dst, sym
	adrp	\dst, \sym
	add	\dst, \dst, :lo12:\sym
	.endm

	/*
	 * @dst: destination register (32 or 64 bit wide)
	 * @sym: name of the symbol
	 * @tmp: optional 64-bit scratch register to be used if <dst> is a
	 *       32-bit wide register, in which case it cannot be used to hold
	 *       the address
	 */
	.macro	ldr_l, dst, sym, tmp=
	.ifb	\tmp
	adrp	\dst, \sym
	ldr	\dst, [\dst, :lo12:\sym]
	.else
	adrp	\tmp, \sym
	ldr	\dst, [\tmp, :lo12:\sym]
	.endif
	.endm

	/*
	 * @src: source register (32 or 64 bit wide)
	 * @sym: name of the symbol
	 * @tmp: mandatory 64-bit scratch register to calculate the address
	 *       while <src> needs to be preserved.
	 */
	.macro	str_l, src, sym, tmp
	adrp	\tmp, \sym
	str	\src, [\tmp, :lo12:\sym]
	.endm
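
	/*
	 * Usage sketch (hypothetical variable name and register choices, for
	 * illustration only): with symbols within +/- 4 GB of the PC,
	 *
	 *	adr_l	x0, init_task		// x0 = address of init_task
	 *	ldr_l	w1, some_u32_var, x2	// w1 = value, x2 clobbered
	 *	str_l	w1, some_u32_var, x2	// store w1 back, via x2
	 */
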
	/*
	 * @dst: Result of per_cpu(sym, smp_processor_id()) (can be SP)
	 * @sym: The name of the per-cpu variable
	 * @tmp: scratch register
	 */
	.macro adr_this_cpu, dst, sym, tmp
	adrp	\tmp, \sym
	add	\dst, \tmp, #:lo12:\sym
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
	mrs	\tmp, tpidr_el1
alternative_else
	mrs	\tmp, tpidr_el2
alternative_endif
	add	\dst, \dst, \tmp
	.endm

	/*
	 * @dst: Result of READ_ONCE(per_cpu(sym, smp_processor_id()))
	 * @sym: The name of the per-cpu variable
	 * @tmp: scratch register
	 */
	.macro ldr_this_cpu dst, sym, tmp
	adr_l	\dst, \sym
alternative_if_not ARM64_HAS_VIRT_HOST_EXTN
	mrs	\tmp, tpidr_el1
alternative_else
	mrs	\tmp, tpidr_el2
alternative_endif
	ldr	\dst, [\dst, \tmp]
	.endm
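
	/*
	 * Usage sketch (hypothetical per-cpu variable name; registers are
	 * illustrative): load this CPU's copy of a per-cpu 64-bit value, or
	 * just take its address:
	 *
	 *	ldr_this_cpu x0, some_percpu_counter, x1	// x0 = value
	 *	adr_this_cpu x0, some_percpu_counter, x1	// x0 = address
	 */
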
/*
 * vma_vm_mm - get mm pointer from vma pointer (vma->vm_mm)
 */
	.macro	vma_vm_mm, rd, rn
	ldr	\rd, [\rn, #VMA_VM_MM]
	.endm

/*
 * mmid - get context id from mm pointer (mm->context.id)
 */
	.macro	mmid, rd, rn
	ldr	\rd, [\rn, #MM_CONTEXT_ID]
	.endm

/*
 * read_ctr - read CTR_EL0. If the system has mismatched register fields,
 * provide the system wide safe value from arm64_ftr_reg_ctrel0.sys_val
 */
	.macro	read_ctr, reg
alternative_if_not ARM64_MISMATCHED_CACHE_TYPE
	mrs	\reg, ctr_el0			// read CTR
	nop
alternative_else
	ldr_l	\reg, arm64_ftr_reg_ctrel0 + ARM64_FTR_SYSVAL
alternative_endif
	.endm

/*
 * raw_dcache_line_size - get the minimum D-cache line size on this CPU
 * from the CTR register.
 */
	.macro	raw_dcache_line_size, reg, tmp
	mrs	\tmp, ctr_el0			// read CTR
	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// actual cache line size
	.endm

/*
 * dcache_line_size - get the safe D-cache line size across all CPUs
 */
	.macro	dcache_line_size, reg, tmp
	read_ctr	\tmp
	ubfm	\tmp, \tmp, #16, #19		// cache line size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// actual cache line size
	.endm
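
	/*
	 * Worked example (values are illustrative): CTR_EL0[19:16] holds
	 * log2(words per D-cache line). If that field reads 4, the macro
	 * computes 4 << 4 = 64, i.e. a 64-byte cache line in \reg.
	 */
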
/*
 * raw_icache_line_size - get the minimum I-cache line size on this CPU
 * from the CTR register.
 */
	.macro	raw_icache_line_size, reg, tmp
	mrs	\tmp, ctr_el0			// read CTR
	and	\tmp, \tmp, #0xf		// cache line size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// actual cache line size
	.endm

/*
 * icache_line_size - get the safe I-cache line size across all CPUs
 */
	.macro	icache_line_size, reg, tmp
	read_ctr	\tmp
	and	\tmp, \tmp, #0xf		// cache line size encoding
	mov	\reg, #4			// bytes per word
	lsl	\reg, \reg, \tmp		// actual cache line size
	.endm

/*
 * tcr_set_t0sz - update TCR.T0SZ so that we can load the ID map
 */
	.macro	tcr_set_t0sz, valreg, t0sz
	bfi	\valreg, \t0sz, #TCR_T0SZ_OFFSET, #TCR_TxSZ_WIDTH
	.endm

/*
 * tcr_compute_pa_size - set TCR.(I)PS to the highest supported
 * ID_AA64MMFR0_EL1.PARange value
 *
 *	tcr:		register with the TCR_ELx value to be updated
 *	pos:		IPS or PS bitfield position
 *	tmp{0,1}:	temporary registers
 */
	.macro	tcr_compute_pa_size, tcr, pos, tmp0, tmp1
	mrs	\tmp0, ID_AA64MMFR0_EL1
	// Narrow PARange to fit the PS field in TCR_ELx
	ubfx	\tmp0, \tmp0, #ID_AA64MMFR0_PARANGE_SHIFT, #3
	mov	\tmp1, #ID_AA64MMFR0_PARANGE_MAX
	cmp	\tmp0, \tmp1
	csel	\tmp0, \tmp1, \tmp0, hi
	bfi	\tcr, \tmp0, \pos, #3
	.endm
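
	/*
	 * Usage sketch (registers are illustrative; TCR_IPS_SHIFT is the
	 * usual bitfield position for TCR_EL1):
	 *
	 *	tcr_compute_pa_size x10, #TCR_IPS_SHIFT, x5, x6
	 */
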
/*
 * Macro to perform a data cache maintenance for the interval
 * [kaddr, kaddr + size)
 *
 *	op:		operation passed to dc instruction
 *	domain:		domain used in dsb instruction
 *	kaddr:		starting virtual address of the region
 *	size:		size of the region
 *	Corrupts:	kaddr, size, tmp1, tmp2
 */
	.macro __dcache_op_workaround_clean_cache, op, kaddr
alternative_if_not ARM64_WORKAROUND_CLEAN_CACHE
	dc	\op, \kaddr
alternative_else
	dc	civac, \kaddr
alternative_endif
	.endm

	.macro dcache_by_line_op op, domain, kaddr, size, tmp1, tmp2
	dcache_line_size \tmp1, \tmp2
	add	\size, \kaddr, \size
	sub	\tmp2, \tmp1, #1
	bic	\kaddr, \kaddr, \tmp2
9998:
	.ifc	\op, cvau
	__dcache_op_workaround_clean_cache \op, \kaddr
	.else
	.ifc	\op, cvac
	__dcache_op_workaround_clean_cache \op, \kaddr
	.else
	.ifc	\op, cvap
	sys	3, c7, c12, 1, \kaddr	// dc cvap
	.else
	.ifc	\op, cvadp
	sys	3, c7, c13, 1, \kaddr	// dc cvadp
	.else
	dc	\op, \kaddr
	.endif
	.endif
	.endif
	.endif
	add	\kaddr, \kaddr, \tmp1
	cmp	\kaddr, \size
	b.lo	9998b
	dsb	\domain
	.endm
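
	/*
	 * Usage sketch (hypothetical call site; registers are illustrative):
	 * clean and invalidate a buffer whose base is in x0 and whose length
	 * is in x1, to the point of coherency:
	 *
	 *	dcache_by_line_op civac, sy, x0, x1, x2, x3
	 *
	 * Note that x0-x3 are corrupted, as documented above.
	 */
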
/*
 * Macro to perform an instruction cache maintenance for the interval
 * [start, end)
 *
 *	start, end:	virtual addresses describing the region
 *	label:		A label to branch to on user fault.
 *	Corrupts:	tmp1, tmp2
 */
	.macro invalidate_icache_by_line start, end, tmp1, tmp2, label
	icache_line_size \tmp1, \tmp2
	sub	\tmp2, \tmp1, #1
	bic	\tmp2, \start, \tmp2
9997:
USER(\label, 	ic	ivau, \tmp2)			// invalidate I line PoU
	add	\tmp2, \tmp2, \tmp1
	cmp	\tmp2, \end
	b.lo	9997b
	dsb	ish
	isb
	.endm

/*
 * reset_pmuserenr_el0 - reset PMUSERENR_EL0 if PMUv3 present
 */
	.macro	reset_pmuserenr_el0, tmpreg
	mrs	\tmpreg, id_aa64dfr0_el1
	sbfx	\tmpreg, \tmpreg, #ID_AA64DFR0_PMUVER_SHIFT, #4
	cmp	\tmpreg, #1			// Skip if no PMU present
	b.lt	9000f
	msr	pmuserenr_el0, xzr		// Disable PMU access from EL0
9000:
	.endm

/*
 * copy_page - copy src to dest using temp registers t1-t8
 */
	.macro copy_page dest:req src:req t1:req t2:req t3:req t4:req t5:req t6:req t7:req t8:req
9998:	ldp	\t1, \t2, [\src]
	ldp	\t3, \t4, [\src, #16]
	ldp	\t5, \t6, [\src, #32]
	ldp	\t7, \t8, [\src, #48]
	add	\src, \src, #64
	stnp	\t1, \t2, [\dest]
	stnp	\t3, \t4, [\dest, #16]
	stnp	\t5, \t6, [\dest, #32]
	stnp	\t7, \t8, [\dest, #48]
	add	\dest, \dest, #64
	tst	\src, #(PAGE_SIZE - 1)
	b.ne	9998b
	.endm
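
	/*
	 * Usage sketch (hypothetical call site; any ten free GPRs will do):
	 * copy one page from the address in x1 to the address in x0, both
	 * assumed page aligned:
	 *
	 *	copy_page x0, x1, x2, x3, x4, x5, x6, x7, x8, x9
	 *
	 * x0/x1 are advanced past the page and x2-x9 are clobbered.
	 */
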
/*
 * Annotate a function as position independent, i.e., safe to be called before
 * the kernel virtual mapping is activated.
 */
#define ENDPIPROC(x)			\
	.globl	__pi_##x;		\
	.type	__pi_##x, %function;	\
	.set	__pi_##x, x;		\
	.size	__pi_##x, . - x;	\
	ENDPROC(x)

/*
 * Annotate a function as being unsuitable for kprobes.
 */
#ifdef CONFIG_KPROBES
#define NOKPROBE(x)				\
	.pushsection "_kprobe_blacklist", "aw";	\
	.quad	x;				\
	.popsection;
#else
#define NOKPROBE(x)
#endif

#ifdef CONFIG_KASAN
#define EXPORT_SYMBOL_NOKASAN(name)
#else
#define EXPORT_SYMBOL_NOKASAN(name)	EXPORT_SYMBOL(name)
#endif

	/*
	 * Emit a 64-bit absolute little endian symbol reference in a way that
	 * ensures that it will be resolved at build time, even when building a
	 * PIE binary. This requires cooperation from the linker script, which
	 * must emit the lo32/hi32 halves individually.
	 */

	/*
	 * mov_q - move an immediate constant into a 64-bit register using
	 *         between 2 and 4 movz/movk instructions (depending on the
	 *         magnitude and sign of the operand)
	 */
	.macro	mov_q, reg, val
	.if (((\val) >> 31) == 0 || ((\val) >> 31) == 0x1ffffffff)
	movz	\reg, :abs_g1_s:\val
	.else
	.if (((\val) >> 47) == 0 || ((\val) >> 47) == 0x1ffff)
	movz	\reg, :abs_g2_s:\val
	.else
	movz	\reg, :abs_g3:\val
	movk	\reg, :abs_g2_nc:\val
	.endif
	movk	\reg, :abs_g1_nc:\val
	.endif
	movk	\reg, :abs_g0_nc:\val
	.endm
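
	/*
	 * Worked example (value is illustrative): a constant that does not
	 * fit a single 32-bit pattern, such as
	 *
	 *	mov_q	x0, 0x0000123456789abc
	 *
	 * expands to three instructions (movz of bits [47:32], then movk of
	 * bits [31:16] and [15:0]), whereas a small constant like 0x1234
	 * collapses to just two.
	 */
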
/*
 * Return the current task_struct.
 */
	.macro	get_current_task, rd
	mrs	\rd, sp_el0
	.endm

/*
 * Offset ttbr1 to allow for 48-bit kernel VAs set with 52-bit PTRS_PER_PGD.
 * orr is used as it can cover the immediate value (and is idempotent).
 * In future this may be nop'ed out when dealing with 52-bit kernel VAs.
 * ttbr: Value of ttbr to set, modified.
 */
	.macro	offset_ttbr1, ttbr
#ifdef CONFIG_ARM64_USER_VA_BITS_52
	orr	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
#endif
	.endm

/*
 * Perform the reverse of offset_ttbr1.
 * bic is used as it can cover the immediate value and, in future, won't need
 * to be nop'ed out when dealing with 52-bit kernel VAs.
 */
	.macro	restore_ttbr1, ttbr
#ifdef CONFIG_ARM64_USER_VA_BITS_52
	bic	\ttbr, \ttbr, #TTBR1_BADDR_4852_OFFSET
#endif
	.endm

/*
 * Arrange a physical address in a TTBR register, taking care of 52-bit
 * addresses.
 *
 *	phys:	physical address, preserved
 *	ttbr:	returns the TTBR value
 */
	.macro	phys_to_ttbr, ttbr, phys
#ifdef CONFIG_ARM64_PA_BITS_52
	orr	\ttbr, \phys, \phys, lsr #46
	and	\ttbr, \ttbr, #TTBR_BADDR_MASK_52
#else
	mov	\ttbr, \phys
#endif
	.endm

	.macro	phys_to_pte, pte, phys
#ifdef CONFIG_ARM64_PA_BITS_52
	/*
	 * We assume \phys is 64K aligned and this is guaranteed by only
	 * supporting this configuration with 64K pages.
	 */
	orr	\pte, \phys, \phys, lsr #36
	and	\pte, \pte, #PTE_ADDR_MASK
#else
	mov	\pte, \phys
#endif
	.endm

	.macro	pte_to_phys, phys, pte
#ifdef CONFIG_ARM64_PA_BITS_52
	ubfiz	\phys, \pte, #(48 - 16 - 12), #16
	bfxil	\phys, \pte, #16, #32
	lsl	\phys, \phys, #16
#else
	and	\phys, \pte, #PTE_ADDR_MASK
#endif
	.endm
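
	/*
	 * Illustrative walk-through of the 52-bit case above (layout as
	 * assumed by this file): PA[47:16] lives in pte[47:16] and PA[51:48]
	 * in pte[15:12]. The ubfiz places pte[15:0] at bits [35:20], the
	 * bfxil overwrites bits [31:0] with pte[47:16], and the final
	 * lsl #16 leaves PA[51:48] and PA[47:16] in their proper positions
	 * with the low 16 bits cleared.
	 */
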
/*
 * tcr_clear_errata_bits - Clear TCR bits that trigger an erratum on this CPU.
 */
	.macro	tcr_clear_errata_bits, tcr, tmp1, tmp2
#ifdef CONFIG_FUJITSU_ERRATUM_010001
	mrs	\tmp1, midr_el1

	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001_MASK
	and	\tmp1, \tmp1, \tmp2
	mov_q	\tmp2, MIDR_FUJITSU_ERRATUM_010001
	cmp	\tmp1, \tmp2
	b.ne	10f

	mov_q	\tmp2, TCR_CLEAR_FUJITSU_ERRATUM_010001
	bic	\tcr, \tcr, \tmp2
10:
#endif /* CONFIG_FUJITSU_ERRATUM_010001 */
	.endm

/*
 * Errata workaround prior to disabling the MMU. Insert an ISB immediately
 * prior to executing the MSR that will change SCTLR_ELn[M] from a value of
 * 1 to 0.
 */
	.macro	pre_disable_mmu_workaround
#ifdef CONFIG_QCOM_FALKOR_ERRATUM_E1041
	isb
#endif
	.endm

	/*
	 * frame_push - Push @regcount callee saved registers to the stack,
	 *              starting at x19, as well as x29/x30, and set x29 to
	 *              the new value of sp. Add @extra bytes of stack space
	 *              for locals.
	 */
	.macro		frame_push, regcount:req, extra
	__frame		st, \regcount, \extra
	.endm

	/*
	 * frame_pop  - Pop the callee saved registers from the stack that were
	 *              pushed in the most recent call to frame_push, as well
	 *              as x29/x30 and any extra stack space that may have been
	 *              allocated.
	 */
	.macro		frame_pop
	__frame		ld
	.endm

	.macro		__frame_regs, reg1, reg2, op, num
	.if		.Lframe_regcount == \num
	\op\()r		\reg1, [sp, #(\num + 1) * 8]
	.elseif		.Lframe_regcount > \num
	\op\()p		\reg1, \reg2, [sp, #(\num + 1) * 8]
	.endif
	.endm

	.macro		__frame, op, regcount, extra=0
	.ifc		\op, st
	.if		(\regcount) < 0 || (\regcount) > 10
	.error		"regcount should be in the range [0 ... 10]"
	.endif
	.if		((\extra) % 16) != 0
	.error		"extra should be a multiple of 16 bytes"
	.endif
	.ifdef		.Lframe_regcount
	.if		.Lframe_regcount != -1
	.error		"frame_push/frame_pop may not be nested"
	.endif
	.endif
	.set		.Lframe_regcount, \regcount
	.set		.Lframe_extra, \extra
	.set		.Lframe_local_offset, ((\regcount + 3) / 2) * 16
	stp		x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]!
	mov		x29, sp
	.endif

	__frame_regs	x19, x20, \op, 1
	__frame_regs	x21, x22, \op, 3
	__frame_regs	x23, x24, \op, 5
	__frame_regs	x25, x26, \op, 7
	__frame_regs	x27, x28, \op, 9

	.ifc		\op, ld
	.if		.Lframe_regcount == -1
	.error		"frame_push/frame_pop may not be nested"
	.endif
	ldp		x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra
	.set		.Lframe_regcount, -1
	.endif
	.endm
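
	/*
	 * Usage sketch (hypothetical assembly routine, for illustration):
	 * save x19-x22 plus x29/x30 and reserve 16 bytes of local storage on
	 * entry, then restore everything before returning:
	 *
	 *	frame_push	4, 16	// save x19-x22 (+ x29/x30), 16 bytes locals
	 *	// ... body may clobber x19-x22 ...
	 *	frame_pop
	 *	ret
	 */
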
/*
 * Check whether to yield to another runnable task from kernel mode NEON code
 * (which runs with preemption disabled).
 *
 * if_will_cond_yield_neon
 *        // pre-yield patchup code
 * do_cond_yield_neon
 *        // post-yield patchup code
 * endif_yield_neon    <label>
 *
 * where <label> is optional, and marks the point where execution will resume
 * after a yield has been performed. If omitted, execution resumes right after
 * the endif_yield_neon invocation. Note that the entire sequence, including
 * the provided patchup code, will be omitted from the image if CONFIG_PREEMPT
 * is not defined.
 *
 * As a convenience, in the case where no patchup code is required, the above
 * sequence may be abbreviated to
 *
 * cond_yield_neon <label>
 *
 * Note that the patchup code does not support assembler directives that change
 * the output section; any use of such directives is undefined.
 *
 * The yield itself consists of the following:
 * - Check whether the preempt count is exactly 1 and a reschedule is also
 *   needed. If so, calling of preempt_enable() in kernel_neon_end() will
 *   trigger a reschedule. If it is not the case, yielding is pointless.
 * - Disable and re-enable kernel mode NEON, and branch to the yield fixup
 *   code.
 *
 * This macro sequence may clobber all CPU state that is not guaranteed by the
 * AAPCS to be preserved across an ordinary function call.
 */
	.macro		cond_yield_neon, lbl
	if_will_cond_yield_neon
	do_cond_yield_neon
	endif_yield_neon	\lbl
	.endm

	.macro		if_will_cond_yield_neon
#ifdef CONFIG_PREEMPT
	get_current_task	x0
	ldr		x0, [x0, #TSK_TI_PREEMPT]
	sub		x0, x0, #PREEMPT_DISABLE_OFFSET
	cbz		x0, .Lyield_\@
	/* fall through to endif_yield_neon */
	.subsection	1
.Lyield_\@ :
#else
	.section	".discard.cond_yield_neon", "ax"
#endif
	.endm

	.macro		do_cond_yield_neon
	bl		kernel_neon_end
	bl		kernel_neon_begin
	.endm

	.macro		endif_yield_neon, lbl
	.ifnb		\lbl
	b		\lbl
	.else
	b		.Lyield_out_\@
	.endif
	.previous
.Lyield_out_\@ :
	.endm

#endif	/* __ASM_ASSEMBLER_H */