/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * FP/SIMD state saving and restoring macros
 *
 * Copyright (C) 2012 ARM Ltd.
 * Author: Catalin Marinas <catalin.marinas@arm.com>
 */

#include <asm/assembler.h>
.macro fpsimd_save state, tmpnr
	stp	q0, q1, [\state, #16 * 0]
	stp	q2, q3, [\state, #16 * 2]
	stp	q4, q5, [\state, #16 * 4]
	stp	q6, q7, [\state, #16 * 6]
	stp	q8, q9, [\state, #16 * 8]
	stp	q10, q11, [\state, #16 * 10]
	stp	q12, q13, [\state, #16 * 12]
	stp	q14, q15, [\state, #16 * 14]
	stp	q16, q17, [\state, #16 * 16]
	stp	q18, q19, [\state, #16 * 18]
	stp	q20, q21, [\state, #16 * 20]
	stp	q22, q23, [\state, #16 * 22]
	stp	q24, q25, [\state, #16 * 24]
	stp	q26, q27, [\state, #16 * 26]
	stp	q28, q29, [\state, #16 * 28]
	stp	q30, q31, [\state, #16 * 30]!	// Writeback: \state now points at q30's slot
	mrs	x\tmpnr, fpsr
	str	w\tmpnr, [\state, #16 * 2]	// fpsr, #16 * 32 from the original base
	mrs	x\tmpnr, fpcr
	str	w\tmpnr, [\state, #16 * 2 + 4]	// fpcr, immediately after fpsr
.endm
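
/*
 * Illustrative use (not part of this file; the register choices here
 * are arbitrary): save the FP/SIMD state into the buffer addressed by
 * x0, with register 8 as scratch:
 *
 *	fpsimd_save x0, 8
 *
 * \tmpnr is a bare register number because the macro needs both the
 * x\tmpnr and w\tmpnr views of the same register.
 */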

.macro fpsimd_restore_fpcr state, tmp
	/*
	 * Writes to fpcr may be self-synchronising, so avoid restoring
	 * the register if it hasn't changed.
	 */
	mrs	\tmp, fpcr
	cmp	\tmp, \state
	b.eq	9999f
	msr	fpcr, \state
9999:
.endm

/* Clobbers \state */
.macro fpsimd_restore state, tmpnr
	ldp	q0, q1, [\state, #16 * 0]
	ldp	q2, q3, [\state, #16 * 2]
	ldp	q4, q5, [\state, #16 * 4]
	ldp	q6, q7, [\state, #16 * 6]
	ldp	q8, q9, [\state, #16 * 8]
	ldp	q10, q11, [\state, #16 * 10]
	ldp	q12, q13, [\state, #16 * 12]
	ldp	q14, q15, [\state, #16 * 14]
	ldp	q16, q17, [\state, #16 * 16]
	ldp	q18, q19, [\state, #16 * 18]
	ldp	q20, q21, [\state, #16 * 20]
	ldp	q22, q23, [\state, #16 * 22]
	ldp	q24, q25, [\state, #16 * 24]
	ldp	q26, q27, [\state, #16 * 26]
	ldp	q28, q29, [\state, #16 * 28]
	ldp	q30, q31, [\state, #16 * 30]!	// Writeback: \state now points at q30's slot
	ldr	w\tmpnr, [\state, #16 * 2]	// fpsr, #16 * 32 from the original base
	msr	fpsr, x\tmpnr
	ldr	w\tmpnr, [\state, #16 * 2 + 4]	// fpcr
	fpsimd_restore_fpcr x\tmpnr, \state
.endm
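
/*
 * Illustrative use (not part of this file): reload the state saved by
 * fpsimd_save from the buffer addressed by x0, again with register 8
 * as scratch:
 *
 *	fpsimd_restore x0, 8
 *
 * \state is reused as the scratch register for fpsimd_restore_fpcr,
 * hence the "Clobbers \state" note above.
 */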

/* Sanity-check macros to help avoid encoding garbage instructions */

.macro _check_general_reg nr
	.if (\nr) < 0 || (\nr) > 30
		.error "Bad register number \nr."
	.endif
.endm

.macro _sve_check_zreg znr
	.if (\znr) < 0 || (\znr) > 31
		.error "Bad Scalable Vector Extension vector register number \znr."
	.endif
.endm

.macro _sve_check_preg pnr
	.if (\pnr) < 0 || (\pnr) > 15
		.error "Bad Scalable Vector Extension predicate register number \pnr."
	.endif
.endm

.macro _check_num n, min, max
	.if (\n) < (\min) || (\n) > (\max)
		.error "Number \n out of range [\min,\max]"
	.endif
.endm

.macro _sme_check_wv v
	.if (\v) < 12 || (\v) > 15
		.error "Bad vector select register \v."
	.endif
.endm

/* SVE instruction encodings for non-SVE-capable assemblers */
/* (pre binutils 2.28; all kernel-capable clang versions support SVE) */

/* STR (vector): STR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm
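
/*
 * Example (illustrative): with an SVE-aware assembler,
 *
 *	_sve_str_v 0, 1, -34
 *
 * emits the same encoding as "str z0, [x1, #-34, mul vl]".
 */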

/* LDR (vector): LDR Z\nz, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_v nz, nxbase, offset=0
	_sve_check_zreg \nz
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85804000			\
		| (\nz)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* STR (predicate): STR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_str_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe5800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* LDR (predicate): LDR P\np, [X\nxbase, #\offset, MUL VL] */
.macro _sve_ldr_p np, nxbase, offset=0
	_sve_check_preg \np
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0x85800000			\
		| (\np)				\
		| ((\nxbase) << 5)		\
		| (((\offset) & 7) << 10)	\
		| (((\offset) & 0x1f8) << 13)
.endm

/* RDVL X\nx, #\imm */
.macro _sve_rdvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5000		\
		| (\nx)			\
		| (((\imm) & 0x3f) << 5)
.endm
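
/*
 * Example (illustrative): RDVL multiplies the current SVE vector
 * length in bytes by the immediate, so
 *
 *	_sve_rdvl 0, 1
 *
 * leaves the vector length in bytes in x0.
 */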

/* RDFFR (unpredicated): RDFFR P\np.B */
.macro _sve_rdffr np
	_sve_check_preg \np
	.inst	0x2519f000	\
		| (\np)
.endm

/* WRFFR P\np.B */
.macro _sve_wrffr np
	_sve_check_preg \np
	.inst	0x25289000	\
		| ((\np) << 5)
.endm

/* PFALSE P\np.B */
.macro _sve_pfalse np
	_sve_check_preg \np
	.inst	0x2518e400	\
		| (\np)
.endm

/* SME instruction encodings for non-SME-capable assemblers */
/* (pre binutils 2.38/LLVM 13) */

/* RDSVL X\nx, #\imm */
.macro _sme_rdsvl nx, imm
	_check_general_reg \nx
	_check_num (\imm), -0x20, 0x1f
	.inst	0x04bf5800		\
		| (\nx)			\
		| (((\imm) & 0x3f) << 5)
.endm

/*
 * STR (vector from ZA array):
 *	STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 */
.macro _sme_str_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1200000		\
		| (((\nw) & 3) << 13)	\
		| ((\nxbase) << 5)	\
		| ((\offset) & 7)
.endm

/*
 * LDR (vector to ZA array):
 *	LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
 */
.macro _sme_ldr_zav nw, nxbase, offset=0
	_sme_check_wv \nw
	_check_general_reg \nxbase
	_check_num (\offset), -0x100, 0xff
	.inst	0xe1000000		\
		| (((\nw) & 3) << 13)	\
		| ((\nxbase) << 5)	\
		| ((\offset) & 7)
.endm

/*
 * LDR (ZT0)
 *
 *	LDR ZT0, nx
 */
.macro _ldr_zt nx
	_check_general_reg \nx
	.inst	0xe11f8000	\
		| (\nx << 5)
.endm

/*
 * STR (ZT0)
 *
 *	STR ZT0, nx
 */
.macro _str_zt nx
	_check_general_reg \nx
	.inst	0xe13f8000	\
		| (\nx << 5)
.endm

/*
 * Expand \insn once for each value of \var in [\from, \to].  __for
 * recurses by bisection, keeping the expansion depth logarithmic in
 * the size of the range.
 */
.macro __for from:req, to:req
	.if (\from) == (\to)
		_for__body %\from
	.else
		__for %\from, %((\from) + ((\to) - (\from)) / 2)
		__for %((\from) + ((\to) - (\from)) / 2 + 1), %\to
	.endif
.endm

.macro _for var:req, from:req, to:req, insn:vararg
	.macro _for__body \var:req
		.noaltmacro
		\insn
		.altmacro
	.endm

	.altmacro
	__for \from, \to
	.noaltmacro

	.purgem _for__body
.endm
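
/*
 * Example (illustrative):
 *
 *	_for n, 0, 3, _sve_pfalse \n
 *
 * expands to _sve_pfalse 0 through _sve_pfalse 3, one instance per
 * value of n.
 */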

/* Update ZCR_EL1.LEN with the new VQ */
.macro sve_load_vq xvqminus1, xtmp, xtmp2
	mrs_s		\xtmp, SYS_ZCR_EL1
	bic		\xtmp2, \xtmp, ZCR_ELx_LEN_MASK
	orr		\xtmp2, \xtmp2, \xvqminus1
	cmp		\xtmp2, \xtmp
	b.eq		921f
	msr_s		SYS_ZCR_EL1, \xtmp2	// self-synchronising
921:
.endm
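
/*
 * Illustrative use (not part of this file): with the desired vector
 * quadword count minus one in x0, and x1/x2 as scratch,
 *
 *	sve_load_vq x0, x1, x2
 *
 * writes ZCR_EL1 only when LEN actually changes, since the write may
 * be self-synchronising.
 */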

/* Update SMCR_EL1.LEN with the new VQ */
.macro sme_load_vq xvqminus1, xtmp, xtmp2
	mrs_s		\xtmp, SYS_SMCR_EL1
	bic		\xtmp2, \xtmp, SMCR_ELx_LEN_MASK
	orr		\xtmp2, \xtmp2, \xvqminus1
	cmp		\xtmp2, \xtmp
	b.eq		921f
	msr_s		SYS_SMCR_EL1, \xtmp2	// self-synchronising
921:
.endm

/* Preserve the first 128 bits of Znz and zero the rest. */
.macro _sve_flush_z nz
	_sve_check_zreg \nz
	mov	v\nz\().16b, v\nz\().16b
.endm

.macro sve_flush_z
 _for n, 0, 31, _sve_flush_z \n
.endm
.macro sve_flush_p
 _for n, 0, 15, _sve_pfalse \n
.endm
.macro sve_flush_ffr
	_sve_wrffr 0
.endm

.macro sve_save nxbase, xpfpsr, save_ffr, nxtmp
 _for n, 0, 31, _sve_str_v \n, \nxbase, \n - 34
 _for n, 0, 15, _sve_str_p \n, \nxbase, \n - 16
		cbz	\save_ffr, 921f
		_sve_rdffr 0
		b	922f
921:
		_sve_pfalse 0			// Zero out FFR
922:
		_sve_str_p 0, \nxbase		// Save FFR (now in P0) at offset 0
		_sve_ldr_p 0, \nxbase, -16	// Reload P0's own value
		mrs	x\nxtmp, fpsr
		str	w\nxtmp, [\xpfpsr]
		mrs	x\nxtmp, fpcr
		str	w\nxtmp, [\xpfpsr, #4]
.endm
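
/*
 * Illustrative use (not part of this file; register assignments are
 * arbitrary): with the save buffer address in x0, a pointer for
 * fpsr/fpcr in x1, and a flag in x2 selecting whether FFR is saved,
 *
 *	sve_save 0, x1, x2, 3
 *
 * \nxbase and \nxtmp are bare register numbers so the encoding macros
 * above can splice them into .inst words.
 */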

.macro sve_load nxbase, xpfpsr, restore_ffr, nxtmp
 _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34
		cbz	\restore_ffr, 921f
		_sve_ldr_p 0, \nxbase		// FFR image was saved at offset 0
		_sve_wrffr 0
921:
 _for n, 0, 15, _sve_ldr_p \n, \nxbase, \n - 16

		ldr	w\nxtmp, [\xpfpsr]
		msr	fpsr, x\nxtmp
		ldr	w\nxtmp, [\xpfpsr, #4]
		msr	fpcr, x\nxtmp
.endm
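
/*
 * Illustrative use (not part of this file): the mirror image of the
 * sve_save example above,
 *
 *	sve_load 0, x1, x2, 3
 *
 * where a non-zero x2 requests that FFR be restored as well.
 */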

.macro sme_save_za nxbase, xvl, nw
	mov	w\nw, #0

423:
	_sme_str_zav \nw, \nxbase	// Store one horizontal slice of ZA
	add	x\nxbase, x\nxbase, \xvl
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw		// ZA has \xvl (SVL in bytes) slices
	bne	423b
.endm
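
/*
 * Illustrative use (not part of this file): ZA has one horizontal
 * slice per byte of streaming vector length, so a caller might do
 *
 *	_sme_rdsvl 1, 1		// x1 = streaming VL in bytes
 *	sme_save_za 0, x1, 12
 *
 * \nw must be 12-15 to satisfy _sme_check_wv.
 */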

.macro sme_load_za nxbase, xvl, nw
	mov	w\nw, #0

423:
	_sme_ldr_zav \nw, \nxbase	// Load one horizontal slice of ZA
	add	x\nxbase, x\nxbase, \xvl
	add	x\nw, x\nw, #1
	cmp	\xvl, x\nw		// ZA has \xvl (SVL in bytes) slices
	bne	423b
.endm