1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
5 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
8 #include <linux/linkage.h>
9 #include <asm/assembler.h>
/*
 * Symbol-naming wrappers: both paste a "neon_" prefix onto func before
 * handing it to ENTRY()/ENDPROC(), so anything opened or closed through
 * AES_ENTRY/AES_ENDPROC is emitted under a neon_-prefixed name.
 */
11 #define AES_ENTRY(func) ENTRY(neon_ ## func)
12 #define AES_ENDPROC(func) ENDPROC(neon_ ## func)
16 .macro xts_reload_mask, tmp
20 /* multiply by polynomial 'x' in GF(2^8) */
/*
 * Operands, in order: \out (product), \in (value to multiply), \temp
 * (scratch vector), \const (vector that \temp is masked with).
 * mix_columns invokes this with v12.16b as \const.
 * NOTE(review): v12 presumably carries the field's reduction constant in
 * every byte -- confirm where it is initialised.
 */
21 .macro mul_by_x, out, in, temp, const
24 and \temp, \temp, \const /* keep only the \const bits also set in \temp */
28 /* multiply by polynomial 'x^2' in GF(2^8) */
/*
 * Operands, in order: \out (product), \in (value to multiply), \temp
 * (scratch vector), \const (vector that \temp is polynomial-multiplied by).
 * mix_columns invokes this with v12.16b as \const.
 */
29 .macro mul_by_x2, out, in, temp, const
32 pmul \temp, \temp, \const /* carry-less multiply, lane by lane */
36 /* preload the entire Sbox */
/*
 * \sbox      - table symbol; enc_prepare passes .LForward_Sbox and
 *              dec_prepare passes .LReverse_Sbox
 * \shiftrows - symbol of the byte permutation destined for q13
 * \temp      - general-purpose register; third operand of ldr_l and the
 *              base pointer of the ld1 sequence
 *
 * Vector registers set up here:
 *   v13      <- \shiftrows (do_block uses v13 as the ShiftRows tbl index)
 *   v14      <- .Lror32by8 (mix_columns uses v14 as a tbl index)
 *   v16-v31  <- 256 bytes read from [\temp], 64 bytes per ld1
 */
37 .macro prepare, sbox, shiftrows, temp
39 ldr_l q13, \shiftrows, \temp
40 ldr_l q14, .Lror32by8, \temp
/* NOTE(review): \temp is expected to address \sbox from here on -- confirm */
42 ld1 {v16.16b-v19.16b}, [\temp], #64
43 ld1 {v20.16b-v23.16b}, [\temp], #64
44 ld1 {v24.16b-v27.16b}, [\temp], #64
/* last 64 bytes: no post-increment, \temp is not advanced further */
45 ld1 {v28.16b-v31.16b}, [\temp]
48 /* do preload for encryption */
/*
 * Expands to prepare with the forward tables.  Only \temp is used;
 * \ignore0 and \ignore1 are accepted and discarded.
 */
49 .macro enc_prepare, ignore0, ignore1, temp
50 prepare .LForward_Sbox, .LForward_ShiftRows, \temp
/* Takes the same operand list as enc_prepare/dec_prepare (ignore0, ignore1, temp). */
53 .macro enc_switch_key, ignore0, ignore1, temp
57 /* do preload for decryption */
/*
 * Expands to prepare with the reverse tables.  Only \temp is used;
 * \ignore0 and \ignore1 are accepted and discarded.
 */
58 .macro dec_prepare, ignore0, ignore1, temp
59 prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
62 /* apply SubBytes transformation using the preloaded Sbox */
/*
 * Byte-wise lookup of \in in the 256-byte table held in v16-v31, done as
 * four 64-byte lookups:
 *   v16-v19 via tbl - index bytes outside the 64-byte group produce 0
 *   v20-v23, v24-v27, v28-v31 via tbx - index bytes outside the group
 *   leave the destination byte as it was
 * The index is lowered by v15 before each tbx (v9 = in - v15,
 * v10 = in - 2*v15, v11 = in - 3*v15), so with v15 equal to the group size
 * every input byte is in range for exactly one of the four lookups.
 * NOTE(review): this relies on v15 holding 64 in every byte at this
 * point -- confirm where it is set.
 * Clobbers v9, v10 and v11.
 */
64 sub v9.16b, \in\().16b, v15.16b /* taken before tbl overwrites \in */
65 tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
66 sub v10.16b, v9.16b, v15.16b
67 tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
68 sub v11.16b, v10.16b, v15.16b
69 tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
70 tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
73 /* apply MixColumns transformation */
/*
 * \in  - vector register name without arrangement suffix (".16b"/".8h" are
 *        appended here); transformed in place
 * \enc - direction flag forwarded by do_block (encrypt_block supplies 1,
 *        decrypt_block supplies 0)
 * Reads v12 (the \const operand of mul_by_x/mul_by_x2) and v14 (tbl index);
 * clobbers v8 and v9.
 */
74 .macro mix_columns, in, enc
76 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
77 mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b /* v8 = x^2 * in, v9 is scratch */
78 eor \in\().16b, \in\().16b, v8.16b
80 eor \in\().16b, \in\().16b, v8.16b
83 mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b /* v9 = x * in, v8 is scratch */
84 rev32 v8.8h, \in\().8h /* v8 = in with the two halfwords of each word swapped */
85 eor v8.16b, v8.16b, v9.16b
86 eor \in\().16b, \in\().16b, v8.16b
87 tbl \in\().16b, {\in\().16b}, v14.16b /* permute the bytes of in by v14 */
88 eor \in\().16b, \in\().16b, v8.16b
/*
 * Round sequence for a single AES state held in \in (register name without
 * arrangement suffix).
 * \enc is handed on to mix_columns; encrypt_block supplies 1 and
 * decrypt_block supplies 0.
 * v15 carries the round key: it is XORed into \in at 1111 and again at
 * 2222, and is reloaded from [\rkp] (post-incremented by 16 bytes) in
 * between.  v13 supplies the ShiftRows byte permutation.
 */
91 .macro do_block, enc, in, rounds, rk, rkp, i
95 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
97 tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
100 ld1 {v15.4s}, [\rkp], #16
102 mix_columns \in, \enc
104 2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
/* One-state wrapper: do_block with enc = 1, other operands passed through. */
107 .macro encrypt_block, in, rounds, rk, rkp, i
108 do_block 1, \in, \rounds, \rk, \rkp, \i
/* One-state wrapper: do_block with enc = 0, other operands passed through. */
111 .macro decrypt_block, in, rounds, rk, rkp, i
112 do_block 0, \in, \rounds, \rk, \rkp, \i
116 * Interleaved versions: functionally equivalent to the
117 * ones above, but applied to 2 or 4 AES states in parallel.
/*
 * Table lookup for two states at once; the steps for \in0 and \in1 are
 * interleaved.  Each state is looked up in v16-v19 with tbl, then in
 * v20-v23, v24-v27 and v28-v31 with tbx, using an index lowered by v15 one
 * more time per group.
 * Index registers: v8/v10 belong to \in0 and v9/v11 to \in1; v8 and v9 are
 * reused for the last group (in - 3*v15).
 * NOTE(review): relies on v15 holding the 64-byte group size in every
 * byte -- confirm where it is set.
 * Clobbers v8-v11.
 */
120 .macro sub_bytes_2x, in0, in1
121 sub v8.16b, \in0\().16b, v15.16b
122 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
123 sub v9.16b, \in1\().16b, v15.16b
124 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
125 sub v10.16b, v8.16b, v15.16b
126 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
127 sub v11.16b, v9.16b, v15.16b
128 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
129 sub v8.16b, v10.16b, v15.16b
130 tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
131 sub v9.16b, v11.16b, v15.16b
132 tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
133 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
134 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
/*
 * Table lookup for four states at once.  v8, v9, v10 and v11 hold the
 * running index of \in0, \in1, \in2 and \in3 respectively: each starts as
 * in - v15 (captured before tbl overwrites the state) and is lowered in
 * place by a further v15 before each of the next two table groups.
 * Lookup order per state: tbl on v16-v19, then tbx on v20-v23, v24-v27 and
 * v28-v31.
 * NOTE(review): relies on v15 holding the 64-byte group size in every
 * byte -- confirm where it is set.
 * Clobbers v8-v11.
 */
137 .macro sub_bytes_4x, in0, in1, in2, in3
138 sub v8.16b, \in0\().16b, v15.16b
139 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
140 sub v9.16b, \in1\().16b, v15.16b
141 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
142 sub v10.16b, \in2\().16b, v15.16b
143 tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
144 sub v11.16b, \in3\().16b, v15.16b
145 tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
146 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
147 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
148 sub v8.16b, v8.16b, v15.16b
149 tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
150 sub v9.16b, v9.16b, v15.16b
151 tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
152 sub v10.16b, v10.16b, v15.16b
153 tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
154 sub v11.16b, v11.16b, v15.16b
155 tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
156 sub v8.16b, v8.16b, v15.16b
157 tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
158 sub v9.16b, v9.16b, v15.16b
159 tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
160 sub v10.16b, v10.16b, v15.16b
161 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
162 sub v11.16b, v11.16b, v15.16b
163 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
164 tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
165 tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
/*
 * Two-state byte-wise multiply by 'x': \out0 = x * \in0, \out1 = x * \in1.
 * All operands are register names without arrangement suffix; ".16b" is
 * appended here.  \tmp0 and \tmp1 are scratch and are overwritten.
 * Per byte: shift left by one, then XOR in \const if the byte's top bit
 * was set.
 * NOTE(review): \const is presumably the GF(2^8) reduction constant
 * replicated in every byte (mix_columns_2x passes v12) -- confirm.
 */
168 .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
169 sshr \tmp0\().16b, \in0\().16b, #7 /* 0xff where the top bit is set, else 0x00 */
170 shl \out0\().16b, \in0\().16b, #1
171 sshr \tmp1\().16b, \in1\().16b, #7
172 and \tmp0\().16b, \tmp0\().16b, \const\().16b /* \const only in the flagged bytes */
173 shl \out1\().16b, \in1\().16b, #1
174 and \tmp1\().16b, \tmp1\().16b, \const\().16b
175 eor \out0\().16b, \out0\().16b, \tmp0\().16b
176 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/*
 * Two-state byte-wise multiply by 'x^2': \out0 = x^2 * \in0,
 * \out1 = x^2 * \in1.  All operands are register names without arrangement
 * suffix; ".16b" is appended here.  \tmp0 and \tmp1 are scratch and are
 * overwritten.
 * Per byte: the two bits shifted out by "shl #2" are recovered with
 * "ushr #6", polynomial-multiplied by \const, and XORed back into the
 * shifted value.
 * NOTE(review): \const is presumably the GF(2^8) reduction constant
 * replicated in every byte (mix_columns_2x passes v12) -- confirm.
 */
179 .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
180 ushr \tmp0\().16b, \in0\().16b, #6 /* top two bits of each byte */
181 shl \out0\().16b, \in0\().16b, #2
182 ushr \tmp1\().16b, \in1\().16b, #6
183 pmul \tmp0\().16b, \tmp0\().16b, \const\().16b /* carry-less multiply per byte */
184 shl \out1\().16b, \in1\().16b, #2
185 pmul \tmp1\().16b, \tmp1\().16b, \const\().16b
186 eor \out0\().16b, \out0\().16b, \tmp0\().16b
187 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/*
 * MixColumns on two states, \in0 and \in1 (register names without
 * arrangement suffix), transformed in place with interleaved steps.
 * \enc is the direction flag forwarded by do_block_2x/do_block_4x
 * (1 from the encrypt wrappers, 0 from the decrypt wrappers).
 * Reads v12 (the const operand of the mul_by_x*_2x helpers) and v14 (tbl
 * index); clobbers v8-v11.
 */
190 .macro mix_columns_2x, in0, in1, enc
192 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
193 mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12 /* v8 = x^2 * in0, v9 = x^2 * in1 */
194 eor \in0\().16b, \in0\().16b, v8.16b
196 eor \in1\().16b, \in1\().16b, v9.16b
198 eor \in0\().16b, \in0\().16b, v8.16b
199 eor \in1\().16b, \in1\().16b, v9.16b
202 mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12 /* v8 = x * in0, v9 = x * in1 */
203 rev32 v10.8h, \in0\().8h /* halfwords of each 32-bit word swapped */
204 rev32 v11.8h, \in1\().8h
205 eor v10.16b, v10.16b, v8.16b
206 eor v11.16b, v11.16b, v9.16b
207 eor \in0\().16b, \in0\().16b, v10.16b
208 eor \in1\().16b, \in1\().16b, v11.16b
209 tbl \in0\().16b, {\in0\().16b}, v14.16b /* permute bytes by v14 */
210 tbl \in1\().16b, {\in1\().16b}, v14.16b
211 eor \in0\().16b, \in0\().16b, v10.16b
212 eor \in1\().16b, \in1\().16b, v11.16b
/*
 * Round sequence for two AES states, \in0 and \in1 (register names without
 * arrangement suffix), processed in lock-step with one shared round key.
 * \enc is handed on to mix_columns_2x; encrypt_block2x supplies 1 and
 * decrypt_block2x supplies 0.
 * v15 carries the round key: XORed into both states at 1111 and at 2222,
 * and reloaded from [\rkp] (post-incremented by 16 bytes) in between.
 * v13 supplies the ShiftRows byte permutation.
 */
215 .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
219 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
220 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
222 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
223 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
224 sub_bytes_2x \in0, \in1
226 ld1 {v15.4s}, [\rkp], #16
228 mix_columns_2x \in0, \in1, \enc
230 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
231 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
/*
 * Round sequence for four AES states, \in0-\in3 (register names without
 * arrangement suffix), processed in lock-step with one shared round key.
 * \enc is handed on to mix_columns_2x; encrypt_block4x supplies 1 and
 * decrypt_block4x supplies 0.
 * v15 carries the round key: XORed into all four states at 1111 and at
 * 2222, and reloaded from [\rkp] (post-incremented by 16 bytes) in between.
 * v13 supplies the ShiftRows byte permutation.
 */
234 .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
238 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
239 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
240 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
241 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
243 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
244 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
245 tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
246 tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
247 sub_bytes_4x \in0, \in1, \in2, \in3
249 ld1 {v15.4s}, [\rkp], #16
/* MixColumns is applied pairwise: the 2x macro is expanded twice */
251 mix_columns_2x \in0, \in1, \enc
252 mix_columns_2x \in2, \in3, \enc
254 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
255 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
256 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
257 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
/* Two-state wrapper: do_block_2x with enc = 1, other operands passed through. */
260 .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
261 do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
/* Two-state wrapper: do_block_2x with enc = 0, other operands passed through. */
264 .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
265 do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
/* Four-state wrapper: do_block_4x with enc = 1, other operands passed through. */
268 .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
269 do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
/* Four-state wrapper: do_block_4x with enc = 0, other operands passed through. */
272 .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
273 do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
276 #include "aes-modes.S"
/* lookup tables live in .rodata; the "a" flag marks the section allocatable */
278 .section ".rodata", "a"
/*
 * 256-byte table in index order 0x00..0xff, eight entries per line.  The
 * values are those of the AES S-box (FIPS-197, SubBytes).
 */
281 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
282 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
283 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
284 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
285 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
286 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
287 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
288 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
289 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
290 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
291 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
292 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
293 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
294 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
295 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
296 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
297 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
298 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
299 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
300 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
301 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
302 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
303 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
304 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
305 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
306 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
307 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
308 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
309 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
310 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
311 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
312 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
/*
 * 256-byte table in index order 0x00..0xff, eight entries per line.  The
 * values are those of the AES inverse S-box (FIPS-197, InvSubBytes).
 */
315 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
316 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
317 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
318 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
319 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
320 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
321 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
322 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
323 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
324 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
325 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
326 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
327 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
328 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
329 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
330 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
331 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
332 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
333 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
334 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
335 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
336 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
337 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
338 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
339 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
340 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
341 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
342 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
343 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
344 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
345 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
346 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
/*
 * Three 16-byte index vectors, each written as one 128-bit literal.  The
 * entry lists below read each literal from its least-significant byte
 * upward.
 * NOTE(review): that is the in-memory byte order only on a little-endian
 * build -- confirm for big-endian configurations.
 */
/* entries 0,5,10,15,4,9,14,3,8,13,2,7,12,1,6,11: the AES ShiftRows byte order */
349 .octa 0x0b06010c07020d08030e09040f0a0500
/* entries 0,13,10,7,4,1,14,11,8,5,2,15,12,9,6,3: the inverse ShiftRows byte order */
352 .octa 0x0306090c0f0205080b0e0104070a0d00
/*
 * entries 1,2,3,0, 5,6,7,4, 9,10,11,8, 13,14,15,12: a one-byte rotation
 * inside every 32-bit group (cf. the .Lror32by8 symbol loaded by prepare)
 */
355 .octa 0x0c0f0e0d080b0a090407060500030201