2 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
4 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 #include <linux/linkage.h>
12 #include <asm/assembler.h>
/*
 * Symbol-naming helpers: AES_ENTRY(func) and AES_ENDPROC(func) wrap
 * ENTRY() and ENDPROC() and paste a neon_ prefix onto func, so a routine
 * declared as AES_ENTRY(foo) is emitted under the symbol neon_foo.
 */
14 #define AES_ENTRY(func) ENTRY(neon_ ## func)
15 #define AES_ENDPROC(func) ENDPROC(neon_ ## func)
17 /* multiply by polynomial 'x' in GF(2^8) */
/*
 * Operands are passed with their arrangement suffix already attached
 * (mix_columns calls this with v9.16b, in.16b, v8.16b, v12.16b).
 *   temp  - scratch operand, overwritten
 *   const - mask that is ANDed into temp
 * NOTE(review): out and in are presumably the result and the value being
 * multiplied, as in mul_by_x_2x; confirm against the full macro body.
 */
18 .macro mul_by_x, out, in, temp, const
/* keep only the bits of temp that are also set in const */
21 and \temp, \temp, \const
25 /* multiply by polynomial 'x^2' in GF(2^8) */
/*
 * Operands are passed with their arrangement suffix already attached
 * (mix_columns calls this with v8.16b, in.16b, v9.16b, v12.16b).
 *   temp  - scratch operand, overwritten
 *   const - second factor of the polynomial multiply applied to temp
 * NOTE(review): out and in are presumably the result and the value being
 * multiplied, as in mul_by_x2_2x; confirm against the full macro body.
 */
26 .macro mul_by_x2, out, in, temp, const
/* temp = temp (x) const, a carry-less polynomial multiply per element */
29 pmul \temp, \temp, \const
33 /* preload the entire Sbox */
/*
 * The four ld1 instructions read 4 x 64 = 256 consecutive bytes starting at
 * the address held in temp and place them in v16-v31, 16 bytes per register.
 * temp is post-incremented by 64 after each of the first three loads, so it
 * is left pointing at the last 64-byte quarter of the table.
 * NOTE(review): temp has to hold the table address before the first ld1
 * runs; how sbox and shiftrows are consumed is not shown by these loads,
 * confirm against the full macro body.
 */
34 .macro prepare, sbox, shiftrows, temp
/* table bytes 0-63 into v16-v19 */
39 ld1 {v16.16b-v19.16b}, [\temp], #64
/* table bytes 64-127 into v20-v23 */
40 ld1 {v20.16b-v23.16b}, [\temp], #64
/* table bytes 128-191 into v24-v27 */
41 ld1 {v24.16b-v27.16b}, [\temp], #64
/* table bytes 192-255 into v28-v31, no write-back of temp */
42 ld1 {v28.16b-v31.16b}, [\temp]
45 /* do preload for encryption */
/*
 * Invokes prepare with the forward tables (.LForward_Sbox and
 * .LForward_ShiftRows). Of the three parameters only temp is passed on;
 * ignore0 and ignore1 are accepted but not forwarded.
 */
46 .macro enc_prepare, ignore0, ignore1, temp
47 prepare .LForward_Sbox, .LForward_ShiftRows, \temp
/*
 * Hook with the same (ignore0, ignore1, temp) parameter list as enc_prepare.
 * NOTE(review): purpose is inferred from the name only (switching the
 * encryption key); confirm against the macro body.
 */
50 .macro enc_switch_key, ignore0, ignore1, temp
54 /* do preload for decryption */
/*
 * Invokes prepare with the reverse tables (.LReverse_Sbox and
 * .LReverse_ShiftRows). Of the three parameters only temp is passed on;
 * ignore0 and ignore1 are accepted but not forwarded.
 */
55 .macro dec_prepare, ignore0, ignore1, temp
56 prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
59 /* apply SubBytes transformation using the preloaded Sbox */
/*
 * Replaces every byte of in by its entry in the 256-byte table held in
 * v16-v31, one 64-byte quarter (four registers) at a time:
 *   - tbl on v16-v19 writes the table byte for indices 0-63 and zero for
 *     any larger index;
 *   - each following tbx is indexed by the value reduced by v15 once more
 *     and only overwrites lanes whose reduced index lies inside its quarter,
 *     leaving the other lanes as they are.
 * The first reduced index is taken before the tbl because that tbl
 * overwrites in.
 * Clobbers v9, v10 and v11.
 * NOTE(review): for the quarters to line up, v15 has to hold 64 in every
 * byte lane here; that setup is not part of these lines, confirm at the
 * call site.
 */
61 sub v9.16b, \in\().16b, v15.16b
62 tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
63 sub v10.16b, v9.16b, v15.16b
64 tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
65 sub v11.16b, v10.16b, v15.16b
66 tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
67 tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
70 /* apply MixColumns transformation */
/*
 * in  - bare register name of the state; the .16b and .8h suffixes are
 *       appended here
 * enc - direction flag
 * Uses v8 and v9 as scratch, v12 as the constant operand of the GF(2^8)
 * multiplies and v14 as the byte permutation of the tbl near the end.
 * NOTE(review): enc presumably decides whether the inverse pre-multiply
 * step is applied; the conditional is not among these lines, confirm.
 */
71 .macro mix_columns, in, enc
73 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
/* v8 = in * x^2, with v9 as scratch */
74 mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b
75 eor \in\().16b, \in\().16b, v8.16b
77 eor \in\().16b, \in\().16b, v8.16b
/* v9 = in * x, with v8 as scratch */
80 mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b
/* v8 = in with the two halfwords of every 32-bit word swapped */
81 rev32 v8.8h, \in\().8h
82 eor v8.16b, v8.16b, v9.16b
83 eor \in\().16b, \in\().16b, v8.16b
/* permute the bytes of in through the index vector in v14 */
84 tbl \in\().16b, {\in\().16b}, v14.16b
85 eor \in\().16b, \in\().16b, v8.16b
/*
 * Round sequence for a single AES state.
 *   enc - direction flag (1 from encrypt_block, 0 from decrypt_block)
 *   in  - bare register name of the state
 *   rkp - pointer the round keys are read through; advanced by 16 bytes
 *         per load
 * v15 carries the round key that is XORed into the state and v13 the
 * ShiftRows byte permutation.
 * NOTE(review): rounds, rk and i are presumably the round count, the key
 * schedule base and the loop counter; their uses are not among these
 * lines, confirm against the full macro body.
 */
88 .macro do_block, enc, in, rounds, rk, rkp, i
92 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
94 tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
/* fetch the next 16-byte round key into v15 and step rkp past it */
97 ld1 {v15.4s}, [\rkp], #16
101 2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
/* Encrypt one state: do_block with the enc argument fixed to 1. */
104 .macro encrypt_block, in, rounds, rk, rkp, i
105 do_block 1, \in, \rounds, \rk, \rkp, \i
/* Decrypt one state: do_block with the enc argument fixed to 0. */
108 .macro decrypt_block, in, rounds, rk, rkp, i
109 do_block 0, \in, \rounds, \rk, \rkp, \i
113 * Interleaved versions: functionally equivalent to the
114 * ones above, but applied to 2 or 4 AES states in parallel.
/*
 * Table substitution for two states at once, with the instructions of the
 * two states interleaved. Each byte of in0 and in1 is replaced by its entry
 * in the 256-byte table held in v16-v31: tbl covers the first 64 entries
 * (and zeroes lanes with larger indices), then three tbx steps fill in the
 * remaining quarters using the index reduced by v15 once per quarter.
 * in0 works through v8 and v10, in1 through v9 and v11; all four scratch
 * registers are clobbered.
 * NOTE(review): v15 is expected to hold 64 in every byte lane on entry;
 * that setup is done by the caller, confirm.
 */
117 .macro sub_bytes_2x, in0, in1
/* quarter 0 (v16-v19); reduced indices are taken before in0/in1 are overwritten */
118 sub v8.16b, \in0\().16b, v15.16b
119 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
120 sub v9.16b, \in1\().16b, v15.16b
121 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
/* quarter 1 (v20-v23) */
122 sub v10.16b, v8.16b, v15.16b
123 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
124 sub v11.16b, v9.16b, v15.16b
125 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
/* quarter 2 (v24-v27) */
126 sub v8.16b, v10.16b, v15.16b
127 tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
128 sub v9.16b, v11.16b, v15.16b
129 tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
/* quarter 3 (v28-v31) */
130 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
131 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
/*
 * Table substitution for four states at once, with the instructions of the
 * four states interleaved. v8, v9, v10 and v11 hold the running index of
 * in0, in1, in2 and in3 respectively; each is reduced in place by v15
 * before the next table quarter is applied. tbl handles the first 64
 * entries (v16-v19) and the three tbx steps handle v20-v23, v24-v27 and
 * v28-v31, leaving lanes with out-of-range indices untouched.
 * Clobbers v8-v11.
 * NOTE(review): v15 is expected to hold 64 in every byte lane on entry;
 * that setup is done by the caller, confirm.
 */
134 .macro sub_bytes_4x, in0, in1, in2, in3
/* quarter 0 (v16-v19); reduced indices are taken before each state is overwritten */
135 sub v8.16b, \in0\().16b, v15.16b
136 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
137 sub v9.16b, \in1\().16b, v15.16b
138 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
139 sub v10.16b, \in2\().16b, v15.16b
140 tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
141 sub v11.16b, \in3\().16b, v15.16b
142 tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
/* quarter 1 (v20-v23), interleaved with the index updates for quarter 2 */
143 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
144 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
145 sub v8.16b, v8.16b, v15.16b
146 tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
147 sub v9.16b, v9.16b, v15.16b
148 tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
149 sub v10.16b, v10.16b, v15.16b
/* quarter 2 (v24-v27), interleaved with the index updates for quarter 3 */
150 tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
151 sub v11.16b, v11.16b, v15.16b
152 tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
153 sub v8.16b, v8.16b, v15.16b
154 tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
155 sub v9.16b, v9.16b, v15.16b
156 tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
157 sub v10.16b, v10.16b, v15.16b
/* quarter 3 (v28-v31) */
158 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
159 sub v11.16b, v11.16b, v15.16b
160 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
161 tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
162 tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
/*
 * Two-state multiply by x, byte-wise: out0 is derived from in0 and out1
 * from in1, with the two instruction streams interleaved.
 * Register names are passed bare; the .16b suffix is appended here.
 *   tmp0, tmp1 - scratch registers, overwritten
 *   const      - value XORed into the shifted byte wherever the top bit of
 *                the input byte was set
 * NOTE(review): const presumably carries the GF(2^8) reduction byte in
 * every lane (v12 at the call site); its value is set elsewhere, confirm.
 */
165 .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
/* tmp0 = 0xff in lanes where the top bit of in0 is set, 0x00 elsewhere */
166 sshr \tmp0\().16b, \in0\().16b, #7
/* out0 = in0 shifted left by one, top bit dropped */
167 shl \out0\().16b, \in0\().16b, #1
168 sshr \tmp1\().16b, \in1\().16b, #7
/* tmp0 = const in the lanes that overflowed, zero elsewhere */
169 and \tmp0\().16b, \tmp0\().16b, \const\().16b
170 shl \out1\().16b, \in1\().16b, #1
171 and \tmp1\().16b, \tmp1\().16b, \const\().16b
/* fold the correction into the shifted value */
172 eor \out0\().16b, \out0\().16b, \tmp0\().16b
173 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/*
 * Two-state multiply by x^2, byte-wise: out0 is derived from in0 and out1
 * from in1, with the two instruction streams interleaved.
 * Register names are passed bare; the .16b suffix is appended here.
 *   tmp0, tmp1 - scratch registers, overwritten
 *   const      - factor that the two shifted-out bits are polynomially
 *                multiplied by before being XORed back in
 * NOTE(review): const presumably carries the GF(2^8) reduction byte in
 * every lane (v12 at the call site); its value is set elsewhere, confirm.
 */
176 .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
/* tmp0 = the two top bits of every in0 byte, moved down to bits 1:0 */
177 ushr \tmp0\().16b, \in0\().16b, #6
/* out0 = in0 shifted left by two, top two bits dropped */
178 shl \out0\().16b, \in0\().16b, #2
179 ushr \tmp1\().16b, \in1\().16b, #6
/* carry-less multiply of the shifted-out bits by const */
180 pmul \tmp0\().16b, \tmp0\().16b, \const\().16b
181 shl \out1\().16b, \in1\().16b, #2
182 pmul \tmp1\().16b, \tmp1\().16b, \const\().16b
/* fold the correction into the shifted value */
183 eor \out0\().16b, \out0\().16b, \tmp0\().16b
184 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/*
 * Column mixing step for two states at once, with the instructions of the
 * two states interleaved.
 *   in0, in1 - bare register names of the two states
 *   enc      - direction flag passed down from do_block_2x / do_block_4x
 * Uses v8-v11 as scratch, v12 as the constant operand of the GF(2^8)
 * multiplies and v14 as the byte permutation of the tbl near the end.
 * NOTE(review): enc presumably decides whether the inverse pre-multiply
 * step is applied; the conditional is not among these lines, confirm.
 */
187 .macro mix_columns_2x, in0, in1, enc
189 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
/* v8 = in0 * x^2 and v9 = in1 * x^2, with v10 and v11 as scratch */
190 mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12
191 eor \in0\().16b, \in0\().16b, v8.16b
193 eor \in1\().16b, \in1\().16b, v9.16b
195 eor \in0\().16b, \in0\().16b, v8.16b
196 eor \in1\().16b, \in1\().16b, v9.16b
/* v8 = in0 * x and v9 = in1 * x, with v10 and v11 as scratch */
199 mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12
/* v10, v11 = in0, in1 with the two halfwords of every 32-bit word swapped */
200 rev32 v10.8h, \in0\().8h
201 rev32 v11.8h, \in1\().8h
202 eor v10.16b, v10.16b, v8.16b
203 eor v11.16b, v11.16b, v9.16b
204 eor \in0\().16b, \in0\().16b, v10.16b
205 eor \in1\().16b, \in1\().16b, v11.16b
/* permute the bytes of each state through the index vector in v14 */
206 tbl \in0\().16b, {\in0\().16b}, v14.16b
207 tbl \in1\().16b, {\in1\().16b}, v14.16b
208 eor \in0\().16b, \in0\().16b, v10.16b
209 eor \in1\().16b, \in1\().16b, v11.16b
/*
 * Round sequence for two AES states processed side by side.
 *   enc      - direction flag, handed on to mix_columns_2x
 *   in0, in1 - bare register names of the two states
 *   rkp      - pointer the round keys are read through; advanced by 16
 *              bytes per load
 * Both states are XORed with the same round key in v15 and permuted with
 * the same ShiftRows vector in v13.
 * NOTE(review): rounds, rk and i are presumably the round count, the key
 * schedule base and the loop counter; their uses are not among these
 * lines, confirm against the full macro body.
 */
212 .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
216 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
217 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
219 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
220 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
221 sub_bytes_2x \in0, \in1
/* fetch the next 16-byte round key into v15 and step rkp past it */
223 ld1 {v15.4s}, [\rkp], #16
225 mix_columns_2x \in0, \in1, \enc
227 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
228 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
/*
 * Round sequence for four AES states processed side by side.
 *   enc       - direction flag, handed on to mix_columns_2x
 *   in0 - in3 - bare register names of the four states
 *   rkp       - pointer the round keys are read through; advanced by 16
 *               bytes per load
 * All states are XORed with the same round key in v15 and permuted with
 * the same ShiftRows vector in v13. The column mixing is done as two
 * mix_columns_2x invocations, on (in0, in1) and on (in2, in3).
 * NOTE(review): rounds, rk and i are presumably the round count, the key
 * schedule base and the loop counter; their uses are not among these
 * lines, confirm against the full macro body.
 */
231 .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
235 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
236 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
237 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
238 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
240 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
241 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
242 tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
243 tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
244 sub_bytes_4x \in0, \in1, \in2, \in3
/* fetch the next 16-byte round key into v15 and step rkp past it */
246 ld1 {v15.4s}, [\rkp], #16
248 mix_columns_2x \in0, \in1, \enc
249 mix_columns_2x \in2, \in3, \enc
251 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
252 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
253 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
254 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
/* Encrypt two states in parallel: do_block_2x with enc fixed to 1. */
257 .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
258 do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
/* Decrypt two states in parallel: do_block_2x with enc fixed to 0. */
261 .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
262 do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
/* Encrypt four states in parallel: do_block_4x with enc fixed to 1. */
265 .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
266 do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
/* Decrypt four states in parallel: do_block_4x with enc fixed to 0. */
269 .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
270 do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
273 #include "aes-modes.S"
/*
 * 256-byte substitution table, 32 rows of 8 entries; entry i is the
 * replacement for input byte i (entry 0x00 is 0x63).
 * NOTE(review): presumably the table addressed as .LForward_Sbox by
 * enc_prepare; the label is not part of these lines, confirm.
 */
278 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
279 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
280 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
281 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
282 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
283 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
284 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
285 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
286 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
287 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
288 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
289 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
290 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
291 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
292 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
293 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
294 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
295 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
296 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
297 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
298 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
299 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
300 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
301 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
302 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
303 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
304 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
305 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
306 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
307 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
308 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
309 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
/*
 * 256-byte substitution table, 32 rows of 8 entries; entry i is the
 * replacement for input byte i. Spot checks show it undoing the table
 * that precedes it (that table maps 0x00 to 0x63 and 0x01 to 0x7c; here
 * entry 0x63 is 0x00 and entry 0x7c is 0x01).
 * NOTE(review): presumably the table addressed as .LReverse_Sbox by
 * dec_prepare; the label is not part of these lines, confirm.
 */
312 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
313 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
314 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
315 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
316 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
317 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
318 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
319 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
320 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
321 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
322 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
323 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
324 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
325 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
326 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
327 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
328 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
329 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
330 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
331 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
332 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
333 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
334 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
335 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
336 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
337 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
338 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
339 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
340 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
341 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
342 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
343 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
/*
 * Three 128-bit constants. Every byte of each value is an index in the
 * range 0x00-0x0f and each index occurs exactly once, so each constant is
 * a permutation of the 16 bytes of a vector, in the form tbl takes as its
 * index operand.
 * NOTE(review): presumably, in order, the forward ShiftRows permutation,
 * the reverse ShiftRows permutation and a mask that rotates every 32-bit
 * word by 8 bits; the labels are not part of these lines, confirm.
 */
346 .octa 0x0b06010c07020d08030e09040f0a0500
349 .octa 0x0306090c0f0205080b0e0104070a0d00
352 .octa 0x0c0f0e0d080b0a090407060500030201