2 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
4 * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
11 #include <linux/linkage.h>
/*
 * Symbol-name wrappers: AES_ENTRY(func) / AES_ENDPROC(func) expand to
 * ENTRY / ENDPROC applied to neon_<func>; the ## operator pastes the
 * "neon_" prefix onto the argument.
 */
13 #define AES_ENTRY(func) ENTRY(neon_ ## func)
14 #define AES_ENDPROC(func) ENDPROC(neon_ ## func)
16 /* multiply by polynomial 'x' in GF(2^8) */
/*
 * mul_by_x out, in, temp, const
 *
 * Operands are passed with their arrangement suffix already attached
 * (callers in this file pass e.g. v10.16b). \temp is scratch and is
 * overwritten; callers in this file pass v14.16b as \const.
 * NOTE(review): mul_by_x_2x further down shows the full
 * sshr/add/and/eor sequence for two inputs - confirm this macro follows
 * the same pattern.
 */
17 .macro mul_by_x, out, in, temp, const
/* keep \const only in the lanes selected by \temp */
20 and \temp, \temp, \const
24 /* preload the entire Sbox */
/*
 * prepare sbox, shiftrows, temp
 *
 * The four ld1 instructions below fill v16-v31 with 256 consecutive bytes
 * read through \temp (4 x 64 bytes). The sub_bytes* macros in this file
 * use v16-v31 as their tbl/tbx lookup table.
 * NOTE(review): the instructions that point \temp at \sbox and that use
 * \shiftrows are not visible here - confirm against the full file.
 */
25 .macro prepare, sbox, shiftrows, temp
/* table bytes 0..63, then advance \temp by 64 */
30 ld1 {v16.16b-v19.16b}, [\temp], #64
/* table bytes 64..127 */
31 ld1 {v20.16b-v23.16b}, [\temp], #64
/* table bytes 128..191 */
32 ld1 {v24.16b-v27.16b}, [\temp], #64
/* table bytes 192..255; no post-increment on the last load */
33 ld1 {v28.16b-v31.16b}, [\temp]
36 /* do preload for encryption */
/*
 * enc_prepare ignore0, ignore1, temp
 *
 * Invokes prepare with the forward S-box and forward ShiftRows labels.
 * Only \temp is forwarded; the first two arguments are not referenced.
 */
37 .macro enc_prepare, ignore0, ignore1, temp
38 prepare .LForward_Sbox, .LForward_ShiftRows, \temp
41 .macro enc_switch_key, ignore0, ignore1, temp
45 /* do preload for decryption */
/*
 * dec_prepare ignore0, ignore1, temp
 *
 * Invokes prepare with the reverse S-box and reverse ShiftRows labels.
 * Only \temp is forwarded; the first two arguments are not referenced.
 */
46 .macro dec_prepare, ignore0, ignore1, temp
47 prepare .LReverse_Sbox, .LReverse_ShiftRows, \temp
50 /* apply SubBytes transformation using the preloaded Sbox */
/*
 * 256-entry byte substitution of \in through the table held in v16-v31,
 * done as four 64-byte lookups. Each later lookup uses the index reduced
 * by v12 once more. Scratch registers: v9, v10, v11.
 * NOTE(review): v12 presumably holds 0x40 in every byte, matching the
 * 64-byte width of each table quarter - confirm where v12 is set up.
 */
/* v9 = index - v12, for the second table quarter */
52 sub v9.16b, \in\().16b, v12.16b
/* first quarter: tbl writes 0 to lanes whose index is out of range */
53 tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
/* v10 = index - 2 * v12, for the third quarter */
54 sub v10.16b, v9.16b, v12.16b
/* tbx leaves a lane unchanged when its index is out of range */
55 tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
/* v11 = index - 3 * v12, for the fourth quarter */
56 sub v11.16b, v10.16b, v12.16b
57 tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
58 tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
61 /* apply MixColumns transformation */
/*
 * mix_columns in
 *
 * Transforms the state in \in in place. Writes v8, v9, v10 and v11 as
 * scratch; v14 is passed to mul_by_x as its constant operand.
 */
62 .macro mix_columns, in
/* v10 = mul_by_x(\in); v9 is mul_by_x's temporary */
63 mul_by_x v10.16b, \in\().16b, v9.16b, v14.16b
/* v8 = \in with the two 16-bit halves of every 32-bit word swapped */
64 rev32 v8.8h, \in\().8h
/* \in = mul_by_x(\in) ^ \in */
65 eor \in\().16b, v10.16b, \in\().16b
/* shl #24 followed by sri #8 rotates each 32-bit word right by 8 bits */
67 shl v11.4s, \in\().4s, #24
69 sri v11.4s, \in\().4s, #8
/* fold the half-swapped copy into v9 (v9 as left by the preceding instructions) */
70 eor v9.16b, v9.16b, v8.16b
71 eor v10.16b, v10.16b, v9.16b
/* combine with the rotated term to form the result */
72 eor \in\().16b, v10.16b, v11.16b
75 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
/*
 * inv_mix_columns in
 *
 * Updates the state in \in in place. Writes v10 (mul_by_x temporary) and
 * v11 as scratch; v14 is passed to mul_by_x as its constant operand.
 */
76 .macro inv_mix_columns, in
/* two successive mul_by_x steps: v11 = \in multiplied by x twice */
77 mul_by_x v11.16b, \in\().16b, v10.16b, v14.16b
78 mul_by_x v11.16b, v11.16b, v10.16b, v14.16b
/* \in ^= v11 */
79 eor \in\().16b, \in\().16b, v11.16b
/* second fold of v11 into \in */
81 eor \in\().16b, \in\().16b, v11.16b
/*
 * do_block enc, in, rounds, rk, rkp, i
 *
 * Single-state round sequence operating on \in in place.
 * Register roles visible in this body:
 *   v15 - current round key, XORed into the state at labels 1111 and 2222
 *   v13 - byte permutation used by tbl for the ShiftRows step
 *   \rkp - pointer from which the next 16-byte round key is loaded
 * encrypt_block passes \enc = 1 and decrypt_block passes \enc = 0.
 * NOTE(review): the uses of \rounds, \rk and \i are not visible here.
 */
85 .macro do_block, enc, in, rounds, rk, rkp, i
89 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
90 tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
/* fetch the next round key and advance \rkp by 16 bytes */
92 ld1 {v15.16b}, [\rkp], #16
101 2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
/* encrypt_block: forwards all arguments to do_block with \enc = 1 */
104 .macro encrypt_block, in, rounds, rk, rkp, i
105 do_block 1, \in, \rounds, \rk, \rkp, \i
/* decrypt_block: forwards all arguments to do_block with \enc = 0 */
108 .macro decrypt_block, in, rounds, rk, rkp, i
109 do_block 0, \in, \rounds, \rk, \rkp, \i
113 * Interleaved versions: functionally equivalent to the
114 * ones above, but applied to 2 or 4 AES states in parallel.
/*
 * sub_bytes_2x in0, in1
 *
 * Byte substitution of two states through the table in v16-v31, with the
 * instructions for \in0 and \in1 interleaved. Scratch: v8-v11. v12 is the
 * per-quarter index step (read only).
 * tbl writes 0 to lanes whose index is out of range; tbx leaves them
 * unchanged, so each lane is filled by exactly one of the four lookups.
 */
117 .macro sub_bytes_2x, in0, in1
/* v8/v9 = indices for the second table quarter */
118 sub v8.16b, \in0\().16b, v12.16b
119 sub v9.16b, \in1\().16b, v12.16b
/* first quarter (v16-v19) */
120 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
121 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
/* v10/v11 = indices for the third quarter */
122 sub v10.16b, v8.16b, v12.16b
123 sub v11.16b, v9.16b, v12.16b
/* second quarter (v20-v23) */
124 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
125 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
/* v8/v9 are reused as indices for the fourth quarter */
126 sub v8.16b, v10.16b, v12.16b
127 sub v9.16b, v11.16b, v12.16b
/* third quarter (v24-v27) */
128 tbx \in0\().16b, {v24.16b-v27.16b}, v10.16b
129 tbx \in1\().16b, {v24.16b-v27.16b}, v11.16b
/* fourth quarter (v28-v31) */
130 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
131 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
/*
 * sub_bytes_4x in0, in1, in2, in3
 *
 * Byte substitution of four states through the table in v16-v31.
 * v8, v9, v10, v11 hold the running lookup index for \in0, \in1, \in2,
 * \in3 respectively; each is reduced by v12 once before every tbx on its
 * state. The sub and tbl/tbx instructions of the four states are
 * interleaved. Scratch: v8-v11. v12 is read only.
 */
134 .macro sub_bytes_4x, in0, in1, in2, in3
/* first quarter (v16-v19): tbl zeroes lanes whose index is out of range */
135 sub v8.16b, \in0\().16b, v12.16b
136 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
137 sub v9.16b, \in1\().16b, v12.16b
138 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
139 sub v10.16b, \in2\().16b, v12.16b
140 tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
141 sub v11.16b, \in3\().16b, v12.16b
142 tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
/* second quarter (v20-v23): tbx keeps lanes whose index is out of range */
143 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
144 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
145 sub v8.16b, v8.16b, v12.16b
146 tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
147 sub v9.16b, v9.16b, v12.16b
148 tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
149 sub v10.16b, v10.16b, v12.16b
/* third quarter (v24-v27) */
150 tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
151 sub v11.16b, v11.16b, v12.16b
152 tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
153 sub v8.16b, v8.16b, v12.16b
154 tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
155 sub v9.16b, v9.16b, v12.16b
156 tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
157 sub v10.16b, v10.16b, v12.16b
/* fourth quarter (v28-v31) */
158 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
159 sub v11.16b, v11.16b, v12.16b
160 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
161 tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
162 tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
/*
 * mul_by_x_2x out0, out1, in0, in1, tmp0, tmp1, const
 *
 * Two interleaved copies of the same per-byte computation:
 *   out = (in + in) ^ (const & mask)
 * where mask is 0xff in every byte whose top bit is set in the input and
 * 0x00 elsewhere. Arguments are bare register names (e.g. v8); the .16b
 * arrangement is appended here. \tmp0 and \tmp1 are overwritten.
 * Callers in this file pass out0/out1 equal to in0/in1 in some
 * invocations; the mask is taken before \out is written, so that is safe.
 */
165 .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
/* arithmetic shift by 7 replicates each byte's sign bit into a 0x00/0xff mask */
166 sshr \tmp0\().16b, \in0\().16b, #7
/* in + in doubles each byte; the bit carried out of the byte is dropped */
167 add \out0\().16b, \in0\().16b, \in0\().16b
168 sshr \tmp1\().16b, \in1\().16b, #7
/* keep \const only in the bytes whose top bit was set */
169 and \tmp0\().16b, \tmp0\().16b, \const\().16b
170 add \out1\().16b, \in1\().16b, \in1\().16b
171 and \tmp1\().16b, \tmp1\().16b, \const\().16b
/* apply the conditional correction */
172 eor \out0\().16b, \out0\().16b, \tmp0\().16b
173 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/*
 * mix_columns_2x in0, in1
 *
 * Transforms two states in place, with the instructions for \in0 and
 * \in1 interleaved. v14 is passed to mul_by_x_2x as its constant.
 * Overwrites v8-v13. Note that this includes v12 and v13, which other
 * macros in this file read as the sub_bytes index step and the ShiftRows
 * permutation, so callers must restore them afterwards.
 * A shl #24 followed by sri #8 on the same destination rotates each
 * 32-bit word right by 8 bits.
 */
176 .macro mix_columns_2x, in0, in1
/* v8/v9 = mul_by_x of each state; v10/v11 are its temporaries */
177 mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
/* v10/v11 = each state with the 16-bit halves of every word swapped */
178 rev32 v10.8h, \in0\().8h
179 rev32 v11.8h, \in1\().8h
/* \in = mul_by_x(\in) ^ \in */
180 eor \in0\().16b, v8.16b, \in0\().16b
181 eor \in1\().16b, v9.16b, \in1\().16b
/* begin v12/v13 = rotate-right-by-8 of the half-swapped copies */
182 shl v12.4s, v10.4s, #24
183 shl v13.4s, v11.4s, #24
184 eor v8.16b, v8.16b, v10.16b
185 sri v12.4s, v10.4s, #8
/* v10/v11 are reused: rotate-right-by-8 of the updated states */
186 shl v10.4s, \in0\().4s, #24
187 eor v9.16b, v9.16b, v11.16b
188 sri v13.4s, v11.4s, #8
189 shl v11.4s, \in1\().4s, #24
190 sri v10.4s, \in0\().4s, #8
191 eor \in0\().16b, v8.16b, v12.16b
192 sri v11.4s, \in1\().4s, #8
193 eor \in1\().16b, v9.16b, v13.16b
/* fold in the final rotated term */
194 eor \in0\().16b, v10.16b, \in0\().16b
195 eor \in1\().16b, v11.16b, \in1\().16b
/*
 * inv_mix_cols_2x in0, in1
 *
 * Two-state inverse MixColumns: a pre-multiplication step on each state
 * followed by the forward mix_columns_2x. Overwrites v8-v11 here, plus
 * whatever mix_columns_2x overwrites (v8-v13).
 */
198 .macro inv_mix_cols_2x, in0, in1
/* two mul_by_x_2x passes: v8/v9 = each state multiplied by x twice */
199 mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
200 mul_by_x_2x v8, v9, v8, v9, v10, v11, v14
201 eor \in0\().16b, \in0\().16b, v8.16b
202 eor \in1\().16b, \in1\().16b, v9.16b
/* second fold of v8/v9 into the states */
205 eor \in0\().16b, \in0\().16b, v8.16b
206 eor \in1\().16b, \in1\().16b, v9.16b
/* finish with the forward transform */
207 mix_columns_2x \in0, \in1
/*
 * inv_mix_cols_4x in0, in1, in2, in3
 *
 * Four-state inverse MixColumns: a pre-multiplication step on each state
 * followed by two forward mix_columns_2x invocations.
 * v8, v9, v10, v11 carry the pre-multiplication term for \in0..\in3;
 * v12 and v13 serve as mul_by_x_2x temporaries, so both are overwritten.
 */
210 .macro inv_mix_cols_4x, in0, in1, in2, in3
/* first multiplication by x; v10/v11 are temporaries for the first pair only */
211 mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v14
212 mul_by_x_2x v10, v11, \in2, \in3, v12, v13, v14
/* second multiplication by x, in place */
213 mul_by_x_2x v8, v9, v8, v9, v12, v13, v14
214 mul_by_x_2x v10, v11, v10, v11, v12, v13, v14
215 eor \in0\().16b, \in0\().16b, v8.16b
216 eor \in1\().16b, \in1\().16b, v9.16b
217 eor \in2\().16b, \in2\().16b, v10.16b
218 eor \in3\().16b, \in3\().16b, v11.16b
/* second fold of v8-v11 into the states */
223 eor \in0\().16b, \in0\().16b, v8.16b
224 eor \in1\().16b, \in1\().16b, v9.16b
225 eor \in2\().16b, \in2\().16b, v10.16b
226 eor \in3\().16b, \in3\().16b, v11.16b
/* finish with the forward transform, two states at a time */
227 mix_columns_2x \in0, \in1
228 mix_columns_2x \in2, \in3
/*
 * do_block_2x enc, in0, in1, rounds, rk, rkp, i
 *
 * Two-state round sequence operating on \in0 and \in1 in place.
 * Register roles visible in this body:
 *   v15 - current round key, XORed into both states at labels 1111/2222
 *   v13 - ShiftRows permutation for tbl; reloaded from the literal after
 *         each (inverse) MixColumns step because mix_columns_2x
 *         overwrites v13
 *   \rkp - pointer from which the next 16-byte round key is loaded
 * encrypt_block2x passes \enc = 1 and decrypt_block2x passes \enc = 0.
 * NOTE(review): the uses of \enc, \rounds, \rk and \i are not visible
 * here - confirm against the full file.
 *
 * The parameter list is comma-separated throughout, matching do_block
 * and do_block_4x. Invocation is unaffected, because gas accepts either
 * commas or whitespace between macro parameter names.
 */
231 .macro do_block_2x, enc, in0, in1, rounds, rk, rkp, i
235 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
236 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
237 sub_bytes_2x \in0, \in1
238 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
239 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
/* fetch the next round key and advance \rkp by 16 bytes */
240 ld1 {v15.16b}, [\rkp], #16
244 mix_columns_2x \in0, \in1
/* restore the forward ShiftRows permutation overwritten above */
245 ldr q13, .LForward_ShiftRows
247 inv_mix_cols_2x \in0, \in1
/* restore the reverse ShiftRows permutation overwritten above */
248 ldr q13, .LReverse_ShiftRows
252 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
253 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
/*
 * do_block_4x enc, in0, in1, in2, in3, rounds, rk, rkp, i
 *
 * Four-state round sequence operating on \in0..\in3 in place.
 * Register roles visible in this body:
 *   v15 - current round key, XORed into all four states at 1111/2222
 *   v13 - ShiftRows permutation for tbl; reloaded from the literal after
 *         each (inverse) MixColumns step because mix_columns_2x and
 *         inv_mix_cols_4x overwrite v13
 *   \rkp - pointer from which the next 16-byte round key is loaded
 * encrypt_block4x passes \enc = 1 and decrypt_block4x passes \enc = 0.
 * NOTE(review): the uses of \enc, \rounds, \rk and \i are not visible
 * here - confirm against the full file.
 */
256 .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
260 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
261 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
262 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
263 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
264 sub_bytes_4x \in0, \in1, \in2, \in3
265 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
266 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
267 tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
268 tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
/* fetch the next round key and advance \rkp by 16 bytes */
269 ld1 {v15.16b}, [\rkp], #16
273 mix_columns_2x \in0, \in1
274 mix_columns_2x \in2, \in3
/* restore the forward ShiftRows permutation overwritten above */
275 ldr q13, .LForward_ShiftRows
277 inv_mix_cols_4x \in0, \in1, \in2, \in3
/* restore the reverse ShiftRows permutation overwritten above */
278 ldr q13, .LReverse_ShiftRows
282 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
283 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
284 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
285 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
/* encrypt_block2x: forwards all arguments to do_block_2x with \enc = 1 */
288 .macro encrypt_block2x, in0, in1, rounds, rk, rkp, i
289 do_block_2x 1, \in0, \in1, \rounds, \rk, \rkp, \i
/* decrypt_block2x: forwards all arguments to do_block_2x with \enc = 0 */
292 .macro decrypt_block2x, in0, in1, rounds, rk, rkp, i
293 do_block_2x 0, \in0, \in1, \rounds, \rk, \rkp, \i
/* encrypt_block4x: forwards all arguments to do_block_4x with \enc = 1 */
296 .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
297 do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
/* decrypt_block4x: forwards all arguments to do_block_4x with \enc = 0 */
300 .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
301 do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
304 #include "aes-modes.S"
/*
 * 16-byte index vector: output byte i takes input byte table[i]
 * (0, 5, 10, 15, 4, 9, ...), i.e. the AES ShiftRows byte permutation on a
 * column-major state when used as a tbl index.
 * NOTE(review): the label is not visible here - presumably
 * .LForward_ShiftRows; confirm.
 */
309 .byte 0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3
310 .byte 0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb
/*
 * 16-byte index vector (0, 13, 10, 7, 4, 1, ...): the inverse of the
 * ShiftRows byte permutation when used as a tbl index.
 * NOTE(review): the label is not visible here - presumably
 * .LReverse_ShiftRows; confirm.
 */
313 .byte 0x0, 0xd, 0xa, 0x7, 0x4, 0x1, 0xe, 0xb
314 .byte 0x8, 0x5, 0x2, 0xf, 0xc, 0x9, 0x6, 0x3
/*
 * Start of a 256-byte table (32 rows of 8 bytes). The values match the
 * AES forward S-box from FIPS-197, indexed by input byte.
 * NOTE(review): the label is not visible here - presumably
 * .LForward_Sbox; confirm.
 */
317 .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
318 .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
319 .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
320 .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
321 .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
322 .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
323 .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
324 .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
325 .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
326 .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
327 .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
328 .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
329 .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
330 .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
331 .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
332 .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
333 .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
334 .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
335 .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
336 .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
337 .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
338 .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
339 .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
340 .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
341 .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
342 .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
343 .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
344 .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
345 .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
346 .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
347 .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
348 .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
/*
 * Start of a 256-byte table (32 rows of 8 bytes). The values match the
 * AES inverse S-box from FIPS-197, indexed by input byte.
 * NOTE(review): the label is not visible here - presumably
 * .LReverse_Sbox; confirm.
 */
351 .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
352 .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
353 .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
354 .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
355 .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
356 .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
357 .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
358 .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
359 .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
360 .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
361 .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
362 .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
363 .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
364 .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
365 .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
366 .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
367 .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
368 .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
369 .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
370 .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
371 .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
372 .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
373 .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
374 .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
375 .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
376 .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
377 .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
378 .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
379 .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
380 .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
381 .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
382 .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d