Merge tag 'for-linus-20190706' of git://git.kernel.dk/linux-block
[linux/fpc-iii.git] / arch / arm64 / crypto / aes-neon.S
blobd261331747f212b78a8cbc4bfdea76e0956d5fe0
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
4  *
5  * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
6  */
8 #include <linux/linkage.h>
9 #include <asm/assembler.h>
11 #define AES_ENTRY(func)         ENTRY(neon_ ## func)
   /*
    * AES_ENTRY/AES_ENDPROC paste a "neon_" prefix onto the given name and
    * hand the result to ENTRY/ENDPROC, so AES_ENTRY(foo) refers to neon_foo.
    * ENTRY and ENDPROC are not defined in this file (presumably they come
    * from <linux/linkage.h>, included above).
    */
12 #define AES_ENDPROC(func)       ENDPROC(neon_ ## func)
14         xtsmask         .req    v7
           /*
            * xtsmask is a register alias for v7. Nothing in this file reads
            * it directly.
            * NOTE(review): presumably used by the XTS code in aes-modes.S -
            * confirm there.
            */
16         .macro          xts_reload_mask, tmp
           /*
            * Forwards to xts_load_mask, passing the caller's scratch
            * operand \tmp through unchanged. xts_load_mask is not defined
            * in this file.
            */
17         xts_load_mask   \tmp
18         .endm
20         /*
            * multiply by polynomial 'x' in GF(2^8)
            *
            * Computes \out = (\in << 1) ^ (mask & \const), where mask is \in
            * arithmetically shifted right by 7. Callers in this file pass
            * .16b operands, so each mask byte is 0xff when the top bit of the
            * matching \in byte is set and 0x00 otherwise; they pass v12.16b
            * (0x1b in every byte, set up by 'prepare') as \const.
            *
            * All four arguments are complete vector operands, including the
            * arrangement suffix. \temp is overwritten and must be a different
            * register from both \in and \out; \out may be the same as \in.
            */
21         .macro          mul_by_x, out, in, temp, const
22         sshr            \temp, \in, #7
23         shl             \out, \in, #1
24         and             \temp, \temp, \const
25         eor             \out, \out, \temp
26         .endm
28         /*
            * multiply by polynomial 'x^2' in GF(2^8)
            *
            * Computes \out = (\in << 2) ^ pmul(\in >> 6, \const). The logical
            * shift right by 6 leaves the two bits that the left shift drops;
            * the polynomial multiply by \const turns them into the value to
            * fold back in. Callers in this file pass v12.16b (0x1b in every
            * byte, set up by 'prepare') as \const.
            *
            * All four arguments are complete vector operands, including the
            * arrangement suffix. \temp is overwritten and must be a different
            * register from both \in and \out; \out may be the same as \in.
            */
29         .macro          mul_by_x2, out, in, temp, const
30         ushr            \temp, \in, #6
31         shl             \out, \in, #2
32         pmul            \temp, \temp, \const
33         eor             \out, \out, \temp
34         .endm
36         /*
            * preload the entire Sbox
            *
            * \sbox      - label of a 256-byte lookup table
            * \shiftrows - label of a 16-byte tbl index vector
            * \temp      - general-purpose scratch register, overwritten
            *
            * Register state on exit:
            *   v12      0x1b in every byte
            *   v13      the 16 bytes at \shiftrows
            *   v14      the 16 bytes at .Lror32by8
            *   v16-v31  the 256 bytes at \sbox, in ascending order
            *   \temp    \sbox + 192 (the last ld1 has no post-increment)
            *
            * ldr_l and adr_l are not defined in this file (presumably from
            * <asm/assembler.h>, included above).
            */
37         .macro          prepare, sbox, shiftrows, temp
38         movi            v12.16b, #0x1b
39         ldr_l           q13, \shiftrows, \temp
40         ldr_l           q14, .Lror32by8, \temp
41         adr_l           \temp, \sbox
42         ld1             {v16.16b-v19.16b}, [\temp], #64
43         ld1             {v20.16b-v23.16b}, [\temp], #64
44         ld1             {v24.16b-v27.16b}, [\temp], #64
45         ld1             {v28.16b-v31.16b}, [\temp]
46         .endm
48         /*
            * do preload for encryption
            *
            * Runs 'prepare' with the forward S-box and forward ShiftRows
            * tables. \ignore0 and \ignore1 are accepted but never used;
            * \temp is a scratch register that 'prepare' overwrites.
            */
49         .macro          enc_prepare, ignore0, ignore1, temp
50         prepare         .LForward_Sbox, .LForward_ShiftRows, \temp
51         .endm
53         .macro          enc_switch_key, ignore0, ignore1, temp
           /*
            * Intentionally empty: expands to no instructions and uses none
            * of its three arguments.
            */
54         /* do nothing */
55         .endm
57         /*
            * do preload for decryption
            *
            * Runs 'prepare' with the reverse S-box and reverse ShiftRows
            * tables. \ignore0 and \ignore1 are accepted but never used;
            * \temp is a scratch register that 'prepare' overwrites.
            */
58         .macro          dec_prepare, ignore0, ignore1, temp
59         prepare         .LReverse_Sbox, .LReverse_ShiftRows, \temp
60         .endm
62         /*
            * apply SubBytes transformation using the preloaded Sbox
            *
            * \in - bare vector register name (the macro appends .16b);
            *       every byte is replaced in place by its table entry.
            *
            * Expects v16-v31 to hold the 256-byte table (see 'prepare') and
            * v15 to hold 0x40 in every byte (do_block sets this just before
            * invoking the macro). Overwrites v9, v10 and v11.
            *
            * A single tbl/tbx can index only 64 bytes (four registers), so
            * the table is consulted in four 64-byte quarters. The index is
            * lowered by 0x40 before each later quarter. tbl writes zero for
            * an out-of-range index and tbx leaves the destination byte
            * untouched, so each byte ends up with the one lookup that was in
            * range.
            */
63         .macro          sub_bytes, in
64         sub             v9.16b, \in\().16b, v15.16b
65         tbl             \in\().16b, {v16.16b-v19.16b}, \in\().16b
66         sub             v10.16b, v9.16b, v15.16b
67         tbx             \in\().16b, {v20.16b-v23.16b}, v9.16b
68         sub             v11.16b, v10.16b, v15.16b
69         tbx             \in\().16b, {v24.16b-v27.16b}, v10.16b
70         tbx             \in\().16b, {v28.16b-v31.16b}, v11.16b
71         .endm
73         /*
            * apply MixColumns transformation
            *
            * \in  - bare vector register name (the macro appends the
            *        arrangement); updated in place. Must not be v8 or v9.
            * \enc - assemble-time constant: 0 adds the extra pre-multiply
            *        step for the inverse transformation, any other value
            *        omits it (callers in this file pass 1 for encryption).
            *
            * Expects v12 = 0x1b in every byte and v14 = .Lror32by8, both
            * loaded by 'prepare'. Overwrites v8 and v9.
            */
74         .macro          mix_columns, in, enc
75         .if             \enc == 0
76         /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
77         mul_by_x2       v8.16b, \in\().16b, v9.16b, v12.16b
78         eor             \in\().16b, \in\().16b, v8.16b
79         rev32           v8.8h, v8.8h
80         eor             \in\().16b, \in\().16b, v8.16b
81         .endif
           /* v9 = \in multiplied by x; v8 is only scratch for mul_by_x */
83         mul_by_x        v9.16b, \in\().16b, v8.16b, v12.16b
           /* v8 = \in with the two 16-bit halves of each 32-bit word swapped */
84         rev32           v8.8h, \in\().8h
85         eor             v8.16b, v8.16b, v9.16b
86         eor             \in\().16b, \in\().16b, v8.16b
           /* permute the bytes of each 32-bit word using the v14 index vector */
87         tbl             \in\().16b, {\in\().16b}, v14.16b
88         eor             \in\().16b, \in\().16b, v8.16b
89         .endm
91         .macro          do_block, enc, in, rounds, rk, rkp, i
           /*
            * Run the AES round loop over a single state held in \in.
            *
            * \enc    - passed to mix_columns: 0 selects the inverse variant
            * \in     - bare vector register name holding the state; updated
            *           in place
            * \rounds - round count; must be nonzero, since the loop exits
            *           only when the decremented counter reaches zero
            * \rk     - register pointing at the first round key; not modified
            * \rkp    - scratch register, used as the running round-key pointer
            * \i      - scratch register, used as the loop counter
            *
            * Reads \rounds + 1 round keys of 16 bytes each, starting at \rk.
            * Expects the tables set up by 'prepare' (v12-v14, v16-v31).
            * Overwrites v8-v11 and v15, \rkp, \i and the condition flags.
            *
            * v15 does double duty: it carries the round key into the eor at
            * the top of the loop, is then reloaded with 0x40 for sub_bytes,
            * and finally receives the next round key.
            */
92         ld1             {v15.4s}, [\rk]
93         add             \rkp, \rk, #16
94         mov             \i, \rounds
95 1111:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
96         movi            v15.16b, #0x40
97         tbl             \in\().16b, {\in\().16b}, v13.16b       /* ShiftRows */
98         sub_bytes       \in
99         subs            \i, \i, #1
           /* fetch the next round key; the beq below acts on the subs result */
100         ld1             {v15.4s}, [\rkp], #16
101         beq             2222f
102         mix_columns     \in, \enc
103         b               1111b
            /* the last round skips mix_columns and only adds the final key */
104 2222:   eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
105         .endm
107         .macro          encrypt_block, in, rounds, rk, rkp, i
            /*
             * Single-state encryption: do_block with enc = 1. Arguments
             * and clobbers are those of do_block.
             */
108         do_block        1, \in, \rounds, \rk, \rkp, \i
109         .endm
111         .macro          decrypt_block, in, rounds, rk, rkp, i
            /*
             * Single-state decryption: do_block with enc = 0, which makes
             * mix_columns emit its inverse pre-multiply step. Arguments
             * and clobbers are those of do_block.
             */
112         do_block        0, \in, \rounds, \rk, \rkp, \i
113         .endm
115         /*
116          * Interleaved versions: functionally equivalent to the
117          * ones above, but applied to 2 or 4 AES states in parallel.
118          */
120         .macro          sub_bytes_2x, in0, in1
            /*
             * SubBytes on two states at once, with the two instruction
             * streams interleaved.
             *
             * \in0, \in1 - bare vector register names, updated in place.
             *
             * Expects v16-v31 to hold the 256-byte table and v15 to hold
             * 0x40 in every byte. Overwrites v8-v11: v8/v9 first hold the
             * indices lowered by 0x40, v10/v11 the indices lowered by 0x80,
             * and v8/v9 are then reused for the indices lowered by 0xc0.
             */
121         sub             v8.16b, \in0\().16b, v15.16b
122         tbl             \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
123         sub             v9.16b, \in1\().16b, v15.16b
124         tbl             \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
125         sub             v10.16b, v8.16b, v15.16b
126         tbx             \in0\().16b, {v20.16b-v23.16b}, v8.16b
127         sub             v11.16b, v9.16b, v15.16b
128         tbx             \in1\().16b, {v20.16b-v23.16b}, v9.16b
129         sub             v8.16b, v10.16b, v15.16b
130         tbx             \in0\().16b, {v24.16b-v27.16b}, v10.16b
131         sub             v9.16b, v11.16b, v15.16b
132         tbx             \in1\().16b, {v24.16b-v27.16b}, v11.16b
133         tbx             \in0\().16b, {v28.16b-v31.16b}, v8.16b
134         tbx             \in1\().16b, {v28.16b-v31.16b}, v9.16b
135         .endm
137         .macro          sub_bytes_4x, in0, in1, in2, in3
            /*
             * SubBytes on four states at once, with the four instruction
             * streams interleaved.
             *
             * \in0..\in3 - bare vector register names, updated in place.
             *
             * Expects v16-v31 to hold the 256-byte table and v15 to hold
             * 0x40 in every byte. Overwrites v8-v11, which carry the running
             * lookup index for \in0..\in3 respectively; each is lowered by
             * 0x40 in place before the next 64-byte quarter is consulted.
             */
138         sub             v8.16b, \in0\().16b, v15.16b
139         tbl             \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
140         sub             v9.16b, \in1\().16b, v15.16b
141         tbl             \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
142         sub             v10.16b, \in2\().16b, v15.16b
143         tbl             \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
144         sub             v11.16b, \in3\().16b, v15.16b
145         tbl             \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
146         tbx             \in0\().16b, {v20.16b-v23.16b}, v8.16b
147         tbx             \in1\().16b, {v20.16b-v23.16b}, v9.16b
148         sub             v8.16b, v8.16b, v15.16b
149         tbx             \in2\().16b, {v20.16b-v23.16b}, v10.16b
150         sub             v9.16b, v9.16b, v15.16b
151         tbx             \in3\().16b, {v20.16b-v23.16b}, v11.16b
152         sub             v10.16b, v10.16b, v15.16b
153         tbx             \in0\().16b, {v24.16b-v27.16b}, v8.16b
154         sub             v11.16b, v11.16b, v15.16b
155         tbx             \in1\().16b, {v24.16b-v27.16b}, v9.16b
156         sub             v8.16b, v8.16b, v15.16b
157         tbx             \in2\().16b, {v24.16b-v27.16b}, v10.16b
158         sub             v9.16b, v9.16b, v15.16b
159         tbx             \in3\().16b, {v24.16b-v27.16b}, v11.16b
160         sub             v10.16b, v10.16b, v15.16b
161         tbx             \in0\().16b, {v28.16b-v31.16b}, v8.16b
162         sub             v11.16b, v11.16b, v15.16b
163         tbx             \in1\().16b, {v28.16b-v31.16b}, v9.16b
164         tbx             \in2\().16b, {v28.16b-v31.16b}, v10.16b
165         tbx             \in3\().16b, {v28.16b-v31.16b}, v11.16b
166         .endm
168         .macro          mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
            /*
             * Two interleaved copies of the mul_by_x computation:
             *   \out0 = (\in0 << 1) ^ (mask0 & \const)
             *   \out1 = (\in1 << 1) ^ (mask1 & \const)
             * where maskN is \inN arithmetically shifted right by 7 per
             * byte, i.e. 0xff where the top bit was set and 0x00 elsewhere.
             *
             * Unlike mul_by_x, every argument is a bare register name; the
             * macro appends .16b itself. \tmp0 and \tmp1 are overwritten
             * and must be distinct from the input and output registers.
             */
169         sshr            \tmp0\().16b, \in0\().16b, #7
170         shl             \out0\().16b, \in0\().16b, #1
171         sshr            \tmp1\().16b, \in1\().16b, #7
172         and             \tmp0\().16b, \tmp0\().16b, \const\().16b
173         shl             \out1\().16b, \in1\().16b, #1
174         and             \tmp1\().16b, \tmp1\().16b, \const\().16b
175         eor             \out0\().16b, \out0\().16b, \tmp0\().16b
176         eor             \out1\().16b, \out1\().16b, \tmp1\().16b
177         .endm
179         .macro          mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
            /*
             * Two interleaved copies of the mul_by_x2 computation:
             *   \out0 = (\in0 << 2) ^ pmul(\in0 >> 6, \const)
             *   \out1 = (\in1 << 2) ^ pmul(\in1 >> 6, \const)
             * with all shifts and the polynomial multiply done per byte.
             *
             * Unlike mul_by_x2, every argument is a bare register name; the
             * macro appends .16b itself. \tmp0 and \tmp1 are overwritten
             * and must be distinct from the input and output registers.
             */
180         ushr            \tmp0\().16b, \in0\().16b, #6
181         shl             \out0\().16b, \in0\().16b, #2
182         ushr            \tmp1\().16b, \in1\().16b, #6
183         pmul            \tmp0\().16b, \tmp0\().16b, \const\().16b
184         shl             \out1\().16b, \in1\().16b, #2
185         pmul            \tmp1\().16b, \tmp1\().16b, \const\().16b
186         eor             \out0\().16b, \out0\().16b, \tmp0\().16b
187         eor             \out1\().16b, \out1\().16b, \tmp1\().16b
188         .endm
190         .macro          mix_columns_2x, in0, in1, enc
            /*
             * MixColumns on two states at once, with the two instruction
             * streams interleaved.
             *
             * \in0, \in1 - bare vector register names, updated in place;
             *              must not be any of v8-v11.
             * \enc       - assemble-time constant: 0 adds the pre-multiply
             *              step for the inverse transformation, any other
             *              value omits it.
             *
             * Expects v12 = 0x1b in every byte and v14 = .Lror32by8, both
             * loaded by 'prepare'. Overwrites v8, v9, v10 and v11.
             */
191         .if             \enc == 0
192         /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
193         mul_by_x2_2x    v8, v9, \in0, \in1, v10, v11, v12
194         eor             \in0\().16b, \in0\().16b, v8.16b
195         rev32           v8.8h, v8.8h
196         eor             \in1\().16b, \in1\().16b, v9.16b
197         rev32           v9.8h, v9.8h
198         eor             \in0\().16b, \in0\().16b, v8.16b
199         eor             \in1\().16b, \in1\().16b, v9.16b
200         .endif
            /* v8/v9 = \in0/\in1 multiplied by x; v10/v11 are scratch here */
202         mul_by_x_2x     v8, v9, \in0, \in1, v10, v11, v12
            /* v10/v11 = inputs with the 16-bit halves of each word swapped */
203         rev32           v10.8h, \in0\().8h
204         rev32           v11.8h, \in1\().8h
205         eor             v10.16b, v10.16b, v8.16b
206         eor             v11.16b, v11.16b, v9.16b
207         eor             \in0\().16b, \in0\().16b, v10.16b
208         eor             \in1\().16b, \in1\().16b, v11.16b
            /* permute the bytes of each 32-bit word using the v14 index vector */
209         tbl             \in0\().16b, {\in0\().16b}, v14.16b
210         tbl             \in1\().16b, {\in1\().16b}, v14.16b
211         eor             \in0\().16b, \in0\().16b, v10.16b
212         eor             \in1\().16b, \in1\().16b, v11.16b
213         .endm
215         .macro          do_block_2x, enc, in0, in1, rounds, rk, rkp, i
            /*
             * Run the AES round loop over two states in parallel, both
             * using the same round keys.
             *
             * \enc        - passed to mix_columns_2x: 0 selects the inverse
             *               variant
             * \in0, \in1  - bare vector register names holding the states;
             *               updated in place
             * \rounds     - round count; must be nonzero, since the loop
             *               exits only when the decremented counter reaches
             *               zero
             * \rk         - register pointing at the first round key; not
             *               modified
             * \rkp        - scratch register, running round-key pointer
             * \i          - scratch register, loop counter
             *
             * Reads \rounds + 1 round keys of 16 bytes each, starting at
             * \rk. Expects the tables set up by 'prepare' (v12-v14, v16-v31).
             * Overwrites v8-v11 and v15, \rkp, \i and the condition flags.
             * v15 alternates between the current round key and the 0x40
             * constant that sub_bytes_2x needs.
             */
216         ld1             {v15.4s}, [\rk]
217         add             \rkp, \rk, #16
218         mov             \i, \rounds
219 1111:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
220         eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
221         movi            v15.16b, #0x40
222         tbl             \in0\().16b, {\in0\().16b}, v13.16b     /* ShiftRows */
223         tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
224         sub_bytes_2x    \in0, \in1
225         subs            \i, \i, #1
            /* fetch the next round key; the beq below acts on the subs result */
226         ld1             {v15.4s}, [\rkp], #16
227         beq             2222f
228         mix_columns_2x  \in0, \in1, \enc
229         b               1111b
            /* the last round skips mix_columns_2x and only adds the final key */
230 2222:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
231         eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
232         .endm
234         .macro          do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
            /*
             * Run the AES round loop over four states in parallel, all
             * using the same round keys.
             *
             * \enc        - passed to mix_columns_2x: 0 selects the inverse
             *               variant
             * \in0..\in3  - bare vector register names holding the states;
             *               updated in place
             * \rounds     - round count; must be nonzero, since the loop
             *               exits only when the decremented counter reaches
             *               zero
             * \rk         - register pointing at the first round key; not
             *               modified
             * \rkp        - scratch register, running round-key pointer
             * \i          - scratch register, loop counter
             *
             * Reads \rounds + 1 round keys of 16 bytes each, starting at
             * \rk. Expects the tables set up by 'prepare' (v12-v14, v16-v31).
             * Overwrites v8-v11 and v15, \rkp, \i and the condition flags.
             * v15 alternates between the current round key and the 0x40
             * constant that sub_bytes_4x needs.
             */
235         ld1             {v15.4s}, [\rk]
236         add             \rkp, \rk, #16
237         mov             \i, \rounds
238 1111:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
239         eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
240         eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
241         eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
242         movi            v15.16b, #0x40
243         tbl             \in0\().16b, {\in0\().16b}, v13.16b     /* ShiftRows */
244         tbl             \in1\().16b, {\in1\().16b}, v13.16b     /* ShiftRows */
245         tbl             \in2\().16b, {\in2\().16b}, v13.16b     /* ShiftRows */
246         tbl             \in3\().16b, {\in3\().16b}, v13.16b     /* ShiftRows */
247         sub_bytes_4x    \in0, \in1, \in2, \in3
248         subs            \i, \i, #1
            /* fetch the next round key; the beq below acts on the subs result */
249         ld1             {v15.4s}, [\rkp], #16
250         beq             2222f
            /* there is no 4-way MixColumns: the 2-way macro is applied twice */
251         mix_columns_2x  \in0, \in1, \enc
252         mix_columns_2x  \in2, \in3, \enc
253         b               1111b
            /* the last round skips MixColumns and only adds the final key */
254 2222:   eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
255         eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
256         eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
257         eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
258         .endm
260         .macro          encrypt_block2x, in0, in1, rounds, rk, rkp, i
            /*
             * Two-state encryption: do_block_2x with enc = 1. Arguments
             * and clobbers are those of do_block_2x.
             */
261         do_block_2x     1, \in0, \in1, \rounds, \rk, \rkp, \i
262         .endm
264         .macro          decrypt_block2x, in0, in1, rounds, rk, rkp, i
            /*
             * Two-state decryption: do_block_2x with enc = 0, which makes
             * mix_columns_2x emit its inverse pre-multiply step. Arguments
             * and clobbers are those of do_block_2x.
             */
265         do_block_2x     0, \in0, \in1, \rounds, \rk, \rkp, \i
266         .endm
268         .macro          encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
            /*
             * Four-state encryption: do_block_4x with enc = 1. Arguments
             * and clobbers are those of do_block_4x.
             */
269         do_block_4x     1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
270         .endm
272         .macro          decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
            /*
             * Four-state decryption: do_block_4x with enc = 0, which makes
             * mix_columns_2x emit its inverse pre-multiply step. Arguments
             * and clobbers are those of do_block_4x.
             */
273         do_block_4x     0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
274         .endm
276 #include "aes-modes.S"
278         .section        ".rodata", "a"
            /*
             * Read-only lookup tables used by the macros in this file.
             * NOTE(review): '.align 6' is presumably a power-of-two request
             * (64-byte alignment) on this target - confirm.
             */
279         .align          6
280 .LForward_Sbox:
            /*
             * Forward substitution table: 256 bytes, 8 per line, entry N at
             * byte offset N. enc_prepare passes this label to 'prepare',
             * which loads the whole table into v16-v31.
             */
281         .byte           0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
282         .byte           0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
283         .byte           0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
284         .byte           0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
285         .byte           0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
286         .byte           0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
287         .byte           0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
288         .byte           0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
289         .byte           0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
290         .byte           0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
291         .byte           0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
292         .byte           0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
293         .byte           0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
294         .byte           0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
295         .byte           0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
296         .byte           0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
297         .byte           0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
298         .byte           0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
299         .byte           0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
300         .byte           0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
301         .byte           0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
302         .byte           0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
303         .byte           0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
304         .byte           0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
305         .byte           0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
306         .byte           0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
307         .byte           0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
308         .byte           0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
309         .byte           0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
310         .byte           0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
311         .byte           0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
312         .byte           0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
314 .LReverse_Sbox:
            /*
             * Reverse substitution table: 256 bytes, 8 per line, entry N at
             * byte offset N. dec_prepare passes this label to 'prepare',
             * which loads the whole table into v16-v31.
             */
315         .byte           0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
316         .byte           0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
317         .byte           0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
318         .byte           0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
319         .byte           0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
320         .byte           0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
321         .byte           0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
322         .byte           0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
323         .byte           0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
324         .byte           0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
325         .byte           0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
326         .byte           0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
327         .byte           0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
328         .byte           0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
329         .byte           0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
330         .byte           0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
331         .byte           0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
332         .byte           0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
333         .byte           0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
334         .byte           0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
335         .byte           0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
336         .byte           0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
337         .byte           0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
338         .byte           0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
339         .byte           0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
340         .byte           0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
341         .byte           0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
342         .byte           0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
343         .byte           0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
344         .byte           0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
345         .byte           0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
346         .byte           0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
348 .LForward_ShiftRows:
            /*
             * tbl index vector that enc_prepare has 'prepare' load into v13;
             * the do_block* macros apply it as the ShiftRows step. Read from
             * the least significant byte upwards the indices are
             * 00 05 0a 0f 04 09 0e 03 08 0d 02 07 0c 01 06 0b.
             */
349         .octa           0x0b06010c07020d08030e09040f0a0500
351 .LReverse_ShiftRows:
            /*
             * tbl index vector that dec_prepare has 'prepare' load into v13;
             * the do_block* macros apply it as the ShiftRows step when
             * decrypting. Read from the least significant byte upwards the
             * indices are 00 0d 0a 07 04 01 0e 0b 08 05 02 0f 0c 09 06 03.
             */
352         .octa           0x0306090c0f0205080b0e0104070a0d00
354 .Lror32by8:
            /*
             * tbl index vector that 'prepare' loads into v14 and the
             * mix_columns* macros apply to the state. Read from the least
             * significant byte upwards the indices are
             * 01 02 03 00 05 06 07 04 09 0a 0b 08 0d 0e 0f 0c, so within
             * each group of four bytes, byte k is taken from byte (k + 1) mod 4.
             */
355         .octa           0x0c0f0e0d080b0a090407060500030201