1 /* SPDX-License-Identifier: GPL-2.0-only */
3 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
5 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
8 #include <linux/linkage.h>
9 #include <asm/assembler.h>
/*
 * Prefix every AES mode entry point emitted by the included aes-modes.S
 * with "neon_" so this build produces distinct symbol names.
 */
11 #define AES_ENTRY(func) SYM_FUNC_START(neon_ ## func)
12 #define AES_ENDPROC(func) SYM_FUNC_END(neon_ ## func)
/*
 * XTS helper hooks consumed by the shared mode code in aes-modes.S.
 * NOTE(review): the bodies of both macros are not visible in this
 * listing — verify against the complete file before relying on them.
 */
18 .macro xts_reload_mask, tmp
22 /* special case for the neon-bs driver calling into this one for CTS */
23 .macro xts_cts_skip_tw, reg, lbl
27 /* multiply by polynomial 'x' in GF(2^8) */
28 .macro mul_by_x, out, in, temp, const
/* NOTE(review): the shift setup is missing from this listing; \temp is
 * presumably a per-byte MSB mask (as built by sshr #7 in mul_by_x_2x
 * below) being ANDed with the reduction constant — confirm. */
31 and \temp, \temp, \const
35 /* multiply by polynomial 'x^2' in GF(2^8) */
36 .macro mul_by_x2, out, in, temp, const
/* polynomial multiply of the carried-out bits by the reduction constant,
 * mirroring mul_by_x2_2x below; the shift setup is not visible here */
39 pmul \temp, \temp, \const
43 /* preload the entire Sbox */
44 .macro prepare, sbox, shiftrows, temp
/* v13 = ShiftRows permutation, v14 = rotate-32-bit-words-by-8 table */
46 ldr_l q13, \shiftrows, \temp
47 ldr_l q14, .Lror32by8, \temp
/* NOTE(review): the instruction loading the address of \sbox into \temp
 * (and any constant splats) is missing from this listing — confirm. */
/* 256-byte Sbox into v16-v31, 64 bytes per tbl/tbx register group */
49 ld1 {v16.16b-v19.16b}, [\temp], #64
50 ld1 {v20.16b-v23.16b}, [\temp], #64
51 ld1 {v24.16b-v27.16b}, [\temp], #64
52 ld1 {v28.16b-v31.16b}, [\temp]
55 /* do preload for encryption */
56 .macro enc_prepare, ignore0, ignore1, temp
57 prepare crypto_aes_sbox, .LForward_ShiftRows, \temp
/* per-key switch hook; body not visible in this listing — confirm */
60 .macro enc_switch_key, ignore0, ignore1, temp
64 /* do preload for decryption */
65 .macro dec_prepare, ignore0, ignore1, temp
/* inverse Sbox and reverse ShiftRows tables for the decrypt direction */
66 prepare crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
69 /* apply SubBytes transformation using the preloaded Sbox */
/*
 * NOTE(review): the .macro header line for sub_bytes is not visible in
 * this listing. tbl writes zero for out-of-range indices while tbx
 * leaves the destination lane untouched, so each 64-entry quarter of
 * the 256-byte Sbox is applied in turn, rebasing the index downward at
 * each step; v15 is presumably a splat of 0x40 set up by the caller
 * (that setup is not visible here) — confirm.
 */
71 sub v9.16b, \in\().16b, v15.16b
72 tbl \in\().16b, {v16.16b-v19.16b}, \in\().16b
73 sub v10.16b, v9.16b, v15.16b
74 tbx \in\().16b, {v20.16b-v23.16b}, v9.16b
75 sub v11.16b, v10.16b, v15.16b
76 tbx \in\().16b, {v24.16b-v27.16b}, v10.16b
77 tbx \in\().16b, {v28.16b-v31.16b}, v11.16b
80 /* apply MixColumns transformation */
81 .macro mix_columns, in, enc
/* NOTE(review): this listing appears to be missing the conditional
 * guard lines (selecting on \enc) around the inverse pre-multiply
 * below, plus a rev32 between the two eors — confirm. */
83 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
84 mul_by_x2 v8.16b, \in\().16b, v9.16b, v12.16b
85 eor \in\().16b, \in\().16b, v8.16b
87 eor \in\().16b, \in\().16b, v8.16b
/* forward MixColumns: x*state combined with byte rotations of columns */
90 mul_by_x v9.16b, \in\().16b, v8.16b, v12.16b
91 rev32 v8.8h, \in\().8h
92 eor v8.16b, v8.16b, v9.16b
93 eor \in\().16b, \in\().16b, v8.16b
94 tbl \in\().16b, {\in\().16b}, v14.16b /* ror each 32-bit word by 8 */
95 eor \in\().16b, \in\().16b, v8.16b
/*
 * Full AES en/decryption of one 16-byte state held in \in.
 * NOTE(review): this listing is missing the round-counter setup, the
 * reload of v15 with the index-rebase splat before SubBytes, the
 * sub_bytes invocation, and the loop branches — confirm against the
 * complete file.
 */
98 .macro do_block, enc, in, rounds, rk, rkp, i
102 1111: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
104 tbl \in\().16b, {\in\().16b}, v13.16b /* ShiftRows */
/* fetch the next round key for the tail eor below */
107 ld1 {v15.4s}, [\rkp], #16
109 mix_columns \in, \enc
111 2222: eor \in\().16b, \in\().16b, v15.16b /* ^round key */
/* single-block wrappers around do_block: enc flag 1 = encrypt, 0 = decrypt */
114 .macro encrypt_block, in, rounds, rk, rkp, i
115 do_block 1, \in, \rounds, \rk, \rkp, \i
118 .macro decrypt_block, in, rounds, rk, rkp, i
119 do_block 0, \in, \rounds, \rk, \rkp, \i
123 * Interleaved versions: functionally equivalent to the
124 * ones above, but applied to AES states in parallel.
/*
 * 4-way SubBytes: the same tbl/tbx quarter-table walk as sub_bytes,
 * software-pipelined across four states so the index-rebase subs can
 * overlap the table lookups. v15 is presumably a splat of 0x40 set up
 * by the caller (setup not visible in this listing) — confirm.
 */
127 .macro sub_bytes_4x, in0, in1, in2, in3
128 sub v8.16b, \in0\().16b, v15.16b
129 tbl \in0\().16b, {v16.16b-v19.16b}, \in0\().16b
130 sub v9.16b, \in1\().16b, v15.16b
131 tbl \in1\().16b, {v16.16b-v19.16b}, \in1\().16b
132 sub v10.16b, \in2\().16b, v15.16b
133 tbl \in2\().16b, {v16.16b-v19.16b}, \in2\().16b
134 sub v11.16b, \in3\().16b, v15.16b
135 tbl \in3\().16b, {v16.16b-v19.16b}, \in3\().16b
136 tbx \in0\().16b, {v20.16b-v23.16b}, v8.16b
137 tbx \in1\().16b, {v20.16b-v23.16b}, v9.16b
138 sub v8.16b, v8.16b, v15.16b
139 tbx \in2\().16b, {v20.16b-v23.16b}, v10.16b
140 sub v9.16b, v9.16b, v15.16b
141 tbx \in3\().16b, {v20.16b-v23.16b}, v11.16b
142 sub v10.16b, v10.16b, v15.16b
143 tbx \in0\().16b, {v24.16b-v27.16b}, v8.16b
144 sub v11.16b, v11.16b, v15.16b
145 tbx \in1\().16b, {v24.16b-v27.16b}, v9.16b
146 sub v8.16b, v8.16b, v15.16b
147 tbx \in2\().16b, {v24.16b-v27.16b}, v10.16b
148 sub v9.16b, v9.16b, v15.16b
149 tbx \in3\().16b, {v24.16b-v27.16b}, v11.16b
150 sub v10.16b, v10.16b, v15.16b
151 tbx \in0\().16b, {v28.16b-v31.16b}, v8.16b
152 sub v11.16b, v11.16b, v15.16b
153 tbx \in1\().16b, {v28.16b-v31.16b}, v9.16b
154 tbx \in2\().16b, {v28.16b-v31.16b}, v10.16b
155 tbx \in3\().16b, {v28.16b-v31.16b}, v11.16b
/*
 * GF(2^8) multiply-by-x ("xtime") of two vectors at once:
 *   out = (in << 1) ^ (\const in lanes whose MSB was set)
 * sshr #7 broadcasts each byte's MSB into an all-ones/all-zeros mask;
 * \const is presumably a splat of the 0x1b reduction term — confirm
 * (its setup is not visible in this listing).
 */
158 .macro mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
159 sshr \tmp0\().16b, \in0\().16b, #7
160 shl \out0\().16b, \in0\().16b, #1
161 sshr \tmp1\().16b, \in1\().16b, #7
162 and \tmp0\().16b, \tmp0\().16b, \const\().16b
163 shl \out1\().16b, \in1\().16b, #1
164 and \tmp1\().16b, \tmp1\().16b, \const\().16b
165 eor \out0\().16b, \out0\().16b, \tmp0\().16b
166 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/*
 * GF(2^8) multiply-by-x^2 of two vectors at once:
 *   out = (in << 2) ^ pmul(in >> 6, \const)
 * The two bits shifted out of each byte are folded back in via a
 * polynomial multiply with the reduction constant.
 */
169 .macro mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
170 ushr \tmp0\().16b, \in0\().16b, #6
171 shl \out0\().16b, \in0\().16b, #2
172 ushr \tmp1\().16b, \in1\().16b, #6
173 pmul \tmp0\().16b, \tmp0\().16b, \const\().16b
174 shl \out1\().16b, \in1\().16b, #2
175 pmul \tmp1\().16b, \tmp1\().16b, \const\().16b
176 eor \out0\().16b, \out0\().16b, \tmp0\().16b
177 eor \out1\().16b, \out1\().16b, \tmp1\().16b
/* 2-way interleaved MixColumns — same transform as mix_columns above */
180 .macro mix_columns_2x, in0, in1, enc
/* NOTE(review): this listing appears to be missing the conditional
 * guard (selecting on \enc) around the inverse pre-multiply, plus two
 * interleaved rev32 lines — confirm against the complete file. */
182 /* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
183 mul_by_x2_2x v8, v9, \in0, \in1, v10, v11, v12
184 eor \in0\().16b, \in0\().16b, v8.16b
186 eor \in1\().16b, \in1\().16b, v9.16b
188 eor \in0\().16b, \in0\().16b, v8.16b
189 eor \in1\().16b, \in1\().16b, v9.16b
/* forward MixColumns applied to both states */
192 mul_by_x_2x v8, v9, \in0, \in1, v10, v11, v12
193 rev32 v10.8h, \in0\().8h
194 rev32 v11.8h, \in1\().8h
195 eor v10.16b, v10.16b, v8.16b
196 eor v11.16b, v11.16b, v9.16b
197 eor \in0\().16b, \in0\().16b, v10.16b
198 eor \in1\().16b, \in1\().16b, v11.16b
199 tbl \in0\().16b, {\in0\().16b}, v14.16b /* ror each 32-bit word by 8 */
200 tbl \in1\().16b, {\in1\().16b}, v14.16b
201 eor \in0\().16b, \in0\().16b, v10.16b
202 eor \in1\().16b, \in1\().16b, v11.16b
/*
 * Four AES states en/decrypted in parallel; same round structure as
 * do_block above. NOTE(review): this listing is missing the setup
 * lines, the reload of v15 with the index-rebase splat, the loop
 * counter and branches — confirm against the complete file.
 */
205 .macro do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
209 1111: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
210 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
211 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
212 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
214 tbl \in0\().16b, {\in0\().16b}, v13.16b /* ShiftRows */
215 tbl \in1\().16b, {\in1\().16b}, v13.16b /* ShiftRows */
216 tbl \in2\().16b, {\in2\().16b}, v13.16b /* ShiftRows */
217 tbl \in3\().16b, {\in3\().16b}, v13.16b /* ShiftRows */
218 sub_bytes_4x \in0, \in1, \in2, \in3
/* fetch the next round key for the tail eors below */
220 ld1 {v15.4s}, [\rkp], #16
222 mix_columns_2x \in0, \in1, \enc
223 mix_columns_2x \in2, \in3, \enc
225 2222: eor \in0\().16b, \in0\().16b, v15.16b /* ^round key */
226 eor \in1\().16b, \in1\().16b, v15.16b /* ^round key */
227 eor \in2\().16b, \in2\().16b, v15.16b /* ^round key */
228 eor \in3\().16b, \in3\().16b, v15.16b /* ^round key */
/* 4-way wrappers around do_block_4x: enc flag 1 = encrypt, 0 = decrypt */
231 .macro encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
232 do_block_4x 1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
235 .macro decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
236 do_block_4x 0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
239 #include "aes-modes.S"
241 .section ".rodata", "a"
/* NOTE(review): the labels (and any .align) for these three tbl
 * permutation tables are missing from this listing. By their order and
 * the ldr_l references above, they are presumably the forward
 * ShiftRows, reverse ShiftRows, and .Lror32by8 tables — confirm. */
244 .octa 0x0b06010c07020d08030e09040f0a0500
247 .octa 0x0306090c0f0205080b0e0104070a0d00
250 .octa 0x0c0f0e0d080b0a090407060500030201