1 /* SPDX-License-Identifier: GPL-2.0-or-later */
3 * Implement AES algorithm in Intel AES-NI instructions.
5 * The white paper of AES-NI instructions can be downloaded from:
6 * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
8 * Copyright (C) 2008, Intel Corp.
9 * Author: Huang Ying <ying.huang@intel.com>
10 * Vinodh Gopal <vinodh.gopal@intel.com>
13 * Copyright (c) 2010, Intel Corporation.
15 * Ported x86_64 version to x86:
16 * Author: Mathias Krause <minipli@googlemail.com>
19 #include <linux/linkage.h>
20 #include <asm/frame.h>
/* XMM register holding the byte-swap shuffle mask (loaded in _aesni_inc_init). */
35 #define BSWAP_MASK %xmm10
/* XMM register holding the GF(2^128) multiply mask (loaded in _aesni_xts_crypt). */
39 #define GF128MUL_MASK %xmm7
/*
 * _key_expansion_256a: key-schedule step called from aesni_set_key.
 * The alias below also exposes it as _key_expansion_128, so the AES-128
 * rounds run the same code.
 *
 * input:
 *	%xmm0:	key material being expanded
 *	%xmm1:	aeskeygenassist result produced by the caller
 *	%xmm4:	zeroed by aesni_set_key before the first expansion call
 */
67 SYM_FUNC_START_LOCAL(_key_expansion_256a)
/* replicate dword 3 of the aeskeygenassist result into all four lanes */
68 pshufd $0b11111111, %xmm1, %xmm1
69 shufps $0b00010000, %xmm0, %xmm4
71 shufps $0b10001100, %xmm0, %xmm4
77 SYM_FUNC_END(_key_expansion_256a)
78 SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a)
/*
 * _key_expansion_192a: key-schedule step used for the odd-numbered rounds
 * of the 192-bit sequence in aesni_set_key.
 *
 * input:
 *	%xmm0, %xmm2:	key material being expanded
 *	%xmm1:		aeskeygenassist result produced by the caller
 *	%xmm4:		zeroed by aesni_set_key before the first expansion call
 */
80 SYM_FUNC_START_LOCAL(_key_expansion_192a)
/* replicate dword 1 of the aeskeygenassist result into all four lanes */
81 pshufd $0b01010101, %xmm1, %xmm1
82 shufps $0b00010000, %xmm0, %xmm4
84 shufps $0b10001100, %xmm0, %xmm4
/* replicate dword 3 of %xmm0 into %xmm3 */
91 pshufd $0b11111111, %xmm0, %xmm3
96 shufps $0b01000100, %xmm0, %xmm6
98 shufps $0b01001110, %xmm2, %xmm1
/* aligned store into the 16-byte slot following TKEYP */
99 movaps %xmm1, 0x10(TKEYP)
102 SYM_FUNC_END(_key_expansion_192a)
/*
 * _key_expansion_192b: key-schedule step used for the even-numbered rounds
 * of the 192-bit sequence in aesni_set_key.
 *
 * input:
 *	%xmm0:	key material being expanded
 *	%xmm1:	aeskeygenassist result produced by the caller
 *	%xmm4:	zeroed by aesni_set_key before the first expansion call
 */
104 SYM_FUNC_START_LOCAL(_key_expansion_192b)
/* replicate dword 1 of the aeskeygenassist result into all four lanes */
105 pshufd $0b01010101, %xmm1, %xmm1
106 shufps $0b00010000, %xmm0, %xmm4
108 shufps $0b10001100, %xmm0, %xmm4
/* replicate dword 3 of %xmm0 into %xmm3 */
114 pshufd $0b11111111, %xmm0, %xmm3
/* aligned store of %xmm0 into the slot TKEYP points at */
118 movaps %xmm0, (TKEYP)
121 SYM_FUNC_END(_key_expansion_192b)
/*
 * _key_expansion_256b: second key-schedule step of each 256-bit round in
 * aesni_set_key; operates on %xmm2 where _key_expansion_256a uses %xmm0.
 *
 * input:
 *	%xmm2:	key material being expanded
 *	%xmm1:	aeskeygenassist result produced by the caller
 *	%xmm4:	zeroed by aesni_set_key before the first expansion call
 */
123 SYM_FUNC_START_LOCAL(_key_expansion_256b)
/* replicate dword 2 of the aeskeygenassist result into all four lanes */
124 pshufd $0b10101010, %xmm1, %xmm1
125 shufps $0b00010000, %xmm2, %xmm4
127 shufps $0b10001100, %xmm2, %xmm4
/* aligned store of %xmm2 into the slot TKEYP points at */
130 movaps %xmm2, (TKEYP)
133 SYM_FUNC_END(_key_expansion_256b)
136 * void aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
137 * unsigned int key_len)
139 SYM_FUNC_START(aesni_set_key)
/*
 * Builds the round-key schedule inside *ctx from the user key.  Three
 * instruction runs follow, each built on a different helper pair:
 * _key_expansion_256a/_key_expansion_256b, _key_expansion_192a/
 * _key_expansion_192b, and _key_expansion_128.
 */
143 movl (FRAME_OFFSET+8)(%esp), KEYP # ctx
144 movl (FRAME_OFFSET+12)(%esp), UKEYP # in_key
145 movl (FRAME_OFFSET+16)(%esp), %edx # key_len
147 movups (UKEYP), %xmm0 # user key (first 16 bytes)
/* TKEYP starts one 16-byte slot past KEYP */
149 lea 0x10(KEYP), TKEYP # key addr
151 pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
/*
 * Run using a second 16-byte key block: it is stored directly at TKEYP,
 * then each round calls _key_expansion_256a followed by
 * _key_expansion_256b (round 7 calls only the former).
 */
155 movups 0x10(UKEYP), %xmm2 # other user key
156 movaps %xmm2, (TKEYP)
158 aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
159 call _key_expansion_256a
160 aeskeygenassist $0x1, %xmm0, %xmm1
161 call _key_expansion_256b
162 aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
163 call _key_expansion_256a
164 aeskeygenassist $0x2, %xmm0, %xmm1
165 call _key_expansion_256b
166 aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
167 call _key_expansion_256a
168 aeskeygenassist $0x4, %xmm0, %xmm1
169 call _key_expansion_256b
170 aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
171 call _key_expansion_256a
172 aeskeygenassist $0x8, %xmm0, %xmm1
173 call _key_expansion_256b
174 aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
175 call _key_expansion_256a
176 aeskeygenassist $0x10, %xmm0, %xmm1
177 call _key_expansion_256b
178 aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
179 call _key_expansion_256a
180 aeskeygenassist $0x20, %xmm0, %xmm1
181 call _key_expansion_256b
182 aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
183 call _key_expansion_256a
/*
 * Run loading only 8 further key bytes (movq) into %xmm2; rounds alternate
 * between _key_expansion_192a and _key_expansion_192b.
 */
186 movq 0x10(UKEYP), %xmm2 # other user key
187 aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
188 call _key_expansion_192a
189 aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
190 call _key_expansion_192b
191 aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
192 call _key_expansion_192a
193 aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
194 call _key_expansion_192b
195 aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
196 call _key_expansion_192a
197 aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
198 call _key_expansion_192b
199 aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
200 call _key_expansion_192a
201 aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
202 call _key_expansion_192b
/*
 * Run using only %xmm0 (the first 16 key bytes) as key material; ten
 * rounds through _key_expansion_128.
 */
205 aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
206 call _key_expansion_128
207 aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
208 call _key_expansion_128
209 aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
210 call _key_expansion_128
211 aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
212 call _key_expansion_128
213 aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
214 call _key_expansion_128
215 aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
216 call _key_expansion_128
217 aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
218 call _key_expansion_128
219 aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
220 call _key_expansion_128
221 aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
222 call _key_expansion_128
223 aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
224 call _key_expansion_128
/*
 * Round keys are additionally written at a 240-byte displacement from
 * KEYP/TKEYP.  NOTE(review): presumably this fills the decryption key
 * schedule - confirm.
 */
228 movaps (TKEYP), %xmm1
229 movaps %xmm0, 240(TKEYP)
230 movaps %xmm1, 240(KEYP)
/* UKEYP is reused here as a destination pointer */
232 lea 240-16(TKEYP), UKEYP
237 movaps %xmm1, (UKEYP)
247 SYM_FUNC_END(aesni_set_key)
250 * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
252 SYM_FUNC_START(aesni_enc)
/*
 * Single-block entry point: fetches ctx/dst/src from the stack, reads the
 * key length from offset 480 of ctx, loads 16 bytes from src into STATE
 * (unaligned load) and stores STATE to dst (unaligned store).
 * NOTE(review): the transformation of STATE between the load and the store
 * is presumably done by _aesni_enc1 - confirm.
 */
257 movl (FRAME_OFFSET+12)(%esp), KEYP # ctx
258 movl (FRAME_OFFSET+16)(%esp), OUTP # dst
259 movl (FRAME_OFFSET+20)(%esp), INP # src
261 movl 480(KEYP), KLEN # key length
262 movups (INP), STATE # input
264 movups STATE, (OUTP) # output
271 SYM_FUNC_END(aesni_enc)
274 * _aesni_enc1: internal ABI
276 * KEYP: key struct pointer
278 * STATE: initial state (input)
280 * STATE: final state (output)
285 SYM_FUNC_START_LOCAL(_aesni_enc1)
/*
 * Round 0 XORs the round key at (KEYP) into STATE.  The remaining round
 * keys are fetched through TKEYP with displacements from -0x60 up to 0x70;
 * the key read from 0x70(TKEYP) is the one consumed by aesenclast.
 * All round-key loads are aligned (movaps).
 */
286 movaps (KEYP), KEY # key
288 pxor KEY, STATE # round 0
292 lea 0x20(TKEYP), TKEYP
295 movaps -0x60(TKEYP), KEY
297 movaps -0x50(TKEYP), KEY
301 movaps -0x40(TKEYP), KEY
303 movaps -0x30(TKEYP), KEY
307 movaps -0x20(TKEYP), KEY
309 movaps -0x10(TKEYP), KEY
313 movaps 0x10(TKEYP), KEY
315 movaps 0x20(TKEYP), KEY
317 movaps 0x30(TKEYP), KEY
319 movaps 0x40(TKEYP), KEY
321 movaps 0x50(TKEYP), KEY
323 movaps 0x60(TKEYP), KEY
325 movaps 0x70(TKEYP), KEY
326 aesenclast KEY, STATE
328 SYM_FUNC_END(_aesni_enc1)
331 * _aesni_enc4: internal ABI
333 * KEYP: key struct pointer
335 * STATE1: initial state (input)
340 * STATE1: final state (output)
348 SYM_FUNC_START_LOCAL(_aesni_enc4)
/*
 * Four-block counterpart of _aesni_enc1: the same round-key addressing
 * through TKEYP (displacements -0x60..0x70), with each loaded key shared
 * by STATE1..STATE4.  The final key is applied to all four states with
 * aesenclast.
 */
349 movaps (KEYP), KEY # key
351 pxor KEY, STATE1 # round 0
358 lea 0x20(TKEYP), TKEYP
361 movaps -0x60(TKEYP), KEY
366 movaps -0x50(TKEYP), KEY
373 movaps -0x40(TKEYP), KEY
378 movaps -0x30(TKEYP), KEY
385 movaps -0x20(TKEYP), KEY
390 movaps -0x10(TKEYP), KEY
400 movaps 0x10(TKEYP), KEY
405 movaps 0x20(TKEYP), KEY
410 movaps 0x30(TKEYP), KEY
415 movaps 0x40(TKEYP), KEY
420 movaps 0x50(TKEYP), KEY
425 movaps 0x60(TKEYP), KEY
430 movaps 0x70(TKEYP), KEY
431 aesenclast KEY, STATE1 # last round
432 aesenclast KEY, STATE2
433 aesenclast KEY, STATE3
434 aesenclast KEY, STATE4
436 SYM_FUNC_END(_aesni_enc4)
439 * void aesni_dec(const void *ctx, u8 *dst, const u8 *src)
441 SYM_FUNC_START(aesni_dec)
/*
 * Single-block entry point mirroring aesni_enc: fetches ctx/dst/src from
 * the stack, reads the key length from offset 480 of ctx, loads 16 bytes
 * from src into STATE and stores STATE to dst (both unaligned accesses).
 * NOTE(review): the transformation of STATE between the load and the store
 * is presumably done by _aesni_dec1 - confirm.
 */
446 movl (FRAME_OFFSET+12)(%esp), KEYP # ctx
447 movl (FRAME_OFFSET+16)(%esp), OUTP # dst
448 movl (FRAME_OFFSET+20)(%esp), INP # src
450 mov 480(KEYP), KLEN # key length
452 movups (INP), STATE # input
454 movups STATE, (OUTP) #output
461 SYM_FUNC_END(aesni_dec)
464 * _aesni_dec1: internal ABI
466 * KEYP: key struct pointer
468 * STATE: initial state (input)
470 * STATE: final state (output)
475 SYM_FUNC_START_LOCAL(_aesni_dec1)
/*
 * Decryption counterpart of _aesni_enc1.  Round 0 XORs the round key at
 * (KEYP) into STATE; the remaining round keys are fetched through TKEYP
 * with displacements from -0x60 up to 0x70, and the key read from
 * 0x70(TKEYP) is the one consumed by aesdeclast.
 */
476 movaps (KEYP), KEY # key
478 pxor KEY, STATE # round 0
482 lea 0x20(TKEYP), TKEYP
485 movaps -0x60(TKEYP), KEY
487 movaps -0x50(TKEYP), KEY
491 movaps -0x40(TKEYP), KEY
493 movaps -0x30(TKEYP), KEY
497 movaps -0x20(TKEYP), KEY
499 movaps -0x10(TKEYP), KEY
503 movaps 0x10(TKEYP), KEY
505 movaps 0x20(TKEYP), KEY
507 movaps 0x30(TKEYP), KEY
509 movaps 0x40(TKEYP), KEY
511 movaps 0x50(TKEYP), KEY
513 movaps 0x60(TKEYP), KEY
515 movaps 0x70(TKEYP), KEY
516 aesdeclast KEY, STATE
518 SYM_FUNC_END(_aesni_dec1)
521 * _aesni_dec4: internal ABI
523 * KEYP: key struct pointer
525 * STATE1: initial state (input)
530 * STATE1: final state (output)
538 SYM_FUNC_START_LOCAL(_aesni_dec4)
/*
 * Four-block counterpart of _aesni_dec1: the same round-key addressing
 * through TKEYP (displacements -0x60..0x70), with each loaded key shared
 * by STATE1..STATE4.  The final key is applied to all four states with
 * aesdeclast.
 */
539 movaps (KEYP), KEY # key
541 pxor KEY, STATE1 # round 0
548 lea 0x20(TKEYP), TKEYP
551 movaps -0x60(TKEYP), KEY
556 movaps -0x50(TKEYP), KEY
563 movaps -0x40(TKEYP), KEY
568 movaps -0x30(TKEYP), KEY
575 movaps -0x20(TKEYP), KEY
580 movaps -0x10(TKEYP), KEY
590 movaps 0x10(TKEYP), KEY
595 movaps 0x20(TKEYP), KEY
600 movaps 0x30(TKEYP), KEY
605 movaps 0x40(TKEYP), KEY
610 movaps 0x50(TKEYP), KEY
615 movaps 0x60(TKEYP), KEY
620 movaps 0x70(TKEYP), KEY
621 aesdeclast KEY, STATE1 # last round
622 aesdeclast KEY, STATE2
623 aesdeclast KEY, STATE3
624 aesdeclast KEY, STATE4
626 SYM_FUNC_END(_aesni_dec4)
629 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
632 SYM_FUNC_START(aesni_ecb_enc)
/*
 * Fetches ctx/dst/src/len from the stack and tests len for zero.  Data is
 * moved with unaligned loads/stores, either four 16-byte blocks at a time
 * (STATE1..STATE4, 0x40 bytes) or one block (STATE1) at a time.
 */
638 movl (FRAME_OFFSET+16)(%esp), KEYP # ctx
639 movl (FRAME_OFFSET+20)(%esp), OUTP # dst
640 movl (FRAME_OFFSET+24)(%esp), INP # src
641 movl (FRAME_OFFSET+28)(%esp), LEN # len
643 test LEN, LEN # check length
/* four-block group: blocks 1..3 of the group loaded from src */
653 movups 0x10(INP), STATE2
654 movups 0x20(INP), STATE3
655 movups 0x30(INP), STATE4
/* four-block group: all four states written to dst */
657 movups STATE1, (OUTP)
658 movups STATE2, 0x10(OUTP)
659 movups STATE3, 0x20(OUTP)
660 movups STATE4, 0x30(OUTP)
/* single 16-byte block written to dst */
672 movups STATE1, (OUTP)
686 SYM_FUNC_END(aesni_ecb_enc)
689 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
692 SYM_FUNC_START(aesni_ecb_dec)
/*
 * Fetches ctx/dst/src/len from the stack.  Data is moved with unaligned
 * loads/stores, either four 16-byte blocks at a time (STATE1..STATE4,
 * 0x40 bytes) or one block (STATE1) at a time.
 */
698 movl (FRAME_OFFSET+16)(%esp), KEYP # ctx
699 movl (FRAME_OFFSET+20)(%esp), OUTP # dst
700 movl (FRAME_OFFSET+24)(%esp), INP # src
701 movl (FRAME_OFFSET+28)(%esp), LEN # len
/* four-block group: blocks 1..3 of the group loaded from src */
714 movups 0x10(INP), STATE2
715 movups 0x20(INP), STATE3
716 movups 0x30(INP), STATE4
/* four-block group: all four states written to dst */
718 movups STATE1, (OUTP)
719 movups STATE2, 0x10(OUTP)
720 movups STATE3, 0x20(OUTP)
721 movups STATE4, 0x30(OUTP)
/* single 16-byte block written to dst */
733 movups STATE1, (OUTP)
747 SYM_FUNC_END(aesni_ecb_dec)
750 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
751 * size_t len, u8 *iv)
753 SYM_FUNC_START(aesni_cbc_enc)
/*
 * Fetches ctx/dst/src/len/iv from the stack.  STATE is seeded from the IV
 * buffer; each input block is loaded into IN and STATE is what gets
 * written to dst.  All data accesses are unaligned (movups).
 */
760 movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
761 movl (FRAME_OFFSET+24)(%esp), OUTP # dst
762 movl (FRAME_OFFSET+28)(%esp), INP # src
763 movl (FRAME_OFFSET+32)(%esp), LEN # len
764 movl (FRAME_OFFSET+36)(%esp), IVP # iv
769 movups (IVP), STATE # load iv as initial state
772 movups (INP), IN # load input
775 movups STATE, (OUTP) # store output
791 SYM_FUNC_END(aesni_cbc_enc)
794 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
795 * size_t len, u8 *iv)
797 SYM_FUNC_START(aesni_cbc_dec)
/*
 * Fetches ctx/dst/src/len/iv from the stack and bails out to
 * .Lcbc_dec_just_ret on the "below" condition.  Input blocks are kept in
 * IN1..IN4 and four output blocks are written from STATE1..STATE4.
 */
804 movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
805 movl (FRAME_OFFSET+24)(%esp), OUTP # dst
806 movl (FRAME_OFFSET+28)(%esp), INP # src
807 movl (FRAME_OFFSET+32)(%esp), LEN # len
808 movl (FRAME_OFFSET+36)(%esp), IVP # iv
811 jb .Lcbc_dec_just_ret
821 movups 0x10(INP), IN2
/*
 * The blocks at 0x20/0x30 are loaded into IN3/IN4 in one place and into
 * IN1/IN2 in another.  NOTE(review): presumably two register allocations
 * for builds with different numbers of XMM registers - confirm.
 */
824 movups 0x20(INP), IN3
826 movups 0x30(INP), IN4
829 movups 0x20(INP), IN1
831 movups 0x30(INP), IN2
846 movups 0x10(INP), IN2
/* four output blocks written to dst */
849 movups STATE1, (OUTP)
850 movups STATE2, 0x10(OUTP)
851 movups STATE3, 0x20(OUTP)
852 movups STATE4, 0x30(OUTP)
884 SYM_FUNC_END(aesni_cbc_dec)
887 * void aesni_cts_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
888 * size_t len, u8 *iv)
890 SYM_FUNC_START(aesni_cts_cbc_enc)
/* Fetches ctx/dst/src/len/iv from the stack. */
897 movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
898 movl (FRAME_OFFSET+24)(%esp), OUTP # dst
899 movl (FRAME_OFFSET+28)(%esp), INP # src
900 movl (FRAME_OFFSET+32)(%esp), LEN # len
901 movl (FRAME_OFFSET+36)(%esp), IVP # iv
/*
 * T1 = address of .Lcts_permute_table, once with absolute addressing and
 * once %rip-relative.  NOTE(review): presumably the 32-bit and x86_64
 * alternatives of the same load - confirm.
 */
902 lea .Lcts_permute_table, T1
904 lea .Lcts_permute_table(%rip), T1
941 SYM_FUNC_END(aesni_cts_cbc_enc)
944 * void aesni_cts_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
945 * size_t len, u8 *iv)
947 SYM_FUNC_START(aesni_cts_cbc_dec)
/* Fetches ctx/dst/src/len/iv from the stack. */
954 movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
955 movl (FRAME_OFFSET+24)(%esp), OUTP # dst
956 movl (FRAME_OFFSET+28)(%esp), INP # src
957 movl (FRAME_OFFSET+32)(%esp), LEN # len
958 movl (FRAME_OFFSET+36)(%esp), IVP # iv
/*
 * T1 = address of .Lcts_permute_table, once with absolute addressing and
 * once %rip-relative.  NOTE(review): presumably the 32-bit and x86_64
 * alternatives of the same load - confirm.
 */
959 lea .Lcts_permute_table, T1
961 lea .Lcts_permute_table(%rip), T1
1002 SYM_FUNC_END(aesni_cts_cbc_dec)
1004 .pushsection .rodata
/*
 * 48-byte table: the identity byte indices 0x00..0x0f with sixteen 0x80
 * bytes on either side.  Its address is taken by the CTS and XTS code.
 * NOTE(review): presumably indexed at a length-dependent offset to build
 * pshufb masks (0x80 selects a zero byte in pshufb) - confirm.
 */
1006 .Lcts_permute_table:
1007 .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
1008 .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
1009 .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
1010 .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
1011 .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
1012 .byte 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
/*
 * Descending byte indices 15..0 (a byte-reversal pattern for pshufb).
 * NOTE(review): presumably the data of .Lbswap_mask, which
 * _aesni_inc_init loads into BSWAP_MASK - confirm against its label.
 */
1015 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
1021 * _aesni_inc_init: internal ABI
1022 * setup registers used by _aesni_inc
1026 * CTR: == IV, in little endian
1027 * TCTR_LOW: == lower qword of CTR
1028 * INC: == 1, in little endian
1029 * BSWAP_MASK == endian swapping mask
1031 SYM_FUNC_START_LOCAL(_aesni_inc_init)
/* aligned, %rip-relative load of the shuffle mask into BSWAP_MASK */
1032 movaps .Lbswap_mask(%rip), BSWAP_MASK
/* reorder the bytes of CTR according to the mask */
1034 pshufb BSWAP_MASK, CTR
1039 SYM_FUNC_END(_aesni_inc_init)
1042 * _aesni_inc: internal ABI
1043 * Increase IV by 1, IV is in big endian
1046 * CTR: == IV, in little endian
1047 * TCTR_LOW: == lower qword of CTR
1048 * INC: == 1, in little endian
1049 * BSWAP_MASK == endian swapping mask
1053 * CTR: == output IV, in little endian
1054 * TCTR_LOW: == lower qword of CTR
1056 SYM_FUNC_START_LOCAL(_aesni_inc)
/* reorder the bytes of IV according to BSWAP_MASK */
1065 pshufb BSWAP_MASK, IV
1067 SYM_FUNC_END(_aesni_inc)
1070 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
1071 * size_t len, u8 *iv)
1073 SYM_FUNC_START(aesni_ctr_enc)
/*
 * Bails out to .Lctr_enc_just_ret on the "below" condition, then calls
 * _aesni_inc_init once before processing data.  Data is handled either as
 * four 16-byte blocks (inputs in IN2..IN4 plus one more, outputs from
 * STATE1..STATE4) or as a single block written from STATE.
 */
1076 jb .Lctr_enc_just_ret
1079 call _aesni_inc_init
/* four-block group: blocks 1..3 of the group loaded from INP */
1089 movups 0x10(INP), IN2
1092 movups 0x20(INP), IN3
1095 movups 0x30(INP), IN4
/* four-block group: results written to OUTP */
1098 movups STATE1, (OUTP)
1100 movups STATE2, 0x10(OUTP)
1102 movups STATE3, 0x20(OUTP)
1104 movups STATE4, 0x30(OUTP)
/* single 16-byte block written to OUTP */
1119 movups STATE, (OUTP)
1130 SYM_FUNC_END(aesni_ctr_enc)
/*
 * 16-byte constant in its own mergeable ("aM", entry size 16) read-only
 * section: low qword 0x87, high qword 0x01.  Loaded into GF128MUL_MASK by
 * _aesni_xts_crypt.
 */
1134 .section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
1136 .Lgf128mul_x_ble_mask:
1137 .octa 0x00000000000000010000000000000087
1141 * _aesni_gf128mul_x_ble: Multiply in GF(2^128) for XTS IVs
1144 * GF128MUL_MASK == mask with 0x87 and 0x01
1148 * KEY: == temporary value
/*
 * Macro (no arguments).  Uses KEY as scratch: it receives a dword
 * shuffle of IV and is then masked with GF128MUL_MASK.
 */
1150 .macro _aesni_gf128mul_x_ble
/* KEY dwords = IV dwords {3, 0, 1, 0} (selector 0x13, low lane first) */
1151 pshufd $0x13, IV, KEY
1154 pand GF128MUL_MASK, KEY
/*
 * Macro taking one argument, \enc.  NOTE(review): presumably selects the
 * encrypt or decrypt direction for aesni_xts_enc/aesni_xts_dec - confirm.
 *
 * Fetches ctx/dst/src/len/iv from the stack and loads the GF(2^128)
 * multiply mask into GF128MUL_MASK.  In the four-block path each IV value
 * is first parked in the output buffer (movdqu IV, n(OUTP)), then read
 * back into IN before the matching STATEn store overwrites it.
 */
1158 .macro _aesni_xts_crypt enc
1165 movl (FRAME_OFFSET+20)(%esp), KEYP # ctx
1166 movl (FRAME_OFFSET+24)(%esp), OUTP # dst
1167 movl (FRAME_OFFSET+28)(%esp), INP # src
1168 movl (FRAME_OFFSET+32)(%esp), LEN # len
1169 movl (FRAME_OFFSET+36)(%esp), IVP # iv
/*
 * Mask load with absolute addressing and with %rip-relative addressing.
 * NOTE(review): presumably the 32-bit and x86_64 alternatives - confirm.
 */
1170 movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
1172 movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK
/* four-block path: load each input block, park the current IV in dst */
1190 movdqu 0x00(INP), IN
1192 movdqu IV, 0x00(OUTP)
1194 _aesni_gf128mul_x_ble
1196 movdqu 0x10(INP), IN
1198 movdqu IV, 0x10(OUTP)
1200 _aesni_gf128mul_x_ble
1202 movdqu 0x20(INP), IN
1204 movdqu IV, 0x20(OUTP)
1206 _aesni_gf128mul_x_ble
1208 movdqu 0x30(INP), IN
1210 movdqu IV, 0x30(OUTP)
/* read each parked value back from dst, then overwrite it with STATEn */
1218 movdqu 0x00(OUTP), IN
1220 movdqu STATE1, 0x00(OUTP)
1222 movdqu 0x10(OUTP), IN
1224 movdqu STATE2, 0x10(OUTP)
1226 movdqu 0x20(OUTP), IN
1228 movdqu STATE3, 0x20(OUTP)
1230 movdqu 0x30(OUTP), IN
1232 movdqu STATE4, 0x30(OUTP)
1234 _aesni_gf128mul_x_ble
1275 _aesni_gf128mul_x_ble
/* single-block stores */
1286 movdqu STATE, (OUTP)
1291 movdqu STATE, (OUTP)
1296 movdqa STATE4, STATE
1302 _aesni_gf128mul_x_ble
/* T1 = address of .Lcts_permute_table (absolute and %rip-relative forms) */
1309 lea .Lcts_permute_table, T1
1311 lea .Lcts_permute_table(%rip), T1
1313 add LEN, INP /* rewind input pointer */
1314 add $16, LEN /* # bytes in final block */
1343 movups STATE, (OUTP)
1348 * void aesni_xts_enc(const struct crypto_aes_ctx *ctx, u8 *dst,
1349 * const u8 *src, unsigned int len, le128 *iv)
1351 SYM_FUNC_START(aesni_xts_enc)
/*
 * NOTE(review): the body presumably instantiates _aesni_xts_crypt for the
 * encrypt direction - confirm.
 */
1353 SYM_FUNC_END(aesni_xts_enc)
1356 * void aesni_xts_dec(const struct crypto_aes_ctx *ctx, u8 *dst,
1357 * const u8 *src, unsigned int len, le128 *iv)
1359 SYM_FUNC_START(aesni_xts_dec)
/*
 * NOTE(review): the body presumably instantiates _aesni_xts_crypt for the
 * decrypt direction - confirm.
 */
1361 SYM_FUNC_END(aesni_xts_dec)