2 * Implement AES algorithm in Intel AES-NI instructions.
4 * The white paper of AES-NI instructions can be downloaded from:
5 * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
7 * Copyright (C) 2008, Intel Corp.
8 * Author: Huang Ying <ying.huang@intel.com>
9 * Vinodh Gopal <vinodh.gopal@intel.com>
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
18 #include <linux/linkage.h>
35 #define BSWAP_MASK %xmm10
52 pshufd $0b11111111, %xmm1, %xmm1
53 shufps $0b00010000, %xmm0, %xmm4
55 shufps $0b10001100, %xmm0, %xmm4
63 pshufd $0b01010101, %xmm1, %xmm1
64 shufps $0b00010000, %xmm0, %xmm4
66 shufps $0b10001100, %xmm0, %xmm4
73 pshufd $0b11111111, %xmm0, %xmm3
78 shufps $0b01000100, %xmm0, %xmm6
80 shufps $0b01001110, %xmm2, %xmm1
81 movaps %xmm1, 16(%rcx)
86 pshufd $0b01010101, %xmm1, %xmm1
87 shufps $0b00010000, %xmm0, %xmm4
89 shufps $0b10001100, %xmm0, %xmm4
95 pshufd $0b11111111, %xmm0, %xmm3
104 pshufd $0b10101010, %xmm1, %xmm1
105 shufps $0b00010000, %xmm2, %xmm4
107 shufps $0b10001100, %xmm2, %xmm4
115 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
116 * unsigned int key_len)
119 movups (%rsi), %xmm0 # user key (first 16 bytes)
121 lea 0x10(%rdi), %rcx # key addr
123 pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
127 movups 0x10(%rsi), %xmm2 # other user key
130 AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
131 call _key_expansion_256a
132 AESKEYGENASSIST 0x1 %xmm0 %xmm1
133 call _key_expansion_256b
134 AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
135 call _key_expansion_256a
136 AESKEYGENASSIST 0x2 %xmm0 %xmm1
137 call _key_expansion_256b
138 AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
139 call _key_expansion_256a
140 AESKEYGENASSIST 0x4 %xmm0 %xmm1
141 call _key_expansion_256b
142 AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
143 call _key_expansion_256a
144 AESKEYGENASSIST 0x8 %xmm0 %xmm1
145 call _key_expansion_256b
146 AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
147 call _key_expansion_256a
148 AESKEYGENASSIST 0x10 %xmm0 %xmm1
149 call _key_expansion_256b
150 AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
151 call _key_expansion_256a
152 AESKEYGENASSIST 0x20 %xmm0 %xmm1
153 call _key_expansion_256b
154 AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
155 call _key_expansion_256a
158 movq 0x10(%rsi), %xmm2 # other user key
159 AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
160 call _key_expansion_192a
161 AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
162 call _key_expansion_192b
163 AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
164 call _key_expansion_192a
165 AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
166 call _key_expansion_192b
167 AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
168 call _key_expansion_192a
169 AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
170 call _key_expansion_192b
171 AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
172 call _key_expansion_192a
173 AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8
174 call _key_expansion_192b
177 AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1
178 call _key_expansion_128
179 AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2
180 call _key_expansion_128
181 AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3
182 call _key_expansion_128
183 AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4
184 call _key_expansion_128
185 AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5
186 call _key_expansion_128
187 AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6
188 call _key_expansion_128
189 AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7
190 call _key_expansion_128
191 AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8
192 call _key_expansion_128
193 AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9
194 call _key_expansion_128
195 AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10
196 call _key_expansion_128
201 movaps %xmm0, 240(%rcx)
202 movaps %xmm1, 240(%rdi)
204 lea 240-16(%rcx), %rsi
218 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
221 movl 480(KEYP), KLEN # key length
222 movups (INP), STATE # input
224 movups STATE, (OUTP) # output
228 * _aesni_enc1: internal ABI
230 * KEYP: key struct pointer
232 * STATE: initial state (input)
234 * STATE: final state (output)
240 movaps (KEYP), KEY # key
242 pxor KEY, STATE # round 0
246 lea 0x20(TKEYP), TKEYP
249 movaps -0x60(TKEYP), KEY
251 movaps -0x50(TKEYP), KEY
255 movaps -0x40(TKEYP), KEY
257 movaps -0x30(TKEYP), KEY
261 movaps -0x20(TKEYP), KEY
263 movaps -0x10(TKEYP), KEY
267 movaps 0x10(TKEYP), KEY
269 movaps 0x20(TKEYP), KEY
271 movaps 0x30(TKEYP), KEY
273 movaps 0x40(TKEYP), KEY
275 movaps 0x50(TKEYP), KEY
277 movaps 0x60(TKEYP), KEY
279 movaps 0x70(TKEYP), KEY
284 * _aesni_enc4: internal ABI
286 * KEYP: key struct pointer
288 * STATE1: initial state (input)
293 * STATE1: final state (output)
302 movaps (KEYP), KEY # key
304 pxor KEY, STATE1 # round 0
311 lea 0x20(TKEYP), TKEYP
314 movaps -0x60(TKEYP), KEY
319 movaps -0x50(TKEYP), KEY
326 movaps -0x40(TKEYP), KEY
331 movaps -0x30(TKEYP), KEY
338 movaps -0x20(TKEYP), KEY
343 movaps -0x10(TKEYP), KEY
353 movaps 0x10(TKEYP), KEY
358 movaps 0x20(TKEYP), KEY
363 movaps 0x30(TKEYP), KEY
368 movaps 0x40(TKEYP), KEY
373 movaps 0x50(TKEYP), KEY
378 movaps 0x60(TKEYP), KEY
383 movaps 0x70(TKEYP), KEY
384 AESENCLAST KEY STATE1 # last round
385 AESENCLAST KEY STATE2
386 AESENCLAST KEY STATE3
387 AESENCLAST KEY STATE4
391 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
394 mov 480(KEYP), KLEN # key length
396 movups (INP), STATE # input
398 movups STATE, (OUTP) #output
402 * _aesni_dec1: internal ABI
404 * KEYP: key struct pointer
406 * STATE: initial state (input)
408 * STATE: final state (output)
414 movaps (KEYP), KEY # key
416 pxor KEY, STATE # round 0
420 lea 0x20(TKEYP), TKEYP
423 movaps -0x60(TKEYP), KEY
425 movaps -0x50(TKEYP), KEY
429 movaps -0x40(TKEYP), KEY
431 movaps -0x30(TKEYP), KEY
435 movaps -0x20(TKEYP), KEY
437 movaps -0x10(TKEYP), KEY
441 movaps 0x10(TKEYP), KEY
443 movaps 0x20(TKEYP), KEY
445 movaps 0x30(TKEYP), KEY
447 movaps 0x40(TKEYP), KEY
449 movaps 0x50(TKEYP), KEY
451 movaps 0x60(TKEYP), KEY
453 movaps 0x70(TKEYP), KEY
458 * _aesni_dec4: internal ABI
460 * KEYP: key struct pointer
462 * STATE1: initial state (input)
467 * STATE1: final state (output)
476 movaps (KEYP), KEY # key
478 pxor KEY, STATE1 # round 0
485 lea 0x20(TKEYP), TKEYP
488 movaps -0x60(TKEYP), KEY
493 movaps -0x50(TKEYP), KEY
500 movaps -0x40(TKEYP), KEY
505 movaps -0x30(TKEYP), KEY
512 movaps -0x20(TKEYP), KEY
517 movaps -0x10(TKEYP), KEY
527 movaps 0x10(TKEYP), KEY
532 movaps 0x20(TKEYP), KEY
537 movaps 0x30(TKEYP), KEY
542 movaps 0x40(TKEYP), KEY
547 movaps 0x50(TKEYP), KEY
552 movaps 0x60(TKEYP), KEY
557 movaps 0x70(TKEYP), KEY
558 AESDECLAST KEY STATE1 # last round
559 AESDECLAST KEY STATE2
560 AESDECLAST KEY STATE3
561 AESDECLAST KEY STATE4
565 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
569 test LEN, LEN # check length
579 movups 0x10(INP), STATE2
580 movups 0x20(INP), STATE3
581 movups 0x30(INP), STATE4
583 movups STATE1, (OUTP)
584 movups STATE2, 0x10(OUTP)
585 movups STATE3, 0x20(OUTP)
586 movups STATE4, 0x30(OUTP)
598 movups STATE1, (OUTP)
608 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
623 movups 0x10(INP), STATE2
624 movups 0x20(INP), STATE3
625 movups 0x30(INP), STATE4
627 movups STATE1, (OUTP)
628 movups STATE2, 0x10(OUTP)
629 movups STATE3, 0x20(OUTP)
630 movups STATE4, 0x30(OUTP)
642 movups STATE1, (OUTP)
652 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
653 * size_t len, u8 *iv)
659 movups (IVP), STATE # load iv as initial state
662 movups (INP), IN # load input
665 movups STATE, (OUTP) # store output
676 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
677 * size_t len, u8 *iv)
681 jb .Lcbc_dec_just_ret
691 movups 0x10(INP), IN2
693 movups 0x20(INP), IN3
695 movups 0x30(INP), IN4
703 movups STATE1, (OUTP)
704 movups STATE2, 0x10(OUTP)
705 movups STATE3, 0x20(OUTP)
706 movups STATE4, 0x30(OUTP)
734 .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
737 * _aesni_inc_init: internal ABI
738 * Set up the registers used by _aesni_inc
742 * CTR: == IV, in little endian
743 * TCTR_LOW: == lower qword of CTR
744 * INC: == 1, in little endian
745 * BSWAP_MASK == endian swapping mask
748 movaps .Lbswap_mask, BSWAP_MASK
750 PSHUFB_XMM BSWAP_MASK CTR
752 MOVQ_R64_XMM TCTR_LOW INC
753 MOVQ_R64_XMM CTR TCTR_LOW
757 * _aesni_inc: internal ABI
758 * Increment IV by 1; IV is in big endian
761 * CTR: == IV, in little endian
762 * TCTR_LOW: == lower qword of CTR
763 * INC: == 1, in little endian
764 * BSWAP_MASK == endian swapping mask
768 * CTR: == output IV, in little endian
769 * TCTR_LOW: == lower qword of CTR
780 PSHUFB_XMM BSWAP_MASK IV
784 * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
785 * size_t len, u8 *iv)
789 jb .Lctr_enc_just_ret
802 movups 0x10(INP), IN2
805 movups 0x20(INP), IN3
808 movups 0x30(INP), IN4
811 movups STATE1, (OUTP)
813 movups STATE2, 0x10(OUTP)
815 movups STATE3, 0x20(OUTP)
817 movups STATE4, 0x30(OUTP)