2 * Implement AES algorithm in Intel AES-NI instructions.
4 * The white paper of AES-NI instructions can be downloaded from:
5 * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
7 * Copyright (C) 2008, Intel Corp.
8 * Author: Huang Ying <ying.huang@intel.com>
9 * Vinodh Gopal <vinodh.gopal@intel.com>
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
18 #include <linux/linkage.h>
/*
 * Shuffle steps of the key-expansion code. The labels, the XOR/store steps
 * and the returns that sit between these lines are not visible in this
 * excerpt.
 * NOTE(review): presumably these lines belong to the _key_expansion_*
 * helpers called during key setup -- confirm against the full file.
 * Lane lists in the comments are ordered low dword first.
 */
48 pshufd $0b11111111, %xmm1, %xmm1 # xmm1 = dword 3 of xmm1 in all four lanes
49 shufps $0b00010000, %xmm0, %xmm4 # xmm4 = {xmm4[0], xmm4[0], xmm0[1], xmm0[0]}
51 shufps $0b10001100, %xmm0, %xmm4 # xmm4 = {xmm4[0], xmm4[3], xmm0[0], xmm0[2]}
59 pshufd $0b01010101, %xmm1, %xmm1 # xmm1 = dword 1 of xmm1 in all four lanes
60 shufps $0b00010000, %xmm0, %xmm4 # xmm4 = {xmm4[0], xmm4[0], xmm0[1], xmm0[0]}
62 shufps $0b10001100, %xmm0, %xmm4 # xmm4 = {xmm4[0], xmm4[3], xmm0[0], xmm0[2]}
69 pshufd $0b11111111, %xmm0, %xmm3 # xmm3 = dword 3 of xmm0 in all four lanes
74 shufps $0b01000100, %xmm0, %xmm6 # xmm6 = {xmm6[0], xmm6[1], xmm0[0], xmm0[1]}
76 shufps $0b01001110, %xmm2, %xmm1 # xmm1 = {xmm1[2], xmm1[3], xmm2[0], xmm2[1]}
77 movaps %xmm1, 16(%rcx) # aligned 16-byte store of xmm1 at rcx + 16
82 pshufd $0b01010101, %xmm1, %xmm1 # xmm1 = dword 1 of xmm1 in all four lanes
83 shufps $0b00010000, %xmm0, %xmm4 # xmm4 = {xmm4[0], xmm4[0], xmm0[1], xmm0[0]}
85 shufps $0b10001100, %xmm0, %xmm4 # xmm4 = {xmm4[0], xmm4[3], xmm0[0], xmm0[2]}
91 pshufd $0b11111111, %xmm0, %xmm3 # xmm3 = dword 3 of xmm0 in all four lanes
100 pshufd $0b10101010, %xmm1, %xmm1 # xmm1 = dword 2 of xmm1 in all four lanes
101 shufps $0b00010000, %xmm2, %xmm4 # same lane pattern as above, sourced from xmm2
103 shufps $0b10001100, %xmm2, %xmm4 # xmm4 = {xmm4[0], xmm4[3], xmm2[0], xmm2[2]}
111 * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
112 * unsigned int key_len)
/*
 * Body of the key-setup routine: load the user key, then run one of three
 * AESKEYGENASSIST + call sequences that differ in which _key_expansion_*
 * helper they use. The branches selecting a sequence, the entry label and
 * the code around the final stores are not visible in this excerpt.
 * NOTE(review): AESKEYGENASSIST is written without commas, so it is
 * presumably a macro defined elsewhere; its first operand is the per-round
 * constant -- confirm operand roles against the macro definition.
 */
115 movups (%rsi), %xmm0 # user key (first 16 bytes)
117 lea 0x10(%rdi), %rcx # key addr: rcx = rdi + 0x10
119 pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
123 movups 0x10(%rsi), %xmm2 # other user key: the 16 bytes at rsi + 0x10
/* Sequence using _key_expansion_256a/_256b: constants 0x1..0x40, two calls per constant except the last. */
126 AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
127 call _key_expansion_256a
128 AESKEYGENASSIST 0x1 %xmm0 %xmm1 # same constant, this time fed from xmm0
129 call _key_expansion_256b
130 AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
131 call _key_expansion_256a
132 AESKEYGENASSIST 0x2 %xmm0 %xmm1
133 call _key_expansion_256b
134 AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
135 call _key_expansion_256a
136 AESKEYGENASSIST 0x4 %xmm0 %xmm1
137 call _key_expansion_256b
138 AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
139 call _key_expansion_256a
140 AESKEYGENASSIST 0x8 %xmm0 %xmm1
141 call _key_expansion_256b
142 AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
143 call _key_expansion_256a
144 AESKEYGENASSIST 0x10 %xmm0 %xmm1
145 call _key_expansion_256b
146 AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
147 call _key_expansion_256a
148 AESKEYGENASSIST 0x20 %xmm0 %xmm1
149 call _key_expansion_256b
150 AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
151 call _key_expansion_256a # last call of this sequence; no _256b call follows
/* Sequence using _key_expansion_192a/_192b alternately: constants 0x1..0x80. */
154 movq 0x10(%rsi), %xmm2 # other user key: 8 bytes at rsi + 0x10; movq zeroes the upper half of xmm2
155 AESKEYGENASSIST 0x1 %xmm2 %xmm1 # round 1
156 call _key_expansion_192a
157 AESKEYGENASSIST 0x2 %xmm2 %xmm1 # round 2
158 call _key_expansion_192b
159 AESKEYGENASSIST 0x4 %xmm2 %xmm1 # round 3
160 call _key_expansion_192a
161 AESKEYGENASSIST 0x8 %xmm2 %xmm1 # round 4
162 call _key_expansion_192b
163 AESKEYGENASSIST 0x10 %xmm2 %xmm1 # round 5
164 call _key_expansion_192a
165 AESKEYGENASSIST 0x20 %xmm2 %xmm1 # round 6
166 call _key_expansion_192b
167 AESKEYGENASSIST 0x40 %xmm2 %xmm1 # round 7
168 call _key_expansion_192a
169 AESKEYGENASSIST 0x80 %xmm2 %xmm1 # round 8
170 call _key_expansion_192b
/* Sequence using _key_expansion_128: ten rounds, constants 0x1..0x80 then 0x1b and 0x36. */
173 AESKEYGENASSIST 0x1 %xmm0 %xmm1 # round 1
174 call _key_expansion_128
175 AESKEYGENASSIST 0x2 %xmm0 %xmm1 # round 2
176 call _key_expansion_128
177 AESKEYGENASSIST 0x4 %xmm0 %xmm1 # round 3
178 call _key_expansion_128
179 AESKEYGENASSIST 0x8 %xmm0 %xmm1 # round 4
180 call _key_expansion_128
181 AESKEYGENASSIST 0x10 %xmm0 %xmm1 # round 5
182 call _key_expansion_128
183 AESKEYGENASSIST 0x20 %xmm0 %xmm1 # round 6
184 call _key_expansion_128
185 AESKEYGENASSIST 0x40 %xmm0 %xmm1 # round 7
186 call _key_expansion_128
187 AESKEYGENASSIST 0x80 %xmm0 %xmm1 # round 8
188 call _key_expansion_128
189 AESKEYGENASSIST 0x1b %xmm0 %xmm1 # round 9
190 call _key_expansion_128
191 AESKEYGENASSIST 0x36 %xmm0 %xmm1 # round 10
192 call _key_expansion_128
/*
 * Stores at a +240 byte displacement from rcx and rdi. The loads that fill
 * xmm0/xmm1 and the pointer updates around these lines are not visible here.
 * NOTE(review): presumably this builds a second key schedule placed 240 bytes
 * after the first -- confirm against the full routine.
 */
197 movaps %xmm0, 240(%rcx) # aligned 16-byte store of xmm0 at rcx + 240
198 movaps %xmm1, 240(%rdi) # aligned 16-byte store of xmm1 at rdi + 240
200 lea 240-16(%rcx), %rsi # rsi = rcx + 224
214 * void aesni_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
/*
 * Visible part of aesni_enc: read the key-length field, load one 16-byte
 * block from INP and store STATE to OUTP. Whatever transforms STATE between
 * the load and the store is not visible in this excerpt.
 */
217 movl 480(KEYP), KLEN # key length, read from offset 480 of the struct at KEYP
218 movups (INP), STATE # input (unaligned load allowed)
220 movups STATE, (OUTP) # output (unaligned store allowed)
224 * _aesni_enc1: internal ABI
226 * KEYP: key struct pointer
228 * STATE: initial state (input)
230 * STATE: final state (output)
/*
 * Round-key loads of _aesni_enc1. Only the loads into KEY are visible; the
 * instructions that apply each KEY to STATE, the initialisation of TKEYP,
 * any key-length branches and the load at offset 0 of TKEYP are not part of
 * this excerpt.
 */
236 movaps (KEYP), KEY # key: first 16 bytes at KEYP (aligned load)
238 pxor KEY, STATE # round 0
242 lea 0x20(TKEYP), TKEYP # TKEYP += 0x20; the offsets below are relative to the new value
245 movaps -0x60(TKEYP), KEY # loads step through offsets -0x60..0x70 in 0x10 increments
247 movaps -0x50(TKEYP), KEY
251 movaps -0x40(TKEYP), KEY
253 movaps -0x30(TKEYP), KEY
257 movaps -0x20(TKEYP), KEY
259 movaps -0x10(TKEYP), KEY
263 movaps 0x10(TKEYP), KEY
265 movaps 0x20(TKEYP), KEY
267 movaps 0x30(TKEYP), KEY
269 movaps 0x40(TKEYP), KEY
271 movaps 0x50(TKEYP), KEY
273 movaps 0x60(TKEYP), KEY
275 movaps 0x70(TKEYP), KEY # highest offset loaded in this routine
280 * _aesni_enc4: internal ABI
282 * KEYP: key struct pointer
284 * STATE1: initial state (input)
289 * STATE1: final state (output)
/*
 * Round-key loads and final round of _aesni_enc4, which works on
 * STATE1..STATE4. The per-round instructions between the loads, the round-0
 * handling of STATE2..STATE4, the initialisation of TKEYP and the load at
 * offset 0 of TKEYP are not visible in this excerpt.
 */
298 movaps (KEYP), KEY # key: first 16 bytes at KEYP (aligned load)
300 pxor KEY, STATE1 # round 0
307 lea 0x20(TKEYP), TKEYP # TKEYP += 0x20; the offsets below are relative to the new value
310 movaps -0x60(TKEYP), KEY # loads step through offsets -0x60..0x70 in 0x10 increments
315 movaps -0x50(TKEYP), KEY
322 movaps -0x40(TKEYP), KEY
327 movaps -0x30(TKEYP), KEY
334 movaps -0x20(TKEYP), KEY
339 movaps -0x10(TKEYP), KEY
349 movaps 0x10(TKEYP), KEY
354 movaps 0x20(TKEYP), KEY
359 movaps 0x30(TKEYP), KEY
364 movaps 0x40(TKEYP), KEY
369 movaps 0x50(TKEYP), KEY
374 movaps 0x60(TKEYP), KEY
379 movaps 0x70(TKEYP), KEY # key used by the four AESENCLAST lines below
380 AESENCLAST KEY STATE1 # last round
381 AESENCLAST KEY STATE2 # same KEY applied to each of the four states
382 AESENCLAST KEY STATE3
383 AESENCLAST KEY STATE4
387 * void aesni_dec (struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
/*
 * Visible part of aesni_dec: read the key-length field, load one 16-byte
 * block from INP and store STATE to OUTP. Whatever transforms STATE between
 * the load and the store is not visible in this excerpt.
 */
390 mov 480(KEYP), KLEN # key length, read from offset 480 of the struct at KEYP
392 movups (INP), STATE # input (unaligned load allowed)
394 movups STATE, (OUTP) # output (unaligned store allowed)
398 * _aesni_dec1: internal ABI
400 * KEYP: key struct pointer
402 * STATE: initial state (input)
404 * STATE: final state (output)
/*
 * Round-key loads of _aesni_dec1. Only the loads into KEY are visible; the
 * instructions that apply each KEY to STATE, the initialisation of TKEYP,
 * any key-length branches and the load at offset 0 of TKEYP are not part of
 * this excerpt.
 */
410 movaps (KEYP), KEY # key: first 16 bytes at KEYP (aligned load)
412 pxor KEY, STATE # round 0
416 lea 0x20(TKEYP), TKEYP # TKEYP += 0x20; the offsets below are relative to the new value
419 movaps -0x60(TKEYP), KEY # loads step through offsets -0x60..0x70 in 0x10 increments
421 movaps -0x50(TKEYP), KEY
425 movaps -0x40(TKEYP), KEY
427 movaps -0x30(TKEYP), KEY
431 movaps -0x20(TKEYP), KEY
433 movaps -0x10(TKEYP), KEY
437 movaps 0x10(TKEYP), KEY
439 movaps 0x20(TKEYP), KEY
441 movaps 0x30(TKEYP), KEY
443 movaps 0x40(TKEYP), KEY
445 movaps 0x50(TKEYP), KEY
447 movaps 0x60(TKEYP), KEY
449 movaps 0x70(TKEYP), KEY # highest offset loaded in this routine
454 * _aesni_dec4: internal ABI
456 * KEYP: key struct pointer
458 * STATE1: initial state (input)
463 * STATE1: final state (output)
/*
 * Round-key loads and final round of _aesni_dec4, which works on
 * STATE1..STATE4. The per-round instructions between the loads, the round-0
 * handling of STATE2..STATE4, the initialisation of TKEYP and the load at
 * offset 0 of TKEYP are not visible in this excerpt.
 */
472 movaps (KEYP), KEY # key: first 16 bytes at KEYP (aligned load)
474 pxor KEY, STATE1 # round 0
481 lea 0x20(TKEYP), TKEYP # TKEYP += 0x20; the offsets below are relative to the new value
484 movaps -0x60(TKEYP), KEY # loads step through offsets -0x60..0x70 in 0x10 increments
489 movaps -0x50(TKEYP), KEY
496 movaps -0x40(TKEYP), KEY
501 movaps -0x30(TKEYP), KEY
508 movaps -0x20(TKEYP), KEY
513 movaps -0x10(TKEYP), KEY
523 movaps 0x10(TKEYP), KEY
528 movaps 0x20(TKEYP), KEY
533 movaps 0x30(TKEYP), KEY
538 movaps 0x40(TKEYP), KEY
543 movaps 0x50(TKEYP), KEY
548 movaps 0x60(TKEYP), KEY
553 movaps 0x70(TKEYP), KEY # key used by the four AESDECLAST lines below
554 AESDECLAST KEY STATE1 # last round
555 AESDECLAST KEY STATE2 # same KEY applied to each of the four states
556 AESDECLAST KEY STATE3
557 AESDECLAST KEY STATE4
561 * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
/*
 * Visible loads and stores of aesni_ecb_enc. The length comparisons, loop
 * labels, pointer/length updates, the load of STATE1 and the calls that
 * transform the states are not part of this excerpt.
 */
565 test LEN, LEN # check length (sets ZF when LEN is zero)
575 movups 0x10(INP), STATE2 # 16-byte blocks at INP + 0x10, + 0x20, + 0x30
576 movups 0x20(INP), STATE3
577 movups 0x30(INP), STATE4
579 movups STATE1, (OUTP) # write four 16-byte results to OUTP .. OUTP + 0x3f
580 movups STATE2, 0x10(OUTP)
581 movups STATE3, 0x20(OUTP)
582 movups STATE4, 0x30(OUTP)
594 movups STATE1, (OUTP) # single-block store; NOTE(review): presumably the one-block-at-a-time path -- confirm
604 * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
/*
 * Visible loads and stores of aesni_ecb_dec. The length checks, loop labels,
 * pointer/length updates, the load of STATE1 and the calls that transform
 * the states are not part of this excerpt.
 */
619 movups 0x10(INP), STATE2 # 16-byte blocks at INP + 0x10, + 0x20, + 0x30
620 movups 0x20(INP), STATE3
621 movups 0x30(INP), STATE4
623 movups STATE1, (OUTP) # write four 16-byte results to OUTP .. OUTP + 0x3f
624 movups STATE2, 0x10(OUTP)
625 movups STATE3, 0x20(OUTP)
626 movups STATE4, 0x30(OUTP)
638 movups STATE1, (OUTP) # single-block store; NOTE(review): presumably the one-block-at-a-time path -- confirm
648 * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
649 * size_t len, u8 *iv)
/*
 * Visible loads and store of aesni_cbc_enc. The length check, the loop
 * label, the step that combines IN with STATE, the encryption call and the
 * pointer/length updates are not part of this excerpt.
 */
655 movups (IVP), STATE # load iv as initial state
658 movups (INP), IN # load input (16 bytes, unaligned load allowed)
661 movups STATE, (OUTP) # store output (16 bytes, unaligned store allowed)
672 * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
673 * size_t len, u8 *iv)
677 jb .Lcbc_dec_just_ret
687 movups 0x10(INP), IN2
689 movups 0x20(INP), IN3
691 movups 0x30(INP), IN4
699 movups STATE1, (OUTP)
700 movups STATE2, 0x10(OUTP)
701 movups STATE3, 0x20(OUTP)
702 movups STATE4, 0x30(OUTP)