import less(1)
[unleashed/tickless.git] / usr / src / common / crypto / aes / amd64 / aes_intel.s
bloba74d36dea639c6062aa3b1b98b00e482e407111a
1 /*
2 * ====================================================================
3 * Written by Intel Corporation for the OpenSSL project to add support
4 * for Intel AES-NI instructions. Rights for redistribution and usage
5 * in source and binary forms are granted according to the OpenSSL
6 * license.
8 * Author: Huang Ying <ying.huang at intel dot com>
9 * Vinodh Gopal <vinodh.gopal at intel dot com>
10 * Kahraman Akdemir
12 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
13 * instructions that are going to be introduced in the next generation
14 * of Intel processor, as of 2009. These instructions enable fast and
15 * secure data encryption and decryption, using the Advanced Encryption
16 * Standard (AES), defined by FIPS Publication number 197. The
17 * architecture introduces six instructions that offer full hardware
18 * support for AES. Four of them support high performance data
19 * encryption and decryption, and the other two instructions support
20 * the AES key expansion procedure.
21 * ====================================================================
25 * ====================================================================
26 * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in
37 * the documentation and/or other materials provided with the
38 * distribution.
40 * 3. All advertising materials mentioning features or use of this
41 * software must display the following acknowledgment:
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
45 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
46 * endorse or promote products derived from this software without
47 * prior written permission. For written permission, please contact
48 * openssl-core@openssl.org.
50 * 5. Products derived from this software may not be called "OpenSSL"
51 * nor may "OpenSSL" appear in their names without prior written
52 * permission of the OpenSSL Project.
54 * 6. Redistributions of any form whatsoever must retain the following
55 * acknowledgment:
56 * "This product includes software developed by the OpenSSL Project
57 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
59 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
60 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
63 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
64 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
65 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
68 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
70 * OF THE POSSIBILITY OF SUCH DAMAGE.
71 * ====================================================================
75 * ====================================================================
76 * OpenSolaris OS modifications
78 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
79 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
80 * Huang Ying of Intel to the openssl-dev mailing list under the subject
81 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
83 * This OpenSolaris version has these major changes from the original source:
85 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
86 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
87 * definitions for lint.
89 * 2. Formatted code, added comments, and added #includes and #defines.
91 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
92 * calling kpreempt_disable() and kpreempt_enable().
93 * If the TS bit is not set, save and restore %xmm registers at the beginning
94 * and end of function calls (%xmm* registers are not saved and restored
95 * during kernel thread preemption).
97 * 4. Renamed functions, reordered parameters, and changed return value
98 * to match OpenSolaris:
100 * OpenSSL interface:
101 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
102 * const int bits, AES_KEY *key);
103 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
104 * const int bits, AES_KEY *key);
105 * Return values for above are non-zero on error, 0 on success.
107 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
108 * const AES_KEY *key);
109 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
110 * const AES_KEY *key);
111 * typedef struct aes_key_st {
112 * unsigned int rd_key[4 *(AES_MAXNR + 1)];
113 * int rounds;
114 * unsigned int pad[3];
115 * } AES_KEY;
116 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
117 * (ks32) instead of 64-bit (ks64)).
118 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
120 * OpenSolaris OS interface (#ifdefs removed for readability):
121 * int rijndael_key_setup_dec_intel(uint32_t rk[],
122 * const uint32_t cipherKey[], uint64_t keyBits);
123 * int rijndael_key_setup_enc_intel(uint32_t rk[],
124 * const uint32_t cipherKey[], uint64_t keyBits);
125 * Return values for above are 0 on error, number of rounds on success.
127 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
128 * const uint32_t pt[4], uint32_t ct[4]);
129 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
130 * const uint32_t pt[4], uint32_t ct[4]);
131 * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
132 * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
134 * typedef union {
135 * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
136 * } aes_ks_t;
137 * typedef struct aes_key {
138 * aes_ks_t encr_ks, decr_ks;
139 * long double align128;
140 * int flags, nr, type;
141 * } aes_key_t;
143 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
144 * ct is crypto text, and MAX_AES_NR is 14.
145 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
147 * Note2: aes_ks_t must be aligned on a 0 mod 128-bit (16-byte) boundary.
149 * ====================================================================
 */
153 #include <sys/asm_linkage.h>
154 #include <sys/controlregs.h>
155 #ifdef _KERNEL
156 #include <sys/machprivregs.h>
157 #endif
#ifdef _KERNEL
	/*
	 * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv.  That is,
	 * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
	 * uses it to pass P2 to syscall.
	 * This also occurs with the STTS macro, but we don't care if
	 * P2 (%rsi) is modified just before function exit.
	 * The CLTS and STTS macros push and pop P1 (%rdi) already.
	 *
	 * PROTECTED_CLTS is therefore CLTS wrapped in a push/pop of %rsi
	 * when built for __xpv, and plain CLTS otherwise.
	 */
#ifdef __xpv
#define	PROTECTED_CLTS \
	push	%rsi; \
	CLTS; \
	pop	%rsi
#else
#define	PROTECTED_CLTS \
	CLTS
#endif	/* __xpv */
/*
 * If CR0_TS is not set, align the stack (after pushing %rbp) and save
 * %xmm0 - %xmm1 on it, otherwise clear CR0_TS.
 *
 * tmpreg receives the value read from %cr0.  It is tested again by
 * SET_TS_OR_POP_XMM0_XMM1(tmpreg), so it must not be modified between
 * the two macros.  %rbp holds the pre-alignment %rsp for the epilogue.
 * Local labels 1 and 2 are both defined here; the macro must end on "2:"
 * so that "jmp 2f" lands immediately after the expansion.
 */
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
	push	%rbp; \
	mov	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	and	$-XMM_ALIGN, %rsp; \
	sub	$[XMM_SIZE * 2], %rsp; \
	movaps	%xmm0, 16(%rsp); \
	movaps	%xmm1, (%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:
/*
 * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
 * otherwise set CR0_TS.
 *
 * tmpreg must still hold the %cr0 value captured by
 * CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg).  In both cases the frame is torn
 * down by restoring %rsp from %rbp and popping %rbp.
 */
#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	movaps	(%rsp), %xmm1; \
	movaps	16(%rsp), %xmm0; \
	jmp	2f; \
1: \
	STTS(tmpreg); \
2: \
	mov	%rbp, %rsp; \
	pop	%rbp
/*
 * If CR0_TS is not set, align stack (with push %rbp) and push
 * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS
 *
 * tmpreg receives the value read from %cr0 and is tested again by
 * SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg); keep it intact in between.
 * The macro must end on the local label "2:" so that "jmp 2f" lands
 * immediately after the expansion.
 */
#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
	push	%rbp; \
	mov	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	and	$-XMM_ALIGN, %rsp; \
	sub	$[XMM_SIZE * 7], %rsp; \
	movaps	%xmm0, 96(%rsp); \
	movaps	%xmm1, 80(%rsp); \
	movaps	%xmm2, 64(%rsp); \
	movaps	%xmm3, 48(%rsp); \
	movaps	%xmm4, 32(%rsp); \
	movaps	%xmm5, 16(%rsp); \
	movaps	%xmm6, (%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:
/*
 * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
 * otherwise set CR0_TS.
 *
 * tmpreg must still hold the %cr0 value captured by
 * CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg).  The registers are reloaded
 * from the same offsets they were stored at, then the frame is torn
 * down by restoring %rsp from %rbp and popping %rbp.
 */
#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	movaps	(%rsp), %xmm6; \
	movaps	16(%rsp), %xmm5; \
	movaps	32(%rsp), %xmm4; \
	movaps	48(%rsp), %xmm3; \
	movaps	64(%rsp), %xmm2; \
	movaps	80(%rsp), %xmm1; \
	movaps	96(%rsp), %xmm0; \
	jmp	2f; \
1: \
	STTS(tmpreg); \
2: \
	mov	%rbp, %rsp; \
	pop	%rbp
#else
/*
 * Not _KERNEL: the TS-bit / %xmm save-restore macros expand to nothing,
 * so the functions below contain no prologue or epilogue code from them.
 */
#define	PROTECTED_CLTS
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg)
#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
#endif	/* _KERNEL */
/*
 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
 * _key_expansion_256a(), _key_expansion_256b()
 *
 * Helper functions called by rijndael_key_setup_enc_intel().
 * Also used indirectly by rijndael_key_setup_dec_intel().
 *
 * Input:
 * %xmm0	User-provided cipher key
 * %xmm1	Round constant (result of aeskeygenassist in the caller)
 * %xmm4	Scratch; the caller zeroes it before the first call
 * %rcx		Address of the next round key slot to write
 * Output:
 * (%rcx)	AES key
 * %rcx		Advanced past the round key(s) just written
 */

.align	16
_key_expansion_128:
_key_expansion_256a:
	pshufd	$0b11111111, %xmm1, %xmm1	/ broadcast dword 3 of %xmm1
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0
	movaps	%xmm0, (%rcx)			/ store new round key
	add	$0x10, %rcx
	ret
	SET_SIZE(_key_expansion_128)
	SET_SIZE(_key_expansion_256a)
/*
 * _key_expansion_192a()
 * AES-192 key expansion step that stores two 16-byte blocks of the key
 * schedule at (%rcx) and 0x10(%rcx) and advances %rcx by 0x20.
 *
 * Input:	%xmm0, %xmm2 (running key state), %xmm1 (aeskeygenassist
 *		result), %xmm4 (scratch), %rcx (output pointer)
 * Clobbers:	%xmm1, %xmm3, %xmm4, %xmm5, %xmm6
 */
.align	16
_key_expansion_192a:
	pshufd	$0b01010101, %xmm1, %xmm1	/ broadcast dword 1 of %xmm1
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0

	movaps	%xmm2, %xmm5
	movaps	%xmm2, %xmm6			/ keep old %xmm2 for the store
	pslldq	$4, %xmm5
	pshufd	$0b11111111, %xmm0, %xmm3
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movaps	%xmm0, %xmm1
	shufps	$0b01000100, %xmm0, %xmm6
	movaps	%xmm6, (%rcx)
	shufps	$0b01001110, %xmm2, %xmm1
	movaps	%xmm1, 0x10(%rcx)
	add	$0x20, %rcx
	ret
	SET_SIZE(_key_expansion_192a)
/*
 * _key_expansion_192b()
 * AES-192 key expansion step that stores one 16-byte block of the key
 * schedule at (%rcx) and advances %rcx by 0x10.
 *
 * Input:	%xmm0, %xmm2 (running key state), %xmm1 (aeskeygenassist
 *		result), %xmm4 (scratch), %rcx (output pointer)
 * Clobbers:	%xmm1, %xmm3, %xmm4, %xmm5
 */
.align	16
_key_expansion_192b:
	pshufd	$0b01010101, %xmm1, %xmm1	/ broadcast dword 1 of %xmm1
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0

	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0b11111111, %xmm0, %xmm3
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movaps	%xmm0, (%rcx)
	add	$0x10, %rcx
	ret
	SET_SIZE(_key_expansion_192b)
/*
 * _key_expansion_256b()
 * AES-256 key expansion step for the second key half: updates %xmm2,
 * stores it at (%rcx) and advances %rcx by 0x10.
 *
 * Input:	%xmm2 (running key state), %xmm1 (aeskeygenassist result),
 *		%xmm4 (scratch), %rcx (output pointer)
 * Clobbers:	%xmm1, %xmm4
 */
.align	16
_key_expansion_256b:
	pshufd	$0b10101010, %xmm1, %xmm1	/ broadcast dword 2 of %xmm1
	shufps	$0b00010000, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0b10001100, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	pxor	%xmm1, %xmm2
	movaps	%xmm2, (%rcx)
	add	$0x10, %rcx
	ret
	SET_SIZE(_key_expansion_256b)
/*
 * rijndael_key_setup_enc_intel()
 * Expand the cipher key into the encryption key schedule.
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * OpenSolaris interface:
 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
 *	uint64_t keyBits);
 * Return value is 0 on error, number of rounds on success.
 *
 * Original Intel OpenSSL interface:
 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
 *	const int bits, AES_KEY *key);
 * Return value is non-zero on error, 0 on success.
 *
 * Register usage inside the function:
 * %rcx		Address of the next round key to be written by the
 *		_key_expansion_* helpers
 * %r10		Saved %cr0 value for the CLEAR_TS / SET_TS macros
 * %xmm0-%xmm6	Key expansion state and scratch
 */

#ifdef	OPENSSL_INTERFACE
#define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
#define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key

#define	USERCIPHERKEY		rdi	/* P1, 64 bits */
#define	KEYSIZE32		esi	/* P2, 32 bits */
#define	KEYSIZE64		rsi	/* P2, 64 bits */
#define	AESKEY			rdx	/* P3, 64 bits */

#else	/* OpenSolaris Interface */
#define	AESKEY			rdi	/* P1, 64 bits */
#define	USERCIPHERKEY		rsi	/* P2, 64 bits */
#define	KEYSIZE32		edx	/* P3, 32 bits */
#define	KEYSIZE64		rdx	/* P3, 64 bits */
#endif	/* OPENSSL_INTERFACE */

#define	ROUNDS32		KEYSIZE32	/* temp */
#define	ROUNDS64		KEYSIZE64	/* temp */
#define	ENDAESKEY		USERCIPHERKEY	/* temp */


ENTRY_NP(rijndael_key_setup_enc_intel)
	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10)

	/ NULL pointer sanity check
	test	%USERCIPHERKEY, %USERCIPHERKEY
	jz	.Lenc_key_invalid_param
	test	%AESKEY, %AESKEY
	jz	.Lenc_key_invalid_param

	movups	(%USERCIPHERKEY), %xmm0	/ user key (first 16 bytes)
	movaps	%xmm0, (%AESKEY)
	lea	0x10(%AESKEY), %rcx	/ key addr
	pxor	%xmm4, %xmm4		/ xmm4 is assumed 0 in _key_expansion_x

	cmp	$256, %KEYSIZE32
	jnz	.Lenc_key192

	/ AES 256: 14 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$14, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		/ key.rounds = 14
#endif	/* OPENSSL_INTERFACE */

	movups	0x10(%USERCIPHERKEY), %xmm2	/ other user key (2nd 16 bytes)
	movaps	%xmm2, (%rcx)
	add	$0x10, %rcx

	aeskeygenassist $0x1, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x2, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x4, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x8, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x10, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x20, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x40, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$14, %rax			/ return # rounds = 14
#endif
	ret

.align 4
.Lenc_key192:
	cmp	$192, %KEYSIZE32
	jnz	.Lenc_key128

	/ AES 192: 12 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$12, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)	/ key.rounds = 12
#endif	/* OPENSSL_INTERFACE */

	movq	0x10(%USERCIPHERKEY), %xmm2	/ other user key
	aeskeygenassist $0x1, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192a
	aeskeygenassist $0x2, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192b
	aeskeygenassist $0x4, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192a
	aeskeygenassist $0x8, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192b
	aeskeygenassist $0x10, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192a
	aeskeygenassist $0x20, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192b
	aeskeygenassist $0x40, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192a
	aeskeygenassist $0x80, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192b

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$12, %rax			/ return # rounds = 12
#endif
	ret

.align 4
.Lenc_key128:
	cmp	$128, %KEYSIZE32
	jnz	.Lenc_key_invalid_key_bits

	/ AES 128: 10 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$10, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		/ key.rounds = 10
#endif	/* OPENSSL_INTERFACE */

	aeskeygenassist $0x1, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x2, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x4, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x8, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x10, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x20, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x40, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x80, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x1b, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x36, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$10, %rax			/ return # rounds = 10
#endif
	ret

.Lenc_key_invalid_param:
#ifdef	OPENSSL_INTERFACE
	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
	mov	$-1, %rax	/ user key or AES key pointer is NULL
	ret
#else
	/* FALLTHROUGH */
#endif	/* OPENSSL_INTERFACE */

.Lenc_key_invalid_key_bits:
	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	mov	$-2, %rax	/ keysize is invalid
#else	/* OpenSolaris Interface */
	xor	%rax, %rax	/ a key pointer is NULL or invalid keysize
#endif	/* OPENSSL_INTERFACE */

	ret
	SET_SIZE(rijndael_key_setup_enc_intel)
/*
 * rijndael_key_setup_dec_intel()
 * Expand the cipher key into the decryption key schedule.
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * OpenSolaris interface:
 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
 *	uint64_t keyBits);
 * Return value is 0 on error, number of rounds on success.
 * P1->P2, P2->P3, P3->P1
 *
 * Original Intel OpenSSL interface:
 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
 *	const int bits, AES_KEY *key);
 * Return value is non-zero on error, 0 on success.
 *
 * Method: build the encryption schedule, reverse the order of the round
 * keys in place, then apply aesimc to every round key except the first
 * and the last.
 *
 * NOTE(review): %rax carries the return value from the call below across
 * the CLEAR_TS / SET_TS macros -- confirm CLTS/STTS preserve %rax on all
 * platforms (their definitions are not in this file).
 */
ENTRY_NP(rijndael_key_setup_dec_intel)
	/ Generate round keys used for encryption
	call	rijndael_key_setup_enc_intel
	test	%rax, %rax
#ifdef	OPENSSL_INTERFACE
	jnz	.Ldec_key_exit	/ Failed if returned non-0
#else	/* OpenSolaris Interface */
	jz	.Ldec_key_exit	/ Failed if returned 0
#endif	/* OPENSSL_INTERFACE */

	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	/*
	 * Convert round keys used for encryption
	 * to a form usable for decryption
	 */
#ifndef	OPENSSL_INTERFACE		/* OpenSolaris Interface */
	mov	%rax, %ROUNDS64		/ set # rounds (10, 12, or 14)
					/ (already set for OpenSSL)
#endif

	lea	0x10(%AESKEY), %rcx	/ key addr
	shl	$4, %ROUNDS32		/ rounds * 16 = offset of last round key
	add	%AESKEY, %ROUNDS64	/ address of last round key
	mov	%ROUNDS64, %ENDAESKEY	/ remember it as the aesimc loop end

.align 4
.Ldec_key_reorder_loop:
	/ Swap round keys from both ends, moving toward the middle
	movaps	(%AESKEY), %xmm0
	movaps	(%ROUNDS64), %xmm1
	movaps	%xmm0, (%ROUNDS64)
	movaps	%xmm1, (%AESKEY)
	lea	0x10(%AESKEY), %AESKEY
	lea	-0x10(%ROUNDS64), %ROUNDS64
	cmp	%AESKEY, %ROUNDS64
	ja	.Ldec_key_reorder_loop

.align 4
.Ldec_key_inv_loop:
	movaps	(%rcx), %xmm0
	/ Convert an encryption round key to a form usable for decryption
	/ with the "AES Inverse Mix Columns" instruction
	aesimc	%xmm0, %xmm1
	movaps	%xmm1, (%rcx)
	lea	0x10(%rcx), %rcx
	cmp	%ENDAESKEY, %rcx
	jnz	.Ldec_key_inv_loop

	SET_TS_OR_POP_XMM0_XMM1(%r10)

.Ldec_key_exit:
	/ OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	/ OpenSSL: rax = 0 for OK, or non-zero for error
	ret
	SET_SIZE(rijndael_key_setup_dec_intel)
/*
 * aes_encrypt_intel()
 * Encrypt a single block (in and out can overlap).
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * Temporary register usage:
 * %xmm0	State
 * %xmm1	Key
 *
 * Original OpenSolaris Interface:
 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4])
 *
 * Original Intel OpenSSL Interface:
 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 *	const AES_KEY *key)
 */

#ifdef	OPENSSL_INTERFACE
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* P1, 64 bits */
#define	OUTP		rsi	/* P2, 64 bits */
#define	KEYP		rdx	/* P3, 64 bits */

/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx:  */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary,  8 bits */

#else	/* OpenSolaris Interface */
#define	KEYP		rdi	/* P1, 64 bits */
#define	NROUNDS		esi	/* P2, 32 bits */
#define	INP		rdx	/* P3, 64 bits */
#define	OUTP		rcx	/* P4, 64 bits */
#endif	/* OPENSSL_INTERFACE */

#define	STATE		xmm0	/* temporary, 128 bits */
#define	KEY		xmm1	/* temporary, 128 bits */

ENTRY_NP(aes_encrypt_intel)
	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	movups	(%INP), %STATE			/ input
	movaps	(%KEYP), %KEY			/ key
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		/ round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			/ round 0
	/*
	 * KEYP is biased so that the common tail at .Lenc128 can use the
	 * same displacements for every key size.  lea does not modify
	 * flags, so the je below still tests the result of the cmp.
	 */
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Lenc128
	lea	0x20(%KEYP), %KEYP
	je	.Lenc192

	/ AES 256
	lea	0x20(%KEYP), %KEYP
	movaps	-0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE

.align 4
.Lenc192:
	/ AES 192 and 256
	movaps	-0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE

.align 4
.Lenc128:
	/ AES 128, 192, and 256
	movaps	-0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x70(%KEYP), %KEY
	aesenclast	 %KEY, %STATE		/ last round
	movups	%STATE, (%OUTP)			/ output

	SET_TS_OR_POP_XMM0_XMM1(%r10)
	ret
	SET_SIZE(aes_encrypt_intel)
/*
 * aes_decrypt_intel()
 * Decrypt a single block (in and out can overlap).
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * Temporary register usage:
 * %xmm0	State
 * %xmm1	Key
 *
 * Original OpenSolaris Interface:
 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4])
 *
 * Original Intel OpenSSL Interface:
 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 *	const AES_KEY *key);
 */
ENTRY_NP(aes_decrypt_intel)
	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	movups	(%INP), %STATE			/ input
	movaps	(%KEYP), %KEY			/ key
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		/ round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			/ round 0
	/*
	 * KEYP is biased so that the common tail at .Ldec128 can use the
	 * same displacements for every key size.  lea does not modify
	 * flags, so the je below still tests the result of the cmp.
	 */
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Ldec128
	lea	0x20(%KEYP), %KEYP
	je	.Ldec192

	/ AES 256
	lea	0x20(%KEYP), %KEYP
	movaps	-0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE

.align 4
.Ldec192:
	/ AES 192 and 256
	movaps	-0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE

.align 4
.Ldec128:
	/ AES 128, 192, and 256
	movaps	-0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x70(%KEYP), %KEY
	aesdeclast	%KEY, %STATE		/ last round
	movups	%STATE, (%OUTP)			/ output

	SET_TS_OR_POP_XMM0_XMM1(%r10)
	ret
	SET_SIZE(aes_decrypt_intel)