2 * ====================================================================
3 * Written by Intel Corporation for the OpenSSL project to add support
4 * for Intel AES-NI instructions. Rights for redistribution and usage
5 * in source and binary forms are granted according to the OpenSSL
8 * Author: Huang Ying <ying.huang at intel dot com>
9 * Vinodh Gopal <vinodh.gopal at intel dot com>
12 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
13 * instructions that are going to be introduced in the next generation
14 * of Intel processor, as of 2009. These instructions enable fast and
15 * secure data encryption and decryption, using the Advanced Encryption
16 * Standard (AES), defined by FIPS Publication number 197. The
17 * architecture introduces six instructions that offer full hardware
18 * support for AES. Four of them support high performance data
19 * encryption and decryption, and the other two instructions support
20 * the AES key expansion procedure.
21 * ====================================================================
25 * ====================================================================
26 * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in
37 * the documentation and/or other materials provided with the
40 * 3. All advertising materials mentioning features or use of this
41 * software must display the following acknowledgment:
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
45 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
46 * endorse or promote products derived from this software without
47 * prior written permission. For written permission, please contact
48 * openssl-core@openssl.org.
50 * 5. Products derived from this software may not be called "OpenSSL"
51 * nor may "OpenSSL" appear in their names without prior written
52 * permission of the OpenSSL Project.
54 * 6. Redistributions of any form whatsoever must retain the following
56 * "This product includes software developed by the OpenSSL Project
57 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
59 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
60 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
63 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
64 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
65 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
68 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
70 * OF THE POSSIBILITY OF SUCH DAMAGE.
71 * ====================================================================
75 * ====================================================================
76 * OpenSolaris OS modifications
78 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
79 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
80 * Huang Ying of Intel to the openssl-dev mailing list under the subject
81 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
83 * This OpenSolaris version has these major changes from the original source:
85 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
86 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
87 * definitions for lint.
89 * 2. Formatted code, added comments, and added #includes and #defines.
91 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
92 * calling kpreempt_disable() and kpreempt_enable().
93 * If the TS bit is not set, save and restore %xmm registers at the beginning
94 * and end of function calls (%xmm* registers are not saved and restored
95 * during kernel thread preemption).
97 * 4. Renamed functions, reordered parameters, and changed return value
98 * to match OpenSolaris:
101 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
102 * const int bits, AES_KEY *key);
103 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
104 * const int bits, AES_KEY *key);
105 * Return values for above are non-zero on error, 0 on success.
107 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
108 * const AES_KEY *key);
109 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
110 * const AES_KEY *key);
111 * typedef struct aes_key_st {
112 * unsigned int rd_key[4 *(AES_MAXNR + 1)];
114 * unsigned int pad[3];
116 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
117 * (ks32) instead of 64-bit (ks64)).
118 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
120 * OpenSolaris OS interface (#ifdefs removed for readability):
121 * int rijndael_key_setup_dec_intel(uint32_t rk[],
122 * const uint32_t cipherKey[], uint64_t keyBits);
123 * int rijndael_key_setup_enc_intel(uint32_t rk[],
124 * const uint32_t cipherKey[], uint64_t keyBits);
125 * Return values for above are 0 on error, number of rounds on success.
127 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
128 * const uint32_t pt[4], uint32_t ct[4]);
129 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
130 * const uint32_t pt[4], uint32_t ct[4]);
131 * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
132 * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
135 * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
137 * typedef struct aes_key {
138 * aes_ks_t encr_ks, decr_ks;
139 * long double align128;
140 * int flags, nr, type;
143 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
144 * ct is cipher text, and MAX_AES_NR is 14.
145 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
147 * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
149 * ====================================================================
152 #if defined(lint) || defined(__lint)
154 #include <sys/types.h>
158 aes_encrypt_intel
(const uint32_t rk
[], int Nr
, const uint32_t pt
[4],
163 aes_decrypt_intel
(const uint32_t rk
[], int Nr
, const uint32_t ct
[4],
168 rijndael_key_setup_enc_intel
(uint32_t rk
[], const uint32_t cipherKey
[],
174 rijndael_key_setup_dec_intel
(uint32_t rk
[], const uint32_t cipherKey
[],
182 #include <sys/asm_linkage.h>
183 #include <sys/controlregs.h>
185 #include <sys/machprivregs.h>
190 * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv. That is,
191 * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
192 * uses it to pass P2 to syscall.
193 * This also occurs with the STTS macro, but we don't care if
194 * P2 (%rsi) is modified just before function exit.
195 * The CLTS and STTS macros push and pop P1 (%rdi) already.
198 #define PROTECTED_CLTS \
203 #define PROTECTED_CLTS \
207 #define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
211 testq $CR0_TS
, tmpreg; \
213 and $
-XMM_ALIGN
, %rsp; \
214 sub $
[XMM_SIZE
* 2], %rsp; \
215 movaps
%xmm0
, 16(%rsp
); \
216 movaps
%xmm1
, (%rsp
); \
223 * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
224 * otherwise set CR0_TS.
226 #define SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
227 testq $CR0_TS
, tmpreg; \
229 movaps
(%rsp
), %xmm1; \
230 movaps
16(%rsp
), %xmm0; \
239 * If CR0_TS is not set, align stack (with push %rbp) and push
240 * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS
242 #define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
246 testq $CR0_TS
, tmpreg; \
248 and $
-XMM_ALIGN
, %rsp; \
249 sub $
[XMM_SIZE
* 7], %rsp; \
250 movaps
%xmm0
, 96(%rsp
); \
251 movaps
%xmm1
, 80(%rsp
); \
252 movaps
%xmm2
, 64(%rsp
); \
253 movaps
%xmm3
, 48(%rsp
); \
254 movaps
%xmm4
, 32(%rsp
); \
255 movaps
%xmm5
, 16(%rsp
); \
256 movaps
%xmm6
, (%rsp
); \
264 * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
265 * otherwise set CR0_TS.
267 #define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
268 testq $CR0_TS
, tmpreg; \
270 movaps
(%rsp
), %xmm6; \
271 movaps
16(%rsp
), %xmm5; \
272 movaps
32(%rsp
), %xmm4; \
273 movaps
48(%rsp
), %xmm3; \
274 movaps
64(%rsp
), %xmm2; \
275 movaps
80(%rsp
), %xmm1; \
276 movaps
96(%rsp
), %xmm0; \
286 #define PROTECTED_CLTS
287 #define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
288 #define SET_TS_OR_POP_XMM0_XMM1(tmpreg)
289 #define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
290 #define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
295 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
296 * _key_expansion_256a(), _key_expansion_256b()
298 * Helper functions called by rijndael_key_setup_enc_intel().
299 * Also used indirectly by rijndael_key_setup_dec_intel().
302 * %xmm0 User-provided cipher key
303 * %xmm1 Round constant
311 pshufd $
0b11111111
, %xmm1
, %xmm1
312 shufps $
0b00010000
, %xmm0
, %xmm4
314 shufps $
0b10001100
, %xmm0
, %xmm4
320 SET_SIZE
(_key_expansion_128
)
321 SET_SIZE
(_key_expansion_256a
)
325 pshufd $
0b01010101
, %xmm1
, %xmm1
326 shufps $
0b00010000
, %xmm0
, %xmm4
328 shufps $
0b10001100
, %xmm0
, %xmm4
335 pshufd $
0b11111111
, %xmm0
, %xmm3
340 shufps $
0b01000100
, %xmm0
, %xmm6
342 shufps $
0b01001110
, %xmm2
, %xmm1
343 movaps
%xmm1
, 0x10(%rcx
)
346 SET_SIZE
(_key_expansion_192a
)
350 pshufd $
0b01010101
, %xmm1
, %xmm1
351 shufps $
0b00010000
, %xmm0
, %xmm4
353 shufps $
0b10001100
, %xmm0
, %xmm4
359 pshufd $
0b11111111
, %xmm0
, %xmm3
366 SET_SIZE
(_key_expansion_192b
)
370 pshufd $
0b10101010
, %xmm1
, %xmm1
371 shufps $
0b00010000
, %xmm2
, %xmm4
373 shufps $
0b10001100
, %xmm2
, %xmm4
379 SET_SIZE
(_key_expansion_256b
)
383 * rijndael_key_setup_enc_intel()
384 * Expand the cipher key into the encryption key schedule.
386 * For kernel code, caller is responsible for ensuring kpreempt_disable()
387 * has been called. This is because %xmm registers are not saved/restored.
388 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
389 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
392 * OpenSolaris interface:
393 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
395 * Return value is 0 on error, number of rounds on success.
397 * Original Intel OpenSSL interface:
398 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
399 * const int bits, AES_KEY *key);
400 * Return value is non-zero on error, 0 on success.
/*
 * Symbolic register names for the key-setup routines.
 *
 * The key-setup entry points take the same three arguments (user cipher
 * key pointer, key size, key schedule pointer), but the two supported
 * interfaces pass them in different parameter positions. Defining the
 * names here lets the code below refer to %USERCIPHERKEY, %KEYSIZE32/64
 * and %AESKEY without caring which interface was selected at build time.
 *
 * When OPENSSL_INTERFACE is defined, the two key-setup entry points are
 * also renamed to their intel_AES_set_*_key equivalents.
 */
403 #ifdef OPENSSL_INTERFACE
404 #define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key
405 #define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key
/* OpenSSL argument order: userKey (P1), bits (P2), key (P3) */
407 #define USERCIPHERKEY rdi /* P1, 64 bits */
408 #define KEYSIZE32 esi /* P2, 32 bits */
409 #define KEYSIZE64 rsi /* P2, 64 bits */
410 #define AESKEY rdx /* P3, 64 bits */
412 #else /* OpenSolaris Interface */
/* OpenSolaris argument order: rk (P1), cipherKey (P2), keyBits (P3) */
413 #define AESKEY rdi /* P1, 64 bits */
414 #define USERCIPHERKEY rsi /* P2, 64 bits */
415 #define KEYSIZE32 edx /* P3, 32 bits */
416 #define KEYSIZE64 rdx /* P3, 64 bits */
417 #endif /* OPENSSL_INTERFACE */
/*
 * Temporaries that alias argument registers rather than claiming new ones:
 * ROUNDS32/ROUNDS64 share the key-size register and ENDAESKEY shares the
 * user-key register, so writing one of these overwrites that argument.
 */
419 #define ROUNDS32 KEYSIZE32 /* temp */
420 #define ROUNDS64 KEYSIZE64 /* temp */
421 #define ENDAESKEY USERCIPHERKEY /* temp */
424 ENTRY_NP
(rijndael_key_setup_enc_intel
)
425 CLEAR_TS_OR_PUSH_XMM0_TO_XMM6
(%r10)
427 / NULL pointer sanity check
428 test
%USERCIPHERKEY
, %USERCIPHERKEY
429 jz
.Lenc_key_invalid_param
430 test
%AESKEY
, %AESKEY
431 jz
.Lenc_key_invalid_param
433 movups
(%USERCIPHERKEY
), %xmm0
/ user key
(first
16 bytes
)
434 movaps
%xmm0
, (%AESKEY
)
435 lea
0x10(%AESKEY
), %rcx
/ key addr
436 pxor
%xmm4
, %xmm4
/ xmm4 is assumed
0 in _key_expansion_x
441 / AES
256: 14 rounds in encryption key schedule
442 #ifdef OPENSSL_INTERFACE
444 movl
%ROUNDS32
, 240(%AESKEY
) / key.rounds
= 14
445 #endif /* OPENSSL_INTERFACE */
447 movups
0x10(%USERCIPHERKEY
), %xmm2
/ other user key
(2nd
16 bytes
)
451 aeskeygenassist $
0x1, %xmm2
, %xmm1
/ expand the key
452 call _key_expansion_256a
453 aeskeygenassist $
0x1, %xmm0
, %xmm1
454 call _key_expansion_256b
455 aeskeygenassist $
0x2, %xmm2
, %xmm1
/ expand the key
456 call _key_expansion_256a
457 aeskeygenassist $
0x2, %xmm0
, %xmm1
458 call _key_expansion_256b
459 aeskeygenassist $
0x4, %xmm2
, %xmm1
/ expand the key
460 call _key_expansion_256a
461 aeskeygenassist $
0x4, %xmm0
, %xmm1
462 call _key_expansion_256b
463 aeskeygenassist $
0x8, %xmm2
, %xmm1
/ expand the key
464 call _key_expansion_256a
465 aeskeygenassist $
0x8, %xmm0
, %xmm1
466 call _key_expansion_256b
467 aeskeygenassist $
0x10, %xmm2
, %xmm1
/ expand the key
468 call _key_expansion_256a
469 aeskeygenassist $
0x10, %xmm0
, %xmm1
470 call _key_expansion_256b
471 aeskeygenassist $
0x20, %xmm2
, %xmm1
/ expand the key
472 call _key_expansion_256a
473 aeskeygenassist $
0x20, %xmm0
, %xmm1
474 call _key_expansion_256b
475 aeskeygenassist $
0x40, %xmm2
, %xmm1
/ expand the key
476 call _key_expansion_256a
478 SET_TS_OR_POP_XMM0_TO_XMM6
(%r10)
479 #ifdef OPENSSL_INTERFACE
480 xor %rax
, %rax
/ return
0 (OK
)
481 #else /* Open Solaris Interface */
482 mov $
14, %rax
/ return
# rounds = 14
491 / AES
192: 12 rounds in encryption key schedule
492 #ifdef OPENSSL_INTERFACE
494 movl
%ROUNDS32
, 240(%AESKEY
) / key.rounds
= 12
495 #endif /* OPENSSL_INTERFACE */
497 movq
0x10(%USERCIPHERKEY
), %xmm2
/ other user key
498 aeskeygenassist $
0x1, %xmm2
, %xmm1
/ expand the key
499 call _key_expansion_192a
500 aeskeygenassist $
0x2, %xmm2
, %xmm1
/ expand the key
501 call _key_expansion_192b
502 aeskeygenassist $
0x4, %xmm2
, %xmm1
/ expand the key
503 call _key_expansion_192a
504 aeskeygenassist $
0x8, %xmm2
, %xmm1
/ expand the key
505 call _key_expansion_192b
506 aeskeygenassist $
0x10, %xmm2
, %xmm1
/ expand the key
507 call _key_expansion_192a
508 aeskeygenassist $
0x20, %xmm2
, %xmm1
/ expand the key
509 call _key_expansion_192b
510 aeskeygenassist $
0x40, %xmm2
, %xmm1
/ expand the key
511 call _key_expansion_192a
512 aeskeygenassist $
0x80, %xmm2
, %xmm1
/ expand the key
513 call _key_expansion_192b
515 SET_TS_OR_POP_XMM0_TO_XMM6
(%r10)
516 #ifdef OPENSSL_INTERFACE
517 xor %rax
, %rax
/ return
0 (OK
)
518 #else /* OpenSolaris Interface */
519 mov $
12, %rax
/ return
# rounds = 12
526 jnz
.Lenc_key_invalid_key_bits
528 / AES
128: 10 rounds in encryption key schedule
529 #ifdef OPENSSL_INTERFACE
531 movl
%ROUNDS32
, 240(%AESKEY
) / key.rounds
= 10
532 #endif /* OPENSSL_INTERFACE */
534 aeskeygenassist $
0x1, %xmm0
, %xmm1
/ expand the key
535 call _key_expansion_128
536 aeskeygenassist $
0x2, %xmm0
, %xmm1
/ expand the key
537 call _key_expansion_128
538 aeskeygenassist $
0x4, %xmm0
, %xmm1
/ expand the key
539 call _key_expansion_128
540 aeskeygenassist $
0x8, %xmm0
, %xmm1
/ expand the key
541 call _key_expansion_128
542 aeskeygenassist $
0x10, %xmm0
, %xmm1
/ expand the key
543 call _key_expansion_128
544 aeskeygenassist $
0x20, %xmm0
, %xmm1
/ expand the key
545 call _key_expansion_128
546 aeskeygenassist $
0x40, %xmm0
, %xmm1
/ expand the key
547 call _key_expansion_128
548 aeskeygenassist $
0x80, %xmm0
, %xmm1
/ expand the key
549 call _key_expansion_128
550 aeskeygenassist $
0x1b, %xmm0
, %xmm1
/ expand the key
551 call _key_expansion_128
552 aeskeygenassist $
0x36, %xmm0
, %xmm1
/ expand the key
553 call _key_expansion_128
555 SET_TS_OR_POP_XMM0_TO_XMM6
(%r10)
556 #ifdef OPENSSL_INTERFACE
557 xor %rax
, %rax
/ return
0 (OK
)
558 #else /* OpenSolaris Interface */
559 mov $
10, %rax
/ return
# rounds = 10
563 .Lenc_key_invalid_param:
564 #ifdef OPENSSL_INTERFACE
565 SET_TS_OR_POP_XMM0_TO_XMM6
(%r10)
566 mov $
-1, %rax
/ user key
or AES key pointer is NULL
570 #endif /* OPENSSL_INTERFACE */
572 .Lenc_key_invalid_key_bits:
573 SET_TS_OR_POP_XMM0_TO_XMM6
(%r10)
574 #ifdef OPENSSL_INTERFACE
575 mov $
-2, %rax
/ keysize is invalid
576 #else /* Open Solaris Interface */
577 xor %rax
, %rax
/ a key pointer is NULL
or invalid keysize
578 #endif /* OPENSSL_INTERFACE */
581 SET_SIZE
(rijndael_key_setup_enc_intel
)
585 * rijndael_key_setup_dec_intel()
586 * Expand the cipher key into the decryption key schedule.
588 * For kernel code, caller is responsible for ensuring kpreempt_disable()
589 * has been called. This is because %xmm registers are not saved/restored.
590 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
591 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
594 * OpenSolaris interface:
595 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
597 * Return value is 0 on error, number of rounds on success.
598 * P1->P2, P2->P3, P3->P1
600 * Original Intel OpenSSL interface:
601 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
602 * const int bits, AES_KEY *key);
603 * Return value is non-zero on error, 0 on success.
605 ENTRY_NP
(rijndael_key_setup_dec_intel
)
606 / Generate round keys used for encryption
607 call rijndael_key_setup_enc_intel
609 #ifdef OPENSSL_INTERFACE
610 jnz
.Ldec_key_exit / Failed if returned non-0
611 #else /* OpenSolaris Interface */
612 jz
.Ldec_key_exit / Failed if returned 0
613 #endif /* OPENSSL_INTERFACE */
615 CLEAR_TS_OR_PUSH_XMM0_XMM1
(%r10)
618 * Convert round keys used for encryption
619 * to a form usable for decryption
621 #ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */
622 mov
%rax
, %ROUNDS64
/ set
# rounds (10, 12, or 14)
623 / (already set for OpenSSL
)
626 lea
0x10(%AESKEY
), %rcx
/ key addr
628 add %AESKEY
, %ROUNDS64
629 mov
%ROUNDS64
, %ENDAESKEY
632 .Ldec_key_reorder_loop:
633 movaps
(%AESKEY
), %xmm0
634 movaps
(%ROUNDS64
), %xmm1
635 movaps
%xmm0
, (%ROUNDS64
)
636 movaps
%xmm1
, (%AESKEY
)
637 lea
0x10(%AESKEY
), %AESKEY
638 lea
-0x10(%ROUNDS64
), %ROUNDS64
639 cmp %AESKEY
, %ROUNDS64
640 ja
.Ldec_key_reorder_loop
645 / Convert an encryption round key to
a form usable for decryption
646 / with the
"AES Inverse Mix Columns" instruction
651 jnz
.Ldec_key_inv_loop
653 SET_TS_OR_POP_XMM0_XMM1
(%r10)
656 / OpenSolaris
: rax
= # rounds (10, 12, or 14) or 0 for error
657 / OpenSSL
: rax
= 0 for OK
, or non-zero for error
659 SET_SIZE
(rijndael_key_setup_dec_intel
)
663 * aes_encrypt_intel()
664 * Encrypt a single block (in and out can overlap).
666 * For kernel code, caller is responsible for ensuring kpreempt_disable()
667 * has been called. This is because %xmm registers are not saved/restored.
668 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
669 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
672 * Temporary register usage:
676 * Original OpenSolaris Interface:
677 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
678 * const uint32_t pt[4], uint32_t ct[4])
680 * Original Intel OpenSSL Interface:
681 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
682 * const AES_KEY *key)
/*
 * Symbolic register names shared by aes_encrypt_intel() and
 * aes_decrypt_intel().
 *
 * The two supported interfaces pass the input pointer, output pointer and
 * key schedule pointer in different parameter positions, and only the
 * OpenSolaris interface passes the round count as an argument. When
 * OPENSSL_INTERFACE is defined, the entry points are also renamed to
 * intel_AES_encrypt / intel_AES_decrypt.
 */
685 #ifdef OPENSSL_INTERFACE
686 #define aes_encrypt_intel intel_AES_encrypt
687 #define aes_decrypt_intel intel_AES_decrypt
/* OpenSSL argument order: in (P1), out (P2), key (P3) */
689 #define INP rdi /* P1, 64 bits */
690 #define OUTP rsi /* P2, 64 bits */
691 #define KEYP rdx /* P3, 64 bits */
693 /* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */
694 #define NROUNDS32 ecx /* temporary, 32 bits */
695 #define NROUNDS cl /* temporary, 8 bits */
697 #else /* OpenSolaris Interface */
/* OpenSolaris argument order: ks (P1), Nr (P2), pt (P3), ct (P4) */
698 #define KEYP rdi /* P1, 64 bits */
699 #define NROUNDS esi /* P2, 32 bits */
700 #define INP rdx /* P3, 64 bits */
701 #define OUTP rcx /* P4, 64 bits */
702 #endif /* OPENSSL_INTERFACE */
/* Working %xmm registers: STATE holds the data block, KEY one round key. */
704 #define STATE xmm0 /* temporary, 128 bits */
705 #define KEY xmm1 /* temporary, 128 bits */
707 ENTRY_NP
(aes_encrypt_intel
)
708 CLEAR_TS_OR_PUSH_XMM0_XMM1
(%r10)
710 movups
(%INP
), %STATE
/ input
711 movaps
(%KEYP
), %KEY
/ key
712 #ifdef OPENSSL_INTERFACE
713 mov
240(%KEYP
), %NROUNDS32
/ round count
714 #else /* OpenSolaris Interface */
715 /* Round count is already present as P2 in %rsi/%esi */
716 #endif /* OPENSSL_INTERFACE */
718 pxor
%KEY
, %STATE
/ round
0
719 lea
0x30(%KEYP
), %KEYP
722 lea
0x20(%KEYP
), %KEYP
726 lea
0x20(%KEYP
), %KEYP
727 movaps
-0x60(%KEYP
), %KEY
729 movaps
-0x50(%KEYP
), %KEY
735 movaps
-0x40(%KEYP
), %KEY
737 movaps
-0x30(%KEYP
), %KEY
742 / AES
128, 192, and 256
743 movaps
-0x20(%KEYP
), %KEY
745 movaps
-0x10(%KEYP
), %KEY
749 movaps
0x10(%KEYP
), %KEY
751 movaps
0x20(%KEYP
), %KEY
753 movaps
0x30(%KEYP
), %KEY
755 movaps
0x40(%KEYP
), %KEY
757 movaps
0x50(%KEYP
), %KEY
759 movaps
0x60(%KEYP
), %KEY
761 movaps
0x70(%KEYP
), %KEY
762 aesenclast
%KEY
, %STATE
/ last round
763 movups
%STATE
, (%OUTP
) / output
765 SET_TS_OR_POP_XMM0_XMM1
(%r10)
767 SET_SIZE
(aes_encrypt_intel
)
771 * aes_decrypt_intel()
772 * Decrypt a single block (in and out can overlap).
774 * For kernel code, caller is responsible for ensuring kpreempt_disable()
775 * has been called. This is because %xmm registers are not saved/restored.
776 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
777 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
780 * Temporary register usage:
784 * Original OpenSolaris Interface:
785 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
786 * const uint32_t pt[4], uint32_t ct[4])
788 * Original Intel OpenSSL Interface:
789 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
790 * const AES_KEY *key);
792 ENTRY_NP
(aes_decrypt_intel
)
793 CLEAR_TS_OR_PUSH_XMM0_XMM1
(%r10)
795 movups
(%INP
), %STATE
/ input
796 movaps
(%KEYP
), %KEY
/ key
797 #ifdef OPENSSL_INTERFACE
798 mov
240(%KEYP
), %NROUNDS32
/ round count
799 #else /* OpenSolaris Interface */
800 /* Round count is already present as P2 in %rsi/%esi */
801 #endif /* OPENSSL_INTERFACE */
803 pxor
%KEY
, %STATE
/ round
0
804 lea
0x30(%KEYP
), %KEYP
807 lea
0x20(%KEYP
), %KEYP
811 lea
0x20(%KEYP
), %KEYP
812 movaps
-0x60(%KEYP
), %KEY
814 movaps
-0x50(%KEYP
), %KEY
820 movaps
-0x40(%KEYP
), %KEY
822 movaps
-0x30(%KEYP
), %KEY
827 / AES
128, 192, and 256
828 movaps
-0x20(%KEYP
), %KEY
830 movaps
-0x10(%KEYP
), %KEY
834 movaps
0x10(%KEYP
), %KEY
836 movaps
0x20(%KEYP
), %KEY
838 movaps
0x30(%KEYP
), %KEY
840 movaps
0x40(%KEYP
), %KEY
842 movaps
0x50(%KEYP
), %KEY
844 movaps
0x60(%KEYP
), %KEY
846 movaps
0x70(%KEYP
), %KEY
847 aesdeclast
%KEY
, %STATE
/ last round
848 movups
%STATE
, (%OUTP
) / output
850 SET_TS_OR_POP_XMM0_XMM1
(%r10)
852 SET_SIZE
(aes_decrypt_intel
)
854 #endif /* lint || __lint */