2 * ====================================================================
3 * Written by Intel Corporation for the OpenSSL project to add support
4 * for Intel AES-NI instructions. Rights for redistribution and usage
5 * in source and binary forms are granted according to the OpenSSL
8 * Author: Huang Ying <ying.huang at intel dot com>
9 * Vinodh Gopal <vinodh.gopal at intel dot com>
12 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
13 * instructions that are going to be introduced in the next generation
14 * of Intel processor, as of 2009. These instructions enable fast and
15 * secure data encryption and decryption, using the Advanced Encryption
16 * Standard (AES), defined by FIPS Publication number 197. The
17 * architecture introduces six instructions that offer full hardware
18 * support for AES. Four of them support high performance data
19 * encryption and decryption, and the other two instructions support
20 * the AES key expansion procedure.
21 * ====================================================================
25 * ====================================================================
26 * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in
37 * the documentation and/or other materials provided with the
40 * 3. All advertising materials mentioning features or use of this
41 * software must display the following acknowledgment:
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
45 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
46 * endorse or promote products derived from this software without
47 * prior written permission. For written permission, please contact
48 * openssl-core@openssl.org.
50 * 5. Products derived from this software may not be called "OpenSSL"
51 * nor may "OpenSSL" appear in their names without prior written
52 * permission of the OpenSSL Project.
54 * 6. Redistributions of any form whatsoever must retain the following
56 * "This product includes software developed by the OpenSSL Project
57 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
59 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
60 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
63 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
64 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
65 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
68 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
70 * OF THE POSSIBILITY OF SUCH DAMAGE.
71 * ====================================================================
75 * ====================================================================
76 * OpenSolaris OS modifications
78 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
79 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
80 * Huang Ying of Intel to the openssl-dev mailing list under the subject
81 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
83 * This OpenSolaris version has these major changes from the original source:
85 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
86 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
87 * definitions for lint.
89 * 2. Formatted code, added comments, and added #includes and #defines.
91 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
92 * calling kpreempt_disable() and kpreempt_enable().
93 * If the TS bit is not set, save and restore %xmm registers at the beginning
94 * and end of function calls (%xmm* registers are not saved and restored
95 * during kernel thread preemption).
97 * 4. Renamed functions, reordered parameters, and changed return value
98 * to match OpenSolaris:
101 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
102 * const int bits, AES_KEY *key);
103 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
104 * const int bits, AES_KEY *key);
105 * Return values for above are non-zero on error, 0 on success.
107 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
108 * const AES_KEY *key);
109 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
110 * const AES_KEY *key);
111 * typedef struct aes_key_st {
112 * unsigned int rd_key[4 *(AES_MAXNR + 1)];
114 * unsigned int pad[3];
116 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
117 * (ks32) instead of 64-bit (ks64)).
118 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
120 * OpenSolaris OS interface (#ifdefs removed for readability):
121 * int rijndael_key_setup_dec_intel(uint32_t rk[],
122 * const uint32_t cipherKey[], uint64_t keyBits);
123 * int rijndael_key_setup_enc_intel(uint32_t rk[],
124 * const uint32_t cipherKey[], uint64_t keyBits);
125 * Return values for above are 0 on error, number of rounds on success.
127 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
128 * const uint32_t pt[4], uint32_t ct[4]);
129 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
130 * const uint32_t pt[4], uint32_t ct[4]);
131 * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
132 * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
135 * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
137 * typedef struct aes_key {
138 * aes_ks_t encr_ks, decr_ks;
139 * long double align128;
140 * int flags, nr, type;
143 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
144 * ct is crypto text, and MAX_AES_NR is 14.
145 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
147 * Note2: aes_ks_t must be aligned on a 0 mod 128-bit (16-byte) boundary.
149 * ====================================================================
153 #include <sys/asm_linkage.h>
154 #include <sys/controlregs.h>
156 #include <sys/machprivregs.h>
161 * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv. That is,
162 * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
163 * uses it to pass P2 to syscall.
164 * This also occurs with the STTS macro, but we don't care if
165 * P2 (%rsi) is modified just before function exit.
166 * The CLTS and STTS macros push and pop P1 (%rdi) already.
169 #define PROTECTED_CLTS \
174 #define PROTECTED_CLTS \
178 #define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
182 testq $CR0_TS
, tmpreg; \
184 and $
-XMM_ALIGN
, %rsp; \
185 sub $
[XMM_SIZE
* 2], %rsp; \
186 movaps
%xmm0
, 16(%rsp
); \
187 movaps
%xmm1
, (%rsp
); \
194 * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
195 * otherwise set CR0_TS.
197 #define SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
198 testq $CR0_TS
, tmpreg; \
200 movaps
(%rsp
), %xmm1; \
201 movaps
16(%rsp
), %xmm0; \
210 * If CR0_TS is not set, align stack (with push %rbp) and push
211 * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS
213 #define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
217 testq $CR0_TS
, tmpreg; \
219 and $
-XMM_ALIGN
, %rsp; \
220 sub $
[XMM_SIZE
* 7], %rsp; \
221 movaps
%xmm0
, 96(%rsp
); \
222 movaps
%xmm1
, 80(%rsp
); \
223 movaps
%xmm2
, 64(%rsp
); \
224 movaps
%xmm3
, 48(%rsp
); \
225 movaps
%xmm4
, 32(%rsp
); \
226 movaps
%xmm5
, 16(%rsp
); \
227 movaps
%xmm6
, (%rsp
); \
235 * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
236 * otherwise set CR0_TS.
238 #define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
239 testq $CR0_TS
, tmpreg; \
241 movaps
(%rsp
), %xmm6; \
242 movaps
16(%rsp
), %xmm5; \
243 movaps
32(%rsp
), %xmm4; \
244 movaps
48(%rsp
), %xmm3; \
245 movaps
64(%rsp
), %xmm2; \
246 movaps
80(%rsp
), %xmm1; \
247 movaps
96(%rsp
), %xmm0; \
257 #define PROTECTED_CLTS
258 #define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
259 #define SET_TS_OR_POP_XMM0_XMM1(tmpreg)
260 #define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
261 #define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
266 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
267 * _key_expansion_256a(), _key_expansion_256b()
269 * Helper functions called by rijndael_key_setup_enc_intel().
270 * Also used indirectly by rijndael_key_setup_dec_intel().
273 * %xmm0 User-provided cipher key
274 * %xmm1 Round constant
282 pshufd $
0b11111111
, %xmm1
, %xmm1
283 shufps $
0b00010000
, %xmm0
, %xmm4
285 shufps $
0b10001100
, %xmm0
, %xmm4
291 SET_SIZE
(_key_expansion_128
)
292 SET_SIZE
(_key_expansion_256a
)
296 pshufd $
0b01010101
, %xmm1
, %xmm1
297 shufps $
0b00010000
, %xmm0
, %xmm4
299 shufps $
0b10001100
, %xmm0
, %xmm4
306 pshufd $
0b11111111
, %xmm0
, %xmm3
311 shufps $
0b01000100
, %xmm0
, %xmm6
313 shufps $
0b01001110
, %xmm2
, %xmm1
314 movaps
%xmm1
, 0x10(%rcx
)
317 SET_SIZE
(_key_expansion_192a
)
321 pshufd $
0b01010101
, %xmm1
, %xmm1
322 shufps $
0b00010000
, %xmm0
, %xmm4
324 shufps $
0b10001100
, %xmm0
, %xmm4
330 pshufd $
0b11111111
, %xmm0
, %xmm3
337 SET_SIZE
(_key_expansion_192b
)
341 pshufd $
0b10101010
, %xmm1
, %xmm1
342 shufps $
0b00010000
, %xmm2
, %xmm4
344 shufps $
0b10001100
, %xmm2
, %xmm4
350 SET_SIZE
(_key_expansion_256b
)
354 * rijndael_key_setup_enc_intel()
355 * Expand the cipher key into the encryption key schedule.
357 * For kernel code, caller is responsible for ensuring kpreempt_disable()
358 * has been called. This is because %xmm registers are not saved/restored.
359 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
360 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
363 * OpenSolaris interface:
364 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
366 * Return value is 0 on error, number of rounds on success.
368 * Original Intel OpenSSL interface:
369 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
370 * const int bits, AES_KEY *key);
371 * Return value is non-zero on error, 0 on success.
/*
 * Register aliases used by the key-setup routines. The aliases are
 * written with a leading '%' at their use sites (e.g. %AESKEY).
 *
 * With OPENSSL_INTERFACE defined, the rijndael_key_setup_*_intel names are
 * remapped to the intel_AES_set_*_key names and the arguments are taken
 * as: USERCIPHERKEY = rdi (P1), KEYSIZE32/KEYSIZE64 = esi/rsi (P2),
 * AESKEY = rdx (P3).
 *
 * Otherwise (OpenSolaris interface) the arguments are taken as:
 * AESKEY = rdi (P1), USERCIPHERKEY = rsi (P2),
 * KEYSIZE32/KEYSIZE64 = edx/rdx (P3).
 *
 * ROUNDS32/ROUNDS64 alias the key-size register and ENDAESKEY aliases the
 * user-key register; all three are marked as temporaries.
 */
374 #ifdef OPENSSL_INTERFACE
375 #define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key
376 #define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key
378 #define USERCIPHERKEY rdi /* P1, 64 bits */
379 #define KEYSIZE32 esi /* P2, 32 bits */
380 #define KEYSIZE64 rsi /* P2, 64 bits */
381 #define AESKEY rdx /* P3, 64 bits */
383 #else /* OpenSolaris Interface */
384 #define AESKEY rdi /* P1, 64 bits */
385 #define USERCIPHERKEY rsi /* P2, 64 bits */
386 #define KEYSIZE32 edx /* P3, 32 bits */
387 #define KEYSIZE64 rdx /* P3, 64 bits */
388 #endif /* OPENSSL_INTERFACE */
/* Temporaries that share a register with an argument alias above. */
390 #define ROUNDS32 KEYSIZE32 /* temp */
391 #define ROUNDS64 KEYSIZE64 /* temp */
392 #define ENDAESKEY USERCIPHERKEY /* temp */
395 ENTRY_NP
(rijndael_key_setup_enc_intel
)
396 CLEAR_TS_OR_PUSH_XMM0_TO_XMM6
(%r10)
398 / NULL pointer sanity check
399 test
%USERCIPHERKEY
, %USERCIPHERKEY
400 jz
.Lenc_key_invalid_param
401 test
%AESKEY
, %AESKEY
402 jz
.Lenc_key_invalid_param
404 movups
(%USERCIPHERKEY
), %xmm0
/ user key
(first
16 bytes
)
405 movaps
%xmm0
, (%AESKEY
)
406 lea
0x10(%AESKEY
), %rcx
/ key addr
407 pxor
%xmm4
, %xmm4
/ xmm4 is assumed
0 in _key_expansion_x
412 / AES
256: 14 rounds in encryption key schedule
413 #ifdef OPENSSL_INTERFACE
415 movl
%ROUNDS32
, 240(%AESKEY
) / key.rounds
= 14
416 #endif /* OPENSSL_INTERFACE */
418 movups
0x10(%USERCIPHERKEY
), %xmm2
/ other user key
(2nd
16 bytes
)
422 aeskeygenassist $
0x1, %xmm2
, %xmm1
/ expand the key
423 call _key_expansion_256a
424 aeskeygenassist $
0x1, %xmm0
, %xmm1
425 call _key_expansion_256b
426 aeskeygenassist $
0x2, %xmm2
, %xmm1
/ expand the key
427 call _key_expansion_256a
428 aeskeygenassist $
0x2, %xmm0
, %xmm1
429 call _key_expansion_256b
430 aeskeygenassist $
0x4, %xmm2
, %xmm1
/ expand the key
431 call _key_expansion_256a
432 aeskeygenassist $
0x4, %xmm0
, %xmm1
433 call _key_expansion_256b
434 aeskeygenassist $
0x8, %xmm2
, %xmm1
/ expand the key
435 call _key_expansion_256a
436 aeskeygenassist $
0x8, %xmm0
, %xmm1
437 call _key_expansion_256b
438 aeskeygenassist $
0x10, %xmm2
, %xmm1
/ expand the key
439 call _key_expansion_256a
440 aeskeygenassist $
0x10, %xmm0
, %xmm1
441 call _key_expansion_256b
442 aeskeygenassist $
0x20, %xmm2
, %xmm1
/ expand the key
443 call _key_expansion_256a
444 aeskeygenassist $
0x20, %xmm0
, %xmm1
445 call _key_expansion_256b
446 aeskeygenassist $
0x40, %xmm2
, %xmm1
/ expand the key
447 call _key_expansion_256a
449 SET_TS_OR_POP_XMM0_TO_XMM6
(%r10)
450 #ifdef OPENSSL_INTERFACE
451 xor %rax
, %rax
/ return
0 (OK
)
452 #else /* Open Solaris Interface */
453 mov $
14, %rax
/ return
# rounds = 14
462 / AES
192: 12 rounds in encryption key schedule
463 #ifdef OPENSSL_INTERFACE
465 movl
%ROUNDS32
, 240(%AESKEY
) / key.rounds
= 12
466 #endif /* OPENSSL_INTERFACE */
468 movq
0x10(%USERCIPHERKEY
), %xmm2
/ other user key
469 aeskeygenassist $
0x1, %xmm2
, %xmm1
/ expand the key
470 call _key_expansion_192a
471 aeskeygenassist $
0x2, %xmm2
, %xmm1
/ expand the key
472 call _key_expansion_192b
473 aeskeygenassist $
0x4, %xmm2
, %xmm1
/ expand the key
474 call _key_expansion_192a
475 aeskeygenassist $
0x8, %xmm2
, %xmm1
/ expand the key
476 call _key_expansion_192b
477 aeskeygenassist $
0x10, %xmm2
, %xmm1
/ expand the key
478 call _key_expansion_192a
479 aeskeygenassist $
0x20, %xmm2
, %xmm1
/ expand the key
480 call _key_expansion_192b
481 aeskeygenassist $
0x40, %xmm2
, %xmm1
/ expand the key
482 call _key_expansion_192a
483 aeskeygenassist $
0x80, %xmm2
, %xmm1
/ expand the key
484 call _key_expansion_192b
486 SET_TS_OR_POP_XMM0_TO_XMM6
(%r10)
487 #ifdef OPENSSL_INTERFACE
488 xor %rax
, %rax
/ return
0 (OK
)
489 #else /* OpenSolaris Interface */
490 mov $
12, %rax
/ return
# rounds = 12
497 jnz
.Lenc_key_invalid_key_bits
499 / AES
128: 10 rounds in encryption key schedule
500 #ifdef OPENSSL_INTERFACE
502 movl
%ROUNDS32
, 240(%AESKEY
) / key.rounds
= 10
503 #endif /* OPENSSL_INTERFACE */
505 aeskeygenassist $
0x1, %xmm0
, %xmm1
/ expand the key
506 call _key_expansion_128
507 aeskeygenassist $
0x2, %xmm0
, %xmm1
/ expand the key
508 call _key_expansion_128
509 aeskeygenassist $
0x4, %xmm0
, %xmm1
/ expand the key
510 call _key_expansion_128
511 aeskeygenassist $
0x8, %xmm0
, %xmm1
/ expand the key
512 call _key_expansion_128
513 aeskeygenassist $
0x10, %xmm0
, %xmm1
/ expand the key
514 call _key_expansion_128
515 aeskeygenassist $
0x20, %xmm0
, %xmm1
/ expand the key
516 call _key_expansion_128
517 aeskeygenassist $
0x40, %xmm0
, %xmm1
/ expand the key
518 call _key_expansion_128
519 aeskeygenassist $
0x80, %xmm0
, %xmm1
/ expand the key
520 call _key_expansion_128
521 aeskeygenassist $
0x1b, %xmm0
, %xmm1
/ expand the key
522 call _key_expansion_128
523 aeskeygenassist $
0x36, %xmm0
, %xmm1
/ expand the key
524 call _key_expansion_128
526 SET_TS_OR_POP_XMM0_TO_XMM6
(%r10)
527 #ifdef OPENSSL_INTERFACE
528 xor %rax
, %rax
/ return
0 (OK
)
529 #else /* OpenSolaris Interface */
530 mov $
10, %rax
/ return
# rounds = 10
534 .Lenc_key_invalid_param:
535 #ifdef OPENSSL_INTERFACE
536 SET_TS_OR_POP_XMM0_TO_XMM6
(%r10)
537 mov $
-1, %rax
/ user key
or AES key pointer is NULL
541 #endif /* OPENSSL_INTERFACE */
543 .Lenc_key_invalid_key_bits:
544 SET_TS_OR_POP_XMM0_TO_XMM6
(%r10)
545 #ifdef OPENSSL_INTERFACE
546 mov $
-2, %rax
/ keysize is invalid
547 #else /* Open Solaris Interface */
548 xor %rax
, %rax
/ a key pointer is NULL
or invalid keysize
549 #endif /* OPENSSL_INTERFACE */
552 SET_SIZE
(rijndael_key_setup_enc_intel
)
556 * rijndael_key_setup_dec_intel()
557 * Expand the cipher key into the decryption key schedule.
559 * For kernel code, caller is responsible for ensuring kpreempt_disable()
560 * has been called. This is because %xmm registers are not saved/restored.
561 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
562 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
565 * OpenSolaris interface:
566 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
568 * Return value is 0 on error, number of rounds on success.
569 * P1->P2, P2->P3, P3->P1
571 * Original Intel OpenSSL interface:
572 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
573 * const int bits, AES_KEY *key);
574 * Return value is non-zero on error, 0 on success.
576 ENTRY_NP
(rijndael_key_setup_dec_intel
)
577 / Generate round keys used for encryption
578 call rijndael_key_setup_enc_intel
580 #ifdef OPENSSL_INTERFACE
581 jnz
.Ldec_key_exit / Failed if returned non-0
582 #else /* OpenSolaris Interface */
583 jz
.Ldec_key_exit / Failed if returned 0
584 #endif /* OPENSSL_INTERFACE */
586 CLEAR_TS_OR_PUSH_XMM0_XMM1
(%r10)
589 * Convert round keys used for encryption
590 * to a form usable for decryption
592 #ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */
593 mov
%rax
, %ROUNDS64
/ set
# rounds (10, 12, or 14)
594 / (already set for OpenSSL
)
597 lea
0x10(%AESKEY
), %rcx
/ key addr
599 add %AESKEY
, %ROUNDS64
600 mov
%ROUNDS64
, %ENDAESKEY
603 .Ldec_key_reorder_loop:
604 movaps
(%AESKEY
), %xmm0
605 movaps
(%ROUNDS64
), %xmm1
606 movaps
%xmm0
, (%ROUNDS64
)
607 movaps
%xmm1
, (%AESKEY
)
608 lea
0x10(%AESKEY
), %AESKEY
609 lea
-0x10(%ROUNDS64
), %ROUNDS64
610 cmp %AESKEY
, %ROUNDS64
611 ja
.Ldec_key_reorder_loop
616 / Convert an encryption round key to
a form usable for decryption
617 / with the
"AES Inverse Mix Columns" instruction
622 jnz
.Ldec_key_inv_loop
624 SET_TS_OR_POP_XMM0_XMM1
(%r10)
627 / OpenSolaris
: rax
= # rounds (10, 12, or 14) or 0 for error
628 / OpenSSL
: rax
= 0 for OK
, or non-zero for error
630 SET_SIZE
(rijndael_key_setup_dec_intel
)
634 * aes_encrypt_intel()
635 * Encrypt a single block (in and out can overlap).
637 * For kernel code, caller is responsible for ensuring kpreempt_disable()
638 * has been called. This is because %xmm registers are not saved/restored.
639 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
640 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
643 * Temporary register usage:
647 * Original OpenSolaris Interface:
648 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
649 * const uint32_t pt[4], uint32_t ct[4])
651 * Original Intel OpenSSL Interface:
652 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
653 * const AES_KEY *key)
/*
 * Register aliases used by aes_encrypt_intel() and aes_decrypt_intel().
 * The aliases are written with a leading '%' at their use sites
 * (e.g. %KEYP).
 *
 * With OPENSSL_INTERFACE defined, the two entry points are remapped to
 * intel_AES_encrypt / intel_AES_decrypt and the arguments are taken as:
 * INP = rdi (P1), OUTP = rsi (P2), KEYP = rdx (P3). There is no
 * round-count argument; the routines load it from offset 240 of KEYP into
 * NROUNDS32 (ecx), and NROUNDS names the low byte (cl) of that register.
 *
 * Otherwise (OpenSolaris interface) the arguments are taken as:
 * KEYP = rdi (P1), NROUNDS = esi (P2), INP = rdx (P3), OUTP = rcx (P4).
 * Note that NROUNDS is a 32-bit register here but an 8-bit one above.
 *
 * STATE (xmm0) and KEY (xmm1) are the two %xmm temporaries shared by both
 * interfaces.
 */
656 #ifdef OPENSSL_INTERFACE
657 #define aes_encrypt_intel intel_AES_encrypt
658 #define aes_decrypt_intel intel_AES_decrypt
660 #define INP rdi /* P1, 64 bits */
661 #define OUTP rsi /* P2, 64 bits */
662 #define KEYP rdx /* P3, 64 bits */
664 /* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */
665 #define NROUNDS32 ecx /* temporary, 32 bits */
666 #define NROUNDS cl /* temporary, 8 bits */
668 #else /* OpenSolaris Interface */
669 #define KEYP rdi /* P1, 64 bits */
670 #define NROUNDS esi /* P2, 32 bits */
671 #define INP rdx /* P3, 64 bits */
672 #define OUTP rcx /* P4, 64 bits */
673 #endif /* OPENSSL_INTERFACE */
675 #define STATE xmm0 /* temporary, 128 bits */
676 #define KEY xmm1 /* temporary, 128 bits */
678 ENTRY_NP
(aes_encrypt_intel
)
679 CLEAR_TS_OR_PUSH_XMM0_XMM1
(%r10)
681 movups
(%INP
), %STATE
/ input
682 movaps
(%KEYP
), %KEY
/ key
683 #ifdef OPENSSL_INTERFACE
684 mov
240(%KEYP
), %NROUNDS32
/ round count
685 #else /* OpenSolaris Interface */
686 /* Round count is already present as P2 in %rsi/%esi */
687 #endif /* OPENSSL_INTERFACE */
689 pxor
%KEY
, %STATE
/ round
0
690 lea
0x30(%KEYP
), %KEYP
693 lea
0x20(%KEYP
), %KEYP
697 lea
0x20(%KEYP
), %KEYP
698 movaps
-0x60(%KEYP
), %KEY
700 movaps
-0x50(%KEYP
), %KEY
706 movaps
-0x40(%KEYP
), %KEY
708 movaps
-0x30(%KEYP
), %KEY
713 / AES
128, 192, and 256
714 movaps
-0x20(%KEYP
), %KEY
716 movaps
-0x10(%KEYP
), %KEY
720 movaps
0x10(%KEYP
), %KEY
722 movaps
0x20(%KEYP
), %KEY
724 movaps
0x30(%KEYP
), %KEY
726 movaps
0x40(%KEYP
), %KEY
728 movaps
0x50(%KEYP
), %KEY
730 movaps
0x60(%KEYP
), %KEY
732 movaps
0x70(%KEYP
), %KEY
733 aesenclast
%KEY
, %STATE
/ last round
734 movups
%STATE
, (%OUTP
) / output
736 SET_TS_OR_POP_XMM0_XMM1
(%r10)
738 SET_SIZE
(aes_encrypt_intel
)
742 * aes_decrypt_intel()
743 * Decrypt a single block (in and out can overlap).
745 * For kernel code, caller is responsible for ensuring kpreempt_disable()
746 * has been called. This is because %xmm registers are not saved/restored.
747 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
748 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
751 * Temporary register usage:
755 * Original OpenSolaris Interface:
756 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
757 * const uint32_t pt[4], uint32_t ct[4])
759 * Original Intel OpenSSL Interface:
760 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
761 * const AES_KEY *key);
763 ENTRY_NP
(aes_decrypt_intel
)
764 CLEAR_TS_OR_PUSH_XMM0_XMM1
(%r10)
766 movups
(%INP
), %STATE
/ input
767 movaps
(%KEYP
), %KEY
/ key
768 #ifdef OPENSSL_INTERFACE
769 mov
240(%KEYP
), %NROUNDS32
/ round count
770 #else /* OpenSolaris Interface */
771 /* Round count is already present as P2 in %rsi/%esi */
772 #endif /* OPENSSL_INTERFACE */
774 pxor
%KEY
, %STATE
/ round
0
775 lea
0x30(%KEYP
), %KEYP
778 lea
0x20(%KEYP
), %KEYP
782 lea
0x20(%KEYP
), %KEYP
783 movaps
-0x60(%KEYP
), %KEY
785 movaps
-0x50(%KEYP
), %KEY
791 movaps
-0x40(%KEYP
), %KEY
793 movaps
-0x30(%KEYP
), %KEY
798 / AES
128, 192, and 256
799 movaps
-0x20(%KEYP
), %KEY
801 movaps
-0x10(%KEYP
), %KEY
805 movaps
0x10(%KEYP
), %KEY
807 movaps
0x20(%KEYP
), %KEY
809 movaps
0x30(%KEYP
), %KEY
811 movaps
0x40(%KEYP
), %KEY
813 movaps
0x50(%KEYP
), %KEY
815 movaps
0x60(%KEYP
), %KEY
817 movaps
0x70(%KEYP
), %KEY
818 aesdeclast
%KEY
, %STATE
/ last round
819 movups
%STATE
, (%OUTP
) / output
821 SET_TS_OR_POP_XMM0_XMM1
(%r10)
823 SET_SIZE
(aes_decrypt_intel
)