8322 nl: misleading-indentation
[unleashed/tickless.git] / usr / src / common / crypto / aes / amd64 / aes_intel.s
blob839706b5a37cf090090b9cb21798ceb46be1a96b
1 /*
2 * ====================================================================
3 * Written by Intel Corporation for the OpenSSL project to add support
4 * for Intel AES-NI instructions. Rights for redistribution and usage
5 * in source and binary forms are granted according to the OpenSSL
6 * license.
8 * Author: Huang Ying <ying.huang at intel dot com>
9 * Vinodh Gopal <vinodh.gopal at intel dot com>
10 * Kahraman Akdemir
12 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
13 * instructions that are going to be introduced in the next generation
14 * of Intel processor, as of 2009. These instructions enable fast and
15 * secure data encryption and decryption, using the Advanced Encryption
16 * Standard (AES), defined by FIPS Publication number 197. The
17 * architecture introduces six instructions that offer full hardware
18 * support for AES. Four of them support high performance data
19 * encryption and decryption, and the other two instructions support
20 * the AES key expansion procedure.
21 * ====================================================================
25 * ====================================================================
26 * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in
37 * the documentation and/or other materials provided with the
38 * distribution.
40 * 3. All advertising materials mentioning features or use of this
41 * software must display the following acknowledgment:
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
45 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
46 * endorse or promote products derived from this software without
47 * prior written permission. For written permission, please contact
48 * openssl-core@openssl.org.
50 * 5. Products derived from this software may not be called "OpenSSL"
51 * nor may "OpenSSL" appear in their names without prior written
52 * permission of the OpenSSL Project.
54 * 6. Redistributions of any form whatsoever must retain the following
55 * acknowledgment:
56 * "This product includes software developed by the OpenSSL Project
57 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
59 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
60 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
63 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
64 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
65 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
68 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
70 * OF THE POSSIBILITY OF SUCH DAMAGE.
71 * ====================================================================
75 * ====================================================================
76 * OpenSolaris OS modifications
78 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
79 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
80 * Huang Ying of Intel to the openssl-dev mailing list under the subject
81 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
83 * This OpenSolaris version has these major changes from the original source:
85 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
86 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
87 * definitions for lint.
89 * 2. Formatted code, added comments, and added #includes and #defines.
91 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
92 * calling kpreempt_disable() and kpreempt_enable().
93 * If the TS bit is not set, Save and restore %xmm registers at the beginning
94 * and end of function calls (%xmm* registers are not saved and restored
95 * during kernel thread preemption).
97 * 4. Renamed functions, reordered parameters, and changed return value
98 * to match OpenSolaris:
100 * OpenSSL interface:
101 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
102 * const int bits, AES_KEY *key);
103 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
104 * const int bits, AES_KEY *key);
105 * Return values for above are non-zero on error, 0 on success.
107 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
108 * const AES_KEY *key);
109 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
110 * const AES_KEY *key);
111 * typedef struct aes_key_st {
112 * unsigned int rd_key[4 *(AES_MAXNR + 1)];
113 * int rounds;
114 * unsigned int pad[3];
115 * } AES_KEY;
116 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
117 * (ks32) instead of 64-bit (ks64)).
118 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
120 * OpenSolaris OS interface (#ifdefs removed for readability):
121 * int rijndael_key_setup_dec_intel(uint32_t rk[],
122 * const uint32_t cipherKey[], uint64_t keyBits);
123 * int rijndael_key_setup_enc_intel(uint32_t rk[],
124 * const uint32_t cipherKey[], uint64_t keyBits);
125 * Return values for above are 0 on error, number of rounds on success.
127 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
128 * const uint32_t pt[4], uint32_t ct[4]);
129 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
130 * const uint32_t ct[4], uint32_t pt[4]);
131 * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
132 * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
134 * typedef union {
135 * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
136 * } aes_ks_t;
137 * typedef struct aes_key {
138 * aes_ks_t encr_ks, decr_ks;
139 * long double align128;
140 * int flags, nr, type;
141 * } aes_key_t;
143 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
144 * ct is cipher text, and MAX_AES_NR is 14.
145 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
147 * Note2: aes_ks_t must be aligned on a 0 mod 128-bit (16-byte) boundary.
149 * ====================================================================
152 #if defined(lint) || defined(__lint)
154 #include <sys/types.h>
156 /* ARGSUSED */
157 void
158 aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
159 uint32_t ct[4]) {
160 } /* lint-only stub: empty body, the real routine is the assembly below */
161 /* ARGSUSED */
162 void
163 aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
164 uint32_t pt[4]) {
165 } /* lint-only stub: empty body */
166 /* ARGSUSED */
167 int
168 rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
169 uint64_t keyBits) {
170 return (0);
171 } /* lint-only stub: the value is a placeholder so lint sees an int return */
172 /* ARGSUSED */
173 int
174 rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
175 uint64_t keyBits) {
176 return (0);
177 } /* lint-only stub */
180 #else /* lint */
182 #include <sys/asm_linkage.h>
183 #include <sys/controlregs.h>
184 #ifdef _KERNEL
185 #include <sys/machprivregs.h>
186 #endif
188 #ifdef _KERNEL
190 * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv. That is,
191 * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
192 * uses it to pass P2 to syscall.
193 * This also occurs with the STTS macro, but we don't care if
194 * P2 (%rsi) is modified just before function exit.
195 * The CLTS and STTS macros push and pop P1 (%rdi) already.
197 #ifdef __xpv
198 #define PROTECTED_CLTS \
199 push %rsi; /* CLTS clobbers P2 (%rsi) under i86xpv, so preserve it */ \
200 CLTS; \
201 pop %rsi
202 #else
203 #define PROTECTED_CLTS \
204 CLTS
205 #endif /* __xpv */
207 #define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
208 push %rbp; /* %rbp will hold the original, unaligned %rsp */ \
209 mov %rsp, %rbp; \
210 movq %cr0, tmpreg; /* tmpreg keeps CR0 for the matching SET_TS_OR_POP macro */ \
211 testq $CR0_TS, tmpreg; \
212 jnz 1f; /* TS set: skip the %xmm save and clear TS instead */ \
213 and $-XMM_ALIGN, %rsp; /* movaps requires an aligned save area */ \
214 sub $[XMM_SIZE * 2], %rsp; /* room for two %xmm registers */ \
215 movaps %xmm0, 16(%rsp); /* offset 16 presumes XMM_SIZE is 16 - TODO confirm */ \
216 movaps %xmm1, (%rsp); \
217 jmp 2f; \
218 1: \
219 PROTECTED_CLTS; \
220 2:
223 * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
224 * otherwise set CR0_TS.
226 #define SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
227 testq $CR0_TS, tmpreg; /* tmpreg: CR0 as read by CLEAR_TS_OR_PUSH_XMM0_XMM1 */ \
228 jnz 1f; \
229 movaps (%rsp), %xmm1; /* TS was clear: reload from the aligned save area */ \
230 movaps 16(%rsp), %xmm0; \
231 jmp 2f; \
232 1: \
233 STTS(tmpreg); /* TS was set on entry: set it again */ \
234 2: \
235 mov %rbp, %rsp; /* drop the save area, restoring the unaligned %rsp */ \
236 pop %rbp
239 * If CR0_TS is not set, align stack (with push %rbp) and push
240 * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS
242 #define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
243 push %rbp; /* %rbp will hold the original, unaligned %rsp */ \
244 mov %rsp, %rbp; \
245 movq %cr0, tmpreg; /* tmpreg keeps CR0 for the matching SET_TS_OR_POP macro */ \
246 testq $CR0_TS, tmpreg; \
247 jnz 1f; /* TS set: skip the %xmm save and clear TS instead */ \
248 and $-XMM_ALIGN, %rsp; /* movaps requires an aligned save area */ \
249 sub $[XMM_SIZE * 7], %rsp; /* room for seven %xmm registers */ \
250 movaps %xmm0, 96(%rsp); /* offsets presume XMM_SIZE is 16 - TODO confirm */ \
251 movaps %xmm1, 80(%rsp); \
252 movaps %xmm2, 64(%rsp); \
253 movaps %xmm3, 48(%rsp); \
254 movaps %xmm4, 32(%rsp); \
255 movaps %xmm5, 16(%rsp); \
256 movaps %xmm6, (%rsp); \
257 jmp 2f; \
258 1: \
259 PROTECTED_CLTS; \
260 2:
264 * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
265 * otherwise set CR0_TS.
267 #define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
268 testq $CR0_TS, tmpreg; /* tmpreg: CR0 as read by CLEAR_TS_OR_PUSH_XMM0_TO_XMM6 */ \
269 jnz 1f; \
270 movaps (%rsp), %xmm6; /* TS was clear: reload from the aligned save area */ \
271 movaps 16(%rsp), %xmm5; \
272 movaps 32(%rsp), %xmm4; \
273 movaps 48(%rsp), %xmm3; \
274 movaps 64(%rsp), %xmm2; \
275 movaps 80(%rsp), %xmm1; \
276 movaps 96(%rsp), %xmm0; \
277 jmp 2f; \
278 1: \
279 STTS(tmpreg); /* TS was set on entry: set it again */ \
280 2: \
281 mov %rbp, %rsp; /* drop the save area, restoring the unaligned %rsp */ \
282 pop %rbp
285 #else /* !_KERNEL: the TS / %xmm save-restore macros expand to nothing */
286 #define PROTECTED_CLTS
287 #define CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
288 #define SET_TS_OR_POP_XMM0_XMM1(tmpreg)
289 #define CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
290 #define SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
291 #endif /* _KERNEL */
295 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
296 * _key_expansion_256a(), _key_expansion_256b()
298 * Helper functions called by rijndael_key_setup_enc_intel().
299 * Also used indirectly by rijndael_key_setup_dec_intel().
301 * Input:
302 * %xmm0 User-provided cipher key
303 * %xmm1 aeskeygenassist result for the current round constant
304 * Output:
305 * (%rcx) AES key
308 .align 16
309 _key_expansion_128:
310 _key_expansion_256a:
311 pshufd $0b11111111, %xmm1, %xmm1 /* broadcast dword 3 of the aeskeygenassist result */
312 shufps $0b00010000, %xmm0, %xmm4 /* %xmm4 is scratch; the caller zeroes it before the first call */
313 pxor %xmm4, %xmm0
314 shufps $0b10001100, %xmm0, %xmm4
315 pxor %xmm4, %xmm0
316 pxor %xmm1, %xmm0 /* %xmm0 now holds the round key to store */
317 movaps %xmm0, (%rcx) /* aligned store into the key schedule */
318 add $0x10, %rcx /* advance to the next 16-byte slot */
319 ret /* reached via call; without this it falls into _key_expansion_192a */
320 SET_SIZE(_key_expansion_128)
321 SET_SIZE(_key_expansion_256a)
323 .align 16
324 _key_expansion_192a:
325 pshufd $0b01010101, %xmm1, %xmm1 /* broadcast dword 1 of the aeskeygenassist result */
326 shufps $0b00010000, %xmm0, %xmm4 /* %xmm4 is scratch; the caller zeroes it before the first call */
327 pxor %xmm4, %xmm0
328 shufps $0b10001100, %xmm0, %xmm4
329 pxor %xmm4, %xmm0
330 pxor %xmm1, %xmm0
332 movaps %xmm2, %xmm5
333 movaps %xmm2, %xmm6 /* keep the incoming %xmm2 for the first store below */
334 pslldq $4, %xmm5
335 pshufd $0b11111111, %xmm0, %xmm3
336 pxor %xmm3, %xmm2
337 pxor %xmm5, %xmm2
339 movaps %xmm0, %xmm1
340 shufps $0b01000100, %xmm0, %xmm6 /* low half: old %xmm2, high half: low half of %xmm0 */
341 movaps %xmm6, (%rcx)
342 shufps $0b01001110, %xmm2, %xmm1 /* low half: high half of %xmm0, high half: low half of %xmm2 */
343 movaps %xmm1, 0x10(%rcx)
344 add $0x20, %rcx /* two 16-byte slots were written */
345 ret /* reached via call; without this it falls into _key_expansion_192b */
346 SET_SIZE(_key_expansion_192a)
348 .align 16
349 _key_expansion_192b:
350 pshufd $0b01010101, %xmm1, %xmm1 /* broadcast dword 1 of the aeskeygenassist result */
351 shufps $0b00010000, %xmm0, %xmm4 /* %xmm4 is scratch; the caller zeroes it before the first call */
352 pxor %xmm4, %xmm0
353 shufps $0b10001100, %xmm0, %xmm4
354 pxor %xmm4, %xmm0
355 pxor %xmm1, %xmm0
357 movaps %xmm2, %xmm5
358 pslldq $4, %xmm5
359 pshufd $0b11111111, %xmm0, %xmm3
360 pxor %xmm3, %xmm2 /* %xmm2 is updated for the next 192-bit step */
361 pxor %xmm5, %xmm2
363 movaps %xmm0, (%rcx) /* only %xmm0 is stored by this variant */
364 add $0x10, %rcx /* advance to the next 16-byte slot */
365 ret /* reached via call; without this it falls into _key_expansion_256b */
366 SET_SIZE(_key_expansion_192b)
368 .align 16
369 _key_expansion_256b:
370 pshufd $0b10101010, %xmm1, %xmm1 /* broadcast dword 2 of the aeskeygenassist result */
371 shufps $0b00010000, %xmm2, %xmm4 /* same folding as _key_expansion_256a, applied to %xmm2 */
372 pxor %xmm4, %xmm2
373 shufps $0b10001100, %xmm2, %xmm4
374 pxor %xmm4, %xmm2
375 pxor %xmm1, %xmm2 /* %xmm2 now holds the round key to store */
376 movaps %xmm2, (%rcx) /* aligned store into the key schedule */
377 add $0x10, %rcx /* advance to the next 16-byte slot */
378 ret /* reached via call; without this it runs off the end of the routine */
379 SET_SIZE(_key_expansion_256b)
383 * rijndael_key_setup_enc_intel()
384 * Expand the cipher key into the encryption key schedule.
386 * For kernel code, caller is responsible for ensuring kpreempt_disable()
387 * has been called. This is because %xmm registers are not saved/restored.
388 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
389 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
390 * on the stack.
392 * OpenSolaris interface:
393 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
394 * uint64_t keyBits);
395 * Return value is 0 on error, number of rounds on success.
397 * Original Intel OpenSSL interface:
398 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
399 * const int bits, AES_KEY *key);
400 * Return value is non-zero on error, 0 on success.
403 #ifdef OPENSSL_INTERFACE
404 #define rijndael_key_setup_enc_intel intel_AES_set_encrypt_key
405 #define rijndael_key_setup_dec_intel intel_AES_set_decrypt_key
407 #define USERCIPHERKEY rdi /* P1, 64 bits */
408 #define KEYSIZE32 esi /* P2, 32 bits */
409 #define KEYSIZE64 rsi /* P2, 64 bits */
410 #define AESKEY rdx /* P3, 64 bits */
412 #else /* OpenSolaris Interface */
413 #define AESKEY rdi /* P1, 64 bits */
414 #define USERCIPHERKEY rsi /* P2, 64 bits */
415 #define KEYSIZE32 edx /* P3, 32 bits */
416 #define KEYSIZE64 rdx /* P3, 64 bits */
417 #endif /* OPENSSL_INTERFACE */
419 #define ROUNDS32 KEYSIZE32 /* temp: reuses the key-size register */
420 #define ROUNDS64 KEYSIZE64 /* temp: reuses the key-size register */
421 #define ENDAESKEY USERCIPHERKEY /* temp: reuses the user-key pointer register */
424 ENTRY_NP(rijndael_key_setup_enc_intel)
425 CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10) /* %r10 holds CR0 until the matching SET_TS_OR_POP */
427 / NULL pointer sanity check
428 test %USERCIPHERKEY, %USERCIPHERKEY
429 jz .Lenc_key_invalid_param
430 test %AESKEY, %AESKEY
431 jz .Lenc_key_invalid_param
433 movups (%USERCIPHERKEY), %xmm0 / user key (first 16 bytes)
434 movaps %xmm0, (%AESKEY)
435 lea 0x10(%AESKEY), %rcx / key addr
436 pxor %xmm4, %xmm4 / xmm4 is assumed 0 in _key_expansion_x
438 cmp $256, %KEYSIZE32
439 jnz .Lenc_key192
441 / AES 256: 14 rounds in encryption key schedule
442 #ifdef OPENSSL_INTERFACE
443 mov $14, %ROUNDS32
444 movl %ROUNDS32, 240(%AESKEY) / key.rounds = 14
445 #endif /* OPENSSL_INTERFACE */
447 movups 0x10(%USERCIPHERKEY), %xmm2 / other user key (2nd 16 bytes)
448 movaps %xmm2, (%rcx)
449 add $0x10, %rcx
451 aeskeygenassist $0x1, %xmm2, %xmm1 / expand the key
452 call _key_expansion_256a
453 aeskeygenassist $0x1, %xmm0, %xmm1
454 call _key_expansion_256b
455 aeskeygenassist $0x2, %xmm2, %xmm1 / expand the key
456 call _key_expansion_256a
457 aeskeygenassist $0x2, %xmm0, %xmm1
458 call _key_expansion_256b
459 aeskeygenassist $0x4, %xmm2, %xmm1 / expand the key
460 call _key_expansion_256a
461 aeskeygenassist $0x4, %xmm0, %xmm1
462 call _key_expansion_256b
463 aeskeygenassist $0x8, %xmm2, %xmm1 / expand the key
464 call _key_expansion_256a
465 aeskeygenassist $0x8, %xmm0, %xmm1
466 call _key_expansion_256b
467 aeskeygenassist $0x10, %xmm2, %xmm1 / expand the key
468 call _key_expansion_256a
469 aeskeygenassist $0x10, %xmm0, %xmm1
470 call _key_expansion_256b
471 aeskeygenassist $0x20, %xmm2, %xmm1 / expand the key
472 call _key_expansion_256a
473 aeskeygenassist $0x20, %xmm0, %xmm1
474 call _key_expansion_256b
475 aeskeygenassist $0x40, %xmm2, %xmm1 / expand the key
476 call _key_expansion_256a
478 SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
479 #ifdef OPENSSL_INTERFACE
480 xor %rax, %rax / return 0 (OK)
481 #else /* OpenSolaris Interface */
482 mov $14, %rax / return # rounds = 14
483 #endif
484 ret /* AES-256 done; must not fall into the 192-bit path */
486 .align 4
487 .Lenc_key192:
488 cmp $192, %KEYSIZE32
489 jnz .Lenc_key128
491 / AES 192: 12 rounds in encryption key schedule
492 #ifdef OPENSSL_INTERFACE
493 mov $12, %ROUNDS32
494 movl %ROUNDS32, 240(%AESKEY) / key.rounds = 12
495 #endif /* OPENSSL_INTERFACE */
497 movq 0x10(%USERCIPHERKEY), %xmm2 / other user key
498 aeskeygenassist $0x1, %xmm2, %xmm1 / expand the key
499 call _key_expansion_192a
500 aeskeygenassist $0x2, %xmm2, %xmm1 / expand the key
501 call _key_expansion_192b
502 aeskeygenassist $0x4, %xmm2, %xmm1 / expand the key
503 call _key_expansion_192a
504 aeskeygenassist $0x8, %xmm2, %xmm1 / expand the key
505 call _key_expansion_192b
506 aeskeygenassist $0x10, %xmm2, %xmm1 / expand the key
507 call _key_expansion_192a
508 aeskeygenassist $0x20, %xmm2, %xmm1 / expand the key
509 call _key_expansion_192b
510 aeskeygenassist $0x40, %xmm2, %xmm1 / expand the key
511 call _key_expansion_192a
512 aeskeygenassist $0x80, %xmm2, %xmm1 / expand the key
513 call _key_expansion_192b
515 SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
516 #ifdef OPENSSL_INTERFACE
517 xor %rax, %rax / return 0 (OK)
518 #else /* OpenSolaris Interface */
519 mov $12, %rax / return # rounds = 12
520 #endif
521 ret /* AES-192 done; must not fall into the 128-bit path */
523 .align 4
524 .Lenc_key128:
525 cmp $128, %KEYSIZE32
526 jnz .Lenc_key_invalid_key_bits
528 / AES 128: 10 rounds in encryption key schedule
529 #ifdef OPENSSL_INTERFACE
530 mov $10, %ROUNDS32
531 movl %ROUNDS32, 240(%AESKEY) / key.rounds = 10
532 #endif /* OPENSSL_INTERFACE */
534 aeskeygenassist $0x1, %xmm0, %xmm1 / expand the key
535 call _key_expansion_128
536 aeskeygenassist $0x2, %xmm0, %xmm1 / expand the key
537 call _key_expansion_128
538 aeskeygenassist $0x4, %xmm0, %xmm1 / expand the key
539 call _key_expansion_128
540 aeskeygenassist $0x8, %xmm0, %xmm1 / expand the key
541 call _key_expansion_128
542 aeskeygenassist $0x10, %xmm0, %xmm1 / expand the key
543 call _key_expansion_128
544 aeskeygenassist $0x20, %xmm0, %xmm1 / expand the key
545 call _key_expansion_128
546 aeskeygenassist $0x40, %xmm0, %xmm1 / expand the key
547 call _key_expansion_128
548 aeskeygenassist $0x80, %xmm0, %xmm1 / expand the key
549 call _key_expansion_128
550 aeskeygenassist $0x1b, %xmm0, %xmm1 / expand the key
551 call _key_expansion_128
552 aeskeygenassist $0x36, %xmm0, %xmm1 / expand the key
553 call _key_expansion_128
555 SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
556 #ifdef OPENSSL_INTERFACE
557 xor %rax, %rax / return 0 (OK)
558 #else /* OpenSolaris Interface */
559 mov $10, %rax / return # rounds = 10
560 #endif
561 ret /* AES-128 done; must not fall into the error paths */
563 .Lenc_key_invalid_param:
564 #ifdef OPENSSL_INTERFACE
565 SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
566 mov $-1, %rax / user key or AES key pointer is NULL
567 ret /* OpenSSL: NULL pointer has its own return code */
568 #else
569 /* FALLTHROUGH */
570 #endif /* OPENSSL_INTERFACE */
572 .Lenc_key_invalid_key_bits:
573 SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
574 #ifdef OPENSSL_INTERFACE
575 mov $-2, %rax / keysize is invalid
576 #else /* OpenSolaris Interface */
577 xor %rax, %rax / a key pointer is NULL or invalid keysize
578 #endif /* OPENSSL_INTERFACE */
579 ret /* error return */
581 SET_SIZE(rijndael_key_setup_enc_intel)
585 * rijndael_key_setup_dec_intel()
586 * Expand the cipher key into the decryption key schedule.
588 * For kernel code, caller is responsible for ensuring kpreempt_disable()
589 * has been called. This is because %xmm registers are not saved/restored.
590 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
591 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
592 * on the stack.
594 * OpenSolaris interface:
595 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
596 * uint64_t keyBits);
597 * Return value is 0 on error, number of rounds on success.
598 * P1->P2, P2->P3, P3->P1
600 * Original Intel OpenSSL interface:
601 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
602 * const int bits, AES_KEY *key);
603 * Return value is non-zero on error, 0 on success.
605 ENTRY_NP(rijndael_key_setup_dec_intel)
606 / Generate round keys used for encryption
607 call rijndael_key_setup_enc_intel
608 test %rax, %rax
609 #ifdef OPENSSL_INTERFACE
610 jnz .Ldec_key_exit / Failed if returned non-0
611 #else /* OpenSolaris Interface */
612 jz .Ldec_key_exit / Failed if returned 0
613 #endif /* OPENSSL_INTERFACE */
615 CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) /* %r10 holds CR0 until the matching SET_TS_OR_POP */
617 /*
618 * Convert round keys used for encryption
619 * to a form usable for decryption
620 */
621 #ifndef OPENSSL_INTERFACE /* OpenSolaris Interface */
622 mov %rax, %ROUNDS64 / set # rounds (10, 12, or 14)
623 / (already set for OpenSSL)
624 #endif
626 lea 0x10(%AESKEY), %rcx / key addr
627 shl $4, %ROUNDS32 /* rounds * 16 = byte offset of the last round key */
628 add %AESKEY, %ROUNDS64 /* ROUNDS64 = address of the last round key */
629 mov %ROUNDS64, %ENDAESKEY /* stop address for the aesimc loop below */
631 .align 4
632 .Ldec_key_reorder_loop:
633 movaps (%AESKEY), %xmm0 /* swap round keys from both ends toward the middle */
634 movaps (%ROUNDS64), %xmm1
635 movaps %xmm0, (%ROUNDS64)
636 movaps %xmm1, (%AESKEY)
637 lea 0x10(%AESKEY), %AESKEY
638 lea -0x10(%ROUNDS64), %ROUNDS64
639 cmp %AESKEY, %ROUNDS64
640 ja .Ldec_key_reorder_loop /* unsigned compare: these are addresses */
642 .align 4
643 .Ldec_key_inv_loop:
644 movaps (%rcx), %xmm0
645 / Convert an encryption round key to a form usable for decryption
646 / with the "AES Inverse Mix Columns" instruction
647 aesimc %xmm0, %xmm1
648 movaps %xmm1, (%rcx)
649 lea 0x10(%rcx), %rcx
650 cmp %ENDAESKEY, %rcx /* first and last round keys are not transformed */
651 jnz .Ldec_key_inv_loop
653 SET_TS_OR_POP_XMM0_XMM1(%r10)
655 .Ldec_key_exit:
656 / OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
657 / OpenSSL: rax = 0 for OK, or non-zero for error
658 ret /* %rax still holds the value returned by the encrypt-key setup */
659 SET_SIZE(rijndael_key_setup_dec_intel)
663 * aes_encrypt_intel()
664 * Encrypt a single block (in and out can overlap).
666 * For kernel code, caller is responsible for ensuring kpreempt_disable()
667 * has been called. This is because %xmm registers are not saved/restored.
668 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
669 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
670 * on the stack.
672 * Temporary register usage:
673 * %xmm0 State
674 * %xmm1 Key
676 * Original OpenSolaris Interface:
677 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
678 * const uint32_t pt[4], uint32_t ct[4])
680 * Original Intel OpenSSL Interface:
681 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
682 * const AES_KEY *key)
685 #ifdef OPENSSL_INTERFACE
686 #define aes_encrypt_intel intel_AES_encrypt
687 #define aes_decrypt_intel intel_AES_decrypt
689 #define INP rdi /* P1, 64 bits */
690 #define OUTP rsi /* P2, 64 bits */
691 #define KEYP rdx /* P3, 64 bits */
693 /* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */
694 #define NROUNDS32 ecx /* temporary, 32 bits */
695 #define NROUNDS cl /* temporary, 8 bits */
697 #else /* OpenSolaris Interface */
698 #define KEYP rdi /* P1, 64 bits */
699 #define NROUNDS esi /* P2, 32 bits */
700 #define INP rdx /* P3, 64 bits */
701 #define OUTP rcx /* P4, 64 bits */
702 #endif /* OPENSSL_INTERFACE */
704 #define STATE xmm0 /* temporary, 128 bits: the block being transformed */
705 #define KEY xmm1 /* temporary, 128 bits: the current round key */
707 ENTRY_NP(aes_encrypt_intel)
708 CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) /* %r10 holds CR0 until the matching SET_TS_OR_POP */
710 movups (%INP), %STATE / input
711 movaps (%KEYP), %KEY / key
712 #ifdef OPENSSL_INTERFACE
713 mov 240(%KEYP), %NROUNDS32 / round count
714 #else /* OpenSolaris Interface */
715 /* Round count is already present as P2 in %rsi/%esi */
716 #endif /* OPENSSL_INTERFACE */
718 pxor %KEY, %STATE / round 0
719 lea 0x30(%KEYP), %KEYP /* bias KEYP so the shared tail can use fixed offsets */
720 cmp $12, %NROUNDS
721 jb .Lenc128 /* fewer than 12 rounds: AES-128 */
722 lea 0x20(%KEYP), %KEYP /* lea leaves the flags from the cmp intact */
723 je .Lenc192 /* exactly 12 rounds: AES-192 */
725 / AES 256
726 lea 0x20(%KEYP), %KEYP
727 movaps -0x60(%KEYP), %KEY
728 aesenc %KEY, %STATE
729 movaps -0x50(%KEYP), %KEY
730 aesenc %KEY, %STATE
732 .align 4
733 .Lenc192:
734 / AES 192 and 256
735 movaps -0x40(%KEYP), %KEY
736 aesenc %KEY, %STATE
737 movaps -0x30(%KEYP), %KEY
738 aesenc %KEY, %STATE
740 .align 4
741 .Lenc128:
742 / AES 128, 192, and 256
743 movaps -0x20(%KEYP), %KEY
744 aesenc %KEY, %STATE
745 movaps -0x10(%KEYP), %KEY
746 aesenc %KEY, %STATE
747 movaps (%KEYP), %KEY
748 aesenc %KEY, %STATE
749 movaps 0x10(%KEYP), %KEY
750 aesenc %KEY, %STATE
751 movaps 0x20(%KEYP), %KEY
752 aesenc %KEY, %STATE
753 movaps 0x30(%KEYP), %KEY
754 aesenc %KEY, %STATE
755 movaps 0x40(%KEYP), %KEY
756 aesenc %KEY, %STATE
757 movaps 0x50(%KEYP), %KEY
758 aesenc %KEY, %STATE
759 movaps 0x60(%KEYP), %KEY
760 aesenc %KEY, %STATE
761 movaps 0x70(%KEYP), %KEY
762 aesenclast %KEY, %STATE / last round
763 movups %STATE, (%OUTP) / output
765 SET_TS_OR_POP_XMM0_XMM1(%r10)
766 ret /* void function: the result was written to (%OUTP) */
767 SET_SIZE(aes_encrypt_intel)
771 * aes_decrypt_intel()
772 * Decrypt a single block (in and out can overlap).
774 * For kernel code, caller is responsible for ensuring kpreempt_disable()
775 * has been called. This is because %xmm registers are not saved/restored.
776 * Clear and set the CR0.TS bit on entry and exit, respectively, if TS is set
777 * on entry. Otherwise, if TS is not set, save and restore %xmm registers
778 * on the stack.
780 * Temporary register usage:
781 * %xmm0 State
782 * %xmm1 Key
784 * Original OpenSolaris Interface:
785 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
786 * const uint32_t ct[4], uint32_t pt[4])
788 * Original Intel OpenSSL Interface:
789 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
790 * const AES_KEY *key);
792 ENTRY_NP(aes_decrypt_intel)
793 CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10) /* %r10 holds CR0 until the matching SET_TS_OR_POP */
795 movups (%INP), %STATE / input
796 movaps (%KEYP), %KEY / key
797 #ifdef OPENSSL_INTERFACE
798 mov 240(%KEYP), %NROUNDS32 / round count
799 #else /* OpenSolaris Interface */
800 /* Round count is already present as P2 in %rsi/%esi */
801 #endif /* OPENSSL_INTERFACE */
803 pxor %KEY, %STATE / round 0
804 lea 0x30(%KEYP), %KEYP /* bias KEYP so the shared tail can use fixed offsets */
805 cmp $12, %NROUNDS
806 jb .Ldec128 /* fewer than 12 rounds: AES-128 */
807 lea 0x20(%KEYP), %KEYP /* lea leaves the flags from the cmp intact */
808 je .Ldec192 /* exactly 12 rounds: AES-192 */
810 / AES 256
811 lea 0x20(%KEYP), %KEYP
812 movaps -0x60(%KEYP), %KEY
813 aesdec %KEY, %STATE
814 movaps -0x50(%KEYP), %KEY
815 aesdec %KEY, %STATE
817 .align 4
818 .Ldec192:
819 / AES 192 and 256
820 movaps -0x40(%KEYP), %KEY
821 aesdec %KEY, %STATE
822 movaps -0x30(%KEYP), %KEY
823 aesdec %KEY, %STATE
825 .align 4
826 .Ldec128:
827 / AES 128, 192, and 256
828 movaps -0x20(%KEYP), %KEY
829 aesdec %KEY, %STATE
830 movaps -0x10(%KEYP), %KEY
831 aesdec %KEY, %STATE
832 movaps (%KEYP), %KEY
833 aesdec %KEY, %STATE
834 movaps 0x10(%KEYP), %KEY
835 aesdec %KEY, %STATE
836 movaps 0x20(%KEYP), %KEY
837 aesdec %KEY, %STATE
838 movaps 0x30(%KEYP), %KEY
839 aesdec %KEY, %STATE
840 movaps 0x40(%KEYP), %KEY
841 aesdec %KEY, %STATE
842 movaps 0x50(%KEYP), %KEY
843 aesdec %KEY, %STATE
844 movaps 0x60(%KEYP), %KEY
845 aesdec %KEY, %STATE
846 movaps 0x70(%KEYP), %KEY
847 aesdeclast %KEY, %STATE / last round
848 movups %STATE, (%OUTP) / output
850 SET_TS_OR_POP_XMM0_XMM1(%r10)
851 ret /* void function: the result was written to (%OUTP) */
852 SET_SIZE(aes_decrypt_intel)
854 #endif /* lint || __lint */