4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 * Unified version for both position-independent and non-position-independent
28 * code, for both v8plus and v9.
31 * cc -c -xarch=v8plus aes_crypt_asm.S or
32 * cc -c -xarch=v9 aes_crypt_asm.S
33 * for kernel use (no -KPIC).
37 * cc -c -xarch=v8plus -KPIC -DPIC aes_crypt_asm.S or
38 * cc -c -xarch=v9 -KPIC -DPIC aes_crypt_asm.S
42 * The tables were generated by a C program, compiled into the C version
43 * of this function, from which a .s was generated by the C compiler and
44 * that .s was used as a starting point for this one, in particular for
45 * the data definitions. It is important, though, that the tables and
46 * the code both remain in the text section and in this order, otherwise,
47 * at least on UltraSparc-II processors, collisions in the E-cache are
48 * highly probable between the code and the data it is using which can
49 * result in up to 40% performance loss.
51 * For a description of the AES algorithm (a.k.a. Rijndael), see NIST
52 * publication FIPS PUB 197.
56 #include <sys/stack.h>
58 #if defined(lint) || defined(__lint)
60 #include <sys/types.h>
62 void aes_encrypt_impl
(const uint32_t rk
[], int Nr
, const uint32_t pt
[4],
66 void aes_decrypt_impl
(const uint32_t rk
[], int Nr
, const uint32_t ct
[4],
70 #else /* lint || __lint */
! Start of the real assembly (the non-lint arm of the conditional above):
! select the allocatable, executable .text section and record the source
! file name in the object.
72 .section ".text",#alloc,#execinstr
73 .file "aes_crypt_asm.s"
! Tell the assembler that %g2 and %g3 are used as scratch registers.
75 .register %g2,#scratch
76 .register %g3,#scratch
! NOTE(review): the file header states that the lookup tables must stay in
! the text section, ahead of the code. Their definitions are not visible
! between this directive and the switch to .data below; confirm they
! follow here.
78 .section ".text",#alloc
! Switch to the allocatable, writable .data section.
! NOTE(review): presumably this is where aes_const (typed as an object
! further down) is defined; its contents are not visible here.
2419 .section ".data",#alloc,#write
2425 ! for v9, the addresses are 64-bit long so we should use .xword
2440 #else /* __sparcv9 */
2443 ! for v8plus, the addresses are 32-bit long; we use filler 0's so that
2444 ! we can use ldx to load the addresses just like in the v9 version
2467 #endif /* __sparcv9 */
2469 .type aes_const,#object
! ----------------------------------------------------------------------
! aes_encrypt_impl
!
! Exported entry point (see the .global directive below). According to
! the prototype comment, the arguments are the round-key array rk[], the
! round count Nr and a four-word input block pt[4]; the rest of the
! parameter list is cut off in that comment.
!
! Setup variants: the "#else" and "#endif" lines tagged __sparcv9 separate
! the v9 setup code from the other (v8plus, per the file header) setup
! code. Each side contains one sequence that references
! _GLOBAL_OFFSET_TABLE_ and one that does not.
! NOTE(review): presumably these are the -DPIC and non-PIC builds named
! in the file header; the guards selecting them are not visible here.
!
! Constant note: every variant builds %l6 with
!     sethi %hi(0x7fff8) ; or %lo(0x7ff8)
! The sethi yields 0x7fc00 and the or adds the low ten bits 0x3f8, so
! %l6 = 0x7fff8.
! NOTE(review): the two instructions name different constants (0x7fff8
! versus 0x7ff8). Both have the same low ten bits, so the value is not
! affected, but the mismatch looks like a typo; confirm which constant
! was intended.
! ----------------------------------------------------------------------
2472 .section ".text",#alloc,#execinstr
2474 ! SUBROUTINE aes_encrypt_impl
2476 ! void aes_encrypt_impl(const uint32_t rk[], int Nr, const uint32_t pt[4],
2479 ! OFFSET SOURCE LINE LABEL INSTRUCTION
2481 .global aes_encrypt_impl
! ---- __sparcv9 side ----
! Switch to a new register window and allocate a stack frame of
! SA(MINFRAME) bytes (macros from <sys/stack.h>).
2486 save %sp, -SA(MINFRAME), %sp
! %l0 = %hi/%lo pair for aes_const; %l1 = %hi/%lo pair for the offset
! expression _GLOBAL_OFFSET_TABLE_-(.L1-.), anchored at label .L1 (the
! label itself is not visible here).
2489 sethi %hi(aes_const), %l0
2493 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.L1-.)), %l1
2494 or %l0, %lo(aes_const), %l0
2497 or %l1, %lo(_GLOBAL_OFFSET_TABLE_-(.L1-.)), %l1
! %i5 = 0xff000000 (only the top byte of a 32-bit word set).
2498 sethi %hi(0xff000000), %i5
! %l6 = 0x7fff8 (see the constant note in the header).
2501 sethi %hi(0x7fff8), %l6
2504 or %l6, %lo(0x7ff8), %l6
! Sequence without GOT reference: %hh/%hm assemble the upper 32 bits of
! the aes_const address in %l0 and %lm/%lo the lower 32 bits in %l7.
! The step that merges the two halves is not visible here.
2509 sethi %hh(aes_const), %l0
2510 sethi %lm(aes_const), %l7
2512 or %l0, %hm(aes_const), %l0
2513 or %l7, %lo(aes_const), %l7
! Same two constants as in the sequence above: %i5 = 0xff000000,
! %l6 = 0x7fff8.
2516 sethi %hi(0xff000000), %i5
2519 sethi %hi(0x7fff8), %l6
2523 or %l6, %lo(0x7ff8), %l6
2528 #else /* __sparcv9 */
! ---- non-__sparcv9 side ----
! Same frame allocation as on the v9 side.
2530 save %sp, -SA(MINFRAME), %sp
2531 sethi %hi(aes_const), %l0
! GOT-referencing sequence, mirroring the one on the v9 side.
2537 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.L1-.)), %l1
2538 or %l0, %lo(aes_const), %l0
2541 or %l1, %lo(_GLOBAL_OFFSET_TABLE_-(.L1-.)), %l1
2542 sethi %hi(0xff000000), %i5
2545 sethi %hi(0x7fff8), %l6
2548 or %l6, %lo(0x7ff8), %l6
! Sequence without GOT reference: %l7 = %l0 | %lo(aes_const).
! NOTE(review): presumably %l0 still holds %hi(aes_const) from the sethi
! above, making %l7 the full 32-bit address; confirm against the guards.
2553 or %l0, %lo(aes_const), %l7
2554 sethi %hi(0xff000000), %i5
2557 sethi %hi(0x7fff8), %l6
2560 or %l6, %lo(0x7ff8), %l6
2565 #endif /* __sparcv9 */
! Twelve groups of four 64-bit loads follow. In every group the
! addresses are base register %l0..%l3 plus index register %l4..%l7 and
! the destinations are %o0, %o5, %g5 and %o0 again, so the first loaded
! value is overwritten by the fourth load unless intervening code has
! already used it.
! NOTE(review): the instructions that set up the base and index
! registers and that combine the loaded values are not visible here;
! presumably these are the table lookups described in the file header.
2633 ldx [%l0 + %l4], %o0
2637 ldx [%l1 + %l5], %o5
2641 ldx [%l2 + %l6], %g5
2645 ldx [%l3 + %l7], %o0
2653 ldx [%l0 + %l4], %o0
2657 ldx [%l1 + %l5], %o5
2660 ldx [%l2 + %l6], %g5
2664 ldx [%l3 + %l7], %o0
2672 ldx [%l0 + %l4], %o0
2676 ldx [%l1 + %l5], %o5
2679 ldx [%l2 + %l6], %g5
2683 ldx [%l3 + %l7], %o0
2691 ldx [%l0 + %l4], %o0
2695 ldx [%l1 + %l5], %o5
2698 ldx [%l2 + %l6], %g5
2703 ldx [%l3 + %l7], %o0
2712 ldx [%l0 + %l4], %o0
2716 ldx [%l1 + %l5], %o5
2720 ldx [%l2 + %l6], %g5
2724 ldx [%l3 + %l7], %o0
2732 ldx [%l0 + %l4], %o0
2736 ldx [%l1 + %l5], %o5
2739 ldx [%l2 + %l6], %g5
2743 ldx [%l3 + %l7], %o0
2751 ldx [%l0 + %l4], %o0
2755 ldx [%l1 + %l5], %o5
2758 ldx [%l2 + %l6], %g5
2762 ldx [%l3 + %l7], %o0
2770 ldx [%l0 + %l4], %o0
2774 ldx [%l1 + %l5], %o5
2777 ldx [%l2 + %l6], %g5
2781 ldx [%l3 + %l7], %o0
2790 ldx [%l0 + %l4], %o0
2794 ldx [%l1 + %l5], %o5
2798 ldx [%l2 + %l6], %g5
2802 ldx [%l3 + %l7], %o0
2810 ldx [%l0 + %l4], %o0
2814 ldx [%l1 + %l5], %o5
2817 ldx [%l2 + %l6], %g5
2821 ldx [%l3 + %l7], %o0
2829 ldx [%l0 + %l4], %o0
2833 ldx [%l1 + %l5], %o5
2836 ldx [%l2 + %l6], %g5
2840 ldx [%l3 + %l7], %o0
2848 ldx [%l0 + %l4], %o0
2852 ldx [%l1 + %l5], %o5
2855 ldx [%l2 + %l6], %g5
2860 ldx [%l3 + %l7], %o0
! Temporary fix-up, per the original remarks on these three lines: shift
! %l4 and %l5 right by one bit and advance %i0 by 32 bytes. %i0 is the
! first incoming argument register, i.e. rk in the prototype comment.
! NOTE(review): 32 bytes is eight uint32_t words; presumably this steps
! past the round keys already consumed - confirm.
2869 srl %l4, 1, %l4 !***** should be removed after
2870 srl %l5, 1, %l5 !***** unrolling the loop and correcting
2871 add %i0, 32, %i0 !***** the last iteration
! Symbol bookkeeping: type code 2 marks the symbol as a function; its
! size is the distance from the aes_encrypt_impl label to this point.
2988 .type aes_encrypt_impl,2
2989 .size aes_encrypt_impl,(.-aes_encrypt_impl)
! ----------------------------------------------------------------------
! aes_decrypt_impl
!
! Exported entry point (see the .global directive below). According to
! the prototype comment, the arguments are the round-key array rk[], the
! round count Nr and a four-word input block ct[4]; the rest of the
! parameter list is cut off in that comment.
!
! Setup variants: the "#else" and "#endif" lines tagged __sparcv9 separate
! the v9 setup code from the other (v8plus, per the file header) setup
! code. Each side contains one sequence that references
! _GLOBAL_OFFSET_TABLE_ (anchored at label .L1d) and one that does not.
! NOTE(review): presumably these are the -DPIC and non-PIC builds named
! in the file header; the guards selecting them are not visible here.
!
! Constant note: every variant builds %l6 with
!     sethi %hi(0x7fff8) ; or %lo(0x7ff8)
! The sethi yields 0x7fc00 and the or adds the low ten bits 0x3f8, so
! %l6 = 0x7fff8.
! NOTE(review): the two instructions name different constants (0x7fff8
! versus 0x7ff8). Both have the same low ten bits, so the value is not
! affected, but the mismatch looks like a typo; confirm which constant
! was intended.
! ----------------------------------------------------------------------
2992 .section ".text",#alloc,#execinstr
2995 ! SUBROUTINE aes_decrypt_impl
2997 ! void aes_decrypt_impl(const uint32_t rk[], int Nr, const uint32_t ct[4],
3000 ! OFFSET SOURCE LINE LABEL INSTRUCTION
3002 .global aes_decrypt_impl
! ---- __sparcv9 side ----
! Switch to a new register window and allocate a stack frame of
! SA(MINFRAME) bytes (macros from <sys/stack.h>).
3007 save %sp, -SA(MINFRAME), %sp
! %l0 = %hi/%lo pair for aes_const; %l1 = %hi/%lo pair for the offset
! expression _GLOBAL_OFFSET_TABLE_-(.L1d-.), anchored at label .L1d (the
! label itself is not visible here).
3010 sethi %hi(aes_const), %l0
3014 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.L1d-.)), %l1
3015 or %l0, %lo(aes_const), %l0
3018 or %l1, %lo(_GLOBAL_OFFSET_TABLE_-(.L1d-.)), %l1
! %i5 = 0xff000000 (only the top byte of a 32-bit word set).
3019 sethi %hi(0xff000000), %i5
! %l6 = 0x7fff8 (see the constant note in the header).
3022 sethi %hi(0x7fff8), %l6
3025 or %l6, %lo(0x7ff8), %l6
! Sequence without GOT reference: %hh/%hm assemble the upper 32 bits of
! the aes_const address in %l0 and %lm/%lo the lower 32 bits in %l7.
! The step that merges the two halves is not visible here.
3030 sethi %hh(aes_const), %l0
3031 sethi %lm(aes_const), %l7
3033 or %l0, %hm(aes_const), %l0
3034 or %l7, %lo(aes_const), %l7
! Same two constants as in the sequence above: %i5 = 0xff000000,
! %l6 = 0x7fff8.
3037 sethi %hi(0xff000000), %i5
3040 sethi %hi(0x7fff8), %l6
3044 or %l6, %lo(0x7ff8), %l6
3049 #else /* __sparcv9 */
! ---- non-__sparcv9 side ----
! Same frame allocation as on the v9 side.
3051 save %sp, -SA(MINFRAME), %sp
3052 sethi %hi(aes_const), %l0
! GOT-referencing sequence, mirroring the one on the v9 side.
3058 sethi %hi(_GLOBAL_OFFSET_TABLE_-(.L1d-.)), %l1
3059 or %l0, %lo(aes_const), %l0
3062 or %l1, %lo(_GLOBAL_OFFSET_TABLE_-(.L1d-.)), %l1
3063 sethi %hi(0xff000000), %i5
3066 sethi %hi(0x7fff8), %l6
3069 or %l6, %lo(0x7ff8), %l6
! Sequence without GOT reference: %l7 = %l0 | %lo(aes_const).
! NOTE(review): presumably %l0 still holds %hi(aes_const) from the sethi
! above, making %l7 the full 32-bit address; confirm against the guards.
3074 or %l0, %lo(aes_const), %l7
3075 sethi %hi(0xff000000), %i5
3078 sethi %hi(0x7fff8), %l6
3081 or %l6, %lo(0x7ff8), %l6
3086 #endif /* __sparcv9 */
! Twelve groups of four 64-bit loads follow. In every group the
! addresses are base register %l0..%l3 plus index register %l4..%l7 and
! the destinations are %o0, %o5, %g5 and %o0 again, so the first loaded
! value is overwritten by the fourth load unless intervening code has
! already used it.
! NOTE(review): the instructions that set up the base and index
! registers and that combine the loaded values are not visible here;
! presumably these are the table lookups described in the file header.
3154 ldx [%l0 + %l4], %o0
3158 ldx [%l1 + %l5], %o5
3162 ldx [%l2 + %l6], %g5
3166 ldx [%l3 + %l7], %o0
3174 ldx [%l0 + %l4], %o0
3178 ldx [%l1 + %l5], %o5
3181 ldx [%l2 + %l6], %g5
3185 ldx [%l3 + %l7], %o0
3193 ldx [%l0 + %l4], %o0
3197 ldx [%l1 + %l5], %o5
3200 ldx [%l2 + %l6], %g5
3204 ldx [%l3 + %l7], %o0
3212 ldx [%l0 + %l4], %o0
3216 ldx [%l1 + %l5], %o5
3219 ldx [%l2 + %l6], %g5
3224 ldx [%l3 + %l7], %o0
3233 ldx [%l0 + %l4], %o0
3237 ldx [%l1 + %l5], %o5
3241 ldx [%l2 + %l6], %g5
3245 ldx [%l3 + %l7], %o0
3253 ldx [%l0 + %l4], %o0
3257 ldx [%l1 + %l5], %o5
3260 ldx [%l2 + %l6], %g5
3264 ldx [%l3 + %l7], %o0
3272 ldx [%l0 + %l4], %o0
3276 ldx [%l1 + %l5], %o5
3279 ldx [%l2 + %l6], %g5
3283 ldx [%l3 + %l7], %o0
3291 ldx [%l0 + %l4], %o0
3295 ldx [%l1 + %l5], %o5
3298 ldx [%l2 + %l6], %g5
3302 ldx [%l3 + %l7], %o0
3311 ldx [%l0 + %l4], %o0
3315 ldx [%l1 + %l5], %o5
3319 ldx [%l2 + %l6], %g5
3323 ldx [%l3 + %l7], %o0
3331 ldx [%l0 + %l4], %o0
3335 ldx [%l1 + %l5], %o5
3338 ldx [%l2 + %l6], %g5
3342 ldx [%l3 + %l7], %o0
3350 ldx [%l0 + %l4], %o0
3354 ldx [%l1 + %l5], %o5
3357 ldx [%l2 + %l6], %g5
3361 ldx [%l3 + %l7], %o0
3369 ldx [%l0 + %l4], %o0
3373 ldx [%l1 + %l5], %o5
3376 ldx [%l2 + %l6], %g5
3381 ldx [%l3 + %l7], %o0
! Temporary fix-up, per the original remarks on these three lines: shift
! %l4 and %l5 right by one bit and advance %i0 by 32 bytes. %i0 is the
! first incoming argument register, i.e. rk in the prototype comment.
! NOTE(review): 32 bytes is eight uint32_t words; presumably this steps
! past the round keys already consumed - confirm.
3390 srl %l4, 1, %l4 !***** should be removed after
3391 srl %l5, 1, %l5 !***** unrolling the loop and correcting
3392 add %i0, 32, %i0 !***** the last iteration
! Symbol bookkeeping: type code 2 marks the symbol as a function; its
! size is the distance from the aes_decrypt_impl label to this point.
3509 .type aes_decrypt_impl,2
3510 .size aes_decrypt_impl,(.-aes_decrypt_impl)
! Select the allocatable .text section again.
! NOTE(review): whatever is emitted into it after this directive is not
! visible here.
3513 .section ".text",#alloc
! The records below are non-executable metadata: the .xstabs line adds an
! entry to the ".stab.index" section holding compiler options and version,
! and the .ident lines record tool version strings in the object.
! NOTE(review): presumably carried over from the compiler-generated .s
! that the file header says this file was derived from.
5868 ! Begin Disassembling Stabs
5869 .xstabs ".stab.index","Xa ; O ; P ; V=3.1 ; R=WorkShop Compilers 5.0 99/02/25 C 5.0 patch 107289-01",60,0,0,0 ! (/tmp/acompAAA5jaWsZ:1)
5870 ! End Disassembling Stabs
5872 ! Begin Disassembling Ident
5873 .ident "cg: WorkShop Compilers 5.0 99/04/15 Compiler Common 5.0 Patch 107357-02" ! (NO SOURCE LINE)
5874 .ident "acomp: WorkShop Compilers 5.0 99/02/25 C 5.0 patch 107289-01" ! (/tmp/acompAAA5jaWsZ:4675)
5875 ! End Disassembling Ident
5877 #endif /* lint || __lint */