 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
27 #ifndef _MD5_BYTESWAP_H
28 #define _MD5_BYTESWAP_H
 * definitions for inline functions for little-endian loads.
 *
 * This file has special definitions for UltraSPARC architectures,
 * which have a special address space identifier for loading 32 and 16 bit
 * integers in little-endian byte order.
 *
 * This file and common/crypto/md5/sparc/sun4[uv]/byteswap.il implement the
 * same thing and must be changed together.
41 #include <sys/types.h>
43 #include <v9/sys/asi.h>
44 #elif defined(_LITTLE_ENDIAN)
45 #include <sys/byteorder.h>
52 #if defined(_LITTLE_ENDIAN)
 * Little-endian optimization:  I don't need to do any weirdness.  On
 * some little-endian boxen, I'll have to do alignment checks, but I can do
#if !defined(__i386) && !defined(__amd64)
/*
 * i386 and amd64 don't require aligned 4-byte loads.  The symbol
 * _MD5_CHECK_ALIGNMENT indicates below whether the MD5Transform function
 * requires alignment checking.
 */
#define	_MD5_CHECK_ALIGNMENT
#endif	/* !__i386 && !__amd64 */
/*
 * Direct 4-byte load: on a little-endian CPU the MD5 input is already
 * in the desired byte order.  The intermediate (void *) cast silences
 * alignment warnings; actual alignment is the caller's responsibility
 * (see the _MD5_CHECK_ALIGNMENT discussion above).
 */
#define	LOAD_LITTLE_32(addr)	(*(uint32_t *)(void *)(addr))
71 #else /* !_LITTLE_ENDIAN */
 * sparc v9/v8plus optimization:
 *
 * on the sparc v9/v8plus, we can load data little endian.  however, since
 * the compiler doesn't have direct support for little endian, we
 * link to an assembly-language routine `load_little_32' to do
 * the magic.  note that special care must be taken to ensure the
 * address is 32-bit aligned -- in the interest of speed, we don't
 * check to make sure, since careful programming can guarantee this
/* Define alignment check because we can 4-byte load as little endian. */
#define	_MD5_CHECK_ALIGNMENT
#define	LOAD_LITTLE_32(addr)	load_little_32((uint32_t *)(void *)(addr))
90 static __inline__
uint32_t
91 load_little_32(uint32_t *addr
)
96 "lduwa [%1] %2, %0\n\t"
98 : "r" (addr
), "i" (ASI_PL
));
103 #elif defined(_LITTLE_ENDIAN)
/*
 * NOTE(review): unlike the other LOAD_LITTLE_32 variants, this one
 * passes `addr' to htonl() by value rather than dereferencing it --
 * looks inconsistent with callers that pass an address; confirm
 * against the original source before relying on this configuration.
 */
#define	LOAD_LITTLE_32(addr)	htonl(addr)
/* big endian -- will work on little endian, but slowly */
/* Since we do byte operations, we don't have to check for alignment. */
/*
 * Byte-at-a-time assembly of a 32-bit little-endian word.  The casts to
 * uint32_t keep every byte in unsigned arithmetic: without them the
 * bytes promote to (signed) int, and `(addr)[3] << 24' is undefined
 * behavior whenever the top byte has its high bit set.
 */
#define	LOAD_LITTLE_32(addr)	\
	((uint32_t)(addr)[0] | ((uint32_t)(addr)[1] << 8) | \
	((uint32_t)(addr)[2] << 16) | ((uint32_t)(addr)[3] << 24))
 * For N1 want to minimize number of arithmetic operations.  This is best
 * achieved by using the %asi register to specify ASI for the lduwa operations.
 * Also, have a separate inline template for each word, so can utilize the
 * immediate offset in lduwa, without relying on the compiler to do the right
 * thing.
 *
 * Moving to 64-bit loads might also be beneficial.
/*
 * One accessor macro per 32-bit word of the 16-word MD5 block; a
 * separate inline per word lets lduwa use its immediate-offset form
 * (see the LL_TEMPLATE definitions below).
 */
#define	LOAD_LITTLE_32_0(addr)	load_little_32_0((uint32_t *)(addr))
#define	LOAD_LITTLE_32_1(addr)	load_little_32_1((uint32_t *)(addr))
#define	LOAD_LITTLE_32_2(addr)	load_little_32_2((uint32_t *)(addr))
#define	LOAD_LITTLE_32_3(addr)	load_little_32_3((uint32_t *)(addr))
#define	LOAD_LITTLE_32_4(addr)	load_little_32_4((uint32_t *)(addr))
#define	LOAD_LITTLE_32_5(addr)	load_little_32_5((uint32_t *)(addr))
#define	LOAD_LITTLE_32_6(addr)	load_little_32_6((uint32_t *)(addr))
#define	LOAD_LITTLE_32_7(addr)	load_little_32_7((uint32_t *)(addr))
#define	LOAD_LITTLE_32_8(addr)	load_little_32_8((uint32_t *)(addr))
#define	LOAD_LITTLE_32_9(addr)	load_little_32_9((uint32_t *)(addr))
#define	LOAD_LITTLE_32_a(addr)	load_little_32_a((uint32_t *)(addr))
#define	LOAD_LITTLE_32_b(addr)	load_little_32_b((uint32_t *)(addr))
#define	LOAD_LITTLE_32_c(addr)	load_little_32_c((uint32_t *)(addr))
#define	LOAD_LITTLE_32_d(addr)	load_little_32_d((uint32_t *)(addr))
#define	LOAD_LITTLE_32_e(addr)	load_little_32_e((uint32_t *)(addr))
#define	LOAD_LITTLE_32_f(addr)	load_little_32_f((uint32_t *)(addr))
/*
 * This actually sets the ASI register, not necessarily to ASI_PL.
 *
 * NOTE(review): reconstructed -- the sampled source was missing the
 * body framing (braces and the asm operand lists).  Confirm against
 * the original file.
 */
static __inline__ void
set_little(uint8_t asi)
{
	__asm__ __volatile__(
	    "wr	%%g0, %0, %%asi\n\t"
	    : /* no outputs */
	    : "r" (asi));
}
/*
 * NOTE(review): truncated fragment.  Judging by the uint8_t return type
 * and the set_little() routine above, this appears to be the remains of
 * an inline that reads the current %asi register value back -- but its
 * name, parameter list, operands, and body are not visible in this
 * chunk, so the text is left byte-identical rather than reconstructed.
 * TODO: restore from the original source file.
 */
153 static __inline__
uint8_t
158 __asm__
__volatile__(
/*
 * We have 16 functions which differ only in the offset from which they
 * load.  Use this preprocessor template to simplify maintenance.  Its
 * argument is the offset in hex, without the 0x.
 *
 * NOTE(review): the template interior was reconstructed from the
 * fragments visible here (declaration, lduwa line, input operands);
 * confirm the output operand and return statement against the
 * original file.
 */
#define	LL_TEMPLATE(__off)			\
static __inline__ uint32_t			\
load_little_32_##__off(uint32_t *addr)		\
{						\
	uint32_t value;				\
	__asm__(				\
	    "lduwa	[%1 + %2]%%asi, %0\n\t"	\
	    : "=r" (value)			\
	    : "r" (addr), "i" ((0x##__off) << 2)); \
	return (value);				\
}
202 #endif /* _LITTLE_ENDIAN */
208 #endif /* !_MD5_BYTESWAP_H */