usr/src/common/crypto/md5/md5_byteswap.h

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26
  27 #ifndef _MD5_BYTESWAP_H
  28 #define _MD5_BYTESWAP_H
  29
  30 /*
  31  * definitions for inline functions for little-endian loads.
  32  *
  33  * This file has special definitions for UltraSPARC architectures,
  34  * which have a special address space identifier for loading 32 and 16 bit
  35  * integers in little-endian byte order.
  36  *
  37  * This file and common/crypto/md5/sparc/sun4[uv]/byteswap.il implement the
  38  * same thing and must be changed together.
  39  */
  40
  41 #include <sys/types.h>
  42 #if defined(__sparc)
  43 #include <v9/sys/asi.h>
  44 #elif defined(_LITTLE_ENDIAN)
  45 #include <sys/byteorder.h>
  46 #endif
  47
  48 #ifdef  __cplusplus
  49 extern "C" {
  50 #endif
  51
  52 #if defined(_LITTLE_ENDIAN)
  53
  54 /*
  55  * Little-endian optimization:  I don't need to do any weirdness.   On
  56  * some little-endian boxen, I'll have to do alignment checks, but I can do
  57  * that below.
  58  */
  59
  60 #if !defined(__i386) && !defined(__amd64)
  61 /*
  62  * i386 and amd64 don't require aligned 4-byte loads.  The symbol
  63  * _MD5_CHECK_ALIGNMENT indicates below whether the MD5Transform function
  64  * requires alignment checking.
  65  */
  66 #define _MD5_CHECK_ALIGNMENT
  67 #endif /* !__i386 && !__amd64 */
  68
  69 #define LOAD_LITTLE_32(addr)    (*(uint32_t *)(void *)(addr))
  70
  71 #else   /* !_LITTLE_ENDIAN */
  72
  73 /*
  74  * sparc v9/v8plus optimization:
  75  *
  76  * on the sparc v9/v8plus, we can load data little endian.  however, since
  77  * the compiler doesn't have direct support for little endian, we
  78  * link to an assembly-language routine `load_little_32' to do
  79  * the magic.  note that special care must be taken to ensure the
  80  * address is 32-bit aligned -- in the interest of speed, we don't
  81  * check to make sure, since careful programming can guarantee this
  82  * for us.
  83  */
  84 #if defined(sun4u)
  85
  86 /* Define alignment check because we can 4-byte load as little endian. */
  87 #define _MD5_CHECK_ALIGNMENT
  88 #define LOAD_LITTLE_32(addr)    load_little_32((uint32_t *)(void *)(addr))
  89
  90 static __inline__ uint32_t
  91 load_little_32(uint32_t *addr)
  92 {
  93         uint32_t value;
  94
  95         __asm__(
  96             "lduwa      [%1] %2, %0\n\t"
  97             : "=r" (value)
  98             : "r" (addr), "i" (ASI_PL));
  99
 100         return (value);
 101 }
 102
 103 #elif defined(_LITTLE_ENDIAN)
 104 #define LOAD_LITTLE_32(addr)    htonl(addr)
 105
 106 #else
 107 /* big endian -- will work on little endian, but slowly */
 108 /* Since we do byte operations, we don't have to check for alignment. */
 109 #define LOAD_LITTLE_32(addr)    \
 110         ((addr)[0] | ((addr)[1] << 8) | ((addr)[2] << 16) | ((addr)[3] << 24))
 111 #endif  /* sun4u */
 112
 113 #if defined(sun4v)
 114
 115 /*
 116  * For N1 want to minimize number of arithmetic operations. This is best
 117  * achieved by using the %asi register to specify ASI for the lduwa operations.
 118  * Also, have a separate inline template for each word, so can utilize the
 119  * immediate offset in lduwa, without relying on the compiler to do the right
 120  * thing.
 121  *
 122  * Moving to 64-bit loads might also be beneficial.
 123  */
 124 #define LOAD_LITTLE_32_0(addr)  load_little_32_0((uint32_t *)(addr))
 125 #define LOAD_LITTLE_32_1(addr)  load_little_32_1((uint32_t *)(addr))
 126 #define LOAD_LITTLE_32_2(addr)  load_little_32_2((uint32_t *)(addr))
 127 #define LOAD_LITTLE_32_3(addr)  load_little_32_3((uint32_t *)(addr))
 128 #define LOAD_LITTLE_32_4(addr)  load_little_32_4((uint32_t *)(addr))
 129 #define LOAD_LITTLE_32_5(addr)  load_little_32_5((uint32_t *)(addr))
 130 #define LOAD_LITTLE_32_6(addr)  load_little_32_6((uint32_t *)(addr))
 131 #define LOAD_LITTLE_32_7(addr)  load_little_32_7((uint32_t *)(addr))
 132 #define LOAD_LITTLE_32_8(addr)  load_little_32_8((uint32_t *)(addr))
 133 #define LOAD_LITTLE_32_9(addr)  load_little_32_9((uint32_t *)(addr))
 134 #define LOAD_LITTLE_32_a(addr)  load_little_32_a((uint32_t *)(addr))
 135 #define LOAD_LITTLE_32_b(addr)  load_little_32_b((uint32_t *)(addr))
 136 #define LOAD_LITTLE_32_c(addr)  load_little_32_c((uint32_t *)(addr))
 137 #define LOAD_LITTLE_32_d(addr)  load_little_32_d((uint32_t *)(addr))
 138 #define LOAD_LITTLE_32_e(addr)  load_little_32_e((uint32_t *)(addr))
 139 #define LOAD_LITTLE_32_f(addr)  load_little_32_f((uint32_t *)(addr))
 140
 141 /*
 142  * This actually sets the ASI register, not necessarily to ASI_PL.
 143  */
 144 static __inline__ void
 145 set_little(uint8_t asi)
 146 {
 147         __asm__ __volatile__(
 148             "wr %%g0, %0, %%asi\n\t"
 149             : /* Nothing */
 150             : "r" (asi));
 151 }
 152
 153 static __inline__ uint8_t
 154 get_little(void)
 155 {
 156         uint8_t asi;
 157
 158         __asm__ __volatile__(
 159             "rd %%asi, %0\n\t"
 160             : "=r" (asi));
 161
 162         return (asi);
 163 }
 164
 165 /*
 166  * We have 16 functions which differ only in the offset from which they
 167  * load.  Use this preprocessor template to simplify maintenance.  Its
 168  * argument is the offset in hex, without the 0x.
 169  */
 170 #define LL_TEMPLATE(__off)                      \
 171 static __inline__ uint32_t                      \
 172 load_little_32_##__off(uint32_t *addr)          \
 173 {                                               \
 174         uint32_t value;                         \
 175         __asm__(                                \
 176                 "lduwa  [%1 + %2]%%asi, %0\n\t" \
 177         : "=r" (value)                          \
 178         : "r" (addr), "i" ((0x##__off) << 2));  \
 179         return (value);                         \
 180 }
 181
 182 LL_TEMPLATE(0)
 183 LL_TEMPLATE(1)
 184 LL_TEMPLATE(2)
 185 LL_TEMPLATE(3)
 186 LL_TEMPLATE(4)
 187 LL_TEMPLATE(5)
 188 LL_TEMPLATE(6)
 189 LL_TEMPLATE(7)
 190 LL_TEMPLATE(8)
 191 LL_TEMPLATE(9)
 192 LL_TEMPLATE(a)
 193 LL_TEMPLATE(b)
 194 LL_TEMPLATE(c)
 195 LL_TEMPLATE(d)
 196 LL_TEMPLATE(e)
 197 LL_TEMPLATE(f)
 198 #undef  LL_TEMPLATE
 199
 200 #endif  /* sun4v */
 201
 202 #endif  /* _LITTLE_ENDIAN */
 203
 204 #ifdef  __cplusplus
 205 }
 206 #endif
 207
 208 #endif  /* !_MD5_BYTESWAP_H */