arch/sh/include/asm/unaligned-sh4a.h
#ifndef __ASM_SH_UNALIGNED_SH4A_H
#define __ASM_SH_UNALIGNED_SH4A_H

/*
 * SH-4A has support for unaligned 32-bit loads, and 32-bit loads only.
 * Support for 64-bit accesses is done through shifting and masking
 * relative to the endianness. Unaligned stores are not supported by the
 * instruction encoding, so these continue to use the packed struct.
 *
 * The same note as with the movli.l/movco.l pair applies here: as long
 * as the load is guaranteed to be inlined, nothing else will hook into
 * r0, so we get the return value for free.
 *
 * NOTE: Due to the fact we require r0 encoding, care should be taken to
 * avoid mixing these heavily with other r0 consumers, such as the atomic
 * ops. Failure to adhere to this can result in the compiler running out
 * of spill registers and blowing up when building at low optimization
 * levels. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34777.
 */
#include <linux/unaligned/packed_struct.h>
#include <linux/types.h>
#include <asm/byteorder.h>
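
/*
 * Note: <linux/unaligned/packed_struct.h> is what supplies the
 * __put_unaligned_cpu{16,32,64}() packed-struct stores used by the
 * native-endian put_unaligned_*() paths further down.
 */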

static inline u16 sh4a_get_unaligned_cpu16(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
	return p[0] | p[1] << 8;
#else
	return p[0] << 8 | p[1];
#endif
}
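
/*
 * movua.l performs the unaligned 32-bit load; the "z" constraint pins
 * the output to r0, the only destination register the instruction
 * encoding allows (hence the r0 pressure warning above).
 */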
static __always_inline u32 sh4a_get_unaligned_cpu32(const u8 *p)
{
	unsigned long unaligned;

	__asm__ __volatile__ (
		"movua.l @%1, %0\n\t"
		 : "=z" (unaligned)
		 : "r" (p)
	);

	return unaligned;
}

/*
 * Even though movua.l supports auto-increment on the read side, it can
 * only store to r0 due to instruction encoding constraints, so just let
 * the compiler sort it out on its own.
 */
static inline u64 sh4a_get_unaligned_cpu64(const u8 *p)
{
#ifdef __LITTLE_ENDIAN
	return (u64)sh4a_get_unaligned_cpu32(p + 4) << 32 |
		    sh4a_get_unaligned_cpu32(p);
#else
	return (u64)sh4a_get_unaligned_cpu32(p) << 32 |
		    sh4a_get_unaligned_cpu32(p + 4);
#endif
}
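
/*
 * The endian-specific accessors below run the native-order loads above
 * through the usual cpu<->le/be conversions, so on the matching
 * endianness they reduce to the bare movua.l/byte loads.
 */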
static inline u16 get_unaligned_le16(const void *p)
{
	return le16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_le32(const void *p)
{
	return le32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_le64(const void *p)
{
	return le64_to_cpu(sh4a_get_unaligned_cpu64(p));
}

static inline u16 get_unaligned_be16(const void *p)
{
	return be16_to_cpu(sh4a_get_unaligned_cpu16(p));
}

static inline u32 get_unaligned_be32(const void *p)
{
	return be32_to_cpu(sh4a_get_unaligned_cpu32(p));
}

static inline u64 get_unaligned_be64(const void *p)
{
	return be64_to_cpu(sh4a_get_unaligned_cpu64(p));
}
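
/*
 * Illustration only, not part of the original header: a caller pulling
 * a little-endian 32-bit field out of a byte stream (the "buf" name is
 * hypothetical) would do something like
 *
 *	u32 len = get_unaligned_le32(buf);
 *
 * which on a little-endian SH-4A kernel compiles down to a single
 * movua.l into r0.
 */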

static inline void nonnative_put_le16(u16 val, u8 *p)
{
	*p++ = val;
	*p++ = val >> 8;
}

static inline void nonnative_put_le32(u32 val, u8 *p)
{
	nonnative_put_le16(val, p);
	nonnative_put_le16(val >> 16, p + 2);
}

static inline void nonnative_put_le64(u64 val, u8 *p)
{
	nonnative_put_le32(val, p);
	nonnative_put_le32(val >> 32, p + 4);
}

static inline void nonnative_put_be16(u16 val, u8 *p)
{
	*p++ = val >> 8;
	*p++ = val;
}

static inline void nonnative_put_be32(u32 val, u8 *p)
{
	nonnative_put_be16(val >> 16, p);
	nonnative_put_be16(val, p + 2);
}

static inline void nonnative_put_be64(u64 val, u8 *p)
{
	nonnative_put_be32(val >> 32, p);
	nonnative_put_be32(val, p + 4);
}
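
/*
 * The put_unaligned_* wrappers pick the packed-struct store when the
 * requested byte order matches the CPU, and fall back to the byte-wise
 * nonnative helpers above otherwise, since the instruction encoding has
 * no unaligned store.
 */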
static inline void put_unaligned_le16(u16 val, void *p)
{
#ifdef __LITTLE_ENDIAN
	__put_unaligned_cpu16(val, p);
#else
	nonnative_put_le16(val, p);
#endif
}

static inline void put_unaligned_le32(u32 val, void *p)
{
#ifdef __LITTLE_ENDIAN
	__put_unaligned_cpu32(val, p);
#else
	nonnative_put_le32(val, p);
#endif
}

static inline void put_unaligned_le64(u64 val, void *p)
{
#ifdef __LITTLE_ENDIAN
	__put_unaligned_cpu64(val, p);
#else
	nonnative_put_le64(val, p);
#endif
}

static inline void put_unaligned_be16(u16 val, void *p)
{
#ifdef __BIG_ENDIAN
	__put_unaligned_cpu16(val, p);
#else
	nonnative_put_be16(val, p);
#endif
}

static inline void put_unaligned_be32(u32 val, void *p)
{
#ifdef __BIG_ENDIAN
	__put_unaligned_cpu32(val, p);
#else
	nonnative_put_be32(val, p);
#endif
}

static inline void put_unaligned_be64(u64 val, void *p)
{
#ifdef __BIG_ENDIAN
	__put_unaligned_cpu64(val, p);
#else
	nonnative_put_be64(val, p);
#endif
}

/*
 * While it's a bit non-obvious, even though the generic le/be wrappers
 * use the __get/put_xxx prefixing, they actually wrap into the
 * non-prefixed get/put_xxx variants provided above.
 */
#include <linux/unaligned/generic.h>

#ifdef __LITTLE_ENDIAN
# define get_unaligned	__get_unaligned_le
# define put_unaligned	__put_unaligned_le
#else
# define get_unaligned	__get_unaligned_be
# define put_unaligned	__put_unaligned_be
#endif

#endif /* __ASM_SH_UNALIGNED_SH4A_H */