1 //===-- Memory utils --------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC_MEMORY_UTILS_UTILS_H
10 #define LLVM_LIBC_SRC_MEMORY_UTILS_UTILS_H
12 #include "src/__support/CPP/bit.h"
13 #include "src/__support/CPP/cstddef.h"
14 #include "src/__support/CPP/type_traits.h"
15 #include "src/__support/endian.h"
16 #include "src/__support/macros/attributes.h" // LIBC_INLINE
17 #include "src/__support/macros/config.h" // LIBC_HAS_BUILTIN
19 #include <stddef.h> // size_t
20 #include <stdint.h> // intptr_t / uintptr_t
22 namespace __llvm_libc
{
// Allows compile time error reporting in `if constexpr` branches.
// Instantiating this function (flag defaults to false) triggers the
// static_assert, surfacing `msg` at the call site in the compiler output.
template <bool flag = false>
static void deferred_static_assert(const char *msg) {
  static_assert(flag, "compilation error");
  (void)msg; // msg only serves as in-source documentation of the error.
}
// Return whether `value` is zero or a power of two.
// Classic bit trick: clearing the lowest set bit of a power of two (or 0)
// yields zero.
static constexpr bool is_power2_or_zero(size_t value) {
  return (value & (value - 1U)) == 0;
}
36 // Return whether `value` is a power of two.
37 static constexpr bool is_power2(size_t value
) {
38 return value
&& is_power2_or_zero(value
);
// Compile time version of log2 that handles 0.
// Returns floor(log2(value)) for value >= 1, and 0 for value == 0.
static constexpr size_t log2(size_t value) {
  return (value == 0 || value == 1) ? 0 : 1 + log2(value / 2);
}
46 // Returns the first power of two preceding value or value if it is already a
47 // power of two (or 0 when value is 0).
48 static constexpr size_t le_power2(size_t value
) {
49 return value
== 0 ? value
: 1ULL << log2(value
);
52 // Returns the first power of two following value or value if it is already a
53 // power of two (or 0 when value is 0).
54 static constexpr size_t ge_power2(size_t value
) {
55 return is_power2_or_zero(value
) ? value
: 1ULL << (log2(value
) + 1);
58 // Returns the number of bytes to substract from ptr to get to the previous
59 // multiple of alignment. If ptr is already aligned returns 0.
60 template <size_t alignment
> uintptr_t distance_to_align_down(const void *ptr
) {
61 static_assert(is_power2(alignment
), "alignment must be a power of 2");
62 return reinterpret_cast<uintptr_t>(ptr
) & (alignment
- 1U);
65 // Returns the number of bytes to add to ptr to get to the next multiple of
66 // alignment. If ptr is already aligned returns 0.
67 template <size_t alignment
> uintptr_t distance_to_align_up(const void *ptr
) {
68 static_assert(is_power2(alignment
), "alignment must be a power of 2");
69 // The logic is not straightforward and involves unsigned modulo arithmetic
70 // but the generated code is as fast as it can be.
71 return -reinterpret_cast<uintptr_t>(ptr
) & (alignment
- 1U);
74 // Returns the number of bytes to add to ptr to get to the next multiple of
75 // alignment. If ptr is already aligned returns alignment.
76 template <size_t alignment
>
77 uintptr_t distance_to_next_aligned(const void *ptr
) {
78 return alignment
- distance_to_align_down
<alignment
>(ptr
);
// Returns the same pointer but notifies the compiler that it is aligned.
// Precondition: ptr must actually be `alignment`-aligned; lying to the
// compiler here is undefined behavior.
template <size_t alignment, typename T> static T *assume_aligned(T *ptr) {
  return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
}
86 #if LIBC_HAS_BUILTIN(__builtin_memcpy_inline)
87 #define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
90 #if LIBC_HAS_BUILTIN(__builtin_memset_inline)
91 #define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE
94 // Performs a constant count copy.
95 template <size_t Size
>
96 LIBC_INLINE
void memcpy_inline(void *__restrict dst
,
97 const void *__restrict src
) {
98 #ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
99 __builtin_memcpy_inline(dst
, src
, Size
);
101 // In memory functions `memcpy_inline` is instantiated several times with
102 // different value of the Size parameter. This doesn't play well with GCC's
103 // Value Range Analysis that wrongly detects out of bounds accesses. We disable
104 // the 'array-bounds' warning for the purpose of this function.
105 #pragma GCC diagnostic push
106 #pragma GCC diagnostic ignored "-Warray-bounds"
107 for (size_t i
= 0; i
< Size
; ++i
)
108 static_cast<char *>(dst
)[i
] = static_cast<const char *>(src
)[i
];
109 #pragma GCC diagnostic pop
113 using Ptr
= cpp::byte
*; // Pointer to raw data.
114 using CPtr
= const cpp::byte
*; // Const pointer to raw data.
116 // This type makes sure that we don't accidentally promote an integral type to
117 // another one. It is only constructible from the exact T type.
118 template <typename T
> struct StrictIntegralType
{
119 static_assert(cpp::is_integral_v
<T
>);
121 // Can only be constructed from a T.
122 template <typename U
, cpp::enable_if_t
<cpp::is_same_v
<U
, T
>, bool> = 0>
123 StrictIntegralType(U value
) : value(value
) {}
125 // Allows using the type in an if statement.
126 explicit operator bool() const { return value
; }
128 // If type is unsigned (bcmp) we allow bitwise OR operations.
129 StrictIntegralType
operator|(const StrictIntegralType
&Rhs
) const {
130 static_assert(!cpp::is_signed_v
<T
>);
131 return value
| Rhs
.value
;
134 // For interation with the C API we allow explicit conversion back to the
136 explicit operator int() const {
137 // bit_cast makes sure that T and int have the same size.
138 return cpp::bit_cast
<int>(value
);
141 // Helper to get the zero value.
142 LIBC_INLINE
static constexpr StrictIntegralType
ZERO() { return {T(0)}; }
143 LIBC_INLINE
static constexpr StrictIntegralType
NONZERO() { return {T(1)}; }
149 using MemcmpReturnType
= StrictIntegralType
<int32_t>;
150 using BcmpReturnType
= StrictIntegralType
<uint32_t>;
152 // Loads bytes from memory (possibly unaligned) and materializes them as
154 template <typename T
> LIBC_INLINE T
load(CPtr ptr
) {
156 memcpy_inline
<sizeof(T
)>(&Out
, ptr
);
160 // Stores a value of type T in memory (possibly unaligned).
161 template <typename T
> LIBC_INLINE
void store(Ptr ptr
, T value
) {
162 memcpy_inline
<sizeof(T
)>(ptr
, &value
);
165 // On architectures that do not allow for unaligned access we perform several
166 // aligned accesses and recombine them through shifts and logicals operations.
167 // For instance, if we know that the pointer is 2-byte aligned we can decompose
168 // a 64-bit operation into four 16-bit operations.
170 // Loads a 'ValueType' by decomposing it into several loads that are assumed to
172 // e.g. load_aligned<uint32_t, uint16_t, uint16_t>(ptr);
173 template <typename ValueType
, typename T
, typename
... TS
>
174 ValueType
load_aligned(CPtr src
) {
175 static_assert(sizeof(ValueType
) >= (sizeof(T
) + ... + sizeof(TS
)));
176 const ValueType value
= load
<T
>(assume_aligned
<sizeof(T
)>(src
));
177 if constexpr (sizeof...(TS
) > 0) {
178 constexpr size_t shift
= sizeof(T
) * 8;
179 const ValueType next
= load_aligned
<ValueType
, TS
...>(src
+ sizeof(T
));
180 if constexpr (Endian::IS_LITTLE
)
181 return value
| (next
<< shift
);
182 else if constexpr (Endian::IS_BIG
)
183 return (value
<< shift
) | next
;
185 deferred_static_assert("Invalid endianness");
191 // Alias for loading a 'uint32_t'.
192 template <typename T
, typename
... TS
>
193 auto load32_aligned(CPtr src
, size_t offset
) {
194 static_assert((sizeof(T
) + ... + sizeof(TS
)) == sizeof(uint32_t));
195 return load_aligned
<uint32_t, T
, TS
...>(src
+ offset
);
198 // Alias for loading a 'uint64_t'.
199 template <typename T
, typename
... TS
>
200 auto load64_aligned(CPtr src
, size_t offset
) {
201 static_assert((sizeof(T
) + ... + sizeof(TS
)) == sizeof(uint64_t));
202 return load_aligned
<uint64_t, T
, TS
...>(src
+ offset
);
205 // Stores a 'ValueType' by decomposing it into several stores that are assumed
207 // e.g. store_aligned<uint32_t, uint16_t, uint16_t>(value, ptr);
208 template <typename ValueType
, typename T
, typename
... TS
>
209 void store_aligned(ValueType value
, Ptr dst
) {
210 static_assert(sizeof(ValueType
) >= (sizeof(T
) + ... + sizeof(TS
)));
211 constexpr size_t shift
= sizeof(T
) * 8;
212 if constexpr (Endian::IS_LITTLE
) {
213 store
<T
>(assume_aligned
<sizeof(T
)>(dst
), value
& ~T(0));
214 if constexpr (sizeof...(TS
) > 0)
215 store_aligned
<ValueType
, TS
...>(value
>> shift
, dst
+ sizeof(T
));
216 } else if constexpr (Endian::IS_BIG
) {
217 constexpr size_t OFFSET
= (0 + ... + sizeof(TS
));
218 store
<T
>(assume_aligned
<sizeof(T
)>(dst
+ OFFSET
), value
& ~T(0));
219 if constexpr (sizeof...(TS
) > 0)
220 store_aligned
<ValueType
, TS
...>(value
>> shift
, dst
);
222 deferred_static_assert("Invalid endianness");
226 // Alias for storing a 'uint32_t'.
227 template <typename T
, typename
... TS
>
228 void store32_aligned(uint32_t value
, Ptr dst
, size_t offset
) {
229 static_assert((sizeof(T
) + ... + sizeof(TS
)) == sizeof(uint32_t));
230 store_aligned
<uint32_t, T
, TS
...>(value
, dst
+ offset
);
233 // Alias for storing a 'uint64_t'.
234 template <typename T
, typename
... TS
>
235 void store64_aligned(uint64_t value
, Ptr dst
, size_t offset
) {
236 static_assert((sizeof(T
) + ... + sizeof(TS
)) == sizeof(uint64_t));
237 store_aligned
<uint64_t, T
, TS
...>(value
, dst
+ offset
);
240 // Advances the pointers p1 and p2 by offset bytes and decrease count by the
242 template <typename T1
, typename T2
>
243 LIBC_INLINE
void adjust(ptrdiff_t offset
, T1
*__restrict
&p1
,
244 T2
*__restrict
&p2
, size_t &count
) {
250 // Advances p1 and p2 so p1 gets aligned to the next SIZE bytes boundary
251 // and decrease count by the same amount.
252 // We make sure the compiler knows about the adjusted pointer alignment.
253 template <size_t SIZE
, typename T1
, typename T2
>
254 void align_p1_to_next_boundary(T1
*__restrict
&p1
, T2
*__restrict
&p2
,
256 adjust(distance_to_next_aligned
<SIZE
>(p1
), p1
, p2
, count
);
257 p1
= assume_aligned
<SIZE
>(p1
);
260 // Same as align_p1_to_next_boundary above but with a single pointer instead.
261 template <size_t SIZE
, typename T1
>
262 void align_to_next_boundary(T1
*&p1
, size_t &count
) {
264 align_p1_to_next_boundary
<SIZE
>(p1
, dummy
, count
);
// An enum class that discriminates between the first and second pointer.
// Dst/Src are aliases for P1/P2 so memcpy-style code reads naturally.
enum class Arg { P1, P2, Dst = P1, Src = P2 };
270 // Same as align_p1_to_next_boundary but allows for aligning p2 instead of p1.
271 // Precondition: &p1 != &p2
272 template <size_t SIZE
, Arg AlignOn
, typename T1
, typename T2
>
273 void align_to_next_boundary(T1
*__restrict
&p1
, T2
*__restrict
&p2
,
275 if constexpr (AlignOn
== Arg::P1
)
276 align_p1_to_next_boundary
<SIZE
>(p1
, p2
, count
);
277 else if constexpr (AlignOn
== Arg::P2
)
278 align_p1_to_next_boundary
<SIZE
>(p2
, p1
, count
); // swapping p1 and p2.
280 deferred_static_assert("AlignOn must be either Arg::P1 or Arg::P2");
283 } // namespace __llvm_libc
285 #endif // LLVM_LIBC_SRC_MEMORY_UTILS_UTILS_H