libc/src/string/memory_utils/utils.h

   1 //===-- Memory utils --------------------------------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
  10 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H
  11
  12 #include "src/__support/CPP/bit.h"
  13 #include "src/__support/CPP/cstddef.h"
  14 #include "src/__support/CPP/type_traits.h"
  15 #include "src/__support/endian.h"
  16 #include "src/__support/macros/attributes.h" // LIBC_INLINE
  17 #include "src/__support/macros/config.h"     // LIBC_HAS_BUILTIN
  18 #include "src/__support/macros/properties/architectures.h"
  19
  20 #include <stddef.h> // size_t
  21 #include <stdint.h> // intptr_t / uintptr_t / INT32_MAX / INT32_MIN
  22
  23 namespace LIBC_NAMESPACE {
  24
  25 // Allows compile time error reporting in `if constexpr` branches.
  26 template <bool flag = false>
  27 LIBC_INLINE void deferred_static_assert(const char *msg) {
  28   static_assert(flag, "compilation error");
  29   (void)msg;
  30 }
  31
  32 // Return whether `value` is zero or a power of two.
  33 LIBC_INLINE constexpr bool is_power2_or_zero(size_t value) {
  34   return (value & (value - 1U)) == 0;
  35 }
  36
  37 // Return whether `value` is a power of two.
  38 LIBC_INLINE constexpr bool is_power2(size_t value) {
  39   return value && is_power2_or_zero(value);
  40 }
  41
  42 // Compile time version of log2 that handles 0.
  43 LIBC_INLINE constexpr size_t log2s(size_t value) {
  44   return (value == 0 || value == 1) ? 0 : 1 + log2s(value / 2);
  45 }
  46
  47 // Returns the first power of two preceding value or value if it is already a
  48 // power of two (or 0 when value is 0).
  49 LIBC_INLINE constexpr size_t le_power2(size_t value) {
  50   return value == 0 ? value : 1ULL << log2s(value);
  51 }
  52
  53 // Returns the first power of two following value or value if it is already a
  54 // power of two (or 0 when value is 0).
  55 LIBC_INLINE constexpr size_t ge_power2(size_t value) {
  56   return is_power2_or_zero(value) ? value : 1ULL << (log2s(value) + 1);
  57 }
  58
  59 // Returns the number of bytes to substract from ptr to get to the previous
  60 // multiple of alignment. If ptr is already aligned returns 0.
  61 template <size_t alignment>
  62 LIBC_INLINE uintptr_t distance_to_align_down(const void *ptr) {
  63   static_assert(is_power2(alignment), "alignment must be a power of 2");
  64   return reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
  65 }
  66
  67 // Returns the number of bytes to add to ptr to get to the next multiple of
  68 // alignment. If ptr is already aligned returns 0.
  69 template <size_t alignment>
  70 LIBC_INLINE uintptr_t distance_to_align_up(const void *ptr) {
  71   static_assert(is_power2(alignment), "alignment must be a power of 2");
  72   // The logic is not straightforward and involves unsigned modulo arithmetic
  73   // but the generated code is as fast as it can be.
  74   return -reinterpret_cast<uintptr_t>(ptr) & (alignment - 1U);
  75 }
  76
  77 // Returns the number of bytes to add to ptr to get to the next multiple of
  78 // alignment. If ptr is already aligned returns alignment.
  79 template <size_t alignment>
  80 LIBC_INLINE uintptr_t distance_to_next_aligned(const void *ptr) {
  81   return alignment - distance_to_align_down<alignment>(ptr);
  82 }
  83
  84 // Returns the same pointer but notifies the compiler that it is aligned.
  85 template <size_t alignment, typename T> LIBC_INLINE T *assume_aligned(T *ptr) {
  86   return reinterpret_cast<T *>(__builtin_assume_aligned(ptr, alignment));
  87 }
  88
  89 // Returns true iff memory regions [p1, p1 + size] and [p2, p2 + size] are
  90 // disjoint.
  91 LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) {
  92   const ptrdiff_t sdiff =
  93       static_cast<const char *>(p1) - static_cast<const char *>(p2);
  94   // We use bit_cast to make sure that we don't run into accidental integer
  95   // promotion. Notably the unary minus operator goes through integer promotion
  96   // at the expression level. We assume arithmetic to be two's complement (i.e.,
  97   // bit_cast has the same behavior as a regular signed to unsigned cast).
  98   static_assert(-1 == ~0, "not 2's complement");
  99   const size_t udiff = cpp::bit_cast<size_t>(sdiff);
 100   // Integer promition would be caught here.
 101   const size_t neg_udiff = cpp::bit_cast<size_t>(-sdiff);
 102   // This is expected to compile a conditional move.
 103   return sdiff >= 0 ? size <= udiff : size <= neg_udiff;
 104 }
 105
 106 #if LIBC_HAS_BUILTIN(__builtin_memcpy_inline)
 107 #define LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
 108 #endif
 109
 110 #if LIBC_HAS_BUILTIN(__builtin_memset_inline)
 111 #define LLVM_LIBC_HAS_BUILTIN_MEMSET_INLINE
 112 #endif
 113
 114 // Performs a constant count copy.
 115 template <size_t Size>
 116 LIBC_INLINE void memcpy_inline(void *__restrict dst,
 117                                const void *__restrict src) {
 118 #ifdef LLVM_LIBC_HAS_BUILTIN_MEMCPY_INLINE
 119   __builtin_memcpy_inline(dst, src, Size);
 120 #else
 121   // In memory functions `memcpy_inline` is instantiated several times with
 122   // different value of the Size parameter. This doesn't play well with GCC's
 123   // Value Range Analysis that wrongly detects out of bounds accesses. We
 124   // disable the 'array-bounds' warning for the purpose of this function.
 125 #pragma GCC diagnostic push
 126 #pragma GCC diagnostic ignored "-Warray-bounds"
 127   for (size_t i = 0; i < Size; ++i)
 128     static_cast<char *>(dst)[i] = static_cast<const char *>(src)[i];
 129 #pragma GCC diagnostic pop
 130 #endif
 131 }
 132
 133 using Ptr = cpp::byte *;        // Pointer to raw data.
 134 using CPtr = const cpp::byte *; // Const pointer to raw data.
 135
 136 // This type makes sure that we don't accidentally promote an integral type to
 137 // another one. It is only constructible from the exact T type.
 138 template <typename T> struct StrictIntegralType {
 139   static_assert(cpp::is_integral_v<T>);
 140
 141   // Can only be constructed from a T.
 142   template <typename U, cpp::enable_if_t<cpp::is_same_v<U, T>, bool> = 0>
 143   LIBC_INLINE StrictIntegralType(U value) : value(value) {}
 144
 145   // Allows using the type in an if statement.
 146   LIBC_INLINE explicit operator bool() const { return value; }
 147
 148   // If type is unsigned (bcmp) we allow bitwise OR operations.
 149   LIBC_INLINE StrictIntegralType
 150   operator|(const StrictIntegralType &Rhs) const {
 151     static_assert(!cpp::is_signed_v<T>);
 152     return value | Rhs.value;
 153   }
 154
 155   // For interation with the C API we allow explicit conversion back to the
 156   // `int` type.
 157   LIBC_INLINE explicit operator int() const {
 158     // bit_cast makes sure that T and int have the same size.
 159     return cpp::bit_cast<int>(value);
 160   }
 161
 162   // Helper to get the zero value.
 163   LIBC_INLINE static constexpr StrictIntegralType ZERO() { return {T(0)}; }
 164   LIBC_INLINE static constexpr StrictIntegralType NONZERO() { return {T(1)}; }
 165
 166 private:
 167   T value;
 168 };
 169
 170 using MemcmpReturnType = StrictIntegralType<int32_t>;
 171 using BcmpReturnType = StrictIntegralType<uint32_t>;
 172
 173 // This implements the semantic of 'memcmp' returning a negative value when 'a'
 174 // is less than 'b', '0' when 'a' equals 'b' and a positive number otherwise.
 175 LIBC_INLINE MemcmpReturnType cmp_uint32_t(uint32_t a, uint32_t b) {
 176   // We perform the difference as an int64_t.
 177   const int64_t diff = static_cast<int64_t>(a) - static_cast<int64_t>(b);
 178   // For the int64_t to int32_t conversion we want the following properties:
 179   // - int32_t[31:31] == 1 iff diff < 0
 180   // - int32_t[31:0] == 0 iff diff == 0
 181
 182   // We also observe that:
 183   // - When diff < 0: diff[63:32] == 0xffffffff and diff[31:0] != 0
 184   // - When diff > 0: diff[63:32] == 0 and diff[31:0] != 0
 185   // - When diff == 0: diff[63:32] == 0 and diff[31:0] == 0
 186   // - https://godbolt.org/z/8W7qWP6e5
 187   // - This implies that we can only look at diff[32:32] for determining the
 188   // sign bit for the returned int32_t.
 189
 190   // So, we do the following:
 191   // - int32_t[31:31] = diff[32:32]
 192   // - int32_t[30:0] = diff[31:0] == 0 ? 0 : non-0.
 193
 194   // And, we can achieve the above by the expression below. We could have also
 195   // used (diff64 >> 1) | (diff64 & 0x1) but (diff64 & 0xFFFF) is faster than
 196   // (diff64 & 0x1). https://godbolt.org/z/j3b569rW1
 197   return static_cast<int32_t>((diff >> 1) | (diff & 0xFFFF));
 198 }
 199
 200 // Returns a negative value if 'a' is less than 'b' and a positive value
 201 // otherwise. This implements the semantic of 'memcmp' when we know that 'a' and
 202 // 'b' differ.
 203 LIBC_INLINE MemcmpReturnType cmp_neq_uint64_t(uint64_t a, uint64_t b) {
 204 #if defined(LIBC_TARGET_ARCH_IS_X86_64)
 205   // On x86, the best strategy would be to use 'INT32_MAX' and 'INT32_MIN' for
 206   // positive and negative value respectively as they are one value apart:
 207   //   xor     eax, eax         <- free
 208   //   cmp     rdi, rsi         <- serializing
 209   //   adc     eax, 2147483647  <- serializing
 210
 211   // Unfortunately we found instances of client code that negate the result of
 212   // 'memcmp' to reverse ordering. Because signed integers are not symmetric
 213   // (e.g., int8_t ∈ [-128, 127]) returning 'INT_MIN' would break such code as
 214   // `-INT_MIN` is not representable as an int32_t.
 215
 216   // As a consequence, we use 5 and -5 which is still OK nice in terms of
 217   // latency.
 218   //   cmp     rdi, rsi         <- serializing
 219   //   mov     ecx, -5          <- can be done in parallel
 220   //   mov     eax, 5           <- can be done in parallel
 221   //   cmovb   eax, ecx         <- serializing
 222   static constexpr int32_t POSITIVE = 5;
 223   static constexpr int32_t NEGATIVE = -5;
 224 #else
 225   // On RISC-V we simply use '1' and '-1' as it leads to branchless code.
 226   // On ARMv8, both strategies lead to the same performance.
 227   static constexpr int32_t POSITIVE = 1;
 228   static constexpr int32_t NEGATIVE = -1;
 229 #endif
 230   static_assert(POSITIVE > 0);
 231   static_assert(NEGATIVE < 0);
 232   return a < b ? NEGATIVE : POSITIVE;
 233 }
 234
 235 // Loads bytes from memory (possibly unaligned) and materializes them as
 236 // type.
 237 template <typename T> LIBC_INLINE T load(CPtr ptr) {
 238   T Out;
 239   memcpy_inline<sizeof(T)>(&Out, ptr);
 240   return Out;
 241 }
 242
 243 // Stores a value of type T in memory (possibly unaligned).
 244 template <typename T> LIBC_INLINE void store(Ptr ptr, T value) {
 245   memcpy_inline<sizeof(T)>(ptr, &value);
 246 }
 247
 248 // On architectures that do not allow for unaligned access we perform several
 249 // aligned accesses and recombine them through shifts and logicals operations.
 250 // For instance, if we know that the pointer is 2-byte aligned we can decompose
 251 // a 64-bit operation into four 16-bit operations.
 252
 253 // Loads a 'ValueType' by decomposing it into several loads that are assumed to
 254 // be aligned.
 255 // e.g. load_aligned<uint32_t, uint16_t, uint16_t>(ptr);
 256 template <typename ValueType, typename T, typename... TS>
 257 LIBC_INLINE ValueType load_aligned(CPtr src) {
 258   static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
 259   const ValueType value = load<T>(assume_aligned<sizeof(T)>(src));
 260   if constexpr (sizeof...(TS) > 0) {
 261     constexpr size_t shift = sizeof(T) * 8;
 262     const ValueType next = load_aligned<ValueType, TS...>(src + sizeof(T));
 263     if constexpr (Endian::IS_LITTLE)
 264       return value | (next << shift);
 265     else if constexpr (Endian::IS_BIG)
 266       return (value << shift) | next;
 267     else
 268       deferred_static_assert("Invalid endianness");
 269   } else {
 270     return value;
 271   }
 272 }
 273
 274 // Alias for loading a 'uint32_t'.
 275 template <typename T, typename... TS>
 276 LIBC_INLINE auto load32_aligned(CPtr src, size_t offset) {
 277   static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
 278   return load_aligned<uint32_t, T, TS...>(src + offset);
 279 }
 280
 281 // Alias for loading a 'uint64_t'.
 282 template <typename T, typename... TS>
 283 LIBC_INLINE auto load64_aligned(CPtr src, size_t offset) {
 284   static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
 285   return load_aligned<uint64_t, T, TS...>(src + offset);
 286 }
 287
 288 // Stores a 'ValueType' by decomposing it into several stores that are assumed
 289 // to be aligned.
 290 // e.g. store_aligned<uint32_t, uint16_t, uint16_t>(value, ptr);
 291 template <typename ValueType, typename T, typename... TS>
 292 LIBC_INLINE void store_aligned(ValueType value, Ptr dst) {
 293   static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS)));
 294   constexpr size_t shift = sizeof(T) * 8;
 295   if constexpr (Endian::IS_LITTLE) {
 296     store<T>(assume_aligned<sizeof(T)>(dst), value & ~T(0));
 297     if constexpr (sizeof...(TS) > 0)
 298       store_aligned<ValueType, TS...>(value >> shift, dst + sizeof(T));
 299   } else if constexpr (Endian::IS_BIG) {
 300     constexpr size_t OFFSET = (0 + ... + sizeof(TS));
 301     store<T>(assume_aligned<sizeof(T)>(dst + OFFSET), value & ~T(0));
 302     if constexpr (sizeof...(TS) > 0)
 303       store_aligned<ValueType, TS...>(value >> shift, dst);
 304   } else {
 305     deferred_static_assert("Invalid endianness");
 306   }
 307 }
 308
 309 // Alias for storing a 'uint32_t'.
 310 template <typename T, typename... TS>
 311 LIBC_INLINE void store32_aligned(uint32_t value, Ptr dst, size_t offset) {
 312   static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint32_t));
 313   store_aligned<uint32_t, T, TS...>(value, dst + offset);
 314 }
 315
 316 // Alias for storing a 'uint64_t'.
 317 template <typename T, typename... TS>
 318 LIBC_INLINE void store64_aligned(uint64_t value, Ptr dst, size_t offset) {
 319   static_assert((sizeof(T) + ... + sizeof(TS)) == sizeof(uint64_t));
 320   store_aligned<uint64_t, T, TS...>(value, dst + offset);
 321 }
 322
 323 // Advances the pointers p1 and p2 by offset bytes and decrease count by the
 324 // same amount.
 325 template <typename T1, typename T2>
 326 LIBC_INLINE void adjust(ptrdiff_t offset, T1 *__restrict &p1,
 327                         T2 *__restrict &p2, size_t &count) {
 328   p1 += offset;
 329   p2 += offset;
 330   count -= offset;
 331 }
 332
 333 // Advances p1 and p2 so p1 gets aligned to the next SIZE bytes boundary
 334 // and decrease count by the same amount.
 335 // We make sure the compiler knows about the adjusted pointer alignment.
 336 template <size_t SIZE, typename T1, typename T2>
 337 void align_p1_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
 338                                size_t &count) {
 339   adjust(distance_to_next_aligned<SIZE>(p1), p1, p2, count);
 340   p1 = assume_aligned<SIZE>(p1);
 341 }
 342
 343 // Same as align_p1_to_next_boundary above but with a single pointer instead.
 344 template <size_t SIZE, typename T1>
 345 LIBC_INLINE void align_to_next_boundary(T1 *&p1, size_t &count) {
 346   CPtr dummy;
 347   align_p1_to_next_boundary<SIZE>(p1, dummy, count);
 348 }
 349
 350 // An enum class that discriminates between the first and second pointer.
 351 enum class Arg { P1, P2, Dst = P1, Src = P2 };
 352
 353 // Same as align_p1_to_next_boundary but allows for aligning p2 instead of p1.
 354 // Precondition: &p1 != &p2
 355 template <size_t SIZE, Arg AlignOn, typename T1, typename T2>
 356 LIBC_INLINE void align_to_next_boundary(T1 *__restrict &p1, T2 *__restrict &p2,
 357                                         size_t &count) {
 358   if constexpr (AlignOn == Arg::P1)
 359     align_p1_to_next_boundary<SIZE>(p1, p2, count);
 360   else if constexpr (AlignOn == Arg::P2)
 361     align_p1_to_next_boundary<SIZE>(p2, p1, count); // swapping p1 and p2.
 362   else
 363     deferred_static_assert("AlignOn must be either Arg::P1 or Arg::P2");
 364 }
 365
 366 template <size_t SIZE> struct AlignHelper {
 367   LIBC_INLINE AlignHelper(CPtr ptr)
 368       : offset_(distance_to_next_aligned<SIZE>(ptr)) {}
 369
 370   LIBC_INLINE bool not_aligned() const { return offset_ != SIZE; }
 371   LIBC_INLINE uintptr_t offset() const { return offset_; }
 372
 373 private:
 374   uintptr_t offset_;
 375 };
 376
 377 } // namespace LIBC_NAMESPACE
 378
 379 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_UTILS_H