libc/src/string/memory_utils/op_generic.h

   1 //===-- Generic implementation of memory function building blocks ---------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file provides generic C++ building blocks.
  10 // Depending on the requested size, the block operation uses unsigned integral
  11 // types, vector types or an array of the type with the maximum size.
  12 //
  13 // The maximum size is passed as a template argument. For instance, on x86
  14 // platforms that only supports integral types the maximum size would be 8
  15 // (corresponding to uint64_t). On this platform if we request the size 32, this
  16 // would be treated as a cpp::array<uint64_t, 4>.
  17 //
  18 // On the other hand, if the platform is x86 with support for AVX the maximum
  19 // size is 32 and the operation can be handled with a single native operation.
  20 //
  21 //===----------------------------------------------------------------------===//
  22
  23 #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
  24 #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H
  25
  26 #include "src/__support/CPP/array.h"
  27 #include "src/__support/CPP/type_traits.h"
  28 #include "src/__support/common.h"
  29 #include "src/__support/endian.h"
  30 #include "src/__support/macros/optimization.h"
  31 #include "src/string/memory_utils/op_builtin.h"
  32 #include "src/string/memory_utils/utils.h"
  33
  34 #include <stdint.h>
  35
  36 namespace __llvm_libc {
  37 // Compiler types using the vector attributes.
  38 using uint8x1_t = uint8_t __attribute__((__vector_size__(1)));
  39 using uint8x2_t = uint8_t __attribute__((__vector_size__(2)));
  40 using uint8x4_t = uint8_t __attribute__((__vector_size__(4)));
  41 using uint8x8_t = uint8_t __attribute__((__vector_size__(8)));
  42 using uint8x16_t = uint8_t __attribute__((__vector_size__(16)));
  43 using uint8x32_t = uint8_t __attribute__((__vector_size__(32)));
  44 using uint8x64_t = uint8_t __attribute__((__vector_size__(64)));
  45 } // namespace __llvm_libc
  46
  47 namespace __llvm_libc::generic {
  48 // We accept three types of values as elements for generic operations:
  49 // - scalar : unsigned integral types
  50 // - vector : compiler types using the vector attributes
  51 // - array  : a cpp::array<T, N> where T is itself either a scalar or a vector.
  52 // The following traits help discriminate between these cases.
  53 template <typename T>
  54 constexpr bool is_scalar_v = cpp::is_integral_v<T> && cpp::is_unsigned_v<T>;
  55
  56 template <typename T>
  57 constexpr bool is_vector_v =
  58     cpp::details::is_unqualified_any_of<T, uint8x1_t, uint8x2_t, uint8x4_t,
  59                                         uint8x8_t, uint8x16_t, uint8x32_t,
  60                                         uint8x64_t>();
  61
  62 template <class T> struct is_array : cpp::false_type {};
  63 template <class T, size_t N> struct is_array<cpp::array<T, N>> {
  64   static constexpr bool value = is_scalar_v<T> || is_vector_v<T>;
  65 };
  66 template <typename T> constexpr bool is_array_v = is_array<T>::value;
  67
  68 template <typename T>
  69 constexpr bool is_element_type_v =
  70     is_scalar_v<T> || is_vector_v<T> || is_array_v<T>;
  71
  72 //
  73 template <class T> struct array_size {};
  74 template <class T, size_t N>
  75 struct array_size<cpp::array<T, N>> : cpp::integral_constant<size_t, N> {};
  76 template <typename T> constexpr size_t array_size_v = array_size<T>::value;
  77
  78 // Generic operations for the above type categories.
  79
  80 template <typename T> T load(CPtr src) {
  81   static_assert(is_element_type_v<T>);
  82   if constexpr (is_scalar_v<T> || is_vector_v<T>) {
  83     return ::__llvm_libc::load<T>(src);
  84   } else if constexpr (is_array_v<T>) {
  85     using value_type = typename T::value_type;
  86     T Value;
  87     for (size_t I = 0; I < array_size_v<T>; ++I)
  88       Value[I] = load<value_type>(src + (I * sizeof(value_type)));
  89     return Value;
  90   }
  91 }
  92
  93 template <typename T> void store(Ptr dst, T value) {
  94   static_assert(is_element_type_v<T>);
  95   if constexpr (is_scalar_v<T> || is_vector_v<T>) {
  96     ::__llvm_libc::store<T>(dst, value);
  97   } else if constexpr (is_array_v<T>) {
  98     using value_type = typename T::value_type;
  99     for (size_t I = 0; I < array_size_v<T>; ++I)
 100       store<value_type>(dst + (I * sizeof(value_type)), value[I]);
 101   }
 102 }
 103
 104 template <typename T> T splat(uint8_t value) {
 105   static_assert(is_scalar_v<T> || is_vector_v<T>);
 106   if constexpr (is_scalar_v<T>)
 107     return T(~0) / T(0xFF) * T(value);
 108   else if constexpr (is_vector_v<T>) {
 109     T Out;
 110     // This for loop is optimized out for vector types.
 111     for (size_t i = 0; i < sizeof(T); ++i)
 112       Out[i] = value;
 113     return Out;
 114   }
 115 }
 116
 117 static_assert((UINTPTR_MAX == 4294967295U) ||
 118                   (UINTPTR_MAX == 18446744073709551615UL),
 119               "We currently only support 32- or 64-bit platforms");
 120
 121 #if defined(LIBC_TARGET_ARCH_IS_X86_64) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
 122 #define LLVM_LIBC_HAS_UINT64
 123 #endif
 124
 125 namespace details {
 126 // Checks that each type is sorted in strictly decreasing order of size.
 127 // i.e. sizeof(First) > sizeof(Second) > ... > sizeof(Last)
 128 template <typename First> constexpr bool is_decreasing_size() {
 129   return sizeof(First) == 1;
 130 }
 131 template <typename First, typename Second, typename... Next>
 132 constexpr bool is_decreasing_size() {
 133   if constexpr (sizeof...(Next) > 0)
 134     return sizeof(First) > sizeof(Second) && is_decreasing_size<Next...>();
 135   else
 136     return sizeof(First) > sizeof(Second) && is_decreasing_size<Second>();
 137 }
 138
 139 template <size_t Size, typename... Ts> struct Largest;
 140 template <size_t Size> struct Largest<Size> : cpp::type_identity<uint8_t> {};
 141 template <size_t Size, typename T, typename... Ts>
 142 struct Largest<Size, T, Ts...> {
 143   using next = Largest<Size, Ts...>;
 144   using type = cpp::conditional_t<(Size >= sizeof(T)), T, typename next::type>;
 145 };
 146
 147 } // namespace details
 148
 149 // 'SupportedTypes' holds a list of natively supported types.
 150 // The types are instanciations of ScalarType or VectorType.
 151 // They should be ordered in strictly decreasing order.
 152 // The 'TypeFor<Size>' type retrieves is the largest supported type that can
 153 // handle 'Size' bytes. e.g.
 154 //
 155 // using ST = SupportedTypes<ScalarType<uint16_t>, ScalarType<uint8_t>>;
 156 // using Type = ST::TypeFor<10>;
 157 // static_assert(cpp:is_same_v<Type, ScalarType<uint16_t>>);
 158
 159 template <typename First, typename... Ts> struct SupportedTypes {
 160   static_assert(details::is_decreasing_size<First, Ts...>());
 161
 162   using MaxType = First;
 163
 164   template <size_t Size>
 165   using TypeFor = typename details::Largest<Size, First, Ts...>::type;
 166 };
 167
 168 // Map from sizes to structures offering static load, store and splat methods.
 169 // Note: On platforms lacking vector support, we use the ArrayType below and
 170 // decompose the operation in smaller pieces.
 171
 172 // Lists a generic native types to use for Memset and Memmove operations.
 173 // TODO: Inject the native types within Memset and Memmove depending on the
 174 // target architectures and derive MaxSize from it.
 175 using NativeTypeMap = SupportedTypes<uint8x64_t, //
 176                                      uint8x32_t, //
 177                                      uint8x16_t,
 178 #if defined(LLVM_LIBC_HAS_UINT64)
 179                                      uint64_t, // Not available on 32bit
 180 #endif
 181                                      uint32_t, //
 182                                      uint16_t, //
 183                                      uint8_t>;
 184
 185 namespace details {
 186
 187 // Helper to test if a type is void.
 188 template <typename T> inline constexpr bool is_void_v = cpp::is_same_v<T, void>;
 189
 190 // In case the 'Size' is not supported we can fall back to a sequence of smaller
 191 // operations using the largest natively supported type.
 192 template <size_t Size, size_t MaxSize> static constexpr bool useArrayType() {
 193   return (Size > MaxSize) && ((Size % MaxSize) == 0) &&
 194          !details::is_void_v<NativeTypeMap::TypeFor<MaxSize>>;
 195 }
 196
 197 // Compute the type to handle an operation of 'Size' bytes knowing that the
 198 // underlying platform only support native types up to MaxSize bytes.
 199 template <size_t Size, size_t MaxSize>
 200 using getTypeFor = cpp::conditional_t<
 201     useArrayType<Size, MaxSize>(),
 202     cpp::array<NativeTypeMap::TypeFor<MaxSize>, Size / MaxSize>,
 203     NativeTypeMap::TypeFor<Size>>;
 204
 205 } // namespace details
 206
 207 ///////////////////////////////////////////////////////////////////////////////
 208 // Memset
 209 ///////////////////////////////////////////////////////////////////////////////
 210
 211 template <typename T> struct Memset {
 212   static constexpr size_t SIZE = sizeof(T);
 213
 214   LIBC_INLINE static void block(Ptr dst, uint8_t value) {
 215     static_assert(is_element_type_v<T>);
 216     if constexpr (is_scalar_v<T> || is_vector_v<T>) {
 217       store<T>(dst, splat<T>(value));
 218     } else if constexpr (is_array_v<T>) {
 219       using value_type = typename T::value_type;
 220       const auto Splat = splat<value_type>(value);
 221       for (size_t I = 0; I < array_size_v<T>; ++I)
 222         store<value_type>(dst + (I * sizeof(value_type)), Splat);
 223     }
 224   }
 225
 226   LIBC_INLINE static void tail(Ptr dst, uint8_t value, size_t count) {
 227     block(dst + count - SIZE, value);
 228   }
 229
 230   LIBC_INLINE static void head_tail(Ptr dst, uint8_t value, size_t count) {
 231     block(dst, value);
 232     tail(dst, value, count);
 233   }
 234
 235   LIBC_INLINE static void loop_and_tail(Ptr dst, uint8_t value, size_t count) {
 236     static_assert(SIZE > 1, "a loop of size 1 does not need tail");
 237     size_t offset = 0;
 238     do {
 239       block(dst + offset, value);
 240       offset += SIZE;
 241     } while (offset < count - SIZE);
 242     tail(dst, value, count);
 243   }
 244 };
 245
 246 template <typename T, typename... TS> struct MemsetSequence {
 247   static constexpr size_t SIZE = (sizeof(T) + ... + sizeof(TS));
 248   LIBC_INLINE static void block(Ptr dst, uint8_t value) {
 249     Memset<T>::block(dst, value);
 250     if constexpr (sizeof...(TS) > 0) {
 251       return MemsetSequence<TS...>::block(dst + sizeof(T), value);
 252     }
 253   }
 254 };
 255
 256 ///////////////////////////////////////////////////////////////////////////////
 257 // Memmove
 258 ///////////////////////////////////////////////////////////////////////////////
 259
 260 template <typename T> struct Memmove {
 261   static constexpr size_t SIZE = sizeof(T);
 262
 263   LIBC_INLINE static void block(Ptr dst, CPtr src) {
 264     store<T>(dst, load<T>(src));
 265   }
 266
 267   LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
 268     const size_t offset = count - SIZE;
 269     // The load and store operations can be performed in any order as long as
 270     // they are not interleaved. More investigations are needed to determine
 271     // the best order.
 272     const auto head = load<T>(src);
 273     const auto tail = load<T>(src + offset);
 274     store<T>(dst, head);
 275     store<T>(dst + offset, tail);
 276   }
 277
 278   // Align forward suitable when dst < src. The alignment is performed with
 279   // an HeadTail operation of count ∈ [Alignment, 2 x Alignment].
 280   //
 281   // e.g. Moving two bytes forward, we make sure src is aligned.
 282   // [  |       |       |       |      ]
 283   // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
 284   // [____LLLLLLLL_____________________]
 285   // [___________LLLLLLLA______________]
 286   // [_SSSSSSSS________________________]
 287   // [________SSSSSSSS_________________]
 288   //
 289   // e.g. Moving two bytes forward, we make sure dst is aligned.
 290   // [  |       |       |       |      ]
 291   // [____XXXXXXXXXXXXXXXXXXXXXXXXXXXX_]
 292   // [____LLLLLLLL_____________________]
 293   // [______LLLLLLLL___________________]
 294   // [_SSSSSSSS________________________]
 295   // [___SSSSSSSA______________________]
 296   template <Arg AlignOn>
 297   LIBC_INLINE static void align_forward(Ptr &dst, CPtr &src, size_t &count) {
 298     Ptr prev_dst = dst;
 299     CPtr prev_src = src;
 300     size_t prev_count = count;
 301     align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
 302     adjust(SIZE, dst, src, count);
 303     head_tail(prev_dst, prev_src, prev_count - count);
 304   }
 305
 306   // Align backward suitable when dst > src. The alignment is performed with
 307   // an HeadTail operation of count ∈ [Alignment, 2 x Alignment].
 308   //
 309   // e.g. Moving two bytes backward, we make sure src is aligned.
 310   // [  |       |       |       |      ]
 311   // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
 312   // [ _________________ALLLLLLL_______]
 313   // [ ___________________LLLLLLLL_____]
 314   // [____________________SSSSSSSS_____]
 315   // [______________________SSSSSSSS___]
 316   //
 317   // e.g. Moving two bytes backward, we make sure dst is aligned.
 318   // [  |       |       |       |      ]
 319   // [____XXXXXXXXXXXXXXXXXXXXXXXX_____]
 320   // [ _______________LLLLLLLL_________]
 321   // [ ___________________LLLLLLLL_____]
 322   // [__________________ASSSSSSS_______]
 323   // [______________________SSSSSSSS___]
 324   template <Arg AlignOn>
 325   LIBC_INLINE static void align_backward(Ptr &dst, CPtr &src, size_t &count) {
 326     Ptr headtail_dst = dst + count;
 327     CPtr headtail_src = src + count;
 328     size_t headtail_size = 0;
 329     align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
 330                                           headtail_size);
 331     adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
 332     head_tail(headtail_dst, headtail_src, headtail_size);
 333     count -= headtail_size;
 334   }
 335
 336   // Move forward suitable when dst < src. We load the tail bytes before
 337   // handling the loop.
 338   //
 339   // e.g. Moving two bytes
 340   // [   |       |       |       |       |]
 341   // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
 342   // [_________________________LLLLLLLL___]
 343   // [___LLLLLLLL_________________________]
 344   // [_SSSSSSSS___________________________]
 345   // [___________LLLLLLLL_________________]
 346   // [_________SSSSSSSS___________________]
 347   // [___________________LLLLLLLL_________]
 348   // [_________________SSSSSSSS___________]
 349   // [_______________________SSSSSSSS_____]
 350   LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
 351                                                 size_t count) {
 352     static_assert(SIZE > 1, "a loop of size 1 does not need tail");
 353     const size_t tail_offset = count - SIZE;
 354     const auto tail_value = load<T>(src + tail_offset);
 355     size_t offset = 0;
 356     LIBC_LOOP_NOUNROLL
 357     do {
 358       block(dst + offset, src + offset);
 359       offset += SIZE;
 360     } while (offset < count - SIZE);
 361     store<T>(dst + tail_offset, tail_value);
 362   }
 363
 364   // Move backward suitable when dst > src. We load the head bytes before
 365   // handling the loop.
 366   //
 367   // e.g. Moving two bytes
 368   // [   |       |       |       |       |]
 369   // [___XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
 370   // [___LLLLLLLL_________________________]
 371   // [_________________________LLLLLLLL___]
 372   // [___________________________SSSSSSSS_]
 373   // [_________________LLLLLLLL___________]
 374   // [___________________SSSSSSSS_________]
 375   // [_________LLLLLLLL___________________]
 376   // [___________SSSSSSSS_________________]
 377   // [_____SSSSSSSS_______________________]
 378   LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
 379                                                  size_t count) {
 380     static_assert(SIZE > 1, "a loop of size 1 does not need tail");
 381     const auto head_value = load<T>(src);
 382     ptrdiff_t offset = count - SIZE;
 383     LIBC_LOOP_NOUNROLL
 384     do {
 385       block(dst + offset, src + offset);
 386       offset -= SIZE;
 387     } while (offset >= 0);
 388     store<T>(dst, head_value);
 389   }
 390 };
 391
 392 ///////////////////////////////////////////////////////////////////////////////
 393 // Bcmp
 394 ///////////////////////////////////////////////////////////////////////////////
 395 template <size_t Size> struct Bcmp {
 396   static constexpr size_t SIZE = Size;
 397   static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64)
 398                                         ? sizeof(uint64_t)
 399                                         : sizeof(uint32_t);
 400
 401   template <typename T> LIBC_INLINE static uint32_t load_xor(CPtr p1, CPtr p2) {
 402     static_assert(sizeof(T) <= sizeof(uint32_t));
 403     return load<T>(p1) ^ load<T>(p2);
 404   }
 405
 406   template <typename T>
 407   LIBC_INLINE static uint32_t load_not_equal(CPtr p1, CPtr p2) {
 408     return load<T>(p1) != load<T>(p2);
 409   }
 410
 411   LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) {
 412     if constexpr (Size == 1) {
 413       return load_xor<uint8_t>(p1, p2);
 414     } else if constexpr (Size == 2) {
 415       return load_xor<uint16_t>(p1, p2);
 416     } else if constexpr (Size == 4) {
 417       return load_xor<uint32_t>(p1, p2);
 418     } else if constexpr (Size == 8) {
 419       return load_not_equal<uint64_t>(p1, p2);
 420     } else if constexpr (details::useArrayType<Size, MaxSize>()) {
 421       for (size_t offset = 0; offset < Size; offset += MaxSize)
 422         if (auto value = Bcmp<MaxSize>::block(p1 + offset, p2 + offset))
 423           return value;
 424     } else {
 425       deferred_static_assert("Unimplemented Size");
 426     }
 427     return BcmpReturnType::ZERO();
 428   }
 429
 430   LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
 431     return block(p1 + count - SIZE, p2 + count - SIZE);
 432   }
 433
 434   LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) {
 435     return block(p1, p2) | tail(p1, p2, count);
 436   }
 437
 438   LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
 439                                                   size_t count) {
 440     static_assert(Size > 1, "a loop of size 1 does not need tail");
 441     size_t offset = 0;
 442     do {
 443       if (auto value = block(p1 + offset, p2 + offset))
 444         return value;
 445       offset += SIZE;
 446     } while (offset < count - SIZE);
 447     return tail(p1, p2, count);
 448   }
 449 };
 450
 451 ///////////////////////////////////////////////////////////////////////////////
 452 // Memcmp
 453 ///////////////////////////////////////////////////////////////////////////////
 454 template <size_t Size> struct Memcmp {
 455   static constexpr size_t SIZE = Size;
 456   static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64)
 457                                         ? sizeof(uint64_t)
 458                                         : sizeof(uint32_t);
 459
 460   template <typename T> LIBC_INLINE static T load_be(CPtr ptr) {
 461     return Endian::to_big_endian(load<T>(ptr));
 462   }
 463
 464   template <typename T>
 465   LIBC_INLINE static MemcmpReturnType load_be_diff(CPtr p1, CPtr p2) {
 466     return load_be<T>(p1) - load_be<T>(p2);
 467   }
 468
 469   template <typename T>
 470   LIBC_INLINE static MemcmpReturnType load_be_cmp(CPtr p1, CPtr p2) {
 471     const auto la = load_be<T>(p1);
 472     const auto lb = load_be<T>(p2);
 473     return la > lb ? 1 : la < lb ? -1 : 0;
 474   }
 475
 476   LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) {
 477     if constexpr (Size == 1) {
 478       return load_be_diff<uint8_t>(p1, p2);
 479     } else if constexpr (Size == 2) {
 480       return load_be_diff<uint16_t>(p1, p2);
 481     } else if constexpr (Size == 4) {
 482       return load_be_cmp<uint32_t>(p1, p2);
 483     } else if constexpr (Size == 8) {
 484       return load_be_cmp<uint64_t>(p1, p2);
 485     } else if constexpr (details::useArrayType<Size, MaxSize>()) {
 486       for (size_t offset = 0; offset < Size; offset += MaxSize)
 487         if (Bcmp<MaxSize>::block(p1 + offset, p2 + offset))
 488           return Memcmp<MaxSize>::block(p1 + offset, p2 + offset);
 489       return MemcmpReturnType::ZERO();
 490     } else if constexpr (Size == 3) {
 491       if (auto value = Memcmp<2>::block(p1, p2))
 492         return value;
 493       return Memcmp<1>::block(p1 + 2, p2 + 2);
 494     } else {
 495       deferred_static_assert("Unimplemented Size");
 496     }
 497   }
 498
 499   LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) {
 500     return block(p1 + count - SIZE, p2 + count - SIZE);
 501   }
 502
 503   LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2,
 504                                                 size_t count) {
 505     if (auto value = block(p1, p2))
 506       return value;
 507     return tail(p1, p2, count);
 508   }
 509
 510   LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2,
 511                                                     size_t count) {
 512     static_assert(Size > 1, "a loop of size 1 does not need tail");
 513     size_t offset = 0;
 514     do {
 515       if (auto value = block(p1 + offset, p2 + offset))
 516         return value;
 517       offset += SIZE;
 518     } while (offset < count - SIZE);
 519     return tail(p1, p2, count);
 520   }
 521 };
 522
 523 } // namespace __llvm_libc::generic
 524
 525 #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H