offload/test/offloading/ompx_bare_shfl_down_sync.cpp

   1 // RUN: %libomptarget-compilexx-run-and-check-generic
   2 //
   3 // REQUIRES: gpu
   4
   5 #include <cassert>
   6 #include <cmath>
   7 #include <cstdint>
   8 #include <cstdio>
   9 #include <limits>
  10 #include <ompx.h>
  11 #include <type_traits>
  12
  13 template <typename T, std::enable_if_t<std::is_integral<T>::value, bool> = true>
  14 bool equal(T LHS, T RHS) {
  15   return LHS == RHS;
  16 }
  17
  18 template <typename T,
  19           std::enable_if_t<std::is_floating_point<T>::value, bool> = true>
  20 bool equal(T LHS, T RHS) {
  21   return __builtin_fabs(LHS - RHS) < std::numeric_limits<T>::epsilon();
  22 }
  23
  24 template <typename T> void test() {
  25   constexpr const int num_blocks = 1;
  26   constexpr const int block_size = 256;
  27   constexpr const int N = num_blocks * block_size;
  28   int *res = new int[N];
  29
  30 #pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) \
  31         map(from: res[0:N])
  32   {
  33     int tid = ompx_thread_id_x();
  34     T val = ompx::shfl_down_sync(~0U, static_cast<T>(tid), 1);
  35 #ifdef __AMDGCN_WAVEFRONT_SIZE
  36     int warp_size = __AMDGCN_WAVEFRONT_SIZE;
  37 #else
  38     int warp_size = 32;
  39 #endif
  40     if ((tid & (warp_size - 1)) != warp_size - 1)
  41       res[tid] = equal(val, static_cast<T>(tid + 1));
  42     else
  43       res[tid] = equal(val, static_cast<T>(tid));
  44   }
  45
  46   for (int i = 0; i < N; ++i)
  47     assert(res[i]);
  48
  49   delete[] res;
  50 }
  51
  52 int main(int argc, char *argv[]) {
  53   test<int32_t>();
  54   test<int64_t>();
  55   test<float>();
  56   test<double>();
  57   // CHECK: PASS
  58   printf("PASS\n");
  59
  60   return 0;
  61 }