clang/test/OpenMP/nvptx_allocate_codegen.cpp

   1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
   2 // RUN: %clang_cc1 -verify -fopenmp -triple x86_64-apple-darwin10.6.0 -fopenmp-targets=nvptx64-nvidia-cuda  -emit-llvm-bc -o %t-host.bc %s
   3 // RUN: %clang_cc1 -verify -fopenmp -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1
   4 // expected-no-diagnostics
   5
   6 #ifndef HEADER
   7 #define HEADER
   8
   9 #pragma omp declare target
  10 typedef void **omp_allocator_handle_t; // Handle type declared locally; the visible test includes no headers. Presumably stands in for the <omp.h> type - confirm.
  11 extern const omp_allocator_handle_t omp_null_allocator; // Not named by any allocate directive or clause in the visible test.
  12 extern const omp_allocator_handle_t omp_default_mem_alloc; // Used for St1::b and for main's local b.
  13 extern const omp_allocator_handle_t omp_large_cap_mem_alloc; // Used for namespace-scope a.
  14 extern const omp_allocator_handle_t omp_const_mem_alloc; // Used for namespace-scope b.
  15 extern const omp_allocator_handle_t omp_high_bw_mem_alloc; // Used for namespace-scope d and c.
  16 extern const omp_allocator_handle_t omp_low_lat_mem_alloc; // Used for the static member ST<T>::m.
  17 extern const omp_allocator_handle_t omp_cgroup_mem_alloc; // Used for foo's v, main's c and bar's bar_c.
  18 extern const omp_allocator_handle_t omp_pteam_mem_alloc; // Used for ns::a and for bar_b in bar's parallel allocate clause.
  19 extern const omp_allocator_handle_t omp_thread_mem_alloc; // Used for main's static a and for bar_a in bar's parallel allocate clause.
  20
  21 struct St{ // Plain struct with a single int member; not referenced elsewhere in the visible test.
  22  int a;
  23 };
  24
  25 struct St1{ // Struct whose static data member carries an allocate directive inside the class body.
  26  int a;
  27  static int b;
  28 #pragma omp allocate(b) allocator(omp_default_mem_alloc) // Names the static member b declared on the previous line.
  29 } d; // d is a namespace-scope St1 object; it is listed in a later allocate directive together with c.
  30
  31 int a, b, c; // Namespace-scope variables; each one is named by one of the allocate directives below.
  32 #pragma omp allocate(a) allocator(omp_large_cap_mem_alloc)
  33 #pragma omp allocate(b) allocator(omp_const_mem_alloc)
  34 #pragma omp allocate(d, c) allocator(omp_high_bw_mem_alloc) // A single directive listing two variables: the St1 object d and the int c.
  35
  36 template <class T> // Class template with a static member that has an allocate directive in the class body.
  37 struct ST {
  38   static T m;
  39   #pragma omp allocate(m) allocator(omp_low_lat_mem_alloc) // Applies to the static member m of every specialization.
  40 };
  41
  42 template <class T> T foo() { // Copies ST<T>::m into a local that has an allocate directive, then returns that local.
  43   T v;
  44   #pragma omp allocate(v) allocator(omp_cgroup_mem_alloc) // The CHECK1 lines for foo<int> expect v to be accessed as the global @v, with no alloca.
  45   v = ST<T>::m; // Checked as a load from @_ZN2STIiE1mE followed by a store to @v.
  46   return v;
  47 }
  48
  49 namespace ns{ // Holds a variable that the directive after the closing brace names by qualified id.
  50   int a;
  51 }
  52 #pragma omp allocate(ns::a) allocator(omp_pteam_mem_alloc) // Directive appears outside the namespace and refers to ns::a.
  53
  54 int main () { // Applies allocate directives to a static local and to automatic locals, then returns foo<int>().
  55   static int a;
  56 #pragma omp allocate(a) allocator(omp_thread_mem_alloc)
  57   a=2; // Checked as a store of 2 to @_ZZ4mainE1a.
  58   double b = 3; // Checked as a store to the global @b1; the only alloca expected in main is the return slot.
  59   float c; // Never read or written; no CHECK1 line for main mentions it.
  60 #pragma omp allocate(b) allocator(omp_default_mem_alloc)
  61 #pragma omp allocate(c) allocator(omp_cgroup_mem_alloc)
  62   return (foo<int>());
  63 }
  64
  65
  66 extern template int ST<int>::m; // Explicit instantiation declaration for the static member that foo<int>() reads.
  67
  68 void baz(float &); // Declaration only (no definition in the visible test); called from bar's parallel region with bar_a.
  69
  70 void bar() { // Combines a declarative allocate directive with allocate clauses on a parallel construct.
  71   float bar_a;
  72   double bar_b;
  73   int bar_c;
  74 #pragma omp allocate(bar_c) allocator(omp_cgroup_mem_alloc) // bar_c is not otherwise used in this function.
  75 #pragma omp parallel private(bar_a, bar_b) allocate(omp_thread_mem_alloc                  \
  76                                                     : bar_a) allocate(omp_pteam_mem_alloc \
  77                                                                       : bar_b) // The outlined-function checks expect bar_a as an alloca and bar_b as addrspace(3) global @bar_b.
  78   {
  79     bar_b = bar_a; // Reads the private bar_a, which this test never assigns; checked as load float, fpext, store double.
  80     baz(bar_a); // Passes the private bar_a by reference; checked as a call to @_Z3bazRf.
  81   }
  82 }
  83
  84 #pragma omp end declare target
  85 #endif
  86 // CHECK1-LABEL: define {{[^@]+}}@main
  87 // CHECK1-SAME: () #[[ATTR0:[0-9]+]] {
  88 // CHECK1-NEXT:  entry:
  89 // CHECK1-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
  90 // CHECK1-NEXT:    store i32 0, ptr [[RETVAL]], align 4
  91 // CHECK1-NEXT:    store i32 2, ptr @_ZZ4mainE1a, align 4
  92 // CHECK1-NEXT:    store double 3.000000e+00, ptr @b1, align 8
  93 // CHECK1-NEXT:    [[CALL:%.*]] = call noundef i32 @_Z3fooIiET_v() #[[ATTR7:[0-9]+]]
  94 // CHECK1-NEXT:    ret i32 [[CALL]]
  95 //
  96 //
  97 // CHECK1-LABEL: define {{[^@]+}}@_Z3fooIiET_v
  98 // CHECK1-SAME: () #[[ATTR1:[0-9]+]] comdat {
  99 // CHECK1-NEXT:  entry:
 100 // CHECK1-NEXT:    [[TMP0:%.*]] = load i32, ptr @_ZN2STIiE1mE, align 4
 101 // CHECK1-NEXT:    store i32 [[TMP0]], ptr @v, align 4
 102 // CHECK1-NEXT:    [[TMP1:%.*]] = load i32, ptr @v, align 4
 103 // CHECK1-NEXT:    ret i32 [[TMP1]]
 104 //
 105 //
 106 // CHECK1-LABEL: define {{[^@]+}}@_Z3barv
 107 // CHECK1-SAME: () #[[ATTR1]] {
 108 // CHECK1-NEXT:  entry:
 109 // CHECK1-NEXT:    [[BAR_A:%.*]] = alloca float, align 4
 110 // CHECK1-NEXT:    [[BAR_B:%.*]] = alloca double, align 8
 111 // CHECK1-NEXT:    [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x ptr], align 8
 112 // CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(ptr @[[GLOB1:[0-9]+]])
 113 // CHECK1-NEXT:    call void @__kmpc_parallel_51(ptr @[[GLOB1]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, ptr @_Z3barv_omp_outlined, ptr @_Z3barv_omp_outlined_wrapper, ptr [[CAPTURED_VARS_ADDRS]], i64 0)
 114 // CHECK1-NEXT:    ret void
 115 //
 116 //
 117 // CHECK1-LABEL: define {{[^@]+}}@_Z3barv_omp_outlined
 118 // CHECK1-SAME: (ptr noalias noundef [[DOTGLOBAL_TID_:%.*]], ptr noalias noundef [[DOTBOUND_TID_:%.*]]) #[[ATTR2:[0-9]+]] {
 119 // CHECK1-NEXT:  entry:
 120 // CHECK1-NEXT:    [[DOTGLOBAL_TID__ADDR:%.*]] = alloca ptr, align 8
 121 // CHECK1-NEXT:    [[DOTBOUND_TID__ADDR:%.*]] = alloca ptr, align 8
 122 // CHECK1-NEXT:    [[BAR_A:%.*]] = alloca float, align 4
 123 // CHECK1-NEXT:    store ptr [[DOTGLOBAL_TID_]], ptr [[DOTGLOBAL_TID__ADDR]], align 8
 124 // CHECK1-NEXT:    store ptr [[DOTBOUND_TID_]], ptr [[DOTBOUND_TID__ADDR]], align 8
 125 // CHECK1-NEXT:    [[TMP0:%.*]] = load float, ptr [[BAR_A]], align 4
 126 // CHECK1-NEXT:    [[CONV:%.*]] = fpext float [[TMP0]] to double
 127 // CHECK1-NEXT:    store double [[CONV]], ptr addrspacecast (ptr addrspace(3) @bar_b to ptr), align 8
 128 // CHECK1-NEXT:    call void @_Z3bazRf(ptr noundef nonnull align 4 dereferenceable(4) [[BAR_A]]) #[[ATTR7]]
 129 // CHECK1-NEXT:    ret void
 130 //
 131 //
 132 // CHECK1-LABEL: define {{[^@]+}}@_Z3barv_omp_outlined_wrapper
 133 // CHECK1-SAME: (i16 noundef zeroext [[TMP0:%.*]], i32 noundef [[TMP1:%.*]]) #[[ATTR4:[0-9]+]] {
 134 // CHECK1-NEXT:  entry:
 135 // CHECK1-NEXT:    [[DOTADDR:%.*]] = alloca i16, align 2
 136 // CHECK1-NEXT:    [[DOTADDR1:%.*]] = alloca i32, align 4
 137 // CHECK1-NEXT:    [[DOTZERO_ADDR:%.*]] = alloca i32, align 4
 138 // CHECK1-NEXT:    [[GLOBAL_ARGS:%.*]] = alloca ptr, align 8
 139 // CHECK1-NEXT:    store i16 [[TMP0]], ptr [[DOTADDR]], align 2
 140 // CHECK1-NEXT:    store i32 [[TMP1]], ptr [[DOTADDR1]], align 4
 141 // CHECK1-NEXT:    store i32 0, ptr [[DOTZERO_ADDR]], align 4
 142 // CHECK1-NEXT:    call void @__kmpc_get_shared_variables(ptr [[GLOBAL_ARGS]])
 143 // CHECK1-NEXT:    call void @_Z3barv_omp_outlined(ptr [[DOTADDR1]], ptr [[DOTZERO_ADDR]]) #[[ATTR5:[0-9]+]]
 144 // CHECK1-NEXT:    ret void
 145 //