Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / OpenMP / nvptx_target_requires_unified_shared_memory.cpp
blobc1bfe36507d145127c8bab2657570322369a682f
1 // Test declare target link under unified memory requirement.
3 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-HOST
5 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
6 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_70 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK-DEVICE
8 // Test declare target link under unified memory requirement.
10 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK-HOST
12 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc
13 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_70 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix CHECK-DEVICE
15 // expected-no-diagnostics
17 #ifndef HEADER
18 #define HEADER
// NOTE(review): N is not referenced anywhere in the visible part of this test.
20 #define N 1000
// Host globals read inside the target region of bar(). Their initializers
// (10.0 and 20.0) are the values the host-side global checks further down
// match as 1.000000e+01 and 2.000000e+01.
22 double var = 10.0;
23 double to_var = 20.0;
// Request unified shared memory for this translation unit, then mark `var`
// with the declare-target `link` clause and `to_var` with the `to` clause.
25 #pragma omp requires unified_shared_memory
26 #pragma omp declare target link(var)
27 #pragma omp declare target to(to_var)
// bar: accumulates `var + to_var` into a local double once per loop iteration
// (i from 0 up to, but not including, n) inside an OpenMP target region, and
// returns the total. The accumulator is a double while the return type is
// int, so the result is converted to int on return.
29 int bar(int n){
30 double sum = 0;
// The loop is the body of the target region; it reads the two declare-target
// globals and updates `sum`, which is declared outside the region.
32 #pragma omp target
33 for(int i = 0; i < n; i++) {
34 sum += var + to_var;
37 return sum;
40 // CHECK-HOST: [[VAR:@.+]] ={{.*}} global double 1.000000e+01
41 // CHECK-HOST: [[VAR_DECL_TGT_LINK_PTR:@.+]] = weak{{.*}} global ptr [[VAR]]
43 // CHECK-HOST: [[TO_VAR:@.+]] ={{.*}} global double 2.000000e+01
44 // CHECK-HOST: [[VAR_DECL_TGT_TO_PTR:@.+]] = weak{{.*}} global ptr [[TO_VAR]]
46 // CHECK-HOST: [[OFFLOAD_SIZES:@.+]] = private unnamed_addr constant [2 x i64] [i64 4, i64 8]
47 // CHECK-HOST: [[OFFLOAD_MAPTYPES:@.+]] = private unnamed_addr constant [2 x i64] [i64 800, i64 800]
49 // CHECK-HOST: [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR_NAME:@.+]] = internal unnamed_addr constant [21 x i8]
50 // CHECK-HOST: [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR:@.+]] = weak{{.*}} constant %struct.__tgt_offload_entry { ptr [[VAR_DECL_TGT_LINK_PTR]], ptr [[OMP_OFFLOAD_ENTRY_LINK_VAR_PTR_NAME]], i64 8, i32 1, i32 0 }, section "omp_offloading_entries"
52 // CHECK-HOST: [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR_NAME:@.+]] = internal unnamed_addr constant [24 x i8]
53 // CHECK-HOST: [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR:@.+]] = weak{{.*}} constant %struct.__tgt_offload_entry { ptr [[VAR_DECL_TGT_TO_PTR]], ptr [[OMP_OFFLOAD_ENTRY_TO_VAR_PTR_NAME]], i64 8, i32 0, i32 0 }, section "omp_offloading_entries"
55 // CHECK-HOST: [[N_CASTED:%.+]] = alloca i64
56 // CHECK-HOST: [[SUM_CASTED:%.+]] = alloca i64
58 // CHECK-HOST: [[OFFLOAD_BASEPTRS:%.+]] = alloca [2 x ptr]
59 // CHECK-HOST: [[OFFLOAD_PTRS:%.+]] = alloca [2 x ptr]
61 // CHECK-HOST: [[LOAD1:%.+]] = load i64, ptr [[N_CASTED]]
62 // CHECK-HOST: [[LOAD2:%.+]] = load i64, ptr [[SUM_CASTED]]
64 // CHECK-HOST: [[BPTR1:%.+]] = getelementptr inbounds [2 x ptr], ptr [[OFFLOAD_BASEPTRS]], i32 0, i32 0
65 // CHECK-HOST: store i64 [[LOAD1]], ptr [[BPTR1]]
66 // CHECK-HOST: [[BPTR2:%.+]] = getelementptr inbounds [2 x ptr], ptr [[OFFLOAD_PTRS]], i32 0, i32 0
67 // CHECK-HOST: store i64 [[LOAD1]], ptr [[BPTR2]]
69 // CHECK-HOST: [[BPTR3:%.+]] = getelementptr inbounds [2 x ptr], ptr [[OFFLOAD_BASEPTRS]], i32 0, i32 1
70 // CHECK-HOST: store i64 [[LOAD2]], ptr [[BPTR3]]
71 // CHECK-HOST: [[BPTR4:%.+]] = getelementptr inbounds [2 x ptr], ptr [[OFFLOAD_PTRS]], i32 0, i32 1
72 // CHECK-HOST: store i64 [[LOAD2]], ptr [[BPTR4]]
74 // CHECK-HOST: [[BPTR7:%.+]] = getelementptr inbounds [2 x ptr], ptr [[OFFLOAD_BASEPTRS]], i32 0, i32 0
75 // CHECK-HOST: [[BPTR8:%.+]] = getelementptr inbounds [2 x ptr], ptr [[OFFLOAD_PTRS]], i32 0, i32 0
77 // CHECK-HOST: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr %{{.+}})
79 // CHECK-DEVICE: [[VAR_LINK:@.+]] = weak{{.*}} global ptr null
80 // CHECK-DEVICE: [[VAR_TO:@.+]] = weak{{.*}} global ptr null
82 // CHECK-DEVICE: [[VAR_TO_PTR:%.+]] = load ptr, ptr [[VAR_TO]]
83 // CHECK-DEVICE: load double, ptr [[VAR_TO_PTR]]
85 #endif