Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / OpenMP / amdgcn_target_codegen.cpp
blob3ea2d107f072adb66c673d7e8ed6b34905fac9bc
1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
2 // REQUIRES: amdgpu-registered-target
4 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
5 // RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
6 // expected-no-diagnostics
7 #ifndef HEADER
8 #define HEADER
10 #define N 1000
12 int test_amdgcn_target_tid_threads() {
13 int arr[N];
14 #pragma omp target
15 for (int i = 0; i < N; i++) {
16 arr[i] = 1;
18 return arr[0];
21 int test_amdgcn_target_tid_threads_simd() {
22 int arr[N];
23 #pragma omp target simd
24 for (int i = 0; i < N; i++) {
25 arr[i] = 1;
27 return arr[0];
30 #endif
31 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14
32 // CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR0:[0-9]+]] {
33 // CHECK-NEXT: entry:
34 // CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
35 // CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
36 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
37 // CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
38 // CHECK-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
39 // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
40 // CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
41 // CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
42 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
43 // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z30test_amdgcn_target_tid_threadsv_l14_kernel_environment to ptr), ptr [[DYN_PTR]])
44 // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
45 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
46 // CHECK: user_code.entry:
47 // CHECK-NEXT: store i32 0, ptr [[I_ASCAST]], align 4
48 // CHECK-NEXT: br label [[FOR_COND:%.*]]
49 // CHECK: for.cond:
50 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I_ASCAST]], align 4
51 // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000
52 // CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
53 // CHECK: for.body:
54 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I_ASCAST]], align 4
55 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP3]] to i64
56 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
57 // CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4
58 // CHECK-NEXT: br label [[FOR_INC:%.*]]
59 // CHECK: for.inc:
60 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4
61 // CHECK-NEXT: [[INC:%.*]] = add nsw i32 [[TMP4]], 1
62 // CHECK-NEXT: store i32 [[INC]], ptr [[I_ASCAST]], align 4
63 // CHECK-NEXT: br label [[FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]]
64 // CHECK: worker.exit:
65 // CHECK-NEXT: ret void
66 // CHECK: for.end:
67 // CHECK-NEXT: call void @__kmpc_target_deinit()
68 // CHECK-NEXT: ret void
71 // CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23
72 // CHECK-SAME: (ptr noalias noundef [[DYN_PTR:%.*]], ptr noundef nonnull align 4 dereferenceable(4000) [[ARR:%.*]]) #[[ATTR1:[0-9]+]] {
73 // CHECK-NEXT: entry:
74 // CHECK-NEXT: [[DYN_PTR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
75 // CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
76 // CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4, addrspace(5)
77 // CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4, addrspace(5)
78 // CHECK-NEXT: [[I:%.*]] = alloca i32, align 4, addrspace(5)
79 // CHECK-NEXT: [[DYN_PTR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DYN_PTR_ADDR]] to ptr
80 // CHECK-NEXT: [[ARR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ARR_ADDR]] to ptr
81 // CHECK-NEXT: [[TMP_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TMP]] to ptr
82 // CHECK-NEXT: [[DOTOMP_IV_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[DOTOMP_IV]] to ptr
83 // CHECK-NEXT: [[I_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I]] to ptr
84 // CHECK-NEXT: store ptr [[DYN_PTR]], ptr [[DYN_PTR_ADDR_ASCAST]], align 8
85 // CHECK-NEXT: store ptr [[ARR]], ptr [[ARR_ADDR_ASCAST]], align 8
86 // CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR_ASCAST]], align 8
87 // CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_target_init(ptr addrspacecast (ptr addrspace(1) @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z35test_amdgcn_target_tid_threads_simdv_l23_kernel_environment to ptr), ptr [[DYN_PTR]])
88 // CHECK-NEXT: [[EXEC_USER_CODE:%.*]] = icmp eq i32 [[TMP1]], -1
89 // CHECK-NEXT: br i1 [[EXEC_USER_CODE]], label [[USER_CODE_ENTRY:%.*]], label [[WORKER_EXIT:%.*]]
90 // CHECK: user_code.entry:
91 // CHECK-NEXT: store i32 0, ptr [[DOTOMP_IV_ASCAST]], align 4
92 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]]
93 // CHECK: omp.inner.for.cond:
94 // CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11:![0-9]+]]
95 // CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP2]], 1000
96 // CHECK-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]]
97 // CHECK: omp.inner.for.body:
98 // CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
99 // CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP3]], 1
100 // CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]]
101 // CHECK-NEXT: store i32 [[ADD]], ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
102 // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
103 // CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64
104 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], ptr [[TMP0]], i64 0, i64 [[IDXPROM]]
105 // CHECK-NEXT: store i32 1, ptr [[ARRAYIDX]], align 4, !llvm.access.group [[ACC_GRP11]]
106 // CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]]
107 // CHECK: omp.body.continue:
108 // CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]]
109 // CHECK: omp.inner.for.inc:
110 // CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
111 // CHECK-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP5]], 1
112 // CHECK-NEXT: store i32 [[ADD1]], ptr [[DOTOMP_IV_ASCAST]], align 4, !llvm.access.group [[ACC_GRP11]]
113 // CHECK-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]]
114 // CHECK: worker.exit:
115 // CHECK-NEXT: ret void
116 // CHECK: omp.inner.for.end:
117 // CHECK-NEXT: store i32 1000, ptr [[I_ASCAST]], align 4
118 // CHECK-NEXT: call void @__kmpc_target_deinit()
119 // CHECK-NEXT: ret void