Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGenOpenCL / builtins-fp-atomics-gfx90a.cl
blobafe80b17e51165e06620b27c1b8b5eb317bc7616
1 // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a \
2 // RUN: %s -S -emit-llvm -o - | FileCheck %s -check-prefix=CHECK
4 // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a \
5 // RUN: -S -o - %s | FileCheck -check-prefix=GFX90A %s
7 // REQUIRES: amdgpu-registered-target
// half2: two-element vector of half, declared with ext_vector_type(2).
// Used as the pointee and operand type of test_global_add_half2 below.
9 typedef half __attribute__((ext_vector_type(2))) half2;
11 // CHECK-LABEL: test_global_add_f64
12 // CHECK: call double @llvm.amdgcn.global.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
13 // GFX90A-LABEL: test_global_add_f64$local:
14 // GFX90A: global_atomic_add_f64
// Calls the global fadd f64 builtin on a __global double pointer and stores
// the returned value through a local pointer.
15 void test_global_add_f64(__global double *addr, double x) {
16 double *rtn; // declared without an initializer
17 *rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x); // NOTE(review): presumably the store exists only so the builtin's return value is used -- confirm
20 // CHECK-LABEL: test_global_add_half2
21 // CHECK: call <2 x half> @llvm.amdgcn.global.atomic.fadd.v2f16.p1.v2f16(ptr addrspace(1) %{{.*}}, <2 x half> %{{.*}})
22 // GFX90A-LABEL: test_global_add_half2
23 // GFX90A: global_atomic_pk_add_f16 v2, v[0:1], v2, off glc
// Calls the global fadd v2f16 builtin on a __global half2 pointer and stores
// the returned vector through a local pointer.
// NOTE(review): the assembly pattern above names specific registers
// (v2, v[0:1]), so it is presumably sensitive to any change in this body.
24 void test_global_add_half2(__global half2 *addr, half2 x) {
25 half2 *rtn; // declared without an initializer
26 *rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x); // returned value is written through rtn
29 // CHECK-LABEL: test_global_global_min_f64
30 // CHECK: call double @llvm.amdgcn.global.atomic.fmin.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
31 // GFX90A-LABEL: test_global_global_min_f64$local
32 // GFX90A: global_atomic_min_f64
// Calls the global fmin f64 builtin on a __global double pointer and stores
// the returned value through a local pointer.
33 void test_global_global_min_f64(__global double *addr, double x){
34 double *rtn; // declared without an initializer
35 *rtn = __builtin_amdgcn_global_atomic_fmin_f64(addr, x); // returned value is written through rtn
38 // CHECK-LABEL: test_global_max_f64
39 // CHECK: call double @llvm.amdgcn.global.atomic.fmax.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
40 // GFX90A-LABEL: test_global_max_f64$local
41 // GFX90A: global_atomic_max_f64
// Calls the global fmax f64 builtin on a __global double pointer and stores
// the returned value through a local pointer.
42 void test_global_max_f64(__global double *addr, double x){
43 double *rtn; // declared without an initializer
44 *rtn = __builtin_amdgcn_global_atomic_fmax_f64(addr, x); // returned value is written through rtn
47 // CHECK-LABEL: test_flat_add_local_f64
48 // CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p3.f64(ptr addrspace(3) %{{.*}}, double %{{.*}})
49 // GFX90A-LABEL: test_flat_add_local_f64$local
50 // GFX90A: ds_add_rtn_f64
// Passes a __local double pointer to the flat fadd f64 builtin and stores the
// returned value through a local pointer. The patterns above expect the
// addrspace(3) form of the flat intrinsic in IR and ds_add_rtn_f64 in assembly.
51 void test_flat_add_local_f64(__local double *addr, double x){
52 double *rtn; // declared without an initializer
53 *rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x); // returned value is written through rtn
56 // CHECK-LABEL: test_flat_global_add_f64
57 // CHECK: call double @llvm.amdgcn.flat.atomic.fadd.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
58 // GFX90A-LABEL: test_flat_global_add_f64$local
59 // GFX90A: global_atomic_add_f64
// Passes a __global double pointer to the flat fadd f64 builtin and stores the
// returned value through a local pointer. The patterns above expect the
// addrspace(1) form of the flat intrinsic in IR and global_atomic_add_f64 in
// assembly.
60 void test_flat_global_add_f64(__global double *addr, double x){
61 double *rtn; // declared without an initializer
62 *rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x); // returned value is written through rtn
65 // CHECK-LABEL: test_flat_min_flat_f64
66 // CHECK: call double @llvm.amdgcn.flat.atomic.fmin.f64.p0.f64(ptr %{{.*}}, double %{{.*}})
67 // GFX90A-LABEL: test_flat_min_flat_f64$local
68 // GFX90A: flat_atomic_min_f64
// Passes a __generic double pointer to the flat fmin f64 builtin and stores
// the returned value through a local pointer. The patterns above expect the
// p0 form of the intrinsic in IR and flat_atomic_min_f64 in assembly.
69 void test_flat_min_flat_f64(__generic double *addr, double x){
70 double *rtn; // declared without an initializer
71 *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x); // returned value is written through rtn
74 // CHECK-LABEL: test_flat_global_min_f64
75 // CHECK: call double @llvm.amdgcn.flat.atomic.fmin.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
76 // GFX90A: test_flat_global_min_f64$local
77 // GFX90A: global_atomic_min_f64
// Passes a __global double pointer to the flat fmin f64 builtin and stores the
// returned value through a local pointer. The patterns above expect the
// addrspace(1) form of the flat intrinsic in IR and global_atomic_min_f64 in
// assembly.
// NOTE(review): the assembly directive that matches this function's symbol
// uses the plain prefix, while most sibling tests use the -LABEL form --
// confirm whether that difference is intentional.
78 void test_flat_global_min_f64(__global double *addr, double x){
79 double *rtn; // declared without an initializer
80 *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x); // returned value is written through rtn
83 // CHECK-LABEL: test_flat_max_flat_f64
84 // CHECK: call double @llvm.amdgcn.flat.atomic.fmax.f64.p0.f64(ptr %{{.*}}, double %{{.*}})
85 // GFX90A-LABEL: test_flat_max_flat_f64$local
86 // GFX90A: flat_atomic_max_f64
// Passes a __generic double pointer to the flat fmax f64 builtin and stores
// the returned value through a local pointer. The patterns above expect the
// p0 form of the intrinsic in IR and flat_atomic_max_f64 in assembly.
87 void test_flat_max_flat_f64(__generic double *addr, double x){
88 double *rtn; // declared without an initializer
89 *rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x); // returned value is written through rtn
92 // CHECK-LABEL: test_flat_global_max_f64
93 // CHECK: call double @llvm.amdgcn.flat.atomic.fmax.f64.p1.f64(ptr addrspace(1) %{{.*}}, double %{{.*}})
94 // GFX90A-LABEL: test_flat_global_max_f64$local
95 // GFX90A: global_atomic_max_f64
// Passes a __global double pointer to the flat fmax f64 builtin and stores the
// returned value through a local pointer. The patterns above expect the
// addrspace(1) form of the flat intrinsic in IR and global_atomic_max_f64 in
// assembly.
96 void test_flat_global_max_f64(__global double *addr, double x){
97 double *rtn; // declared without an initializer
98 *rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x); // returned value is written through rtn
101 // CHECK-LABEL: test_ds_add_local_f64
102 // CHECK: call double @llvm.amdgcn.ds.fadd.f64(ptr addrspace(3) %{{.*}}, double %{{.*}},
103 // GFX90A: test_ds_add_local_f64$local
104 // GFX90A: ds_add_rtn_f64
// Calls the ds fadd f64 builtin on a __local double pointer and stores the
// returned value through a local pointer. The IR pattern above matches only
// the first two operands of the llvm.amdgcn.ds.fadd.f64 call; the assembly
// pattern expects ds_add_rtn_f64.
// NOTE(review): the assembly directive that matches this function's symbol
// uses the plain prefix, while most sibling tests use the -LABEL form --
// confirm whether that difference is intentional.
105 void test_ds_add_local_f64(__local double *addr, double x){
106 double *rtn; // declared without an initializer
107 *rtn = __builtin_amdgcn_ds_atomic_fadd_f64(addr, x); // returned value is written through rtn
110 // CHECK-LABEL: test_ds_addf_local_f32
111 // CHECK: call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %{{.*}}, float %{{.*}},
112 // GFX90A-LABEL: test_ds_addf_local_f32$local
113 // GFX90A: ds_add_rtn_f32
// Calls the ds fadd f32 builtin on a __local float pointer and stores the
// returned value through a local pointer. The IR pattern above matches only
// the first two operands of the llvm.amdgcn.ds.fadd.f32 call; the assembly
// pattern expects ds_add_rtn_f32.
114 void test_ds_addf_local_f32(__local float *addr, float x){
115 float *rtn; // declared without an initializer
116 *rtn = __builtin_amdgcn_ds_atomic_fadd_f32(addr, x); // returned value is written through rtn
119 // CHECK-LABEL: @test_global_add_f32
120 // CHECK: call float @llvm.amdgcn.global.atomic.fadd.f32.p1.f32(ptr addrspace(1) %{{.*}}, float %{{.*}})
121 void test_global_add_f32(float *rtn, global float *addr, float x) {
122 *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);