// Scraped from: Revert "[llvm] Improve llvm.objectsize computation by computing GEP, alloca and malloc parameters bounds"
// llvm-project.git / clang / test / CodeGenOpenCL / builtins-fp-atomics-gfx90a.cl
// blob ef97d12afab1d14969910518dc76eba2b1f1c3b5
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a \
// RUN: %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK

// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a \
// RUN: -S -o - %s | FileCheck -check-prefix=GFX90A %s

// REQUIRES: amdgpu-registered-target
9 typedef half __attribute__((ext_vector_type(2))) half2;
11 // CHECK-LABEL: test_global_add_f64
12 // CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
13 // GFX90A-LABEL: test_global_add_f64$local:
14 // GFX90A: global_atomic_add_f64
15 void test_global_add_f64(__global double *addr, double x) {
16 double *rtn;
17 *rtn = __builtin_amdgcn_global_atomic_fadd_f64(addr, x);
20 // CHECK-LABEL: test_global_add_half2
21 // CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
22 // GFX90A-LABEL: test_global_add_half2
23 // GFX90A: global_atomic_pk_add_f16 v2, v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}}, off glc
24 void test_global_add_half2(__global half2 *addr, half2 x) {
25 half2 *rtn;
26 *rtn = __builtin_amdgcn_global_atomic_fadd_v2f16(addr, x);
29 // CHECK-LABEL: test_global_global_min_f64
30 // CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
32 // GFX90A-LABEL: test_global_global_min_f64$local
33 // GFX90A: global_atomic_min_f64
34 void test_global_global_min_f64(__global double *addr, double x){
35 double *rtn;
36 *rtn = __builtin_amdgcn_global_atomic_fmin_f64(addr, x);
39 // CHECK-LABEL: test_global_max_f64
40 // CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
42 // GFX90A-LABEL: test_global_max_f64$local
43 // GFX90A: global_atomic_max_f64
44 void test_global_max_f64(__global double *addr, double x){
45 double *rtn;
46 *rtn = __builtin_amdgcn_global_atomic_fmax_f64(addr, x);
49 // CHECK-LABEL: test_flat_add_local_f64
50 // CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8{{$}}
52 // GFX90A-LABEL: test_flat_add_local_f64$local
53 // GFX90A: ds_add_rtn_f64
54 void test_flat_add_local_f64(__local double *addr, double x){
55 double *rtn;
56 *rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x);
59 // CHECK-LABEL: test_flat_global_add_f64
60 // CHECK: = atomicrmw fadd ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
62 // GFX90A-LABEL: test_flat_global_add_f64$local
63 // GFX90A: global_atomic_add_f64
64 void test_flat_global_add_f64(__global double *addr, double x){
65 double *rtn;
66 *rtn = __builtin_amdgcn_flat_atomic_fadd_f64(addr, x);
69 // CHECK-LABEL: test_flat_min_flat_f64
70 // CHECK: = atomicrmw fmin ptr {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
72 // GFX90A-LABEL: test_flat_min_flat_f64$local
73 // GFX90A: flat_atomic_min_f64
74 void test_flat_min_flat_f64(__generic double *addr, double x){
75 double *rtn;
76 *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x);
79 // CHECK-LABEL: test_flat_global_min_f64
80 // CHECK: = atomicrmw fmin ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
82 // GFX90A: test_flat_global_min_f64$local
83 // GFX90A: global_atomic_min_f64
84 void test_flat_global_min_f64(__global double *addr, double x){
85 double *rtn;
86 *rtn = __builtin_amdgcn_flat_atomic_fmin_f64(addr, x);
89 // CHECK-LABEL: test_flat_max_flat_f64
90 // CHECK: = atomicrmw fmax ptr {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
92 // GFX90A-LABEL: test_flat_max_flat_f64$local
93 // GFX90A: flat_atomic_max_f64
94 void test_flat_max_flat_f64(__generic double *addr, double x){
95 double *rtn;
96 *rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x);
99 // CHECK-LABEL: test_flat_global_max_f64
100 // CHECK: = atomicrmw fmax ptr addrspace(1) {{.+}}, double %{{.+}} syncscope("agent") monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
102 // GFX90A-LABEL: test_flat_global_max_f64$local
103 // GFX90A: global_atomic_max_f64
104 void test_flat_global_max_f64(__global double *addr, double x){
105 double *rtn;
106 *rtn = __builtin_amdgcn_flat_atomic_fmax_f64(addr, x);
109 // CHECK-LABEL: test_ds_add_local_f64
110 // CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, double %{{.+}} monotonic, align 8
111 // GFX90A: test_ds_add_local_f64$local
112 // GFX90A: ds_add_rtn_f64
113 void test_ds_add_local_f64(__local double *addr, double x){
114 double *rtn;
115 *rtn = __builtin_amdgcn_ds_atomic_fadd_f64(addr, x);
118 // CHECK-LABEL: test_ds_addf_local_f32
119 // CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, float %{{.+}} monotonic, align 4
120 // GFX90A-LABEL: test_ds_addf_local_f32$local
121 // GFX90A: ds_add_rtn_f32
122 void test_ds_addf_local_f32(__local float *addr, float x){
123 float *rtn;
124 *rtn = __builtin_amdgcn_ds_atomic_fadd_f32(addr, x);
127 // CHECK-LABEL: @test_global_add_f32
128 // CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, float %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
129 void test_global_add_f32(float *rtn, global float *addr, float x) {
130 *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);