Revert "[llvm] Improve llvm.objectsize computation by computing GEP, alloca and mallo...
[llvm-project.git] / clang / test / CodeGenOpenCL / builtins-fp-atomics-gfx940.cl
blob832d7df00db142fea1b06d1e4bb9182cd0459f00
1 // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx940 \
2 // RUN: %s -emit-llvm -o - | FileCheck %s
4 // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx940 \
5 // RUN: -S -o - %s | FileCheck -check-prefix=GFX940 %s
7 // REQUIRES: amdgpu-registered-target
// Two-element half vector; operand and return type of the packed-f16 tests.
9 typedef half __attribute__((ext_vector_type(2))) half2;
// Two-element short vector; the bf16 tests take their operands in this type,
// and their IR patterns expect a bitcast from <2 x i16> to <2 x bfloat>.
10 typedef short __attribute__((ext_vector_type(2))) short2;
12 // CHECK-LABEL: test_flat_add_f32
13 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, float %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
15 // GFX940-LABEL: test_flat_add_f32
16 // GFX940: flat_atomic_add_f32
// f32 atomic add through a __generic pointer: forwards addr and x to
// __builtin_amdgcn_flat_atomic_fadd_f32 and returns the builtin's result.
// The IR pattern above expects an agent-scope, monotonic `atomicrmw fadd` on
// float carrying both the no-fine-grained-memory and ignore-denormal-mode
// metadata; the assembly pattern expects flat_atomic_add_f32.
// NOTE(review): the declared return type is half2 although the operand is
// float -- possibly copied from the 2f16 test; confirm whether float was meant.
17 half2 test_flat_add_f32(__generic float *addr, float x) {
18 return __builtin_amdgcn_flat_atomic_fadd_f32(addr, x);
21 // CHECK-LABEL: test_flat_add_2f16
22 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x half> %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
24 // GFX940-LABEL: test_flat_add_2f16
25 // GFX940: flat_atomic_pk_add_f16
// Packed-f16 atomic add through a __generic pointer: forwards addr and x to
// __builtin_amdgcn_flat_atomic_fadd_v2f16 and returns the builtin's result.
// The IR pattern above expects an agent-scope, monotonic `atomicrmw fadd` on
// <2 x half> ending with the no-fine-grained-memory metadata; the assembly
// pattern expects flat_atomic_pk_add_f16.
26 half2 test_flat_add_2f16(__generic half2 *addr, half2 x) {
27 return __builtin_amdgcn_flat_atomic_fadd_v2f16(addr, x);
30 // CHECK-LABEL: test_flat_add_2bf16
31 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
32 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
33 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
35 // GFX940-LABEL: test_flat_add_2bf16
36 // GFX940: flat_atomic_pk_add_bf16
// Packed-bf16 atomic add through a __generic pointer, with the operands
// declared as short2: forwards addr and x to
// __builtin_amdgcn_flat_atomic_fadd_v2bf16 and returns the builtin's result.
// The IR patterns above expect a <2 x i16> value bitcast to <2 x bfloat>, an
// agent-scope, monotonic `atomicrmw fadd` on that value, and -- on the very
// next IR line -- the result bitcast back to <2 x i16>. The assembly pattern
// expects flat_atomic_pk_add_bf16.
37 short2 test_flat_add_2bf16(__generic short2 *addr, short2 x) {
38 return __builtin_amdgcn_flat_atomic_fadd_v2bf16(addr, x);
41 // CHECK-LABEL: test_global_add_2bf16
42 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
43 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(1) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
44 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
46 // GFX940-LABEL: test_global_add_2bf16
47 // GFX940: global_atomic_pk_add_bf16
// Packed-bf16 atomic add through a __global pointer, with the operands
// declared as short2: forwards addr and x to
// __builtin_amdgcn_global_atomic_fadd_v2bf16 and returns the builtin's result.
// The IR patterns above expect the same bitcast / `atomicrmw fadd` / bitcast
// sequence on <2 x bfloat>, here with a `ptr addrspace(1)` pointer operand;
// the assembly pattern expects global_atomic_pk_add_bf16.
48 short2 test_global_add_2bf16(__global short2 *addr, short2 x) {
49 return __builtin_amdgcn_global_atomic_fadd_v2bf16(addr, x);
52 // CHECK-LABEL: test_local_add_2bf16
54 // CHECK: [[BC0:%.+]] = bitcast <2 x i16> {{.+}} to <2 x bfloat>
55 // CHECK: [[RMW:%.+]] = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x bfloat> [[BC0]] syncscope("agent") monotonic, align 4{{$}}
56 // CHECK-NEXT: bitcast <2 x bfloat> [[RMW]] to <2 x i16>
58 // GFX940-LABEL: test_local_add_2bf16
59 // GFX940: ds_pk_add_rtn_bf16
// Packed-bf16 atomic add through a __local pointer, with the operands declared
// as short2: forwards addr and x to __builtin_amdgcn_ds_atomic_fadd_v2bf16 and
// returns the builtin's result.
// The IR patterns above expect a bitcast to <2 x bfloat>, an agent-scope,
// monotonic `atomicrmw fadd` on `ptr addrspace(3)` whose line ends right after
// `align 4` (no trailing metadata), then the result bitcast back to <2 x i16>.
// The assembly pattern expects ds_pk_add_rtn_bf16.
60 short2 test_local_add_2bf16(__local short2 *addr, short2 x) {
61 return __builtin_amdgcn_ds_atomic_fadd_v2bf16(addr, x);
64 // CHECK-LABEL: test_local_add_2f16
65 // CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4
66 // GFX940-LABEL: test_local_add_2f16
67 // GFX940: ds_pk_add_rtn_f16
// Packed-f16 atomic add through a __local pointer: forwards addr and x to
// __builtin_amdgcn_ds_atomic_fadd_v2f16 and returns the builtin's result.
// The IR pattern above expects a monotonic `atomicrmw fadd` on <2 x half>
// with a `ptr addrspace(3)` pointer operand; unlike the other tests in view
// it names no syncscope. The assembly pattern expects ds_pk_add_rtn_f16.
68 half2 test_local_add_2f16(__local half2 *addr, half2 x) {
69 return __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
72 // CHECK-LABEL: test_local_add_2f16_noret
73 // CHECK: = atomicrmw fadd ptr addrspace(3) %{{.+}}, <2 x half> %{{.+}} monotonic, align 4
74 // GFX940-LABEL: test_local_add_2f16_noret
75 // GFX940: ds_pk_add_f16
// Same builtin call as test_local_add_2f16, but the result is discarded and
// the function returns void.
// The IR pattern above is the same `atomicrmw fadd` on <2 x half> in
// addrspace(3); the assembly pattern expects ds_pk_add_f16, i.e. the
// mnemonic without the "rtn" part used by the value-returning test.
76 void test_local_add_2f16_noret(__local half2 *addr, half2 x) {
77 __builtin_amdgcn_ds_atomic_fadd_v2f16(addr, x);
80 // CHECK-LABEL: @test_global_add_f32
81 // CHECK: = atomicrmw fadd ptr addrspace(1) %{{.+}}, float %{{.+}} syncscope("agent") monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
// f32 atomic add through a global pointer: calls
// __builtin_amdgcn_global_atomic_fadd_f32(addr, x) and stores the builtin's
// result through rtn.
// The IR pattern above expects an agent-scope, monotonic `atomicrmw fadd` on
// float with a `ptr addrspace(1)` pointer operand, carrying both the
// no-fine-grained-memory and ignore-denormal-mode metadata.
82 void test_global_add_f32(float *rtn, global float *addr, float x) {
83 *rtn = __builtin_amdgcn_global_atomic_fadd_f32(addr, x);