// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a \
// RUN: %s -emit-llvm -o - | FileCheck %s -check-prefix=CHECK
// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a \
// RUN: -S -o - %s | FileCheck -check-prefix=GFX90A %s
// REQUIRES: amdgpu-registered-target
9 typedef half __attribute__
((ext_vector_type(2))) half2
;
11 // CHECK-LABEL
: test_global_add_f64
12 // CHECK
: = atomicrmw fadd ptr addrspace
(1) %
{{.
+}}, double %
{{.
+}} syncscope
("agent") monotonic
, align
8, !amdgpu.no.fine.grained.memory
!{{[0-
9]+$
}}
13 // GFX90A-LABEL
: test_global_add_f64$local
:
14 // GFX90A
: global_atomic_add_f64
15 void test_global_add_f64
(__global double
*addr
, double x
) {
17 *rtn
= __builtin_amdgcn_global_atomic_fadd_f64
(addr, x
);
20 // CHECK-LABEL
: test_global_add_half2
21 // CHECK
: = atomicrmw fadd ptr addrspace
(1) %
{{.
+}}, <2 x half
> %
{{.
+}} syncscope
("agent") monotonic
, align
4, !amdgpu.no.fine.grained.memory
!{{[0-
9]+$
}}
22 // GFX90A-LABEL
: test_global_add_half2
23 // GFX90A
: global_atomic_pk_add_f16 v2
, v
[{{[0-
9]+:[0-
9]+}}], v
{{[0-
9]+}}, off glc
24 void test_global_add_half2
(__global half2
*addr
, half2 x
) {
26 *rtn
= __builtin_amdgcn_global_atomic_fadd_v2f16
(addr, x
);
29 // CHECK-LABEL
: test_global_global_min_f64
30 // CHECK
: = atomicrmw fmin ptr addrspace
(1) {{.
+}}, double %
{{.
+}} syncscope
("agent") monotonic
, align
8, !amdgpu.no.fine.grained.memory
!{{[0-
9]+$
}}
32 // GFX90A-LABEL
: test_global_global_min_f64$local
33 // GFX90A
: global_atomic_min_f64
34 void test_global_global_min_f64
(__global double
*addr
, double x
){
36 *rtn
= __builtin_amdgcn_global_atomic_fmin_f64
(addr, x
);
39 // CHECK-LABEL
: test_global_max_f64
40 // CHECK
: = atomicrmw fmax ptr addrspace
(1) {{.
+}}, double %
{{.
+}} syncscope
("agent") monotonic
, align
8, !amdgpu.no.fine.grained.memory
!{{[0-
9]+$
}}
42 // GFX90A-LABEL
: test_global_max_f64$local
43 // GFX90A
: global_atomic_max_f64
44 void test_global_max_f64
(__global double
*addr
, double x
){
46 *rtn
= __builtin_amdgcn_global_atomic_fmax_f64
(addr, x
);
49 // CHECK-LABEL
: test_flat_add_local_f64
50 // CHECK
: = atomicrmw fadd ptr addrspace
(3) %
{{.
+}}, double %
{{.
+}} syncscope
("agent") monotonic
, align
8{{$
}}
52 // GFX90A-LABEL
: test_flat_add_local_f64$local
53 // GFX90A
: ds_add_rtn_f64
54 void test_flat_add_local_f64
(__local double
*addr
, double x
){
56 *rtn
= __builtin_amdgcn_flat_atomic_fadd_f64
(addr, x
);
59 // CHECK-LABEL
: test_flat_global_add_f64
60 // CHECK
: = atomicrmw fadd ptr addrspace
(1) {{.
+}}, double %
{{.
+}} syncscope
("agent") monotonic
, align
8, !amdgpu.no.fine.grained.memory
!{{[0-
9]+$
}}
62 // GFX90A-LABEL
: test_flat_global_add_f64$local
63 // GFX90A
: global_atomic_add_f64
64 void test_flat_global_add_f64
(__global double
*addr
, double x
){
66 *rtn
= __builtin_amdgcn_flat_atomic_fadd_f64
(addr, x
);
69 // CHECK-LABEL
: test_flat_min_flat_f64
70 // CHECK
: = atomicrmw fmin ptr
{{.
+}}, double %
{{.
+}} syncscope
("agent") monotonic
, align
8, !amdgpu.no.fine.grained.memory
!{{[0-
9]+$
}}
72 // GFX90A-LABEL
: test_flat_min_flat_f64$local
73 // GFX90A
: flat_atomic_min_f64
74 void test_flat_min_flat_f64
(__generic double
*addr
, double x
){
76 *rtn
= __builtin_amdgcn_flat_atomic_fmin_f64
(addr, x
);
79 // CHECK-LABEL
: test_flat_global_min_f64
80 // CHECK
: = atomicrmw fmin ptr addrspace
(1) {{.
+}}, double %
{{.
+}} syncscope
("agent") monotonic
, align
8, !amdgpu.no.fine.grained.memory
!{{[0-
9]+$
}}
82 // GFX90A
: test_flat_global_min_f64$local
83 // GFX90A
: global_atomic_min_f64
84 void test_flat_global_min_f64
(__global double
*addr
, double x
){
86 *rtn
= __builtin_amdgcn_flat_atomic_fmin_f64
(addr, x
);
89 // CHECK-LABEL
: test_flat_max_flat_f64
90 // CHECK
: = atomicrmw fmax ptr
{{.
+}}, double %
{{.
+}} syncscope
("agent") monotonic
, align
8, !amdgpu.no.fine.grained.memory
!{{[0-
9]+$
}}
92 // GFX90A-LABEL
: test_flat_max_flat_f64$local
93 // GFX90A
: flat_atomic_max_f64
94 void test_flat_max_flat_f64
(__generic double
*addr
, double x
){
96 *rtn
= __builtin_amdgcn_flat_atomic_fmax_f64
(addr, x
);
99 // CHECK-LABEL
: test_flat_global_max_f64
100 // CHECK
: = atomicrmw fmax ptr addrspace
(1) {{.
+}}, double %
{{.
+}} syncscope
("agent") monotonic
, align
8, !amdgpu.no.fine.grained.memory
!{{[0-
9]+$
}}
102 // GFX90A-LABEL
: test_flat_global_max_f64$local
103 // GFX90A
: global_atomic_max_f64
104 void test_flat_global_max_f64
(__global double
*addr
, double x
){
106 *rtn
= __builtin_amdgcn_flat_atomic_fmax_f64
(addr, x
);
109 // CHECK-LABEL
: test_ds_add_local_f64
110 // CHECK
: = atomicrmw fadd ptr addrspace
(3) %
{{.
+}}, double %
{{.
+}} monotonic
, align
8
111 // GFX90A
: test_ds_add_local_f64$local
112 // GFX90A
: ds_add_rtn_f64
113 void test_ds_add_local_f64
(__local double
*addr
, double x
){
115 *rtn
= __builtin_amdgcn_ds_atomic_fadd_f64
(addr, x
);
118 // CHECK-LABEL
: test_ds_addf_local_f32
119 // CHECK
: = atomicrmw fadd ptr addrspace
(3) %
{{.
+}}, float %
{{.
+}} monotonic
, align
4
120 // GFX90A-LABEL
: test_ds_addf_local_f32$local
121 // GFX90A
: ds_add_rtn_f32
122 void test_ds_addf_local_f32
(__local float
*addr
, float x
){
124 *rtn
= __builtin_amdgcn_ds_atomic_fadd_f32
(addr, x
);
127 // CHECK-LABEL
: @test_global_add_f32
128 // CHECK
: = atomicrmw fadd ptr addrspace
(1) %
{{.
+}}, float %
{{.
+}} syncscope
("agent") monotonic
, align
4, !amdgpu.no.fine.grained.memory
!{{[0-
9]+}}, !amdgpu.ignore.denormal.mode
!{{[0-
9]+$
}}
129 void test_global_add_f32
(float *rtn
, global float
*addr
, float x
) {
130 *rtn
= __builtin_amdgcn_global_atomic_fadd_f32
(addr, x
);