1 // RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
2 // RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
3 // RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefixes=CHECK,SAFEIR %s
5 // RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
6 // RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
7 // RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics | FileCheck -check-prefixes=CHECK,UNSAFEIR %s
9 // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
10 // RUN: -fcuda-is-device -target-cpu gfx1100 -fnative-half-type \
11 // RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s
13 // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
14 // RUN: -fcuda-is-device -target-cpu gfx940 -fnative-half-type \
15 // RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics \
16 // RUN: | FileCheck -check-prefix=UNSAFE %s
18 // REQUIRES: amdgpu-registered-target
20 #include "Inputs/cuda.h"
21 #include <stdatomic.h>
23 __global__ void ffp1(float *p) {
// 32-bit float atomic fetch ops. The C11-style __atomic_* builtins emit
// atomicrmw with no explicit syncscope, while the __hip_atomic_* builtins
// carry the requested scope (agent/workgroup) plus a !noalias.addrspace
// annotation that excludes the private address space (range defined at the
// bottom of the file). With -munsafe-fp-atomics the second IR run expects
// !amdgpu.no.fine.grained.memory on every op, and f32 fadd additionally
// gets !amdgpu.ignore.denormal.mode.
24 // CHECK-LABEL: @_Z4ffp1Pf
25 // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4{{$}}
26 // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4{{$}}
27 // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4{{$}}
28 // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4{{$}}
29 // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE:[0-9]+]]{{$}}
30 // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
31 // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
32 // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
34 // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
35 // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
36 // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
37 // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 4, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
38 // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE:[0-9]+]], !amdgpu.no.fine.grained.memory !{{[0-9]+}}, !amdgpu.ignore.denormal.mode !{{[0-9]+$}}
39 // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
40 // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
41 // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// ISA checks. The safe run (gfx1100) expects only compare-and-swap
// expansions; the unsafe run (gfx940) expects a native f32 add for fadd
// and CAS loops for the rest.
44 // SAFE: global_atomic_cmpswap
45 // SAFE: global_atomic_cmpswap
46 // SAFE: global_atomic_cmpswap
47 // SAFE: global_atomic_cmpswap
48 // SAFE: global_atomic_cmpswap
49 // SAFE: global_atomic_cmpswap
52 // UNSAFE: global_atomic_add_f32
53 // UNSAFE: global_atomic_cmpswap
54 // UNSAFE: global_atomic_cmpswap
55 // UNSAFE: global_atomic_cmpswap
56 // UNSAFE: global_atomic_cmpswap
57 // UNSAFE: global_atomic_cmpswap
// System-scope (default) atomics via C11-style builtins.
59 __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
60 __atomic_fetch_sub(p, 1.0f, memory_order_relaxed);
61 __atomic_fetch_max(p, 1.0f, memory_order_relaxed);
62 __atomic_fetch_min(p, 1.0f, memory_order_relaxed);
// Explicitly scoped atomics via the HIP builtins; these drive the
// syncscope("agent")/syncscope("workgroup") checks above.
64 __hip_atomic_fetch_add(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
65 __hip_atomic_fetch_sub(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
66 __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
67 __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
70 __global__ void ffp2(double *p) {
// 64-bit double atomic fetch ops; mirrors ffp1 but with align 8 and no
// !amdgpu.ignore.denormal.mode (that annotation only applies to f32 fadd).
71 // CHECK-LABEL: @_Z4ffp2Pd
72 // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
73 // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
74 // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
75 // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
76 // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
77 // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
78 // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
79 // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
81 // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
82 // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
83 // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
84 // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
85 // UNSAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
86 // UNSAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
87 // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
88 // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// ISA checks. Safe run expects 64-bit CAS loops throughout; the unsafe run
// expects native f64 add/max/min instructions, with fsub still expanded to
// a CAS loop (the checks list no native f64 sub instruction).
90 // SAFE-LABEL: @_Z4ffp2Pd
91 // SAFE: global_atomic_cmpswap_b64
92 // SAFE: global_atomic_cmpswap_b64
93 // SAFE: global_atomic_cmpswap_b64
94 // SAFE: global_atomic_cmpswap_b64
95 // SAFE: global_atomic_cmpswap_b64
96 // SAFE: global_atomic_cmpswap_b64
98 // UNSAFE-LABEL: @_Z4ffp2Pd
99 // UNSAFE: global_atomic_add_f64
100 // UNSAFE: global_atomic_cmpswap_x2
101 // UNSAFE: global_atomic_max_f64
102 // UNSAFE: global_atomic_min_f64
103 // UNSAFE: global_atomic_max_f64
104 // UNSAFE: global_atomic_min_f64
// System-scope atomics, then explicitly scoped HIP atomics.
105 __atomic_fetch_add(p, 1.0, memory_order_relaxed);
106 __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
107 __atomic_fetch_max(p, 1.0, memory_order_relaxed);
108 __atomic_fetch_min(p, 1.0, memory_order_relaxed);
109 __hip_atomic_fetch_add(p, 1.0, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
110 __hip_atomic_fetch_sub(p, 1.0, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
111 __hip_atomic_fetch_max(p, 1.0, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
112 __hip_atomic_fetch_min(p, 1.0, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
115 // long double is the same as double for amdgcn.
116 __global__ void ffp3(long double *p) {
// long double atomics; the IR checks are identical to the double case in
// ffp2 (align 8, same scope/metadata expectations), matching the comment
// above that long double lowers like double on amdgcn.
117 // CHECK-LABEL: @_Z4ffp3Pe
118 // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8{{$}}
119 // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
120 // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8{{$}}
121 // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8{{$}}
122 // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
123 // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
124 // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
125 // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
127 // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
128 // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
129 // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
130 // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
131 // UNSAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
132 // UNSAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
133 // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
134 // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
// ISA checks: safe run expects 64-bit CAS loops; unsafe run expects a CAS
// loop for fsub and native f64 max/min instructions.
136 // SAFE-LABEL: @_Z4ffp3Pe
137 // SAFE: global_atomic_cmpswap_b64
138 // SAFE: global_atomic_cmpswap_b64
139 // SAFE: global_atomic_cmpswap_b64
140 // SAFE: global_atomic_cmpswap_b64
141 // SAFE: global_atomic_cmpswap_b64
142 // UNSAFE-LABEL: @_Z4ffp3Pe
143 // UNSAFE: global_atomic_cmpswap_x2
144 // UNSAFE: global_atomic_max_f64
145 // UNSAFE: global_atomic_min_f64
146 // UNSAFE: global_atomic_max_f64
147 // UNSAFE: global_atomic_min_f64
// System-scope atomics, then explicitly scoped HIP atomics.
148 __atomic_fetch_add(p, 1.0L, memory_order_relaxed);
149 __atomic_fetch_sub(p, 1.0L, memory_order_relaxed);
150 __atomic_fetch_max(p, 1.0L, memory_order_relaxed);
151 __atomic_fetch_min(p, 1.0L, memory_order_relaxed);
152 __hip_atomic_fetch_add(p, 1.0L, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
153 __hip_atomic_fetch_sub(p, 1.0L, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
154 __hip_atomic_fetch_max(p, 1.0L, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
155 __hip_atomic_fetch_min(p, 1.0L, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
158 __device__ double ffp4(double *p, float f) {
// Mixed-type operand: a float value atomically subtracted from a double.
// The checks verify the operand is widened (fpext, preserving the
// 'contract' fast-math flag) before the 64-bit atomicrmw fsub is emitted,
// for both the system-scope builtin and the agent-scoped HIP builtin.
159 // CHECK-LABEL: @_Z4ffp4Pdf
160 // CHECK: fpext contract float {{.*}} to double
161 // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
162 // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
164 // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
165 // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
166 __atomic_fetch_sub(p, f, memory_order_relaxed);
167 return __hip_atomic_fetch_sub(p, f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
170 __device__ double ffp5(double *p, int i) {
// Mixed-type operand: an int value atomically subtracted from a double.
// The checks verify an integer-to-float conversion (sitofp) is emitted
// before the 64-bit atomicrmw fsub, for both the system-scope builtin and
// the agent-scoped HIP builtin.
171 // CHECK-LABEL: @_Z4ffp5Pdi
172 // CHECK: sitofp i32 {{.*}} to double
173 // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8{{$}}
174 // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
175 __atomic_fetch_sub(p, i, memory_order_relaxed);
177 // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
178 // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 8, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
179 return __hip_atomic_fetch_sub(p, i, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
182 __global__ void ffp6(_Float16 *p) {
// 16-bit _Float16 atomic fetch ops (align 2). Same IR scope/metadata
// expectations as the float/double cases; note there is no
// !amdgpu.ignore.denormal.mode here even on fadd (that is f32-only in the
// checks above). Both ISA runs expect only CAS-loop expansions — no native
// f16 atomic instructions are checked for either target.
183 // CHECK-LABEL: @_Z4ffp6PDF16
184 // SAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2{{$}}
185 // SAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2{{$}}
186 // SAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2{{$}}
187 // SAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2{{$}}
188 // SAFEIR: atomicrmw fadd ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
189 // SAFEIR: atomicrmw fsub ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
190 // SAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
191 // SAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
193 // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
194 // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
195 // UNSAFEIR: atomicrmw fmax ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
196 // UNSAFEIR: atomicrmw fmin ptr {{.*}} monotonic, align 2, !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
197 // UNSAFEIR: atomicrmw fadd ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
198 // UNSAFEIR: atomicrmw fsub ptr {{.*}} monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
199 // UNSAFEIR: atomicrmw fmax ptr {{.*}} syncscope("agent") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
200 // UNSAFEIR: atomicrmw fmin ptr {{.*}} syncscope("workgroup") monotonic, align 2, !noalias.addrspace ![[$NO_PRIVATE]], !amdgpu.no.fine.grained.memory !{{[0-9]+$}}
202 // SAFE: _Z4ffp6PDF16
203 // SAFE: global_atomic_cmpswap
204 // SAFE: global_atomic_cmpswap
205 // SAFE: global_atomic_cmpswap
206 // SAFE: global_atomic_cmpswap
207 // SAFE: global_atomic_cmpswap
208 // SAFE: global_atomic_cmpswap
210 // UNSAFE: _Z4ffp6PDF16
211 // UNSAFE: global_atomic_cmpswap
212 // UNSAFE: global_atomic_cmpswap
213 // UNSAFE: global_atomic_cmpswap
214 // UNSAFE: global_atomic_cmpswap
215 // UNSAFE: global_atomic_cmpswap
216 // UNSAFE: global_atomic_cmpswap
// System-scope atomics with double literals (converted to _Float16), then
// explicitly scoped HIP atomics with float literals.
217 __atomic_fetch_add(p, 1.0, memory_order_relaxed);
218 __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
219 __atomic_fetch_max(p, 1.0, memory_order_relaxed);
220 __atomic_fetch_min(p, 1.0, memory_order_relaxed);
222 __hip_atomic_fetch_add(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
223 __hip_atomic_fetch_sub(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
224 __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
225 __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
// Integer compare-exchange. The C11-style builtins (strong, then weak with
// weak-flag 1) emit cmpxchg at acquire/acquire with no syncscope; the HIP
// builtins emit workgroup-scoped monotonic cmpxchg carrying the same
// !noalias.addrspace (no-private) annotation used by the atomicrmw checks.
228 // CHECK-LABEL: @_Z12test_cmpxchgPiii
229 // CHECK: cmpxchg ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} acquire acquire, align 4{{$}}
230 // CHECK: cmpxchg weak ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} acquire acquire, align 4{{$}}
231 // CHECK: cmpxchg ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} syncscope("workgroup") monotonic monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
232 // CHECK: cmpxchg weak ptr %{{.+}}, i32 %{{.+}}, i32 %{{.+}} syncscope("workgroup") monotonic monotonic, align 4, !noalias.addrspace ![[$NO_PRIVATE]]{{$}}
233 __device__ int test_cmpxchg(int *ptr, int cmp, int desired) {
234 bool flag = __atomic_compare_exchange(ptr, &cmp, &desired, 0, memory_order_acquire, memory_order_acquire);
235 flag = __atomic_compare_exchange_n(ptr, &cmp, desired, 1, memory_order_acquire, memory_order_acquire);
236 flag = __hip_atomic_compare_exchange_strong(ptr, &cmp, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
237 flag = __hip_atomic_compare_exchange_weak(ptr, &cmp, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
241 // SAFEIR: ![[$NO_PRIVATE]] = !{i32 5, i32 6}
242 // UNSAFEIR: ![[$NO_PRIVATE]] = !{i32 5, i32 6}