Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / clang / test / CodeGenCUDA / amdgpu-atomic-ops.cu
blob57557bf09f411c2ec0bce4b678a11f94a7fab4c8
1 // RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
2 // RUN:   -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
3 // RUN:   -fnative-half-arguments-and-returns | FileCheck %s
5 // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
6 // RUN:   -fcuda-is-device -target-cpu gfx1100 -fnative-half-type \
7 // RUN:   -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s
9 // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
10 // RUN:   -fcuda-is-device -target-cpu gfx940 -fnative-half-type \
11 // RUN:   -fnative-half-arguments-and-returns -munsafe-fp-atomics \
12 // RUN:   | FileCheck -check-prefix=UNSAFE %s
14 // REQUIRES: amdgpu-registered-target
16 #include "Inputs/cuda.h"
17 #include <stdatomic.h>
19 __global__ void ffp1(float *p) {
20   // CHECK-LABEL: @_Z4ffp1Pf
21   // CHECK: atomicrmw fadd ptr {{.*}} monotonic
22   // CHECK: atomicrmw fmax ptr {{.*}} monotonic
23   // CHECK: atomicrmw fmin ptr {{.*}} monotonic
24   // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
25   // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
26   // SAFE: _Z4ffp1Pf
27   // SAFE: global_atomic_cmpswap
28   // SAFE: global_atomic_cmpswap
29   // SAFE: global_atomic_cmpswap
30   // SAFE: global_atomic_cmpswap
31   // SAFE: global_atomic_cmpswap
32   // UNSAFE: _Z4ffp1Pf
33   // UNSAFE: global_atomic_add_f32
34   // UNSAFE: global_atomic_cmpswap
35   // UNSAFE: global_atomic_cmpswap
36   // UNSAFE: global_atomic_cmpswap
37   // UNSAFE: global_atomic_cmpswap
38   __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
39   __atomic_fetch_max(p, 1.0f, memory_order_relaxed);
40   __atomic_fetch_min(p, 1.0f, memory_order_relaxed);
41   __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
42   __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
45 __global__ void ffp2(double *p) {
46   // CHECK-LABEL: @_Z4ffp2Pd
47   // CHECK: atomicrmw fsub ptr {{.*}} monotonic
48   // CHECK: atomicrmw fmax ptr {{.*}} monotonic
49   // CHECK: atomicrmw fmin ptr {{.*}} monotonic
50   // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
51   // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
52   // SAFE: _Z4ffp2Pd
53   // SAFE: global_atomic_cmpswap_b64
54   // SAFE: global_atomic_cmpswap_b64
55   // SAFE: global_atomic_cmpswap_b64
56   // SAFE: global_atomic_cmpswap_b64
57   // SAFE: global_atomic_cmpswap_b64
58   // UNSAFE: _Z4ffp2Pd
59   // UNSAFE: global_atomic_cmpswap_x2
60   // UNSAFE: global_atomic_cmpswap_x2
61   // UNSAFE: global_atomic_cmpswap_x2
62   // UNSAFE: global_atomic_cmpswap_x2
63   // UNSAFE: global_atomic_cmpswap_x2
64   __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
65   __atomic_fetch_max(p, 1.0, memory_order_relaxed);
66   __atomic_fetch_min(p, 1.0, memory_order_relaxed);
67   __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
68   __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
71 // long double is the same as double for amdgcn.
72 __global__ void ffp3(long double *p) {
73   // CHECK-LABEL: @_Z4ffp3Pe
74   // CHECK: atomicrmw fsub ptr {{.*}} monotonic
75   // CHECK: atomicrmw fmax ptr {{.*}} monotonic
76   // CHECK: atomicrmw fmin ptr {{.*}} monotonic
77   // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
78   // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
79   // SAFE: _Z4ffp3Pe
80   // SAFE: global_atomic_cmpswap_b64
81   // SAFE: global_atomic_cmpswap_b64
82   // SAFE: global_atomic_cmpswap_b64
83   // SAFE: global_atomic_cmpswap_b64
84   // SAFE: global_atomic_cmpswap_b64
85   // UNSAFE: _Z4ffp3Pe
86   // UNSAFE: global_atomic_cmpswap_x2
87   // UNSAFE: global_atomic_cmpswap_x2
88   // UNSAFE: global_atomic_cmpswap_x2
89   // UNSAFE: global_atomic_cmpswap_x2
90   // UNSAFE: global_atomic_cmpswap_x2
91   __atomic_fetch_sub(p, 1.0L, memory_order_relaxed);
92   __atomic_fetch_max(p, 1.0L, memory_order_relaxed);
93   __atomic_fetch_min(p, 1.0L, memory_order_relaxed);
94   __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
95   __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
98 __device__ double ffp4(double *p, float f) {
99   // CHECK-LABEL: @_Z4ffp4Pdf
100   // CHECK: fpext float {{.*}} to double
101   // CHECK: atomicrmw fsub ptr {{.*}} monotonic
102   return __atomic_fetch_sub(p, f, memory_order_relaxed);
105 __device__ double ffp5(double *p, int i) {
106   // CHECK-LABEL: @_Z4ffp5Pdi
107   // CHECK: sitofp i32 {{.*}} to double
108   // CHECK: atomicrmw fsub ptr {{.*}} monotonic
109   return __atomic_fetch_sub(p, i, memory_order_relaxed);
112 __global__ void ffp6(_Float16 *p) {
113   // CHECK-LABEL: @_Z4ffp6PDF16
114   // CHECK: atomicrmw fadd ptr {{.*}} monotonic
115   // CHECK: atomicrmw fmax ptr {{.*}} monotonic
116   // CHECK: atomicrmw fmin ptr {{.*}} monotonic
117   // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
118   // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
119   // SAFE: _Z4ffp6PDF16
120   // SAFE: global_atomic_cmpswap
121   // SAFE: global_atomic_cmpswap
122   // SAFE: global_atomic_cmpswap
123   // SAFE: global_atomic_cmpswap
124   // SAFE: global_atomic_cmpswap
125   // UNSAFE: _Z4ffp6PDF16
126   // UNSAFE: global_atomic_cmpswap
127   // UNSAFE: global_atomic_cmpswap
128   // UNSAFE: global_atomic_cmpswap
129   // UNSAFE: global_atomic_cmpswap
130   // UNSAFE: global_atomic_cmpswap
131   __atomic_fetch_add(p, 1.0, memory_order_relaxed);
132   __atomic_fetch_max(p, 1.0, memory_order_relaxed);
133   __atomic_fetch_min(p, 1.0, memory_order_relaxed);
134   __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
135   __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);