1 // RUN: %clang_cc1 -x hip %s -emit-llvm -o - -triple=amdgcn-amd-amdhsa \
2 // RUN: -fcuda-is-device -target-cpu gfx906 -fnative-half-type \
3 // RUN: -fnative-half-arguments-and-returns | FileCheck %s
5 // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
6 // RUN: -fcuda-is-device -target-cpu gfx1100 -fnative-half-type \
7 // RUN: -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s
9 // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
10 // RUN: -fcuda-is-device -target-cpu gfx940 -fnative-half-type \
11 // RUN: -fnative-half-arguments-and-returns -munsafe-fp-atomics \
12 // RUN: | FileCheck -check-prefix=UNSAFE %s
14 // REQUIRES: amdgpu-registered-target
16 #include "Inputs/cuda.h"
17 #include <stdatomic.h>
19 __global__ void ffp1(float *p) {
20 // CHECK-LABEL: @_Z4ffp1Pf
21 // CHECK: atomicrmw fadd ptr {{.*}} monotonic
22 // CHECK: atomicrmw fmax ptr {{.*}} monotonic
23 // CHECK: atomicrmw fmin ptr {{.*}} monotonic
24 // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
25 // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
27 // SAFE: global_atomic_cmpswap
28 // SAFE: global_atomic_cmpswap
29 // SAFE: global_atomic_cmpswap
30 // SAFE: global_atomic_cmpswap
31 // SAFE: global_atomic_cmpswap
33 // UNSAFE: global_atomic_add_f32
34 // UNSAFE: global_atomic_cmpswap
35 // UNSAFE: global_atomic_cmpswap
36 // UNSAFE: global_atomic_cmpswap
37 // UNSAFE: global_atomic_cmpswap
38 __atomic_fetch_add(p, 1.0f, memory_order_relaxed);
39 __atomic_fetch_max(p, 1.0f, memory_order_relaxed);
40 __atomic_fetch_min(p, 1.0f, memory_order_relaxed);
41 __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
42 __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
45 __global__ void ffp2(double *p) {
46 // CHECK-LABEL: @_Z4ffp2Pd
47 // CHECK: atomicrmw fsub ptr {{.*}} monotonic
48 // CHECK: atomicrmw fmax ptr {{.*}} monotonic
49 // CHECK: atomicrmw fmin ptr {{.*}} monotonic
50 // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
51 // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
53 // SAFE: global_atomic_cmpswap_b64
54 // SAFE: global_atomic_cmpswap_b64
55 // SAFE: global_atomic_cmpswap_b64
56 // SAFE: global_atomic_cmpswap_b64
57 // SAFE: global_atomic_cmpswap_b64
59 // UNSAFE: global_atomic_cmpswap_x2
60 // UNSAFE: global_atomic_cmpswap_x2
61 // UNSAFE: global_atomic_cmpswap_x2
62 // UNSAFE: global_atomic_cmpswap_x2
63 // UNSAFE: global_atomic_cmpswap_x2
64 __atomic_fetch_sub(p, 1.0, memory_order_relaxed);
65 __atomic_fetch_max(p, 1.0, memory_order_relaxed);
66 __atomic_fetch_min(p, 1.0, memory_order_relaxed);
67 __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
68 __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
71 // long double is the same as double for amdgcn.
72 __global__ void ffp3(long double *p) {
73 // CHECK-LABEL: @_Z4ffp3Pe
74 // CHECK: atomicrmw fsub ptr {{.*}} monotonic
75 // CHECK: atomicrmw fmax ptr {{.*}} monotonic
76 // CHECK: atomicrmw fmin ptr {{.*}} monotonic
77 // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
78 // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
80 // SAFE: global_atomic_cmpswap_b64
81 // SAFE: global_atomic_cmpswap_b64
82 // SAFE: global_atomic_cmpswap_b64
83 // SAFE: global_atomic_cmpswap_b64
84 // SAFE: global_atomic_cmpswap_b64
86 // UNSAFE: global_atomic_cmpswap_x2
87 // UNSAFE: global_atomic_cmpswap_x2
88 // UNSAFE: global_atomic_cmpswap_x2
89 // UNSAFE: global_atomic_cmpswap_x2
90 // UNSAFE: global_atomic_cmpswap_x2
91 __atomic_fetch_sub(p, 1.0L, memory_order_relaxed);
92 __atomic_fetch_max(p, 1.0L, memory_order_relaxed);
93 __atomic_fetch_min(p, 1.0L, memory_order_relaxed);
94 __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
95 __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);
98 __device__ double ffp4(double *p, float f) {
99 // CHECK-LABEL: @_Z4ffp4Pdf
100 // CHECK: fpext float {{.*}} to double
101 // CHECK: atomicrmw fsub ptr {{.*}} monotonic
102 return __atomic_fetch_sub(p, f, memory_order_relaxed);
105 __device__ double ffp5(double *p, int i) {
106 // CHECK-LABEL: @_Z4ffp5Pdi
107 // CHECK: sitofp i32 {{.*}} to double
108 // CHECK: atomicrmw fsub ptr {{.*}} monotonic
109 return __atomic_fetch_sub(p, i, memory_order_relaxed);
112 __global__ void ffp6(_Float16 *p) {
113 // CHECK-LABEL: @_Z4ffp6PDF16
114 // CHECK: atomicrmw fadd ptr {{.*}} monotonic
115 // CHECK: atomicrmw fmax ptr {{.*}} monotonic
116 // CHECK: atomicrmw fmin ptr {{.*}} monotonic
117 // CHECK: atomicrmw fmax ptr {{.*}} syncscope("agent-one-as") monotonic
118 // CHECK: atomicrmw fmin ptr {{.*}} syncscope("workgroup-one-as") monotonic
119 // SAFE: _Z4ffp6PDF16
120 // SAFE: global_atomic_cmpswap
121 // SAFE: global_atomic_cmpswap
122 // SAFE: global_atomic_cmpswap
123 // SAFE: global_atomic_cmpswap
124 // SAFE: global_atomic_cmpswap
125 // UNSAFE: _Z4ffp6PDF16
126 // UNSAFE: global_atomic_cmpswap
127 // UNSAFE: global_atomic_cmpswap
128 // UNSAFE: global_atomic_cmpswap
129 // UNSAFE: global_atomic_cmpswap
130 // UNSAFE: global_atomic_cmpswap
131 __atomic_fetch_add(p, 1.0, memory_order_relaxed);
132 __atomic_fetch_max(p, 1.0, memory_order_relaxed);
133 __atomic_fetch_min(p, 1.0, memory_order_relaxed);
134 __hip_atomic_fetch_max(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_AGENT);
135 __hip_atomic_fetch_min(p, 1.0f, memory_order_relaxed, __HIP_MEMORY_SCOPE_WORKGROUP);