1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s
3 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=DPP -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s
5 define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
6 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
7 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
8 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
10 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
11 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
12 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
13 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
14 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
15 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
16 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
17 ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
18 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
19 ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
20 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
21 ; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
23 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
24 ; IR-ITERATIVE-NEXT: br label [[TMP16]]
26 ; IR-ITERATIVE-NEXT: br label [[TMP17]]
28 ; IR-ITERATIVE-NEXT: ret void
30 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
31 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
32 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
34 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
35 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
36 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
37 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
38 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
39 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
40 ; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
41 ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
42 ; IR-DPP-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
43 ; IR-DPP-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
44 ; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
45 ; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
47 ; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
48 ; IR-DPP-NEXT: br label [[TMP16]]
50 ; IR-DPP-NEXT: br label [[TMP17]]
52 ; IR-DPP-NEXT: ret void
54 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
58 define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
59 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(
60 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
61 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
63 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
64 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
65 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
66 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
67 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
68 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
69 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
70 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
72 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] syncscope("agent") monotonic, align 4
73 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
75 ; IR-ITERATIVE-NEXT: br label [[TMP13]]
77 ; IR-ITERATIVE-NEXT: ret void
78 ; IR-ITERATIVE: ComputeLoop:
79 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ]
80 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ]
81 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
82 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
83 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32
84 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP16]], i32 [[TMP15]])
85 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float
86 ; IR-ITERATIVE-NEXT: [[TMP19]] = fadd float [[ACCUMULATOR]], [[TMP18]]
87 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl i64 1, [[TMP14]]
88 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1
89 ; IR-ITERATIVE-NEXT: [[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]]
90 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0
91 ; IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
92 ; IR-ITERATIVE: ComputeEnd:
93 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0
94 ; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]]
96 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(
97 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
98 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]]
100 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
101 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
102 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
103 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
104 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
105 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
106 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
107 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648)
108 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
109 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
110 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false)
111 ; IR-DPP-NEXT: [[TMP14:%.*]] = fadd float [[TMP11]], [[TMP13]]
112 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false)
113 ; IR-DPP-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]]
114 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false)
115 ; IR-DPP-NEXT: [[TMP18:%.*]] = fadd float [[TMP16]], [[TMP17]]
116 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false)
117 ; IR-DPP-NEXT: [[TMP20:%.*]] = fadd float [[TMP18]], [[TMP19]]
118 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false)
119 ; IR-DPP-NEXT: [[TMP22:%.*]] = fadd float [[TMP20]], [[TMP21]]
120 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false)
121 ; IR-DPP-NEXT: [[TMP24:%.*]] = fadd float [[TMP22]], [[TMP23]]
122 ; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32
123 ; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP25]], i32 63)
124 ; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float
125 ; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]])
126 ; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0
127 ; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]]
129 ; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] syncscope("agent") monotonic, align 4
130 ; IR-DPP-NEXT: br label [[TMP32]]
132 ; IR-DPP-NEXT: br label [[TMP33]]
134 ; IR-DPP-NEXT: ret void
136 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
140 define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1 {
141 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(
142 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7:[0-9]+]]
143 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
145 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
146 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
147 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
148 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
149 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
150 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
151 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
152 ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
153 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
154 ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
155 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
156 ; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
158 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4
159 ; IR-ITERATIVE-NEXT: br label [[TMP16]]
161 ; IR-ITERATIVE-NEXT: br label [[TMP17]]
163 ; IR-ITERATIVE-NEXT: ret void
165 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(
166 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8:[0-9]+]]
167 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
169 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
170 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
171 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
172 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
173 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
174 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
175 ; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
176 ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
177 ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
178 ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
179 ; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
180 ; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
182 ; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4
183 ; IR-DPP-NEXT: br label [[TMP16]]
185 ; IR-DPP-NEXT: br label [[TMP17]]
187 ; IR-DPP-NEXT: ret void
189 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
193 define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) #1 {
194 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(
195 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
196 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
198 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
199 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
200 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
201 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
202 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
203 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
204 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
205 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
207 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] syncscope("one-as") monotonic, align 4
208 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
210 ; IR-ITERATIVE-NEXT: br label [[TMP13]]
212 ; IR-ITERATIVE-NEXT: ret void
213 ; IR-ITERATIVE: ComputeLoop:
214 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ]
215 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ]
216 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
217 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
218 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32
219 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP16]], i32 [[TMP15]]) #[[ATTR7]]
220 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float
221 ; IR-ITERATIVE-NEXT: [[TMP19]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
222 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl i64 1, [[TMP14]]
223 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1
224 ; IR-ITERATIVE-NEXT: [[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]]
225 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0
226 ; IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
227 ; IR-ITERATIVE: ComputeEnd:
228 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0
229 ; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]]
231 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(
232 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
233 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]]
235 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
236 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
237 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
238 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
239 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
240 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
241 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
242 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]]
243 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
244 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
245 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
246 ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
247 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
248 ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
249 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
250 ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
251 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
252 ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
253 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
254 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
255 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
256 ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
257 ; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32
258 ; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP25]], i32 63) #[[ATTR8]]
259 ; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float
260 ; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]]) #[[ATTR8]]
261 ; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0
262 ; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]]
264 ; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] syncscope("one-as") monotonic, align 4
265 ; IR-DPP-NEXT: br label [[TMP32]]
267 ; IR-DPP-NEXT: br label [[TMP33]]
269 ; IR-DPP-NEXT: ret void
271 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
275 define amdgpu_ps void @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
276 ; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(
277 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
278 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
280 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
281 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
282 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
283 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
284 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
285 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
286 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
287 ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
288 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
289 ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
290 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
291 ; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
293 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
294 ; IR-ITERATIVE-NEXT: br label [[TMP16]]
296 ; IR-ITERATIVE-NEXT: br label [[TMP17]]
298 ; IR-ITERATIVE-NEXT: ret void
300 ; IR-DPP-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(
301 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
302 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
304 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
305 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
306 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
307 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
308 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
309 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
310 ; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
311 ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
312 ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
313 ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
314 ; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
315 ; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
317 ; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
318 ; IR-DPP-NEXT: br label [[TMP16]]
320 ; IR-DPP-NEXT: br label [[TMP17]]
322 ; IR-DPP-NEXT: ret void
324 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
329 define amdgpu_ps void @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
330 ; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
331 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
332 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
334 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
335 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
336 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
337 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
338 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
339 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
340 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
341 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
343 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] syncscope("agent") monotonic, align 4
344 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
346 ; IR-ITERATIVE-NEXT: br label [[TMP13]]
348 ; IR-ITERATIVE-NEXT: ret void
349 ; IR-ITERATIVE: ComputeLoop:
350 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ]
351 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ]
352 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
353 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
354 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32
355 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP16]], i32 [[TMP15]]) #[[ATTR7]]
356 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float
357 ; IR-ITERATIVE-NEXT: [[TMP19]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
358 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl i64 1, [[TMP14]]
359 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1
360 ; IR-ITERATIVE-NEXT: [[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]]
361 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0
362 ; IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
363 ; IR-ITERATIVE: ComputeEnd:
364 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0
365 ; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]]
367 ; IR-DPP-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
368 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
369 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]]
371 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
372 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
373 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
374 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
375 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
376 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
377 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
378 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]]
379 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
380 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
381 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
382 ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
383 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
384 ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
385 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
386 ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
387 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
388 ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
389 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
390 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
391 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
392 ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
393 ; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32
394 ; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP25]], i32 63) #[[ATTR8]]
395 ; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float
396 ; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]]) #[[ATTR8]]
397 ; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0
398 ; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]]
400 ; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] syncscope("agent") monotonic, align 4
401 ; IR-DPP-NEXT: br label [[TMP32]]
403 ; IR-DPP-NEXT: br label [[TMP33]]
405 ; IR-DPP-NEXT: ret void
407 %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
411 define amdgpu_ps void @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
412 ; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
413 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
414 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
416 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
417 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
418 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
419 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
420 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
421 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
422 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
423 ; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
425 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
426 ; IR-ITERATIVE-NEXT: br label [[TMP12]]
428 ; IR-ITERATIVE-NEXT: br label [[TMP13]]
430 ; IR-ITERATIVE-NEXT: ret void
432 ; IR-DPP-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
433 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
434 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
436 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
437 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
438 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
439 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
440 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
441 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
442 ; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
443 ; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
445 ; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
446 ; IR-DPP-NEXT: br label [[TMP12]]
448 ; IR-DPP-NEXT: br label [[TMP13]]
450 ; IR-DPP-NEXT: ret void
452 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
456 define amdgpu_ps void @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
457 ; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(
458 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
459 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
461 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
462 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
463 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
464 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
465 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
466 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
467 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
468 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
470 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] syncscope("agent") monotonic, align 4
471 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
473 ; IR-ITERATIVE-NEXT: br label [[TMP13]]
475 ; IR-ITERATIVE-NEXT: ret void
476 ; IR-ITERATIVE: ComputeLoop:
477 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF0000000000000, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ]
478 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ]
479 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
480 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
481 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32
482 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP16]], i32 [[TMP15]])
483 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float
484 ; IR-ITERATIVE-NEXT: [[TMP19]] = call float @llvm.minnum.f32(float [[ACCUMULATOR]], float [[TMP18]])
485 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl i64 1, [[TMP14]]
486 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1
487 ; IR-ITERATIVE-NEXT: [[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]]
488 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0
489 ; IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
490 ; IR-ITERATIVE: ComputeEnd:
491 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0
492 ; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]]
494 ; IR-DPP-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(
495 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
496 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]]
498 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
499 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
500 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
501 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
502 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
503 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
504 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
505 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 2139095040)
506 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
507 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
508 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP11]], i32 273, i32 15, i32 15, i1 false)
509 ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.minnum.f32(float [[TMP11]], float [[TMP13]])
510 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP14]], i32 274, i32 15, i32 15, i1 false)
511 ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP15]])
512 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP16]], i32 276, i32 15, i32 15, i1 false)
513 ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP17]])
514 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP18]], i32 280, i32 15, i32 15, i1 false)
515 ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.minnum.f32(float [[TMP18]], float [[TMP19]])
516 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP20]], i32 322, i32 10, i32 15, i1 false)
517 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.minnum.f32(float [[TMP20]], float [[TMP21]])
518 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP22]], i32 323, i32 12, i32 15, i1 false)
519 ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.minnum.f32(float [[TMP22]], float [[TMP23]])
520 ; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32
521 ; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP25]], i32 63)
522 ; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float
523 ; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]])
524 ; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0
525 ; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]]
527 ; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] syncscope("agent") monotonic, align 4
528 ; IR-DPP-NEXT: br label [[TMP32]]
530 ; IR-DPP-NEXT: br label [[TMP33]]
532 ; IR-DPP-NEXT: ret void
534 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
538 define amdgpu_ps void @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1{
539 ; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(
540 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
541 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
543 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
544 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
545 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
546 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
547 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
548 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
549 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
550 ; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
552 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
553 ; IR-ITERATIVE-NEXT: br label [[TMP12]]
555 ; IR-ITERATIVE-NEXT: br label [[TMP13]]
557 ; IR-ITERATIVE-NEXT: ret void
559 ; IR-DPP-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(
560 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
561 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
563 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
564 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
565 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
566 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
567 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
568 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
569 ; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
570 ; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
572 ; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
573 ; IR-DPP-NEXT: br label [[TMP12]]
575 ; IR-DPP-NEXT: br label [[TMP13]]
577 ; IR-DPP-NEXT: ret void
579 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
583 define amdgpu_ps void @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) #1{
584 ; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(
585 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
586 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
588 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
589 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
590 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
591 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
592 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
593 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
594 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
595 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
597 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] syncscope("agent") monotonic, align 4
598 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
600 ; IR-ITERATIVE-NEXT: br label [[TMP13]]
602 ; IR-ITERATIVE-NEXT: ret void
603 ; IR-ITERATIVE: ComputeLoop:
604 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0xFFF0000000000000, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ]
605 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ]
606 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
607 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
608 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32
609 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP16]], i32 [[TMP15]]) #[[ATTR7]]
610 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float
611 ; IR-ITERATIVE-NEXT: [[TMP19]] = call float @llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR7]]
612 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl i64 1, [[TMP14]]
613 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1
614 ; IR-ITERATIVE-NEXT: [[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]]
615 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0
616 ; IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
617 ; IR-ITERATIVE: ComputeEnd:
618 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0
619 ; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]]
621 ; IR-DPP-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(
622 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
623 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]]
625 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
626 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
627 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
628 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
629 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
630 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
631 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
632 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -8388608) #[[ATTR8]]
633 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
634 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
635 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
636 ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP13]], metadata !"fpexcept.strict") #[[ATTR8]]
637 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
638 ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP15]], metadata !"fpexcept.strict") #[[ATTR8]]
639 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
640 ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP16]], float [[TMP17]], metadata !"fpexcept.strict") #[[ATTR8]]
641 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
642 ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP18]], float [[TMP19]], metadata !"fpexcept.strict") #[[ATTR8]]
643 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
644 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP20]], float [[TMP21]], metadata !"fpexcept.strict") #[[ATTR8]]
645 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
646 ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP22]], float [[TMP23]], metadata !"fpexcept.strict") #[[ATTR8]]
647 ; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32
648 ; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP25]], i32 63) #[[ATTR8]]
649 ; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float
650 ; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]]) #[[ATTR8]]
651 ; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0
652 ; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]]
654 ; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] syncscope("agent") monotonic, align 4
655 ; IR-DPP-NEXT: br label [[TMP32]]
657 ; IR-DPP-NEXT: br label [[TMP33]]
659 ; IR-DPP-NEXT: ret void
661 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
665 define amdgpu_ps void @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
666 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(
667 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
668 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
670 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
671 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
672 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
673 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
674 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
675 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
676 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
677 ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
678 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
679 ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
680 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
681 ; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
683 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4
684 ; IR-ITERATIVE-NEXT: br label [[TMP16]]
686 ; IR-ITERATIVE-NEXT: br label [[TMP17]]
688 ; IR-ITERATIVE-NEXT: ret void
690 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(
691 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
692 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP17:%.*]]
694 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
695 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
696 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
697 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
698 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
699 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
700 ; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
701 ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
702 ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
703 ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
704 ; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
705 ; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
707 ; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4
708 ; IR-DPP-NEXT: br label [[TMP16]]
710 ; IR-DPP-NEXT: br label [[TMP17]]
712 ; IR-DPP-NEXT: ret void
714 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
718 define amdgpu_ps void @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
719 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(
720 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
721 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP13:%.*]]
723 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
724 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
725 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
726 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
727 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
728 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
729 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
730 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
732 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP19:%.*]] monotonic, align 4
733 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
735 ; IR-ITERATIVE-NEXT: br label [[TMP13]]
737 ; IR-ITERATIVE-NEXT: ret void
738 ; IR-ITERATIVE: ComputeLoop:
739 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP19]], [[COMPUTELOOP]] ]
740 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP22:%.*]], [[COMPUTELOOP]] ]
741 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
742 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = trunc i64 [[TMP14]] to i32
743 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast float [[VAL:%.*]] to i32
744 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP16]], i32 [[TMP15]]) #[[ATTR7]]
745 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float
746 ; IR-ITERATIVE-NEXT: [[TMP19]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP18]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
747 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = shl i64 1, [[TMP14]]
748 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = xor i64 [[TMP20]], -1
749 ; IR-ITERATIVE-NEXT: [[TMP22]] = and i64 [[ACTIVEBITS]], [[TMP21]]
750 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = icmp eq i64 [[TMP22]], 0
751 ; IR-ITERATIVE-NEXT: br i1 [[TMP23]], label [[COMPUTEEND:%.*]], label [[COMPUTELOOP]]
752 ; IR-ITERATIVE: ComputeEnd:
753 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = icmp eq i32 [[TMP8]], 0
754 ; IR-ITERATIVE-NEXT: br i1 [[TMP24]], label [[TMP10:%.*]], label [[TMP12]]
756 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(
757 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
758 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP33:%.*]]
760 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
761 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
762 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
763 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
764 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
765 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
766 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
767 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]]
768 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
769 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
770 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
771 ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
772 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
773 ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
774 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
775 ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
776 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
777 ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
778 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
779 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
780 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
781 ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
782 ; IR-DPP-NEXT: [[TMP25:%.*]] = bitcast float [[TMP24]] to i32
783 ; IR-DPP-NEXT: [[TMP26:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP25]], i32 63) #[[ATTR8]]
784 ; IR-DPP-NEXT: [[TMP27:%.*]] = bitcast i32 [[TMP26]] to float
785 ; IR-DPP-NEXT: [[TMP28:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP27]]) #[[ATTR8]]
786 ; IR-DPP-NEXT: [[TMP29:%.*]] = icmp eq i32 [[TMP8]], 0
787 ; IR-DPP-NEXT: br i1 [[TMP29]], label [[TMP30:%.*]], label [[TMP32:%.*]]
789 ; IR-DPP-NEXT: [[TMP31:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP28]] monotonic, align 4
790 ; IR-DPP-NEXT: br label [[TMP32]]
792 ; IR-DPP-NEXT: br label [[TMP33]]
794 ; IR-DPP-NEXT: ret void
796 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
801 define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) #0 {
802 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
803 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
804 ; IR-ITERATIVE-NEXT: ret void
806 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
807 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
808 ; IR-DPP-NEXT: ret void
810 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
814 define amdgpu_ps void @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) #0 {
815 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
816 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
817 ; IR-ITERATIVE-NEXT: ret void
819 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
820 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
821 ; IR-DPP-NEXT: ret void
823 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
827 define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1 {
828 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
829 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
830 ; IR-ITERATIVE-NEXT: ret void
832 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
833 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
834 ; IR-DPP-NEXT: ret void
836 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
840 define amdgpu_ps void @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1 {
841 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
842 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
843 ; IR-ITERATIVE-NEXT: ret void
845 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
846 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
847 ; IR-DPP-NEXT: ret void
849 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
853 define amdgpu_ps void @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
854 ; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
855 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
856 ; IR-ITERATIVE-NEXT: ret void
858 ; IR-DPP-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
859 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
860 ; IR-DPP-NEXT: ret void
862 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
867 define amdgpu_ps void @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
868 ; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
869 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
870 ; IR-ITERATIVE-NEXT: ret void
872 ; IR-DPP-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
873 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
874 ; IR-DPP-NEXT: ret void
876 %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
880 define amdgpu_ps void @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) #0 {
881 ; IR-ITERATIVE-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
882 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
883 ; IR-ITERATIVE-NEXT: ret void
885 ; IR-DPP-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
886 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
887 ; IR-DPP-NEXT: ret void
889 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
893 define amdgpu_ps void @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) #0 {
894 ; IR-ITERATIVE-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
895 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
896 ; IR-ITERATIVE-NEXT: ret void
898 ; IR-DPP-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
899 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
900 ; IR-DPP-NEXT: ret void
902 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
906 define amdgpu_ps void @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1{
907 ; IR-ITERATIVE-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
908 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
909 ; IR-ITERATIVE-NEXT: ret void
911 ; IR-DPP-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
912 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
913 ; IR-DPP-NEXT: ret void
915 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
919 define amdgpu_ps void @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1{
920 ; IR-ITERATIVE-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
921 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
922 ; IR-ITERATIVE-NEXT: ret void
924 ; IR-DPP-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
925 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
926 ; IR-DPP-NEXT: ret void
928 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
932 define amdgpu_ps void @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
933 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
934 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
935 ; IR-ITERATIVE-NEXT: ret void
937 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
938 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
939 ; IR-DPP-NEXT: ret void
941 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
945 define amdgpu_ps void @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
946 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
947 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
948 ; IR-ITERATIVE-NEXT: ret void
950 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
951 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
952 ; IR-DPP-NEXT: ret void
954 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
958 attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
959 attributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
960 attributes #2 = { strictfp }