1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=Iterative -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-ITERATIVE %s
3 ; RUN: opt -S -mtriple=amdgcn-- -mcpu=gfx906 -amdgpu-atomic-optimizer-strategy=DPP -passes='amdgpu-atomic-optimizer,verify<domtree>' %s | FileCheck -check-prefix=IR-DPP %s
5 define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
6 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
7 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
8 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
10 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
11 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
12 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
13 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
14 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
15 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
16 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
17 ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
18 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
19 ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
20 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
21 ; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
23 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
24 ; IR-ITERATIVE-NEXT: br label [[TMP16]]
26 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
27 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
28 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]])
29 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
30 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = uitofp i32 [[TMP8]] to float
31 ; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = fmul float [[VAL]], [[TMP21]]
32 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = fadd float [[TMP20]], [[TMP22]]
33 ; IR-ITERATIVE-NEXT: br label [[TMP24]]
35 ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
36 ; IR-ITERATIVE-NEXT: ret float [[TMP25]]
38 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_agent_scope_unsafe(
39 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
40 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
42 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
43 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
44 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
45 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
46 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
47 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
48 ; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]])
49 ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
50 ; IR-DPP-NEXT: [[TMP11:%.*]] = uitofp i32 [[TMP10]] to float
51 ; IR-DPP-NEXT: [[TMP12:%.*]] = fmul float [[VAL:%.*]], [[TMP11]]
52 ; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
53 ; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
55 ; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
56 ; IR-DPP-NEXT: br label [[TMP16]]
58 ; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
59 ; IR-DPP-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
60 ; IR-DPP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]])
61 ; IR-DPP-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
62 ; IR-DPP-NEXT: [[TMP21:%.*]] = uitofp i32 [[TMP8]] to float
63 ; IR-DPP-NEXT: [[TMP22:%.*]] = fmul float [[VAL]], [[TMP21]]
64 ; IR-DPP-NEXT: [[TMP23:%.*]] = fadd float [[TMP20]], [[TMP22]]
65 ; IR-DPP-NEXT: br label [[TMP24]]
67 ; IR-DPP-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
68 ; IR-DPP-NEXT: ret float [[TMP25]]
70 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
74 define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
75 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(
76 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
77 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
79 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
80 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
81 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
82 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
83 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
84 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
85 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
86 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
88 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("agent") monotonic, align 4
89 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
91 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
92 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
93 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]])
94 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
95 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = fadd float [[TMP16]], [[TMP28:%.*]]
96 ; IR-ITERATIVE-NEXT: br label [[TMP18]]
98 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ]
99 ; IR-ITERATIVE-NEXT: ret float [[TMP19]]
100 ; IR-ITERATIVE: ComputeLoop:
101 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ]
102 ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ]
103 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ]
104 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
105 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32
106 ; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32
107 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP22]], i32 [[TMP21]])
108 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float
109 ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32
110 ; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32
111 ; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]])
112 ; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float
113 ; IR-ITERATIVE-NEXT: [[TMP29]] = fadd float [[ACCUMULATOR]], [[TMP24]]
114 ; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]]
115 ; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1
116 ; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]]
117 ; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0
118 ; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
119 ; IR-ITERATIVE: ComputeEnd:
120 ; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0
121 ; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]]
123 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_scope_agent_scope_unsafe(
124 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
125 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]]
127 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
128 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
129 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
130 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
131 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
132 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
133 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
134 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648)
135 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
136 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
137 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false)
138 ; IR-DPP-NEXT: [[TMP14:%.*]] = fadd float [[TMP11]], [[TMP13]]
139 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false)
140 ; IR-DPP-NEXT: [[TMP16:%.*]] = fadd float [[TMP14]], [[TMP15]]
141 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false)
142 ; IR-DPP-NEXT: [[TMP18:%.*]] = fadd float [[TMP16]], [[TMP17]]
143 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false)
144 ; IR-DPP-NEXT: [[TMP20:%.*]] = fadd float [[TMP18]], [[TMP19]]
145 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false)
146 ; IR-DPP-NEXT: [[TMP22:%.*]] = fadd float [[TMP20]], [[TMP21]]
147 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false)
148 ; IR-DPP-NEXT: [[TMP24:%.*]] = fadd float [[TMP22]], [[TMP23]]
149 ; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false)
150 ; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32
151 ; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP26]], i32 63)
152 ; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float
153 ; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]])
154 ; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0
155 ; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]]
157 ; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("agent") monotonic, align 4
158 ; IR-DPP-NEXT: br label [[TMP33]]
160 ; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ]
161 ; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32
162 ; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP35]])
163 ; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float
164 ; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]])
165 ; IR-DPP-NEXT: [[TMP39:%.*]] = fadd float [[TMP37]], [[TMP38]]
166 ; IR-DPP-NEXT: br label [[TMP40]]
168 ; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ]
169 ; IR-DPP-NEXT: ret float [[TMP41]]
171 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
175 define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1 {
176 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(
177 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7:[0-9]+]]
178 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
180 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
181 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
182 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
183 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
184 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
185 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
186 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
187 ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
188 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
189 ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
190 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
191 ; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
193 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4
194 ; IR-ITERATIVE-NEXT: br label [[TMP16]]
196 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
197 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
198 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]]) #[[ATTR7]]
199 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
200 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
201 ; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
202 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP22]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
203 ; IR-ITERATIVE-NEXT: br label [[TMP24]]
205 ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
206 ; IR-ITERATIVE-NEXT: ret float [[TMP25]]
208 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_one_as_scope_unsafe_structfp(
209 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8:[0-9]+]]
210 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
212 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
213 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
214 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
215 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
216 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
217 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
218 ; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
219 ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
220 ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
221 ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
222 ; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
223 ; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
225 ; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("one-as") monotonic, align 4
226 ; IR-DPP-NEXT: br label [[TMP16]]
228 ; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
229 ; IR-DPP-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
230 ; IR-DPP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]]) #[[ATTR8]]
231 ; IR-DPP-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
232 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
233 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
234 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP22]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
235 ; IR-DPP-NEXT: br label [[TMP24]]
237 ; IR-DPP-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
238 ; IR-DPP-NEXT: ret float [[TMP25]]
240 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
244 define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) #1 {
245 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(
246 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
247 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
249 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
250 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
251 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
252 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
253 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
254 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
255 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
256 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
258 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("one-as") monotonic, align 4
259 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
261 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
262 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
263 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]]) #[[ATTR7]]
264 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
265 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP28:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
266 ; IR-ITERATIVE-NEXT: br label [[TMP18]]
268 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ]
269 ; IR-ITERATIVE-NEXT: ret float [[TMP19]]
270 ; IR-ITERATIVE: ComputeLoop:
271 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ]
272 ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ]
273 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ]
274 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
275 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32
276 ; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32
277 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP22]], i32 [[TMP21]]) #[[ATTR7]]
278 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float
279 ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32
280 ; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32
281 ; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]]
282 ; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float
283 ; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
284 ; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]]
285 ; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1
286 ; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]]
287 ; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0
288 ; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
289 ; IR-ITERATIVE: ComputeEnd:
290 ; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0
291 ; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]]
293 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_one_as_scope_unsafe_structfp(
294 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
295 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]]
297 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
298 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
299 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
300 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
301 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
302 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
303 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
304 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]]
305 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
306 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
307 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
308 ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
309 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
310 ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
311 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
312 ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
313 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
314 ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
315 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
316 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
317 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
318 ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
319 ; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
320 ; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32
321 ; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP26]], i32 63) #[[ATTR8]]
322 ; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float
323 ; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) #[[ATTR8]]
324 ; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0
325 ; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]]
327 ; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("one-as") monotonic, align 4
328 ; IR-DPP-NEXT: br label [[TMP33]]
330 ; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ]
331 ; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32
332 ; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP35]]) #[[ATTR8]]
333 ; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float
334 ; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) #[[ATTR8]]
335 ; IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP37]], float [[TMP38]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
336 ; IR-DPP-NEXT: br label [[TMP40]]
338 ; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ]
339 ; IR-DPP-NEXT: ret float [[TMP41]]
341 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
345 define amdgpu_ps float @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
346 ; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(
347 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
348 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
350 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
351 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
352 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
353 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
354 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
355 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
356 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
357 ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
358 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
359 ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
360 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
361 ; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
363 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
364 ; IR-ITERATIVE-NEXT: br label [[TMP16]]
366 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
367 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
368 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]]) #[[ATTR7]]
369 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
370 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
371 ; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
372 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP22]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
373 ; IR-ITERATIVE-NEXT: br label [[TMP24]]
375 ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
376 ; IR-ITERATIVE-NEXT: ret float [[TMP25]]
378 ; IR-DPP-LABEL: @global_atomic_fsub_uni_address_uni_value_agent_scope_strictfp(
379 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
380 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
382 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
383 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
384 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
385 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
386 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
387 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
388 ; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
389 ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
390 ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
391 ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
392 ; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
393 ; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
395 ; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] syncscope("agent") monotonic, align 4
396 ; IR-DPP-NEXT: br label [[TMP16]]
398 ; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
399 ; IR-DPP-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
400 ; IR-DPP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]]) #[[ATTR8]]
401 ; IR-DPP-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
402 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
403 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
404 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP22]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
405 ; IR-DPP-NEXT: br label [[TMP24]]
407 ; IR-DPP-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
408 ; IR-DPP-NEXT: ret float [[TMP25]]
410 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
415 define amdgpu_ps float @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
416 ; IR-ITERATIVE-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
417 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
418 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
420 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
421 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
422 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
423 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
424 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
425 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
426 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
427 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
429 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("agent") monotonic, align 4
430 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
432 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
433 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
434 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]]) #[[ATTR7]]
435 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
436 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP16]], float [[TMP28:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
437 ; IR-ITERATIVE-NEXT: br label [[TMP18]]
439 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ]
440 ; IR-ITERATIVE-NEXT: ret float [[TMP19]]
441 ; IR-ITERATIVE: ComputeLoop:
442 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ]
443 ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ]
444 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ]
445 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
446 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32
447 ; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32
448 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP22]], i32 [[TMP21]]) #[[ATTR7]]
449 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float
450 ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32
451 ; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32
452 ; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]]
453 ; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float
454 ; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
455 ; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]]
456 ; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1
457 ; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]]
458 ; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0
459 ; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
460 ; IR-ITERATIVE: ComputeEnd:
461 ; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0
462 ; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]]
464 ; IR-DPP-LABEL: @global_atomic_fsub_uni_address_div_value_agent_scope_strictfp(
465 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
466 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]]
468 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
469 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
470 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
471 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
472 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
473 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
474 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
475 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]]
476 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
477 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
478 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
479 ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
480 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
481 ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
482 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
483 ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
484 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
485 ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
486 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
487 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
488 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
489 ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
490 ; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
491 ; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32
492 ; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP26]], i32 63) #[[ATTR8]]
493 ; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float
494 ; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) #[[ATTR8]]
495 ; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0
496 ; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]]
498 ; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("agent") monotonic, align 4
499 ; IR-DPP-NEXT: br label [[TMP33]]
501 ; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ]
502 ; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32
503 ; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP35]]) #[[ATTR8]]
504 ; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float
505 ; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) #[[ATTR8]]
506 ; IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[TMP37]], float [[TMP38]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
507 ; IR-DPP-NEXT: br label [[TMP40]]
509 ; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ]
510 ; IR-DPP-NEXT: ret float [[TMP41]]
512 %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
516 define amdgpu_ps float @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float inreg %val) #0 {
517 ; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
518 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
519 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
521 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
522 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
523 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
524 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
525 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
526 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
527 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
528 ; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
530 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
531 ; IR-ITERATIVE-NEXT: br label [[TMP12]]
533 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
534 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
535 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]])
536 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
537 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = uitofp i32 [[TMP8]] to float
538 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0x7FF0000000000000, float [[VAL]]
539 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP18]])
540 ; IR-ITERATIVE-NEXT: br label [[TMP20]]
542 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ]
543 ; IR-ITERATIVE-NEXT: ret float [[TMP21]]
545 ; IR-DPP-LABEL: @global_atomic_fmin_uni_address_uni_value_agent_scope_unsafe(
546 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
547 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
549 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
550 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
551 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
552 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
553 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
554 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
555 ; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
556 ; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
558 ; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
559 ; IR-DPP-NEXT: br label [[TMP12]]
561 ; IR-DPP-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
562 ; IR-DPP-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
563 ; IR-DPP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]])
564 ; IR-DPP-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
565 ; IR-DPP-NEXT: [[TMP17:%.*]] = uitofp i32 [[TMP8]] to float
566 ; IR-DPP-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0x7FF0000000000000, float [[VAL]]
567 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP18]])
568 ; IR-DPP-NEXT: br label [[TMP20]]
570 ; IR-DPP-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ]
571 ; IR-DPP-NEXT: ret float [[TMP21]]
573 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
577 define amdgpu_ps float @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(ptr addrspace(1) inreg %ptr, float %val) #0 {
578 ; IR-ITERATIVE-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(
579 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
580 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
582 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
583 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
584 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
585 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
586 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
587 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
588 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
589 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
591 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("agent") monotonic, align 4
592 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
594 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
595 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
596 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]])
597 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
598 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP28:%.*]])
599 ; IR-ITERATIVE-NEXT: br label [[TMP18]]
601 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ]
602 ; IR-ITERATIVE-NEXT: ret float [[TMP19]]
603 ; IR-ITERATIVE: ComputeLoop:
604 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0x7FF0000000000000, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ]
605 ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ]
606 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ]
607 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true)
608 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32
609 ; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32
610 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP22]], i32 [[TMP21]])
611 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float
612 ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32
613 ; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32
614 ; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]])
615 ; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float
616 ; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.minnum.f32(float [[ACCUMULATOR]], float [[TMP24]])
617 ; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]]
618 ; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1
619 ; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]]
620 ; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0
621 ; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
622 ; IR-ITERATIVE: ComputeEnd:
623 ; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0
624 ; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]]
626 ; IR-DPP-LABEL: @global_atomic_fmin_uni_address_div_value_agent_scope_unsafe(
627 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live()
628 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]]
630 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true)
631 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
632 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
633 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
634 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0)
635 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]])
636 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
637 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 2139095040)
638 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
639 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
640 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP11]], i32 273, i32 15, i32 15, i1 false)
641 ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.minnum.f32(float [[TMP11]], float [[TMP13]])
642 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP14]], i32 274, i32 15, i32 15, i1 false)
643 ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.minnum.f32(float [[TMP14]], float [[TMP15]])
644 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP16]], i32 276, i32 15, i32 15, i1 false)
645 ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.minnum.f32(float [[TMP16]], float [[TMP17]])
646 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP18]], i32 280, i32 15, i32 15, i1 false)
647 ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.minnum.f32(float [[TMP18]], float [[TMP19]])
648 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP20]], i32 322, i32 10, i32 15, i1 false)
649 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.minnum.f32(float [[TMP20]], float [[TMP21]])
650 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP22]], i32 323, i32 12, i32 15, i1 false)
651 ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.minnum.f32(float [[TMP22]], float [[TMP23]])
652 ; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0x7FF0000000000000, float [[TMP24]], i32 312, i32 15, i32 15, i1 false)
653 ; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32
654 ; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP26]], i32 63)
655 ; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float
656 ; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]])
657 ; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0
658 ; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]]
660 ; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("agent") monotonic, align 4
661 ; IR-DPP-NEXT: br label [[TMP33]]
663 ; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ]
664 ; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32
665 ; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP35]])
666 ; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float
667 ; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]])
668 ; IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.minnum.f32(float [[TMP37]], float [[TMP38]])
669 ; IR-DPP-NEXT: br label [[TMP40]]
671 ; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ]
672 ; IR-DPP-NEXT: ret float [[TMP41]]
674 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
678 define amdgpu_ps float @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float inreg %val) #1{
679 ; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(
680 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
681 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
683 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
684 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
685 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
686 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
687 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
688 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
689 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
690 ; IR-ITERATIVE-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
692 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
693 ; IR-ITERATIVE-NEXT: br label [[TMP12]]
695 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
696 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
697 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]]) #[[ATTR7]]
698 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
699 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
700 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0xFFF0000000000000, float [[VAL]]
701 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP16]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR7]]
702 ; IR-ITERATIVE-NEXT: br label [[TMP20]]
704 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ]
705 ; IR-ITERATIVE-NEXT: ret float [[TMP21]]
707 ; IR-DPP-LABEL: @global_atomic_fmax_uni_address_uni_value_agent_scope_unsafe_structfp(
708 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
709 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP20:%.*]]
711 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
712 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
713 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
714 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
715 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
716 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
717 ; IR-DPP-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 0
718 ; IR-DPP-NEXT: br i1 [[TMP9]], label [[TMP10:%.*]], label [[TMP12:%.*]]
720 ; IR-DPP-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
721 ; IR-DPP-NEXT: br label [[TMP12]]
723 ; IR-DPP-NEXT: [[TMP13:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP11]], [[TMP10]] ]
724 ; IR-DPP-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
725 ; IR-DPP-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]]) #[[ATTR8]]
726 ; IR-DPP-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
727 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
728 ; IR-DPP-NEXT: [[TMP18:%.*]] = select i1 [[TMP9]], float 0xFFF0000000000000, float [[VAL]]
729 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP16]], float [[TMP18]], metadata !"fpexcept.strict") #[[ATTR8]]
730 ; IR-DPP-NEXT: br label [[TMP20]]
732 ; IR-DPP-NEXT: [[TMP21:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP19]], [[TMP12]] ]
733 ; IR-DPP-NEXT: ret float [[TMP21]]
735 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
739 define amdgpu_ps float @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) inreg %ptr, float %val) #1{
740 ; IR-ITERATIVE-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(
741 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
742 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
744 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
745 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
746 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
747 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
748 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
749 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
750 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
751 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
753 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] syncscope("agent") monotonic, align 4
754 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
756 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
757 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
758 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]]) #[[ATTR7]]
759 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
760 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP16]], float [[TMP28:%.*]], metadata !"fpexcept.strict") #[[ATTR7]]
761 ; IR-ITERATIVE-NEXT: br label [[TMP18]]
763 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ]
764 ; IR-ITERATIVE-NEXT: ret float [[TMP19]]
765 ; IR-ITERATIVE: ComputeLoop:
766 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ 0xFFF0000000000000, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ]
767 ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ]
768 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ]
769 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
770 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32
771 ; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32
772 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP22]], i32 [[TMP21]]) #[[ATTR7]]
773 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float
774 ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32
775 ; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32
776 ; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]]
777 ; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float
778 ; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.maxnum.f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"fpexcept.strict") #[[ATTR7]]
779 ; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]]
780 ; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1
781 ; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]]
782 ; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0
783 ; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
784 ; IR-ITERATIVE: ComputeEnd:
785 ; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0
786 ; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]]
788 ; IR-DPP-LABEL: @global_atomic_fmax_uni_address_div_value_agent_scope_unsafe_structfp(
789 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
790 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]]
792 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
793 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
794 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
795 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
796 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
797 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
798 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
799 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -8388608) #[[ATTR8]]
800 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
801 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
802 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
803 ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP11]], float [[TMP13]], metadata !"fpexcept.strict") #[[ATTR8]]
804 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
805 ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP14]], float [[TMP15]], metadata !"fpexcept.strict") #[[ATTR8]]
806 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
807 ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP16]], float [[TMP17]], metadata !"fpexcept.strict") #[[ATTR8]]
808 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
809 ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP18]], float [[TMP19]], metadata !"fpexcept.strict") #[[ATTR8]]
810 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
811 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP20]], float [[TMP21]], metadata !"fpexcept.strict") #[[ATTR8]]
812 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
813 ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP22]], float [[TMP23]], metadata !"fpexcept.strict") #[[ATTR8]]
814 ; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float 0xFFF0000000000000, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
815 ; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32
816 ; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP26]], i32 63) #[[ATTR8]]
817 ; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float
818 ; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) #[[ATTR8]]
819 ; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0
820 ; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]]
822 ; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] syncscope("agent") monotonic, align 4
823 ; IR-DPP-NEXT: br label [[TMP33]]
825 ; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ]
826 ; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32
827 ; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP35]]) #[[ATTR8]]
828 ; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float
829 ; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) #[[ATTR8]]
830 ; IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.experimental.constrained.maxnum.f32(float [[TMP37]], float [[TMP38]], metadata !"fpexcept.strict") #[[ATTR8]]
831 ; IR-DPP-NEXT: br label [[TMP40]]
833 ; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ]
834 ; IR-DPP-NEXT: ret float [[TMP41]]
836 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
840 define amdgpu_ps float @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float inreg %val) #2 {
841 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(
842 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
843 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
845 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
846 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
847 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
848 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
849 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
850 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
851 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR7]]
852 ; IR-ITERATIVE-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
853 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
854 ; IR-ITERATIVE-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
855 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
856 ; IR-ITERATIVE-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
858 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4
859 ; IR-ITERATIVE-NEXT: br label [[TMP16]]
861 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
862 ; IR-ITERATIVE-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
863 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]]) #[[ATTR7]]
864 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
865 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
866 ; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
867 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP22]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
868 ; IR-ITERATIVE-NEXT: br label [[TMP24]]
870 ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
871 ; IR-ITERATIVE-NEXT: ret float [[TMP25]]
873 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_uni_value_system_scope_strictfp(
874 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
875 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP24:%.*]]
877 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
878 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
879 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
880 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
881 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
882 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
883 ; IR-DPP-NEXT: [[TMP9:%.*]] = call i64 @llvm.ctpop.i64(i64 [[TMP3]]) #[[ATTR8]]
884 ; IR-DPP-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP9]] to i32
885 ; IR-DPP-NEXT: [[TMP11:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
886 ; IR-DPP-NEXT: [[TMP12:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL:%.*]], float [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
887 ; IR-DPP-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP8]], 0
888 ; IR-DPP-NEXT: br i1 [[TMP13]], label [[TMP14:%.*]], label [[TMP16:%.*]]
890 ; IR-DPP-NEXT: [[TMP15:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP12]] monotonic, align 4
891 ; IR-DPP-NEXT: br label [[TMP16]]
893 ; IR-DPP-NEXT: [[TMP17:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP15]], [[TMP14]] ]
894 ; IR-DPP-NEXT: [[TMP18:%.*]] = bitcast float [[TMP17]] to i32
895 ; IR-DPP-NEXT: [[TMP19:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP18]]) #[[ATTR8]]
896 ; IR-DPP-NEXT: [[TMP20:%.*]] = bitcast i32 [[TMP19]] to float
897 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.experimental.constrained.uitofp.f32.i32(i32 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
898 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[VAL]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
899 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP22]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
900 ; IR-DPP-NEXT: br label [[TMP24]]
902 ; IR-DPP-NEXT: [[TMP25:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP23]], [[TMP16]] ]
903 ; IR-DPP-NEXT: ret float [[TMP25]]
905 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
909 define amdgpu_ps float @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(ptr addrspace(1) inreg %ptr, float %val) #2 {
910 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(
911 ; IR-ITERATIVE-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR7]]
912 ; IR-ITERATIVE-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP18:%.*]]
914 ; IR-ITERATIVE-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
915 ; IR-ITERATIVE-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
916 ; IR-ITERATIVE-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
917 ; IR-ITERATIVE-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
918 ; IR-ITERATIVE-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR7]]
919 ; IR-ITERATIVE-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR7]]
920 ; IR-ITERATIVE-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR7]]
921 ; IR-ITERATIVE-NEXT: br label [[COMPUTELOOP:%.*]]
923 ; IR-ITERATIVE-NEXT: [[TMP11:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29:%.*]] monotonic, align 4
924 ; IR-ITERATIVE-NEXT: br label [[TMP12:%.*]]
926 ; IR-ITERATIVE-NEXT: [[TMP13:%.*]] = phi float [ poison, [[COMPUTEEND:%.*]] ], [ [[TMP11]], [[TMP10:%.*]] ]
927 ; IR-ITERATIVE-NEXT: [[TMP14:%.*]] = bitcast float [[TMP13]] to i32
928 ; IR-ITERATIVE-NEXT: [[TMP15:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP14]]) #[[ATTR7]]
929 ; IR-ITERATIVE-NEXT: [[TMP16:%.*]] = bitcast i32 [[TMP15]] to float
930 ; IR-ITERATIVE-NEXT: [[TMP17:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP28:%.*]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
931 ; IR-ITERATIVE-NEXT: br label [[TMP18]]
933 ; IR-ITERATIVE-NEXT: [[TMP19:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP17]], [[TMP12]] ]
934 ; IR-ITERATIVE-NEXT: ret float [[TMP19]]
935 ; IR-ITERATIVE: ComputeLoop:
936 ; IR-ITERATIVE-NEXT: [[ACCUMULATOR:%.*]] = phi float [ -0.000000e+00, [[TMP2]] ], [ [[TMP29]], [[COMPUTELOOP]] ]
937 ; IR-ITERATIVE-NEXT: [[OLDVALUEPHI:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP28]], [[COMPUTELOOP]] ]
938 ; IR-ITERATIVE-NEXT: [[ACTIVEBITS:%.*]] = phi i64 [ [[TMP9]], [[TMP2]] ], [ [[TMP32:%.*]], [[COMPUTELOOP]] ]
939 ; IR-ITERATIVE-NEXT: [[TMP20:%.*]] = call i64 @llvm.cttz.i64(i64 [[ACTIVEBITS]], i1 true) #[[ATTR7]]
940 ; IR-ITERATIVE-NEXT: [[TMP21:%.*]] = trunc i64 [[TMP20]] to i32
941 ; IR-ITERATIVE-NEXT: [[TMP22:%.*]] = bitcast float [[VAL:%.*]] to i32
942 ; IR-ITERATIVE-NEXT: [[TMP23:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP22]], i32 [[TMP21]]) #[[ATTR7]]
943 ; IR-ITERATIVE-NEXT: [[TMP24:%.*]] = bitcast i32 [[TMP23]] to float
944 ; IR-ITERATIVE-NEXT: [[TMP25:%.*]] = bitcast float [[ACCUMULATOR]] to i32
945 ; IR-ITERATIVE-NEXT: [[TMP26:%.*]] = bitcast float [[OLDVALUEPHI]] to i32
946 ; IR-ITERATIVE-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.writelane(i32 [[TMP25]], i32 [[TMP21]], i32 [[TMP26]]) #[[ATTR7]]
947 ; IR-ITERATIVE-NEXT: [[TMP28]] = bitcast i32 [[TMP27]] to float
948 ; IR-ITERATIVE-NEXT: [[TMP29]] = call float @llvm.experimental.constrained.fadd.f32(float [[ACCUMULATOR]], float [[TMP24]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR7]]
949 ; IR-ITERATIVE-NEXT: [[TMP30:%.*]] = shl i64 1, [[TMP20]]
950 ; IR-ITERATIVE-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], -1
951 ; IR-ITERATIVE-NEXT: [[TMP32]] = and i64 [[ACTIVEBITS]], [[TMP31]]
952 ; IR-ITERATIVE-NEXT: [[TMP33:%.*]] = icmp eq i64 [[TMP32]], 0
953 ; IR-ITERATIVE-NEXT: br i1 [[TMP33]], label [[COMPUTEEND]], label [[COMPUTELOOP]]
954 ; IR-ITERATIVE: ComputeEnd:
955 ; IR-ITERATIVE-NEXT: [[TMP34:%.*]] = icmp eq i32 [[TMP8]], 0
956 ; IR-ITERATIVE-NEXT: br i1 [[TMP34]], label [[TMP10]], label [[TMP12]]
958 ; IR-DPP-LABEL: @global_atomic_fadd_uni_address_div_value_system_scope_strictfp(
959 ; IR-DPP-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.ps.live() #[[ATTR8]]
960 ; IR-DPP-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP40:%.*]]
962 ; IR-DPP-NEXT: [[TMP3:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 true) #[[ATTR8]]
963 ; IR-DPP-NEXT: [[TMP4:%.*]] = trunc i64 [[TMP3]] to i32
964 ; IR-DPP-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP3]], 32
965 ; IR-DPP-NEXT: [[TMP6:%.*]] = trunc i64 [[TMP5]] to i32
966 ; IR-DPP-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 [[TMP4]], i32 0) #[[ATTR8]]
967 ; IR-DPP-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 [[TMP6]], i32 [[TMP7]]) #[[ATTR8]]
968 ; IR-DPP-NEXT: [[TMP9:%.*]] = bitcast float [[VAL:%.*]] to i32
969 ; IR-DPP-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP9]], i32 -2147483648) #[[ATTR8]]
970 ; IR-DPP-NEXT: [[TMP11:%.*]] = bitcast i32 [[TMP10]] to float
971 ; IR-DPP-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP9]] to float
972 ; IR-DPP-NEXT: [[TMP13:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP11]], i32 273, i32 15, i32 15, i1 false) #[[ATTR8]]
973 ; IR-DPP-NEXT: [[TMP14:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP11]], float [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
974 ; IR-DPP-NEXT: [[TMP15:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP14]], i32 274, i32 15, i32 15, i1 false) #[[ATTR8]]
975 ; IR-DPP-NEXT: [[TMP16:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP14]], float [[TMP15]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
976 ; IR-DPP-NEXT: [[TMP17:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP16]], i32 276, i32 15, i32 15, i1 false) #[[ATTR8]]
977 ; IR-DPP-NEXT: [[TMP18:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP16]], float [[TMP17]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
978 ; IR-DPP-NEXT: [[TMP19:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP18]], i32 280, i32 15, i32 15, i1 false) #[[ATTR8]]
979 ; IR-DPP-NEXT: [[TMP20:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP18]], float [[TMP19]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
980 ; IR-DPP-NEXT: [[TMP21:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP20]], i32 322, i32 10, i32 15, i1 false) #[[ATTR8]]
981 ; IR-DPP-NEXT: [[TMP22:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP20]], float [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
982 ; IR-DPP-NEXT: [[TMP23:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP22]], i32 323, i32 12, i32 15, i1 false) #[[ATTR8]]
983 ; IR-DPP-NEXT: [[TMP24:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP22]], float [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
984 ; IR-DPP-NEXT: [[TMP25:%.*]] = call float @llvm.amdgcn.update.dpp.f32(float -0.000000e+00, float [[TMP24]], i32 312, i32 15, i32 15, i1 false) #[[ATTR8]]
985 ; IR-DPP-NEXT: [[TMP26:%.*]] = bitcast float [[TMP24]] to i32
986 ; IR-DPP-NEXT: [[TMP27:%.*]] = call i32 @llvm.amdgcn.readlane(i32 [[TMP26]], i32 63) #[[ATTR8]]
987 ; IR-DPP-NEXT: [[TMP28:%.*]] = bitcast i32 [[TMP27]] to float
988 ; IR-DPP-NEXT: [[TMP29:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP28]]) #[[ATTR8]]
989 ; IR-DPP-NEXT: [[TMP30:%.*]] = icmp eq i32 [[TMP8]], 0
990 ; IR-DPP-NEXT: br i1 [[TMP30]], label [[TMP31:%.*]], label [[TMP33:%.*]]
992 ; IR-DPP-NEXT: [[TMP32:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[TMP29]] monotonic, align 4
993 ; IR-DPP-NEXT: br label [[TMP33]]
995 ; IR-DPP-NEXT: [[TMP34:%.*]] = phi float [ poison, [[TMP2]] ], [ [[TMP32]], [[TMP31]] ]
996 ; IR-DPP-NEXT: [[TMP35:%.*]] = bitcast float [[TMP34]] to i32
997 ; IR-DPP-NEXT: [[TMP36:%.*]] = call i32 @llvm.amdgcn.readfirstlane(i32 [[TMP35]]) #[[ATTR8]]
998 ; IR-DPP-NEXT: [[TMP37:%.*]] = bitcast i32 [[TMP36]] to float
999 ; IR-DPP-NEXT: [[TMP38:%.*]] = call float @llvm.amdgcn.strict.wwm.f32(float [[TMP25]]) #[[ATTR8]]
1000 ; IR-DPP-NEXT: [[TMP39:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[TMP37]], float [[TMP38]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR8]]
1001 ; IR-DPP-NEXT: br label [[TMP40]]
1003 ; IR-DPP-NEXT: [[TMP41:%.*]] = phi float [ poison, [[TMP0:%.*]] ], [ [[TMP39]], [[TMP33]] ]
1004 ; IR-DPP-NEXT: ret float [[TMP41]]
1006 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
1011 define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float inreg %val) #0 {
1012 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
1013 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1014 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1016 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_agent_scope_unsafe(
1017 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1018 ; IR-DPP-NEXT: ret float [[RESULT]]
1020 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
1024 define amdgpu_ps float @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(ptr addrspace(1) %ptr, float %val) #0 {
1025 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
1026 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1027 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1029 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_agent_scope_unsafe(
1030 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1031 ; IR-DPP-NEXT: ret float [[RESULT]]
1033 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic, align 4
1037 define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1 {
1038 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
1039 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
1040 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1042 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_one_as_scope_unsafe_structfp(
1043 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
1044 ; IR-DPP-NEXT: ret float [[RESULT]]
1046 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
1050 define amdgpu_ps float @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1 {
1051 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
1052 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
1053 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1055 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_one_as_scope_unsafe_structfp(
1056 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("one-as") monotonic, align 4
1057 ; IR-DPP-NEXT: ret float [[RESULT]]
1059 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("one-as") monotonic
1063 define amdgpu_ps float @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
1064 ; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
1065 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1066 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1068 ; IR-DPP-LABEL: @global_atomic_fsub_div_address_uni_value_agent_scope_strictfp(
1069 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1070 ; IR-DPP-NEXT: ret float [[RESULT]]
1072 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
1077 define amdgpu_ps float @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
1078 ; IR-ITERATIVE-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
1079 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1080 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1082 ; IR-DPP-LABEL: @global_atomic_fsub_div_address_div_value_agent_scope_strictfp(
1083 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fsub ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1084 ; IR-DPP-NEXT: ret float [[RESULT]]
1086 %result = atomicrmw fsub ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
1090 define amdgpu_ps float @global_atomic_fmin_div_address_uni_value_agent_scope(ptr addrspace(1) %ptr, float inreg %val) #0 {
1091 ; IR-ITERATIVE-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
1092 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1093 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1095 ; IR-DPP-LABEL: @global_atomic_fmin_div_address_uni_value_agent_scope(
1096 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1097 ; IR-DPP-NEXT: ret float [[RESULT]]
1099 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
1103 define amdgpu_ps float @global_atomic_fmin_div_address_div_value_agent_scope(ptr addrspace(1) %ptr, float %val) #0 {
1104 ; IR-ITERATIVE-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
1105 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1106 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1108 ; IR-DPP-LABEL: @global_atomic_fmin_div_address_div_value_agent_scope(
1109 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmin ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1110 ; IR-DPP-NEXT: ret float [[RESULT]]
1112 %result = atomicrmw fmin ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
1116 define amdgpu_ps float @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float inreg %val) #1{
1117 ; IR-ITERATIVE-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
1118 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1119 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1121 ; IR-DPP-LABEL: @global_atomic_fmax_div_address_uni_value_agent_scope_unsafe_structfp(
1122 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1123 ; IR-DPP-NEXT: ret float [[RESULT]]
1125 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
1129 define amdgpu_ps float @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(ptr addrspace(1) %ptr, float %val) #1{
1130 ; IR-ITERATIVE-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
1131 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1132 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1134 ; IR-DPP-LABEL: @global_atomic_fmax_div_address_div_value_agent_scope_unsafe_structfp(
1135 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fmax ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] syncscope("agent") monotonic, align 4
1136 ; IR-DPP-NEXT: ret float [[RESULT]]
1138 %result = atomicrmw fmax ptr addrspace(1) %ptr, float %val syncscope("agent") monotonic
1142 define amdgpu_ps float @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(ptr addrspace(1) %ptr, float inreg %val) #2 {
1143 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
1144 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
1145 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1147 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_uni_value_system_scope_strictfp(
1148 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
1149 ; IR-DPP-NEXT: ret float [[RESULT]]
1151 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
1155 define amdgpu_ps float @global_atomic_fadd_div_address_div_value_system_scope_strictfp(ptr addrspace(1) %ptr, float %val) #2 {
1156 ; IR-ITERATIVE-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
1157 ; IR-ITERATIVE-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
1158 ; IR-ITERATIVE-NEXT: ret float [[RESULT]]
1160 ; IR-DPP-LABEL: @global_atomic_fadd_div_address_div_value_system_scope_strictfp(
1161 ; IR-DPP-NEXT: [[RESULT:%.*]] = atomicrmw fadd ptr addrspace(1) [[PTR:%.*]], float [[VAL:%.*]] monotonic, align 4
1162 ; IR-DPP-NEXT: ret float [[RESULT]]
1164 %result = atomicrmw fadd ptr addrspace(1) %ptr, float %val monotonic, align 4
1168 attributes #0 = { "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
1169 attributes #1 = { strictfp "denormal-fp-math-f32"="preserve-sign,preserve-sign" "amdgpu-unsafe-fp-atomics"="true" }
1170 attributes #2 = { strictfp }