1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
3 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,GISEL %s
5 ; Test that fneg is folded into source modifiers when it wasn't
6 ; possible to fold fsub to fneg without context.
; Negative test: computes (1.0 - %v0) * %v1. Subtracting from 1.0 is not a
; negation, so the shared CHECK lines expect a real v_sub_f32 to remain ahead
; of the v_mul_f32 for both selectors.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
8 define float @no_fold_f32_fsub_into_fneg_modifier_ieee_pos1(float %v0, float %v1) #0 {
9 ; CHECK-LABEL: no_fold_f32_fsub_into_fneg_modifier_ieee_pos1:
11 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; CHECK-NEXT: v_sub_f32_e32 v0, 1.0, v0
13 ; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
14 ; CHECK-NEXT: s_setpc_b64 s[30:31]
15 %sub = fsub float 1.0, %v0
16 %mul = fmul float %sub, %v1
; Negative test: computes (1.0 - %v0) * %v1. Subtracting from 1.0 is not a
; negation, so the shared CHECK lines expect a real v_sub_f32 to remain ahead
; of the v_mul_f32 for both selectors.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
20 define float @no_fold_f32_fsub_into_fneg_modifier_daz_pos1(float %v0, float %v1) #1 {
21 ; CHECK-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_pos1:
23 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
24 ; CHECK-NEXT: v_sub_f32_e32 v0, 1.0, v0
25 ; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
26 ; CHECK-NEXT: s_setpc_b64 s[30:31]
27 %sub = fsub float 1.0, %v0
28 %mul = fmul float %sub, %v1
; Negative test: computes (%v0 - (-0.0)) * %v1. The -0.0 constant is the
; right-hand operand, so this is not a negation of %v0.
; SDAG is expected to emit v_add_f32 with 0; GISEL is expected to emit
; v_subrev_f32 with the literal 0x80000000. Neither uses a neg modifier.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
32 define float @no_fold_f32_fsub_into_fneg_modifier_ieee_commuted(float %v0, float %v1) #0 {
33 ; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_ieee_commuted:
35 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36 ; SDAG-NEXT: v_add_f32_e32 v0, 0, v0
37 ; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
38 ; SDAG-NEXT: s_setpc_b64 s[30:31]
40 ; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_ieee_commuted:
42 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
43 ; GISEL-NEXT: v_subrev_f32_e32 v0, 0x80000000, v0
44 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
45 ; GISEL-NEXT: s_setpc_b64 s[30:31]
46 %sub = fsub float %v0, -0.0
47 %mul = fmul float %sub, %v1
; Computes (0.0 - %v0) * %v1 with a positive-zero constant and no
; fast-math flags.
; SDAG is expected to emit a single v_mul_f32 with a neg modifier on v0;
; GISEL is expected to keep v_sub_f32 0, v0 followed by a plain v_mul_f32.
; NOTE(review): +0.0 - (+0.0) is +0.0 whereas fneg(+0.0) is -0.0, and no nsz
; flag is present -- confirm the SDAG expectation is intended.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
51 define float @fold_f32_fsub_into_fneg_modifier_ieee_pos0(float %v0, float %v1) #0 {
52 ; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_pos0:
54 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
55 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
56 ; SDAG-NEXT: s_setpc_b64 s[30:31]
58 ; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_pos0:
60 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GISEL-NEXT: v_sub_f32_e32 v0, 0, v0
62 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
63 ; GISEL-NEXT: s_setpc_b64 s[30:31]
64 %sub = fsub float 0.0, %v0
65 %mul = fmul float %sub, %v1
; Computes (0.0 - %v0) * %v1 with a positive-zero constant and no
; fast-math flags.
; SDAG is expected to emit a single v_mul_f32 with a neg modifier on v0;
; GISEL is expected to keep v_sub_f32 0, v0 followed by a plain v_mul_f32.
; NOTE(review): +0.0 - (+0.0) is +0.0 whereas fneg(+0.0) is -0.0, and no nsz
; flag is present -- confirm the SDAG expectation is intended.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
69 define float @fold_f32_fsub_into_fneg_modifier_daz_pos0(float %v0, float %v1) #1 {
70 ; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz_pos0:
72 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
73 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
74 ; SDAG-NEXT: s_setpc_b64 s[30:31]
76 ; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz_pos0:
78 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GISEL-NEXT: v_sub_f32_e32 v0, 0, v0
80 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
81 ; GISEL-NEXT: s_setpc_b64 s[30:31]
82 %sub = fsub float 0.0, %v0
83 %mul = fmul float %sub, %v1
; Negative test: computes (%v0 - (-0.0)) * %v1. The -0.0 constant is the
; right-hand operand, so this is not a negation of %v0.
; SDAG is expected to emit v_add_f32 with 0; GISEL is expected to emit
; v_subrev_f32 with the literal 0x80000000. Neither uses a neg modifier.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
87 define float @no_fold_f32_fsub_into_fneg_modifier_daz_commuted(float %v0, float %v1) #1 {
88 ; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_commuted:
90 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91 ; SDAG-NEXT: v_add_f32_e32 v0, 0, v0
92 ; SDAG-NEXT: v_mul_f32_e32 v0, v0, v1
93 ; SDAG-NEXT: s_setpc_b64 s[30:31]
95 ; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_commuted:
97 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98 ; GISEL-NEXT: v_subrev_f32_e32 v0, 0x80000000, v0
99 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
100 ; GISEL-NEXT: s_setpc_b64 s[30:31]
101 %sub = fsub float %v0, -0.0
102 %mul = fmul float %sub, %v1
; Computes (-0.0 - %v0) * %v1, with no fast-math flags.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f32. GISEL is expected to emit v_max_f32 of -v0 with itself and then
; a plain v_mul_f32.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
106 define float @fold_f32_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 {
107 ; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_ieee:
109 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
111 ; SDAG-NEXT: s_setpc_b64 s[30:31]
113 ; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_ieee:
115 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
117 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
118 ; GISEL-NEXT: s_setpc_b64 s[30:31]
119 %sub = fsub float -0.0, %v0
120 %mul = fmul float %sub, %v1
; Computes (-0.0 - %v0) * %v1, with no fast-math flags.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f32. GISEL is expected to emit v_max_f32 of -v0 with itself and then
; a plain v_mul_f32.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
124 define float @fold_f32_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
125 ; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz:
127 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
128 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
129 ; SDAG-NEXT: s_setpc_b64 s[30:31]
131 ; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz:
133 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
135 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
136 ; GISEL-NEXT: s_setpc_b64 s[30:31]
137 %sub = fsub float -0.0, %v0
138 %mul = fmul float %sub, %v1
; Computes (-0.0 - %v0) * %v1; both the fsub and the fmul carry nsz.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f32. GISEL is expected to emit v_max_f32 of -v0 with itself and then
; a plain v_mul_f32.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
142 define float @fold_f32_fsub_into_fneg_modifier_ieee_nsz(float %v0, float %v1) #0 {
143 ; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_nsz:
145 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
147 ; SDAG-NEXT: s_setpc_b64 s[30:31]
149 ; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_nsz:
151 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
152 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
153 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
154 ; GISEL-NEXT: s_setpc_b64 s[30:31]
155 %sub = fsub nsz float -0.0, %v0
156 %mul = fmul nsz float %sub, %v1
; Computes (-0.0 - %v0) * %v1; both the fsub and the fmul carry nsz.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f32. GISEL is expected to emit v_max_f32 of -v0 with itself and then
; a plain v_mul_f32.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
160 define float @fold_f32_fsub_into_fneg_modifier_daz_nsz(float %v0, float %v1) #1 {
161 ; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz_nsz:
163 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
165 ; SDAG-NEXT: s_setpc_b64 s[30:31]
167 ; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz_nsz:
169 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
170 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
171 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
172 ; GISEL-NEXT: s_setpc_b64 s[30:31]
173 %sub = fsub nsz float -0.0, %v0
174 %mul = fmul nsz float %sub, %v1
; Computes (-0.0 - %v0) * %v1, with no fast-math flags.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f32. GISEL is expected to emit v_max_f32 of -v0 with itself and then
; a plain v_mul_f32.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
178 define float @fold_f32_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {
179 ; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic:
181 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
183 ; SDAG-NEXT: s_setpc_b64 s[30:31]
185 ; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic:
187 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
188 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
189 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
190 ; GISEL-NEXT: s_setpc_b64 s[30:31]
191 %sub = fsub float -0.0, %v0
192 %mul = fmul float %sub, %v1
; Computes (-0.0 - %v0) * %v1; both the fsub and the fmul carry nsz.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f32. GISEL is expected to emit v_max_f32 of -v0 with itself and then
; a plain v_mul_f32.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
196 define float @fold_f32_fsub_into_fneg_modifier_dynamic_nsz(float %v0, float %v1) #2 {
197 ; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic_nsz:
199 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v1
201 ; SDAG-NEXT: s_setpc_b64 s[30:31]
203 ; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic_nsz:
205 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
207 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v1
208 ; GISEL-NEXT: s_setpc_b64 s[30:31]
209 %sub = fsub nsz float -0.0, %v0
210 %mul = fmul nsz float %sub, %v1
; <2 x float> variant: computes (splat(-0.0) - %v0) * %v1, no fast-math flags.
; SDAG is expected to emit one v_mul_f32 per element, each with a neg
; modifier on its first source. GISEL is expected to emit a v_max_f32 of the
; negated input per element, followed by plain v_mul_f32 instructions.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
214 define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee(<2 x float> %v0, <2 x float> %v1) #0 {
215 ; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee:
217 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
219 ; SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
220 ; SDAG-NEXT: s_setpc_b64 s[30:31]
222 ; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee:
224 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
226 ; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
227 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
228 ; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
229 ; GISEL-NEXT: s_setpc_b64 s[30:31]
230 %sub = fsub <2 x float> <float -0.0, float -0.0>, %v0
231 %mul = fmul <2 x float> %sub, %v1
; <2 x float> variant: computes (splat(-0.0) - %v0) * %v1, no fast-math flags.
; SDAG is expected to emit one v_mul_f32 per element, each with a neg
; modifier on its first source. GISEL is expected to emit a v_max_f32 of the
; negated input per element, followed by plain v_mul_f32 instructions.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
235 define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz(<2 x float> %v0, <2 x float> %v1) #1 {
236 ; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz:
238 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
239 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
240 ; SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
241 ; SDAG-NEXT: s_setpc_b64 s[30:31]
243 ; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz:
245 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
247 ; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
248 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
249 ; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
250 ; GISEL-NEXT: s_setpc_b64 s[30:31]
251 %sub = fsub <2 x float> <float -0.0, float -0.0>, %v0
252 %mul = fmul <2 x float> %sub, %v1
; <2 x float> variant: computes (splat(-0.0) - %v0) * %v1; both the fsub and
; the fmul carry nsz.
; SDAG is expected to emit one v_mul_f32 per element, each with a neg
; modifier on its first source. GISEL is expected to emit a v_max_f32 of the
; negated input per element, followed by plain v_mul_f32 instructions.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
256 define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee_nsz(<2 x float> %v0, <2 x float> %v1) #0 {
257 ; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee_nsz:
259 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
260 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
261 ; SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
262 ; SDAG-NEXT: s_setpc_b64 s[30:31]
264 ; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee_nsz:
266 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
268 ; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
269 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
270 ; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
271 ; GISEL-NEXT: s_setpc_b64 s[30:31]
272 %sub = fsub nsz <2 x float> <float -0.0, float -0.0>, %v0
273 %mul = fmul nsz <2 x float> %sub, %v1
; <2 x float> variant: computes (splat(-0.0) - %v0) * %v1; both the fsub and
; the fmul carry nsz.
; SDAG is expected to emit one v_mul_f32 per element, each with a neg
; modifier on its first source. GISEL is expected to emit a v_max_f32 of the
; negated input per element, followed by plain v_mul_f32 instructions.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
277 define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz_nsz(<2 x float> %v0, <2 x float> %v1) #1 {
278 ; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz_nsz:
280 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
281 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
282 ; SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
283 ; SDAG-NEXT: s_setpc_b64 s[30:31]
285 ; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz_nsz:
287 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
288 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
289 ; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
290 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
291 ; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
292 ; GISEL-NEXT: s_setpc_b64 s[30:31]
293 %sub = fsub nsz <2 x float> <float -0.0, float -0.0>, %v0
294 %mul = fmul nsz <2 x float> %sub, %v1
; <2 x float> variant: computes (splat(-0.0) - %v0) * %v1, no fast-math flags.
; SDAG is expected to emit one v_mul_f32 per element, each with a neg
; modifier on its first source. GISEL is expected to emit a v_max_f32 of the
; negated input per element, followed by plain v_mul_f32 instructions.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
298 define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic(<2 x float> %v0, <2 x float> %v1) #2 {
299 ; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic:
301 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
303 ; SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
304 ; SDAG-NEXT: s_setpc_b64 s[30:31]
306 ; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic:
308 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
309 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
310 ; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
311 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
312 ; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
313 ; GISEL-NEXT: s_setpc_b64 s[30:31]
314 %sub = fsub <2 x float> <float -0.0, float -0.0>, %v0
315 %mul = fmul <2 x float> %sub, %v1
; <2 x float> variant: computes (splat(-0.0) - %v0) * %v1; both the fsub and
; the fmul carry nsz.
; SDAG is expected to emit one v_mul_f32 per element, each with a neg
; modifier on its first source. GISEL is expected to emit a v_max_f32 of the
; negated input per element, followed by plain v_mul_f32 instructions.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
319 define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz(<2 x float> %v0, <2 x float> %v1) #2 {
320 ; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz:
322 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323 ; SDAG-NEXT: v_mul_f32_e64 v0, -v0, v2
324 ; SDAG-NEXT: v_mul_f32_e64 v1, -v1, v3
325 ; SDAG-NEXT: s_setpc_b64 s[30:31]
327 ; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz:
329 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
331 ; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
332 ; GISEL-NEXT: v_mul_f32_e32 v0, v0, v2
333 ; GISEL-NEXT: v_mul_f32_e32 v1, v1, v3
334 ; GISEL-NEXT: s_setpc_b64 s[30:31]
335 %sub = fsub nsz <2 x float> <float -0.0, float -0.0>, %v0
336 %mul = fmul nsz <2 x float> %sub, %v1
; half variant: computes (-0.0 - %v0) * %v1, with no fast-math flags.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f16. GISEL is expected to emit v_max_f16 of -v0 with itself and then
; a plain v_mul_f16.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
341 define half @fold_f16_fsub_into_fneg_modifier_ieee(half %v0, half %v1) #0 {
342 ; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_ieee:
344 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
345 ; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1
346 ; SDAG-NEXT: s_setpc_b64 s[30:31]
348 ; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_ieee:
350 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
352 ; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
353 ; GISEL-NEXT: s_setpc_b64 s[30:31]
354 %sub = fsub half -0.0, %v0
355 %mul = fmul half %sub, %v1
; half variant: computes (-0.0 - %v0) * %v1, with no fast-math flags.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f16. GISEL is expected to emit v_max_f16 of -v0 with itself and then
; a plain v_mul_f16.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
359 define half @fold_f16_fsub_into_fneg_modifier_daz(half %v0, half %v1) #1 {
360 ; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz:
362 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
363 ; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1
364 ; SDAG-NEXT: s_setpc_b64 s[30:31]
366 ; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_daz:
368 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
369 ; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
370 ; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
371 ; GISEL-NEXT: s_setpc_b64 s[30:31]
372 %sub = fsub half -0.0, %v0
373 %mul = fmul half %sub, %v1
; half variant: computes (-0.0 - %v0) * %v1; both the fsub and the fmul
; carry nsz.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f16. GISEL is expected to emit v_max_f16 of -v0 with itself and then
; a plain v_mul_f16.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
377 define half @fold_f16_fsub_into_fneg_modifier_ieee_nsz(half %v0, half %v1) #0 {
378 ; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_ieee_nsz:
380 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
381 ; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1
382 ; SDAG-NEXT: s_setpc_b64 s[30:31]
384 ; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_ieee_nsz:
386 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
387 ; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
388 ; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
389 ; GISEL-NEXT: s_setpc_b64 s[30:31]
390 %sub = fsub nsz half -0.0, %v0
391 %mul = fmul nsz half %sub, %v1
; half variant: computes (-0.0 - %v0) * %v1; both the fsub and the fmul
; carry nsz.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f16. GISEL is expected to emit v_max_f16 of -v0 with itself and then
; a plain v_mul_f16.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
395 define half @fold_f16_fsub_into_fneg_modifier_daz_nsz(half %v0, half %v1) #1 {
396 ; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz_nsz:
398 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
399 ; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1
400 ; SDAG-NEXT: s_setpc_b64 s[30:31]
402 ; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_daz_nsz:
404 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405 ; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
406 ; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
407 ; GISEL-NEXT: s_setpc_b64 s[30:31]
408 %sub = fsub nsz half -0.0, %v0
409 %mul = fmul nsz half %sub, %v1
; half variant: computes (-0.0 - %v0) * %v1, with no fast-math flags.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f16. GISEL is expected to emit v_max_f16 of -v0 with itself and then
; a plain v_mul_f16.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
413 define half @fold_f16_fsub_into_fneg_modifier_dynamic(half %v0, half %v1) #2 {
414 ; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic:
416 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
417 ; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1
418 ; SDAG-NEXT: s_setpc_b64 s[30:31]
420 ; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic:
422 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
423 ; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
424 ; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
425 ; GISEL-NEXT: s_setpc_b64 s[30:31]
426 %sub = fsub half -0.0, %v0
427 %mul = fmul half %sub, %v1
; half variant: computes (-0.0 - %v0) * %v1; both the fsub and the fmul
; carry nsz.
; SDAG is expected to fold the subtract into a neg modifier on the single
; v_mul_f16. GISEL is expected to emit v_max_f16 of -v0 with itself and then
; a plain v_mul_f16.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
431 define half @fold_f16_fsub_into_fneg_modifier_dynamic_nsz(half %v0, half %v1) #2 {
432 ; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic_nsz:
434 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
435 ; SDAG-NEXT: v_mul_f16_e64 v0, -v0, v1
436 ; SDAG-NEXT: s_setpc_b64 s[30:31]
438 ; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic_nsz:
440 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
441 ; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
442 ; GISEL-NEXT: v_mul_f16_e32 v0, v0, v1
443 ; GISEL-NEXT: s_setpc_b64 s[30:31]
444 %sub = fsub nsz half -0.0, %v0
445 %mul = fmul nsz half %sub, %v1
; <2 x half> variant: computes (splat(-0.0) - %v0) * %v1, no fast-math flags.
; SDAG is expected to emit a single v_pk_mul_f16 with neg_lo/neg_hi set on
; the first source only. GISEL is expected to emit v_pk_max_f16 of the input
; with itself (both sources negated), then a plain v_pk_mul_f16.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
449 define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee(<2 x half> %v0, <2 x half> %v1) #0 {
450 ; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee:
452 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453 ; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
454 ; SDAG-NEXT: s_setpc_b64 s[30:31]
456 ; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee:
458 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
459 ; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
460 ; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
461 ; GISEL-NEXT: s_setpc_b64 s[30:31]
462 %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0
463 %mul = fmul <2 x half> %sub, %v1
; <2 x half> variant: computes (splat(-0.0) - %v0) * %v1, no fast-math flags.
; SDAG is expected to emit a single v_pk_mul_f16 with neg_lo/neg_hi set on
; the first source only. GISEL is expected to emit v_pk_max_f16 of the input
; with itself (both sources negated), then a plain v_pk_mul_f16.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
467 define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz(<2 x half> %v0, <2 x half> %v1) #1 {
468 ; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz:
470 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471 ; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
472 ; SDAG-NEXT: s_setpc_b64 s[30:31]
474 ; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz:
476 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
477 ; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
478 ; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
479 ; GISEL-NEXT: s_setpc_b64 s[30:31]
480 %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0
481 %mul = fmul <2 x half> %sub, %v1
; <2 x half> variant: computes (splat(-0.0) - %v0) * %v1; both the fsub and
; the fmul carry nsz.
; SDAG is expected to emit a single v_pk_mul_f16 with neg_lo/neg_hi set on
; the first source only. GISEL is expected to emit v_pk_max_f16 of the input
; with itself (both sources negated), then a plain v_pk_mul_f16.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
485 define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee_nsz(<2 x half> %v0, <2 x half> %v1) #0 {
486 ; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee_nsz:
488 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489 ; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
490 ; SDAG-NEXT: s_setpc_b64 s[30:31]
492 ; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee_nsz:
494 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
495 ; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
496 ; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
497 ; GISEL-NEXT: s_setpc_b64 s[30:31]
498 %sub = fsub nsz <2 x half> <half -0.0, half -0.0>, %v0
499 %mul = fmul nsz <2 x half> %sub, %v1
; <2 x half> variant: computes (splat(-0.0) - %v0) * %v1; both the fsub and
; the fmul carry nsz.
; SDAG is expected to emit a single v_pk_mul_f16 with neg_lo/neg_hi set on
; the first source only. GISEL is expected to emit v_pk_max_f16 of the input
; with itself (both sources negated), then a plain v_pk_mul_f16.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
503 define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz_nsz(<2 x half> %v0, <2 x half> %v1) #1 {
504 ; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz_nsz:
506 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
507 ; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
508 ; SDAG-NEXT: s_setpc_b64 s[30:31]
510 ; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz_nsz:
512 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
513 ; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
514 ; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
515 ; GISEL-NEXT: s_setpc_b64 s[30:31]
516 %sub = fsub nsz <2 x half> <half -0.0, half -0.0>, %v0
517 %mul = fmul nsz <2 x half> %sub, %v1
; <2 x half> variant: computes (splat(-0.0) - %v0) * %v1, no fast-math flags.
; SDAG is expected to emit a single v_pk_mul_f16 with neg_lo/neg_hi set on
; the first source only. GISEL is expected to emit v_pk_max_f16 of the input
; with itself (both sources negated), then a plain v_pk_mul_f16.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
521 define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic(<2 x half> %v0, <2 x half> %v1) #2 {
522 ; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic:
524 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
525 ; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
526 ; SDAG-NEXT: s_setpc_b64 s[30:31]
528 ; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic:
530 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
531 ; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
532 ; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
533 ; GISEL-NEXT: s_setpc_b64 s[30:31]
534 %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0
535 %mul = fmul <2 x half> %sub, %v1
; <2 x half> variant: computes (splat(-0.0) - %v0) * %v1; both the fsub and
; the fmul carry nsz.
; SDAG is expected to emit a single v_pk_mul_f16 with neg_lo/neg_hi set on
; the first source only. GISEL is expected to emit v_pk_max_f16 of the input
; with itself (both sources negated), then a plain v_pk_mul_f16.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
539 define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz(<2 x half> %v0, <2 x half> %v1) #2 {
540 ; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz:
542 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
543 ; SDAG-NEXT: v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
544 ; SDAG-NEXT: s_setpc_b64 s[30:31]
546 ; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz:
548 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
549 ; GISEL-NEXT: v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
550 ; GISEL-NEXT: v_pk_mul_f16 v0, v0, v1
551 ; GISEL-NEXT: s_setpc_b64 s[30:31]
552 %sub = fsub nsz <2 x half> <half -0.0, half -0.0>, %v0
553 %mul = fmul nsz <2 x half> %sub, %v1
; double variant: computes (-0.0 - %v0) * %v1, with no fast-math flags.
; SDAG is expected to emit a single v_mul_f64 with a neg modifier on the
; first source. GISEL is expected to emit v_max_f64 of the input with itself
; (no modifiers), then v_mul_f64 carrying the neg modifier.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
557 define double @fold_f64_fsub_into_fneg_modifier_ieee(double %v0, double %v1) #0 {
558 ; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_ieee:
560 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
561 ; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
562 ; SDAG-NEXT: s_setpc_b64 s[30:31]
564 ; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_ieee:
566 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
567 ; GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
568 ; GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
569 ; GISEL-NEXT: s_setpc_b64 s[30:31]
570 %sub = fsub double -0.0, %v0
571 %mul = fmul double %sub, %v1
; double variant: computes (-0.0 - %v0) * %v1, with no fast-math flags.
; SDAG is expected to emit a single v_mul_f64 with a neg modifier on the
; first source. GISEL is expected to emit v_max_f64 of the input with itself
; (no modifiers), then v_mul_f64 carrying the neg modifier.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
575 define double @fold_f64_fsub_into_fneg_modifier_daz(double %v0, double %v1) #1 {
576 ; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_daz:
578 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
579 ; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
580 ; SDAG-NEXT: s_setpc_b64 s[30:31]
582 ; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_daz:
584 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585 ; GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
586 ; GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
587 ; GISEL-NEXT: s_setpc_b64 s[30:31]
588 %sub = fsub double -0.0, %v0
589 %mul = fmul double %sub, %v1
; double variant: computes (-0.0 - %v0) * %v1; both the fsub and the fmul
; carry nsz.
; SDAG is expected to emit a single v_mul_f64 with a neg modifier on the
; first source. GISEL is expected to emit v_max_f64 of the input with itself
; (no modifiers), then v_mul_f64 carrying the neg modifier.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
593 define double @fold_f64_fsub_into_fneg_modifier_ieee_nsz(double %v0, double %v1) #0 {
594 ; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_ieee_nsz:
596 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597 ; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
598 ; SDAG-NEXT: s_setpc_b64 s[30:31]
600 ; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_ieee_nsz:
602 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
603 ; GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
604 ; GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
605 ; GISEL-NEXT: s_setpc_b64 s[30:31]
606 %sub = fsub nsz double -0.0, %v0
607 %mul = fmul nsz double %sub, %v1
; double variant: computes (-0.0 - %v0) * %v1; both the fsub and the fmul
; carry nsz.
; SDAG is expected to emit a single v_mul_f64 with a neg modifier on the
; first source. GISEL is expected to emit v_max_f64 of the input with itself
; (no modifiers), then v_mul_f64 carrying the neg modifier.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
611 define double @fold_f64_fsub_into_fneg_modifier_daz_nsz(double %v0, double %v1) #1 {
612 ; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_daz_nsz:
614 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
615 ; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
616 ; SDAG-NEXT: s_setpc_b64 s[30:31]
618 ; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_daz_nsz:
620 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
621 ; GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
622 ; GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
623 ; GISEL-NEXT: s_setpc_b64 s[30:31]
624 %sub = fsub nsz double -0.0, %v0
625 %mul = fmul nsz double %sub, %v1
; double variant: computes (-0.0 - %v0) * %v1, with no fast-math flags.
; SDAG is expected to emit a single v_mul_f64 with a neg modifier on the
; first source. GISEL is expected to emit v_max_f64 of the input with itself
; (no modifiers), then v_mul_f64 carrying the neg modifier.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
629 define double @fold_f64_fsub_into_fneg_modifier_dynamic(double %v0, double %v1) #2 {
630 ; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic:
632 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
633 ; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
634 ; SDAG-NEXT: s_setpc_b64 s[30:31]
636 ; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic:
638 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
639 ; GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
640 ; GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
641 ; GISEL-NEXT: s_setpc_b64 s[30:31]
642 %sub = fsub double -0.0, %v0
643 %mul = fmul double %sub, %v1
; double variant: computes (-0.0 - %v0) * %v1; both the fsub and the fmul
; carry nsz.
; SDAG is expected to emit a single v_mul_f64 with a neg modifier on the
; first source. GISEL is expected to emit v_max_f64 of the input with itself
; (no modifiers), then v_mul_f64 carrying the neg modifier.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
647 define double @fold_f64_fsub_into_fneg_modifier_dynamic_nsz(double %v0, double %v1) #2 {
648 ; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic_nsz:
650 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
651 ; SDAG-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
652 ; SDAG-NEXT: s_setpc_b64 s[30:31]
654 ; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic_nsz:
656 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657 ; GISEL-NEXT: v_max_f64 v[0:1], v[0:1], v[0:1]
658 ; GISEL-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
659 ; GISEL-NEXT: s_setpc_b64 s[30:31]
660 %sub = fsub nsz double -0.0, %v0
661 %mul = fmul nsz double %sub, %v1
; The user of (-0.0 - %v0) is a select instead of an fmul: the result is
; %cond ? (-0.0 - %v0) : %v1 (the select result is still named %mul).
; SDAG is expected to fold the negation into a neg modifier on the
; v_cndmask_b32 source. GISEL is expected to emit v_max_f32 of the negated
; input first and then a plain v_cndmask_b32.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
665 define float @fold_f32_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, float %v0, float %v1) #0 {
666 ; SDAG-LABEL: fold_f32_select_user_fsub_into_fneg_modifier_ieee:
668 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
669 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
670 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
671 ; SDAG-NEXT: v_cndmask_b32_e64 v0, v2, -v1, vcc
672 ; SDAG-NEXT: s_setpc_b64 s[30:31]
674 ; GISEL-LABEL: fold_f32_select_user_fsub_into_fneg_modifier_ieee:
676 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
677 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
678 ; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
679 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
680 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
681 ; GISEL-NEXT: s_setpc_b64 s[30:31]
682 %sub = fsub float -0.0, %v0
683 %mul = select i1 %cond, float %sub, float %v1
; The user of (-0.0 - %v0) is a select instead of an fmul: the result is
; %cond ? (-0.0 - %v0) : %v1 (the select result is still named %mul).
; SDAG is expected to keep a real v_sub_f32 from the literal 0x80000000 and
; feed a plain v_cndmask_b32. GISEL is expected to emit v_max_f32 of the
; negated input and then a plain v_cndmask_b32.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
687 define float @no_fold_f32_select_user_fsub_into_fneg_modifier_daz(i1 %cond, float %v0, float %v1) #1 {
688 ; SDAG-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_daz:
690 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
691 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
692 ; SDAG-NEXT: v_sub_f32_e32 v1, 0x80000000, v1
693 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
694 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
695 ; SDAG-NEXT: s_setpc_b64 s[30:31]
697 ; GISEL-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_daz:
699 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
700 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
701 ; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
702 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
703 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
704 ; GISEL-NEXT: s_setpc_b64 s[30:31]
705 %sub = fsub float -0.0, %v0
706 %mul = select i1 %cond, float %sub, float %v1
; The user of (-0.0 - %v0) is a select instead of an fmul: the result is
; %cond ? (-0.0 - %v0) : %v1 (the select result is still named %mul).
; SDAG is expected to keep a real v_sub_f32 from the literal 0x80000000 and
; feed a plain v_cndmask_b32. GISEL is expected to emit v_max_f32 of the
; negated input and then a plain v_cndmask_b32.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
710 define float @no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, float %v0, float %v1) #2 {
711 ; SDAG-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic:
713 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
714 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
715 ; SDAG-NEXT: v_sub_f32_e32 v1, 0x80000000, v1
716 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
717 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
718 ; SDAG-NEXT: s_setpc_b64 s[30:31]
720 ; GISEL-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic:
722 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
723 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
724 ; GISEL-NEXT: v_max_f32_e64 v1, -v1, -v1
725 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
726 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
727 ; GISEL-NEXT: s_setpc_b64 s[30:31]
728 %sub = fsub float -0.0, %v0
729 %mul = select i1 %cond, float %sub, float %v1
; half variant with a select user: the result is
; %cond ? (-0.0 - %v0) : %v1 (the select result is still named %mul).
; SDAG is expected to replace the subtract with v_xor_b32 against 0x8000
; ahead of a plain v_cndmask_b32. GISEL is expected to emit v_max_f16 of the
; negated input and then a plain v_cndmask_b32.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
733 define half @fold_f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, half %v0, half %v1) #0 {
734 ; SDAG-LABEL: fold_f16_select_user_fsub_into_fneg_modifier_ieee:
736 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
737 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
738 ; SDAG-NEXT: v_xor_b32_e32 v1, 0x8000, v1
739 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
740 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
741 ; SDAG-NEXT: s_setpc_b64 s[30:31]
743 ; GISEL-LABEL: fold_f16_select_user_fsub_into_fneg_modifier_ieee:
745 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
746 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
747 ; GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
748 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
749 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
750 ; GISEL-NEXT: s_setpc_b64 s[30:31]
751 %sub = fsub half -0.0, %v0
752 %mul = select i1 %cond, half %sub, half %v1
; half variant with a select user: the result is
; %cond ? (-0.0 - %v0) : %v1 (the select result is still named %mul).
; SDAG is expected to keep a real v_sub_f16 from the literal 0x8000 and feed
; a plain v_cndmask_b32. GISEL is expected to emit v_max_f16 of the negated
; input and then a plain v_cndmask_b32.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
756 define half @no_fold_f16_select_user_fsub_into_fneg_modifier_daz(i1 %cond, half %v0, half %v1) #1 {
757 ; SDAG-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_daz:
759 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
760 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
761 ; SDAG-NEXT: v_sub_f16_e32 v1, 0x8000, v1
762 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
763 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
764 ; SDAG-NEXT: s_setpc_b64 s[30:31]
766 ; GISEL-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_daz:
768 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
769 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
770 ; GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
771 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
772 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
773 ; GISEL-NEXT: s_setpc_b64 s[30:31]
774 %sub = fsub half -0.0, %v0
775 %mul = select i1 %cond, half %sub, half %v1
; half variant with a select user: the result is
; %cond ? (-0.0 - %v0) : %v1 (the select result is still named %mul).
; SDAG is expected to keep a real v_sub_f16 from the literal 0x8000 and feed
; a plain v_cndmask_b32. GISEL is expected to emit v_max_f16 of the negated
; input and then a plain v_cndmask_b32.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
779 define half @no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, half %v0, half %v1) #2 {
780 ; SDAG-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic:
782 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
783 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
784 ; SDAG-NEXT: v_sub_f16_e32 v1, 0x8000, v1
785 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
786 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
787 ; SDAG-NEXT: s_setpc_b64 s[30:31]
789 ; GISEL-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic:
791 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
792 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
793 ; GISEL-NEXT: v_max_f16_e64 v1, -v1, -v1
794 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
795 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
796 ; GISEL-NEXT: s_setpc_b64 s[30:31]
797 %sub = fsub half -0.0, %v0
798 %mul = select i1 %cond, half %sub, half %v1
; double variant with a select user: the result is
; %cond ? (-0.0 - %v0) : %v1 (the select result is still named %mul).
; SDAG is expected to replace the subtract with v_xor_b32 of v2 against
; 0x80000000 and select each 32-bit half with its own v_cndmask_b32.
; GISEL is expected to emit v_max_f64 of the negated v[1:2] pair and then
; the two v_cndmask_b32 instructions.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
802 define double @fold_f64_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, double %v0, double %v1) #0 {
803 ; SDAG-LABEL: fold_f64_select_user_fsub_into_fneg_modifier_ieee:
805 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
806 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
807 ; SDAG-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
808 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
809 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
810 ; SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
811 ; SDAG-NEXT: s_setpc_b64 s[30:31]
813 ; GISEL-LABEL: fold_f64_select_user_fsub_into_fneg_modifier_ieee:
815 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
816 ; GISEL-NEXT: v_max_f64 v[1:2], -v[1:2], -v[1:2]
817 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
818 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
819 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
820 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
821 ; GISEL-NEXT: s_setpc_b64 s[30:31]
822 %sub = fsub double -0.0, %v0
823 %mul = select i1 %cond, double %sub, double %v1
; double variant with a select user: the result is
; %cond ? (-0.0 - %v0) : %v1 (the select result is still named %mul).
; SDAG is expected to replace the subtract with v_xor_b32 of v2 against
; 0x80000000 and select each 32-bit half with its own v_cndmask_b32.
; GISEL is expected to emit v_max_f64 of the negated v[1:2] pair and then
; the two v_cndmask_b32 instructions.
; NOTE(review): the checks match the `fold_..._ieee` f64 select test in this
; file even though this one is named `no_fold` -- confirm the name is intended.
; NOTE(review): attribute group #1 is not visible here; presumably a
; denormals-are-zero mode, going by the function name -- confirm.
827 define double @no_fold_f64_select_user_fsub_into_fneg_modifier_daz(i1 %cond, double %v0, double %v1) #1 {
828 ; SDAG-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_daz:
830 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
831 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
832 ; SDAG-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
833 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
834 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
835 ; SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
836 ; SDAG-NEXT: s_setpc_b64 s[30:31]
838 ; GISEL-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_daz:
840 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
841 ; GISEL-NEXT: v_max_f64 v[1:2], -v[1:2], -v[1:2]
842 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
843 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
844 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
845 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
846 ; GISEL-NEXT: s_setpc_b64 s[30:31]
847 %sub = fsub double -0.0, %v0
848 %mul = select i1 %cond, double %sub, double %v1
; double variant with a select user: the result is
; %cond ? (-0.0 - %v0) : %v1 (the select result is still named %mul).
; SDAG is expected to replace the subtract with v_xor_b32 of v2 against
; 0x80000000 and select each 32-bit half with its own v_cndmask_b32.
; GISEL is expected to emit v_max_f64 of the negated v[1:2] pair and then
; the two v_cndmask_b32 instructions.
; NOTE(review): the checks match the `fold_..._ieee` f64 select test in this
; file even though this one is named `no_fold` -- confirm the name is intended.
; NOTE(review): attribute group #2 is not visible here; presumably a dynamic
; denormal mode, going by the function name -- confirm.
852 define double @no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, double %v0, double %v1) #2 {
853 ; SDAG-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic:
855 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
856 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
857 ; SDAG-NEXT: v_xor_b32_e32 v2, 0x80000000, v2
858 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
859 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
860 ; SDAG-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
861 ; SDAG-NEXT: s_setpc_b64 s[30:31]
863 ; GISEL-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic:
865 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
866 ; GISEL-NEXT: v_max_f64 v[1:2], -v[1:2], -v[1:2]
867 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
868 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
869 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc
870 ; GISEL-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc
871 ; GISEL-NEXT: s_setpc_b64 s[30:31]
872 %sub = fsub double -0.0, %v0
873 %mul = select i1 %cond, double %sub, double %v1
; <2 x half> variant with a select user: the result is
; %cond ? (splat(-0.0) - %v0) : %v1 (the select result is still named %mul).
; SDAG is expected to replace the subtract with v_xor_b32 against 0x80008000
; ahead of a plain v_cndmask_b32. GISEL is expected to emit v_pk_max_f16 with
; both sources negated and then a plain v_cndmask_b32.
; NOTE(review): attribute group #0 is not visible here; presumably the IEEE
; denormal mode, going by the function name -- confirm.
877 define <2 x half> @fold_v2f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, <2 x half> %v0, <2 x half> %v1) #0 {
878 ; SDAG-LABEL: fold_v2f16_select_user_fsub_into_fneg_modifier_ieee:
880 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
881 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
882 ; SDAG-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
883 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
884 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
885 ; SDAG-NEXT: s_setpc_b64 s[30:31]
887 ; GISEL-LABEL: fold_v2f16_select_user_fsub_into_fneg_modifier_ieee:
889 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
890 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
891 ; GISEL-NEXT: v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1]
892 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
893 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
894 ; GISEL-NEXT: s_setpc_b64 s[30:31]
895 %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0
896 %mul = select i1 %cond, <2 x half> %sub, <2 x half> %v1
; <2 x half> `fsub <-0.0, -0.0>, %v0` feeding a select, with
; "denormal-fp-math"="preserve-sign,preserve-sign" (#1).
; Expected output: the SelectionDAG run applies v_xor_b32 with 0x80008000 to v1;
; the GlobalISel run emits v_pk_max_f16 with neg_lo/neg_hi set on both sources.
900 define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz(i1 %cond, <2 x half> %v0, <2 x half> %v1) #1 {
901 ; SDAG-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz:
903 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
904 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
905 ; SDAG-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
906 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
907 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
908 ; SDAG-NEXT: s_setpc_b64 s[30:31]
910 ; GISEL-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz:
912 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
913 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
914 ; GISEL-NEXT: v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1]
915 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
916 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
917 ; GISEL-NEXT: s_setpc_b64 s[30:31]
918 %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0
919 %mul = select i1 %cond, <2 x half> %sub, <2 x half> %v1
; <2 x half> `fsub <-0.0, -0.0>, %v0` feeding a select, with
; "denormal-fp-math"="dynamic,dynamic" (#2).
; Expected output: the SelectionDAG run applies v_xor_b32 with 0x80008000 to v1;
; the GlobalISel run emits v_pk_max_f16 with neg_lo/neg_hi set on both sources.
923 define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, <2 x half> %v0, <2 x half> %v1) #2 {
924 ; SDAG-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic:
926 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
927 ; SDAG-NEXT: v_and_b32_e32 v0, 1, v0
928 ; SDAG-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
929 ; SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
930 ; SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
931 ; SDAG-NEXT: s_setpc_b64 s[30:31]
933 ; GISEL-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic:
935 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936 ; GISEL-NEXT: v_and_b32_e32 v0, 1, v0
937 ; GISEL-NEXT: v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1]
938 ; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
939 ; GISEL-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
940 ; GISEL-NEXT: s_setpc_b64 s[30:31]
941 %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0
942 %mul = select i1 %cond, <2 x half> %sub, <2 x half> %v1
; Constrained (strictfp) variant: constrained.fsub(-0.0, %v0) feeding
; constrained.fmul, both with round.dynamic / fpexcept.strict, under
; "denormal-fp-math"="ieee,ieee" plus strictfp (#3).
; Both runs share one set of expectations: a real v_sub_f32 from 0x80000000
; followed by v_mul_f32, i.e. no source modifier on the multiply.
; NOTE(review): the name starts with `fold_` although the expected output keeps
; the subtraction - confirm whether the name is intentional.
946 define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #3 {
947 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_ieee:
949 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
950 ; CHECK-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
951 ; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
952 ; CHECK-NEXT: s_setpc_b64 s[30:31]
953 %sub = call float @llvm.experimental.constrained.fsub.f32(float -0.0, float %v0, metadata !"round.dynamic", metadata !"fpexcept.strict")
954 %mul = call float @llvm.experimental.constrained.fmul.f32(float %sub, float %v1, metadata !"round.dynamic", metadata !"fpexcept.strict")
; Constrained (strictfp) variant: constrained.fsub(-0.0, %v0) feeding
; constrained.fmul, both with round.dynamic / fpexcept.strict, under
; "denormal-fp-math"="preserve-sign,preserve-sign" plus strictfp (#4).
; Both runs share one set of expectations: a real v_sub_f32 from 0x80000000
; followed by v_mul_f32, i.e. no source modifier on the multiply.
; NOTE(review): the name starts with `fold_` although the expected output keeps
; the subtraction - confirm whether the name is intentional.
958 define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #4 {
959 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_daz:
961 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
962 ; CHECK-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
963 ; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
964 ; CHECK-NEXT: s_setpc_b64 s[30:31]
965 %sub = call float @llvm.experimental.constrained.fsub.f32(float -0.0, float %v0, metadata !"round.dynamic", metadata !"fpexcept.strict")
966 %mul = call float @llvm.experimental.constrained.fmul.f32(float %sub, float %v1, metadata !"round.dynamic", metadata !"fpexcept.strict")
; Constrained (strictfp) variant: constrained.fsub(-0.0, %v0) feeding
; constrained.fmul, both with round.dynamic / fpexcept.strict, under
; "denormal-fp-math"="dynamic,dynamic" plus strictfp (#5).
; Both runs share one set of expectations: a real v_sub_f32 from 0x80000000
; followed by v_mul_f32, i.e. no source modifier on the multiply.
; NOTE(review): the name starts with `fold_` although the expected output keeps
; the subtraction - confirm whether the name is intentional.
970 define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #5 {
971 ; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_dynamic:
973 ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
974 ; CHECK-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
975 ; CHECK-NEXT: v_mul_f32_e32 v0, v0, v1
976 ; CHECK-NEXT: s_setpc_b64 s[30:31]
977 %sub = call float @llvm.experimental.constrained.fsub.f32(float -0.0, float %v0, metadata !"round.dynamic", metadata !"fpexcept.strict")
978 %mul = call float @llvm.experimental.constrained.fmul.f32(float %sub, float %v1, metadata !"round.dynamic", metadata !"fpexcept.strict")
; f32 `fsub -0.0, %v0` feeding llvm.is.fpclass with test mask 1 (the `issnan`
; case named in the function), under "denormal-fp-math"="ieee,ieee" (#0).
; Expected output: the SelectionDAG run puts the negation on the v_cmp_class_f32
; source operand (-v0); the GlobalISel run first emits v_max_f32 with both
; sources negated and then classifies the result.
982 define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee(float %v0) #0 {
983 ; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee:
985 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
986 ; SDAG-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, 1
987 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
988 ; SDAG-NEXT: s_setpc_b64 s[30:31]
990 ; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee:
992 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
993 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
994 ; GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 1
995 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
996 ; GISEL-NEXT: s_setpc_b64 s[30:31]
997 %sub = fsub float -0.0, %v0
998 %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 1)
; f32 `fsub -0.0, %v0` feeding llvm.is.fpclass with test mask 1 (the `issnan`
; case named in the function), under
; "denormal-fp-math"="preserve-sign,preserve-sign" (#1).
; Expected output: the SelectionDAG run keeps a real v_sub_f32 from 0x80000000
; ahead of v_cmp_class_f32; the GlobalISel run emits v_max_f32 with both sources
; negated ahead of v_cmp_class_f32.
1002 define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz(float %v0) #1 {
1003 ; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz:
1005 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1006 ; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
1007 ; SDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 1
1008 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1009 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1011 ; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz:
1013 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1014 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
1015 ; GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 1
1016 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1017 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1018 %sub = fsub float -0.0, %v0
1019 %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 1)
; f32 `fsub -0.0, %v0` feeding llvm.is.fpclass with test mask 1 (the `issnan`
; case named in the function), under "denormal-fp-math"="dynamic,dynamic" (#2).
; Expected output: the SelectionDAG run keeps a real v_sub_f32 from 0x80000000
; ahead of v_cmp_class_f32; the GlobalISel run emits v_max_f32 with both sources
; negated ahead of v_cmp_class_f32.
1023 define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic(float %v0) #2 {
1024 ; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic:
1026 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1027 ; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
1028 ; SDAG-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 1
1029 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1030 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1032 ; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic:
1034 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1035 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
1036 ; GISEL-NEXT: v_cmp_class_f32_e64 s[4:5], v0, 1
1037 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1038 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1039 %sub = fsub float -0.0, %v0
1040 %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 1)
; f32 `fsub -0.0, %v0` feeding llvm.is.fpclass with test mask 144 (0x90 in the
; expected output; the `isdenormal` case named in the function), under
; "denormal-fp-math"="ieee,ieee" (#0).
; Expected output: the SelectionDAG run puts the negation on the v_cmp_class_f32
; source operand (-v0); the GlobalISel run first emits v_max_f32 with both
; sources negated and then classifies the result.
1044 define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee(float %v0) #0 {
1045 ; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee:
1047 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1048 ; SDAG-NEXT: v_mov_b32_e32 v1, 0x90
1049 ; SDAG-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, v1
1050 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1051 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1053 ; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee:
1055 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1056 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
1057 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x90
1058 ; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1
1059 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1060 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1061 %sub = fsub float -0.0, %v0
1062 %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 144)
; f32 `fsub -0.0, %v0` feeding llvm.is.fpclass with test mask 144 (0x90 in the
; expected output; the `isdenormal` case named in the function), under
; "denormal-fp-math"="preserve-sign,preserve-sign" (#1).
; Expected output: the SelectionDAG run keeps a real v_sub_f32 from 0x80000000
; ahead of v_cmp_class_f32; the GlobalISel run emits v_max_f32 with both sources
; negated ahead of v_cmp_class_f32.
1066 define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz(float %v0) #1 {
1067 ; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz:
1069 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1070 ; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
1071 ; SDAG-NEXT: v_mov_b32_e32 v1, 0x90
1072 ; SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v1
1073 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1074 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1076 ; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz:
1078 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1079 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
1080 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x90
1081 ; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1
1082 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1083 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1084 %sub = fsub float -0.0, %v0
1085 %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 144)
; f32 `fsub -0.0, %v0` feeding llvm.is.fpclass with test mask 144 (0x90 in the
; expected output; the `isdenormal` case named in the function), under
; "denormal-fp-math"="dynamic,dynamic" (#2).
; Expected output: the SelectionDAG run keeps a real v_sub_f32 from 0x80000000
; ahead of v_cmp_class_f32; the GlobalISel run emits v_max_f32 with both sources
; negated ahead of v_cmp_class_f32.
1089 define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic(float %v0) #2 {
1090 ; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic:
1092 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1093 ; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
1094 ; SDAG-NEXT: v_mov_b32_e32 v1, 0x90
1095 ; SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v1
1096 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1097 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1099 ; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic:
1101 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1102 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
1103 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x90
1104 ; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1
1105 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1106 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1107 %sub = fsub float -0.0, %v0
1108 %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 144)
; f32 `fsub -0.0, %v0` feeding llvm.amdgcn.class.f32 with a run-time test mask
; (%testmask), under "denormal-fp-math"="ieee,ieee" (#0).
; Expected output: the SelectionDAG run puts the negation on the v_cmp_class_f32
; source operand (-v0); the GlobalISel run first emits v_max_f32 with both
; sources negated and then classifies the result.
1112 define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_ieee(float %v0, i32 %testmask) #0 {
1113 ; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_ieee:
1115 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1116 ; SDAG-NEXT: v_cmp_class_f32_e64 s[4:5], -v0, v1
1117 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1118 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1120 ; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_ieee:
1122 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1123 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
1124 ; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1
1125 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1126 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1127 %sub = fsub float -0.0, %v0
1128 %class = call i1 @llvm.amdgcn.class.f32(float %sub, i32 %testmask)
; f32 `fsub -0.0, %v0` feeding llvm.amdgcn.class.f32 with a run-time test mask
; (%testmask), under "denormal-fp-math"="preserve-sign,preserve-sign" (#1).
; Expected output: the SelectionDAG run keeps a real v_sub_f32 from 0x80000000
; ahead of v_cmp_class_f32; the GlobalISel run emits v_max_f32 with both sources
; negated ahead of v_cmp_class_f32.
1132 define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_daz(float %v0, i32 %testmask) #1 {
1133 ; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_daz:
1135 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1136 ; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
1137 ; SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v1
1138 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1139 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1141 ; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_daz:
1143 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1144 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
1145 ; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1
1146 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1147 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1148 %sub = fsub float -0.0, %v0
1149 %class = call i1 @llvm.amdgcn.class.f32(float %sub, i32 %testmask)
; f32 `fsub -0.0, %v0` feeding llvm.amdgcn.class.f32 with a run-time test mask
; (%testmask), under "denormal-fp-math"="dynamic,dynamic" (#2).
; Expected output: the SelectionDAG run keeps a real v_sub_f32 from 0x80000000
; ahead of v_cmp_class_f32; the GlobalISel run emits v_max_f32 with both sources
; negated ahead of v_cmp_class_f32.
1153 define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic(float %v0, i32 %testmask) #2 {
1154 ; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic:
1156 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1157 ; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
1158 ; SDAG-NEXT: v_cmp_class_f32_e32 vcc, v0, v1
1159 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1160 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1162 ; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic:
1164 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1165 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
1166 ; GISEL-NEXT: v_cmp_class_f32_e32 vcc, v0, v1
1167 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1168 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1169 %sub = fsub float -0.0, %v0
1170 %class = call i1 @llvm.amdgcn.class.f32(float %sub, i32 %testmask)
; f64 `fsub -0.0, %v0` feeding llvm.amdgcn.class.f64 with a run-time test mask
; (%testmask), under "denormal-fp-math"="preserve-sign,preserve-sign" (#1).
; Expected output: the SelectionDAG run puts the negation on the v_cmp_class_f64
; source operand (-v[0:1]); the GlobalISel run first emits v_max_f64 with both
; sources negated and then classifies the result.
1174 define i1 @no_fold_f64_fsub_into_fneg_modifier_class_var_daz(double %v0, i32 %testmask) #1 {
1175 ; SDAG-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_var_daz:
1177 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1178 ; SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], -v[0:1], v2
1179 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1180 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1182 ; GISEL-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_var_daz:
1184 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1185 ; GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
1186 ; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2
1187 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1188 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1189 %sub = fsub double -0.0, %v0
1190 %class = call i1 @llvm.amdgcn.class.f64(double %sub, i32 %testmask)
; f16 `fsub -0.0, %v0` feeding llvm.amdgcn.class.f16 with a run-time test mask
; (%testmask), under "denormal-fp-math"="preserve-sign,preserve-sign" (#1).
; Expected output: the SelectionDAG run keeps a real v_sub_f16 from 0x8000 ahead
; of v_cmp_class_f16; the GlobalISel run emits v_max_f16 with both sources
; negated ahead of v_cmp_class_f16.
1194 define i1 @no_fold_f16_fsub_into_fneg_modifier_class_var_daz(half %v0, i32 %testmask) #1 {
1195 ; SDAG-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_var_daz:
1197 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1198 ; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0
1199 ; SDAG-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
1200 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1201 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1203 ; GISEL-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_var_daz:
1205 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1206 ; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
1207 ; GISEL-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
1208 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1209 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1210 %sub = fsub half -0.0, %v0
1211 %class = call i1 @llvm.amdgcn.class.f16(half %sub, i32 %testmask)
; f64 `fsub -0.0, %v0` feeding llvm.is.fpclass.f64 with constant test mask 144
; (0x90 in the expected output), under
; "denormal-fp-math"="preserve-sign,preserve-sign" (#1).
; Expected output: the SelectionDAG run puts the negation on the v_cmp_class_f64
; source operand (-v[0:1]); the GlobalISel run first emits v_max_f64 with both
; sources negated and then classifies the result.
1215 define i1 @no_fold_f64_fsub_into_fneg_modifier_class_daz(double %v0) #1 {
1216 ; SDAG-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_daz:
1218 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1219 ; SDAG-NEXT: v_mov_b32_e32 v2, 0x90
1220 ; SDAG-NEXT: v_cmp_class_f64_e64 s[4:5], -v[0:1], v2
1221 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
1222 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1224 ; GISEL-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_daz:
1226 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1227 ; GISEL-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1]
1228 ; GISEL-NEXT: v_mov_b32_e32 v2, 0x90
1229 ; GISEL-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v2
1230 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1231 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1232 %sub = fsub double -0.0, %v0
1233 %class = call i1 @llvm.is.fpclass.f64(double %sub, i32 144)
; f16 `fsub -0.0, %v0` feeding llvm.is.fpclass.f16 with constant test mask 144
; (0x90 in the expected output), under
; "denormal-fp-math"="preserve-sign,preserve-sign" (#1).
; Expected output: the SelectionDAG run keeps a real v_sub_f16 from 0x8000 ahead
; of v_cmp_class_f16; the GlobalISel run emits v_max_f16 with both sources
; negated ahead of v_cmp_class_f16.
1237 define i1 @no_fold_f16_fsub_into_fneg_modifier_class_daz(half %v0) #1 {
1238 ; SDAG-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_daz:
1240 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1241 ; SDAG-NEXT: v_sub_f16_e32 v0, 0x8000, v0
1242 ; SDAG-NEXT: v_mov_b32_e32 v1, 0x90
1243 ; SDAG-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
1244 ; SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1245 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1247 ; GISEL-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_daz:
1249 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1250 ; GISEL-NEXT: v_max_f16_e64 v0, -v0, -v0
1251 ; GISEL-NEXT: v_mov_b32_e32 v1, 0x90
1252 ; GISEL-NEXT: v_cmp_class_f16_e32 vcc, v0, v1
1253 ; GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
1254 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1255 %sub = fsub half -0.0, %v0
1256 %class = call i1 @llvm.is.fpclass.f16(half %sub, i32 144)
; f32 `fsub -0.0, %v0` feeding llvm.amdgcn.interp.p1, using the amdgpu_gfx
; calling convention and "denormal-fp-math"="preserve-sign,preserve-sign" (#1).
; The inreg %v1 argument is the intrinsic's last operand; the expected output
; copies s4 into m0 before the interpolation.
; Expected output: v_interp_p1_f32 takes a plain v0 in both runs; the negation
; is a separate v_sub_f32 (SelectionDAG run) or v_max_f32 with negated sources
; (GlobalISel run).
; NOTE(review): the name starts with `fold_` although neither run shows a source
; modifier on the interp instruction - confirm the intended naming.
1260 define amdgpu_gfx float @fold_f32_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %v1) #1 {
1261 ; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_interp_daz:
1263 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1264 ; SDAG-NEXT: v_sub_f32_e32 v0, 0x80000000, v0
1265 ; SDAG-NEXT: s_mov_b32 m0, s4
1266 ; SDAG-NEXT: s_nop 0
1267 ; SDAG-NEXT: v_interp_p1_f32_e32 v0, v0, attr0.x
1268 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1270 ; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_interp_daz:
1272 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1273 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
1274 ; GISEL-NEXT: s_mov_b32 m0, s4
1275 ; GISEL-NEXT: s_nop 0
1276 ; GISEL-NEXT: v_interp_p1_f32_e32 v0, v0, attr0.x
1277 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1278 %sub = fsub float -0.0, %v0
1279 %p0_0 = call float @llvm.amdgcn.interp.p1(float %sub, i32 0, i32 0, i32 %v1)
; f32 `fsub -0.0, %v0` feeding llvm.amdgcn.interp.p1.f16, using the amdgpu_gfx
; calling convention and "denormal-fp-math"="preserve-sign,preserve-sign" (#1).
; The operand is a float; the `f16` in the name matches the intrinsic's suffix.
; The inreg %m0 argument is the intrinsic's last operand; the expected output
; copies s4 into m0 before the interpolation.
; Expected output: the SelectionDAG run puts the negation on the
; v_interp_p1ll_f16 source operand (-v0); the GlobalISel run emits v_max_f32
; with both sources negated and passes a plain v0 to v_interp_p1ll_f16.
1283 define amdgpu_gfx float @fold_f16_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %m0) #1 {
1284 ; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_interp_daz:
1286 ; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1287 ; SDAG-NEXT: s_mov_b32 m0, s4
1288 ; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
1289 ; SDAG-NEXT: v_interp_p1ll_f16 v0, -v0, attr2.y
1290 ; SDAG-NEXT: s_setpc_b64 s[30:31]
1292 ; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_interp_daz:
1294 ; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1295 ; GISEL-NEXT: v_max_f32_e64 v0, -v0, -v0
1296 ; GISEL-NEXT: s_mov_b32 m0, s4
1297 ; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
1298 ; GISEL-NEXT: v_interp_p1ll_f16 v0, v0, attr2.y
1299 ; GISEL-NEXT: s_setpc_b64 s[30:31]
1300 %sub = fsub float -0.0, %v0
1301 %p1_0 = call float @llvm.amdgcn.interp.p1.f16(float %sub, i32 1, i32 2, i1 0, i32 %m0)
; Declarations for the intrinsics called by the test functions: constrained
; fmul/fsub (strictfp tests), llvm.is.fpclass and llvm.amdgcn.class for f32,
; f64 and f16 (class tests), and the two llvm.amdgcn.interp.p1 forms (interp
; tests).
1305 declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
1306 declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
1307 declare i1 @llvm.is.fpclass.f32(float, i32 immarg)
1308 declare i1 @llvm.amdgcn.class.f32(float, i32)
1309 declare i1 @llvm.is.fpclass.f64(double, i32 immarg)
1310 declare i1 @llvm.amdgcn.class.f64(double, i32)
1311 declare i1 @llvm.is.fpclass.f16(half, i32 immarg)
1312 declare i1 @llvm.amdgcn.class.f16(half, i32)
1313 declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32)
1314 declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32)
; Attribute groups selecting the denormal mode each test runs under.
; #0/#1/#2 are ieee / preserve-sign / dynamic, matching the _ieee / _daz /
; _dynamic suffixes in the function names; #3/#4/#5 are the same three modes
; with strictfp added, used by the constrained-intrinsic tests.
1316 attributes #0 = { "denormal-fp-math"="ieee,ieee" }
1317 attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
1318 attributes #2 = { "denormal-fp-math"="dynamic,dynamic" }
1319 attributes #3 = { "denormal-fp-math"="ieee,ieee" strictfp }
1320 attributes #4 = { "denormal-fp-math"="preserve-sign,preserve-sign" strictfp }
1321 attributes #5 = { "denormal-fp-math"="dynamic,dynamic" strictfp }