1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
3 ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
4 ; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF
5 ; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
6 ; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
8 ; Test FP transforms using instruction/node-level fast-math-flags.
9 ; We're also checking debug output to verify that FMF is propagated to the newly created nodes.
10 ; The runs with the global unsafe-math options (GLOBAL/GLOBALDEBUG prefixes) test the pre-FMF behavior using regular instructions/nodes.
12 declare float @llvm.fma.f32(float, float, float)
13 declare float @llvm.sqrt.f32(float)
15 ; X * Y + Z --> fma(X, Y, Z)
17 ; The contract flag on the fmul is checked: the fmul lacks it here, so no FMA is formed.
19 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
20 ; FMFDEBUG-NOT: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
21 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'
23 define float @fmul_fadd_contract1(float %x, float %y, float %z) {
24 ; FMF-LABEL: fmul_fadd_contract1:
26 ; FMF-NEXT: xsmulsp 0, 1, 2
27 ; FMF-NEXT: xsaddsp 1, 0, 3
30 ; GLOBAL-LABEL: fmul_fadd_contract1:
32 ; GLOBAL-NEXT: xsmaddasp 3, 1, 2
33 ; GLOBAL-NEXT: fmr 1, 3
35 %mul = fmul float %x, %y
36 %add = fadd contract float %mul, %z
40 ; The contract flag on the fadd is also checked: the fadd lacks it here, so no FMA is formed.
42 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
43 ; FMFDEBUG-NOT: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
44 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'
46 define float @fmul_fadd_contract2(float %x, float %y, float %z) {
47 ; FMF-LABEL: fmul_fadd_contract2:
49 ; FMF-NEXT: xsmulsp 0, 1, 2
50 ; FMF-NEXT: xsaddsp 1, 0, 3
53 ; GLOBAL-LABEL: fmul_fadd_contract2:
55 ; GLOBAL-NEXT: xsmaddasp 3, 1, 2
56 ; GLOBAL-NEXT: fmr 1, 3
58 %mul = fmul contract float %x, %y
59 %add = fadd float %mul, %z
63 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract3:'
64 ; FMFDEBUG: fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
65 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_contract3:'
67 define float @fmul_fadd_contract3(float %x, float %y, float %z) {
68 ; FMF-LABEL: fmul_fadd_contract3:
70 ; FMF-NEXT: xsmaddasp 3, 1, 2
74 ; GLOBAL-LABEL: fmul_fadd_contract3:
76 ; GLOBAL-NEXT: xsmaddasp 3, 1, 2
77 ; GLOBAL-NEXT: fmr 1, 3
79 %mul = fmul contract float %x, %y
80 %add = fadd contract float %mul, %z
84 ; Reassociation does NOT imply that FMA contraction is allowed.
86 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
87 ; FMFDEBUG-NOT: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
88 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
90 define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
91 ; FMF-LABEL: fmul_fadd_reassoc1:
93 ; FMF-NEXT: xsmulsp 0, 1, 2
94 ; FMF-NEXT: xsaddsp 1, 0, 3
97 ; GLOBAL-LABEL: fmul_fadd_reassoc1:
99 ; GLOBAL-NEXT: xsmaddasp 3, 1, 2
100 ; GLOBAL-NEXT: fmr 1, 3
102 %mul = fmul float %x, %y
103 %add = fadd reassoc float %mul, %z
107 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
108 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
109 ; FMFDEBUG-NOT: fma reassoc {{t[0-9]+}}, {{t[0-9]+}}
110 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
112 define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
113 ; FMF-LABEL: fmul_fadd_reassoc2:
115 ; FMF-NEXT: xsmulsp 0, 1, 2
116 ; FMF-NEXT: xsaddsp 1, 0, 3
119 ; GLOBAL-LABEL: fmul_fadd_reassoc2:
121 ; GLOBAL-NEXT: xsmaddasp 3, 1, 2
122 ; GLOBAL-NEXT: fmr 1, 3
124 %mul = fmul reassoc float %x, %y
125 %add = fadd reassoc float %mul, %z
129 ; The fadd is now fully 'fast', but fmul is not yet.
131 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
132 ; FMFDEBUG-NOT: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
133 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'
135 define float @fmul_fadd_fast1(float %x, float %y, float %z) {
136 ; FMF-LABEL: fmul_fadd_fast1:
138 ; FMF-NEXT: xsmulsp 0, 1, 2
139 ; FMF-NEXT: xsaddsp 1, 0, 3
142 ; GLOBAL-LABEL: fmul_fadd_fast1:
144 ; GLOBAL-NEXT: xsmaddasp 3, 1, 2
145 ; GLOBAL-NEXT: fmr 1, 3
147 %mul = fmul float %x, %y
148 %add = fadd fast float %mul, %z
152 ; This implies that contraction is allowed - the intermediate fmul result is now also flagged.
154 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
155 ; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
156 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'
158 define float @fmul_fadd_fast2(float %x, float %y, float %z) {
159 ; FMF-LABEL: fmul_fadd_fast2:
161 ; FMF-NEXT: xsmaddasp 3, 1, 2
165 ; GLOBAL-LABEL: fmul_fadd_fast2:
167 ; GLOBAL-NEXT: xsmaddasp 3, 1, 2
168 ; GLOBAL-NEXT: fmr 1, 3
170 %mul = fmul fast float %x, %y
171 %add = fadd fast float %mul, %z
175 ; fma(X, 7.0, X * 42.0) --> X * 49.0
176 ; This is the minimum FMF needed for this transform - the FMA allows reassociation.
178 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
179 ; FMFDEBUG: fmul reassoc {{t[0-9]+}},
180 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
182 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
183 ; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
184 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
186 define float @fmul_fma_reassoc1(float %x) {
187 ; FMF-LABEL: fmul_fma_reassoc1:
189 ; FMF-NEXT: addis 3, 2, .LCPI7_0@toc@ha
190 ; FMF-NEXT: lfs 0, .LCPI7_0@toc@l(3)
191 ; FMF-NEXT: xsmulsp 1, 1, 0
194 ; GLOBAL-LABEL: fmul_fma_reassoc1:
196 ; GLOBAL-NEXT: addis 3, 2, .LCPI7_0@toc@ha
197 ; GLOBAL-NEXT: lfs 0, .LCPI7_0@toc@l(3)
198 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
200 %mul = fmul float %x, 42.0
201 %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
205 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
207 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
208 ; FMFDEBUG: fmul reassoc {{t[0-9]+}}
209 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
211 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
212 ; GLOBALDEBUG: fmul reassoc {{t[0-9]+}}
213 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
215 define float @fmul_fma_reassoc2(float %x) {
216 ; FMF-LABEL: fmul_fma_reassoc2:
218 ; FMF-NEXT: addis 3, 2, .LCPI8_0@toc@ha
219 ; FMF-NEXT: lfs 0, .LCPI8_0@toc@l(3)
220 ; FMF-NEXT: xsmulsp 1, 1, 0
223 ; GLOBAL-LABEL: fmul_fma_reassoc2:
225 ; GLOBAL-NEXT: addis 3, 2, .LCPI8_0@toc@ha
226 ; GLOBAL-NEXT: lfs 0, .LCPI8_0@toc@l(3)
227 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
229 %mul = fmul reassoc float %x, 42.0
230 %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
234 ; The FMA is now fully 'fast'. This implies that reassociation is allowed.
236 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
237 ; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
238 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
240 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
241 ; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
242 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
244 define float @fmul_fma_fast1(float %x) {
245 ; FMF-LABEL: fmul_fma_fast1:
247 ; FMF-NEXT: addis 3, 2, .LCPI9_0@toc@ha
248 ; FMF-NEXT: lfs 0, .LCPI9_0@toc@l(3)
249 ; FMF-NEXT: xsmulsp 1, 1, 0
252 ; GLOBAL-LABEL: fmul_fma_fast1:
254 ; GLOBAL-NEXT: addis 3, 2, .LCPI9_0@toc@ha
255 ; GLOBAL-NEXT: lfs 0, .LCPI9_0@toc@l(3)
256 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
258 %mul = fmul float %x, 42.0
259 %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
263 ; This shouldn't change anything - the intermediate fmul result is now also flagged.
265 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
266 ; FMFDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
267 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
269 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
270 ; GLOBALDEBUG: fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
271 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
273 define float @fmul_fma_fast2(float %x) {
274 ; FMF-LABEL: fmul_fma_fast2:
276 ; FMF-NEXT: addis 3, 2, .LCPI10_0@toc@ha
277 ; FMF-NEXT: lfs 0, .LCPI10_0@toc@l(3)
278 ; FMF-NEXT: xsmulsp 1, 1, 0
281 ; GLOBAL-LABEL: fmul_fma_fast2:
283 ; GLOBAL-NEXT: addis 3, 2, .LCPI10_0@toc@ha
284 ; GLOBAL-NEXT: lfs 0, .LCPI10_0@toc@l(3)
285 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
287 %mul = fmul fast float %x, 42.0
288 %fma = call fast float @llvm.fma.f32(float %x, float 7.0, float %mul)
292 ; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
294 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
295 ; FMFDEBUG: fmul ninf afn {{t[0-9]+}}
296 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
298 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
299 ; GLOBALDEBUG: fmul ninf afn {{t[0-9]+}}
300 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
302 define float @sqrt_afn_ieee(float %x) #0 {
303 ; FMF-LABEL: sqrt_afn_ieee:
305 ; FMF-NEXT: xsabsdp 0, 1
306 ; FMF-NEXT: addis 3, 2, .LCPI11_1@toc@ha
307 ; FMF-NEXT: lfs 2, .LCPI11_1@toc@l(3)
308 ; FMF-NEXT: fcmpu 0, 0, 2
309 ; FMF-NEXT: xxlxor 0, 0, 0
310 ; FMF-NEXT: blt 0, .LBB11_2
312 ; FMF-NEXT: xsrsqrtesp 0, 1
313 ; FMF-NEXT: vspltisw 2, -3
314 ; FMF-NEXT: addis 3, 2, .LCPI11_0@toc@ha
315 ; FMF-NEXT: lfs 3, .LCPI11_0@toc@l(3)
316 ; FMF-NEXT: xvcvsxwdp 2, 34
317 ; FMF-NEXT: xsmulsp 1, 1, 0
318 ; FMF-NEXT: xsmulsp 0, 1, 0
319 ; FMF-NEXT: xsmulsp 1, 1, 3
320 ; FMF-NEXT: xsaddsp 0, 0, 2
321 ; FMF-NEXT: xsmulsp 0, 1, 0
322 ; FMF-NEXT: .LBB11_2:
326 ; GLOBAL-LABEL: sqrt_afn_ieee:
328 ; GLOBAL-NEXT: xsabsdp 0, 1
329 ; GLOBAL-NEXT: addis 3, 2, .LCPI11_1@toc@ha
330 ; GLOBAL-NEXT: lfs 2, .LCPI11_1@toc@l(3)
331 ; GLOBAL-NEXT: fcmpu 0, 0, 2
332 ; GLOBAL-NEXT: xxlxor 0, 0, 0
333 ; GLOBAL-NEXT: blt 0, .LBB11_2
334 ; GLOBAL-NEXT: # %bb.1:
335 ; GLOBAL-NEXT: xsrsqrtesp 0, 1
336 ; GLOBAL-NEXT: vspltisw 2, -3
337 ; GLOBAL-NEXT: addis 3, 2, .LCPI11_0@toc@ha
338 ; GLOBAL-NEXT: lfs 3, .LCPI11_0@toc@l(3)
339 ; GLOBAL-NEXT: xvcvsxwdp 2, 34
340 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
341 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0
342 ; GLOBAL-NEXT: xsmulsp 0, 1, 3
343 ; GLOBAL-NEXT: xsmulsp 0, 0, 2
344 ; GLOBAL-NEXT: .LBB11_2:
345 ; GLOBAL-NEXT: fmr 1, 0
347 %rt = call afn ninf float @llvm.sqrt.f32(float %x)
351 define float @sqrt_afn_ieee_inf(float %x) #0 {
352 ; FMF-LABEL: sqrt_afn_ieee_inf:
354 ; FMF-NEXT: xssqrtsp 1, 1
357 ; GLOBAL-LABEL: sqrt_afn_ieee_inf:
359 ; GLOBAL-NEXT: xssqrtsp 1, 1
361 %rt = call afn float @llvm.sqrt.f32(float %x)
365 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
366 ; FMFDEBUG: fmul ninf afn {{t[0-9]+}}
367 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
369 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
370 ; GLOBALDEBUG: fmul ninf afn {{t[0-9]+}}
371 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
373 define float @sqrt_afn_preserve_sign(float %x) #1 {
374 ; FMF-LABEL: sqrt_afn_preserve_sign:
376 ; FMF-NEXT: xxlxor 0, 0, 0
377 ; FMF-NEXT: fcmpu 0, 1, 0
378 ; FMF-NEXT: beq 0, .LBB13_2
380 ; FMF-NEXT: xsrsqrtesp 0, 1
381 ; FMF-NEXT: vspltisw 2, -3
382 ; FMF-NEXT: addis 3, 2, .LCPI13_0@toc@ha
383 ; FMF-NEXT: lfs 3, .LCPI13_0@toc@l(3)
384 ; FMF-NEXT: xvcvsxwdp 2, 34
385 ; FMF-NEXT: xsmulsp 1, 1, 0
386 ; FMF-NEXT: xsmulsp 0, 1, 0
387 ; FMF-NEXT: xsmulsp 1, 1, 3
388 ; FMF-NEXT: xsaddsp 0, 0, 2
389 ; FMF-NEXT: xsmulsp 0, 1, 0
390 ; FMF-NEXT: .LBB13_2:
394 ; GLOBAL-LABEL: sqrt_afn_preserve_sign:
396 ; GLOBAL-NEXT: xxlxor 0, 0, 0
397 ; GLOBAL-NEXT: fcmpu 0, 1, 0
398 ; GLOBAL-NEXT: beq 0, .LBB13_2
399 ; GLOBAL-NEXT: # %bb.1:
400 ; GLOBAL-NEXT: xsrsqrtesp 0, 1
401 ; GLOBAL-NEXT: vspltisw 2, -3
402 ; GLOBAL-NEXT: addis 3, 2, .LCPI13_0@toc@ha
403 ; GLOBAL-NEXT: lfs 3, .LCPI13_0@toc@l(3)
404 ; GLOBAL-NEXT: xvcvsxwdp 2, 34
405 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
406 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0
407 ; GLOBAL-NEXT: xsmulsp 0, 1, 3
408 ; GLOBAL-NEXT: xsmulsp 0, 0, 2
409 ; GLOBAL-NEXT: .LBB13_2:
410 ; GLOBAL-NEXT: fmr 1, 0
412 %rt = call afn ninf float @llvm.sqrt.f32(float %x)
416 define float @sqrt_afn_preserve_sign_inf(float %x) #1 {
417 ; FMF-LABEL: sqrt_afn_preserve_sign_inf:
419 ; FMF-NEXT: xssqrtsp 1, 1
422 ; GLOBAL-LABEL: sqrt_afn_preserve_sign_inf:
424 ; GLOBAL-NEXT: xssqrtsp 1, 1
426 %rt = call afn float @llvm.sqrt.f32(float %x)
430 ; The call now also carries 'contract' and 'reassoc' in addition to 'afn ninf' (it is not fully 'fast'). Approximation is still allowed.
432 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
433 ; FMFDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
434 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
436 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
437 ; GLOBALDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
438 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
440 define float @sqrt_fast_ieee(float %x) #0 {
441 ; FMF-LABEL: sqrt_fast_ieee:
443 ; FMF-NEXT: xsabsdp 0, 1
444 ; FMF-NEXT: addis 3, 2, .LCPI15_1@toc@ha
445 ; FMF-NEXT: lfs 2, .LCPI15_1@toc@l(3)
446 ; FMF-NEXT: fcmpu 0, 0, 2
447 ; FMF-NEXT: xxlxor 0, 0, 0
448 ; FMF-NEXT: blt 0, .LBB15_2
450 ; FMF-NEXT: xsrsqrtesp 0, 1
451 ; FMF-NEXT: vspltisw 2, -3
452 ; FMF-NEXT: addis 3, 2, .LCPI15_0@toc@ha
453 ; FMF-NEXT: lfs 3, .LCPI15_0@toc@l(3)
454 ; FMF-NEXT: xvcvsxwdp 2, 34
455 ; FMF-NEXT: xsmulsp 1, 1, 0
456 ; FMF-NEXT: xsmaddasp 2, 1, 0
457 ; FMF-NEXT: xsmulsp 0, 1, 3
458 ; FMF-NEXT: xsmulsp 0, 0, 2
459 ; FMF-NEXT: .LBB15_2:
463 ; GLOBAL-LABEL: sqrt_fast_ieee:
465 ; GLOBAL-NEXT: xsabsdp 0, 1
466 ; GLOBAL-NEXT: addis 3, 2, .LCPI15_1@toc@ha
467 ; GLOBAL-NEXT: lfs 2, .LCPI15_1@toc@l(3)
468 ; GLOBAL-NEXT: fcmpu 0, 0, 2
469 ; GLOBAL-NEXT: xxlxor 0, 0, 0
470 ; GLOBAL-NEXT: blt 0, .LBB15_2
471 ; GLOBAL-NEXT: # %bb.1:
472 ; GLOBAL-NEXT: xsrsqrtesp 0, 1
473 ; GLOBAL-NEXT: vspltisw 2, -3
474 ; GLOBAL-NEXT: addis 3, 2, .LCPI15_0@toc@ha
475 ; GLOBAL-NEXT: lfs 3, .LCPI15_0@toc@l(3)
476 ; GLOBAL-NEXT: xvcvsxwdp 2, 34
477 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
478 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0
479 ; GLOBAL-NEXT: xsmulsp 0, 1, 3
480 ; GLOBAL-NEXT: xsmulsp 0, 0, 2
481 ; GLOBAL-NEXT: .LBB15_2:
482 ; GLOBAL-NEXT: fmr 1, 0
484 %rt = call contract reassoc afn ninf float @llvm.sqrt.f32(float %x)
488 ; The call now also carries 'contract' and 'reassoc' in addition to 'ninf afn' (it is not fully 'fast'). Approximation is still allowed.
490 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
491 ; FMFDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
492 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
494 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
495 ; GLOBALDEBUG: fmul ninf contract afn reassoc {{t[0-9]+}}
496 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
498 define float @sqrt_fast_preserve_sign(float %x) #1 {
499 ; FMF-LABEL: sqrt_fast_preserve_sign:
501 ; FMF-NEXT: xxlxor 0, 0, 0
502 ; FMF-NEXT: fcmpu 0, 1, 0
503 ; FMF-NEXT: beq 0, .LBB16_2
505 ; FMF-NEXT: xsrsqrtesp 0, 1
506 ; FMF-NEXT: vspltisw 2, -3
507 ; FMF-NEXT: addis 3, 2, .LCPI16_0@toc@ha
508 ; FMF-NEXT: lfs 3, .LCPI16_0@toc@l(3)
509 ; FMF-NEXT: xvcvsxwdp 2, 34
510 ; FMF-NEXT: xsmulsp 1, 1, 0
511 ; FMF-NEXT: xsmaddasp 2, 1, 0
512 ; FMF-NEXT: xsmulsp 0, 1, 3
513 ; FMF-NEXT: xsmulsp 0, 0, 2
514 ; FMF-NEXT: .LBB16_2:
518 ; GLOBAL-LABEL: sqrt_fast_preserve_sign:
520 ; GLOBAL-NEXT: xxlxor 0, 0, 0
521 ; GLOBAL-NEXT: fcmpu 0, 1, 0
522 ; GLOBAL-NEXT: beq 0, .LBB16_2
523 ; GLOBAL-NEXT: # %bb.1:
524 ; GLOBAL-NEXT: xsrsqrtesp 0, 1
525 ; GLOBAL-NEXT: vspltisw 2, -3
526 ; GLOBAL-NEXT: addis 3, 2, .LCPI16_0@toc@ha
527 ; GLOBAL-NEXT: lfs 3, .LCPI16_0@toc@l(3)
528 ; GLOBAL-NEXT: xvcvsxwdp 2, 34
529 ; GLOBAL-NEXT: xsmulsp 1, 1, 0
530 ; GLOBAL-NEXT: xsmaddasp 2, 1, 0
531 ; GLOBAL-NEXT: xsmulsp 0, 1, 3
532 ; GLOBAL-NEXT: xsmulsp 0, 0, 2
533 ; GLOBAL-NEXT: .LBB16_2:
534 ; GLOBAL-NEXT: fmr 1, 0
536 %rt = call contract reassoc ninf afn float @llvm.sqrt.f32(float %x)
540 ; fcmp can have fast-math-flags.
542 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
543 ; FMFDEBUG: select_cc nnan {{t[0-9]+}}
544 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
546 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
547 ; GLOBALDEBUG: select_cc nnan {{t[0-9]+}}
548 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
550 define double @fcmp_nnan(double %a, double %y, double %z) {
551 ; FMF-LABEL: fcmp_nnan:
553 ; FMF-NEXT: xxlxor 0, 0, 0
554 ; FMF-NEXT: xscmpudp 0, 1, 0
555 ; FMF-NEXT: blt 0, .LBB17_2
558 ; FMF-NEXT: .LBB17_2:
562 ; GLOBAL-LABEL: fcmp_nnan:
564 ; GLOBAL-NEXT: xxlxor 0, 0, 0
565 ; GLOBAL-NEXT: xscmpudp 0, 1, 0
566 ; GLOBAL-NEXT: blt 0, .LBB17_2
567 ; GLOBAL-NEXT: # %bb.1:
568 ; GLOBAL-NEXT: fmr 3, 2
569 ; GLOBAL-NEXT: .LBB17_2:
570 ; GLOBAL-NEXT: fmr 1, 3
572 %cmp = fcmp nnan ult double %a, 0.0
573 %z.y = select i1 %cmp, double %z, double %y
577 ; FP library calls can have fast-math-flags.
579 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
580 ; FMFDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<ptr @log2>
581 ; FMFDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
582 ; FMFDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1
583 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:'
585 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
586 ; GLOBALDEBUG: ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<ptr @log2>
587 ; GLOBALDEBUG: ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
588 ; GLOBALDEBUG: f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1
589 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'log2_approx:'
591 declare double @log2(double)
592 define double @log2_approx(double %x) nounwind {
593 ; FMF-LABEL: log2_approx:
596 ; FMF-NEXT: stdu 1, -32(1)
597 ; FMF-NEXT: std 0, 48(1)
600 ; FMF-NEXT: addi 1, 1, 32
601 ; FMF-NEXT: ld 0, 16(1)
605 ; GLOBAL-LABEL: log2_approx:
607 ; GLOBAL-NEXT: mflr 0
608 ; GLOBAL-NEXT: stdu 1, -32(1)
609 ; GLOBAL-NEXT: std 0, 48(1)
610 ; GLOBAL-NEXT: bl log2
612 ; GLOBAL-NEXT: addi 1, 1, 32
613 ; GLOBAL-NEXT: ld 0, 16(1)
614 ; GLOBAL-NEXT: mtlr 0
616 %r = call afn double @log2(double %x)
620 ; -(X - Y) --> (Y - X)
622 ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
623 ; FMFDEBUG: fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
624 ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
626 ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
627 ; GLOBALDEBUG: fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
628 ; GLOBALDEBUG: Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
630 define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) {
631 ; FMF-LABEL: fneg_fsub_nozeros_1:
633 ; FMF-NEXT: xssubsp 1, 2, 1
636 ; GLOBAL-LABEL: fneg_fsub_nozeros_1:
638 ; GLOBAL-NEXT: xssubsp 1, 2, 1
640 %neg = fsub float %x, %y
641 %add = fsub nsz float 0.0, %neg
645 attributes #0 = { "denormal-fp-math"="ieee,ieee" }
646 attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
647 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
649 ; GLOBALDEBUG: {{.*}}