; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 -O3 | FileCheck %s --check-prefixes=SSE-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+fma -O3 | FileCheck %s --check-prefixes=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+fma -O3 | FileCheck %s --check-prefixes=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX-X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -mattr=+avx512vl -O3 | FileCheck %s --check-prefixes=AVX-X64
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=-sse -O3 | FileCheck %s --check-prefixes=X87

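; The two +sse2 runs use the SSE-X86/SSE-X64 prefixes, the +fma and
; +avx512f/+avx512vl runs share the AVX-X86/AVX-X64 prefixes, and the -sse
; run checks the x87-only lowering under the X87 prefix.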
declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)

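; On i686, floating-point arguments are passed on the stack and scalar results
; are returned in st(0), so the *-X86 check blocks below store the SSE/AVX
; result to the stack and reload it with fld before returning.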
define double @fadd_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fadd_f64:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    addsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
;
; SSE-X64-LABEL: fadd_f64:
; SSE-X64-NEXT:    addsd %xmm1, %xmm0
;
; AVX-X86-LABEL: fadd_f64:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vaddsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
;
; AVX-X64-LABEL: fadd_f64:
; AVX-X64-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
;
; X87-LABEL: fadd_f64:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    faddl {{[0-9]+}}(%esp)
  %ret = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  ret double %ret
}

define float @fadd_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fadd_f32:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    popl %eax
;
; SSE-X64-LABEL: fadd_f32:
; SSE-X64-NEXT:    addss %xmm1, %xmm0
;
; AVX-X86-LABEL: fadd_f32:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    popl %eax
;
; AVX-X64-LABEL: fadd_f32:
; AVX-X64-NEXT:    vaddss %xmm1, %xmm0, %xmm0
;
; X87-LABEL: fadd_f32:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fadds {{[0-9]+}}(%esp)
  %ret = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  ret float %ret
}

define double @fsub_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fsub_f64:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    subsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
;
; SSE-X64-LABEL: fsub_f64:
; SSE-X64-NEXT:    subsd %xmm1, %xmm0
;
; AVX-X86-LABEL: fsub_f64:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vsubsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
;
; AVX-X64-LABEL: fsub_f64:
; AVX-X64-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
;
; X87-LABEL: fsub_f64:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fsubl {{[0-9]+}}(%esp)
  %ret = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  ret double %ret
}

define float @fsub_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fsub_f32:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    subss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    popl %eax
;
; SSE-X64-LABEL: fsub_f32:
; SSE-X64-NEXT:    subss %xmm1, %xmm0
;
; AVX-X86-LABEL: fsub_f32:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    popl %eax
;
; AVX-X64-LABEL: fsub_f32:
; AVX-X64-NEXT:    vsubss %xmm1, %xmm0, %xmm0
;
; X87-LABEL: fsub_f32:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fsubs {{[0-9]+}}(%esp)
  %ret = call float @llvm.experimental.constrained.fsub.f32(float %a, float %b,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  ret float %ret
}

define double @fmul_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fmul_f64:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    mulsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
;
; SSE-X64-LABEL: fmul_f64:
; SSE-X64-NEXT:    mulsd %xmm1, %xmm0
;
; AVX-X86-LABEL: fmul_f64:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vmulsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
;
; AVX-X64-LABEL: fmul_f64:
; AVX-X64-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
;
; X87-LABEL: fmul_f64:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fmull {{[0-9]+}}(%esp)
  %ret = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  ret double %ret
}

define float @fmul_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fmul_f32:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    mulss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    popl %eax
;
; SSE-X64-LABEL: fmul_f32:
; SSE-X64-NEXT:    mulss %xmm1, %xmm0
;
; AVX-X86-LABEL: fmul_f32:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    popl %eax
;
; AVX-X64-LABEL: fmul_f32:
; AVX-X64-NEXT:    vmulss %xmm1, %xmm0, %xmm0
;
; X87-LABEL: fmul_f32:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fmuls {{[0-9]+}}(%esp)
  %ret = call float @llvm.experimental.constrained.fmul.f32(float %a, float %b,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  ret float %ret
}

define double @fdiv_f64(double %a, double %b) nounwind strictfp {
; SSE-X86-LABEL: fdiv_f64:
; SSE-X86-NEXT:    pushl %ebp
; SSE-X86-NEXT:    movl %esp, %ebp
; SSE-X86-NEXT:    andl $-8, %esp
; SSE-X86-NEXT:    subl $8, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    divsd 16(%ebp), %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    fldl (%esp)
; SSE-X86-NEXT:    movl %ebp, %esp
; SSE-X86-NEXT:    popl %ebp
;
; SSE-X64-LABEL: fdiv_f64:
; SSE-X64-NEXT:    divsd %xmm1, %xmm0
;
; AVX-X86-LABEL: fdiv_f64:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vdivsd 16(%ebp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
;
; AVX-X64-LABEL: fdiv_f64:
; AVX-X64-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
;
; X87-LABEL: fdiv_f64:
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fdivl {{[0-9]+}}(%esp)
  %ret = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  ret double %ret
}

define float @fdiv_f32(float %a, float %b) nounwind strictfp {
; SSE-X86-LABEL: fdiv_f32:
; SSE-X86-NEXT:    pushl %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    divss {{[0-9]+}}(%esp), %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    flds (%esp)
; SSE-X86-NEXT:    popl %eax
;
; SSE-X64-LABEL: fdiv_f32:
; SSE-X64-NEXT:    divss %xmm1, %xmm0
;
; AVX-X86-LABEL: fdiv_f32:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vdivss {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    popl %eax
;
; AVX-X64-LABEL: fdiv_f32:
; AVX-X64-NEXT:    vdivss %xmm1, %xmm0, %xmm0
;
; X87-LABEL: fdiv_f32:
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fdivs {{[0-9]+}}(%esp)
  %ret = call float @llvm.experimental.constrained.fdiv.f32(float %a, float %b,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  ret float %ret
}

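; Conversion tests: fpext (f32 -> f64) and fptrunc (f64 -> f32) load from and
; store through pointers, so the conversion itself is the only FP operation
; being checked.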
define void @fpext_f32_to_f64(ptr %val, ptr %ret) nounwind strictfp {
; SSE-X86-LABEL: fpext_f32_to_f64:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    cvtss2sd %xmm0, %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%eax)
;
; SSE-X64-LABEL: fpext_f32_to_f64:
; SSE-X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X64-NEXT:    cvtss2sd %xmm0, %xmm0
; SSE-X64-NEXT:    movsd %xmm0, (%rsi)
;
; AVX-X86-LABEL: fpext_f32_to_f64:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%eax)
;
; AVX-X64-LABEL: fpext_f32_to_f64:
; AVX-X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X64-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovsd %xmm0, (%rsi)
;
; X87-LABEL: fpext_f32_to_f64:
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT:    flds (%ecx)
; X87-NEXT:    fstpl (%eax)
  %1 = load float, ptr %val, align 4
  %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %1,
    metadata !"fpexcept.strict") #0
  store double %res, ptr %ret, align 8
  ret void
}

define void @fptrunc_double_to_f32(ptr %val, ptr %ret) nounwind strictfp {
; SSE-X86-LABEL: fptrunc_double_to_f32:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    cvtsd2ss %xmm0, %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%eax)
;
; SSE-X64-LABEL: fptrunc_double_to_f32:
; SSE-X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X64-NEXT:    cvtsd2ss %xmm0, %xmm0
; SSE-X64-NEXT:    movss %xmm0, (%rsi)
;
; AVX-X86-LABEL: fptrunc_double_to_f32:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%eax)
;
; AVX-X64-LABEL: fptrunc_double_to_f32:
; AVX-X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X64-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovss %xmm0, (%rsi)
;
; X87-LABEL: fptrunc_double_to_f32:
; X87-NEXT:    pushl %eax
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X87-NEXT:    fldl (%ecx)
; X87-NEXT:    fstps (%esp)
; X87-NEXT:    flds (%esp)
; X87-NEXT:    fstps (%eax)
; X87-NEXT:    popl %eax
  %1 = load double, ptr %val, align 8
  %res = call float @llvm.experimental.constrained.fptrunc.f32.f64(double %1,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  store float %res, ptr %ret, align 4
  ret void
}

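; Square-root tests: on the SSE/AVX configurations the constrained sqrt lowers
; to a single (v)sqrtsd/(v)sqrtss; the X87 run exercises the x87-only lowering.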
define void @fsqrt_f64(ptr %a) nounwind strictfp {
; SSE-X86-LABEL: fsqrt_f64:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-X86-NEXT:    movsd %xmm0, (%eax)
;
; SSE-X64-LABEL: fsqrt_f64:
; SSE-X64-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X64-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-X64-NEXT:    movsd %xmm0, (%rdi)
;
; AVX-X86-LABEL: fsqrt_f64:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovsd %xmm0, (%eax)
;
; AVX-X64-LABEL: fsqrt_f64:
; AVX-X64-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X64-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovsd %xmm0, (%rdi)
;
; X87-LABEL: fsqrt_f64:
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    fldl (%eax)
; X87-NEXT:    fstpl (%eax)
  %1 = load double, ptr %a, align 8
  %res = call double @llvm.experimental.constrained.sqrt.f64(double %1,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  store double %res, ptr %a, align 8
  ret void
}

define void @fsqrt_f32(ptr %a) nounwind strictfp {
; SSE-X86-LABEL: fsqrt_f32:
; SSE-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    sqrtss %xmm0, %xmm0
; SSE-X86-NEXT:    movss %xmm0, (%eax)
;
; SSE-X64-LABEL: fsqrt_f32:
; SSE-X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X64-NEXT:    sqrtss %xmm0, %xmm0
; SSE-X64-NEXT:    movss %xmm0, (%rdi)
;
; AVX-X86-LABEL: fsqrt_f32:
; AVX-X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-X86-NEXT:    vmovss %xmm0, (%eax)
;
; AVX-X64-LABEL: fsqrt_f32:
; AVX-X64-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X64-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-X64-NEXT:    vmovss %xmm0, (%rdi)
;
; X87-LABEL: fsqrt_f32:
; X87-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X87-NEXT:    flds (%eax)
; X87-NEXT:    fstps (%eax)
  %1 = load float, ptr %a, align 4
  %res = call float @llvm.experimental.constrained.sqrt.f32(float %1,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  store float %res, ptr %a, align 4
  ret void
}

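; FMA tests: with +fma or AVX-512 the constrained fma maps to a single
; vfmadd213sd/vfmadd213ss, while the SSE and X87 runs, which have no FMA
; hardware, fall back to calls to the fma/fmaf library functions, as the
; calll/callq lines below show.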
define double @fma_f64(double %a, double %b, double %c) nounwind strictfp {
; SSE-X86-LABEL: fma_f64:
; SSE-X86-NEXT:    subl $24, %esp
; SSE-X86-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-X86-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE-X86-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-X86-NEXT:    movsd %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movsd %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movsd %xmm0, (%esp)
; SSE-X86-NEXT:    calll fma
; SSE-X86-NEXT:    addl $24, %esp
;
; SSE-X64-LABEL: fma_f64:
; SSE-X64-NEXT:    pushq %rax
; SSE-X64-NEXT:    callq fma@PLT
; SSE-X64-NEXT:    popq %rax
;
; AVX-X86-LABEL: fma_f64:
; AVX-X86-NEXT:    pushl %ebp
; AVX-X86-NEXT:    movl %esp, %ebp
; AVX-X86-NEXT:    andl $-8, %esp
; AVX-X86-NEXT:    subl $8, %esp
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-X86-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; AVX-X86-NEXT:    vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT:    vmovsd %xmm1, (%esp)
; AVX-X86-NEXT:    fldl (%esp)
; AVX-X86-NEXT:    movl %ebp, %esp
; AVX-X86-NEXT:    popl %ebp
;
; AVX-X64-LABEL: fma_f64:
; AVX-X64-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
;
; X87-LABEL: fma_f64:
; X87-NEXT:    subl $24, %esp
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
; X87-NEXT:    fstpl {{[0-9]+}}(%esp)
; X87-NEXT:    fstpl (%esp)
; X87-NEXT:    calll fma
; X87-NEXT:    addl $24, %esp
  %res = call double @llvm.experimental.constrained.fma.f64(double %a, double %b, double %c,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  ret double %res
}

define float @fma_f32(float %a, float %b, float %c) nounwind strictfp {
; SSE-X86-LABEL: fma_f32:
; SSE-X86-NEXT:    subl $12, %esp
; SSE-X86-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-X86-NEXT:    movss %xmm2, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movss %xmm1, {{[0-9]+}}(%esp)
; SSE-X86-NEXT:    movss %xmm0, (%esp)
; SSE-X86-NEXT:    calll fmaf
; SSE-X86-NEXT:    addl $12, %esp
;
; SSE-X64-LABEL: fma_f32:
; SSE-X64-NEXT:    pushq %rax
; SSE-X64-NEXT:    callq fmaf@PLT
; SSE-X64-NEXT:    popq %rax
;
; AVX-X86-LABEL: fma_f32:
; AVX-X86-NEXT:    pushl %eax
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-X86-NEXT:    vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX-X86-NEXT:    vmovss %xmm1, (%esp)
; AVX-X86-NEXT:    flds (%esp)
; AVX-X86-NEXT:    popl %eax
;
; AVX-X64-LABEL: fma_f32:
; AVX-X64-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
;
; X87-LABEL: fma_f32:
; X87-NEXT:    subl $12, %esp
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    flds {{[0-9]+}}(%esp)
; X87-NEXT:    fstps {{[0-9]+}}(%esp)
; X87-NEXT:    fstps {{[0-9]+}}(%esp)
; X87-NEXT:    fstps (%esp)
; X87-NEXT:    calll fmaf
; X87-NEXT:    addl $12, %esp
  %res = call float @llvm.experimental.constrained.fma.f32(float %a, float %b, float %c,
    metadata !"round.dynamic",
    metadata !"fpexcept.strict") #0
  ret float %res
}

attributes #0 = { strictfp }