1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --default-march x86_64-unknown-linux-gnu --version 5
2 ; RUN: llc -mtriple=i686-- < %s | FileCheck %s -check-prefixes=X87
3 ; RUN: llc -mattr=+sse2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=SSE,SSE2
4 ; RUN: llc -mattr=+avx -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX1
5 ; RUN: llc -mattr=+avx2 -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX2
6 ; RUN: llc -mattr=+avx512f -mtriple=x86_64 < %s | FileCheck %s -check-prefixes=AVX,AVX512F
; Scalar float passed by value: %a goes straight into llvm.canonicalize.f32
; and the canonicalized value is returned.
; The checks visible here expect a multiply on every run line (fmuls from a
; stack slot on i686, mulss/vmulss with a constant-pool operand on x86_64).
8 define float @canon_fp32_varargsf32(float %a) {
9 ; X87-LABEL: canon_fp32_varargsf32:
12 ; X87-NEXT: fmuls {{[0-9]+}}(%esp)
15 ; SSE-LABEL: canon_fp32_varargsf32:
17 ; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20 ; AVX-LABEL: canon_fp32_varargsf32:
22 ; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
24 %canonicalized = call float @llvm.canonicalize.f32(float %a)
25 ret float %canonicalized
; Scalar x86_fp80 passed by value and canonicalized with
; llvm.canonicalize.f80 (despite the "fp32" in the function name, the operand
; and result type are x86_fp80).
; All three prefix groups expect the value to be loaded with fldt from the
; stack and multiplied with fmulp on the x87 stack.
28 define x86_fp80 @canon_fp32_varargsf80(x86_fp80 %a) {
29 ; X87-LABEL: canon_fp32_varargsf80:
31 ; X87-NEXT: fldt {{[0-9]+}}(%esp)
33 ; X87-NEXT: fmulp %st, %st(1)
36 ; SSE-LABEL: canon_fp32_varargsf80:
38 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
40 ; SSE-NEXT: fmulp %st, %st(1)
43 ; AVX-LABEL: canon_fp32_varargsf80:
45 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
47 ; AVX-NEXT: fmulp %st, %st(1)
49 %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %a)
50 ret x86_fp80 %canonicalized
; Canonicalize in the middle of an x86_fp80 arithmetic chain:
;   %mul1 = a - b
;   %add  = %mul1 + b
;   %mul2 = %add - %mul1
;   result = canonicalize(%mul2) - b
; The same x87 stack sequence is expected for every prefix group; only the
; stack pointer register (%esp vs. %rsp) differs.
53 define x86_fp80 @complex_canonicalize_fmul_x86_fp80(x86_fp80 %a, x86_fp80 %b) {
54 ; X87-LABEL: complex_canonicalize_fmul_x86_fp80:
55 ; X87: # %bb.0: # %entry
56 ; X87-NEXT: fldt {{[0-9]+}}(%esp)
57 ; X87-NEXT: fldt {{[0-9]+}}(%esp)
58 ; X87-NEXT: fsub %st(1), %st
59 ; X87-NEXT: fld %st(0)
60 ; X87-NEXT: fadd %st(2), %st
61 ; X87-NEXT: fsubp %st, %st(1)
63 ; X87-NEXT: fmulp %st, %st(1)
64 ; X87-NEXT: fsubp %st, %st(1)
67 ; SSE-LABEL: complex_canonicalize_fmul_x86_fp80:
68 ; SSE: # %bb.0: # %entry
69 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
70 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
71 ; SSE-NEXT: fsub %st(1), %st
72 ; SSE-NEXT: fld %st(0)
73 ; SSE-NEXT: fadd %st(2), %st
74 ; SSE-NEXT: fsubp %st, %st(1)
76 ; SSE-NEXT: fmulp %st, %st(1)
77 ; SSE-NEXT: fsubp %st, %st(1)
80 ; AVX-LABEL: complex_canonicalize_fmul_x86_fp80:
81 ; AVX: # %bb.0: # %entry
82 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
83 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
84 ; AVX-NEXT: fsub %st(1), %st
85 ; AVX-NEXT: fld %st(0)
86 ; AVX-NEXT: fadd %st(2), %st
87 ; AVX-NEXT: fsubp %st, %st(1)
89 ; AVX-NEXT: fmulp %st, %st(1)
90 ; AVX-NEXT: fsubp %st, %st(1)
; The value names say "mul", but %mul1 and %mul2 are fsub results.
93 %mul1 = fsub x86_fp80 %a, %b
94 %add = fadd x86_fp80 %mul1, %b
95 %mul2 = fsub x86_fp80 %add, %mul1
96 %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %mul2)
97 %result = fsub x86_fp80 %canonicalized, %b
; Canonicalize the result of a compare-and-select on doubles:
;   pick %b when (%a is NaN) or (%a < %b, ordered), otherwise pick %a,
;   then pass the selected value to llvm.canonicalize.f64.
; On x86_64 the select is expected to become a max plus an unordered-compare
; blend (a mask-register move on AVX512F), followed by a multiply with a
; constant-pool operand. On i686 it is expected to become fucom/fnstsw
; compares with branches, followed by fmulp.
101 define double @canonicalize_fp64(double %a, double %b) unnamed_addr #0 {
102 ; X87-LABEL: canonicalize_fp64:
103 ; X87: # %bb.0: # %start
104 ; X87-NEXT: fldl {{[0-9]+}}(%esp)
105 ; X87-NEXT: fldl {{[0-9]+}}(%esp)
106 ; X87-NEXT: fucom %st(1)
107 ; X87-NEXT: fnstsw %ax
108 ; X87-NEXT: # kill: def $ah killed $ah killed $ax
110 ; X87-NEXT: fxch %st(1)
111 ; X87-NEXT: fucom %st(0)
112 ; X87-NEXT: fnstsw %ax
113 ; X87-NEXT: fld %st(1)
114 ; X87-NEXT: ja .LBB3_2
115 ; X87-NEXT: # %bb.1: # %start
116 ; X87-NEXT: fstp %st(0)
118 ; X87-NEXT: fxch %st(1)
119 ; X87-NEXT: .LBB3_2: # %start
120 ; X87-NEXT: fstp %st(1)
121 ; X87-NEXT: # kill: def $ah killed $ah killed $ax
123 ; X87-NEXT: jp .LBB3_4
124 ; X87-NEXT: # %bb.3: # %start
125 ; X87-NEXT: fstp %st(1)
127 ; X87-NEXT: .LBB3_4: # %start
128 ; X87-NEXT: fstp %st(0)
130 ; X87-NEXT: fmulp %st, %st(1)
133 ; SSE-LABEL: canonicalize_fp64:
134 ; SSE: # %bb.0: # %start
135 ; SSE-NEXT: movapd %xmm0, %xmm2
136 ; SSE-NEXT: cmpunordsd %xmm0, %xmm2
137 ; SSE-NEXT: movapd %xmm2, %xmm3
138 ; SSE-NEXT: andpd %xmm1, %xmm3
139 ; SSE-NEXT: maxsd %xmm0, %xmm1
140 ; SSE-NEXT: andnpd %xmm1, %xmm2
141 ; SSE-NEXT: orpd %xmm3, %xmm2
142 ; SSE-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
143 ; SSE-NEXT: movapd %xmm2, %xmm0
146 ; AVX1-LABEL: canonicalize_fp64:
147 ; AVX1: # %bb.0: # %start
148 ; AVX1-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
149 ; AVX1-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
150 ; AVX1-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
151 ; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
154 ; AVX2-LABEL: canonicalize_fp64:
155 ; AVX2: # %bb.0: # %start
156 ; AVX2-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
157 ; AVX2-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0
158 ; AVX2-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0
159 ; AVX2-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
162 ; AVX512F-LABEL: canonicalize_fp64:
163 ; AVX512F: # %bb.0: # %start
164 ; AVX512F-NEXT: vmaxsd %xmm0, %xmm1, %xmm2
165 ; AVX512F-NEXT: vcmpunordsd %xmm0, %xmm0, %k1
166 ; AVX512F-NEXT: vmovsd %xmm1, %xmm2, %xmm2 {%k1}
167 ; AVX512F-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; %c is the ordered a < b test; %d is true exactly when %a is NaN
; (an unordered compare against the constant 0.0).
170 %c = fcmp olt double %a, %b
171 %d = fcmp uno double %a, 0.000000e+00
172 %or.cond.i.i = or i1 %d, %c
173 %e = select i1 %or.cond.i.i, double %b, double %a
174 %f = tail call double @llvm.canonicalize.f64(double %e) #2
; Single-precision counterpart of the compare-and-select test:
;   pick %bb when (%aa is NaN) or (%aa < %bb, ordered), otherwise pick %aa,
;   then pass the selected value to llvm.canonicalize.f32.
; On x86_64 the select is expected to become a max plus an unordered-compare
; blend (a mask-register move on AVX512F), followed by a multiply with a
; constant-pool operand. On i686 it is expected to become fucom/fnstsw
; compares with branches, followed by fmulp.
178 define float @canonicalize_fp32(float %aa, float %bb) unnamed_addr #0 {
179 ; X87-LABEL: canonicalize_fp32:
180 ; X87: # %bb.0: # %start
181 ; X87-NEXT: flds {{[0-9]+}}(%esp)
182 ; X87-NEXT: flds {{[0-9]+}}(%esp)
183 ; X87-NEXT: fucom %st(1)
184 ; X87-NEXT: fnstsw %ax
185 ; X87-NEXT: # kill: def $ah killed $ah killed $ax
187 ; X87-NEXT: fxch %st(1)
188 ; X87-NEXT: fucom %st(0)
189 ; X87-NEXT: fnstsw %ax
190 ; X87-NEXT: fld %st(1)
191 ; X87-NEXT: ja .LBB4_2
192 ; X87-NEXT: # %bb.1: # %start
193 ; X87-NEXT: fstp %st(0)
195 ; X87-NEXT: fxch %st(1)
196 ; X87-NEXT: .LBB4_2: # %start
197 ; X87-NEXT: fstp %st(1)
198 ; X87-NEXT: # kill: def $ah killed $ah killed $ax
200 ; X87-NEXT: jp .LBB4_4
201 ; X87-NEXT: # %bb.3: # %start
202 ; X87-NEXT: fstp %st(1)
204 ; X87-NEXT: .LBB4_4: # %start
205 ; X87-NEXT: fstp %st(0)
207 ; X87-NEXT: fmulp %st, %st(1)
210 ; SSE-LABEL: canonicalize_fp32:
211 ; SSE: # %bb.0: # %start
212 ; SSE-NEXT: movaps %xmm0, %xmm2
213 ; SSE-NEXT: cmpunordss %xmm0, %xmm2
214 ; SSE-NEXT: movaps %xmm2, %xmm3
215 ; SSE-NEXT: andps %xmm1, %xmm3
216 ; SSE-NEXT: maxss %xmm0, %xmm1
217 ; SSE-NEXT: andnps %xmm1, %xmm2
218 ; SSE-NEXT: orps %xmm3, %xmm2
219 ; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
220 ; SSE-NEXT: movaps %xmm2, %xmm0
223 ; AVX1-LABEL: canonicalize_fp32:
224 ; AVX1: # %bb.0: # %start
225 ; AVX1-NEXT: vmaxss %xmm0, %xmm1, %xmm2
226 ; AVX1-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
227 ; AVX1-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
228 ; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
231 ; AVX2-LABEL: canonicalize_fp32:
232 ; AVX2: # %bb.0: # %start
233 ; AVX2-NEXT: vmaxss %xmm0, %xmm1, %xmm2
234 ; AVX2-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0
235 ; AVX2-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
236 ; AVX2-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
239 ; AVX512F-LABEL: canonicalize_fp32:
240 ; AVX512F: # %bb.0: # %start
241 ; AVX512F-NEXT: vmaxss %xmm0, %xmm1, %xmm2
242 ; AVX512F-NEXT: vcmpunordss %xmm0, %xmm0, %k1
243 ; AVX512F-NEXT: vmovss %xmm1, %xmm2, %xmm2 {%k1}
244 ; AVX512F-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm0
; %cc is the ordered aa < bb test; %dd is true exactly when %aa is NaN
; (an unordered compare against the constant 0.0).
247 %cc = fcmp olt float %aa, %bb
248 %dd = fcmp uno float %aa, 0.000000e+00
249 %or.cond.i.i.x = or i1 %dd, %cc
250 %ee = select i1 %or.cond.i.i.x, float %bb, float %aa
251 %ff = tail call float @llvm.canonicalize.f32(float %ee) #2
; Load a float through %out (an addrspace(1) pointer), canonicalize it with
; llvm.canonicalize.f32, and store the result back to the same address.
; The checks expect load / multiply / store: fmuls + fstps through %eax on
; i686, and movss/mulss/movss (or the v-prefixed forms) through %rdi on x86_64.
; The pointer is spelled with the opaque `ptr` type; the accessed type is
; given by the load and store themselves.
255 define void @v_test_canonicalize_var_f32(ptr addrspace(1) %out) #1 {
256 ; X87-LABEL: v_test_canonicalize_var_f32:
258 ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
260 ; X87-NEXT: fmuls (%eax)
261 ; X87-NEXT: fstps (%eax)
264 ; SSE-LABEL: v_test_canonicalize_var_f32:
266 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
267 ; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
268 ; SSE-NEXT: movss %xmm0, (%rdi)
271 ; AVX-LABEL: v_test_canonicalize_var_f32:
273 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
274 ; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
275 ; AVX-NEXT: vmovss %xmm0, (%rdi)
277 %val = load float, ptr addrspace(1) %out
278 %canonicalized = call float @llvm.canonicalize.f32(float %val)
279 store float %canonicalized, ptr addrspace(1) %out
; Load an x86_fp80 through %out (an addrspace(1) pointer), canonicalize it
; with llvm.canonicalize.f80, and store the result back to the same address.
; Every prefix group expects fldt / fmulp / fstpt on the x87 stack; only the
; address register differs (%eax on i686, %rdi on x86_64).
; The pointer is spelled with the opaque `ptr` type; the accessed type is
; given by the load and store themselves.
283 define void @v_test_canonicalize_x86_fp80(ptr addrspace(1) %out) #1 {
284 ; X87-LABEL: v_test_canonicalize_x86_fp80:
286 ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
287 ; X87-NEXT: fldt (%eax)
289 ; X87-NEXT: fmulp %st, %st(1)
290 ; X87-NEXT: fstpt (%eax)
293 ; SSE-LABEL: v_test_canonicalize_x86_fp80:
295 ; SSE-NEXT: fldt (%rdi)
297 ; SSE-NEXT: fmulp %st, %st(1)
298 ; SSE-NEXT: fstpt (%rdi)
301 ; AVX-LABEL: v_test_canonicalize_x86_fp80:
303 ; AVX-NEXT: fldt (%rdi)
305 ; AVX-NEXT: fmulp %st, %st(1)
306 ; AVX-NEXT: fstpt (%rdi)
308 %val = load x86_fp80, ptr addrspace(1) %out
309 %canonicalized = call x86_fp80 @llvm.canonicalize.f80(x86_fp80 %val)
310 store x86_fp80 %canonicalized, ptr addrspace(1) %out
; Load a double through %out (an addrspace(1) pointer), canonicalize it with
; llvm.canonicalize.f64, and store the result back to the same address.
; The checks expect load / multiply / store: fmull + fstpl through %eax on
; i686, and movsd/mulsd/movsd (or the v-prefixed forms) through %rdi on x86_64.
; The pointer is spelled with the opaque `ptr` type; the accessed type is
; given by the load and store themselves.
314 define void @v_test_canonicalize_var_f64(ptr addrspace(1) %out) #1 {
315 ; X87-LABEL: v_test_canonicalize_var_f64:
317 ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
319 ; X87-NEXT: fmull (%eax)
320 ; X87-NEXT: fstpl (%eax)
323 ; SSE-LABEL: v_test_canonicalize_var_f64:
325 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
326 ; SSE-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
327 ; SSE-NEXT: movsd %xmm0, (%rdi)
330 ; AVX-LABEL: v_test_canonicalize_var_f64:
332 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
333 ; AVX-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
334 ; AVX-NEXT: vmovsd %xmm0, (%rdi)
336 %val = load double, ptr addrspace(1) %out
337 %canonicalized = call double @llvm.canonicalize.f64(double %val)
338 store double %canonicalized, ptr addrspace(1) %out
; Canonicalize an undef double and store the result through %out.
; No multiply is expected here: every prefix group stores the constant bit
; pattern 0x7FF8000000000000 directly (as two 32-bit immediates on i686, as
; one movabsq + movq on x86_64).
; The `undef` operand is deliberate; it is the case under test.
; The pointer is spelled with the opaque `ptr` type; the stored type is given
; by the store itself.
342 define void @canonicalize_undef(ptr addrspace(1) %out) {
343 ; X87-LABEL: canonicalize_undef:
345 ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
346 ; X87-NEXT: movl $2146959360, 4(%eax) # imm = 0x7FF80000
347 ; X87-NEXT: movl $0, (%eax)
350 ; SSE-LABEL: canonicalize_undef:
352 ; SSE-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
353 ; SSE-NEXT: movq %rax, (%rdi)
356 ; AVX-LABEL: canonicalize_undef:
358 ; AVX-NEXT: movabsq $9221120237041090560, %rax # imm = 0x7FF8000000000000
359 ; AVX-NEXT: movq %rax, (%rdi)
361 %canonicalized = call double @llvm.canonicalize.f64(double undef)
362 store double %canonicalized, ptr addrspace(1) %out
; <4 x float> passed by value and canonicalized with
; llvm.canonicalize.v4f32.
; Expected lowering per run line, as far as the visible checks show:
;  - i686: four scalar fmuls from stack slots, with the products stored at
;    offsets 0, 4, 8 and 12 from the pointer loaded into %eax;
;  - SSE2 and AVX1: one mulps/vmulps with a constant-pool operand;
;  - AVX2 and AVX512F: a vbroadcastss of 1.0 followed by vmulps.
366 define <4 x float> @canon_fp32_varargsv4f32(<4 x float> %a) {
367 ; X87-LABEL: canon_fp32_varargsv4f32:
369 ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
371 ; X87-NEXT: fld %st(0)
372 ; X87-NEXT: fmuls {{[0-9]+}}(%esp)
373 ; X87-NEXT: fld %st(1)
374 ; X87-NEXT: fmuls {{[0-9]+}}(%esp)
375 ; X87-NEXT: fld %st(2)
376 ; X87-NEXT: fmuls {{[0-9]+}}(%esp)
377 ; X87-NEXT: fxch %st(3)
378 ; X87-NEXT: fmuls {{[0-9]+}}(%esp)
379 ; X87-NEXT: fstps 12(%eax)
380 ; X87-NEXT: fxch %st(2)
381 ; X87-NEXT: fstps 8(%eax)
382 ; X87-NEXT: fxch %st(1)
383 ; X87-NEXT: fstps 4(%eax)
384 ; X87-NEXT: fstps (%eax)
387 ; SSE-LABEL: canon_fp32_varargsv4f32:
389 ; SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
392 ; AVX1-LABEL: canon_fp32_varargsv4f32:
394 ; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
397 ; AVX2-LABEL: canon_fp32_varargsv4f32:
399 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
400 ; AVX2-NEXT: vmulps %xmm1, %xmm0, %xmm0
403 ; AVX512F-LABEL: canon_fp32_varargsv4f32:
405 ; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
406 ; AVX512F-NEXT: vmulps %xmm1, %xmm0, %xmm0
408 %canonicalized = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %a)
409 ret <4 x float> %canonicalized
; <4 x double> passed by value and canonicalized with
; llvm.canonicalize.v4f64 (the intrinsic suffix names the overloaded
; <4 x double> type).
; Expected lowering per run line, as far as the visible checks show:
;  - i686: four scalar fmull from stack slots, with the products stored at
;    offsets 0, 8, 16 and 24 from the pointer loaded into %eax;
;  - SSE2: the vector is split in two halves, each multiplied by a 1.0 splat;
;  - AVX1: one 256-bit vmulpd with a constant-pool operand;
;  - AVX2 and AVX512F: a vbroadcastsd of 1.0 followed by a 256-bit vmulpd.
412 define <4 x double> @canon_fp64_varargsv4f64(<4 x double> %a) {
413 ; X87-LABEL: canon_fp64_varargsv4f64:
415 ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
417 ; X87-NEXT: fld %st(0)
418 ; X87-NEXT: fmull {{[0-9]+}}(%esp)
419 ; X87-NEXT: fld %st(1)
420 ; X87-NEXT: fmull {{[0-9]+}}(%esp)
421 ; X87-NEXT: fld %st(2)
422 ; X87-NEXT: fmull {{[0-9]+}}(%esp)
423 ; X87-NEXT: fxch %st(3)
424 ; X87-NEXT: fmull {{[0-9]+}}(%esp)
425 ; X87-NEXT: fstpl 24(%eax)
426 ; X87-NEXT: fxch %st(2)
427 ; X87-NEXT: fstpl 16(%eax)
428 ; X87-NEXT: fxch %st(1)
429 ; X87-NEXT: fstpl 8(%eax)
430 ; X87-NEXT: fstpl (%eax)
433 ; SSE-LABEL: canon_fp64_varargsv4f64:
435 ; SSE-NEXT: movapd {{.*#+}} xmm2 = [1.0E+0,1.0E+0]
436 ; SSE-NEXT: mulpd %xmm2, %xmm0
437 ; SSE-NEXT: mulpd %xmm2, %xmm1
440 ; AVX1-LABEL: canon_fp64_varargsv4f64:
442 ; AVX1-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
445 ; AVX2-LABEL: canon_fp64_varargsv4f64:
447 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
448 ; AVX2-NEXT: vmulpd %ymm1, %ymm0, %ymm0
451 ; AVX512F-LABEL: canon_fp64_varargsv4f64:
453 ; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
454 ; AVX512F-NEXT: vmulpd %ymm1, %ymm0, %ymm0
456 %canonicalized = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %a)
457 ret <4 x double> %canonicalized
; <2 x x86_fp80> passed by value and canonicalized with
; llvm.canonicalize.v2f80.
; Every prefix group expects both elements to be loaded with fldt from the
; stack and multiplied on the x87 stack (fmul + fmulp), followed by an fxch;
; only the stack pointer register (%esp vs. %rsp) differs.
460 define <2 x x86_fp80> @canon_fp80_varargsv2fp80(<2 x x86_fp80> %a) {
461 ; X87-LABEL: canon_fp80_varargsv2fp80:
463 ; X87-NEXT: fldt {{[0-9]+}}(%esp)
464 ; X87-NEXT: fldt {{[0-9]+}}(%esp)
466 ; X87-NEXT: fmul %st, %st(1)
467 ; X87-NEXT: fmulp %st, %st(2)
468 ; X87-NEXT: fxch %st(1)
471 ; SSE-LABEL: canon_fp80_varargsv2fp80:
473 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
474 ; SSE-NEXT: fldt {{[0-9]+}}(%rsp)
476 ; SSE-NEXT: fmul %st, %st(1)
477 ; SSE-NEXT: fmulp %st, %st(2)
478 ; SSE-NEXT: fxch %st(1)
481 ; AVX-LABEL: canon_fp80_varargsv2fp80:
483 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
484 ; AVX-NEXT: fldt {{[0-9]+}}(%rsp)
486 ; AVX-NEXT: fmul %st, %st(1)
487 ; AVX-NEXT: fmulp %st, %st(2)
488 ; AVX-NEXT: fxch %st(1)
490 %canonicalized = call <2 x x86_fp80> @llvm.canonicalize.v2f80(<2 x x86_fp80> %a)
491 ret <2 x x86_fp80> %canonicalized
; Load a <4 x float> through %out (an addrspace(1) pointer), canonicalize it
; with llvm.canonicalize.v4f32, and store the result back to the same address.
; Expected lowering per run line, as far as the visible checks show:
;  - i686: four scalar fmuls at offsets 0, 4, 8, 12 from %eax, each stored
;    back to the offset it was read from;
;  - SSE2 and AVX1: aligned load, mulps/vmulps with a constant-pool operand,
;    aligned store;
;  - AVX2 and AVX512F: vbroadcastss of 1.0, vmulps with the memory operand,
;    aligned store.
; The pointer is spelled with the opaque `ptr` type; the accessed type is
; given by the load and store themselves.
494 define void @vec_canonicalize_var_v4f32(ptr addrspace(1) %out) #1 {
495 ; X87-LABEL: vec_canonicalize_var_v4f32:
497 ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
499 ; X87-NEXT: fld %st(0)
500 ; X87-NEXT: fmuls (%eax)
501 ; X87-NEXT: fld %st(1)
502 ; X87-NEXT: fmuls 4(%eax)
503 ; X87-NEXT: fld %st(2)
504 ; X87-NEXT: fmuls 8(%eax)
505 ; X87-NEXT: fxch %st(3)
506 ; X87-NEXT: fmuls 12(%eax)
507 ; X87-NEXT: fstps 12(%eax)
508 ; X87-NEXT: fxch %st(2)
509 ; X87-NEXT: fstps 8(%eax)
510 ; X87-NEXT: fxch %st(1)
511 ; X87-NEXT: fstps 4(%eax)
512 ; X87-NEXT: fstps (%eax)
515 ; SSE-LABEL: vec_canonicalize_var_v4f32:
517 ; SSE-NEXT: movaps (%rdi), %xmm0
518 ; SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
519 ; SSE-NEXT: movaps %xmm0, (%rdi)
522 ; AVX1-LABEL: vec_canonicalize_var_v4f32:
524 ; AVX1-NEXT: vmovaps (%rdi), %xmm0
525 ; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
526 ; AVX1-NEXT: vmovaps %xmm0, (%rdi)
529 ; AVX2-LABEL: vec_canonicalize_var_v4f32:
531 ; AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
532 ; AVX2-NEXT: vmulps (%rdi), %xmm0, %xmm0
533 ; AVX2-NEXT: vmovaps %xmm0, (%rdi)
536 ; AVX512F-LABEL: vec_canonicalize_var_v4f32:
538 ; AVX512F-NEXT: vbroadcastss {{.*#+}} xmm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
539 ; AVX512F-NEXT: vmulps (%rdi), %xmm0, %xmm0
540 ; AVX512F-NEXT: vmovaps %xmm0, (%rdi)
542 %val = load <4 x float>, ptr addrspace(1) %out
543 %canonicalized = call <4 x float> @llvm.canonicalize.v4f32(<4 x float> %val)
544 store <4 x float> %canonicalized, ptr addrspace(1) %out
; Load a <4 x double> through %out (an addrspace(1) pointer), canonicalize it
; with llvm.canonicalize.v4f64, and store the result back to the same address.
; The intrinsic suffix names the overloaded <4 x double> type.
; Expected lowering per run line, as far as the visible checks show:
;  - i686: four scalar fmull at offsets 0, 8, 16, 24 from %eax, each stored
;    back to the offset it was read from;
;  - SSE2: two 128-bit halves, each multiplied by a 1.0 splat and stored back;
;  - AVX1: 256-bit load, vmulpd with a constant-pool operand, store,
;    vzeroupper;
;  - AVX2 and AVX512F: vbroadcastsd of 1.0, vmulpd with the memory operand,
;    store, vzeroupper.
; The pointer is spelled with the opaque `ptr` type; the accessed type is
; given by the load and store themselves.
548 define void @vec_canonicalize_var_v4f64(ptr addrspace(1) %out) #1 {
549 ; X87-LABEL: vec_canonicalize_var_v4f64:
551 ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
553 ; X87-NEXT: fld %st(0)
554 ; X87-NEXT: fmull (%eax)
555 ; X87-NEXT: fld %st(1)
556 ; X87-NEXT: fmull 8(%eax)
557 ; X87-NEXT: fld %st(2)
558 ; X87-NEXT: fmull 16(%eax)
559 ; X87-NEXT: fxch %st(3)
560 ; X87-NEXT: fmull 24(%eax)
561 ; X87-NEXT: fstpl 24(%eax)
562 ; X87-NEXT: fxch %st(2)
563 ; X87-NEXT: fstpl 16(%eax)
564 ; X87-NEXT: fxch %st(1)
565 ; X87-NEXT: fstpl 8(%eax)
566 ; X87-NEXT: fstpl (%eax)
569 ; SSE-LABEL: vec_canonicalize_var_v4f64:
571 ; SSE-NEXT: movapd {{.*#+}} xmm0 = [1.0E+0,1.0E+0]
572 ; SSE-NEXT: movapd 16(%rdi), %xmm1
573 ; SSE-NEXT: mulpd %xmm0, %xmm1
574 ; SSE-NEXT: mulpd (%rdi), %xmm0
575 ; SSE-NEXT: movapd %xmm0, (%rdi)
576 ; SSE-NEXT: movapd %xmm1, 16(%rdi)
579 ; AVX1-LABEL: vec_canonicalize_var_v4f64:
581 ; AVX1-NEXT: vmovapd (%rdi), %ymm0
582 ; AVX1-NEXT: vmulpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
583 ; AVX1-NEXT: vmovapd %ymm0, (%rdi)
584 ; AVX1-NEXT: vzeroupper
587 ; AVX2-LABEL: vec_canonicalize_var_v4f64:
589 ; AVX2-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
590 ; AVX2-NEXT: vmulpd (%rdi), %ymm0, %ymm0
591 ; AVX2-NEXT: vmovapd %ymm0, (%rdi)
592 ; AVX2-NEXT: vzeroupper
595 ; AVX512F-LABEL: vec_canonicalize_var_v4f64:
597 ; AVX512F-NEXT: vbroadcastsd {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
598 ; AVX512F-NEXT: vmulpd (%rdi), %ymm0, %ymm0
599 ; AVX512F-NEXT: vmovapd %ymm0, (%rdi)
600 ; AVX512F-NEXT: vzeroupper
602 %val = load <4 x double>, ptr addrspace(1) %out
603 %canonicalized = call <4 x double> @llvm.canonicalize.v4f64(<4 x double> %val)
604 store <4 x double> %canonicalized, ptr addrspace(1) %out
; Load a <4 x x86_fp80> through %out (an addrspace(1) pointer), canonicalize
; it with llvm.canonicalize.v4f80, and store the result back to the same
; address. The intrinsic suffix names the overloaded <4 x x86_fp80> type.
; Every prefix group expects the four elements to be loaded with fldt from
; offsets 30, 20, 10 and 0, multiplied on the x87 stack, and stored back with
; fstpt to the same offsets; only the address register differs (%eax on
; i686, %rdi on x86_64).
; The pointer is spelled with the opaque `ptr` type; the accessed type is
; given by the load and store themselves.
608 define void @vec_canonicalize_x86_fp80(ptr addrspace(1) %out) #1 {
609 ; X87-LABEL: vec_canonicalize_x86_fp80:
611 ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax
612 ; X87-NEXT: fldt 30(%eax)
613 ; X87-NEXT: fldt 20(%eax)
614 ; X87-NEXT: fldt 10(%eax)
615 ; X87-NEXT: fldt (%eax)
617 ; X87-NEXT: fmul %st, %st(1)
618 ; X87-NEXT: fmul %st, %st(2)
619 ; X87-NEXT: fmul %st, %st(3)
620 ; X87-NEXT: fmulp %st, %st(4)
621 ; X87-NEXT: fxch %st(3)
622 ; X87-NEXT: fstpt 30(%eax)
623 ; X87-NEXT: fxch %st(1)
624 ; X87-NEXT: fstpt 20(%eax)
625 ; X87-NEXT: fstpt 10(%eax)
626 ; X87-NEXT: fstpt (%eax)
629 ; SSE-LABEL: vec_canonicalize_x86_fp80:
631 ; SSE-NEXT: fldt 30(%rdi)
632 ; SSE-NEXT: fldt 20(%rdi)
633 ; SSE-NEXT: fldt 10(%rdi)
634 ; SSE-NEXT: fldt (%rdi)
636 ; SSE-NEXT: fmul %st, %st(1)
637 ; SSE-NEXT: fmul %st, %st(2)
638 ; SSE-NEXT: fmul %st, %st(3)
639 ; SSE-NEXT: fmulp %st, %st(4)
640 ; SSE-NEXT: fxch %st(3)
641 ; SSE-NEXT: fstpt 30(%rdi)
642 ; SSE-NEXT: fxch %st(1)
643 ; SSE-NEXT: fstpt 20(%rdi)
644 ; SSE-NEXT: fstpt 10(%rdi)
645 ; SSE-NEXT: fstpt (%rdi)
648 ; AVX-LABEL: vec_canonicalize_x86_fp80:
650 ; AVX-NEXT: fldt 30(%rdi)
651 ; AVX-NEXT: fldt 20(%rdi)
652 ; AVX-NEXT: fldt 10(%rdi)
653 ; AVX-NEXT: fldt (%rdi)
655 ; AVX-NEXT: fmul %st, %st(1)
656 ; AVX-NEXT: fmul %st, %st(2)
657 ; AVX-NEXT: fmul %st, %st(3)
658 ; AVX-NEXT: fmulp %st, %st(4)
659 ; AVX-NEXT: fxch %st(3)
660 ; AVX-NEXT: fstpt 30(%rdi)
661 ; AVX-NEXT: fxch %st(1)
662 ; AVX-NEXT: fstpt 20(%rdi)
663 ; AVX-NEXT: fstpt 10(%rdi)
664 ; AVX-NEXT: fstpt (%rdi)
666 %val = load <4 x x86_fp80>, ptr addrspace(1) %out
667 %canonicalized = call <4 x x86_fp80> @llvm.canonicalize.v4f80(<4 x x86_fp80> %val)
668 store <4 x x86_fp80> %canonicalized, ptr addrspace(1) %out
671 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: