1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE
4 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=AVX
5 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX
6 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512dq,+avx512bw | FileCheck %s --check-prefix=AVX
; Loads an i32 from %p and inserts the computed scalar %b into element 0 of an
; undef <4 x i32>. Both check prefixes expect the add to stay scalar: load into
; eax, addl $42, then a single GPR-to-xmm0 move (movd / vmovd).
8 define <4 x i32> @add_op1_constant(i32* %p) nounwind {
9 ; SSE-LABEL: add_op1_constant:
11 ; SSE-NEXT: movl (%rdi), %eax
12 ; SSE-NEXT: addl $42, %eax
13 ; SSE-NEXT: movd %eax, %xmm0
16 ; AVX-LABEL: add_op1_constant:
18 ; AVX-NEXT: movl (%rdi), %eax
19 ; AVX-NEXT: addl $42, %eax
20 ; AVX-NEXT: vmovd %eax, %xmm0
22 %x = load i32, i32* %p
24 %r = insertelement <4 x i32> undef, i32 %b, i32 0
28 ; Code and data size may increase by using more vector ops, so the transform is disabled here.
; The function carries the optsize attribute. Both check prefixes expect the
; scalar sequence: load into eax, addl $42, then one move into xmm0.
30 define <4 x i32> @add_op1_constant_optsize(i32* %p) nounwind optsize {
31 ; SSE-LABEL: add_op1_constant_optsize:
33 ; SSE-NEXT: movl (%rdi), %eax
34 ; SSE-NEXT: addl $42, %eax
35 ; SSE-NEXT: movd %eax, %xmm0
38 ; AVX-LABEL: add_op1_constant_optsize:
40 ; AVX-NEXT: movl (%rdi), %eax
41 ; AVX-NEXT: addl $42, %eax
42 ; AVX-NEXT: vmovd %eax, %xmm0
44 %x = load i32, i32* %p
46 %r = insertelement <4 x i32> undef, i32 %b, i32 0
; i16 variant: loads an i16 from %p and inserts the computed scalar %b into
; element 0 of an undef <8 x i16>. The checks expect a zero-extending load
; (movzwl), a 32-bit addl $42, and one move into xmm0.
50 define <8 x i16> @add_op0_constant(i16* %p) nounwind {
51 ; SSE-LABEL: add_op0_constant:
53 ; SSE-NEXT: movzwl (%rdi), %eax
54 ; SSE-NEXT: addl $42, %eax
55 ; SSE-NEXT: movd %eax, %xmm0
58 ; AVX-LABEL: add_op0_constant:
60 ; AVX-NEXT: movzwl (%rdi), %eax
61 ; AVX-NEXT: addl $42, %eax
62 ; AVX-NEXT: vmovd %eax, %xmm0
64 %x = load i16, i16* %p
66 %r = insertelement <8 x i16> undef, i16 %b, i32 0
; Loads an i64 from %p and inserts the computed scalar %b into element 0 of an
; undef <2 x i64>. The checks expect the constant 42 to be materialized in eax
; and the loaded value subtracted from it (subq with a memory operand), then a
; 64-bit move into xmm0 (movq / vmovq).
70 define <2 x i64> @sub_op0_constant(i64* %p) nounwind {
71 ; SSE-LABEL: sub_op0_constant:
73 ; SSE-NEXT: movl $42, %eax
74 ; SSE-NEXT: subq (%rdi), %rax
75 ; SSE-NEXT: movq %rax, %xmm0
78 ; AVX-LABEL: sub_op0_constant:
80 ; AVX-NEXT: movl $42, %eax
81 ; AVX-NEXT: subq (%rdi), %rax
82 ; AVX-NEXT: vmovq %rax, %xmm0
84 %x = load i64, i64* %p
86 %r = insertelement <2 x i64> undef, i64 %b, i32 0
; i8 variant: the computed scalar %b is inserted into element 0 of an undef
; <16 x i8>. The checks expect a byte load, the subtraction emitted as
; addb $-42, a zero-extension of the byte result, and one move into xmm0.
90 define <16 x i8> @sub_op1_constant(i8* %p) nounwind {
91 ; SSE-LABEL: sub_op1_constant:
93 ; SSE-NEXT: movb (%rdi), %al
94 ; SSE-NEXT: addb $-42, %al
95 ; SSE-NEXT: movzbl %al, %eax
96 ; SSE-NEXT: movd %eax, %xmm0
99 ; AVX-LABEL: sub_op1_constant:
101 ; AVX-NEXT: movb (%rdi), %al
102 ; AVX-NEXT: addb $-42, %al
103 ; AVX-NEXT: movzbl %al, %eax
104 ; AVX-NEXT: vmovd %eax, %xmm0
108 %r = insertelement <16 x i8> undef, i8 %b, i32 0
; Loads an i32 from %p and inserts the computed scalar %b into element 0 of an
; undef <4 x i32>. The checks expect the load to be folded into a three-operand
; imull $42 with a memory source, followed by one move into xmm0.
112 define <4 x i32> @mul_op1_constant(i32* %p) nounwind {
113 ; SSE-LABEL: mul_op1_constant:
115 ; SSE-NEXT: imull $42, (%rdi), %eax
116 ; SSE-NEXT: movd %eax, %xmm0
119 ; AVX-LABEL: mul_op1_constant:
121 ; AVX-NEXT: imull $42, (%rdi), %eax
122 ; AVX-NEXT: vmovd %eax, %xmm0
124 %x = load i32, i32* %p
126 %r = insertelement <4 x i32> undef, i32 %b, i32 0
; i16 variant: loads an i16 from %p and inserts the computed scalar %b into
; element 0 of an undef <8 x i16>. The checks expect a zero-extending load
; (movzwl), a 32-bit imull by 42, and one move into xmm0.
130 define <8 x i16> @mul_op0_constant(i16* %p) nounwind {
131 ; SSE-LABEL: mul_op0_constant:
133 ; SSE-NEXT: movzwl (%rdi), %eax
134 ; SSE-NEXT: imull $42, %eax, %eax
135 ; SSE-NEXT: movd %eax, %xmm0
138 ; AVX-LABEL: mul_op0_constant:
140 ; AVX-NEXT: movzwl (%rdi), %eax
141 ; AVX-NEXT: imull $42, %eax, %eax
142 ; AVX-NEXT: vmovd %eax, %xmm0
144 %x = load i16, i16* %p
146 %r = insertelement <8 x i16> undef, i16 %b, i32 0
; Loads an i32 from %p and inserts the computed scalar %b into element 0 of an
; undef <4 x i32>. The checks expect a scalar load, andl $42 in a GPR, and one
; move into xmm0.
150 define <4 x i32> @and_op1_constant(i32* %p) nounwind {
151 ; SSE-LABEL: and_op1_constant:
153 ; SSE-NEXT: movl (%rdi), %eax
154 ; SSE-NEXT: andl $42, %eax
155 ; SSE-NEXT: movd %eax, %xmm0
158 ; AVX-LABEL: and_op1_constant:
160 ; AVX-NEXT: movl (%rdi), %eax
161 ; AVX-NEXT: andl $42, %eax
162 ; AVX-NEXT: vmovd %eax, %xmm0
164 %x = load i32, i32* %p
166 %r = insertelement <4 x i32> undef, i32 %b, i32 0
; Loads an i64 from %p and inserts the computed scalar %b into element 0 of an
; undef <2 x i64>. The checks expect a 64-bit load, orq $42 in a GPR, and a
; 64-bit move into xmm0 (movq / vmovq).
170 define <2 x i64> @or_op1_constant(i64* %p) nounwind {
171 ; SSE-LABEL: or_op1_constant:
173 ; SSE-NEXT: movq (%rdi), %rax
174 ; SSE-NEXT: orq $42, %rax
175 ; SSE-NEXT: movq %rax, %xmm0
178 ; AVX-LABEL: or_op1_constant:
180 ; AVX-NEXT: movq (%rdi), %rax
181 ; AVX-NEXT: orq $42, %rax
182 ; AVX-NEXT: vmovq %rax, %xmm0
184 %x = load i64, i64* %p
186 %r = insertelement <2 x i64> undef, i64 %b, i32 0
; i16 variant: loads an i16 from %p and inserts the computed scalar %b into
; element 0 of an undef <8 x i16>. The checks expect a zero-extending load
; (movzwl), a 32-bit xorl $42, and one move into xmm0.
190 define <8 x i16> @xor_op1_constant(i16* %p) nounwind {
191 ; SSE-LABEL: xor_op1_constant:
193 ; SSE-NEXT: movzwl (%rdi), %eax
194 ; SSE-NEXT: xorl $42, %eax
195 ; SSE-NEXT: movd %eax, %xmm0
198 ; AVX-LABEL: xor_op1_constant:
200 ; AVX-NEXT: movzwl (%rdi), %eax
201 ; AVX-NEXT: xorl $42, %eax
202 ; AVX-NEXT: vmovd %eax, %xmm0
204 %x = load i16, i16* %p
206 %r = insertelement <8 x i16> undef, i16 %b, i32 0
; Loads an i32 from %p and inserts the computed scalar %b into element 0 of an
; undef <4 x i32>. The checks expect the loaded value to be used as the shift
; amount (only its low byte is loaded, into cl), the constant 42 to be
; materialized in eax and shifted left by cl, then one move into xmm0.
210 define <4 x i32> @shl_op0_constant(i32* %p) nounwind {
211 ; SSE-LABEL: shl_op0_constant:
213 ; SSE-NEXT: movb (%rdi), %cl
214 ; SSE-NEXT: movl $42, %eax
215 ; SSE-NEXT: shll %cl, %eax
216 ; SSE-NEXT: movd %eax, %xmm0
219 ; AVX-LABEL: shl_op0_constant:
221 ; AVX-NEXT: movb (%rdi), %cl
222 ; AVX-NEXT: movl $42, %eax
223 ; AVX-NEXT: shll %cl, %eax
224 ; AVX-NEXT: vmovd %eax, %xmm0
226 %x = load i32, i32* %p
228 %r = insertelement <4 x i32> undef, i32 %b, i32 0
; i8 variant: the computed scalar %b is inserted into element 0 of an undef
; <16 x i8>. The checks expect a byte load, an 8-bit shift left by the
; immediate 5 (shlb), a zero-extension of the byte result, and one move into
; xmm0.
232 define <16 x i8> @shl_op1_constant(i8* %p) nounwind {
233 ; SSE-LABEL: shl_op1_constant:
235 ; SSE-NEXT: movb (%rdi), %al
236 ; SSE-NEXT: shlb $5, %al
237 ; SSE-NEXT: movzbl %al, %eax
238 ; SSE-NEXT: movd %eax, %xmm0
241 ; AVX-LABEL: shl_op1_constant:
243 ; AVX-NEXT: movb (%rdi), %al
244 ; AVX-NEXT: shlb $5, %al
245 ; AVX-NEXT: movzbl %al, %eax
246 ; AVX-NEXT: vmovd %eax, %xmm0
250 %r = insertelement <16 x i8> undef, i8 %b, i32 0
; Loads an i64 from %p and inserts the computed scalar %b into element 0 of an
; undef <2 x i64>. The checks expect the loaded value to be the shift amount
; (low byte loaded into cl), the constant 42 to be materialized and logically
; shifted right as a 64-bit value (shrq), then a 64-bit move into xmm0.
254 define <2 x i64> @lshr_op0_constant(i64* %p) nounwind {
255 ; SSE-LABEL: lshr_op0_constant:
257 ; SSE-NEXT: movb (%rdi), %cl
258 ; SSE-NEXT: movl $42, %eax
259 ; SSE-NEXT: shrq %cl, %rax
260 ; SSE-NEXT: movq %rax, %xmm0
263 ; AVX-LABEL: lshr_op0_constant:
265 ; AVX-NEXT: movb (%rdi), %cl
266 ; AVX-NEXT: movl $42, %eax
267 ; AVX-NEXT: shrq %cl, %rax
268 ; AVX-NEXT: vmovq %rax, %xmm0
270 %x = load i64, i64* %p
272 %r = insertelement <2 x i64> undef, i64 %b, i32 0
; Loads an i32 from %p and inserts the computed scalar %b into element 0 of an
; undef <4 x i32>. The checks expect a scalar load, a logical right shift by
; the immediate 17 (shrl), and one move into xmm0.
276 define <4 x i32> @lshr_op1_constant(i32* %p) nounwind {
277 ; SSE-LABEL: lshr_op1_constant:
279 ; SSE-NEXT: movl (%rdi), %eax
280 ; SSE-NEXT: shrl $17, %eax
281 ; SSE-NEXT: movd %eax, %xmm0
284 ; AVX-LABEL: lshr_op1_constant:
286 ; AVX-NEXT: movl (%rdi), %eax
287 ; AVX-NEXT: shrl $17, %eax
288 ; AVX-NEXT: vmovd %eax, %xmm0
290 %x = load i32, i32* %p
292 %r = insertelement <4 x i32> undef, i32 %b, i32 0
; Arithmetic shift right of the constant -42 by a loaded i16 amount; the result
; is inserted into element 0 of an undef <8 x i16>. The checks expect the shift
; amount's low byte in cl, -42 materialized in eax, a 32-bit sarl by cl, and
; one move into xmm0.
296 define <8 x i16> @ashr_op0_constant(i16* %p) nounwind {
297 ; SSE-LABEL: ashr_op0_constant:
299 ; SSE-NEXT: movb (%rdi), %cl
300 ; SSE-NEXT: movl $-42, %eax
301 ; SSE-NEXT: sarl %cl, %eax
302 ; SSE-NEXT: movd %eax, %xmm0
305 ; AVX-LABEL: ashr_op0_constant:
307 ; AVX-NEXT: movb (%rdi), %cl
308 ; AVX-NEXT: movl $-42, %eax
309 ; AVX-NEXT: sarl %cl, %eax
310 ; AVX-NEXT: vmovd %eax, %xmm0
312 %x = load i16, i16* %p
313 %b = ashr i16 -42, %x
314 %r = insertelement <8 x i16> undef, i16 %b, i32 0
; i16 variant: loads an i16 from %p and inserts the computed scalar %b into
; element 0 of an undef <8 x i16>. The checks expect a sign-extending load
; (movswl), a 32-bit arithmetic right shift by the immediate 7 (sarl), and one
; move into xmm0.
318 define <8 x i16> @ashr_op1_constant(i16* %p) nounwind {
319 ; SSE-LABEL: ashr_op1_constant:
321 ; SSE-NEXT: movswl (%rdi), %eax
322 ; SSE-NEXT: sarl $7, %eax
323 ; SSE-NEXT: movd %eax, %xmm0
326 ; AVX-LABEL: ashr_op1_constant:
328 ; AVX-NEXT: movswl (%rdi), %eax
329 ; AVX-NEXT: sarl $7, %eax
330 ; AVX-NEXT: vmovd %eax, %xmm0
332 %x = load i16, i16* %p
334 %r = insertelement <8 x i16> undef, i16 %b, i32 0
; Loads an i32 from %p and inserts the computed scalar %b into element 0 of an
; undef <4 x i32>. The checks expect the constant 42 as the dividend in eax,
; edx cleared with xorl, a signed divide by the memory operand (idivl), and the
; quotient in eax moved into xmm0.
338 define <4 x i32> @sdiv_op0_constant(i32* %p) nounwind {
339 ; SSE-LABEL: sdiv_op0_constant:
341 ; SSE-NEXT: movl $42, %eax
342 ; SSE-NEXT: xorl %edx, %edx
343 ; SSE-NEXT: idivl (%rdi)
344 ; SSE-NEXT: movd %eax, %xmm0
347 ; AVX-LABEL: sdiv_op0_constant:
349 ; AVX-NEXT: movl $42, %eax
350 ; AVX-NEXT: xorl %edx, %edx
351 ; AVX-NEXT: idivl (%rdi)
352 ; AVX-NEXT: vmovd %eax, %xmm0
354 %x = load i32, i32* %p
356 %r = insertelement <4 x i32> undef, i32 %b, i32 0
; i16 variant: loads an i16 from %p and inserts the computed scalar %b into
; element 0 of an undef <8 x i16>. The checks contain no divide instruction:
; they expect a multiply by the immediate -15603 (0xC30D) followed by
; shift/add fix-ups in GPRs, then one move of ecx into xmm0.
360 define <8 x i16> @sdiv_op1_constant(i16* %p) nounwind {
361 ; SSE-LABEL: sdiv_op1_constant:
363 ; SSE-NEXT: movswl (%rdi), %eax
364 ; SSE-NEXT: imull $-15603, %eax, %ecx # imm = 0xC30D
365 ; SSE-NEXT: shrl $16, %ecx
366 ; SSE-NEXT: addl %eax, %ecx
367 ; SSE-NEXT: movzwl %cx, %eax
368 ; SSE-NEXT: movswl %ax, %ecx
369 ; SSE-NEXT: shrl $15, %eax
370 ; SSE-NEXT: sarl $5, %ecx
371 ; SSE-NEXT: addl %eax, %ecx
372 ; SSE-NEXT: movd %ecx, %xmm0
375 ; AVX-LABEL: sdiv_op1_constant:
377 ; AVX-NEXT: movswl (%rdi), %eax
378 ; AVX-NEXT: imull $-15603, %eax, %ecx # imm = 0xC30D
379 ; AVX-NEXT: shrl $16, %ecx
380 ; AVX-NEXT: addl %eax, %ecx
381 ; AVX-NEXT: movzwl %cx, %eax
382 ; AVX-NEXT: movswl %ax, %ecx
383 ; AVX-NEXT: shrl $15, %eax
384 ; AVX-NEXT: sarl $5, %ecx
385 ; AVX-NEXT: addl %eax, %ecx
386 ; AVX-NEXT: vmovd %ecx, %xmm0
388 %x = load i16, i16* %p
390 %r = insertelement <8 x i16> undef, i16 %b, i32 0
; i16 variant: loads an i16 from %p and inserts the computed scalar %b into
; element 0 of an undef <8 x i16>. The checks expect the constant 42 as a
; 16-bit dividend in ax, edx cleared, a signed 16-bit divide by the memory
; operand (idivw), and the remainder from dx moved into xmm0 via edx.
394 define <8 x i16> @srem_op0_constant(i16* %p) nounwind {
395 ; SSE-LABEL: srem_op0_constant:
397 ; SSE-NEXT: movw $42, %ax
398 ; SSE-NEXT: xorl %edx, %edx
399 ; SSE-NEXT: idivw (%rdi)
400 ; SSE-NEXT: # kill: def $dx killed $dx def $edx
401 ; SSE-NEXT: movd %edx, %xmm0
404 ; AVX-LABEL: srem_op0_constant:
406 ; AVX-NEXT: movw $42, %ax
407 ; AVX-NEXT: xorl %edx, %edx
408 ; AVX-NEXT: idivw (%rdi)
409 ; AVX-NEXT: # kill: def $dx killed $dx def $edx
410 ; AVX-NEXT: vmovd %edx, %xmm0
412 %x = load i16, i16* %p
414 %r = insertelement <8 x i16> undef, i16 %b, i32 0
; Loads an i32 from %p and inserts the computed scalar %b into element 0 of an
; undef <4 x i32>. The checks contain no divide instruction: they expect a
; 64-bit multiply by 818089009 (0x30C30C31) with shift/add fix-ups, the result
; multiplied by 42 and subtracted from the sign-extended input, then one move
; into xmm0.
418 define <4 x i32> @srem_op1_constant(i32* %p) nounwind {
419 ; SSE-LABEL: srem_op1_constant:
421 ; SSE-NEXT: movslq (%rdi), %rax
422 ; SSE-NEXT: imulq $818089009, %rax, %rcx # imm = 0x30C30C31
423 ; SSE-NEXT: movq %rcx, %rdx
424 ; SSE-NEXT: shrq $63, %rdx
425 ; SSE-NEXT: sarq $35, %rcx
426 ; SSE-NEXT: addl %edx, %ecx
427 ; SSE-NEXT: imull $42, %ecx, %ecx
428 ; SSE-NEXT: subl %ecx, %eax
429 ; SSE-NEXT: movd %eax, %xmm0
432 ; AVX-LABEL: srem_op1_constant:
434 ; AVX-NEXT: movslq (%rdi), %rax
435 ; AVX-NEXT: imulq $818089009, %rax, %rcx # imm = 0x30C30C31
436 ; AVX-NEXT: movq %rcx, %rdx
437 ; AVX-NEXT: shrq $63, %rdx
438 ; AVX-NEXT: sarq $35, %rcx
439 ; AVX-NEXT: addl %edx, %ecx
440 ; AVX-NEXT: imull $42, %ecx, %ecx
441 ; AVX-NEXT: subl %ecx, %eax
442 ; AVX-NEXT: vmovd %eax, %xmm0
444 %x = load i32, i32* %p
446 %r = insertelement <4 x i32> undef, i32 %b, i32 0
; Loads an i32 from %p and inserts the computed scalar %b into element 0 of an
; undef <4 x i32>. The checks expect the constant 42 as the dividend in eax,
; edx cleared, an unsigned divide by the memory operand (divl), and the
; quotient in eax moved into xmm0.
450 define <4 x i32> @udiv_op0_constant(i32* %p) nounwind {
451 ; SSE-LABEL: udiv_op0_constant:
453 ; SSE-NEXT: movl $42, %eax
454 ; SSE-NEXT: xorl %edx, %edx
455 ; SSE-NEXT: divl (%rdi)
456 ; SSE-NEXT: movd %eax, %xmm0
459 ; AVX-LABEL: udiv_op0_constant:
461 ; AVX-NEXT: movl $42, %eax
462 ; AVX-NEXT: xorl %edx, %edx
463 ; AVX-NEXT: divl (%rdi)
464 ; AVX-NEXT: vmovd %eax, %xmm0
466 %x = load i32, i32* %p
468 %r = insertelement <4 x i32> undef, i32 %b, i32 0
; Loads an i64 from %p and inserts the computed scalar %b into element 0 of an
; undef <2 x i64>. The checks contain no divide instruction: they expect a
; right shift by one, a widening mulq by the 64-bit constant
; 0xC30C30C30C30C30D, a further shift of the high half (rdx) by 4, and a
; 64-bit move of rdx into xmm0.
472 define <2 x i64> @udiv_op1_constant(i64* %p) nounwind {
473 ; SSE-LABEL: udiv_op1_constant:
475 ; SSE-NEXT: movq (%rdi), %rax
476 ; SSE-NEXT: shrq %rax
477 ; SSE-NEXT: movabsq $-4392081922311798003, %rcx # imm = 0xC30C30C30C30C30D
478 ; SSE-NEXT: mulq %rcx
479 ; SSE-NEXT: shrq $4, %rdx
480 ; SSE-NEXT: movq %rdx, %xmm0
483 ; AVX-LABEL: udiv_op1_constant:
485 ; AVX-NEXT: movq (%rdi), %rax
486 ; AVX-NEXT: shrq %rax
487 ; AVX-NEXT: movabsq $-4392081922311798003, %rcx # imm = 0xC30C30C30C30C30D
488 ; AVX-NEXT: mulq %rcx
489 ; AVX-NEXT: shrq $4, %rdx
490 ; AVX-NEXT: vmovq %rdx, %xmm0
492 %x = load i64, i64* %p
494 %r = insertelement <2 x i64> undef, i64 %b, i32 0
; Loads an i64 from %p and inserts the computed scalar %b into element 0 of an
; undef <2 x i64>. The checks expect the constant 42 as the dividend in eax,
; edx cleared, an unsigned 64-bit divide by the memory operand (divq), and the
; remainder in rdx moved into xmm0.
498 define <2 x i64> @urem_op0_constant(i64* %p) nounwind {
499 ; SSE-LABEL: urem_op0_constant:
501 ; SSE-NEXT: movl $42, %eax
502 ; SSE-NEXT: xorl %edx, %edx
503 ; SSE-NEXT: divq (%rdi)
504 ; SSE-NEXT: movq %rdx, %xmm0
507 ; AVX-LABEL: urem_op0_constant:
509 ; AVX-NEXT: movl $42, %eax
510 ; AVX-NEXT: xorl %edx, %edx
511 ; AVX-NEXT: divq (%rdi)
512 ; AVX-NEXT: vmovq %rdx, %xmm0
514 %x = load i64, i64* %p
516 %r = insertelement <2 x i64> undef, i64 %b, i32 0
; i8 variant: the computed scalar %b is inserted into element 0 of an undef
; <16 x i8>. The checks contain no divide instruction: they expect a byte
; load, a multiply by 49 with a shift right by 10, the result multiplied by 42
; and subtracted from the original byte (subb), a zero-extension, and one move
; into xmm0.
520 define <16 x i8> @urem_op1_constant(i8* %p) nounwind {
521 ; SSE-LABEL: urem_op1_constant:
523 ; SSE-NEXT: movb (%rdi), %al
524 ; SSE-NEXT: movl %eax, %ecx
526 ; SSE-NEXT: movzbl %cl, %ecx
527 ; SSE-NEXT: imull $49, %ecx, %ecx
528 ; SSE-NEXT: shrl $10, %ecx
529 ; SSE-NEXT: imull $42, %ecx, %ecx
530 ; SSE-NEXT: subb %cl, %al
531 ; SSE-NEXT: movzbl %al, %eax
532 ; SSE-NEXT: movd %eax, %xmm0
535 ; AVX-LABEL: urem_op1_constant:
537 ; AVX-NEXT: movb (%rdi), %al
538 ; AVX-NEXT: movl %eax, %ecx
540 ; AVX-NEXT: movzbl %cl, %ecx
541 ; AVX-NEXT: imull $49, %ecx, %ecx
542 ; AVX-NEXT: shrl $10, %ecx
543 ; AVX-NEXT: imull $42, %ecx, %ecx
544 ; AVX-NEXT: subb %cl, %al
545 ; AVX-NEXT: movzbl %al, %eax
546 ; AVX-NEXT: vmovd %eax, %xmm0
550 %r = insertelement <16 x i8> undef, i8 %b, i32 0
; Loads a float from %p, adds the constant 42.0, and inserts the result into
; element 0 of an undef <4 x float>. The checks expect a scalar load into xmm0
; (upper lanes zeroed) and a scalar add (addss / vaddss) whose other operand
; comes from a RIP-relative constant-pool entry.
554 define <4 x float> @fadd_op1_constant(float* %p) nounwind {
555 ; SSE-LABEL: fadd_op1_constant:
557 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
558 ; SSE-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
561 ; AVX-LABEL: fadd_op1_constant:
563 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
564 ; AVX-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
566 %x = load float, float* %p
567 %b = fadd float %x, 42.0
568 %r = insertelement <4 x float> undef, float %b, i32 0
; Loads a double from %p, subtracts the constant 42.0, and inserts the result
; into element 0 of an undef <2 x double>. The checks expect the subtraction
; to be emitted as a scalar add (addsd / vaddsd) with a constant-pool operand
; (presumably the negated constant; the pool contents are not checked here).
572 define <2 x double> @fsub_op1_constant(double* %p) nounwind {
573 ; SSE-LABEL: fsub_op1_constant:
575 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
576 ; SSE-NEXT: addsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
579 ; AVX-LABEL: fsub_op1_constant:
581 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
582 ; AVX-NEXT: vaddsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
584 %x = load double, double* %p
585 %b = fsub double %x, 42.0
586 %r = insertelement <2 x double> undef, double %b, i32 0
; Subtracts a loaded float from the constant 42.0 and inserts the result into
; element 0 of an undef <4 x float>. The checks expect a scalar load into xmm0
; (presumably the constant; its address is hidden behind the regex) followed
; by a scalar subtract of the memory operand at (%rdi) (subss / vsubss).
590 define <4 x float> @fsub_op0_constant(float* %p) nounwind {
591 ; SSE-LABEL: fsub_op0_constant:
593 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
594 ; SSE-NEXT: subss (%rdi), %xmm0
597 ; AVX-LABEL: fsub_op0_constant:
599 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
600 ; AVX-NEXT: vsubss (%rdi), %xmm0, %xmm0
602 %x = load float, float* %p
603 %b = fsub float 42.0, %x
604 %r = insertelement <4 x float> undef, float %b, i32 0
; Loads a float from %p, multiplies it by the constant 42.0, and inserts the
; result into element 0 of an undef <4 x float>. The checks expect a scalar
; load into xmm0 and a scalar multiply (mulss / vmulss) by a RIP-relative
; constant-pool operand.
608 define <4 x float> @fmul_op1_constant(float* %p) nounwind {
609 ; SSE-LABEL: fmul_op1_constant:
611 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
612 ; SSE-NEXT: mulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
615 ; AVX-LABEL: fmul_op1_constant:
617 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
618 ; AVX-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
620 %x = load float, float* %p
621 %b = fmul float %x, 42.0
622 %r = insertelement <4 x float> undef, float %b, i32 0
; Loads a double from %p, divides it by the constant 42.0, and inserts the
; result into element 0 of an undef <2 x double>. The checks expect a scalar
; load into xmm0 and a real scalar divide (divsd / vdivsd) by a RIP-relative
; constant-pool operand.
626 define <2 x double> @fdiv_op1_constant(double* %p) nounwind {
627 ; SSE-LABEL: fdiv_op1_constant:
629 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
630 ; SSE-NEXT: divsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
633 ; AVX-LABEL: fdiv_op1_constant:
635 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
636 ; AVX-NEXT: vdivsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
638 %x = load double, double* %p
639 %b = fdiv double %x, 42.0
640 %r = insertelement <2 x double> undef, double %b, i32 0
; Divides the constant 42.0 by a loaded float and inserts the result into
; element 0 of an undef <4 x float>. The checks expect a scalar load into xmm0
; (presumably the constant; its address is hidden behind the regex) followed
; by a scalar divide by the memory operand at (%rdi) (divss / vdivss).
644 define <4 x float> @fdiv_op0_constant(float* %p) nounwind {
645 ; SSE-LABEL: fdiv_op0_constant:
647 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
648 ; SSE-NEXT: divss (%rdi), %xmm0
651 ; AVX-LABEL: fdiv_op0_constant:
653 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
654 ; AVX-NEXT: vdivss (%rdi), %xmm0, %xmm0
656 %x = load float, float* %p
657 %b = fdiv float 42.0, %x
658 %r = insertelement <4 x float> undef, float %b, i32 0
; Floating-point remainder of a loaded float by the constant 42.0; the result
; is inserted into element 0 of an undef <4 x float>. The checks expect a call
; to fmodf through the PLT with the two arguments loaded into xmm0 and xmm1.
; The pushq/popq of rax brackets the call (presumably for stack alignment).
662 define <4 x float> @frem_op1_constant(float* %p) nounwind {
663 ; SSE-LABEL: frem_op1_constant:
665 ; SSE-NEXT: pushq %rax
666 ; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
667 ; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
668 ; SSE-NEXT: callq fmodf@PLT
669 ; SSE-NEXT: popq %rax
672 ; AVX-LABEL: frem_op1_constant:
674 ; AVX-NEXT: pushq %rax
675 ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
676 ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
677 ; AVX-NEXT: callq fmodf@PLT
678 ; AVX-NEXT: popq %rax
680 %x = load float, float* %p
681 %b = frem float %x, 42.0
682 %r = insertelement <4 x float> undef, float %b, i32 0
; Floating-point remainder of the constant 42.0 by a loaded double; the result
; is inserted into element 0 of an undef <2 x double>. The checks expect a
; call to fmod through the PLT, with xmm1 loaded before xmm0. The pushq/popq
; of rax brackets the call (presumably for stack alignment).
686 define <2 x double> @frem_op0_constant(double* %p) nounwind {
687 ; SSE-LABEL: frem_op0_constant:
689 ; SSE-NEXT: pushq %rax
690 ; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
691 ; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
692 ; SSE-NEXT: callq fmod@PLT
693 ; SSE-NEXT: popq %rax
696 ; AVX-LABEL: frem_op0_constant:
698 ; AVX-NEXT: pushq %rax
699 ; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
700 ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
701 ; AVX-NEXT: callq fmod@PLT
702 ; AVX-NEXT: popq %rax
704 %x = load double, double* %p
705 %b = frem double 42.0, %x
706 %r = insertelement <2 x double> undef, double %b, i32 0
710 ; Try again with 256-bit types.
; 256-bit result type: loads an i32 from %p and inserts the computed scalar %b
; into element 0 of an undef <8 x i32>. The checks expect a scalar load,
; addl $42, and one move into xmm0; no wider vector register is named.
712 define <8 x i32> @add_op1_constant_v8i32(i32* %p) nounwind {
713 ; SSE-LABEL: add_op1_constant_v8i32:
715 ; SSE-NEXT: movl (%rdi), %eax
716 ; SSE-NEXT: addl $42, %eax
717 ; SSE-NEXT: movd %eax, %xmm0
720 ; AVX-LABEL: add_op1_constant_v8i32:
722 ; AVX-NEXT: movl (%rdi), %eax
723 ; AVX-NEXT: addl $42, %eax
724 ; AVX-NEXT: vmovd %eax, %xmm0
726 %x = load i32, i32* %p
728 %r = insertelement <8 x i32> undef, i32 %b, i32 0
; 256-bit result type: loads an i64 from %p and inserts the computed scalar %b
; into element 0 of an undef <4 x i64>. The checks expect 42 materialized in
; eax, the loaded value subtracted from it (subq with a memory operand), and a
; 64-bit move into xmm0.
732 define <4 x i64> @sub_op0_constant_v4i64(i64* %p) nounwind {
733 ; SSE-LABEL: sub_op0_constant_v4i64:
735 ; SSE-NEXT: movl $42, %eax
736 ; SSE-NEXT: subq (%rdi), %rax
737 ; SSE-NEXT: movq %rax, %xmm0
740 ; AVX-LABEL: sub_op0_constant_v4i64:
742 ; AVX-NEXT: movl $42, %eax
743 ; AVX-NEXT: subq (%rdi), %rax
744 ; AVX-NEXT: vmovq %rax, %xmm0
746 %x = load i64, i64* %p
748 %r = insertelement <4 x i64> undef, i64 %b, i32 0
; 256-bit result type: loads an i32 from %p and inserts the computed scalar %b
; into element 0 of an undef <8 x i32>. The checks expect the load folded into
; imull $42 with a memory source, followed by one move into xmm0.
752 define <8 x i32> @mul_op1_constant_v8i32(i32* %p) nounwind {
753 ; SSE-LABEL: mul_op1_constant_v8i32:
755 ; SSE-NEXT: imull $42, (%rdi), %eax
756 ; SSE-NEXT: movd %eax, %xmm0
759 ; AVX-LABEL: mul_op1_constant_v8i32:
761 ; AVX-NEXT: imull $42, (%rdi), %eax
762 ; AVX-NEXT: vmovd %eax, %xmm0
764 %x = load i32, i32* %p
766 %r = insertelement <8 x i32> undef, i32 %b, i32 0
; 256-bit result type: loads an i64 from %p and inserts the computed scalar %b
; into element 0 of an undef <4 x i64>. The checks expect a 64-bit load,
; orq $42 in a GPR, and a 64-bit move into xmm0.
770 define <4 x i64> @or_op1_constant_v4i64(i64* %p) nounwind {
771 ; SSE-LABEL: or_op1_constant_v4i64:
773 ; SSE-NEXT: movq (%rdi), %rax
774 ; SSE-NEXT: orq $42, %rax
775 ; SSE-NEXT: movq %rax, %xmm0
778 ; AVX-LABEL: or_op1_constant_v4i64:
780 ; AVX-NEXT: movq (%rdi), %rax
781 ; AVX-NEXT: orq $42, %rax
782 ; AVX-NEXT: vmovq %rax, %xmm0
784 %x = load i64, i64* %p
786 %r = insertelement <4 x i64> undef, i64 %b, i32 0
790 ; Try again with 512-bit types.
; 512-bit result type: loads an i32 from %p and inserts the computed scalar %b
; into element 0 of an undef <16 x i32>. The checks expect a scalar load,
; addl $42, and one move into xmm0; no wider vector register is named.
792 define <16 x i32> @add_op1_constant_v16i32(i32* %p) nounwind {
793 ; SSE-LABEL: add_op1_constant_v16i32:
795 ; SSE-NEXT: movl (%rdi), %eax
796 ; SSE-NEXT: addl $42, %eax
797 ; SSE-NEXT: movd %eax, %xmm0
800 ; AVX-LABEL: add_op1_constant_v16i32:
802 ; AVX-NEXT: movl (%rdi), %eax
803 ; AVX-NEXT: addl $42, %eax
804 ; AVX-NEXT: vmovd %eax, %xmm0
806 %x = load i32, i32* %p
808 %r = insertelement <16 x i32> undef, i32 %b, i32 0
; 512-bit result type: loads an i64 from %p and inserts the computed scalar %b
; into element 0 of an undef <8 x i64>. The checks expect 42 materialized in
; eax, the loaded value subtracted from it (subq with a memory operand), and a
; 64-bit move into xmm0.
812 define <8 x i64> @sub_op0_constant_v8i64(i64* %p) nounwind {
813 ; SSE-LABEL: sub_op0_constant_v8i64:
815 ; SSE-NEXT: movl $42, %eax
816 ; SSE-NEXT: subq (%rdi), %rax
817 ; SSE-NEXT: movq %rax, %xmm0
820 ; AVX-LABEL: sub_op0_constant_v8i64:
822 ; AVX-NEXT: movl $42, %eax
823 ; AVX-NEXT: subq (%rdi), %rax
824 ; AVX-NEXT: vmovq %rax, %xmm0
826 %x = load i64, i64* %p
828 %r = insertelement <8 x i64> undef, i64 %b, i32 0
; 512-bit result type: loads an i32 from %p and inserts the computed scalar %b
; into element 0 of an undef <16 x i32>. The checks expect the load folded
; into imull $42 with a memory source, followed by one move into xmm0.
832 define <16 x i32> @mul_op1_constant_v16i32(i32* %p) nounwind {
833 ; SSE-LABEL: mul_op1_constant_v16i32:
835 ; SSE-NEXT: imull $42, (%rdi), %eax
836 ; SSE-NEXT: movd %eax, %xmm0
839 ; AVX-LABEL: mul_op1_constant_v16i32:
841 ; AVX-NEXT: imull $42, (%rdi), %eax
842 ; AVX-NEXT: vmovd %eax, %xmm0
844 %x = load i32, i32* %p
846 %r = insertelement <16 x i32> undef, i32 %b, i32 0
; 512-bit result type: loads an i64 from %p and inserts the computed scalar %b
; into element 0 of an undef <8 x i64>. The checks expect a 64-bit load,
; orq $42 in a GPR, and a 64-bit move into xmm0.
850 define <8 x i64> @or_op1_constant_v8i64(i64* %p) nounwind {
851 ; SSE-LABEL: or_op1_constant_v8i64:
853 ; SSE-NEXT: movq (%rdi), %rax
854 ; SSE-NEXT: orq $42, %rax
855 ; SSE-NEXT: movq %rax, %xmm0
858 ; AVX-LABEL: or_op1_constant_v8i64:
860 ; AVX-NEXT: movq (%rdi), %rax
861 ; AVX-NEXT: orq $42, %rax
862 ; AVX-NEXT: vmovq %rax, %xmm0
864 %x = load i64, i64* %p
866 %r = insertelement <8 x i64> undef, i64 %b, i32 0