1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; foo: signed remainder (srem) of two <4 x i32> vectors.
; The assertions below expect the operation to be scalarized: each lane of
; both operands is moved into %eax / %ecx, divided with idivl, and the
; remainder is taken from %edx before the four results are re-packed.
; NOTE(review): in the lines visible here no instruction that sign-extends
; %eax into %edx is checked before each idivl, and no label check, ret or
; closing brace is visible; confirm against freshly regenerated assertions.
4 define <4 x i32> @foo(<4 x i32> %t, <4 x i32> %u) nounwind {
; Lane 3: shuffle element 3 of each operand into position 0, then extract.
7 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
8 ; CHECK-NEXT: movd %xmm2, %eax
9 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
10 ; CHECK-NEXT: movd %xmm2, %ecx
12 ; CHECK-NEXT: idivl %ecx
13 ; CHECK-NEXT: movd %edx, %xmm2
; Lane 2: same pattern using element 2.
14 ; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
15 ; CHECK-NEXT: movd %xmm3, %eax
16 ; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
17 ; CHECK-NEXT: movd %xmm3, %ecx
19 ; CHECK-NEXT: idivl %ecx
20 ; CHECK-NEXT: movd %edx, %xmm3
; Interleave the lane-2 and lane-3 remainders into xmm3.
21 ; CHECK-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; Lane 0: element 0 is read directly, no shuffle needed.
22 ; CHECK-NEXT: movd %xmm0, %eax
23 ; CHECK-NEXT: movd %xmm1, %ecx
25 ; CHECK-NEXT: idivl %ecx
26 ; CHECK-NEXT: movd %edx, %xmm2
; Lane 1: same pattern using element 1; xmm0 is overwritten from here on.
27 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
28 ; CHECK-NEXT: movd %xmm0, %eax
29 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
30 ; CHECK-NEXT: movd %xmm0, %ecx
32 ; CHECK-NEXT: idivl %ecx
33 ; CHECK-NEXT: movd %edx, %xmm0
; Interleave lanes 0 and 1, append the lane-2/3 pair, and move the result to xmm0.
34 ; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
35 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
36 ; CHECK-NEXT: movdqa %xmm2, %xmm0
; IR under test: element-wise signed remainder.
38 %m = srem <4 x i32> %t, %u
; bar: unsigned remainder (urem) of two <4 x i32> vectors.
; The assertions below expect the operation to be scalarized: each lane of
; both operands is moved into %eax / %ecx, %edx is zeroed with xorl, divl is
; issued, and the remainder is taken from %edx before the results are re-packed.
; NOTE(review): no label check, ret or closing brace is visible in this
; excerpt; they may simply lie outside the lines shown.
42 define <4 x i32> @bar(<4 x i32> %t, <4 x i32> %u) nounwind {
; Lane 3: shuffle element 3 of each operand into position 0, then extract.
45 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[3,1,2,3]
46 ; CHECK-NEXT: movd %xmm2, %eax
47 ; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm1[3,1,2,3]
48 ; CHECK-NEXT: movd %xmm2, %ecx
49 ; CHECK-NEXT: xorl %edx, %edx
50 ; CHECK-NEXT: divl %ecx
51 ; CHECK-NEXT: movd %edx, %xmm2
; Lane 2: same pattern using element 2.
52 ; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,0,1]
53 ; CHECK-NEXT: movd %xmm3, %eax
54 ; CHECK-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
55 ; CHECK-NEXT: movd %xmm3, %ecx
56 ; CHECK-NEXT: xorl %edx, %edx
57 ; CHECK-NEXT: divl %ecx
58 ; CHECK-NEXT: movd %edx, %xmm3
; Interleave the lane-2 and lane-3 remainders into xmm3.
59 ; CHECK-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; Lane 0: element 0 is read directly, no shuffle needed.
60 ; CHECK-NEXT: movd %xmm0, %eax
61 ; CHECK-NEXT: movd %xmm1, %ecx
62 ; CHECK-NEXT: xorl %edx, %edx
63 ; CHECK-NEXT: divl %ecx
64 ; CHECK-NEXT: movd %edx, %xmm2
; Lane 1: same pattern using element 1; xmm0 is overwritten from here on.
65 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
66 ; CHECK-NEXT: movd %xmm0, %eax
67 ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
68 ; CHECK-NEXT: movd %xmm0, %ecx
69 ; CHECK-NEXT: xorl %edx, %edx
70 ; CHECK-NEXT: divl %ecx
71 ; CHECK-NEXT: movd %edx, %xmm0
; Interleave lanes 0 and 1, append the lane-2/3 pair, and move the result to xmm0.
72 ; CHECK-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
73 ; CHECK-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
74 ; CHECK-NEXT: movdqa %xmm2, %xmm0
; IR under test: element-wise unsigned remainder.
76 %m = urem <4 x i32> %t, %u
; qux: floating-point remainder (frem) of two <4 x float> vectors.
; The assertions below expect four scalar calls to fmodf, one per lane. Both
; input vectors are spilled to a 72-byte stack area and reloaded before each
; call, and partial results are spilled between calls and merged at the end.
; NOTE(review): no label check, ret or closing brace is visible in this
; excerpt; they may simply lie outside the lines shown.
80 define <4 x float> @qux(<4 x float> %t, <4 x float> %u) nounwind {
; Reserve stack space and save both operands so they can be reloaded after each call.
83 ; CHECK-NEXT: subq $72, %rsp
84 ; CHECK-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
85 ; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; Call 1: element 3 of each operand is shuffled into position 0.
86 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
87 ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
88 ; CHECK-NEXT: callq fmodf
89 ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; Call 2: movhlps moves the upper half down, so element 2 lands in position 0.
90 ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
91 ; CHECK-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
92 ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
93 ; CHECK-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
94 ; CHECK-NEXT: callq fmodf
; Interleave this result with the spilled call-1 result and spill the pair.
95 ; CHECK-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload
96 ; CHECK-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
97 ; CHECK-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; Call 3: element 0 is used as reloaded, no shuffle needed.
98 ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
99 ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
100 ; CHECK-NEXT: callq fmodf
101 ; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; Call 4: element 1 of each operand is shuffled into position 0.
102 ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
103 ; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
104 ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
105 ; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3]
106 ; CHECK-NEXT: callq fmodf
; Interleave the call-3 and call-4 results, append the spilled pair, and
; move the full vector to xmm0 before releasing the stack area.
107 ; CHECK-NEXT: movaps {{[0-9]+}}(%rsp), %xmm1 # 16-byte Reload
108 ; CHECK-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
109 ; CHECK-NEXT: unpcklpd (%rsp), %xmm1 # 16-byte Folded Reload
110 ; CHECK-NEXT: # xmm1 = xmm1[0],mem[0]
111 ; CHECK-NEXT: movaps %xmm1, %xmm0
112 ; CHECK-NEXT: addq $72, %rsp
; IR under test: element-wise floating-point remainder.
114 %m = frem <4 x float> %t, %u