; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefix=X64
define i32 @test1(i32 %x) {
; X86-LABEL: test1:
; X86:       # %bb.0:
; X86-NEXT:    imull $-1030792151, {{[0-9]+}}(%esp), %eax # imm = 0xC28F5C29
; X86-NEXT:    retl
;
; X64-LABEL: test1:
; X64:       # %bb.0:
; X64-NEXT:    imull $-1030792151, %edi, %eax # imm = 0xC28F5C29
; X64-NEXT:    retq
  %div = udiv exact i32 %x, 25
  ret i32 %div
}
define i32 @test2(i32 %x) {
; X86-LABEL: test2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $3, %eax
; X86-NEXT:    imull $-1431655765, %eax, %eax # imm = 0xAAAAAAAB
; X86-NEXT:    retl
;
; X64-LABEL: test2:
; X64:       # %bb.0:
; X64-NEXT:    shrl $3, %edi
; X64-NEXT:    imull $-1431655765, %edi, %eax # imm = 0xAAAAAAAB
; X64-NEXT:    retq
  %div = udiv exact i32 %x, 24
  ret i32 %div
}
define <4 x i32> @test3(<4 x i32> %x) {
; X86-LABEL: test3:
; X86:       # %bb.0:
; X86-NEXT:    psrld $3, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    retl
;
; X64-LABEL: test3:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $3, %xmm0, %xmm0
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2863311531,2863311531,2863311531,2863311531]
; X64-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %div = udiv exact <4 x i32> %x, <i32 24, i32 24, i32 24, i32 24>
  ret <4 x i32> %div
}
define <4 x i32> @test4(<4 x i32> %x) {
; X86-LABEL: test4:
; X86:       # %bb.0:
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; X86-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    pmuludq %xmm1, %xmm2
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    retl
;
; X64-LABEL: test4:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
; X64-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %div = udiv exact <4 x i32> %x, <i32 25, i32 25, i32 25, i32 25>
  ret <4 x i32> %div
}
define <4 x i32> @test5(<4 x i32> %x) {
; X86-LABEL: test5:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrld $3, %xmm1
; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X86-NEXT:    movdqa {{.*#+}} xmm0 = [2863311531,2863311531,3264175145,3264175145]
; X86-NEXT:    pmuludq %xmm1, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    retl
;
; X64-LABEL: test5:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %div = udiv exact <4 x i32> %x, <i32 24, i32 24, i32 25, i32 25>
  ret <4 x i32> %div
}
define <4 x i32> @test6(<4 x i32> %x) {
; X86-LABEL: test6:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrld $3, %xmm1
; X86-NEXT:    psrld $1, %xmm0
; X86-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [2863311531,2863311531,3303820997,3303820997]
; X86-NEXT:    pmuludq %xmm0, %xmm1
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-NEXT:    movdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test6:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %div = udiv exact <4 x i32> %x, <i32 24, i32 24, i32 26, i32 26>
  ret <4 x i32> %div
}
define <4 x i32> @test7(<4 x i32> %x) {
; X86-LABEL: test7:
; X86:       # %bb.0:
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; X86-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; X86-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    retl
;
; X64-LABEL: test7:
; X64:       # %bb.0:
; X64-NEXT:    vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %div = udiv exact <4 x i32> %x, <i32 25, i32 25, i32 27, i32 27>
  ret <4 x i32> %div
}
149 define <4 x i32> @test8(<4 x i32> %x) {
152 ; X86-NEXT: movdqa %xmm0, %xmm1
153 ; X86-NEXT: psrld $3, %xmm1
154 ; X86-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
155 ; X86-NEXT: movdqa {{.*#+}} xmm0 = [1,1,2863311531,2863311531]
156 ; X86-NEXT: pmuludq %xmm1, %xmm0
157 ; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
158 ; X86-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3]
159 ; X86-NEXT: pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
160 ; X86-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
161 ; X86-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
166 ; X64-NEXT: vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
167 ; X64-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
169 %div = udiv exact <4 x i32> %x, <i32 1, i32 1, i32 24, i32 24>