1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
3 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE41
4 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX1
5 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2
6 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512VL
; Lane-wise test of (X srem <5, 14, 25, 100>) == 0 on a <4 x i32> vector.
; The divisor vector mixes odd (5, 25) and even (14, 100) constants.
; The assertion lines inside the function are autogenerated (see the note at
; the top of the file) and list the expected assembly per RUN configuration;
; regenerate them with the update script instead of editing them by hand.
9 define <4 x i32> @test_srem_odd_even(<4 x i32> %X) nounwind {
10 ; CHECK-SSE2-LABEL: test_srem_odd_even:
11 ; CHECK-SSE2: # %bb.0:
12 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,0,0]
13 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
14 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
15 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
16 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
17 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
18 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
19 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
20 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
21 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2454267027,1374389535,1374389535]
22 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
23 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
24 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
25 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
26 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
27 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
28 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
29 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
30 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
31 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
32 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
33 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,0,0]
34 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
35 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
36 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
37 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
38 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
39 ; CHECK-SSE2-NEXT: psrad $5, %xmm3
40 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
41 ; CHECK-SSE2-NEXT: psrad $3, %xmm4
42 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5
43 ; CHECK-SSE2-NEXT: psrad $1, %xmm5
44 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
45 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
46 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
47 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
48 ; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2
49 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,25,100]
50 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
51 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
52 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
53 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
54 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
55 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
56 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
57 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
58 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
59 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
60 ; CHECK-SSE2-NEXT: retq
62 ; CHECK-SSE41-LABEL: test_srem_odd_even:
63 ; CHECK-SSE41: # %bb.0:
64 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,1374389535,1374389535]
65 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
66 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
67 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
68 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
69 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
70 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
71 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,0,0]
72 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
73 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
74 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
75 ; CHECK-SSE41-NEXT: psrad $5, %xmm1
76 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
77 ; CHECK-SSE41-NEXT: psrad $3, %xmm3
78 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
79 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm4
80 ; CHECK-SSE41-NEXT: psrad $1, %xmm4
81 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm3[4,5,6,7]
82 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm1[2,3],xmm4[4,5],xmm1[6,7]
83 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
84 ; CHECK-SSE41-NEXT: paddd %xmm4, %xmm2
85 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
86 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
87 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
88 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
89 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
90 ; CHECK-SSE41-NEXT: retq
92 ; CHECK-AVX1-LABEL: test_srem_odd_even:
93 ; CHECK-AVX1: # %bb.0:
94 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,1374389535,1374389535]
95 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
96 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
97 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
98 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
99 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
100 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
101 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
102 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
103 ; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2
104 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
105 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
106 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm4
107 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
108 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
109 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
110 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
111 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
112 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
113 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
114 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
115 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
116 ; CHECK-AVX1-NEXT: retq
118 ; CHECK-AVX2-LABEL: test_srem_odd_even:
119 ; CHECK-AVX2: # %bb.0:
120 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,1374389535,1374389535]
121 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
122 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
123 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
124 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
125 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
126 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
127 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
128 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
129 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
130 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
131 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
132 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
133 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
134 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
135 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
136 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
137 ; CHECK-AVX2-NEXT: retq
139 ; CHECK-AVX512VL-LABEL: test_srem_odd_even:
140 ; CHECK-AVX512VL: # %bb.0:
141 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
142 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
143 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
144 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
145 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
146 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
147 ; CHECK-AVX512VL-NEXT: retq
; IR under test: signed remainder by the constant vector, per-lane compare
; against zero, then zero-extension of the i1 mask (1 where the remainder is
; zero, 0 otherwise).
148 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 25, i32 100>
149 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
150 %ret = zext <4 x i1> %cmp to <4 x i32>
154 ;==============================================================================;
156 ; One all-ones divisor (4294967295, the all-ones i32 bit pattern) among otherwise odd divisors
; Lane-wise test of (X srem <5, 5, 4294967295, 5>) == 0 on a <4 x i32> vector.
; Lane 2 uses 4294967295, the all-ones 32-bit pattern; the other lanes use the
; odd divisor 5.
; The assertion lines inside the function are autogenerated (see the note at
; the top of the file) and list the expected assembly per RUN configuration;
; regenerate them with the update script instead of editing them by hand.
157 define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind {
158 ; CHECK-SSE2-LABEL: test_srem_odd_allones_eq:
159 ; CHECK-SSE2: # %bb.0:
160 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
161 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
162 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
163 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
164 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
165 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
166 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
167 ; CHECK-SSE2-NEXT: paddd {{.*}}(%rip), %xmm0
168 ; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
169 ; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
170 ; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
171 ; CHECK-SSE2-NEXT: retq
173 ; CHECK-SSE41-LABEL: test_srem_odd_allones_eq:
174 ; CHECK-SSE41: # %bb.0:
175 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
176 ; CHECK-SSE41-NEXT: paddd {{.*}}(%rip), %xmm0
177 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
178 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
179 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
180 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
181 ; CHECK-SSE41-NEXT: retq
183 ; CHECK-AVX1-LABEL: test_srem_odd_allones_eq:
184 ; CHECK-AVX1: # %bb.0:
185 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
186 ; CHECK-AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
187 ; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
188 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
189 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
190 ; CHECK-AVX1-NEXT: retq
192 ; CHECK-AVX2-LABEL: test_srem_odd_allones_eq:
193 ; CHECK-AVX2: # %bb.0:
194 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
195 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
196 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
197 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
198 ; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
199 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
200 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
201 ; CHECK-AVX2-NEXT: retq
203 ; CHECK-AVX512VL-LABEL: test_srem_odd_allones_eq:
204 ; CHECK-AVX512VL: # %bb.0:
205 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
206 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
207 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
208 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
209 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
210 ; CHECK-AVX512VL-NEXT: retq
; IR under test: signed remainder by the constant vector, per-lane equality
; compare against zero, then zero-extension of the i1 mask (1 where the
; remainder is zero, 0 otherwise).
211 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
212 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
213 %ret = zext <4 x i1> %cmp to <4 x i32>
; Lane-wise test of (X srem <5, 5, 4294967295, 5>) != 0 on a <4 x i32> vector.
; Same divisors as the "eq" variant of this test, but with the inverted
; predicate (icmp ne). Lane 2 uses 4294967295, the all-ones 32-bit pattern.
; The assertion lines inside the function are autogenerated (see the note at
; the top of the file) and list the expected assembly per RUN configuration;
; regenerate them with the update script instead of editing them by hand.
216 define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind {
217 ; CHECK-SSE2-LABEL: test_srem_odd_allones_ne:
218 ; CHECK-SSE2: # %bb.0:
219 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
220 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
221 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
222 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
223 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
224 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
225 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
226 ; CHECK-SSE2-NEXT: paddd {{.*}}(%rip), %xmm0
227 ; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
228 ; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
229 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
230 ; CHECK-SSE2-NEXT: retq
232 ; CHECK-SSE41-LABEL: test_srem_odd_allones_ne:
233 ; CHECK-SSE41: # %bb.0:
234 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
235 ; CHECK-SSE41-NEXT: paddd {{.*}}(%rip), %xmm0
236 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
237 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
238 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
239 ; CHECK-SSE41-NEXT: pandn {{.*}}(%rip), %xmm0
240 ; CHECK-SSE41-NEXT: retq
242 ; CHECK-AVX1-LABEL: test_srem_odd_allones_ne:
243 ; CHECK-AVX1: # %bb.0:
244 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
245 ; CHECK-AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
246 ; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
247 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
248 ; CHECK-AVX1-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0
249 ; CHECK-AVX1-NEXT: retq
251 ; CHECK-AVX2-LABEL: test_srem_odd_allones_ne:
252 ; CHECK-AVX2: # %bb.0:
253 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
254 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
255 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
256 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
257 ; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
258 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
259 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
260 ; CHECK-AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
261 ; CHECK-AVX2-NEXT: retq
263 ; CHECK-AVX512VL-LABEL: test_srem_odd_allones_ne:
264 ; CHECK-AVX512VL: # %bb.0:
265 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
266 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
267 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
268 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
269 ; CHECK-AVX512VL-NEXT: vpandnd {{.*}}(%rip){1to4}, %xmm0, %xmm0
270 ; CHECK-AVX512VL-NEXT: retq
; IR under test: signed remainder by the constant vector, per-lane inequality
; compare against zero, then zero-extension of the i1 mask (1 where the
; remainder is non-zero, 0 otherwise).
271 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 4294967295, i32 5>
272 %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
273 %ret = zext <4 x i1> %cmp to <4 x i32>
277 ; One all-ones divisor (4294967295, the all-ones i32 bit pattern) among otherwise even divisors
; Lane-wise test of (X srem <14, 14, 4294967295, 14>) == 0 on a <4 x i32>
; vector. Lane 2 uses 4294967295, the all-ones 32-bit pattern; the other lanes
; use the even divisor 14.
; The assertion lines inside the function are autogenerated (see the note at
; the top of the file) and list the expected assembly per RUN configuration;
; regenerate them with the update script instead of editing them by hand.
278 define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind {
279 ; CHECK-SSE2-LABEL: test_srem_even_allones_eq:
280 ; CHECK-SSE2: # %bb.0:
281 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295]
282 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
283 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm3
284 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
285 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
286 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
287 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,2454267027,0,2454267027]
288 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
289 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4
290 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm5
291 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,3,2,3]
292 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
293 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2454267027,2454267027,2454267027,2454267027]
294 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm6
295 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,3,2,3]
296 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1]
297 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm3
298 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = <1,u,4294967295,u>
299 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm4
300 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
301 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm5
302 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
303 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1]
304 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4
305 ; CHECK-SSE2-NEXT: movdqa %xmm4, %xmm3
306 ; CHECK-SSE2-NEXT: psrad $3, %xmm3
307 ; CHECK-SSE2-NEXT: movdqa %xmm4, %xmm5
308 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm3[3,0]
309 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,1],xmm5[0,2]
310 ; CHECK-SSE2-NEXT: psrld $31, %xmm4
311 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm4
312 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4
313 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
314 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm4
315 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
316 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
317 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
318 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
319 ; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0
320 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
321 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
322 ; CHECK-SSE2-NEXT: retq
324 ; CHECK-SSE41-LABEL: test_srem_even_allones_eq:
325 ; CHECK-SSE41: # %bb.0:
326 ; CHECK-SSE41-NEXT: movl $-1840700269, %eax # imm = 0x92492493
327 ; CHECK-SSE41-NEXT: movd %eax, %xmm1
328 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
329 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
330 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
331 ; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm2
332 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
333 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,4294967295,1]
334 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1
335 ; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1
336 ; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
337 ; CHECK-SSE41-NEXT: psrad $3, %xmm2
338 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
339 ; CHECK-SSE41-NEXT: psrld $31, %xmm1
340 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
341 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
342 ; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1
343 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
344 ; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
345 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
346 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
347 ; CHECK-SSE41-NEXT: retq
349 ; CHECK-AVX1-LABEL: test_srem_even_allones_eq:
350 ; CHECK-AVX1: # %bb.0:
351 ; CHECK-AVX1-NEXT: movl $-1840700269, %eax # imm = 0x92492493
352 ; CHECK-AVX1-NEXT: vmovd %eax, %xmm1
353 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
354 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
355 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
356 ; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm2, %xmm2
357 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
358 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
359 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
360 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
361 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
362 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
363 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
364 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
365 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
366 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
367 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
368 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
369 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
370 ; CHECK-AVX1-NEXT: retq
372 ; CHECK-AVX2-LABEL: test_srem_even_allones_eq:
373 ; CHECK-AVX2: # %bb.0:
374 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
375 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
376 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
377 ; CHECK-AVX2-NEXT: movl $-1840700269, %eax # imm = 0x92492493
378 ; CHECK-AVX2-NEXT: vmovd %eax, %xmm2
379 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
380 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
381 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
382 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
383 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
384 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
385 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
386 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
387 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
388 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
389 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
390 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
391 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
392 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
393 ; CHECK-AVX2-NEXT: retq
395 ; CHECK-AVX512VL-LABEL: test_srem_even_allones_eq:
396 ; CHECK-AVX512VL: # %bb.0:
397 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
398 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
399 ; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0
400 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
401 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
402 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
403 ; CHECK-AVX512VL-NEXT: retq
; IR under test: signed remainder by the constant vector, per-lane equality
; compare against zero, then zero-extension of the i1 mask (1 where the
; remainder is zero, 0 otherwise).
404 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
405 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
406 %ret = zext <4 x i1> %cmp to <4 x i32>
; Lane-wise test of (X srem <14, 14, 4294967295, 14>) != 0 on a <4 x i32>
; vector. Same divisors as the "eq" variant of this test, but with the
; inverted predicate (icmp ne). Lane 2 uses 4294967295, the all-ones 32-bit
; pattern.
; The assertion lines inside the function are autogenerated (see the note at
; the top of the file) and list the expected assembly per RUN configuration;
; regenerate them with the update script instead of editing them by hand.
409 define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind {
410 ; CHECK-SSE2-LABEL: test_srem_even_allones_ne:
411 ; CHECK-SSE2: # %bb.0:
412 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295]
413 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
414 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm3
415 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
416 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
417 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
418 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,2454267027,0,2454267027]
419 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
420 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4
421 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm5
422 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
423 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
424 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
425 ; CHECK-SSE2-NEXT: pmuludq %xmm6, %xmm3
426 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
427 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1]
428 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm5
429 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1]
430 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm6
431 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm6[0,2,2,3]
432 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm6 = <1,u,4294967295,u>
433 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm6
434 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[0,2,2,3]
435 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1]
436 ; CHECK-SSE2-NEXT: paddd %xmm5, %xmm6
437 ; CHECK-SSE2-NEXT: movdqa %xmm6, %xmm4
438 ; CHECK-SSE2-NEXT: psrad $3, %xmm4
439 ; CHECK-SSE2-NEXT: movdqa %xmm6, %xmm5
440 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm4[3,0]
441 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0,2]
442 ; CHECK-SSE2-NEXT: psrld $31, %xmm6
443 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm6
444 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm6
445 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
446 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm6
447 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm6[0,2,2,3]
448 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
449 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
450 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
451 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm0
452 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
453 ; CHECK-SSE2-NEXT: pandn %xmm3, %xmm0
454 ; CHECK-SSE2-NEXT: retq
456 ; CHECK-SSE41-LABEL: test_srem_even_allones_ne:
457 ; CHECK-SSE41: # %bb.0:
458 ; CHECK-SSE41-NEXT: movl $-1840700269, %eax # imm = 0x92492493
459 ; CHECK-SSE41-NEXT: movd %eax, %xmm1
460 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
461 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
462 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
463 ; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm2
464 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
465 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,4294967295,1]
466 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1
467 ; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1
468 ; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
469 ; CHECK-SSE41-NEXT: psrad $3, %xmm2
470 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
471 ; CHECK-SSE41-NEXT: psrld $31, %xmm1
472 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
473 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
474 ; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1
475 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
476 ; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
477 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
478 ; CHECK-SSE41-NEXT: pandn {{.*}}(%rip), %xmm0
479 ; CHECK-SSE41-NEXT: retq
481 ; CHECK-AVX1-LABEL: test_srem_even_allones_ne:
482 ; CHECK-AVX1: # %bb.0:
483 ; CHECK-AVX1-NEXT: movl $-1840700269, %eax # imm = 0x92492493
484 ; CHECK-AVX1-NEXT: vmovd %eax, %xmm1
485 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
486 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
487 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
488 ; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm2, %xmm2
489 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
490 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
491 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
492 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
493 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
494 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
495 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
496 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
497 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
498 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
499 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
500 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
501 ; CHECK-AVX1-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0
502 ; CHECK-AVX1-NEXT: retq
504 ; CHECK-AVX2-LABEL: test_srem_even_allones_ne:
505 ; CHECK-AVX2: # %bb.0:
506 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
507 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
508 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
509 ; CHECK-AVX2-NEXT: movl $-1840700269, %eax # imm = 0x92492493
510 ; CHECK-AVX2-NEXT: vmovd %eax, %xmm2
511 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
512 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
513 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
514 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
515 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
516 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
517 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
518 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
519 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
520 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
521 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
522 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
523 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
524 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
525 ; CHECK-AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
526 ; CHECK-AVX2-NEXT: retq
528 ; CHECK-AVX512VL-LABEL: test_srem_even_allones_ne:
529 ; CHECK-AVX512VL: # %bb.0:
530 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
531 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
532 ; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0
533 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
534 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
535 ; CHECK-AVX512VL-NEXT: vpandnd {{.*}}(%rip){1to4}, %xmm0, %xmm0
536 ; CHECK-AVX512VL-NEXT: retq
; IR under test: signed remainder by the constant vector, per-lane inequality
; compare against zero, then zero-extension of the i1 mask (1 where the
; remainder is non-zero, 0 otherwise).
537 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 4294967295, i32 14>
538 %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
539 %ret = zext <4 x i1> %cmp to <4 x i32>
543 ; One all-ones divisor (4294967295, the all-ones i32 bit pattern) among mixed odd and even divisors
544 define <4 x i32> @test_srem_odd_even_allones_eq(<4 x i32> %X) nounwind {
545 ; CHECK-SSE2-LABEL: test_srem_odd_even_allones_eq:
546 ; CHECK-SSE2: # %bb.0:
547 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4294967295,0]
548 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
549 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
550 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
551 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
552 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
553 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
554 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
555 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
556 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2454267027,0,1374389535]
557 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
558 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
559 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
560 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
561 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
562 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
563 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
564 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
565 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
566 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
567 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
568 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,0,0]
569 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
570 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
571 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
572 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
573 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
574 ; CHECK-SSE2-NEXT: psrad $5, %xmm3
575 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
576 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
577 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
578 ; CHECK-SSE2-NEXT: psrad $3, %xmm3
579 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5
580 ; CHECK-SSE2-NEXT: psrad $1, %xmm5
581 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0]
582 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
583 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
584 ; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2
585 ; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2
586 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,4294967295,100]
587 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
588 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
589 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
590 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
591 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
592 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
593 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
594 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
595 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
596 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
597 ; CHECK-SSE2-NEXT: retq
599 ; CHECK-SSE41-LABEL: test_srem_odd_even_allones_eq:
600 ; CHECK-SSE41: # %bb.0:
601 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
602 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
603 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
604 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
605 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
606 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
607 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
608 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4294967295,0]
609 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
610 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
611 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
612 ; CHECK-SSE41-NEXT: psrad $5, %xmm1
613 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
614 ; CHECK-SSE41-NEXT: psrad $3, %xmm3
615 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
616 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
617 ; CHECK-SSE41-NEXT: psrad $1, %xmm1
618 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
619 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
620 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
621 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
622 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7]
623 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
624 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
625 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
626 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
627 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
628 ; CHECK-SSE41-NEXT: retq
630 ; CHECK-AVX1-LABEL: test_srem_odd_even_allones_eq:
631 ; CHECK-AVX1: # %bb.0:
632 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
633 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
634 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
635 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
636 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
637 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
638 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
639 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
640 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
641 ; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2
642 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
643 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
644 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
645 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
646 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
647 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
648 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
649 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
650 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
651 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
652 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
653 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
654 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
655 ; CHECK-AVX1-NEXT: retq
657 ; CHECK-AVX2-LABEL: test_srem_odd_even_allones_eq:
658 ; CHECK-AVX2: # %bb.0:
659 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
660 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
661 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
662 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
663 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
664 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
665 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
666 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
667 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
668 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
669 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
670 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
671 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
672 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
673 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
674 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
675 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
676 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
677 ; CHECK-AVX2-NEXT: retq
679 ; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_eq:
680 ; CHECK-AVX512VL: # %bb.0:
681 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
682 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
683 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
684 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
685 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
686 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
687 ; CHECK-AVX512VL-NEXT: retq
688 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
689 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
690 %ret = zext <4 x i1> %cmp to <4 x i32>
; Lane-wise `srem` of %X by the constant vector <5, 14, 4294967295, 100>,
; compared against zero with `icmp ne`, then zero-extended to <4 x i32>.
; As an i32, 4294967295 has every bit set; that is the "allones" lane the
; function name refers to.
; Each CHECK-<prefix> group below is the expected llc output for the RUN line
; using that prefix at the top of the file. The assertions are autogenerated
; by utils/update_llc_test_checks.py: regenerate them instead of hand-editing.
693 define <4 x i32> @test_srem_odd_even_allones_ne(<4 x i32> %X) nounwind {
694 ; CHECK-SSE2-LABEL: test_srem_odd_even_allones_ne:
695 ; CHECK-SSE2: # %bb.0:
696 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4294967295,0]
697 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
698 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
699 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
700 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
701 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
702 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
703 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
704 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
705 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2454267027,0,1374389535]
706 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
707 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
708 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
709 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
710 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
711 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
712 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
713 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
714 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
715 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
716 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
717 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,0,0]
718 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
719 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
720 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
721 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
722 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
723 ; CHECK-SSE2-NEXT: psrad $5, %xmm3
724 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
725 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
726 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
727 ; CHECK-SSE2-NEXT: psrad $3, %xmm3
728 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5
729 ; CHECK-SSE2-NEXT: psrad $1, %xmm5
730 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0]
731 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
732 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
733 ; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2
734 ; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2
735 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,4294967295,100]
736 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
737 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
738 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
739 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
740 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
741 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
742 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
743 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
744 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
745 ; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
746 ; CHECK-SSE2-NEXT: retq
748 ; CHECK-SSE41-LABEL: test_srem_odd_even_allones_ne:
749 ; CHECK-SSE41: # %bb.0:
750 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
751 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
752 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
753 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
754 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
755 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
756 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
757 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4294967295,0]
758 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
759 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
760 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
761 ; CHECK-SSE41-NEXT: psrad $5, %xmm1
762 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
763 ; CHECK-SSE41-NEXT: psrad $3, %xmm3
764 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
765 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
766 ; CHECK-SSE41-NEXT: psrad $1, %xmm1
767 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
768 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
769 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
770 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
771 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7]
772 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
773 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
774 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
775 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
776 ; CHECK-SSE41-NEXT: pandn {{.*}}(%rip), %xmm0
777 ; CHECK-SSE41-NEXT: retq
779 ; CHECK-AVX1-LABEL: test_srem_odd_even_allones_ne:
780 ; CHECK-AVX1: # %bb.0:
781 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
782 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
783 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
784 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
785 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
786 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
787 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
788 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
789 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
790 ; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2
791 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
792 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
793 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
794 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
795 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
796 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
797 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
798 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
799 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
800 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
801 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
802 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
803 ; CHECK-AVX1-NEXT: vpandn {{.*}}(%rip), %xmm0, %xmm0
804 ; CHECK-AVX1-NEXT: retq
806 ; CHECK-AVX2-LABEL: test_srem_odd_even_allones_ne:
807 ; CHECK-AVX2: # %bb.0:
808 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
809 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
810 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
811 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
812 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
813 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
814 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
815 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
816 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
817 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
818 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
819 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
820 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
821 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
822 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
823 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
824 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
825 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [1,1,1,1]
826 ; CHECK-AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
827 ; CHECK-AVX2-NEXT: retq
829 ; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_ne:
830 ; CHECK-AVX512VL: # %bb.0:
831 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
832 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
833 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
834 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
835 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
836 ; CHECK-AVX512VL-NEXT: vpandnd {{.*}}(%rip){1to4}, %xmm0, %xmm0
837 ; CHECK-AVX512VL-NEXT: retq
; IR under test: remainder, compare-not-equal with zero, widen i1 lanes to i32.
838 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 4294967295, i32 100>
839 %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
840 %ret = zext <4 x i1> %cmp to <4 x i32>
844 ;------------------------------------------------------------------------------;
846 ; One power-of-two divisor among otherwise odd divisors
; Lane-wise `srem` of %X by the constant vector <5, 5, 16, 5> (lane 2 is the
; power of two), compared against zero with `icmp eq`, then zero-extended to
; <4 x i32>.
; Each CHECK-<prefix> group below is the expected llc output for the RUN line
; using that prefix at the top of the file. In these expectations only the
; AVX512VL group is the short vpmulld/vpaddd/vprorvd/vpminud/vpcmpeqd sequence;
; the other groups multiply, shift, multiply back by the divisor and subtract
; before comparing with zero.
; The assertions are autogenerated by utils/update_llc_test_checks.py:
; regenerate them instead of hand-editing.
847 define <4 x i32> @test_srem_odd_poweroftwo(<4 x i32> %X) nounwind {
848 ; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo:
849 ; CHECK-SSE2: # %bb.0:
850 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
851 ; CHECK-SSE2-NEXT: pxor %xmm2, %xmm2
852 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
853 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1717986919,1717986919,2147483649,1717986919]
854 ; CHECK-SSE2-NEXT: pand %xmm3, %xmm2
855 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [0,0,4294967295,0]
856 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm4
857 ; CHECK-SSE2-NEXT: paddd %xmm2, %xmm4
858 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm3
859 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
860 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
861 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
862 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
863 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
864 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2
865 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = <0,u,1,u>
866 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm3
867 ; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm3
868 ; CHECK-SSE2-NEXT: paddd %xmm2, %xmm3
869 ; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm2
870 ; CHECK-SSE2-NEXT: psrad $1, %xmm2
871 ; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm4
872 ; CHECK-SSE2-NEXT: psrad $3, %xmm4
873 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[2,0],xmm2[3,0]
874 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm4[0,2]
875 ; CHECK-SSE2-NEXT: psrld $31, %xmm3
876 ; CHECK-SSE2-NEXT: paddd %xmm2, %xmm3
877 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
878 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
879 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
880 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
881 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
882 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
883 ; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0
884 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
885 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
886 ; CHECK-SSE2-NEXT: retq
888 ; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo:
889 ; CHECK-SSE41: # %bb.0:
890 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
891 ; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1
892 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <1717986919,u,2147483649,u>
893 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
894 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
895 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
896 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,0,1,0]
897 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1
898 ; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1
899 ; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
900 ; CHECK-SSE41-NEXT: psrad $3, %xmm2
901 ; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm3
902 ; CHECK-SSE41-NEXT: psrad $1, %xmm3
903 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
904 ; CHECK-SSE41-NEXT: psrld $31, %xmm1
905 ; CHECK-SSE41-NEXT: paddd %xmm3, %xmm1
906 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
907 ; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
908 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
909 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
910 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
911 ; CHECK-SSE41-NEXT: retq
913 ; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo:
914 ; CHECK-AVX1: # %bb.0:
915 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
916 ; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm1, %xmm1
917 ; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
918 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
919 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
920 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
921 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
922 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
923 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
924 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
925 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
926 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
927 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
928 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
929 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
930 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
931 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
932 ; CHECK-AVX1-NEXT: retq
934 ; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo:
935 ; CHECK-AVX2: # %bb.0:
936 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
937 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1717986919,1717986919,1717986919,1717986919]
938 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
939 ; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
940 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
941 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
942 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
943 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
944 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
945 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
946 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
947 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
948 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
949 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
950 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
951 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
952 ; CHECK-AVX2-NEXT: retq
954 ; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo:
955 ; CHECK-AVX512VL: # %bb.0:
956 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
957 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
958 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
959 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
960 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
961 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
962 ; CHECK-AVX512VL-NEXT: retq
; IR under test: remainder, compare-equal with zero, widen i1 lanes to i32.
963 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 16, i32 5>
964 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
965 %ret = zext <4 x i1> %cmp to <4 x i32>
969 ; One power-of-two divisor among otherwise even divisors
; Lane-wise `srem` of %X by the constant vector <14, 14, 16, 14> (lane 2 is
; the power of two), compared against zero with `icmp eq`, then zero-extended
; to <4 x i32>.
; Each CHECK-<prefix> group below is the expected llc output for the RUN line
; using that prefix at the top of the file. In these expectations only the
; AVX512VL group is the short vpmulld/vpaddd/vprorvd/vpminud/vpcmpeqd sequence;
; the other groups multiply, shift, multiply back by the divisor and subtract
; before comparing with zero.
; The assertions are autogenerated by utils/update_llc_test_checks.py:
; regenerate them instead of hand-editing.
970 define <4 x i32> @test_srem_even_poweroftwo(<4 x i32> %X) nounwind {
971 ; CHECK-SSE2-LABEL: test_srem_even_poweroftwo:
972 ; CHECK-SSE2: # %bb.0:
973 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
974 ; CHECK-SSE2-NEXT: pxor %xmm2, %xmm2
975 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
976 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2147483649,2454267027]
977 ; CHECK-SSE2-NEXT: pand %xmm3, %xmm2
978 ; CHECK-SSE2-NEXT: paddd %xmm0, %xmm2
979 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm3
980 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
981 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
982 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm4
983 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
984 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
985 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm3
986 ; CHECK-SSE2-NEXT: paddd %xmm0, %xmm3
987 ; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm2
988 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
989 ; CHECK-SSE2-NEXT: psrad $3, %xmm3
990 ; CHECK-SSE2-NEXT: paddd %xmm2, %xmm3
991 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
992 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
993 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
994 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
995 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
996 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
997 ; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0
998 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
999 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
1000 ; CHECK-SSE2-NEXT: retq
1002 ; CHECK-SSE41-LABEL: test_srem_even_poweroftwo:
1003 ; CHECK-SSE41: # %bb.0:
1004 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1005 ; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1
1006 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <2454267027,u,2147483649,u>
1007 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
1008 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1009 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1010 ; CHECK-SSE41-NEXT: paddd %xmm0, %xmm2
1011 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
1012 ; CHECK-SSE41-NEXT: psrld $31, %xmm1
1013 ; CHECK-SSE41-NEXT: psrad $3, %xmm2
1014 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
1015 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
1016 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
1017 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
1018 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
1019 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
1020 ; CHECK-SSE41-NEXT: retq
1022 ; CHECK-AVX1-LABEL: test_srem_even_poweroftwo:
1023 ; CHECK-AVX1: # %bb.0:
1024 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1025 ; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm1, %xmm1
1026 ; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
1027 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1028 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1029 ; CHECK-AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm1
1030 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
1031 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm1
1032 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1033 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1034 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1035 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1036 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1037 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
1038 ; CHECK-AVX1-NEXT: retq
1040 ; CHECK-AVX2-LABEL: test_srem_even_poweroftwo:
1041 ; CHECK-AVX2: # %bb.0:
1042 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1043 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
1044 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
1045 ; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
1046 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1047 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
1048 ; CHECK-AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm1
1049 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
1050 ; CHECK-AVX2-NEXT: vpsrad $3, %xmm1, %xmm1
1051 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1052 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1053 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1054 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1055 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1056 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
1057 ; CHECK-AVX2-NEXT: retq
1059 ; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo:
1060 ; CHECK-AVX512VL: # %bb.0:
1061 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
1062 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
1063 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
1064 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
1065 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1066 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
1067 ; CHECK-AVX512VL-NEXT: retq
; IR under test: remainder, compare-equal with zero, widen i1 lanes to i32.
1068 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 16, i32 14>
1069 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1070 %ret = zext <4 x i1> %cmp to <4 x i32>
1074 ; One power-of-two divisor among mixed odd and even divisors
; Lane-wise `srem` of %X by the constant vector <5, 14, 16, 100> (odd, even,
; power-of-two and even lanes), compared against zero with `icmp eq`, then
; zero-extended to <4 x i32>.
; Each CHECK-<prefix> group below is the expected llc output for the RUN line
; using that prefix at the top of the file. In these expectations only the
; AVX512VL group is the short vpmulld/vpaddd/vprorvd/vpminud/vpcmpeqd sequence;
; the other groups multiply, shift, multiply back by the divisor and subtract
; before comparing with zero.
; The assertions are autogenerated by utils/update_llc_test_checks.py:
; regenerate them instead of hand-editing.
1075 define <4 x i32> @test_srem_odd_even_poweroftwo(<4 x i32> %X) nounwind {
1076 ; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo:
1077 ; CHECK-SSE2: # %bb.0:
1078 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,0]
1079 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
1080 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
1081 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
1082 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1083 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1084 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
1085 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1086 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1087 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2454267027,2147483649,1374389535]
1088 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
1089 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
1090 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1091 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
1092 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
1093 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1094 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1095 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
1096 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
1097 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1098 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
1099 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,4294967295,0]
1100 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
1101 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
1102 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
1103 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
1104 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
1105 ; CHECK-SSE2-NEXT: psrad $5, %xmm3
1106 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
1107 ; CHECK-SSE2-NEXT: psrad $3, %xmm4
1108 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5
1109 ; CHECK-SSE2-NEXT: psrad $1, %xmm5
1110 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
1111 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
1112 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
1113 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
1114 ; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2
1115 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,16,100]
1116 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1117 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
1118 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1119 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1120 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
1121 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1122 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1123 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
1124 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
1125 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
1126 ; CHECK-SSE2-NEXT: retq
1128 ; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo:
1129 ; CHECK-SSE41: # %bb.0:
1130 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483649,1374389535]
1131 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1132 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1133 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
1134 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
1135 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1136 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1137 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,1,0]
1138 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
1139 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
1140 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
1141 ; CHECK-SSE41-NEXT: psrad $5, %xmm1
1142 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
1143 ; CHECK-SSE41-NEXT: psrad $3, %xmm3
1144 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm3[0,1,2,3],xmm1[4,5,6,7]
1145 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm4
1146 ; CHECK-SSE41-NEXT: psrad $1, %xmm4
1147 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm3[4,5,6,7]
1148 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm1[2,3],xmm4[4,5],xmm1[6,7]
1149 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
1150 ; CHECK-SSE41-NEXT: paddd %xmm4, %xmm2
1151 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
1152 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
1153 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
1154 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
1155 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
1156 ; CHECK-SSE41-NEXT: retq
1158 ; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo:
1159 ; CHECK-AVX1: # %bb.0:
1160 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483649,1374389535]
1161 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1162 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1163 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
1164 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
1165 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1166 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1167 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
1168 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1169 ; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2
1170 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
1171 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1172 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm4
1173 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
1174 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
1175 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
1176 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
1177 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1178 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1179 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1180 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1181 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
1182 ; CHECK-AVX1-NEXT: retq
1184 ; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo:
1185 ; CHECK-AVX2: # %bb.0:
1186 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483649,1374389535]
1187 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1188 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1189 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
1190 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
1191 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1192 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1193 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
1194 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1195 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
1196 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
1197 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1198 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1199 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1200 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1201 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1202 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
1203 ; CHECK-AVX2-NEXT: retq
1205 ; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo:
1206 ; CHECK-AVX512VL: # %bb.0:
1207 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
1208 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
1209 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
1210 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
1211 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1212 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
1213 ; CHECK-AVX512VL-NEXT: retq
; IR under test: remainder, compare-equal with zero, widen i1 lanes to i32.
1214 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 16, i32 100>
1215 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1216 %ret = zext <4 x i1> %cmp to <4 x i32>
1220 ;------------------------------------------------------------------------------;
1222 ; One divisor equal to one among otherwise odd divisors
; Lane-wise `srem` of %X by the constant vector <5, 5, 1, 5> (lane 2 divides
; by one), compared against zero with `icmp eq`, then zero-extended to
; <4 x i32>.
; Each CHECK-<prefix> group below is the expected llc output for the RUN line
; using that prefix at the top of the file. In these expectations every group
; from SSE4.1 onwards is a multiply, add, unsigned-min and compare-equal
; sequence; the SSE2 group uses pmuludq, then pxor/pcmpgtd/pandn for the
; comparison.
; The assertions are autogenerated by utils/update_llc_test_checks.py:
; regenerate them instead of hand-editing.
1223 define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind {
1224 ; CHECK-SSE2-LABEL: test_srem_odd_one:
1225 ; CHECK-SSE2: # %bb.0:
1226 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
1227 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1228 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
1229 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
1230 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
1231 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
1232 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
1233 ; CHECK-SSE2-NEXT: paddd {{.*}}(%rip), %xmm0
1234 ; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
1235 ; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
1236 ; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
1237 ; CHECK-SSE2-NEXT: retq
1239 ; CHECK-SSE41-LABEL: test_srem_odd_one:
1240 ; CHECK-SSE41: # %bb.0:
1241 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
1242 ; CHECK-SSE41-NEXT: paddd {{.*}}(%rip), %xmm0
1243 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,4294967295,858993458]
1244 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
1245 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
1246 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
1247 ; CHECK-SSE41-NEXT: retq
1249 ; CHECK-AVX1-LABEL: test_srem_odd_one:
1250 ; CHECK-AVX1: # %bb.0:
1251 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
1252 ; CHECK-AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
1253 ; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
1254 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1255 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
1256 ; CHECK-AVX1-NEXT: retq
1258 ; CHECK-AVX2-LABEL: test_srem_odd_one:
1259 ; CHECK-AVX2: # %bb.0:
1260 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
1261 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
1262 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
1263 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
1264 ; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
1265 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1266 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
1267 ; CHECK-AVX2-NEXT: retq
1269 ; CHECK-AVX512VL-LABEL: test_srem_odd_one:
1270 ; CHECK-AVX512VL: # %bb.0:
1271 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
1272 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
1273 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
1274 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1275 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
1276 ; CHECK-AVX512VL-NEXT: retq
; IR under test: remainder, compare-equal with zero, widen i1 lanes to i32.
1277 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 1, i32 5>
1278 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
1279 %ret = zext <4 x i1> %cmp to <4 x i32>
1283 ; One one divisor in even divisor
; Lowering test for zext(icmp eq (srem X, <14, 14, 1, 14>), 0) on <4 x i32>.
; Lanes 0, 1 and 3 use the even divisor 14; lane 2 uses the divisor 1.
; In the autogenerated assertions below, only the AVX512VL run shows the short
; multiply / add / rotate-right (vprord) / unsigned-min / compare sequence; the
; SSE2, SSE4.1, AVX1 and AVX2 runs still compute the remainder explicitly
; (multiply-high, shift, multiply back, subtract) and compare it with zero.
; The assertion lines come from utils/update_llc_test_checks.py (see the note
; at the top of the file); regenerate them instead of editing them by hand.
1284 define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind {
1285 ; CHECK-SSE2-LABEL: test_srem_even_one:
1286 ; CHECK-SSE2: # %bb.0:
1287 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295]
1288 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
1289 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm3
1290 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
1291 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
1292 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1293 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,2454267027,0,2454267027]
1294 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
1295 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4
1296 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm5
1297 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,3,2,3]
1298 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,3,3]
1299 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm5
1300 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
1301 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
1302 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm3
1303 ; CHECK-SSE2-NEXT: paddd %xmm0, %xmm3
1304 ; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm4
1305 ; CHECK-SSE2-NEXT: psrad $3, %xmm4
1306 ; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm5
1307 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm4[3,0]
1308 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0,2]
1309 ; CHECK-SSE2-NEXT: psrld $31, %xmm3
1310 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm3
1311 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm3
1312 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1313 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
1314 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1315 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
1316 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1317 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
1318 ; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0
1319 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
1320 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
1321 ; CHECK-SSE2-NEXT: retq
1323 ; CHECK-SSE41-LABEL: test_srem_even_one:
1324 ; CHECK-SSE41: # %bb.0:
1325 ; CHECK-SSE41-NEXT: movl $-1840700269, %eax # imm = 0x92492493
1326 ; CHECK-SSE41-NEXT: movd %eax, %xmm1
1327 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
1328 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1329 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1330 ; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm2
1331 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1332 ; CHECK-SSE41-NEXT: paddd %xmm0, %xmm2
1333 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
1334 ; CHECK-SSE41-NEXT: psrad $3, %xmm1
1335 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
1336 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
1337 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
1338 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7]
1339 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
1340 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
1341 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
1342 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
1343 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
1344 ; CHECK-SSE41-NEXT: retq
1346 ; CHECK-AVX1-LABEL: test_srem_even_one:
1347 ; CHECK-AVX1: # %bb.0:
1348 ; CHECK-AVX1-NEXT: movl $-1840700269, %eax # imm = 0x92492493
1349 ; CHECK-AVX1-NEXT: vmovd %eax, %xmm1
1350 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
1351 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1352 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
1353 ; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm2, %xmm2
1354 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1355 ; CHECK-AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm1
1356 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
1357 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
1358 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
1359 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1360 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
1361 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
1362 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1363 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1364 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
1365 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
1366 ; CHECK-AVX1-NEXT: retq
1368 ; CHECK-AVX2-LABEL: test_srem_even_one:
1369 ; CHECK-AVX2: # %bb.0:
1370 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1371 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
1372 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
1373 ; CHECK-AVX2-NEXT: movl $-1840700269, %eax # imm = 0x92492493
1374 ; CHECK-AVX2-NEXT: vmovd %eax, %xmm2
1375 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
1376 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1377 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
1378 ; CHECK-AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm1
1379 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
1380 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
1381 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
1382 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
1383 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1384 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1385 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1386 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
1387 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
1388 ; CHECK-AVX2-NEXT: retq
1390 ; CHECK-AVX512VL-LABEL: test_srem_even_one:
1391 ; CHECK-AVX512VL: # %bb.0:
1392 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
1393 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
1394 ; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0
1395 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
1396 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1397 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
1398 ; CHECK-AVX512VL-NEXT: retq
; Per-lane signed remainder by the constant divisors <14, 14, 1, 14>.
; Any value srem 1 is 0, so lane 2 always has a zero remainder.
1399 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 1, i32 14>
; Lane-wise test for a zero remainder, i.e. divisibility by that lane's divisor.
1400 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
; Widen each i1 result to an i32 holding 0 or 1.
1401 %ret = zext <4 x i1> %cmp to <4 x i32>
1405 ; One one divisor in odd+even divisor
; Lowering test for zext(icmp eq (srem X, <5, 14, 1, 100>), 0) on <4 x i32>.
; The divisors mix an odd value (5), even values (14, 100) and the value 1 in
; lane 2.
; In the autogenerated assertions below, only the AVX512VL run shows the short
; multiply / add / variable rotate-right (vprorvd) / unsigned-min / compare
; sequence; the SSE2, SSE4.1, AVX1 and AVX2 runs still compute the remainder
; explicitly (multiply-high, per-lane shifts, multiply back by [5,14,1,100],
; subtract) and compare it with zero.
; The assertion lines come from utils/update_llc_test_checks.py (see the note
; at the top of the file); regenerate them instead of editing them by hand.
1406 define <4 x i32> @test_srem_odd_even_one(<4 x i32> %X) nounwind {
1407 ; CHECK-SSE2-LABEL: test_srem_odd_even_one:
1408 ; CHECK-SSE2: # %bb.0:
1409 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,0]
1410 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
1411 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
1412 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
1413 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
1414 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1415 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
1416 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1417 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1418 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2454267027,0,1374389535]
1419 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
1420 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
1421 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1422 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
1423 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
1424 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1425 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1426 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
1427 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
1428 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1429 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
1430 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,0,0]
1431 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
1432 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
1433 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
1434 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
1435 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
1436 ; CHECK-SSE2-NEXT: psrad $5, %xmm3
1437 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
1438 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
1439 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
1440 ; CHECK-SSE2-NEXT: psrad $3, %xmm3
1441 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5
1442 ; CHECK-SSE2-NEXT: psrad $1, %xmm5
1443 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0]
1444 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
1445 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
1446 ; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2
1447 ; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2
1448 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,1,100]
1449 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1450 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
1451 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1452 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1453 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
1454 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1455 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1456 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
1457 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
1458 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
1459 ; CHECK-SSE2-NEXT: retq
1461 ; CHECK-SSE41-LABEL: test_srem_odd_even_one:
1462 ; CHECK-SSE41: # %bb.0:
1463 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
1464 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1465 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1466 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
1467 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
1468 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1469 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1470 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,1,0]
1471 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
1472 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
1473 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
1474 ; CHECK-SSE41-NEXT: psrad $5, %xmm1
1475 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
1476 ; CHECK-SSE41-NEXT: psrad $3, %xmm3
1477 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
1478 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
1479 ; CHECK-SSE41-NEXT: psrad $1, %xmm1
1480 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
1481 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1482 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
1483 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
1484 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7]
1485 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
1486 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
1487 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
1488 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
1489 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
1490 ; CHECK-SSE41-NEXT: retq
1492 ; CHECK-AVX1-LABEL: test_srem_odd_even_one:
1493 ; CHECK-AVX1: # %bb.0:
1494 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
1495 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1496 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1497 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
1498 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
1499 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1500 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1501 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
1502 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1503 ; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2
1504 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
1505 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1506 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
1507 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
1508 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
1509 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
1510 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
1511 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
1512 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
1513 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1514 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1515 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
1516 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
1517 ; CHECK-AVX1-NEXT: retq
1519 ; CHECK-AVX2-LABEL: test_srem_odd_even_one:
1520 ; CHECK-AVX2: # %bb.0:
1521 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,0,1374389535]
1522 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1523 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1524 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
1525 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
1526 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1527 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1528 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
1529 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1530 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
1531 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
1532 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
1533 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
1534 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1535 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1536 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1537 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
1538 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
1539 ; CHECK-AVX2-NEXT: retq
1541 ; CHECK-AVX512VL-LABEL: test_srem_odd_even_one:
1542 ; CHECK-AVX512VL: # %bb.0:
1543 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
1544 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
1545 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
1546 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
1547 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1548 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
1549 ; CHECK-AVX512VL-NEXT: retq
; Per-lane signed remainder by the constant divisors <5, 14, 1, 100>.
; Any value srem 1 is 0, so lane 2 always has a zero remainder.
1550 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 1, i32 100>
; Lane-wise test for a zero remainder, i.e. divisibility by that lane's divisor.
1551 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
; Widen each i1 result to an i32 holding 0 or 1.
1552 %ret = zext <4 x i1> %cmp to <4 x i32>
1556 ;------------------------------------------------------------------------------;
1558 ; One INT_MIN divisor in odd divisor
; Lowering test for zext(icmp eq (srem X, <5, 5, INT_MIN, 5>), 0) on <4 x i32>.
; Lanes 0, 1 and 3 use the odd divisor 5; lane 2 uses 2147483648, which is the
; bit pattern 0x80000000, i.e. INT_MIN as a signed i32.
; In the autogenerated assertions below, every run avoids an explicit
; remainder: lanes 0, 1 and 3 come from a multiply / add / range-compare
; sequence, and lane 2 is blended in from a separate "(X & 2147483647) == 0"
; test (true only when X is 0 or INT_MIN).
; The assertion lines come from utils/update_llc_test_checks.py (see the note
; at the top of the file); regenerate them instead of editing them by hand.
1559 define <4 x i32> @test_srem_odd_INT_MIN(<4 x i32> %X) nounwind {
1560 ; CHECK-SSE2-LABEL: test_srem_odd_INT_MIN:
1561 ; CHECK-SSE2: # %bb.0:
1562 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
1563 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
1564 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm2
1565 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
1566 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1567 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm0
1568 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[0,2,2,3]
1569 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1
1570 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,2,2,3]
1571 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
1572 ; CHECK-SSE2-NEXT: paddd {{.*}}(%rip), %xmm3
1573 ; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm3
1574 ; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm3
1575 ; CHECK-SSE2-NEXT: pcmpeqd %xmm0, %xmm0
1576 ; CHECK-SSE2-NEXT: pxor %xmm3, %xmm0
1577 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[2,0],xmm0[3,0]
1578 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0,2]
1579 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
1580 ; CHECK-SSE2-NEXT: retq
1582 ; CHECK-SSE41-LABEL: test_srem_odd_INT_MIN:
1583 ; CHECK-SSE41: # %bb.0:
1584 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
1585 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
1586 ; CHECK-SSE41-NEXT: pand %xmm0, %xmm2
1587 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm2
1588 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
1589 ; CHECK-SSE41-NEXT: paddd {{.*}}(%rip), %xmm0
1590 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,858993458,0,858993458]
1591 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
1592 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
1593 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm2[4,5],xmm0[6,7]
1594 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
1595 ; CHECK-SSE41-NEXT: retq
1597 ; CHECK-AVX1-LABEL: test_srem_odd_INT_MIN:
1598 ; CHECK-AVX1: # %bb.0:
1599 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1600 ; CHECK-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm2
1601 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
1602 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
1603 ; CHECK-AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
1604 ; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm2
1605 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
1606 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7]
1607 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
1608 ; CHECK-AVX1-NEXT: retq
1610 ; CHECK-AVX2-LABEL: test_srem_odd_INT_MIN:
1611 ; CHECK-AVX2: # %bb.0:
1612 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1613 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483647,2147483647,2147483647,2147483647]
1614 ; CHECK-AVX2-NEXT: vpand %xmm2, %xmm0, %xmm2
1615 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
1616 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
1617 ; CHECK-AVX2-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
1618 ; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm2
1619 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
1620 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1621 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
1622 ; CHECK-AVX2-NEXT: retq
1624 ; CHECK-AVX512VL-LABEL: test_srem_odd_INT_MIN:
1625 ; CHECK-AVX512VL: # %bb.0:
1626 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1627 ; CHECK-AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm2
1628 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
1629 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
1630 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
1631 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm2
1632 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
1633 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1634 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
1635 ; CHECK-AVX512VL-NEXT: retq
; Per-lane signed remainder by <5, 5, INT_MIN, 5>; the literal 2147483648 is
; 0x80000000 when stored in an i32.
1636 %srem = srem <4 x i32> %X, <i32 5, i32 5, i32 2147483648, i32 5>
; Lane-wise test for a zero remainder, i.e. divisibility by that lane's divisor.
1637 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
; Widen each i1 result to an i32 holding 0 or 1.
1638 %ret = zext <4 x i1> %cmp to <4 x i32>
1642 ; One INT_MIN divisor in even divisor
; Lowering test for zext(icmp eq (srem X, <14, 14, INT_MIN, 14>), 0) on
; <4 x i32>. Lanes 0, 1 and 3 use the even divisor 14; lane 2 uses 2147483648,
; which is the bit pattern 0x80000000, i.e. INT_MIN as a signed i32.
; In the autogenerated assertions below, only the AVX512VL run shows the short
; multiply / add / variable rotate-right (vprorvd) / unsigned-min / compare
; sequence, with lane 2 blended in from a separate and-with-constant +
; compare-with-zero test; the SSE2, SSE4.1, AVX1 and AVX2 runs still compute
; the remainder explicitly and compare it with zero.
; The assertion lines come from utils/update_llc_test_checks.py (see the note
; at the top of the file); regenerate them instead of editing them by hand.
1643 define <4 x i32> @test_srem_even_INT_MIN(<4 x i32> %X) nounwind {
1644 ; CHECK-SSE2-LABEL: test_srem_even_INT_MIN:
1645 ; CHECK-SSE2: # %bb.0:
1646 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
1647 ; CHECK-SSE2-NEXT: pxor %xmm2, %xmm2
1648 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
1649 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2147483647,2454267027]
1650 ; CHECK-SSE2-NEXT: pand %xmm3, %xmm2
1651 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4294967295,4294967295,0,4294967295]
1652 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm4
1653 ; CHECK-SSE2-NEXT: paddd %xmm2, %xmm4
1654 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm3
1655 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
1656 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1657 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,2454267027,2454267027,2454267027]
1658 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm5
1659 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
1660 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1]
1661 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2
1662 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = <1,u,4294967295,u>
1663 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm4
1664 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
1665 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm3
1666 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1667 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
1668 ; CHECK-SSE2-NEXT: paddd %xmm2, %xmm4
1669 ; CHECK-SSE2-NEXT: movdqa %xmm4, %xmm2
1670 ; CHECK-SSE2-NEXT: psrad $3, %xmm2
1671 ; CHECK-SSE2-NEXT: movdqa %xmm4, %xmm3
1672 ; CHECK-SSE2-NEXT: psrad $30, %xmm3
1673 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[3,0]
1674 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0,2]
1675 ; CHECK-SSE2-NEXT: psrld $31, %xmm4
1676 ; CHECK-SSE2-NEXT: paddd %xmm2, %xmm4
1677 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1678 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm4
1679 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[0,2,2,3]
1680 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm2
1681 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1682 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
1683 ; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0
1684 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
1685 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
1686 ; CHECK-SSE2-NEXT: retq
1688 ; CHECK-SSE41-LABEL: test_srem_even_INT_MIN:
1689 ; CHECK-SSE41: # %bb.0:
1690 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1691 ; CHECK-SSE41-NEXT: pmuldq {{.*}}(%rip), %xmm1
1692 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <2454267027,u,2147483647,u>
1693 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
1694 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1695 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1696 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,1,4294967295,1]
1697 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1
1698 ; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1
1699 ; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
1700 ; CHECK-SSE41-NEXT: psrad $30, %xmm2
1701 ; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm3
1702 ; CHECK-SSE41-NEXT: psrad $3, %xmm3
1703 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
1704 ; CHECK-SSE41-NEXT: psrld $31, %xmm1
1705 ; CHECK-SSE41-NEXT: paddd %xmm3, %xmm1
1706 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
1707 ; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
1708 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
1709 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
1710 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
1711 ; CHECK-SSE41-NEXT: retq
1713 ; CHECK-AVX1-LABEL: test_srem_even_INT_MIN:
1714 ; CHECK-AVX1: # %bb.0:
1715 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1716 ; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm1, %xmm1
1717 ; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
1718 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1719 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
1720 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
1721 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1722 ; CHECK-AVX1-NEXT: vpsrad $30, %xmm1, %xmm2
1723 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
1724 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5],xmm3[6,7]
1725 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
1726 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
1727 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1728 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1729 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1730 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1731 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
1732 ; CHECK-AVX1-NEXT: retq
1734 ; CHECK-AVX2-LABEL: test_srem_even_INT_MIN:
1735 ; CHECK-AVX2: # %bb.0:
1736 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
1737 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
1738 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
1739 ; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
1740 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1741 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
1742 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
1743 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1744 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
1745 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
1746 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1747 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1748 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1749 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1750 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1751 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
1752 ; CHECK-AVX2-NEXT: retq
1754 ; CHECK-AVX512VL-LABEL: test_srem_even_INT_MIN:
1755 ; CHECK-AVX512VL: # %bb.0:
1756 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1757 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
1758 ; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [306783378,306783378,0,306783378]
1759 ; CHECK-AVX512VL-NEXT: vpaddd %xmm3, %xmm2, %xmm2
1760 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm2, %xmm2
1761 ; CHECK-AVX512VL-NEXT: vpminud %xmm3, %xmm2, %xmm3
1762 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm2, %xmm2
1763 ; CHECK-AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
1764 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1765 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
1766 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
1767 ; CHECK-AVX512VL-NEXT: retq
; Per-lane signed remainder by <14, 14, INT_MIN, 14>; the literal 2147483648 is
; 0x80000000 when stored in an i32.
1768 %srem = srem <4 x i32> %X, <i32 14, i32 14, i32 2147483648, i32 14>
; Lane-wise test for a zero remainder, i.e. divisibility by that lane's divisor.
1769 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
; Widen each i1 result to an i32 holding 0 or 1.
1770 %ret = zext <4 x i1> %cmp to <4 x i32>
1774 ; One INT_MIN divisor in odd+even divisor
; Lowering test for zext(icmp eq (srem X, <5, 14, INT_MIN, 100>), 0) on
; <4 x i32>. The divisors mix an odd value (5), even values (14, 100) and
; 2147483648 in lane 2, which is the bit pattern 0x80000000, i.e. INT_MIN as a
; signed i32.
; In the autogenerated assertions below, only the AVX512VL run shows the short
; multiply / add / variable rotate-right (vprorvd) / unsigned-min / compare
; sequence, with lane 2 blended in from a separate and-with-constant +
; compare-with-zero test; the SSE2, SSE4.1, AVX1 and AVX2 runs still compute
; the remainder explicitly (multiply back by [5,14,2147483648,100], subtract)
; and compare it with zero.
; The assertion lines come from utils/update_llc_test_checks.py (see the note
; at the top of the file); regenerate them instead of editing them by hand.
1775 define <4 x i32> @test_srem_odd_even_INT_MIN(<4 x i32> %X) nounwind {
1776 ; CHECK-SSE2-LABEL: test_srem_odd_even_INT_MIN:
1777 ; CHECK-SSE2: # %bb.0:
1778 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4294967295,0]
1779 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
1780 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
1781 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
1782 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1783 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1784 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
1785 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1786 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1787 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2454267027,2147483647,1374389535]
1788 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
1789 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
1790 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1791 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
1792 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
1793 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1794 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1795 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
1796 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
1797 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1798 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
1799 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,0,0]
1800 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
1801 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
1802 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
1803 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
1804 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
1805 ; CHECK-SSE2-NEXT: psrad $5, %xmm3
1806 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
1807 ; CHECK-SSE2-NEXT: psrad $30, %xmm4
1808 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
1809 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
1810 ; CHECK-SSE2-NEXT: psrad $3, %xmm3
1811 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5
1812 ; CHECK-SSE2-NEXT: psrad $1, %xmm5
1813 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0]
1814 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
1815 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
1816 ; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2
1817 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,14,2147483648,100]
1818 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1819 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
1820 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1821 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1822 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
1823 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1824 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1825 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
1826 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
1827 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
1828 ; CHECK-SSE2-NEXT: retq
1830 ; CHECK-SSE41-LABEL: test_srem_odd_even_INT_MIN:
1831 ; CHECK-SSE41: # %bb.0:
1832 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535]
1833 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1834 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1835 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
1836 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
1837 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1838 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1839 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4294967295,0]
1840 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
1841 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
1842 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
1843 ; CHECK-SSE41-NEXT: psrad $5, %xmm1
1844 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
1845 ; CHECK-SSE41-NEXT: psrad $3, %xmm3
1846 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
1847 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
1848 ; CHECK-SSE41-NEXT: psrad $30, %xmm1
1849 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm4
1850 ; CHECK-SSE41-NEXT: psrad $1, %xmm4
1851 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm1[4,5,6,7]
1852 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
1853 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
1854 ; CHECK-SSE41-NEXT: paddd %xmm4, %xmm2
1855 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
1856 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
1857 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
1858 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
1859 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
1860 ; CHECK-SSE41-NEXT: retq
1862 ; CHECK-AVX1-LABEL: test_srem_odd_even_INT_MIN:
1863 ; CHECK-AVX1: # %bb.0:
1864 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535]
1865 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1866 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1867 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
1868 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
1869 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1870 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
1871 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
1872 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1873 ; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2
1874 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
1875 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
1876 ; CHECK-AVX1-NEXT: vpsrad $30, %xmm1, %xmm3
1877 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm4
1878 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
1879 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
1880 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
1881 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
1882 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1883 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1884 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
1885 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1886 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
1887 ; CHECK-AVX1-NEXT: retq
1889 ; CHECK-AVX2-LABEL: test_srem_odd_even_INT_MIN:
1890 ; CHECK-AVX2: # %bb.0:
1891 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2454267027,2147483647,1374389535]
1892 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1893 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1894 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
1895 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
1896 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1897 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
1898 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
1899 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1900 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
1901 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
1902 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
1903 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
1904 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
1905 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
1906 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
1907 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
1908 ; CHECK-AVX2-NEXT: retq
1910 ; CHECK-AVX512VL-LABEL: test_srem_odd_even_INT_MIN:
1911 ; CHECK-AVX512VL: # %bb.0:
1912 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
1913 ; CHECK-AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm2
1914 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm2, %xmm1
1915 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
1916 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
1917 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
1918 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm2
1919 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
1920 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
1921 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
1922 ; CHECK-AVX512VL-NEXT: retq
; Per-lane signed remainder by <5, 14, INT_MIN, 100>; the literal 2147483648 is
; 0x80000000 when stored in an i32.
1923 %srem = srem <4 x i32> %X, <i32 5, i32 14, i32 2147483648, i32 100>
; Lane-wise test for a zero remainder, i.e. divisibility by that lane's divisor.
1924 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
; Widen each i1 result to an i32 holding 0 or 1.
1925 %ret = zext <4 x i1> %cmp to <4 x i32>
1929 ;==============================================================================;
1931 ; One all-ones divisor and one power-of-two divisor among odd divisors: srem by <5, 4294967295, 16, 5> (4294967295 is the all-ones i32 lane), compared against zero and zero-extended.
1932 define <4 x i32> @test_srem_odd_allones_and_poweroftwo(<4 x i32> %X) nounwind {
1933 ; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo:
1934 ; CHECK-SSE2: # %bb.0:
1935 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,1,0]
1936 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
1937 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
1938 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
1939 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1940 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
1941 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
1942 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
1943 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
1944 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,0,2147483649,1717986919]
1945 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
1946 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
1947 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
1948 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
1949 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
1950 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
1951 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
1952 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
1953 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
1954 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1955 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
1956 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,0,4294967295,0]
1957 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
1958 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
1959 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
1960 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
1961 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
1962 ; CHECK-SSE2-NEXT: psrad $1, %xmm3
1963 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
1964 ; CHECK-SSE2-NEXT: psrad $3, %xmm4
1965 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
1966 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
1967 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[0,3]
1968 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
1969 ; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2
1970 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
1971 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,4294967295,16,5]
1972 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
1973 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
1974 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
1975 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1976 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
1977 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
1978 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
1979 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
1980 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
1981 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
1982 ; CHECK-SSE2-NEXT: retq
1984 ; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo:
1985 ; CHECK-SSE41: # %bb.0:
1986 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1717986919]
1987 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
1988 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
1989 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
1990 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
1991 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
1992 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
1993 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,1,0]
1994 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
1995 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
1996 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
1997 ; CHECK-SSE41-NEXT: psrad $1, %xmm1
1998 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
1999 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
2000 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm4
2001 ; CHECK-SSE41-NEXT: psrad $3, %xmm4
2002 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm1[0,1,2,3],xmm4[4,5,6,7]
2003 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm3[2,3],xmm4[4,5],xmm3[6,7]
2004 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
2005 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
2006 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5,6,7]
2007 ; CHECK-SSE41-NEXT: paddd %xmm4, %xmm2
2008 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
2009 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
2010 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
2011 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
2012 ; CHECK-SSE41-NEXT: retq
2014 ; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo:
2015 ; CHECK-AVX1: # %bb.0:
2016 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1717986919]
2017 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2018 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2019 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2020 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2021 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2022 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2023 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2024 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2025 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm2
2026 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm1[0,1,2,3],xmm2[4,5,6,7]
2027 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm4
2028 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm4[4,5,6,7]
2029 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
2030 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
2031 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2032 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5,6,7]
2033 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
2034 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2035 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2036 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2037 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
2038 ; CHECK-AVX1-NEXT: retq
2040 ; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo:
2041 ; CHECK-AVX2: # %bb.0:
2042 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1717986919]
2043 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2044 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2045 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2046 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2047 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2048 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2049 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2050 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2051 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
2052 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
2053 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2,3]
2054 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
2055 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2056 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2057 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2058 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2059 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
2060 ; CHECK-AVX2-NEXT: retq
2062 ; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo:
2063 ; CHECK-AVX512VL: # %bb.0:
2064 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2065 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
2066 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
2067 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
2068 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2069 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
2070 ; CHECK-AVX512VL-NEXT: retq
2071 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 5>
2072 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2073 %ret = zext <4 x i1> %cmp to <4 x i32>
2077 ; One all-ones divisor and one power-of-two divisor among even divisors: srem by <14, 4294967295, 16, 14> (4294967295 is the all-ones i32 lane), compared against zero and zero-extended.
2078 define <4 x i32> @test_srem_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
2079 ; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo:
2080 ; CHECK-SSE2: # %bb.0:
2081 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,4294967295]
2082 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
2083 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm3
2084 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
2085 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
2086 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
2087 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,0,2147483649,2454267027]
2088 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
2089 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4
2090 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
2091 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm3
2092 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
2093 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2094 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2095 ; CHECK-SSE2-NEXT: pmuludq %xmm6, %xmm5
2096 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
2097 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
2098 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm3
2099 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,4294967295,1,1]
2100 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm5
2101 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm5
2102 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
2103 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2104 ; CHECK-SSE2-NEXT: pmuludq %xmm6, %xmm4
2105 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
2106 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
2107 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm5
2108 ; CHECK-SSE2-NEXT: movdqa %xmm5, %xmm3
2109 ; CHECK-SSE2-NEXT: psrad $3, %xmm3
2110 ; CHECK-SSE2-NEXT: movdqa %xmm5, %xmm4
2111 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,0],xmm3[0,0]
2112 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[2,0],xmm3[2,3]
2113 ; CHECK-SSE2-NEXT: psrld $31, %xmm5
2114 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm5
2115 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
2116 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [14,4294967295,16,14]
2117 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
2118 ; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm5
2119 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3]
2120 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2121 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
2122 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2123 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
2124 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm0
2125 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
2126 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
2127 ; CHECK-SSE2-NEXT: retq
2129 ; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo:
2130 ; CHECK-SSE41: # %bb.0:
2131 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,0,2147483649,2454267027]
2132 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2133 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2134 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
2135 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
2136 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2137 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2138 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,4294967295,1,1]
2139 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
2140 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
2141 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
2142 ; CHECK-SSE41-NEXT: psrad $3, %xmm1
2143 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5,6,7]
2144 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
2145 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
2146 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5,6,7]
2147 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
2148 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
2149 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
2150 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
2151 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
2152 ; CHECK-SSE41-NEXT: retq
2154 ; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo:
2155 ; CHECK-AVX1: # %bb.0:
2156 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,0,2147483649,2454267027]
2157 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2158 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2159 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2160 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2161 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2162 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2163 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2164 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2165 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
2166 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5,6,7]
2167 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
2168 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2169 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5,6,7]
2170 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
2171 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2172 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2173 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2174 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
2175 ; CHECK-AVX1-NEXT: retq
2177 ; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo:
2178 ; CHECK-AVX2: # %bb.0:
2179 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,0,2147483649,2454267027]
2180 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2181 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2182 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2183 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2184 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2185 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2186 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2187 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2188 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
2189 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
2190 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2,3]
2191 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
2192 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2193 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2194 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2195 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2196 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
2197 ; CHECK-AVX2-NEXT: retq
2199 ; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo:
2200 ; CHECK-AVX512VL: # %bb.0:
2201 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2202 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
2203 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
2204 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
2205 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2206 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
2207 ; CHECK-AVX512VL-NEXT: retq
2208 %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 14>
2209 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2210 %ret = zext <4 x i1> %cmp to <4 x i32>
2214 ; One all-ones divisor and one power-of-two divisor among odd+even divisors: srem by <5, 4294967295, 16, 100> (4294967295 is the all-ones i32 lane), compared against zero and zero-extended.
2215 define <4 x i32> @test_srem_odd_even_allones_and_poweroftwo(<4 x i32> %X) nounwind {
2216 ; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_poweroftwo:
2217 ; CHECK-SSE2: # %bb.0:
2218 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,1,0]
2219 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
2220 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
2221 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
2222 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2223 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2224 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
2225 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2226 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
2227 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,0,2147483649,1374389535]
2228 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
2229 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
2230 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
2231 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
2232 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
2233 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
2234 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2235 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
2236 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
2237 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
2238 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
2239 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,0,4294967295,0]
2240 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
2241 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
2242 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
2243 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
2244 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
2245 ; CHECK-SSE2-NEXT: psrad $5, %xmm3
2246 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
2247 ; CHECK-SSE2-NEXT: psrad $3, %xmm4
2248 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
2249 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
2250 ; CHECK-SSE2-NEXT: psrad $1, %xmm3
2251 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
2252 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[0,3]
2253 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
2254 ; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2
2255 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
2256 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,4294967295,16,100]
2257 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2258 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
2259 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2260 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2261 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
2262 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2263 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2264 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
2265 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
2266 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
2267 ; CHECK-SSE2-NEXT: retq
2269 ; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_poweroftwo:
2270 ; CHECK-SSE41: # %bb.0:
2271 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1374389535]
2272 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2273 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2274 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
2275 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
2276 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2277 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2278 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,1,0]
2279 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
2280 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
2281 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
2282 ; CHECK-SSE41-NEXT: psrad $5, %xmm1
2283 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
2284 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
2285 ; CHECK-SSE41-NEXT: psrad $3, %xmm3
2286 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm4
2287 ; CHECK-SSE41-NEXT: psrad $1, %xmm4
2288 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1,2,3],xmm3[4,5,6,7]
2289 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm4 = xmm4[0,1],xmm1[2,3],xmm4[4,5],xmm1[6,7]
2290 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
2291 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
2292 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5,6,7]
2293 ; CHECK-SSE41-NEXT: paddd %xmm4, %xmm2
2294 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
2295 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
2296 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
2297 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
2298 ; CHECK-SSE41-NEXT: retq
2300 ; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_poweroftwo:
2301 ; CHECK-AVX1: # %bb.0:
2302 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1374389535]
2303 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2304 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2305 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2306 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2307 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2308 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2309 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2310 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2311 ; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2
2312 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7]
2313 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
2314 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm4
2315 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0,1,2,3],xmm3[4,5,6,7]
2316 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
2317 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
2318 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2319 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5,6,7]
2320 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
2321 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2322 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2323 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2324 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
2325 ; CHECK-AVX1-NEXT: retq
2327 ; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_poweroftwo:
2328 ; CHECK-AVX2: # %bb.0:
2329 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,2147483649,1374389535]
2330 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2331 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2332 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2333 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2334 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2335 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2336 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2337 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2338 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
2339 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
2340 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2,3]
2341 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
2342 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2343 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2344 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2345 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2346 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
2347 ; CHECK-AVX2-NEXT: retq
2349 ; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_poweroftwo:
2350 ; CHECK-AVX512VL: # %bb.0:
2351 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2352 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
2353 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
2354 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
2355 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2356 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
2357 ; CHECK-AVX512VL-NEXT: retq
2358 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 100>
2359 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2360 %ret = zext <4 x i1> %cmp to <4 x i32>
2364 ;------------------------------------------------------------------------------;
2366 ; One all-ones divisor and one divisor equal to one among odd divisors: srem by <5, 4294967295, 1, 5> (4294967295 is the all-ones i32 lane), compared against zero and zero-extended.
2367 define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind {
2368 ; CHECK-SSE2-LABEL: test_srem_odd_allones_and_one:
2369 ; CHECK-SSE2: # %bb.0:
2370 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
2371 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
2372 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
2373 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
2374 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
2375 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
2376 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
2377 ; CHECK-SSE2-NEXT: paddd {{.*}}(%rip), %xmm0
2378 ; CHECK-SSE2-NEXT: pxor {{.*}}(%rip), %xmm0
2379 ; CHECK-SSE2-NEXT: pcmpgtd {{.*}}(%rip), %xmm0
2380 ; CHECK-SSE2-NEXT: pandn {{.*}}(%rip), %xmm0
2381 ; CHECK-SSE2-NEXT: retq
2383 ; CHECK-SSE41-LABEL: test_srem_odd_allones_and_one:
2384 ; CHECK-SSE41: # %bb.0:
2385 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm0
2386 ; CHECK-SSE41-NEXT: paddd {{.*}}(%rip), %xmm0
2387 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [858993458,4294967295,4294967295,858993458]
2388 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
2389 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
2390 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
2391 ; CHECK-SSE41-NEXT: retq
2393 ; CHECK-AVX1-LABEL: test_srem_odd_allones_and_one:
2394 ; CHECK-AVX1: # %bb.0:
2395 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2396 ; CHECK-AVX1-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
2397 ; CHECK-AVX1-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
2398 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2399 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
2400 ; CHECK-AVX1-NEXT: retq
2402 ; CHECK-AVX2-LABEL: test_srem_odd_allones_and_one:
2403 ; CHECK-AVX2: # %bb.0:
2404 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
2405 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
2406 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [429496729,429496729,429496729,429496729]
2407 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
2408 ; CHECK-AVX2-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
2409 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2410 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
2411 ; CHECK-AVX2-NEXT: retq
2413 ; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_one:
2414 ; CHECK-AVX512VL: # %bb.0:
2415 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
2416 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
2417 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
2418 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2419 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
2420 ; CHECK-AVX512VL-NEXT: retq
2421 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 5>
2422 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2423 %ret = zext <4 x i1> %cmp to <4 x i32>
2427 ; One all-ones divisor and one divisor equal to one among even divisors
2428 define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
2429 ; CHECK-SSE2-LABEL: test_srem_even_allones_and_one:
2430 ; CHECK-SSE2: # %bb.0:
2431 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,0,4294967295]
2432 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
2433 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm3
2434 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
2435 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
2436 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
2437 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,0,0,2454267027]
2438 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
2439 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4
2440 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
2441 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm3
2442 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
2443 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,2,3,3]
2444 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2445 ; CHECK-SSE2-NEXT: pmuludq %xmm6, %xmm5
2446 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,3,2,3]
2447 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
2448 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm3
2449 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [1,4294967295,1,1]
2450 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm5
2451 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm5
2452 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
2453 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
2454 ; CHECK-SSE2-NEXT: pmuludq %xmm6, %xmm4
2455 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
2456 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1]
2457 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm5
2458 ; CHECK-SSE2-NEXT: movdqa %xmm5, %xmm3
2459 ; CHECK-SSE2-NEXT: psrad $3, %xmm3
2460 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,3],xmm5[1,2]
2461 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,2,3,1]
2462 ; CHECK-SSE2-NEXT: psrld $31, %xmm5
2463 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm5
2464 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm5
2465 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [14,4294967295,1,14]
2466 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
2467 ; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm5
2468 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[0,2,2,3]
2469 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2470 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
2471 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2472 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
2473 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm0
2474 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
2475 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
2476 ; CHECK-SSE2-NEXT: retq
2478 ; CHECK-SSE41-LABEL: test_srem_even_allones_and_one:
2479 ; CHECK-SSE41: # %bb.0:
2480 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,0,0,2454267027]
2481 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2482 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2483 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
2484 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
2485 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2486 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2487 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1,4294967295,1,1]
2488 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
2489 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
2490 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
2491 ; CHECK-SSE41-NEXT: psrad $3, %xmm1
2492 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5],xmm1[6,7]
2493 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
2494 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
2495 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3,4,5],xmm2[6,7]
2496 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
2497 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
2498 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
2499 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
2500 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
2501 ; CHECK-SSE41-NEXT: retq
2503 ; CHECK-AVX1-LABEL: test_srem_even_allones_and_one:
2504 ; CHECK-AVX1: # %bb.0:
2505 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,0,0,2454267027]
2506 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2507 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2508 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2509 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2510 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2511 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2512 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2513 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2514 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
2515 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3,4,5],xmm2[6,7]
2516 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
2517 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2518 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7]
2519 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
2520 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2521 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2522 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2523 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
2524 ; CHECK-AVX1-NEXT: retq
2526 ; CHECK-AVX2-LABEL: test_srem_even_allones_and_one:
2527 ; CHECK-AVX2: # %bb.0:
2528 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,0,0,2454267027]
2529 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2530 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2531 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2532 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2533 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2534 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2535 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2536 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2537 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
2538 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
2539 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]
2540 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
2541 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2542 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2543 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2544 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2545 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
2546 ; CHECK-AVX2-NEXT: retq
2548 ; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_one:
2549 ; CHECK-AVX512VL: # %bb.0:
2550 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm0, %xmm0
2551 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0
2552 ; CHECK-AVX512VL-NEXT: vprord $1, %xmm0, %xmm0
2553 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
2554 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2555 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
2556 ; CHECK-AVX512VL-NEXT: retq
2557 %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 1, i32 14>
2558 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2559 %ret = zext <4 x i1> %cmp to <4 x i32>
2563 ; One all-ones divisor and one one divisor in odd+even divisor
; Exercises `srem <4 x i32> %X, <5, 4294967295, 1, 100>` compared for equality
; with zero. 4294967295 is the all-ones i32 bit pattern, so the divisor vector
; has an odd lane (5), an all-ones lane, a one lane and an even lane (100).
; In the assertions recorded below, the SSE2/SSE4.1/AVX1/AVX2 runs end with a
; multiply, a subtract from the input and a compare against a zeroed register,
; while the AVX512VL run uses a multiply/add/rotate/unsigned-min/compare
; sequence.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2564 define <4 x i32> @test_srem_odd_even_allones_and_one(<4 x i32> %X) nounwind {
2565 ; CHECK-SSE2-LABEL: test_srem_odd_even_allones_and_one:
2566 ; CHECK-SSE2: # %bb.0:
2567 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,1,0]
2568 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
2569 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
2570 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
2571 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2572 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2573 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
2574 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2575 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
2576 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,0,0,1374389535]
2577 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
2578 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
2579 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
2580 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[2,2,3,3]
2581 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
2582 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
2583 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2584 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
2585 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
2586 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
2587 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
2588 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2
2589 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
2590 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
2591 ; CHECK-SSE2-NEXT: psrad $5, %xmm3
2592 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
2593 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
2594 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
2595 ; CHECK-SSE2-NEXT: psrad $1, %xmm3
2596 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm2[0]
2597 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[0,3]
2598 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
2599 ; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2
2600 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
2601 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,4294967295,1,100]
2602 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2603 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
2604 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2605 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2606 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
2607 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2608 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2609 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
2610 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
2611 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
2612 ; CHECK-SSE2-NEXT: retq
2614 ; CHECK-SSE41-LABEL: test_srem_odd_even_allones_and_one:
2615 ; CHECK-SSE41: # %bb.0:
2616 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,0,0,1374389535]
2617 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2618 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2619 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
2620 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
2621 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2622 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2623 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,4294967295,1,0]
2624 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
2625 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
2626 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
2627 ; CHECK-SSE41-NEXT: psrad $5, %xmm1
2628 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm2[0,1,2,3],xmm1[4,5,6,7]
2629 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
2630 ; CHECK-SSE41-NEXT: psrad $1, %xmm3
2631 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
2632 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,3],xmm3[4,5],xmm1[6,7]
2633 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
2634 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
2635 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3,4,5],xmm2[6,7]
2636 ; CHECK-SSE41-NEXT: paddd %xmm3, %xmm2
2637 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
2638 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
2639 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
2640 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
2641 ; CHECK-SSE41-NEXT: retq
2643 ; CHECK-AVX1-LABEL: test_srem_odd_even_allones_and_one:
2644 ; CHECK-AVX1: # %bb.0:
2645 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,0,1374389535]
2646 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2647 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2648 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2649 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2650 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2651 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2652 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2653 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2654 ; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2
2655 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm1[0,1,2,3],xmm2[4,5,6,7]
2656 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
2657 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
2658 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
2659 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
2660 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2661 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3,4,5],xmm1[6,7]
2662 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
2663 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2664 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2665 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2666 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
2667 ; CHECK-AVX1-NEXT: retq
2669 ; CHECK-AVX2-LABEL: test_srem_odd_even_allones_and_one:
2670 ; CHECK-AVX2: # %bb.0:
2671 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,0,0,1374389535]
2672 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[2,2,3,3]
2673 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2674 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2675 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2676 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2677 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2678 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2679 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2680 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
2681 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
2682 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1,2],xmm2[3]
2683 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
2684 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2685 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2686 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2687 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2688 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
2689 ; CHECK-AVX2-NEXT: retq
2691 ; CHECK-AVX512VL-LABEL: test_srem_odd_even_allones_and_one:
2692 ; CHECK-AVX512VL: # %bb.0:
2693 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2694 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
2695 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
2696 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
2697 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2698 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
2699 ; CHECK-AVX512VL-NEXT: retq
; IR under test: per-lane signed remainder by the constant divisor vector,
; an equality test of each remainder against zero, and a zext of the
; resulting <4 x i1> mask to <4 x i32>.
2700 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 1, i32 100>
2701 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2702 %ret = zext <4 x i1> %cmp to <4 x i32>
2706 ;------------------------------------------------------------------------------;
2708 ; One power-of-two divisor and one one divisor in odd divisor
; Exercises `srem <4 x i32> %X, <5, 16, 1, 5>` compared for equality with
; zero: two odd lanes (5), one power-of-two lane (16) and one lane whose
; divisor is 1.
; In the assertions recorded below, the SSE2/SSE4.1/AVX1/AVX2 runs end with a
; multiply, a subtract from the input and a compare against a zeroed register,
; while the AVX512VL run uses a multiply/add/rotate/unsigned-min/compare
; sequence.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2709 define <4 x i32> @test_srem_odd_poweroftwo_and_one(<4 x i32> %X) nounwind {
2710 ; CHECK-SSE2-LABEL: test_srem_odd_poweroftwo_and_one:
2711 ; CHECK-SSE2: # %bb.0:
2712 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,0]
2713 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
2714 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
2715 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
2716 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
2717 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2718 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
2719 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2720 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
2721 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2147483649,0,1717986919]
2722 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
2723 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
2724 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
2725 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
2726 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
2727 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
2728 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
2729 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
2730 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
2731 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
2732 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
2733 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,0,0]
2734 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
2735 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
2736 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
2737 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
2738 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
2739 ; CHECK-SSE2-NEXT: psrad $1, %xmm3
2740 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
2741 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
2742 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5
2743 ; CHECK-SSE2-NEXT: psrad $3, %xmm5
2744 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm5[0]
2745 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[0,3],xmm4[0,3]
2746 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
2747 ; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2
2748 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
2749 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,16,1,5]
2750 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
2751 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
2752 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2753 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
2754 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
2755 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2756 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
2757 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
2758 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
2759 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
2760 ; CHECK-SSE2-NEXT: retq
2762 ; CHECK-SSE41-LABEL: test_srem_odd_poweroftwo_and_one:
2763 ; CHECK-SSE41: # %bb.0:
2764 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1717986919]
2765 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2766 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2767 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
2768 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
2769 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2770 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2771 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,1,0]
2772 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
2773 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
2774 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
2775 ; CHECK-SSE41-NEXT: psrad $1, %xmm1
2776 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
2777 ; CHECK-SSE41-NEXT: psrad $3, %xmm3
2778 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
2779 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
2780 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2781 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
2782 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
2783 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7]
2784 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
2785 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
2786 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
2787 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
2788 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
2789 ; CHECK-SSE41-NEXT: retq
2791 ; CHECK-AVX1-LABEL: test_srem_odd_poweroftwo_and_one:
2792 ; CHECK-AVX1: # %bb.0:
2793 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1717986919]
2794 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2795 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2796 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2797 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2798 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2799 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2800 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2801 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2802 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm2
2803 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
2804 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm2[4,5,6,7]
2805 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5,6,7]
2806 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
2807 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
2808 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2809 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
2810 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
2811 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2812 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2813 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2814 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
2815 ; CHECK-AVX1-NEXT: retq
2817 ; CHECK-AVX2-LABEL: test_srem_odd_poweroftwo_and_one:
2818 ; CHECK-AVX2: # %bb.0:
2819 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1717986919]
2820 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2821 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2822 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2823 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2824 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2825 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2826 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
2827 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2828 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
2829 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
2830 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
2831 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
2832 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2833 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2834 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2835 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2836 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
2837 ; CHECK-AVX2-NEXT: retq
2839 ; CHECK-AVX512VL-LABEL: test_srem_odd_poweroftwo_and_one:
2840 ; CHECK-AVX512VL: # %bb.0:
2841 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2842 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
2843 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
2844 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
2845 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2846 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
2847 ; CHECK-AVX512VL-NEXT: retq
; IR under test: per-lane signed remainder by the constant divisor vector,
; an equality test of each remainder against zero, and a zext of the
; resulting <4 x i1> mask to <4 x i32>.
2848 %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 5>
2849 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2850 %ret = zext <4 x i1> %cmp to <4 x i32>
2854 ; One power-of-two divisor and one one divisor in even divisor
; Exercises `srem <4 x i32> %X, <14, 16, 1, 14>` compared for equality with
; zero: two even, non-power-of-two lanes (14), one power-of-two lane (16) and
; one lane whose divisor is 1.
; In the assertions recorded below, the SSE2/SSE4.1/AVX1/AVX2 runs end with a
; multiply, a subtract from the input and a compare against a zeroed register,
; while the AVX512VL run uses a multiply/add/rotate/unsigned-min/compare
; sequence.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2855 define <4 x i32> @test_srem_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
2856 ; CHECK-SSE2-LABEL: test_srem_even_poweroftwo_and_one:
2857 ; CHECK-SSE2: # %bb.0:
2858 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,4294967295,0,4294967295]
2859 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
2860 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm3
2861 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
2862 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
2863 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
2864 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2454267027,2147483649,0,2454267027]
2865 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
2866 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm4
2867 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
2868 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm3
2869 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
2870 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
2871 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm0[1,1,3,3]
2872 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm6
2873 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm6[1,3,2,3]
2874 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1]
2875 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm3
2876 ; CHECK-SSE2-NEXT: paddd %xmm0, %xmm3
2877 ; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm4
2878 ; CHECK-SSE2-NEXT: psrad $3, %xmm4
2879 ; CHECK-SSE2-NEXT: movdqa %xmm3, %xmm5
2880 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[2,0],xmm4[3,0]
2881 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,1],xmm5[0,2]
2882 ; CHECK-SSE2-NEXT: psrld $31, %xmm3
2883 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm3
2884 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm3
2885 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [14,16,1,14]
2886 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
2887 ; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3
2888 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
2889 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
2890 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2
2891 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
2892 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
2893 ; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0
2894 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
2895 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
2896 ; CHECK-SSE2-NEXT: retq
2898 ; CHECK-SSE41-LABEL: test_srem_even_poweroftwo_and_one:
2899 ; CHECK-SSE41: # %bb.0:
2900 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027]
2901 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2902 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2903 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
2904 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
2905 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2906 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
2907 ; CHECK-SSE41-NEXT: paddd %xmm0, %xmm1
2908 ; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
2909 ; CHECK-SSE41-NEXT: psrad $3, %xmm2
2910 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
2911 ; CHECK-SSE41-NEXT: psrld $31, %xmm1
2912 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
2913 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
2914 ; CHECK-SSE41-NEXT: paddd %xmm2, %xmm1
2915 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
2916 ; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
2917 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
2918 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
2919 ; CHECK-SSE41-NEXT: retq
2921 ; CHECK-AVX1-LABEL: test_srem_even_poweroftwo_and_one:
2922 ; CHECK-AVX1: # %bb.0:
2923 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027]
2924 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2925 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2926 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2927 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2928 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2929 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
2930 ; CHECK-AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm1
2931 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
2932 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm1[4,5],xmm2[6,7]
2933 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
2934 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
2935 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
2936 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
2937 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2938 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2939 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2940 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
2941 ; CHECK-AVX1-NEXT: retq
2943 ; CHECK-AVX2-LABEL: test_srem_even_poweroftwo_and_one:
2944 ; CHECK-AVX2: # %bb.0:
2945 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2454267027,2147483649,0,2454267027]
2946 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
2947 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
2948 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
2949 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
2950 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
2951 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
2952 ; CHECK-AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm1
2953 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
2954 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
2955 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
2956 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
2957 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
2958 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
2959 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
2960 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
2961 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
2962 ; CHECK-AVX2-NEXT: retq
2964 ; CHECK-AVX512VL-LABEL: test_srem_even_poweroftwo_and_one:
2965 ; CHECK-AVX512VL: # %bb.0:
2966 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
2967 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
2968 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
2969 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
2970 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
2971 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
2972 ; CHECK-AVX512VL-NEXT: retq
; IR under test: per-lane signed remainder by the constant divisor vector,
; an equality test of each remainder against zero, and a zext of the
; resulting <4 x i1> mask to <4 x i32>.
2973 %srem = srem <4 x i32> %X, <i32 14, i32 16, i32 1, i32 14>
2974 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
2975 %ret = zext <4 x i1> %cmp to <4 x i32>
2979 ; One power-of-two divisor and one one divisor in odd+even divisor
; Exercises `srem <4 x i32> %X, <5, 16, 1, 100>` compared for equality with
; zero: an odd lane (5), a power-of-two lane (16), a lane whose divisor is 1
; and an even, non-power-of-two lane (100).
; In the assertions recorded below, the SSE2/SSE4.1/AVX1/AVX2 runs end with a
; multiply, a subtract from the input and a compare against a zeroed register,
; while the AVX512VL run uses a multiply/add/rotate/unsigned-min/compare
; sequence.
; The CHECK lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py rather than
; editing them by hand.
2980 define <4 x i32> @test_srem_odd_even_poweroftwo_and_one(<4 x i32> %X) nounwind {
2981 ; CHECK-SSE2-LABEL: test_srem_odd_even_poweroftwo_and_one:
2982 ; CHECK-SSE2: # %bb.0:
2983 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,1,1,0]
2984 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
2985 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
2986 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
2987 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
2988 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm0[1,1,3,3]
2989 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
2990 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
2991 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
2992 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [1717986919,2147483649,0,1374389535]
2993 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
2994 ; CHECK-SSE2-NEXT: pmuludq %xmm5, %xmm1
2995 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,3,2,3]
2996 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm5[1,1,3,3]
2997 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
2998 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
2999 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
3000 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
3001 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
3002 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
3003 ; CHECK-SSE2-NEXT: pand %xmm5, %xmm4
3004 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,4294967295,0,0]
3005 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
3006 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
3007 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
3008 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
3009 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
3010 ; CHECK-SSE2-NEXT: psrad $5, %xmm3
3011 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
3012 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm4 = xmm4[1],xmm3[1]
3013 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
3014 ; CHECK-SSE2-NEXT: psrad $3, %xmm3
3015 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm5
3016 ; CHECK-SSE2-NEXT: psrad $1, %xmm5
3017 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm3[0]
3018 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm4[0,3]
3019 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
3020 ; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2
3021 ; CHECK-SSE2-NEXT: paddd %xmm5, %xmm2
3022 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,16,1,100]
3023 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
3024 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
3025 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
3026 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3027 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
3028 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
3029 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
3030 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
3031 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
3032 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
3033 ; CHECK-SSE2-NEXT: retq
3035 ; CHECK-SSE41-LABEL: test_srem_odd_even_poweroftwo_and_one:
3036 ; CHECK-SSE41: # %bb.0:
3037 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1374389535]
3038 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
3039 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
3040 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm3
3041 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm1
3042 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3043 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
3044 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [0,1,1,0]
3045 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm2
3046 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
3047 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
3048 ; CHECK-SSE41-NEXT: psrad $5, %xmm1
3049 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
3050 ; CHECK-SSE41-NEXT: psrad $3, %xmm3
3051 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
3052 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
3053 ; CHECK-SSE41-NEXT: psrad $1, %xmm1
3054 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
3055 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
3056 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
3057 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
3058 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5],xmm2[6,7]
3059 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
3060 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
3061 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
3062 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
3063 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
3064 ; CHECK-SSE41-NEXT: retq
3066 ; CHECK-AVX1-LABEL: test_srem_odd_even_poweroftwo_and_one:
3067 ; CHECK-AVX1: # %bb.0:
3068 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1374389535]
3069 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
3070 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
3071 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
3072 ; CHECK-AVX1-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
3073 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3074 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
3075 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
3076 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
3077 ; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm2
3078 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm3
3079 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
3080 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
3081 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
3082 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
3083 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
3084 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
3085 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm3[4,5],xmm1[6,7]
3086 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
3087 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
3088 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
3089 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
3090 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
3091 ; CHECK-AVX1-NEXT: retq
3093 ; CHECK-AVX2-LABEL: test_srem_odd_even_poweroftwo_and_one:
3094 ; CHECK-AVX2: # %bb.0:
3095 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1717986919,2147483649,0,1374389535]
3096 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
3097 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
3098 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm3, %xmm2
3099 ; CHECK-AVX2-NEXT: vpmuldq %xmm1, %xmm0, %xmm1
3100 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3101 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
3102 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm2
3103 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
3104 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
3105 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
3106 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0,1],xmm3[2],xmm2[3]
3107 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
3108 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
3109 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
3110 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
3111 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
3112 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
3113 ; CHECK-AVX2-NEXT: retq
3115 ; CHECK-AVX512VL-LABEL: test_srem_odd_even_poweroftwo_and_one:
3116 ; CHECK-AVX512VL: # %bb.0:
3117 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
3118 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
3119 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
3120 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
3121 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
3122 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
3123 ; CHECK-AVX512VL-NEXT: retq
; IR under test: per-lane signed remainder by the constant divisor vector,
; an equality test of each remainder against zero, and a zext of the
; resulting <4 x i1> mask to <4 x i32>.
3124 %srem = srem <4 x i32> %X, <i32 5, i32 16, i32 1, i32 100>
3125 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
3126 %ret = zext <4 x i1> %cmp to <4 x i32>
3130 ;------------------------------------------------------------------------------;
; Lowering of `(srem %X, <5, 4294967295, 16, 1>) == 0` on <4 x i32>, with the
; i1 compare result zero-extended so that each lane of %ret is 1 where the
; remainder is zero and 0 otherwise.
; Divisor lanes follow the function name: 5 (odd), 4294967295 (0xFFFFFFFF, all
; bits set in an i32), 16 (power of two) and 1.
; The CHECK-* blocks below are autogenerated (see the NOTE on the first line of
; this file), one block per RUN-line prefix; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3132 define <4 x i32> @test_srem_odd_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
3133 ; CHECK-SSE2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
3134 ; CHECK-SSE2: # %bb.0:
3135 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,1,1]
3136 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
3137 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
3138 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,2,2,3]
3139 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3140 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
3141 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
3142 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
3143 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
3144 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
3145 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
3146 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
3147 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1717986919,0,2147483649,0]
3148 ; CHECK-SSE2-NEXT: pand %xmm2, %xmm4
3149 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm5 = [0,0,4294967295,0]
3150 ; CHECK-SSE2-NEXT: pand %xmm0, %xmm5
3151 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
3152 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm2
3153 ; CHECK-SSE2-NEXT: psrlq $32, %xmm2
3154 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm2
3155 ; CHECK-SSE2-NEXT: paddd %xmm3, %xmm2
3156 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
3157 ; CHECK-SSE2-NEXT: psrad $3, %xmm3
3158 ; CHECK-SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm2[1]
3159 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm4
3160 ; CHECK-SSE2-NEXT: psrad $1, %xmm4
3161 ; CHECK-SSE2-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm2[0]
3162 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[0,3],xmm3[0,3]
3163 ; CHECK-SSE2-NEXT: psrld $31, %xmm2
3164 ; CHECK-SSE2-NEXT: pand {{.*}}(%rip), %xmm2
3165 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm2
3166 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [5,4294967295,16,1]
3167 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
3168 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
3169 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
3170 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
3171 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm3
3172 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
3173 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
3174 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
3175 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
3176 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
3177 ; CHECK-SSE2-NEXT: retq
3179 ; CHECK-SSE41-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
3180 ; CHECK-SSE41: # %bb.0:
3181 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [0,4294967295,1,1]
3182 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1
3183 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <1717986919,u,2147483649,u>
3184 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
3185 ; CHECK-SSE41-NEXT: psrlq $32, %xmm2
3186 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
3187 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
3188 ; CHECK-SSE41-NEXT: psrad $3, %xmm1
3189 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
3190 ; CHECK-SSE41-NEXT: psrad $1, %xmm3
3191 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
3192 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm2[2,3],xmm3[4,5],xmm2[6,7]
3193 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
3194 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
3195 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
3196 ; CHECK-SSE41-NEXT: paddd %xmm3, %xmm2
3197 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
3198 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
3199 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
3200 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
3201 ; CHECK-SSE41-NEXT: retq
3203 ; CHECK-AVX1-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
3204 ; CHECK-AVX1: # %bb.0:
3205 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1
3206 ; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
3207 ; CHECK-AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2
3208 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
3209 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
3210 ; CHECK-AVX1-NEXT: vpsrad $1, %xmm1, %xmm3
3211 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm3[0,1,2,3],xmm2[4,5,6,7]
3212 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
3213 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
3214 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
3215 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
3216 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
3217 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
3218 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
3219 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
3220 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
3221 ; CHECK-AVX1-NEXT: retq
3223 ; CHECK-AVX2-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
3224 ; CHECK-AVX2: # %bb.0:
3225 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1
3226 ; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
3227 ; CHECK-AVX2-NEXT: vpsrlq $32, %xmm2, %xmm2
3228 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm2, %xmm1
3229 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
3230 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
3231 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3]
3232 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
3233 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
3234 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
3235 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
3236 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
3237 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
3238 ; CHECK-AVX2-NEXT: retq
3240 ; CHECK-AVX512VL-LABEL: test_srem_odd_allones_and_poweroftwo_and_one:
3241 ; CHECK-AVX512VL: # %bb.0:
3242 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
3243 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
3244 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
3245 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
3246 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
3247 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
3248 ; CHECK-AVX512VL-NEXT: retq
; IR under test: per-lane signed remainder, equality with zero, then zext of
; the <4 x i1> mask to <4 x i32>.
3249 %srem = srem <4 x i32> %X, <i32 5, i32 4294967295, i32 16, i32 1>
3250 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
3251 %ret = zext <4 x i1> %cmp to <4 x i32>
; Lowering of `(srem %X, <14, 4294967295, 16, 1>) == 0` on <4 x i32>, with the
; i1 compare result zero-extended so that each lane of %ret is 1 where the
; remainder is zero and 0 otherwise.
; Divisor lanes follow the function name: 14 (even), 4294967295 (0xFFFFFFFF,
; all bits set in an i32), 16 (power of two) and 1.
; The CHECK-* blocks below are autogenerated (see the NOTE on the first line of
; this file), one block per RUN-line prefix; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
3255 define <4 x i32> @test_srem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
3256 ; CHECK-SSE2-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
3257 ; CHECK-SSE2: # %bb.0:
3258 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,4294967295,1,1]
3259 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
3260 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
3261 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
3262 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
3263 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
3264 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
3265 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[0,2,2,3]
3266 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
3267 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm3 = [4294967295,0,4294967295,0]
3268 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
3269 ; CHECK-SSE2-NEXT: pand %xmm3, %xmm4
3270 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
3271 ; CHECK-SSE2-NEXT: pxor %xmm5, %xmm5
3272 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm5
3273 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm6 = [2454267027,0,2147483649,0]
3274 ; CHECK-SSE2-NEXT: pand %xmm6, %xmm5
3275 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm5
3276 ; CHECK-SSE2-NEXT: pmuludq %xmm0, %xmm6
3277 ; CHECK-SSE2-NEXT: psrlq $32, %xmm6
3278 ; CHECK-SSE2-NEXT: psubd %xmm5, %xmm6
3279 ; CHECK-SSE2-NEXT: paddd %xmm2, %xmm6
3280 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,3,2,3]
3281 ; CHECK-SSE2-NEXT: movdqa %xmm6, %xmm4
3282 ; CHECK-SSE2-NEXT: psrad $3, %xmm4
3283 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,2,2,3]
3284 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
3285 ; CHECK-SSE2-NEXT: psrld $31, %xmm6
3286 ; CHECK-SSE2-NEXT: pand %xmm3, %xmm6
3287 ; CHECK-SSE2-NEXT: paddd %xmm4, %xmm6
3288 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [14,4294967295,16,1]
3289 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
3290 ; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm6
3291 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm6[0,2,2,3]
3292 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
3293 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm2
3294 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
3295 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
3296 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm0
3297 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
3298 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
3299 ; CHECK-SSE2-NEXT: retq
3301 ; CHECK-SSE41-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
3302 ; CHECK-SSE41: # %bb.0:
3303 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,4294967295,1,1]
3304 ; CHECK-SSE41-NEXT: pmulld %xmm0, %xmm1
3305 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = <2454267027,u,2147483649,u>
3306 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
3307 ; CHECK-SSE41-NEXT: psrlq $32, %xmm2
3308 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
3309 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
3310 ; CHECK-SSE41-NEXT: psrad $3, %xmm1
3311 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
3312 ; CHECK-SSE41-NEXT: psrld $31, %xmm2
3313 ; CHECK-SSE41-NEXT: pxor %xmm3, %xmm3
3314 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm3[2,3],xmm2[4,5],xmm3[6,7]
3315 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
3316 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
3317 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
3318 ; CHECK-SSE41-NEXT: pcmpeqd %xmm3, %xmm0
3319 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
3320 ; CHECK-SSE41-NEXT: retq
3322 ; CHECK-AVX1-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
3323 ; CHECK-AVX1: # %bb.0:
3324 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1
3325 ; CHECK-AVX1-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
3326 ; CHECK-AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2
3327 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
3328 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm2
3329 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
3330 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
3331 ; CHECK-AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
3332 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
3333 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
3334 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
3335 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
3336 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
3337 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
3338 ; CHECK-AVX1-NEXT: retq
3340 ; CHECK-AVX2-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
3341 ; CHECK-AVX2: # %bb.0:
3342 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm1
3343 ; CHECK-AVX2-NEXT: vpmuldq {{.*}}(%rip), %xmm0, %xmm2
3344 ; CHECK-AVX2-NEXT: vpsrlq $32, %xmm2, %xmm2
3345 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm2, %xmm1
3346 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
3347 ; CHECK-AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
3348 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3]
3349 ; CHECK-AVX2-NEXT: vpsravd {{.*}}(%rip), %xmm1, %xmm1
3350 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
3351 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
3352 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
3353 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
3354 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
3355 ; CHECK-AVX2-NEXT: retq
3357 ; CHECK-AVX512VL-LABEL: test_srem_even_allones_and_poweroftwo_and_one:
3358 ; CHECK-AVX512VL: # %bb.0:
3359 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm0, %xmm0
3360 ; CHECK-AVX512VL-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0
3361 ; CHECK-AVX512VL-NEXT: vprorvd {{.*}}(%rip), %xmm0, %xmm0
3362 ; CHECK-AVX512VL-NEXT: vpminud {{.*}}(%rip), %xmm0, %xmm1
3363 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
3364 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
3365 ; CHECK-AVX512VL-NEXT: retq
; IR under test: per-lane signed remainder, equality with zero, then zext of
; the <4 x i1> mask to <4 x i32>.
3366 %srem = srem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 1>
3367 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
3368 %ret = zext <4 x i1> %cmp to <4 x i32>