1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE2
3 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE41
4 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX1
5 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
6 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512VL
9 define <4 x i32> @test_srem_odd_25(<4 x i32> %X) nounwind {
10 ; CHECK-SSE2-LABEL: test_srem_odd_25:
11 ; CHECK-SSE2: # %bb.0:
12 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
13 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
14 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
15 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
16 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
17 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
18 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
19 ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20 ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22 ; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23 ; CHECK-SSE2-NEXT: retq
25 ; CHECK-SSE41-LABEL: test_srem_odd_25:
26 ; CHECK-SSE41: # %bb.0:
27 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28 ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690]
30 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
31 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
32 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
33 ; CHECK-SSE41-NEXT: retq
35 ; CHECK-AVX1-LABEL: test_srem_odd_25:
36 ; CHECK-AVX1: # %bb.0:
37 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38 ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
39 ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
40 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
41 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
42 ; CHECK-AVX1-NEXT: retq
44 ; CHECK-AVX2-LABEL: test_srem_odd_25:
45 ; CHECK-AVX2: # %bb.0:
46 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
47 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
48 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [85899345,85899345,85899345,85899345]
49 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
50 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690]
51 ; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
52 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
53 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
54 ; CHECK-AVX2-NEXT: retq
56 ; CHECK-AVX512VL-LABEL: test_srem_odd_25:
57 ; CHECK-AVX512VL: # %bb.0:
58 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
59 ; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
60 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
61 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
62 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
63 ; CHECK-AVX512VL-NEXT: retq
64 %srem = srem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
65 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
66 %ret = zext <4 x i1> %cmp to <4 x i32>
71 define <4 x i32> @test_srem_even_100(<4 x i32> %X) nounwind {
72 ; CHECK-SSE2-LABEL: test_srem_even_100:
73 ; CHECK-SSE2: # %bb.0:
74 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
75 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
76 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
77 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
78 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
79 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
80 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
81 ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
82 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
83 ; CHECK-SSE2-NEXT: psrld $2, %xmm1
84 ; CHECK-SSE2-NEXT: pslld $30, %xmm0
85 ; CHECK-SSE2-NEXT: por %xmm1, %xmm0
86 ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
87 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
88 ; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
89 ; CHECK-SSE2-NEXT: retq
91 ; CHECK-SSE41-LABEL: test_srem_even_100:
92 ; CHECK-SSE41: # %bb.0:
93 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
94 ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
95 ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
96 ; CHECK-SSE41-NEXT: psrld $2, %xmm1
97 ; CHECK-SSE41-NEXT: pslld $30, %xmm0
98 ; CHECK-SSE41-NEXT: por %xmm1, %xmm0
99 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
100 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
101 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
102 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
103 ; CHECK-SSE41-NEXT: retq
105 ; CHECK-AVX1-LABEL: test_srem_even_100:
106 ; CHECK-AVX1: # %bb.0:
107 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
108 ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
109 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1
110 ; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0
111 ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
112 ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
113 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
114 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
115 ; CHECK-AVX1-NEXT: retq
117 ; CHECK-AVX2-LABEL: test_srem_even_100:
118 ; CHECK-AVX2: # %bb.0:
119 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
120 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
121 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [85899344,85899344,85899344,85899344]
122 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
123 ; CHECK-AVX2-NEXT: vpsrld $2, %xmm0, %xmm1
124 ; CHECK-AVX2-NEXT: vpslld $30, %xmm0, %xmm0
125 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
126 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
127 ; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
128 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
129 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
130 ; CHECK-AVX2-NEXT: retq
132 ; CHECK-AVX512VL-LABEL: test_srem_even_100:
133 ; CHECK-AVX512VL: # %bb.0:
134 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
135 ; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
136 ; CHECK-AVX512VL-NEXT: vprord $2, %xmm0, %xmm0
137 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
138 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
139 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
140 ; CHECK-AVX512VL-NEXT: retq
141 %srem = srem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
142 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
143 %ret = zext <4 x i1> %cmp to <4 x i32>
147 ; Negative divisors should be negated, and thus these are still splat vectors.
150 define <4 x i32> @test_srem_odd_neg25(<4 x i32> %X) nounwind {
151 ; CHECK-SSE2-LABEL: test_srem_odd_neg25:
152 ; CHECK-SSE2: # %bb.0:
153 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
154 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
155 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
156 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
157 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
158 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
159 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
160 ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
161 ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
162 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
163 ; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
164 ; CHECK-SSE2-NEXT: retq
166 ; CHECK-SSE41-LABEL: test_srem_odd_neg25:
167 ; CHECK-SSE41: # %bb.0:
168 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
169 ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
170 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690]
171 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
172 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
173 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
174 ; CHECK-SSE41-NEXT: retq
176 ; CHECK-AVX1-LABEL: test_srem_odd_neg25:
177 ; CHECK-AVX1: # %bb.0:
178 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
179 ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
180 ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
181 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
182 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
183 ; CHECK-AVX1-NEXT: retq
185 ; CHECK-AVX2-LABEL: test_srem_odd_neg25:
186 ; CHECK-AVX2: # %bb.0:
187 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
188 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
189 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [85899345,85899345,85899345,85899345]
190 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
191 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690]
192 ; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
193 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
194 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
195 ; CHECK-AVX2-NEXT: retq
197 ; CHECK-AVX512VL-LABEL: test_srem_odd_neg25:
198 ; CHECK-AVX512VL: # %bb.0:
199 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
200 ; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
201 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
202 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
203 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
204 ; CHECK-AVX512VL-NEXT: retq
205 %srem = srem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
206 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
207 %ret = zext <4 x i1> %cmp to <4 x i32>
212 define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind {
213 ; CHECK-SSE2-LABEL: test_srem_even_neg100:
214 ; CHECK-SSE2: # %bb.0:
215 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
216 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
217 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
218 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
219 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
220 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
221 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
222 ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
223 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
224 ; CHECK-SSE2-NEXT: psrld $2, %xmm1
225 ; CHECK-SSE2-NEXT: pslld $30, %xmm0
226 ; CHECK-SSE2-NEXT: por %xmm1, %xmm0
227 ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
228 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
229 ; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
230 ; CHECK-SSE2-NEXT: retq
232 ; CHECK-SSE41-LABEL: test_srem_even_neg100:
233 ; CHECK-SSE41: # %bb.0:
234 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
235 ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
236 ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
237 ; CHECK-SSE41-NEXT: psrld $2, %xmm1
238 ; CHECK-SSE41-NEXT: pslld $30, %xmm0
239 ; CHECK-SSE41-NEXT: por %xmm1, %xmm0
240 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
241 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
242 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
243 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
244 ; CHECK-SSE41-NEXT: retq
246 ; CHECK-AVX1-LABEL: test_srem_even_neg100:
247 ; CHECK-AVX1: # %bb.0:
248 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
249 ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
250 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1
251 ; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0
252 ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
253 ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
254 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
255 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
256 ; CHECK-AVX1-NEXT: retq
258 ; CHECK-AVX2-LABEL: test_srem_even_neg100:
259 ; CHECK-AVX2: # %bb.0:
260 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
261 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
262 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [85899344,85899344,85899344,85899344]
263 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
264 ; CHECK-AVX2-NEXT: vpsrld $2, %xmm0, %xmm1
265 ; CHECK-AVX2-NEXT: vpslld $30, %xmm0, %xmm0
266 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
267 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
268 ; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
269 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
270 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
271 ; CHECK-AVX2-NEXT: retq
273 ; CHECK-AVX512VL-LABEL: test_srem_even_neg100:
274 ; CHECK-AVX512VL: # %bb.0:
275 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
276 ; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
277 ; CHECK-AVX512VL-NEXT: vprord $2, %xmm0, %xmm0
278 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
279 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
280 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
281 ; CHECK-AVX512VL-NEXT: retq
282 %srem = srem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
283 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
284 %ret = zext <4 x i1> %cmp to <4 x i32>
288 ;------------------------------------------------------------------------------;
289 ; Comparison constant has undef elements.
290 ;------------------------------------------------------------------------------;
292 define <4 x i32> @test_srem_odd_undef1(<4 x i32> %X) nounwind {
293 ; CHECK-SSE2-LABEL: test_srem_odd_undef1:
294 ; CHECK-SSE2: # %bb.0:
295 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
296 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
297 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
298 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
299 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
300 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
301 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
302 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
303 ; CHECK-SSE2-NEXT: pxor %xmm3, %xmm3
304 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
305 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
306 ; CHECK-SSE2-NEXT: pand %xmm1, %xmm4
307 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2
308 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
309 ; CHECK-SSE2-NEXT: psrld $31, %xmm1
310 ; CHECK-SSE2-NEXT: psrad $3, %xmm2
311 ; CHECK-SSE2-NEXT: paddd %xmm1, %xmm2
312 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [25,25,25,25]
313 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
314 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
315 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
316 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm4
317 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,2,2,3]
318 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
319 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
320 ; CHECK-SSE2-NEXT: pcmpeqd %xmm3, %xmm0
321 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
322 ; CHECK-SSE2-NEXT: retq
324 ; CHECK-SSE41-LABEL: test_srem_odd_undef1:
325 ; CHECK-SSE41: # %bb.0:
326 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
327 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
328 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm1
329 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
330 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
331 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
332 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
333 ; CHECK-SSE41-NEXT: psrld $31, %xmm1
334 ; CHECK-SSE41-NEXT: psrad $3, %xmm2
335 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
336 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
337 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
338 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
339 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
340 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
341 ; CHECK-SSE41-NEXT: retq
343 ; CHECK-AVX1-LABEL: test_srem_odd_undef1:
344 ; CHECK-AVX1: # %bb.0:
345 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
346 ; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
347 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
348 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
349 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
350 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
351 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
352 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm1
353 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
354 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
355 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
356 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
357 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
358 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
359 ; CHECK-AVX1-NEXT: retq
361 ; CHECK-AVX2-LABEL: test_srem_odd_undef1:
362 ; CHECK-AVX2: # %bb.0:
363 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
364 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
365 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
366 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
367 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
368 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
369 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
370 ; CHECK-AVX2-NEXT: vpsrad $3, %xmm1, %xmm1
371 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
372 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [25,25,25,25]
373 ; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
374 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
375 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
376 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
377 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
378 ; CHECK-AVX2-NEXT: retq
380 ; CHECK-AVX512VL-LABEL: test_srem_odd_undef1:
381 ; CHECK-AVX512VL: # %bb.0:
382 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
383 ; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
384 ; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
385 ; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
386 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
387 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
388 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm1, %xmm2
389 ; CHECK-AVX512VL-NEXT: vpsrad $3, %xmm1, %xmm1
390 ; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
391 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
392 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
393 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
394 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
395 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
396 ; CHECK-AVX512VL-NEXT: retq
397 %srem = srem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
398 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 undef, i32 0>
399 %ret = zext <4 x i1> %cmp to <4 x i32>
403 define <4 x i32> @test_srem_even_undef1(<4 x i32> %X) nounwind {
404 ; CHECK-SSE2-LABEL: test_srem_even_undef1:
405 ; CHECK-SSE2: # %bb.0:
406 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
407 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
408 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
409 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
410 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
411 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
412 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
413 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
414 ; CHECK-SSE2-NEXT: pxor %xmm3, %xmm3
415 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
416 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
417 ; CHECK-SSE2-NEXT: pand %xmm1, %xmm4
418 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2
419 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
420 ; CHECK-SSE2-NEXT: psrld $31, %xmm1
421 ; CHECK-SSE2-NEXT: psrad $5, %xmm2
422 ; CHECK-SSE2-NEXT: paddd %xmm1, %xmm2
423 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [100,100,100,100]
424 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
425 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
426 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
427 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm4
428 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,2,2,3]
429 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
430 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
431 ; CHECK-SSE2-NEXT: pcmpeqd %xmm3, %xmm0
432 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
433 ; CHECK-SSE2-NEXT: retq
435 ; CHECK-SSE41-LABEL: test_srem_even_undef1:
436 ; CHECK-SSE41: # %bb.0:
437 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
438 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
439 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm1
440 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
441 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
442 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
443 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
444 ; CHECK-SSE41-NEXT: psrld $31, %xmm1
445 ; CHECK-SSE41-NEXT: psrad $5, %xmm2
446 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
447 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
448 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
449 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
450 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
451 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
452 ; CHECK-SSE41-NEXT: retq
454 ; CHECK-AVX1-LABEL: test_srem_even_undef1:
455 ; CHECK-AVX1: # %bb.0:
456 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
457 ; CHECK-AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
458 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
459 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
460 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
461 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
462 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
463 ; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm1
464 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
465 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
466 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
467 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
468 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
469 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
470 ; CHECK-AVX1-NEXT: retq
472 ; CHECK-AVX2-LABEL: test_srem_even_undef1:
473 ; CHECK-AVX2: # %bb.0:
474 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
475 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
476 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
477 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
478 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
479 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
480 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
481 ; CHECK-AVX2-NEXT: vpsrad $5, %xmm1, %xmm1
482 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
483 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [100,100,100,100]
484 ; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
485 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
486 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
487 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
488 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
489 ; CHECK-AVX2-NEXT: retq
491 ; CHECK-AVX512VL-LABEL: test_srem_even_undef1:
492 ; CHECK-AVX512VL: # %bb.0:
493 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
494 ; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
495 ; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
496 ; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
497 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
498 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
499 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm1, %xmm2
500 ; CHECK-AVX512VL-NEXT: vpsrad $5, %xmm1, %xmm1
501 ; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
502 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
503 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
504 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
505 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
506 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
507 ; CHECK-AVX512VL-NEXT: retq
508 %srem = srem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
509 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 undef, i32 0>
510 %ret = zext <4 x i1> %cmp to <4 x i32>
514 ;------------------------------------------------------------------------------;
515 ; Special cases
516 ;------------------------------------------------------------------------------;
518 define <4 x i32> @test_srem_one_eq(<4 x i32> %X) nounwind {
519 ; CHECK-SSE-LABEL: test_srem_one_eq:
520 ; CHECK-SSE: # %bb.0:
521 ; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
522 ; CHECK-SSE-NEXT: retq
524 ; CHECK-AVX-LABEL: test_srem_one_eq:
525 ; CHECK-AVX: # %bb.0:
526 ; CHECK-AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
527 ; CHECK-AVX-NEXT: retq
528 %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
529 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
530 %ret = zext <4 x i1> %cmp to <4 x i32>
533 define <4 x i32> @test_srem_one_ne(<4 x i32> %X) nounwind {
534 ; CHECK-SSE-LABEL: test_srem_one_ne:
535 ; CHECK-SSE: # %bb.0:
536 ; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
537 ; CHECK-SSE-NEXT: retq
539 ; CHECK-AVX-LABEL: test_srem_one_ne:
540 ; CHECK-AVX: # %bb.0:
541 ; CHECK-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
542 ; CHECK-AVX-NEXT: retq
543 %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
544 %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
545 %ret = zext <4 x i1> %cmp to <4 x i32>
549 ; We can lower remainder of division by powers of two much better elsewhere.
550 define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind {
551 ; CHECK-SSE-LABEL: test_srem_pow2:
552 ; CHECK-SSE: # %bb.0:
553 ; CHECK-SSE-NEXT: movdqa %xmm0, %xmm1
554 ; CHECK-SSE-NEXT: psrad $31, %xmm1
555 ; CHECK-SSE-NEXT: psrld $28, %xmm1
556 ; CHECK-SSE-NEXT: paddd %xmm0, %xmm1
557 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
558 ; CHECK-SSE-NEXT: psubd %xmm1, %xmm0
559 ; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
560 ; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm0
561 ; CHECK-SSE-NEXT: psrld $31, %xmm0
562 ; CHECK-SSE-NEXT: retq
564 ; CHECK-AVX1-LABEL: test_srem_pow2:
565 ; CHECK-AVX1: # %bb.0:
566 ; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
567 ; CHECK-AVX1-NEXT: vpsrld $28, %xmm1, %xmm1
568 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
569 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
570 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
571 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
572 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
573 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
574 ; CHECK-AVX1-NEXT: retq
576 ; CHECK-AVX2-LABEL: test_srem_pow2:
577 ; CHECK-AVX2: # %bb.0:
578 ; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
579 ; CHECK-AVX2-NEXT: vpsrld $28, %xmm1, %xmm1
580 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
581 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [4294967280,4294967280,4294967280,4294967280]
582 ; CHECK-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
583 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
584 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
585 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
586 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
587 ; CHECK-AVX2-NEXT: retq
589 ; CHECK-AVX512VL-LABEL: test_srem_pow2:
590 ; CHECK-AVX512VL: # %bb.0:
591 ; CHECK-AVX512VL-NEXT: vpsrad $31, %xmm0, %xmm1
592 ; CHECK-AVX512VL-NEXT: vpsrld $28, %xmm1, %xmm1
593 ; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm1
594 ; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
595 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
596 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
597 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
598 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
599 ; CHECK-AVX512VL-NEXT: retq
600 %srem = srem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16>
601 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
602 %ret = zext <4 x i1> %cmp to <4 x i32>
606 ; We could lower remainder of division by INT_MIN much better elsewhere.
607 define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
608 ; CHECK-SSE-LABEL: test_srem_int_min:
609 ; CHECK-SSE: # %bb.0:
610 ; CHECK-SSE-NEXT: movdqa %xmm0, %xmm1
611 ; CHECK-SSE-NEXT: psrad $31, %xmm1
612 ; CHECK-SSE-NEXT: psrld $1, %xmm1
613 ; CHECK-SSE-NEXT: paddd %xmm0, %xmm1
614 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
615 ; CHECK-SSE-NEXT: paddd %xmm1, %xmm0
616 ; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
617 ; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm0
618 ; CHECK-SSE-NEXT: psrld $31, %xmm0
619 ; CHECK-SSE-NEXT: retq
621 ; CHECK-AVX1-LABEL: test_srem_int_min:
622 ; CHECK-AVX1: # %bb.0:
623 ; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
624 ; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
625 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
626 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
627 ; CHECK-AVX1-NEXT: vpaddd %xmm0, %xmm1, %xmm0
628 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
629 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
630 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
631 ; CHECK-AVX1-NEXT: retq
633 ; CHECK-AVX2-LABEL: test_srem_int_min:
634 ; CHECK-AVX2: # %bb.0:
635 ; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
636 ; CHECK-AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
637 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
638 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
639 ; CHECK-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
640 ; CHECK-AVX2-NEXT: vpaddd %xmm0, %xmm1, %xmm0
641 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
642 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
643 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
644 ; CHECK-AVX2-NEXT: retq
646 ; CHECK-AVX512VL-LABEL: test_srem_int_min:
647 ; CHECK-AVX512VL: # %bb.0:
648 ; CHECK-AVX512VL-NEXT: vpsrad $31, %xmm0, %xmm1
649 ; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
650 ; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm1
651 ; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
652 ; CHECK-AVX512VL-NEXT: vpaddd %xmm0, %xmm1, %xmm0
653 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
654 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
655 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
656 ; CHECK-AVX512VL-NEXT: retq
657 %srem = srem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
658 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
659 %ret = zext <4 x i1> %cmp to <4 x i32>
663 ; We could lower remainder of division by all-ones much better elsewhere.
664 define <4 x i32> @test_srem_allones(<4 x i32> %X) nounwind {
665 ; CHECK-SSE-LABEL: test_srem_allones:
666 ; CHECK-SSE: # %bb.0:
667 ; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
668 ; CHECK-SSE-NEXT: retq
670 ; CHECK-AVX-LABEL: test_srem_allones:
671 ; CHECK-AVX: # %bb.0:
672 ; CHECK-AVX-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
673 ; CHECK-AVX-NEXT: retq
674 %srem = srem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
675 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
676 %ret = zext <4 x i1> %cmp to <4 x i32>