1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE2
3 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK-SSE,CHECK-SSE41
4 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX1
5 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX2
6 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK-AVX,CHECK-AVX512VL
; Splat odd divisor (25): the (X srem 25) == 0 check is lowered without
; computing the remainder — multiply by the modular inverse, add a bias, then
; an unsigned range check (pminud/pcmpeqd on SSE4.1+; pxor/pcmpgtd signed
; compare trick on plain SSE2).
9 define <4 x i32> @test_srem_odd_25(<4 x i32> %X) nounwind {
10 ; CHECK-SSE2-LABEL: test_srem_odd_25:
11 ; CHECK-SSE2: # %bb.0:
12 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
13 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
14 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
15 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
16 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
17 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
18 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
19 ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
20 ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
21 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
22 ; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
23 ; CHECK-SSE2-NEXT: retq
25 ; CHECK-SSE41-LABEL: test_srem_odd_25:
26 ; CHECK-SSE41: # %bb.0:
27 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
28 ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
29 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690]
30 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
31 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
32 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
33 ; CHECK-SSE41-NEXT: retq
35 ; CHECK-AVX1-LABEL: test_srem_odd_25:
36 ; CHECK-AVX1: # %bb.0:
37 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
38 ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
39 ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
40 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
41 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
42 ; CHECK-AVX1-NEXT: retq
44 ; CHECK-AVX2-LABEL: test_srem_odd_25:
45 ; CHECK-AVX2: # %bb.0:
46 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
47 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
48 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [85899345,85899345,85899345,85899345]
49 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
50 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690]
51 ; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
52 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
53 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
54 ; CHECK-AVX2-NEXT: retq
56 ; CHECK-AVX512VL-LABEL: test_srem_odd_25:
57 ; CHECK-AVX512VL: # %bb.0:
58 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
59 ; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
60 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
61 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
62 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
63 ; CHECK-AVX512VL-NEXT: retq
64 %srem = srem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
65 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
66 %ret = zext <4 x i1> %cmp to <4 x i32>
; Splat even divisor (100): same inverse-multiply trick, but the even factor
; additionally requires a rotate right by 2 (psrld $2 + pslld $30 + por, or a
; single vprord on AVX512VL) before the unsigned range check.
71 define <4 x i32> @test_srem_even_100(<4 x i32> %X) nounwind {
72 ; CHECK-SSE2-LABEL: test_srem_even_100:
73 ; CHECK-SSE2: # %bb.0:
74 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
75 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
76 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
77 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
78 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
79 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
80 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
81 ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
82 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
83 ; CHECK-SSE2-NEXT: psrld $2, %xmm1
84 ; CHECK-SSE2-NEXT: pslld $30, %xmm0
85 ; CHECK-SSE2-NEXT: por %xmm1, %xmm0
86 ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
87 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
88 ; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
89 ; CHECK-SSE2-NEXT: retq
91 ; CHECK-SSE41-LABEL: test_srem_even_100:
92 ; CHECK-SSE41: # %bb.0:
93 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
94 ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
95 ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
96 ; CHECK-SSE41-NEXT: psrld $2, %xmm1
97 ; CHECK-SSE41-NEXT: pslld $30, %xmm0
98 ; CHECK-SSE41-NEXT: por %xmm1, %xmm0
99 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
100 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
101 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
102 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
103 ; CHECK-SSE41-NEXT: retq
105 ; CHECK-AVX1-LABEL: test_srem_even_100:
106 ; CHECK-AVX1: # %bb.0:
107 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
108 ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
109 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1
110 ; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0
111 ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
112 ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
113 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
114 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
115 ; CHECK-AVX1-NEXT: retq
117 ; CHECK-AVX2-LABEL: test_srem_even_100:
118 ; CHECK-AVX2: # %bb.0:
119 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
120 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
121 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [85899344,85899344,85899344,85899344]
122 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
123 ; CHECK-AVX2-NEXT: vpsrld $2, %xmm0, %xmm1
124 ; CHECK-AVX2-NEXT: vpslld $30, %xmm0, %xmm0
125 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
126 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
127 ; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
128 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
129 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
130 ; CHECK-AVX2-NEXT: retq
132 ; CHECK-AVX512VL-LABEL: test_srem_even_100:
133 ; CHECK-AVX512VL: # %bb.0:
134 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
135 ; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
136 ; CHECK-AVX512VL-NEXT: vprord $2, %xmm0, %xmm0
137 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
138 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
139 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
140 ; CHECK-AVX512VL-NEXT: retq
141 %srem = srem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
142 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
143 %ret = zext <4 x i1> %cmp to <4 x i32>
147 ; Negative divisors should be negated, and thus these are still splat vectors.
; Mixed-sign splat divisor <25,-25,-25,25>: the CHECK lines are identical to
; test_srem_odd_25, demonstrating that the negative lanes are canonicalized
; (negated) and the splat lowering still applies.
150 define <4 x i32> @test_srem_odd_neg25(<4 x i32> %X) nounwind {
151 ; CHECK-SSE2-LABEL: test_srem_odd_neg25:
152 ; CHECK-SSE2: # %bb.0:
153 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
154 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
155 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
156 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
157 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
158 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
159 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
160 ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
161 ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
162 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
163 ; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
164 ; CHECK-SSE2-NEXT: retq
166 ; CHECK-SSE41-LABEL: test_srem_odd_neg25:
167 ; CHECK-SSE41: # %bb.0:
168 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
169 ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
170 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690]
171 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
172 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
173 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
174 ; CHECK-SSE41-NEXT: retq
176 ; CHECK-AVX1-LABEL: test_srem_odd_neg25:
177 ; CHECK-AVX1: # %bb.0:
178 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
179 ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
180 ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
181 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
182 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
183 ; CHECK-AVX1-NEXT: retq
185 ; CHECK-AVX2-LABEL: test_srem_odd_neg25:
186 ; CHECK-AVX2: # %bb.0:
187 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
188 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
189 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [85899345,85899345,85899345,85899345]
190 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
191 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [171798690,171798690,171798690,171798690]
192 ; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
193 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
194 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
195 ; CHECK-AVX2-NEXT: retq
197 ; CHECK-AVX512VL-LABEL: test_srem_odd_neg25:
198 ; CHECK-AVX512VL: # %bb.0:
199 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
200 ; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
201 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
202 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
203 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
204 ; CHECK-AVX512VL-NEXT: retq
205 %srem = srem <4 x i32> %X, <i32 25, i32 -25, i32 -25, i32 25>
206 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
207 %ret = zext <4 x i1> %cmp to <4 x i32>
; Mixed-sign even splat divisor <-100,100,-100,100>: lowering matches
; test_srem_even_100 (inverse multiply + rotate-by-2 + unsigned range check),
; confirming sign canonicalization of the divisor lanes.
212 define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind {
213 ; CHECK-SSE2-LABEL: test_srem_even_neg100:
214 ; CHECK-SSE2: # %bb.0:
215 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
216 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
217 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm0
218 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
219 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
220 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
221 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
222 ; CHECK-SSE2-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
223 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
224 ; CHECK-SSE2-NEXT: psrld $2, %xmm1
225 ; CHECK-SSE2-NEXT: pslld $30, %xmm0
226 ; CHECK-SSE2-NEXT: por %xmm1, %xmm0
227 ; CHECK-SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
228 ; CHECK-SSE2-NEXT: pcmpgtd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
229 ; CHECK-SSE2-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
230 ; CHECK-SSE2-NEXT: retq
232 ; CHECK-SSE41-LABEL: test_srem_even_neg100:
233 ; CHECK-SSE41: # %bb.0:
234 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
235 ; CHECK-SSE41-NEXT: paddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
236 ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
237 ; CHECK-SSE41-NEXT: psrld $2, %xmm1
238 ; CHECK-SSE41-NEXT: pslld $30, %xmm0
239 ; CHECK-SSE41-NEXT: por %xmm1, %xmm0
240 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
241 ; CHECK-SSE41-NEXT: pminud %xmm0, %xmm1
242 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
243 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
244 ; CHECK-SSE41-NEXT: retq
246 ; CHECK-AVX1-LABEL: test_srem_even_neg100:
247 ; CHECK-AVX1: # %bb.0:
248 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
249 ; CHECK-AVX1-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
250 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm0, %xmm1
251 ; CHECK-AVX1-NEXT: vpslld $30, %xmm0, %xmm0
252 ; CHECK-AVX1-NEXT: vpor %xmm1, %xmm0, %xmm0
253 ; CHECK-AVX1-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
254 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
255 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
256 ; CHECK-AVX1-NEXT: retq
258 ; CHECK-AVX2-LABEL: test_srem_even_neg100:
259 ; CHECK-AVX2: # %bb.0:
260 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [3264175145,3264175145,3264175145,3264175145]
261 ; CHECK-AVX2-NEXT: vpmulld %xmm1, %xmm0, %xmm0
262 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [85899344,85899344,85899344,85899344]
263 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
264 ; CHECK-AVX2-NEXT: vpsrld $2, %xmm0, %xmm1
265 ; CHECK-AVX2-NEXT: vpslld $30, %xmm0, %xmm0
266 ; CHECK-AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
267 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [42949672,42949672,42949672,42949672]
268 ; CHECK-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm1
269 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
270 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
271 ; CHECK-AVX2-NEXT: retq
273 ; CHECK-AVX512VL-LABEL: test_srem_even_neg100:
274 ; CHECK-AVX512VL: # %bb.0:
275 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
276 ; CHECK-AVX512VL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
277 ; CHECK-AVX512VL-NEXT: vprord $2, %xmm0, %xmm0
278 ; CHECK-AVX512VL-NEXT: vpminud {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm1
279 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
280 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
281 ; CHECK-AVX512VL-NEXT: retq
282 %srem = srem <4 x i32> %X, <i32 -100, i32 100, i32 -100, i32 100>
283 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
284 %ret = zext <4 x i1> %cmp to <4 x i32>
288 ;------------------------------------------------------------------------------;
289 ; Comparison constant has undef elements.
290 ;------------------------------------------------------------------------------;
; Odd divisor 25 with an undef lane in the comparison constant: here the
; full srem is actually computed (signed magic-number multiply via pmuldq,
; shift-and-correct, multiply back by 25, psubd) and then compared against
; zero — the cheaper pminud divisibility trick is not used in this case.
292 define <4 x i32> @test_srem_odd_undef1(<4 x i32> %X) nounwind {
293 ; CHECK-SSE2-LABEL: test_srem_odd_undef1:
294 ; CHECK-SSE2: # %bb.0:
295 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
296 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
297 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
298 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
299 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
300 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
301 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
302 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
303 ; CHECK-SSE2-NEXT: pxor %xmm3, %xmm3
304 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
305 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
306 ; CHECK-SSE2-NEXT: pand %xmm1, %xmm4
307 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2
308 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
309 ; CHECK-SSE2-NEXT: psrld $31, %xmm1
310 ; CHECK-SSE2-NEXT: psrad $3, %xmm2
311 ; CHECK-SSE2-NEXT: paddd %xmm1, %xmm2
312 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [25,25,25,25]
313 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
314 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
315 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
316 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm4
317 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,2,2,3]
318 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
319 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
320 ; CHECK-SSE2-NEXT: pcmpeqd %xmm3, %xmm0
321 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
322 ; CHECK-SSE2-NEXT: retq
324 ; CHECK-SSE41-LABEL: test_srem_odd_undef1:
325 ; CHECK-SSE41: # %bb.0:
326 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
327 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
328 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm1
329 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
330 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
331 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
332 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
333 ; CHECK-SSE41-NEXT: psrld $31, %xmm1
334 ; CHECK-SSE41-NEXT: psrad $3, %xmm2
335 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
336 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
337 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
338 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
339 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
340 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
341 ; CHECK-SSE41-NEXT: retq
343 ; CHECK-AVX1-LABEL: test_srem_odd_undef1:
344 ; CHECK-AVX1: # %bb.0:
345 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
346 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
347 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
348 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
349 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
350 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
351 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
352 ; CHECK-AVX1-NEXT: vpsrad $3, %xmm1, %xmm1
353 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
354 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
355 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
356 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
357 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
358 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
359 ; CHECK-AVX1-NEXT: retq
361 ; CHECK-AVX2-LABEL: test_srem_odd_undef1:
362 ; CHECK-AVX2: # %bb.0:
363 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
364 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
365 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
366 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
367 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
368 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
369 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
370 ; CHECK-AVX2-NEXT: vpsrad $3, %xmm1, %xmm1
371 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
372 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [25,25,25,25]
373 ; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
374 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
375 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
376 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
377 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
378 ; CHECK-AVX2-NEXT: retq
380 ; CHECK-AVX512VL-LABEL: test_srem_odd_undef1:
381 ; CHECK-AVX512VL: # %bb.0:
382 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
383 ; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
384 ; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
385 ; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
386 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
387 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
388 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm1, %xmm2
389 ; CHECK-AVX512VL-NEXT: vpsrad $3, %xmm1, %xmm1
390 ; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
391 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
392 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
393 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
394 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
395 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
396 ; CHECK-AVX512VL-NEXT: retq
397 %srem = srem <4 x i32> %X, <i32 25, i32 25, i32 25, i32 25>
398 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 undef, i32 0>
399 %ret = zext <4 x i1> %cmp to <4 x i32>
; Even divisor 100 with an undef lane in the comparison constant: like
; test_srem_odd_undef1, the remainder is computed in full (signed magic
; multiply, psrad $5 correction, multiply by 100, psubd) and compared with
; zero instead of using the pminud divisibility shortcut.
403 define <4 x i32> @test_srem_even_undef1(<4 x i32> %X) nounwind {
404 ; CHECK-SSE2-LABEL: test_srem_even_undef1:
405 ; CHECK-SSE2: # %bb.0:
406 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1374389535,1374389535,1374389535,1374389535]
407 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
408 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
409 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
410 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
411 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
412 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
413 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
414 ; CHECK-SSE2-NEXT: pxor %xmm3, %xmm3
415 ; CHECK-SSE2-NEXT: pxor %xmm4, %xmm4
416 ; CHECK-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
417 ; CHECK-SSE2-NEXT: pand %xmm1, %xmm4
418 ; CHECK-SSE2-NEXT: psubd %xmm4, %xmm2
419 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
420 ; CHECK-SSE2-NEXT: psrld $31, %xmm1
421 ; CHECK-SSE2-NEXT: psrad $5, %xmm2
422 ; CHECK-SSE2-NEXT: paddd %xmm1, %xmm2
423 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [100,100,100,100]
424 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm2[1,1,3,3]
425 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
426 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
427 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm4
428 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm4[0,2,2,3]
429 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
430 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
431 ; CHECK-SSE2-NEXT: pcmpeqd %xmm3, %xmm0
432 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
433 ; CHECK-SSE2-NEXT: retq
435 ; CHECK-SSE41-LABEL: test_srem_even_undef1:
436 ; CHECK-SSE41: # %bb.0:
437 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
438 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
439 ; CHECK-SSE41-NEXT: pmuldq %xmm2, %xmm1
440 ; CHECK-SSE41-NEXT: pmuldq %xmm0, %xmm2
441 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
442 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
443 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
444 ; CHECK-SSE41-NEXT: psrld $31, %xmm1
445 ; CHECK-SSE41-NEXT: psrad $5, %xmm2
446 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
447 ; CHECK-SSE41-NEXT: pmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
448 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
449 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
450 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
451 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
452 ; CHECK-SSE41-NEXT: retq
454 ; CHECK-AVX1-LABEL: test_srem_even_undef1:
455 ; CHECK-AVX1: # %bb.0:
456 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
457 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
458 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
459 ; CHECK-AVX1-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
460 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
461 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
462 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
463 ; CHECK-AVX1-NEXT: vpsrad $5, %xmm1, %xmm1
464 ; CHECK-AVX1-NEXT: vpaddd %xmm2, %xmm1, %xmm1
465 ; CHECK-AVX1-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
466 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
467 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
468 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
469 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
470 ; CHECK-AVX1-NEXT: retq
472 ; CHECK-AVX2-LABEL: test_srem_even_undef1:
473 ; CHECK-AVX2: # %bb.0:
474 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
475 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
476 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
477 ; CHECK-AVX2-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
478 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
479 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
480 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm1, %xmm2
481 ; CHECK-AVX2-NEXT: vpsrad $5, %xmm1, %xmm1
482 ; CHECK-AVX2-NEXT: vpaddd %xmm2, %xmm1, %xmm1
483 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [100,100,100,100]
484 ; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
485 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
486 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
487 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
488 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
489 ; CHECK-AVX2-NEXT: retq
491 ; CHECK-AVX512VL-LABEL: test_srem_even_undef1:
492 ; CHECK-AVX512VL: # %bb.0:
493 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
494 ; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1374389535,1374389535,1374389535,1374389535]
495 ; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm1, %xmm1
496 ; CHECK-AVX512VL-NEXT: vpmuldq %xmm2, %xmm0, %xmm2
497 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
498 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
499 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm1, %xmm2
500 ; CHECK-AVX512VL-NEXT: vpsrad $5, %xmm1, %xmm1
501 ; CHECK-AVX512VL-NEXT: vpaddd %xmm2, %xmm1, %xmm1
502 ; CHECK-AVX512VL-NEXT: vpmulld {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
503 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
504 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
505 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
506 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
507 ; CHECK-AVX512VL-NEXT: retq
508 %srem = srem <4 x i32> %X, <i32 100, i32 100, i32 100, i32 100>
509 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 undef, i32 0>
510 %ret = zext <4 x i1> %cmp to <4 x i32>
514 ;------------------------------------------------------------------------------;
516 ;------------------------------------------------------------------------------;
; Divisor 1: X srem 1 is always 0, so the eq-zero comparison constant-folds
; to all-true and only a <1,1,1,1> constant materialization is emitted.
518 define <4 x i32> @test_srem_one_eq(<4 x i32> %X) nounwind {
519 ; CHECK-SSE-LABEL: test_srem_one_eq:
520 ; CHECK-SSE: # %bb.0:
521 ; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
522 ; CHECK-SSE-NEXT: retq
524 ; CHECK-AVX1-LABEL: test_srem_one_eq:
525 ; CHECK-AVX1: # %bb.0:
526 ; CHECK-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
527 ; CHECK-AVX1-NEXT: retq
529 ; CHECK-AVX2-LABEL: test_srem_one_eq:
530 ; CHECK-AVX2: # %bb.0:
531 ; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
532 ; CHECK-AVX2-NEXT: retq
534 ; CHECK-AVX512VL-LABEL: test_srem_one_eq:
535 ; CHECK-AVX512VL: # %bb.0:
536 ; CHECK-AVX512VL-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
537 ; CHECK-AVX512VL-NEXT: retq
538 %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
539 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
540 %ret = zext <4 x i1> %cmp to <4 x i32>
; Divisor 1 with an inverted (ne) comparison: the result constant-folds the
; other way, to the all-zeros vector (a single xorps/vxorps).
543 define <4 x i32> @test_srem_one_ne(<4 x i32> %X) nounwind {
544 ; CHECK-SSE-LABEL: test_srem_one_ne:
545 ; CHECK-SSE: # %bb.0:
546 ; CHECK-SSE-NEXT: xorps %xmm0, %xmm0
547 ; CHECK-SSE-NEXT: retq
549 ; CHECK-AVX-LABEL: test_srem_one_ne:
550 ; CHECK-AVX: # %bb.0:
551 ; CHECK-AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
552 ; CHECK-AVX-NEXT: retq
553 %srem = srem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
554 %cmp = icmp ne <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
555 %ret = zext <4 x i1> %cmp to <4 x i32>
559 ; We can lower remainder of division by powers of two much better elsewhere.
; Power-of-two divisor (16): the remainder is computed via the standard
; sign-bias trick (psrad $31 / psrld $28 to bias negative lanes, mask with
; pand, psubd) and then compared with zero — no multiply needed.
560 define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind {
561 ; CHECK-SSE-LABEL: test_srem_pow2:
562 ; CHECK-SSE: # %bb.0:
563 ; CHECK-SSE-NEXT: movdqa %xmm0, %xmm1
564 ; CHECK-SSE-NEXT: psrad $31, %xmm1
565 ; CHECK-SSE-NEXT: psrld $28, %xmm1
566 ; CHECK-SSE-NEXT: paddd %xmm0, %xmm1
567 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
568 ; CHECK-SSE-NEXT: psubd %xmm1, %xmm0
569 ; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
570 ; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm0
571 ; CHECK-SSE-NEXT: psrld $31, %xmm0
572 ; CHECK-SSE-NEXT: retq
574 ; CHECK-AVX1-LABEL: test_srem_pow2:
575 ; CHECK-AVX1: # %bb.0:
576 ; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
577 ; CHECK-AVX1-NEXT: vpsrld $28, %xmm1, %xmm1
578 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
579 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
580 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
581 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
582 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
583 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
584 ; CHECK-AVX1-NEXT: retq
586 ; CHECK-AVX2-LABEL: test_srem_pow2:
587 ; CHECK-AVX2: # %bb.0:
588 ; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
589 ; CHECK-AVX2-NEXT: vpsrld $28, %xmm1, %xmm1
590 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
591 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [4294967280,4294967280,4294967280,4294967280]
592 ; CHECK-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
593 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
594 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
595 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
596 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
597 ; CHECK-AVX2-NEXT: retq
599 ; CHECK-AVX512VL-LABEL: test_srem_pow2:
600 ; CHECK-AVX512VL: # %bb.0:
601 ; CHECK-AVX512VL-NEXT: vpsrad $31, %xmm0, %xmm1
602 ; CHECK-AVX512VL-NEXT: vpsrld $28, %xmm1, %xmm1
603 ; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm1
604 ; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
605 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
606 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
607 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
608 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
609 ; CHECK-AVX512VL-NEXT: retq
610 %srem = srem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16>
611 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
612 %ret = zext <4 x i1> %cmp to <4 x i32>
616 ; We could lower remainder of division by INT_MIN much better elsewhere.
; Divisor INT_MIN (0x80000000): treated like the power-of-two case — sign
; bias via psrad $31 / psrld $1, mask with the 0x80000000 constant, subtract,
; then compare with zero.
617 define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind {
618 ; CHECK-SSE-LABEL: test_srem_int_min:
619 ; CHECK-SSE: # %bb.0:
620 ; CHECK-SSE-NEXT: movdqa %xmm0, %xmm1
621 ; CHECK-SSE-NEXT: psrad $31, %xmm1
622 ; CHECK-SSE-NEXT: psrld $1, %xmm1
623 ; CHECK-SSE-NEXT: paddd %xmm0, %xmm1
624 ; CHECK-SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
625 ; CHECK-SSE-NEXT: psubd %xmm1, %xmm0
626 ; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
627 ; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm0
628 ; CHECK-SSE-NEXT: psrld $31, %xmm0
629 ; CHECK-SSE-NEXT: retq
631 ; CHECK-AVX1-LABEL: test_srem_int_min:
632 ; CHECK-AVX1: # %bb.0:
633 ; CHECK-AVX1-NEXT: vpsrad $31, %xmm0, %xmm1
634 ; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
635 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm1
636 ; CHECK-AVX1-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
637 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
638 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
639 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
640 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
641 ; CHECK-AVX1-NEXT: retq
643 ; CHECK-AVX2-LABEL: test_srem_int_min:
644 ; CHECK-AVX2: # %bb.0:
645 ; CHECK-AVX2-NEXT: vpsrad $31, %xmm0, %xmm1
646 ; CHECK-AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
647 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm1
648 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
649 ; CHECK-AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
650 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
651 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
652 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
653 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
654 ; CHECK-AVX2-NEXT: retq
656 ; CHECK-AVX512VL-LABEL: test_srem_int_min:
657 ; CHECK-AVX512VL: # %bb.0:
658 ; CHECK-AVX512VL-NEXT: vpsrad $31, %xmm0, %xmm1
659 ; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm1, %xmm1
660 ; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm1
661 ; CHECK-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
662 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
663 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
664 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
665 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
666 ; CHECK-AVX512VL-NEXT: retq
667 %srem = srem <4 x i32> %X, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
668 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
669 %ret = zext <4 x i1> %cmp to <4 x i32>
673 ; We could lower remainder of division by all-ones much better elsewhere.
; Divisor -1 (all-ones): X srem -1 is always 0, so like the divisor-1 case
; this folds to a <1,1,1,1> constant materialization.
674 define <4 x i32> @test_srem_allones(<4 x i32> %X) nounwind {
675 ; CHECK-SSE-LABEL: test_srem_allones:
676 ; CHECK-SSE: # %bb.0:
677 ; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
678 ; CHECK-SSE-NEXT: retq
680 ; CHECK-AVX1-LABEL: test_srem_allones:
681 ; CHECK-AVX1: # %bb.0:
682 ; CHECK-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
683 ; CHECK-AVX1-NEXT: retq
685 ; CHECK-AVX2-LABEL: test_srem_allones:
686 ; CHECK-AVX2: # %bb.0:
687 ; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
688 ; CHECK-AVX2-NEXT: retq
690 ; CHECK-AVX512VL-LABEL: test_srem_allones:
691 ; CHECK-AVX512VL: # %bb.0:
692 ; CHECK-AVX512VL-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
693 ; CHECK-AVX512VL-NEXT: retq
694 %srem = srem <4 x i32> %X, <i32 4294967295, i32 4294967295, i32 4294967295, i32 4294967295>
695 %cmp = icmp eq <4 x i32> %srem, <i32 0, i32 0, i32 0, i32 0>
696 %ret = zext <4 x i1> %cmp to <4 x i32>