; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE41
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512VL

; Tests BuildUREMEqFold for 4 x i32 splat vectors with odd divisor.
; See urem-seteq.ll for justification behind constants emitted.
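; A rough sketch of the arithmetic (see urem-seteq.ll for the derivation):
; 3435973837 is 0xCCCCCCCD, the multiplicative inverse of 5 modulo 2^32
; (5 * 0xCCCCCCCD == 1 (mod 2^32)), so the fold would rewrite
;   X % 5 == 0   as   X * 0xCCCCCCCD (mod 2^32) <= 0x33333333,
; where 0x33333333 == floor((2^32 - 1) / 5). For example, X = 10 yields a
; product of 2 (divisible), while X = 7 yields 0x9999999B (not divisible).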
define <4 x i32> @test_urem_odd_vec_i32(<4 x i32> %X) nounwind readnone {
; CHECK-SSE2-LABEL: test_urem_odd_vec_i32:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; CHECK-SSE2-NEXT: psrld $2, %xmm2
; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
; CHECK-SSE2-NEXT: pslld $2, %xmm1
; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT: psrld $31, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_urem_odd_vec_i32:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-SSE41-NEXT: psrld $2, %xmm2
; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_odd_vec_i32:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_odd_vec_i32:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5]
; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_odd_vec_i32:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Like test_urem_odd_vec_i32, but with 8 x i16 vectors.
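; (52429 is 0xCCCD, the analogous inverse of 5 modulo 2^16:
; 5 * 0xCCCD == 1 (mod 2^16).)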
define <8 x i16> @test_urem_odd_vec_i16(<8 x i16> %X) nounwind readnone {
; CHECK-SSE-LABEL: test_urem_odd_vec_i16:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: movdqa {{.*#+}} xmm1 = [52429,52429,52429,52429,52429,52429,52429,52429]
; CHECK-SSE-NEXT: pmulhuw %xmm0, %xmm1
; CHECK-SSE-NEXT: psrlw $2, %xmm1
; CHECK-SSE-NEXT: pmullw {{.*}}(%rip), %xmm1
; CHECK-SSE-NEXT: psubw %xmm1, %xmm0
; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE-NEXT: pcmpeqw %xmm1, %xmm0
; CHECK-SSE-NEXT: psrlw $15, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: test_urem_odd_vec_i16:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm0, %xmm1
; CHECK-AVX-NEXT: vpsrlw $2, %xmm1, %xmm1
; CHECK-AVX-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpsrlw $15, %xmm0, %xmm0
; CHECK-AVX-NEXT: retq
  %urem = urem <8 x i16> %X, <i16 5, i16 5, i16 5, i16 5,
                              i16 5, i16 5, i16 5, i16 5>
  %cmp = icmp eq <8 x i16> %urem, <i16 0, i16 0, i16 0, i16 0,
                                   i16 0, i16 0, i16 0, i16 0>
  %ret = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %ret
}

; Tests BuildUREMEqFold for 4 x i32 splat vectors with even divisor.
; The expected behavior is that the fold is _not_ applied
; because it requires a ROTR in the even case, which has to be expanded.
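; Roughly, assuming the fold's even-divisor form: with 14 == 7 * 2 it would
; rewrite X % 14 == 0 as ror32(X * inv(7), 1) <= floor((2^32 - 1) / 14),
; rotating the known-zero low bit away. Vector ROTR would have to be
; expanded here, so the plain division-by-constant lowering wins instead:
; below, (X >> 1) is multiplied by the magic 2454267027 (0x92492493) and the
; high half shifted right by 2, computing X / 14.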
define <4 x i32> @test_urem_even_vec_i32(<4 x i32> %X) nounwind readnone {
; CHECK-SSE2-LABEL: test_urem_even_vec_i32:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE2-NEXT: psrld $1, %xmm1
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2454267027,2454267027,2454267027,2454267027]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,3,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: psrld $2, %xmm1
; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [14,14,14,14]
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm1
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3
; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3]
; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE2-NEXT: psrld $31, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-SSE41-LABEL: test_urem_even_vec_i32:
; CHECK-SSE41: # %bb.0:
; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE41-NEXT: psrld $1, %xmm1
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; CHECK-SSE41-NEXT: pmuludq %xmm3, %xmm2
; CHECK-SSE41-NEXT: pmuludq %xmm3, %xmm1
; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; CHECK-SSE41-NEXT: psrld $2, %xmm1
; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE41-NEXT: psrld $31, %xmm0
; CHECK-SSE41-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_even_vec_i32:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_even_vec_i32:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
; CHECK-AVX2-NEXT: vpmuludq %xmm3, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [14,14,14,14]
; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_even_vec_i32:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpsrld $1, %xmm0, %xmm1
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm3 = [2454267027,2454267027,2454267027,2454267027]
; CHECK-AVX512VL-NEXT: vpmuludq %xmm3, %xmm2, %xmm2
; CHECK-AVX512VL-NEXT: vpmuludq %xmm3, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 14, i32 14, i32 14, i32 14>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; Like test_urem_even_vec_i32, but with 8 x i16 vectors.
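; (Presumably the same expansion at 16 bits: pre-shift right by 1, pmulhuw
; by a magic constant that the {{.*}}(%rip) pattern elides, shift right by 1
; again for X / 14, then multiply back by 14 and subtract.)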
define <8 x i16> @test_urem_even_vec_i16(<8 x i16> %X) nounwind readnone {
; CHECK-SSE-LABEL: test_urem_even_vec_i16:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: movdqa %xmm0, %xmm1
; CHECK-SSE-NEXT: psrlw $1, %xmm1
; CHECK-SSE-NEXT: pmulhuw {{.*}}(%rip), %xmm1
; CHECK-SSE-NEXT: psrlw $1, %xmm1
; CHECK-SSE-NEXT: pmullw {{.*}}(%rip), %xmm1
; CHECK-SSE-NEXT: psubw %xmm1, %xmm0
; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE-NEXT: pcmpeqw %xmm1, %xmm0
; CHECK-SSE-NEXT: psrlw $15, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX-LABEL: test_urem_even_vec_i16:
; CHECK-AVX: # %bb.0:
; CHECK-AVX-NEXT: vpsrlw $1, %xmm0, %xmm1
; CHECK-AVX-NEXT: vpmulhuw {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
; CHECK-AVX-NEXT: vpmullw {{.*}}(%rip), %xmm1, %xmm1
; CHECK-AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; CHECK-AVX-NEXT: vpsrlw $15, %xmm0, %xmm0
; CHECK-AVX-NEXT: retq
  %urem = urem <8 x i16> %X, <i16 14, i16 14, i16 14, i16 14,
                              i16 14, i16 14, i16 14, i16 14>
  %cmp = icmp eq <8 x i16> %urem, <i16 0, i16 0, i16 0, i16 0,
                                   i16 0, i16 0, i16 0, i16 0>
  %ret = zext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %ret
}

; We should not proceed with this fold if the divisor is 1 or -1.
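; X urem 1 is 0 for every X, so the whole pattern constant-folds to a splat
; of 1 before any divisor-specific lowering is considered.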
define <4 x i32> @test_urem_one_vec(<4 x i32> %X) nounwind readnone {
; CHECK-SSE-LABEL: test_urem_one_vec:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_one_vec:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_one_vec:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_one_vec:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vbroadcastss {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 1, i32 1, i32 1, i32 1>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}

; BuildUREMEqFold does not work when the only odd factor of the divisor is 1.
; This ensures we don't touch powers of two.
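; For a power of two the check is just a mask: X % 16 == 0 <=> (X & 15) == 0,
; which is the pand/pcmpeqd sequence below.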
define <4 x i32> @test_urem_pow2_vec(<4 x i32> %X) nounwind readnone {
; CHECK-SSE-LABEL: test_urem_pow2_vec:
; CHECK-SSE: # %bb.0:
; CHECK-SSE-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-SSE-NEXT: pxor %xmm1, %xmm1
; CHECK-SSE-NEXT: pcmpeqd %xmm1, %xmm0
; CHECK-SSE-NEXT: psrld $31, %xmm0
; CHECK-SSE-NEXT: retq
;
; CHECK-AVX1-LABEL: test_urem_pow2_vec:
; CHECK-AVX1: # %bb.0:
; CHECK-AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX1-NEXT: retq
;
; CHECK-AVX2-LABEL: test_urem_pow2_vec:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm1 = [15,15,15,15]
; CHECK-AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX2-NEXT: retq
;
; CHECK-AVX512VL-LABEL: test_urem_pow2_vec:
; CHECK-AVX512VL: # %bb.0:
; CHECK-AVX512VL-NEXT: vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
; CHECK-AVX512VL-NEXT: retq
  %urem = urem <4 x i32> %X, <i32 16, i32 16, i32 16, i32 16>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}