1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE2
3 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse4.1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE,CHECK-SSE41
4 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX1
5 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX2
6 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl < %s | FileCheck %s --check-prefixes=CHECK,CHECK-AVX,CHECK-AVX512VL
8 ; At the moment, BuildUREMEqFold does not handle nonsplat vectors.
; test_urem_odd_div: per lane, (X urem <3, 5, 7, 9>) == 0, zero-extended to i32.
; Every divisor lane is odd and the lanes differ, so the divisor is a non-splat
; constant vector. For each feature set the expected assembly below still
; materialises the remainder (the final psubd/vpsubd from the input) and only
; then compares it with an all-zero register (pcmpeqd/vpcmpeqd).
10 define <4 x i32> @test_urem_odd_div(<4 x i32> %X) nounwind readnone {
11 ; CHECK-SSE2-LABEL: test_urem_odd_div:
12 ; CHECK-SSE2: # %bb.0:
13 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,3435973837,613566757,954437177]
14 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
15 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
16 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
17 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
18 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
19 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
20 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
21 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
22 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm1
23 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm1
24 ; CHECK-SSE2-NEXT: pmuludq {{.*}}(%rip), %xmm1
25 ; CHECK-SSE2-NEXT: psrlq $32, %xmm1
26 ; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
27 ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm2
28 ; CHECK-SSE2-NEXT: psrld $1, %xmm2
29 ; CHECK-SSE2-NEXT: psrld $2, %xmm1
30 ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm3
31 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[3,3]
32 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [3,5,7,9]
33 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
34 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm5
35 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
36 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm1[1,2]
37 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2,3,1]
38 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2
39 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
40 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
41 ; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
42 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
43 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
44 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
45 ; CHECK-SSE2-NEXT: retq
47 ; CHECK-SSE41-LABEL: test_urem_odd_div:
48 ; CHECK-SSE41: # %bb.0:
49 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,3435973837,613566757,954437177]
50 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
51 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
52 ; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3
53 ; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
54 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
55 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
56 ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm2
57 ; CHECK-SSE41-NEXT: psubd %xmm1, %xmm2
58 ; CHECK-SSE41-NEXT: pmuludq {{.*}}(%rip), %xmm2
59 ; CHECK-SSE41-NEXT: psrlq $32, %xmm2
60 ; CHECK-SSE41-NEXT: paddd %xmm1, %xmm2
61 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm1
62 ; CHECK-SSE41-NEXT: psrld $2, %xmm1
63 ; CHECK-SSE41-NEXT: psrld $1, %xmm2
64 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3,4,5],xmm2[6,7]
65 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
66 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
67 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
68 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
69 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
70 ; CHECK-SSE41-NEXT: retq
72 ; CHECK-AVX1-LABEL: test_urem_odd_div:
73 ; CHECK-AVX1: # %bb.0:
74 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,613566757,954437177]
75 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
76 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
77 ; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
78 ; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
79 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
80 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
81 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm2
82 ; CHECK-AVX1-NEXT: vpmuludq {{.*}}(%rip), %xmm2, %xmm2
83 ; CHECK-AVX1-NEXT: vpsrlq $32, %xmm2, %xmm2
84 ; CHECK-AVX1-NEXT: vpaddd %xmm1, %xmm2, %xmm1
85 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm2
86 ; CHECK-AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
87 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5],xmm1[6,7]
88 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
89 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
90 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
91 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
92 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
93 ; CHECK-AVX1-NEXT: retq
95 ; CHECK-AVX2-LABEL: test_urem_odd_div:
96 ; CHECK-AVX2: # %bb.0:
97 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,613566757,954437177]
98 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
99 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
100 ; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
101 ; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
102 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
103 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
104 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm2
105 ; CHECK-AVX2-NEXT: vpmuludq {{.*}}(%rip), %xmm2, %xmm2
106 ; CHECK-AVX2-NEXT: vpsrlq $32, %xmm2, %xmm2
107 ; CHECK-AVX2-NEXT: vpaddd %xmm1, %xmm2, %xmm1
108 ; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
109 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
110 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
111 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
112 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
113 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
114 ; CHECK-AVX2-NEXT: retq
116 ; CHECK-AVX512VL-LABEL: test_urem_odd_div:
117 ; CHECK-AVX512VL: # %bb.0:
118 ; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,613566757,954437177]
119 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
120 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
121 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
122 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
123 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
124 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
125 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm2
126 ; CHECK-AVX512VL-NEXT: vpmuludq {{.*}}(%rip), %xmm2, %xmm2
127 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
128 ; CHECK-AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
129 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3]
130 ; CHECK-AVX512VL-NEXT: vpaddd %xmm1, %xmm2, %xmm1
131 ; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
132 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
133 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
134 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
135 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
136 ; CHECK-AVX512VL-NEXT: retq
; IR under test: lane-wise unsigned remainder by the odd constants, equality
; test against zero, then widening of the <4 x i1> mask to <4 x i32>.
137 %urem = urem <4 x i32> %X, <i32 3, i32 5, i32 7, i32 9>
138 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
139 %ret = zext <4 x i1> %cmp to <4 x i32>
; test_urem_even_div: per lane, (X urem <6, 10, 12, 14>) == 0, zero-extended to
; i32. Every divisor lane is even, none is a power of two, and the lanes differ
; (non-splat divisor). For each feature set the expected assembly below still
; materialises the remainder (the final psubd/vpsubd from the input) and only
; then compares it with an all-zero register (pcmpeqd/vpcmpeqd).
143 define <4 x i32> @test_urem_even_div(<4 x i32> %X) nounwind readnone {
144 ; CHECK-SSE2-LABEL: test_urem_even_div:
145 ; CHECK-SSE2: # %bb.0:
146 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,2454267027]
147 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
148 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
149 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
150 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
151 ; CHECK-SSE2-NEXT: psrld $1, %xmm3
152 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
153 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm3[3,3]
154 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
155 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
156 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
157 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
158 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
159 ; CHECK-SSE2-NEXT: psrld $2, %xmm1
160 ; CHECK-SSE2-NEXT: psrld $3, %xmm2
161 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm3
162 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[3,3]
163 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [6,10,12,14]
164 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
165 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm5
166 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
167 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,3],xmm2[1,2]
168 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,3,1]
169 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
170 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
171 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
172 ; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
173 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
174 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
175 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
176 ; CHECK-SSE2-NEXT: retq
178 ; CHECK-SSE41-LABEL: test_urem_even_div:
179 ; CHECK-SSE41: # %bb.0:
180 ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
181 ; CHECK-SSE41-NEXT: psrld $1, %xmm1
182 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5],xmm1[6,7]
183 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
184 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2863311531,3435973837,2863311531,2454267027]
185 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
186 ; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm4
187 ; CHECK-SSE41-NEXT: pmuludq %xmm3, %xmm1
188 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
189 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
190 ; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
191 ; CHECK-SSE41-NEXT: psrld $3, %xmm2
192 ; CHECK-SSE41-NEXT: psrld $2, %xmm1
193 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5],xmm1[6,7]
194 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
195 ; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
196 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
197 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
198 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
199 ; CHECK-SSE41-NEXT: retq
201 ; CHECK-AVX1-LABEL: test_urem_even_div:
202 ; CHECK-AVX1: # %bb.0:
203 ; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
204 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5],xmm1[6,7]
205 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
206 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2863311531,3435973837,2863311531,2454267027]
207 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
208 ; CHECK-AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm2
209 ; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1
210 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
211 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
212 ; CHECK-AVX1-NEXT: vpsrld $3, %xmm1, %xmm2
213 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
214 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3,4,5],xmm1[6,7]
215 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
216 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
217 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
218 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
219 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
220 ; CHECK-AVX1-NEXT: retq
222 ; CHECK-AVX2-LABEL: test_urem_even_div:
223 ; CHECK-AVX2: # %bb.0:
224 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,2454267027]
225 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
226 ; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
227 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
228 ; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm2
229 ; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
230 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
231 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
232 ; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
233 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
234 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
235 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
236 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
237 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
238 ; CHECK-AVX2-NEXT: retq
240 ; CHECK-AVX512VL-LABEL: test_urem_even_div:
241 ; CHECK-AVX512VL: # %bb.0:
242 ; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,2454267027]
243 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
244 ; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
245 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
246 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm4, %xmm2
247 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
248 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
249 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
250 ; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
251 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
252 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
253 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
254 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
255 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
256 ; CHECK-AVX512VL-NEXT: retq
; IR under test: lane-wise unsigned remainder by the even constants, equality
; test against zero, then widening of the <4 x i1> mask to <4 x i32>.
257 %urem = urem <4 x i32> %X, <i32 6, i32 10, i32 12, i32 14>
258 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
259 %ret = zext <4 x i1> %cmp to <4 x i32>
; test_urem_pow2: per lane, (X urem <6, 10, 12, 16>) == 0, zero-extended to i32.
; Same shape as the even-divisor case, except that the last lane's divisor (16)
; is a power of two. For each feature set the expected assembly below still
; materialises the remainder (the final psubd/vpsubd from the input) and only
; then compares it with an all-zero register (pcmpeqd/vpcmpeqd).
263 define <4 x i32> @test_urem_pow2(<4 x i32> %X) nounwind readnone {
264 ; CHECK-SSE2-LABEL: test_urem_pow2:
265 ; CHECK-SSE2: # %bb.0:
266 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,268435456]
267 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
268 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
269 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
270 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
271 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
272 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
273 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
274 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
275 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
276 ; CHECK-SSE2-NEXT: psrld $3, %xmm1
277 ; CHECK-SSE2-NEXT: movdqa %xmm1, %xmm3
278 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm2[3,3]
279 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [6,10,12,16]
280 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[1,1,3,3]
281 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm5
282 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,2,2,3]
283 ; CHECK-SSE2-NEXT: psrld $2, %xmm2
284 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm1[2,3]
285 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm2
286 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3]
287 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
288 ; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
289 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
290 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
291 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
292 ; CHECK-SSE2-NEXT: retq
294 ; CHECK-SSE41-LABEL: test_urem_pow2:
295 ; CHECK-SSE41: # %bb.0:
296 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,268435456]
297 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
298 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
299 ; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3
300 ; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
301 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
302 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
303 ; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
304 ; CHECK-SSE41-NEXT: psrld $3, %xmm2
305 ; CHECK-SSE41-NEXT: movdqa %xmm2, %xmm3
306 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm1[4,5,6,7]
307 ; CHECK-SSE41-NEXT: psrld $2, %xmm1
308 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
309 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
310 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
311 ; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
312 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
313 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
314 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
315 ; CHECK-SSE41-NEXT: retq
317 ; CHECK-AVX1-LABEL: test_urem_pow2:
318 ; CHECK-AVX1: # %bb.0:
319 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,268435456]
320 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
321 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
322 ; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
323 ; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
324 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
325 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
326 ; CHECK-AVX1-NEXT: vpsrld $3, %xmm1, %xmm2
327 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm2[0,1,2,3],xmm1[4,5,6,7]
328 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
329 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
330 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
331 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
332 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
333 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
334 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
335 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
336 ; CHECK-AVX1-NEXT: retq
338 ; CHECK-AVX2-LABEL: test_urem_pow2:
339 ; CHECK-AVX2: # %bb.0:
340 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,268435456]
341 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
342 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
343 ; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
344 ; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
345 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
346 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
347 ; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
348 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
349 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
350 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
351 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
352 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
353 ; CHECK-AVX2-NEXT: retq
355 ; CHECK-AVX512VL-LABEL: test_urem_pow2:
356 ; CHECK-AVX512VL: # %bb.0:
357 ; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,268435456]
358 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
359 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
360 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
361 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
362 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
363 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
364 ; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
365 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
366 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
367 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
368 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
369 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
370 ; CHECK-AVX512VL-NEXT: retq
; IR under test: lane-wise unsigned remainder (last lane divides by 16),
; equality test against zero, then widening of the <4 x i1> mask to <4 x i32>.
371 %urem = urem <4 x i32> %X, <i32 6, i32 10, i32 12, i32 16>
372 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
373 %ret = zext <4 x i1> %cmp to <4 x i32>
; test_urem_one: per lane, (X urem <6, 1, 12, 14>) == 0, zero-extended to i32.
; Same shape as the even-divisor case, except that lane 1 divides by one. In
; the expected assembly the multiplier constant for that lane is 0 and the
; input lane is blended back in before the multiply by the divisor vector.
; The remainder is still materialised (final psubd/vpsubd from the input) and
; then compared with an all-zero register (pcmpeqd/vpcmpeqd).
377 define <4 x i32> @test_urem_one(<4 x i32> %X) nounwind readnone {
378 ; CHECK-SSE2-LABEL: test_urem_one:
379 ; CHECK-SSE2: # %bb.0:
380 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,0,2863311531,2454267027]
381 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
382 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
383 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
384 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
385 ; CHECK-SSE2-NEXT: psrld $1, %xmm3
386 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
387 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm3[3,3]
388 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
389 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
390 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
391 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
392 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
393 ; CHECK-SSE2-NEXT: psrld $2, %xmm1
394 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm3
395 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,0],xmm1[0,0]
396 ; CHECK-SSE2-NEXT: psrld $3, %xmm2
397 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[2,0],xmm2[2,3]
398 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [6,1,12,14]
399 ; CHECK-SSE2-NEXT: pmuludq %xmm2, %xmm3
400 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[0,2,2,3]
401 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm4
402 ; CHECK-SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[3,3]
403 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
404 ; CHECK-SSE2-NEXT: pmuludq %xmm4, %xmm1
405 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
406 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
407 ; CHECK-SSE2-NEXT: psubd %xmm3, %xmm0
408 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
409 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
410 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
411 ; CHECK-SSE2-NEXT: retq
413 ; CHECK-SSE41-LABEL: test_urem_one:
414 ; CHECK-SSE41: # %bb.0:
415 ; CHECK-SSE41-NEXT: movdqa %xmm0, %xmm1
416 ; CHECK-SSE41-NEXT: psrld $1, %xmm1
417 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5],xmm1[6,7]
418 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
419 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm3 = [2863311531,0,2863311531,2454267027]
420 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
421 ; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm4
422 ; CHECK-SSE41-NEXT: pmuludq %xmm3, %xmm1
423 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
424 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm4[2,3],xmm1[4,5],xmm4[6,7]
425 ; CHECK-SSE41-NEXT: movdqa %xmm1, %xmm2
426 ; CHECK-SSE41-NEXT: psrld $3, %xmm2
427 ; CHECK-SSE41-NEXT: psrld $2, %xmm1
428 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
429 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
430 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
431 ; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
432 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
433 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
434 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
435 ; CHECK-SSE41-NEXT: retq
437 ; CHECK-AVX1-LABEL: test_urem_one:
438 ; CHECK-AVX1: # %bb.0:
439 ; CHECK-AVX1-NEXT: vpsrld $1, %xmm0, %xmm1
440 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5],xmm1[6,7]
441 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
442 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2863311531,0,2863311531,2454267027]
443 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
444 ; CHECK-AVX1-NEXT: vpmuludq %xmm4, %xmm2, %xmm2
445 ; CHECK-AVX1-NEXT: vpmuludq %xmm3, %xmm1, %xmm1
446 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
447 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
448 ; CHECK-AVX1-NEXT: vpsrld $3, %xmm1, %xmm2
449 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
450 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5],xmm1[6,7]
451 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3],xmm1[4,5,6,7]
452 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
453 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
454 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
455 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
456 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
457 ; CHECK-AVX1-NEXT: retq
459 ; CHECK-AVX2-LABEL: test_urem_one:
460 ; CHECK-AVX2: # %bb.0:
461 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,0,2863311531,2454267027]
462 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
463 ; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
464 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
465 ; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm4, %xmm2
466 ; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
467 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
468 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
469 ; CHECK-AVX2-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
470 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm0[1],xmm1[2,3]
471 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
472 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
473 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
474 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
475 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
476 ; CHECK-AVX2-NEXT: retq
478 ; CHECK-AVX512VL-LABEL: test_urem_one:
479 ; CHECK-AVX512VL: # %bb.0:
480 ; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,0,2863311531,2454267027]
481 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
482 ; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm3
483 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm4 = xmm3[1,1,3,3]
484 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm4, %xmm2
485 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm3, %xmm1
486 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
487 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
488 ; CHECK-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1
489 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm0[1],xmm1[2,3]
490 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
491 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
492 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
493 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
494 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
495 ; CHECK-AVX512VL-NEXT: retq
; IR under test: lane-wise unsigned remainder (lane 1 divides by 1), equality
; test against zero, then widening of the <4 x i1> mask to <4 x i32>.
496 %urem = urem <4 x i32> %X, <i32 6, i32 1, i32 12, i32 14>
497 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
498 %ret = zext <4 x i1> %cmp to <4 x i32>
; test_urem_comp: per lane, (X urem <5, 5, 5, 5>) == <0, 0, 1, 0>, zero-extended
; to i32. Here the divisor is a splat; it is the comparison constant that is
; non-splat (lane 2 compares against 1 instead of 0). In the expected assembly
; the remainder is materialised (final psubd/vpsubd from the input) and then
; compared with a constant loaded from memory rather than a zeroed register.
502 define <4 x i32> @test_urem_comp(<4 x i32> %X) nounwind readnone {
503 ; CHECK-SSE2-LABEL: test_urem_comp:
504 ; CHECK-SSE2: # %bb.0:
505 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
506 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
507 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
508 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
509 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
510 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
511 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
512 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
513 ; CHECK-SSE2-NEXT: psrld $2, %xmm2
514 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
515 ; CHECK-SSE2-NEXT: pslld $2, %xmm1
516 ; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
517 ; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
518 ; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
519 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
520 ; CHECK-SSE2-NEXT: retq
522 ; CHECK-SSE41-LABEL: test_urem_comp:
523 ; CHECK-SSE41: # %bb.0:
524 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
525 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
526 ; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
527 ; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
528 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
529 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
530 ; CHECK-SSE41-NEXT: psrld $2, %xmm2
531 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
532 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
533 ; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
534 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
535 ; CHECK-SSE41-NEXT: retq
537 ; CHECK-AVX1-LABEL: test_urem_comp:
538 ; CHECK-AVX1: # %bb.0:
539 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
540 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
541 ; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
542 ; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
543 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
544 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
545 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
546 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
547 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
548 ; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
549 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
550 ; CHECK-AVX1-NEXT: retq
552 ; CHECK-AVX2-LABEL: test_urem_comp:
553 ; CHECK-AVX2: # %bb.0:
554 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
555 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
556 ; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
557 ; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
558 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
559 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
560 ; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
561 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5]
562 ; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
563 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
564 ; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
565 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
566 ; CHECK-AVX2-NEXT: retq
568 ; CHECK-AVX512VL-LABEL: test_urem_comp:
569 ; CHECK-AVX512VL: # %bb.0:
570 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
571 ; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
572 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
573 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
574 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
575 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
576 ; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
577 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
578 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
579 ; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
580 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
581 ; CHECK-AVX512VL-NEXT: retq
; IR under test: lane-wise unsigned remainder by the splat constant 5,
; equality test against the non-splat constant <0, 0, 1, 0>, then widening of
; the <4 x i1> mask to <4 x i32>.
582 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
583 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 1, i32 0>
584 %ret = zext <4 x i1> %cmp to <4 x i32>
; test_urem_both: takes the unsigned remainder of %X by the non-splat divisor
; <6, 5, 6, 5>, compares it for equality with the non-splat constant
; <1, 0, 1, 0>, and zero-extends the <4 x i1> result to <4 x i32>.
; NOTE(review): the name presumably means that both the divisor and the
; comparison constant are non-splat -- confirm against the sibling tests.
; Expected codegen on every configured target materializes the remainder
; explicitly: high half of a 32x32 multiply by
; [2863311531,3435973837,2863311531,3435973837], logical shift right by 2,
; multiply back (the SSE2 output shows the [6,5,6,5] vector; the other targets
; use a constant-pool operand), subtract from %X, compare against a
; constant-pool vector, then shift right by 31 to leave 0/1 in each lane.
; The assertion lines are autogenerated (see the first line of this file);
; regenerate them with the script rather than editing them by hand.
588 define <4 x i32> @test_urem_both(<4 x i32> %X) nounwind readnone {
589 ; CHECK-SSE2-LABEL: test_urem_both:
590 ; CHECK-SSE2: # %bb.0:
591 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,3435973837]
592 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
593 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
594 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
595 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
596 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
597 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
598 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
599 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
600 ; CHECK-SSE2-NEXT: psrld $2, %xmm2
601 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [6,5,6,5]
602 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
603 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
604 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
605 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
606 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1
607 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
608 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
609 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
610 ; CHECK-SSE2-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
611 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
612 ; CHECK-SSE2-NEXT: retq
614 ; CHECK-SSE41-LABEL: test_urem_both:
615 ; CHECK-SSE41: # %bb.0:
616 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,3435973837]
617 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
618 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
619 ; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3
620 ; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
621 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
622 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
623 ; CHECK-SSE41-NEXT: psrld $2, %xmm1
624 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
625 ; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
626 ; CHECK-SSE41-NEXT: pcmpeqd {{.*}}(%rip), %xmm0
627 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
628 ; CHECK-SSE41-NEXT: retq
630 ; CHECK-AVX1-LABEL: test_urem_both:
631 ; CHECK-AVX1: # %bb.0:
632 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,3435973837]
633 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
634 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
635 ; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
636 ; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
637 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
638 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
639 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
640 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
641 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
642 ; CHECK-AVX1-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
643 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
644 ; CHECK-AVX1-NEXT: retq
646 ; CHECK-AVX2-LABEL: test_urem_both:
647 ; CHECK-AVX2: # %bb.0:
648 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,3435973837]
649 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
650 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
651 ; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
652 ; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
653 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
654 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
655 ; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
656 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
657 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
658 ; CHECK-AVX2-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
659 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
660 ; CHECK-AVX2-NEXT: retq
662 ; CHECK-AVX512VL-LABEL: test_urem_both:
663 ; CHECK-AVX512VL: # %bb.0:
664 ; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [2863311531,3435973837,2863311531,3435973837]
665 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
666 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
667 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
668 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
669 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
670 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
671 ; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
672 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
673 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
674 ; CHECK-AVX512VL-NEXT: vpcmpeqd {{.*}}(%rip), %xmm0, %xmm0
675 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
676 ; CHECK-AVX512VL-NEXT: retq
677 %urem = urem <4 x i32> %X, <i32 6, i32 5, i32 6, i32 5>
678 %cmp = icmp eq <4 x i32> %urem, <i32 1, i32 0, i32 1, i32 0>
679 %ret = zext <4 x i1> %cmp to <4 x i32>
; test_urem_div_undef: the divisor <5, 5, undef, 5> contains an undef lane;
; the remainder is compared for equality with the all-zero vector and the
; <4 x i1> result is zero-extended to <4 x i32>.
; Expected codegen (shared SSE and AVX assertion groups) contains no
; multiply/shift/subtract remainder sequence: xmm0 is zeroed, compared with
; itself and each lane is shifted right by 31, so the incoming %X is never
; read and every result lane is 1.
683 define <4 x i32> @test_urem_div_undef(<4 x i32> %X) nounwind readnone {
684 ; CHECK-SSE-LABEL: test_urem_div_undef:
685 ; CHECK-SSE: # %bb.0:
686 ; CHECK-SSE-NEXT: pxor %xmm0, %xmm0
687 ; CHECK-SSE-NEXT: pcmpeqd %xmm0, %xmm0
688 ; CHECK-SSE-NEXT: psrld $31, %xmm0
689 ; CHECK-SSE-NEXT: retq
691 ; CHECK-AVX-LABEL: test_urem_div_undef:
692 ; CHECK-AVX: # %bb.0:
693 ; CHECK-AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
694 ; CHECK-AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
695 ; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0
696 ; CHECK-AVX-NEXT: retq
697 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 undef, i32 5>
698 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
699 %ret = zext <4 x i1> %cmp to <4 x i32>
; test_urem_comp_undef: the divisor is the splat 5, but the comparison
; constant <0, undef, 0, 0> contains an undef lane; the <4 x i1> result is
; zero-extended to <4 x i32>.
; In contrast to test_urem_div_undef, the expected codegen still computes the
; full remainder: high half of a multiply by the splat 3435973837
; (3435973837 * 5 = 2^34 + 1), shift right by 2, multiply by 5, subtract
; from %X. All four lanes are then compared against a zeroed register.
; The multiply by 5 differs per target in the expected output:
;   - SSE2: shift left by 2 plus add;
;   - SSE4.1 / AVX1: pmulld with a constant-pool operand;
;   - AVX2: broadcast of [5,5,5,5] followed by vpmulld;
;   - AVX512VL: vpmulld with an embedded {1to4} broadcast operand.
703 define <4 x i32> @test_urem_comp_undef(<4 x i32> %X) nounwind readnone {
704 ; CHECK-SSE2-LABEL: test_urem_comp_undef:
705 ; CHECK-SSE2: # %bb.0:
706 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,3435973837,3435973837]
707 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
708 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
709 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
710 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
711 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
712 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
713 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
714 ; CHECK-SSE2-NEXT: psrld $2, %xmm2
715 ; CHECK-SSE2-NEXT: movdqa %xmm2, %xmm1
716 ; CHECK-SSE2-NEXT: pslld $2, %xmm1
717 ; CHECK-SSE2-NEXT: paddd %xmm2, %xmm1
718 ; CHECK-SSE2-NEXT: psubd %xmm1, %xmm0
719 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
720 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
721 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
722 ; CHECK-SSE2-NEXT: retq
724 ; CHECK-SSE41-LABEL: test_urem_comp_undef:
725 ; CHECK-SSE41: # %bb.0:
726 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
727 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
728 ; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm1
729 ; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm2
730 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
731 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
732 ; CHECK-SSE41-NEXT: psrld $2, %xmm2
733 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm2
734 ; CHECK-SSE41-NEXT: psubd %xmm2, %xmm0
735 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
736 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
737 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
738 ; CHECK-SSE41-NEXT: retq
740 ; CHECK-AVX1-LABEL: test_urem_comp_undef:
741 ; CHECK-AVX1: # %bb.0:
742 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
743 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
744 ; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
745 ; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
746 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
747 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3],xmm2[4,5],xmm1[6,7]
748 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
749 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
750 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
751 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
752 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
753 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
754 ; CHECK-AVX1-NEXT: retq
756 ; CHECK-AVX2-LABEL: test_urem_comp_undef:
757 ; CHECK-AVX2: # %bb.0:
758 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
759 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
760 ; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
761 ; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
762 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
763 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
764 ; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
765 ; CHECK-AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [5,5,5,5]
766 ; CHECK-AVX2-NEXT: vpmulld %xmm2, %xmm1, %xmm1
767 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
768 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
769 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
770 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
771 ; CHECK-AVX2-NEXT: retq
773 ; CHECK-AVX512VL-LABEL: test_urem_comp_undef:
774 ; CHECK-AVX512VL: # %bb.0:
775 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
776 ; CHECK-AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [3435973837,3435973837,3435973837,3435973837]
777 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm1, %xmm1
778 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm0, %xmm2
779 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
780 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm2[0],xmm1[1],xmm2[2],xmm1[3]
781 ; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
782 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip){1to4}, %xmm1, %xmm1
783 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
784 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
785 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
786 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
787 ; CHECK-AVX512VL-NEXT: retq
788 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 5, i32 5>
789 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 undef, i32 0, i32 0>
790 %ret = zext <4 x i1> %cmp to <4 x i32>
; test_urem_both_undef: both constant operands contain an undef lane -- the
; divisor is <5, 5, undef, 5> and the comparison constant is
; <0, undef, 0, 0>. The <4 x i1> result is zero-extended to <4 x i32>.
; Expected codegen (shared SSE and AVX assertion groups) is the same
; three-instruction sequence as for test_urem_div_undef: xmm0 is zeroed,
; compared with itself and shifted right by 31 per lane, so no remainder is
; computed, %X is never read and every result lane is 1.
794 define <4 x i32> @test_urem_both_undef(<4 x i32> %X) nounwind readnone {
795 ; CHECK-SSE-LABEL: test_urem_both_undef:
796 ; CHECK-SSE: # %bb.0:
797 ; CHECK-SSE-NEXT: pxor %xmm0, %xmm0
798 ; CHECK-SSE-NEXT: pcmpeqd %xmm0, %xmm0
799 ; CHECK-SSE-NEXT: psrld $31, %xmm0
800 ; CHECK-SSE-NEXT: retq
802 ; CHECK-AVX-LABEL: test_urem_both_undef:
803 ; CHECK-AVX: # %bb.0:
804 ; CHECK-AVX-NEXT: vpxor %xmm0, %xmm0, %xmm0
805 ; CHECK-AVX-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
806 ; CHECK-AVX-NEXT: vpsrld $31, %xmm0, %xmm0
807 ; CHECK-AVX-NEXT: retq
808 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 undef, i32 5>
809 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 undef, i32 0, i32 0>
810 %ret = zext <4 x i1> %cmp to <4 x i32>
; test_urem_div_even_odd: the divisor <5, 5, 6, 6> mixes an odd value (5) and
; an even value (6) in one vector; the remainder is compared for equality
; with the all-zero vector and the <4 x i1> result is zero-extended to
; <4 x i32>.
; Expected codegen on every configured target materializes the remainder
; explicitly: high half of a 32x32 multiply by
; [3435973837,3435973837,2863311531,2863311531] (lane-for-lane with the
; divisors: 3435973837 * 5 = 2^34 + 1, 2863311531 * 6 = 2^34 + 2), shift
; right by 2, multiply back (the SSE2 output shows the [5,5,6,6] vector; the
; other targets use a constant-pool operand), subtract from %X, compare
; against a zeroed register, then shift right by 31 to leave 0/1 per lane.
814 define <4 x i32> @test_urem_div_even_odd(<4 x i32> %X) nounwind readnone {
815 ; CHECK-SSE2-LABEL: test_urem_div_even_odd:
816 ; CHECK-SSE2: # %bb.0:
817 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,2863311531,2863311531]
818 ; CHECK-SSE2-NEXT: movdqa %xmm0, %xmm2
819 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
820 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
821 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
822 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
823 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm3
824 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,3,2,3]
825 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
826 ; CHECK-SSE2-NEXT: psrld $2, %xmm2
827 ; CHECK-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [5,5,6,6]
828 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[1,1,3,3]
829 ; CHECK-SSE2-NEXT: pmuludq %xmm1, %xmm2
830 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,2,2,3]
831 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
832 ; CHECK-SSE2-NEXT: pmuludq %xmm3, %xmm1
833 ; CHECK-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
834 ; CHECK-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
835 ; CHECK-SSE2-NEXT: psubd %xmm2, %xmm0
836 ; CHECK-SSE2-NEXT: pxor %xmm1, %xmm1
837 ; CHECK-SSE2-NEXT: pcmpeqd %xmm1, %xmm0
838 ; CHECK-SSE2-NEXT: psrld $31, %xmm0
839 ; CHECK-SSE2-NEXT: retq
841 ; CHECK-SSE41-LABEL: test_urem_div_even_odd:
842 ; CHECK-SSE41: # %bb.0:
843 ; CHECK-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [3435973837,3435973837,2863311531,2863311531]
844 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
845 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
846 ; CHECK-SSE41-NEXT: pmuludq %xmm2, %xmm3
847 ; CHECK-SSE41-NEXT: pmuludq %xmm0, %xmm1
848 ; CHECK-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
849 ; CHECK-SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3],xmm1[4,5],xmm3[6,7]
850 ; CHECK-SSE41-NEXT: psrld $2, %xmm1
851 ; CHECK-SSE41-NEXT: pmulld {{.*}}(%rip), %xmm1
852 ; CHECK-SSE41-NEXT: psubd %xmm1, %xmm0
853 ; CHECK-SSE41-NEXT: pxor %xmm1, %xmm1
854 ; CHECK-SSE41-NEXT: pcmpeqd %xmm1, %xmm0
855 ; CHECK-SSE41-NEXT: psrld $31, %xmm0
856 ; CHECK-SSE41-NEXT: retq
858 ; CHECK-AVX1-LABEL: test_urem_div_even_odd:
859 ; CHECK-AVX1: # %bb.0:
860 ; CHECK-AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,3435973837,2863311531,2863311531]
861 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
862 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
863 ; CHECK-AVX1-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
864 ; CHECK-AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
865 ; CHECK-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
866 ; CHECK-AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
867 ; CHECK-AVX1-NEXT: vpsrld $2, %xmm1, %xmm1
868 ; CHECK-AVX1-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
869 ; CHECK-AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0
870 ; CHECK-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
871 ; CHECK-AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
872 ; CHECK-AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
873 ; CHECK-AVX1-NEXT: retq
875 ; CHECK-AVX2-LABEL: test_urem_div_even_odd:
876 ; CHECK-AVX2: # %bb.0:
877 ; CHECK-AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,3435973837,2863311531,2863311531]
878 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
879 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
880 ; CHECK-AVX2-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
881 ; CHECK-AVX2-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
882 ; CHECK-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
883 ; CHECK-AVX2-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
884 ; CHECK-AVX2-NEXT: vpsrld $2, %xmm1, %xmm1
885 ; CHECK-AVX2-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
886 ; CHECK-AVX2-NEXT: vpsubd %xmm1, %xmm0, %xmm0
887 ; CHECK-AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
888 ; CHECK-AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
889 ; CHECK-AVX2-NEXT: vpsrld $31, %xmm0, %xmm0
890 ; CHECK-AVX2-NEXT: retq
892 ; CHECK-AVX512VL-LABEL: test_urem_div_even_odd:
893 ; CHECK-AVX512VL: # %bb.0:
894 ; CHECK-AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [3435973837,3435973837,2863311531,2863311531]
895 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[1,1,3,3]
896 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
897 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm2, %xmm3, %xmm2
898 ; CHECK-AVX512VL-NEXT: vpmuludq %xmm1, %xmm0, %xmm1
899 ; CHECK-AVX512VL-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
900 ; CHECK-AVX512VL-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
901 ; CHECK-AVX512VL-NEXT: vpsrld $2, %xmm1, %xmm1
902 ; CHECK-AVX512VL-NEXT: vpmulld {{.*}}(%rip), %xmm1, %xmm1
903 ; CHECK-AVX512VL-NEXT: vpsubd %xmm1, %xmm0, %xmm0
904 ; CHECK-AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
905 ; CHECK-AVX512VL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
906 ; CHECK-AVX512VL-NEXT: vpsrld $31, %xmm0, %xmm0
907 ; CHECK-AVX512VL-NEXT: retq
908 %urem = urem <4 x i32> %X, <i32 5, i32 5, i32 6, i32 6>
909 %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
910 %ret = zext <4 x i1> %cmp to <4 x i32>