1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE2
3 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86-SSE42
4 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
5 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64-SSE2
7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64-SSE42
8 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
9 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
10 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512
; Unsigned-minimum reduction of a <2 x i64> vector, spelled out as one
; shuffle + icmp ult + select step followed by an extract of lane 0.
; The autogenerated assertion comments inside the function record the
; expected llc output for each FileCheck prefix used by this file.
16 define i64 @test_reduce_v2i64(<2 x i64> %a0) {
17 ; X86-SSE2-LABEL: test_reduce_v2i64:
19 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
20 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
21 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
22 ; X86-SSE2-NEXT: pxor %xmm2, %xmm3
23 ; X86-SSE2-NEXT: pxor %xmm1, %xmm2
24 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
25 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
26 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
27 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
28 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
29 ; X86-SSE2-NEXT: pand %xmm5, %xmm2
30 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
31 ; X86-SSE2-NEXT: por %xmm2, %xmm3
32 ; X86-SSE2-NEXT: pand %xmm3, %xmm0
33 ; X86-SSE2-NEXT: pandn %xmm1, %xmm3
34 ; X86-SSE2-NEXT: por %xmm0, %xmm3
35 ; X86-SSE2-NEXT: movd %xmm3, %eax
36 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
37 ; X86-SSE2-NEXT: movd %xmm0, %edx
40 ; X86-SSE42-LABEL: test_reduce_v2i64:
41 ; X86-SSE42: ## %bb.0:
42 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
43 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
44 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648]
45 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm3
46 ; X86-SSE42-NEXT: pxor %xmm0, %xmm3
47 ; X86-SSE42-NEXT: pxor %xmm2, %xmm0
48 ; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
49 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
50 ; X86-SSE42-NEXT: movd %xmm2, %eax
51 ; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
52 ; X86-SSE42-NEXT: retl
54 ; X86-AVX1-LABEL: test_reduce_v2i64:
56 ; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
57 ; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
58 ; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
59 ; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm3
60 ; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm2
61 ; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
62 ; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
63 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
64 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
67 ; X86-AVX2-LABEL: test_reduce_v2i64:
69 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
70 ; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
71 ; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
72 ; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
73 ; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
74 ; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
75 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
76 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
79 ; X64-SSE2-LABEL: test_reduce_v2i64:
81 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
82 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
83 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
84 ; X64-SSE2-NEXT: pxor %xmm2, %xmm3
85 ; X64-SSE2-NEXT: pxor %xmm1, %xmm2
86 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm4
87 ; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
88 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
89 ; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
90 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
91 ; X64-SSE2-NEXT: pand %xmm5, %xmm2
92 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
93 ; X64-SSE2-NEXT: por %xmm2, %xmm3
94 ; X64-SSE2-NEXT: pand %xmm3, %xmm0
95 ; X64-SSE2-NEXT: pandn %xmm1, %xmm3
96 ; X64-SSE2-NEXT: por %xmm0, %xmm3
97 ; X64-SSE2-NEXT: movq %xmm3, %rax
100 ; X64-SSE42-LABEL: test_reduce_v2i64:
101 ; X64-SSE42: ## %bb.0:
102 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
103 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
104 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808]
105 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm3
106 ; X64-SSE42-NEXT: pxor %xmm0, %xmm3
107 ; X64-SSE42-NEXT: pxor %xmm2, %xmm0
108 ; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
109 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
110 ; X64-SSE42-NEXT: movq %xmm2, %rax
111 ; X64-SSE42-NEXT: retq
113 ; X64-AVX1-LABEL: test_reduce_v2i64:
114 ; X64-AVX1: ## %bb.0:
115 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
116 ; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
117 ; X64-AVX1-NEXT: ## xmm2 = mem[0,0]
118 ; X64-AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm3
119 ; X64-AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm2
120 ; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
121 ; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
122 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
123 ; X64-AVX1-NEXT: retq
125 ; X64-AVX2-LABEL: test_reduce_v2i64:
126 ; X64-AVX2: ## %bb.0:
127 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
128 ; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
129 ; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
130 ; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm2
131 ; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
132 ; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
133 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
134 ; X64-AVX2-NEXT: retq
136 ; X64-AVX512-LABEL: test_reduce_v2i64:
137 ; X64-AVX512: ## %bb.0:
138 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
139 ; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
140 ; X64-AVX512-NEXT: vmovq %xmm0, %rax
141 ; X64-AVX512-NEXT: retq
; Bring element 1 down to lane 0 so both elements meet in the same lane.
142 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
; Lane-wise unsigned minimum: keep %a0 wherever it is below the shuffled copy.
143 %2 = icmp ult <2 x i64> %a0, %1
144 %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
; Lane 0 now holds the minimum of both input elements.
145 %4 = extractelement <2 x i64> %3, i32 0
; Unsigned-minimum reduction of a <4 x i32> vector, spelled out as two
; shuffle + icmp ult + select halving steps followed by an extract of lane 0.
; The autogenerated assertion comments inside the function record the
; expected llc output for each FileCheck prefix used by this file.
149 define i32 @test_reduce_v4i32(<4 x i32> %a0) {
150 ; X86-SSE2-LABEL: test_reduce_v4i32:
151 ; X86-SSE2: ## %bb.0:
152 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
153 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648]
154 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm4
155 ; X86-SSE2-NEXT: pxor %xmm1, %xmm4
156 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm2
157 ; X86-SSE2-NEXT: pxor %xmm1, %xmm2
158 ; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm2
159 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
160 ; X86-SSE2-NEXT: pandn %xmm3, %xmm2
161 ; X86-SSE2-NEXT: por %xmm0, %xmm2
162 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
163 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
164 ; X86-SSE2-NEXT: pxor %xmm1, %xmm3
165 ; X86-SSE2-NEXT: pxor %xmm0, %xmm1
166 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm1
167 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
168 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
169 ; X86-SSE2-NEXT: por %xmm2, %xmm1
170 ; X86-SSE2-NEXT: movd %xmm1, %eax
171 ; X86-SSE2-NEXT: retl
173 ; X86-SSE42-LABEL: test_reduce_v4i32:
174 ; X86-SSE42: ## %bb.0:
175 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
176 ; X86-SSE42-NEXT: pminud %xmm0, %xmm1
177 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
178 ; X86-SSE42-NEXT: pminud %xmm1, %xmm0
179 ; X86-SSE42-NEXT: movd %xmm0, %eax
180 ; X86-SSE42-NEXT: retl
182 ; X86-AVX-LABEL: test_reduce_v4i32:
184 ; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
185 ; X86-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
186 ; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
187 ; X86-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
188 ; X86-AVX-NEXT: vmovd %xmm0, %eax
191 ; X64-SSE2-LABEL: test_reduce_v4i32:
192 ; X64-SSE2: ## %bb.0:
193 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
194 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
195 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
196 ; X64-SSE2-NEXT: pxor %xmm2, %xmm3
197 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
198 ; X64-SSE2-NEXT: pxor %xmm2, %xmm4
199 ; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
200 ; X64-SSE2-NEXT: pand %xmm4, %xmm0
201 ; X64-SSE2-NEXT: pandn %xmm1, %xmm4
202 ; X64-SSE2-NEXT: por %xmm0, %xmm4
203 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[1,1,1,1]
204 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
205 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
206 ; X64-SSE2-NEXT: pxor %xmm0, %xmm2
207 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
208 ; X64-SSE2-NEXT: pand %xmm2, %xmm4
209 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2
210 ; X64-SSE2-NEXT: por %xmm4, %xmm2
211 ; X64-SSE2-NEXT: movd %xmm2, %eax
212 ; X64-SSE2-NEXT: retq
214 ; X64-SSE42-LABEL: test_reduce_v4i32:
215 ; X64-SSE42: ## %bb.0:
216 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
217 ; X64-SSE42-NEXT: pminud %xmm0, %xmm1
218 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
219 ; X64-SSE42-NEXT: pminud %xmm1, %xmm0
220 ; X64-SSE42-NEXT: movd %xmm0, %eax
221 ; X64-SSE42-NEXT: retq
223 ; X64-AVX-LABEL: test_reduce_v4i32:
225 ; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
226 ; X64-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
227 ; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
228 ; X64-AVX-NEXT: vpminud %xmm1, %xmm0, %xmm0
229 ; X64-AVX-NEXT: vmovd %xmm0, %eax
; Step 1: fold elements 2..3 onto lanes 0..1 and take the lane-wise unsigned minimum.
231 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
232 %2 = icmp ult <4 x i32> %a0, %1
233 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
; Step 2: fold element 1 of the partial result onto lane 0 and take the minimum again.
234 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
235 %5 = icmp ult <4 x i32> %3, %4
236 %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
; Lane 0 now holds the minimum of all four input elements.
237 %7 = extractelement <4 x i32> %6, i32 0
; Unsigned-minimum reduction of an <8 x i16> vector, spelled out as three
; shuffle + icmp ult + select halving steps followed by an extract of lane 0.
; The autogenerated assertion comments inside the function record the
; expected llc output for each FileCheck prefix used by this file.
241 define i16 @test_reduce_v8i16(<8 x i16> %a0) {
242 ; X86-SSE2-LABEL: test_reduce_v8i16:
243 ; X86-SSE2: ## %bb.0:
244 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
245 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
246 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
247 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
248 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
249 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
250 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
251 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
252 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
253 ; X86-SSE2-NEXT: psrld $16, %xmm1
254 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
255 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
256 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
257 ; X86-SSE2-NEXT: movd %xmm0, %eax
258 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
259 ; X86-SSE2-NEXT: retl
261 ; X86-SSE42-LABEL: test_reduce_v8i16:
262 ; X86-SSE42: ## %bb.0:
263 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
264 ; X86-SSE42-NEXT: movd %xmm0, %eax
265 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
266 ; X86-SSE42-NEXT: retl
268 ; X86-AVX-LABEL: test_reduce_v8i16:
270 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
271 ; X86-AVX-NEXT: vmovd %xmm0, %eax
272 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
275 ; X64-SSE2-LABEL: test_reduce_v8i16:
276 ; X64-SSE2: ## %bb.0:
277 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
278 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
279 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
280 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
281 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
282 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
283 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
284 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
285 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
286 ; X64-SSE2-NEXT: psrld $16, %xmm1
287 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
288 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
289 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
290 ; X64-SSE2-NEXT: movd %xmm0, %eax
291 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
292 ; X64-SSE2-NEXT: retq
294 ; X64-SSE42-LABEL: test_reduce_v8i16:
295 ; X64-SSE42: ## %bb.0:
296 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
297 ; X64-SSE42-NEXT: movd %xmm0, %eax
298 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
299 ; X64-SSE42-NEXT: retq
301 ; X64-AVX-LABEL: test_reduce_v8i16:
303 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
304 ; X64-AVX-NEXT: vmovd %xmm0, %eax
305 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; Step 1: fold elements 4..7 onto lanes 0..3 and take the lane-wise unsigned minimum.
307 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
308 %2 = icmp ult <8 x i16> %a0, %1
309 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
; Step 2: fold elements 2..3 of the partial result onto lanes 0..1.
310 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
311 %5 = icmp ult <8 x i16> %3, %4
312 %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
; Step 3: fold element 1 of the partial result onto lane 0.
313 %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
314 %8 = icmp ult <8 x i16> %6, %7
315 %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
; Lane 0 now holds the minimum of all eight input elements.
316 %10 = extractelement <8 x i16> %9, i32 0
; Unsigned-minimum reduction of a <16 x i8> vector, spelled out as four
; shuffle + icmp ult + select halving steps followed by an extract of lane 0.
; The autogenerated assertion comments inside the function record the
; expected llc output for each FileCheck prefix used by this file.
320 define i8 @test_reduce_v16i8(<16 x i8> %a0) {
321 ; X86-SSE2-LABEL: test_reduce_v16i8:
322 ; X86-SSE2: ## %bb.0:
323 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
324 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1
325 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
326 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
327 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
328 ; X86-SSE2-NEXT: psrld $16, %xmm1
329 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1
330 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
331 ; X86-SSE2-NEXT: psrlw $8, %xmm0
332 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
333 ; X86-SSE2-NEXT: movd %xmm0, %eax
334 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
335 ; X86-SSE2-NEXT: retl
337 ; X86-SSE42-LABEL: test_reduce_v16i8:
338 ; X86-SSE42: ## %bb.0:
339 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
340 ; X86-SSE42-NEXT: psrlw $8, %xmm1
341 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1
342 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
343 ; X86-SSE42-NEXT: movd %xmm0, %eax
344 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
345 ; X86-SSE42-NEXT: retl
347 ; X86-AVX-LABEL: test_reduce_v16i8:
349 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
350 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
351 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
352 ; X86-AVX-NEXT: vmovd %xmm0, %eax
353 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
356 ; X64-SSE2-LABEL: test_reduce_v16i8:
357 ; X64-SSE2: ## %bb.0:
358 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
359 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1
360 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
361 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
362 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
363 ; X64-SSE2-NEXT: psrld $16, %xmm1
364 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1
365 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
366 ; X64-SSE2-NEXT: psrlw $8, %xmm0
367 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
368 ; X64-SSE2-NEXT: movd %xmm0, %eax
369 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
370 ; X64-SSE2-NEXT: retq
372 ; X64-SSE42-LABEL: test_reduce_v16i8:
373 ; X64-SSE42: ## %bb.0:
374 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
375 ; X64-SSE42-NEXT: psrlw $8, %xmm1
376 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1
377 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
378 ; X64-SSE42-NEXT: movd %xmm0, %eax
379 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
380 ; X64-SSE42-NEXT: retq
382 ; X64-AVX-LABEL: test_reduce_v16i8:
384 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
385 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
386 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
387 ; X64-AVX-NEXT: vmovd %xmm0, %eax
388 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; Step 1: fold elements 8..15 onto lanes 0..7 and take the lane-wise unsigned minimum.
390 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
391 %2 = icmp ult <16 x i8> %a0, %1
392 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
; Step 2: fold elements 4..7 of the partial result onto lanes 0..3.
393 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
394 %5 = icmp ult <16 x i8> %3, %4
395 %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
; Step 3: fold elements 2..3 of the partial result onto lanes 0..1.
396 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
397 %8 = icmp ult <16 x i8> %6, %7
398 %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
; Step 4: fold element 1 of the partial result onto lane 0.
399 %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
400 %11 = icmp ult <16 x i8> %9, %10
401 %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
; Lane 0 now holds the minimum of all sixteen input elements.
402 %13 = extractelement <16 x i8> %12, i32 0
; Unsigned-minimum reduction of a <4 x i64> vector, spelled out as two
; shuffle + icmp ult + select halving steps followed by an extract of lane 0.
; The autogenerated assertion comments inside the function record the
; expected llc output for each FileCheck prefix used by this file.
410 define i64 @test_reduce_v4i64(<4 x i64> %a0) {
411 ; X86-SSE2-LABEL: test_reduce_v4i64:
412 ; X86-SSE2: ## %bb.0:
413 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
414 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
415 ; X86-SSE2-NEXT: pxor %xmm2, %xmm3
416 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
417 ; X86-SSE2-NEXT: pxor %xmm2, %xmm4
418 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
419 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
420 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
421 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
422 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
423 ; X86-SSE2-NEXT: pand %xmm6, %xmm4
424 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[1,1,3,3]
425 ; X86-SSE2-NEXT: por %xmm4, %xmm3
426 ; X86-SSE2-NEXT: pand %xmm3, %xmm0
427 ; X86-SSE2-NEXT: pandn %xmm1, %xmm3
428 ; X86-SSE2-NEXT: por %xmm0, %xmm3
429 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[2,3,2,3]
430 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm1
431 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
432 ; X86-SSE2-NEXT: pxor %xmm0, %xmm2
433 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
434 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm4
435 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
436 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
437 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
438 ; X86-SSE2-NEXT: pand %xmm5, %xmm1
439 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
440 ; X86-SSE2-NEXT: por %xmm1, %xmm2
441 ; X86-SSE2-NEXT: pand %xmm2, %xmm3
442 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2
443 ; X86-SSE2-NEXT: por %xmm3, %xmm2
444 ; X86-SSE2-NEXT: movd %xmm2, %eax
445 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
446 ; X86-SSE2-NEXT: movd %xmm0, %edx
447 ; X86-SSE2-NEXT: retl
449 ; X86-SSE42-LABEL: test_reduce_v4i64:
450 ; X86-SSE42: ## %bb.0:
451 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm3
452 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
453 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm4
454 ; X86-SSE42-NEXT: pxor %xmm2, %xmm4
455 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
456 ; X86-SSE42-NEXT: pxor %xmm2, %xmm0
457 ; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
458 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
459 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,2,3]
460 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
461 ; X86-SSE42-NEXT: pxor %xmm2, %xmm0
462 ; X86-SSE42-NEXT: pxor %xmm3, %xmm2
463 ; X86-SSE42-NEXT: pcmpgtq %xmm0, %xmm2
464 ; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
465 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
466 ; X86-SSE42-NEXT: movd %xmm3, %eax
467 ; X86-SSE42-NEXT: pextrd $1, %xmm3, %edx
468 ; X86-SSE42-NEXT: retl
470 ; X86-AVX1-LABEL: test_reduce_v4i64:
471 ; X86-AVX1: ## %bb.0:
472 ; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [0,2147483648,0,2147483648]
473 ; X86-AVX1-NEXT: ## xmm1 = mem[0,0]
474 ; X86-AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm2
475 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
476 ; X86-AVX1-NEXT: vxorps %xmm1, %xmm3, %xmm4
477 ; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
478 ; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
479 ; X86-AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[2,3,2,3]
480 ; X86-AVX1-NEXT: vxorpd %xmm1, %xmm0, %xmm3
481 ; X86-AVX1-NEXT: vxorpd %xmm1, %xmm2, %xmm1
482 ; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
483 ; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
484 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
485 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
486 ; X86-AVX1-NEXT: vzeroupper
487 ; X86-AVX1-NEXT: retl
489 ; X86-AVX2-LABEL: test_reduce_v4i64:
490 ; X86-AVX2: ## %bb.0:
491 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
492 ; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
493 ; X86-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
494 ; X86-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
495 ; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
496 ; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
497 ; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
498 ; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
499 ; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
500 ; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
501 ; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
502 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
503 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
504 ; X86-AVX2-NEXT: vzeroupper
505 ; X86-AVX2-NEXT: retl
507 ; X64-SSE2-LABEL: test_reduce_v4i64:
508 ; X64-SSE2: ## %bb.0:
509 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [9223372039002259456,9223372039002259456]
510 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
511 ; X64-SSE2-NEXT: pxor %xmm2, %xmm3
512 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
513 ; X64-SSE2-NEXT: pxor %xmm2, %xmm4
514 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm5
515 ; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
516 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
517 ; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
518 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
519 ; X64-SSE2-NEXT: pand %xmm6, %xmm3
520 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
521 ; X64-SSE2-NEXT: por %xmm3, %xmm4
522 ; X64-SSE2-NEXT: pand %xmm4, %xmm0
523 ; X64-SSE2-NEXT: pandn %xmm1, %xmm4
524 ; X64-SSE2-NEXT: por %xmm0, %xmm4
525 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
526 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
527 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
528 ; X64-SSE2-NEXT: pxor %xmm0, %xmm2
529 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
530 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
531 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
532 ; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
533 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
534 ; X64-SSE2-NEXT: pand %xmm5, %xmm1
535 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
536 ; X64-SSE2-NEXT: por %xmm1, %xmm2
537 ; X64-SSE2-NEXT: pand %xmm2, %xmm4
538 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2
539 ; X64-SSE2-NEXT: por %xmm4, %xmm2
540 ; X64-SSE2-NEXT: movq %xmm2, %rax
541 ; X64-SSE2-NEXT: retq
543 ; X64-SSE42-LABEL: test_reduce_v4i64:
544 ; X64-SSE42: ## %bb.0:
545 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
546 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
547 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm4
548 ; X64-SSE42-NEXT: pxor %xmm3, %xmm4
549 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
550 ; X64-SSE42-NEXT: pxor %xmm3, %xmm0
551 ; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
552 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
553 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
554 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
555 ; X64-SSE42-NEXT: pxor %xmm3, %xmm0
556 ; X64-SSE42-NEXT: pxor %xmm2, %xmm3
557 ; X64-SSE42-NEXT: pcmpgtq %xmm0, %xmm3
558 ; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
559 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
560 ; X64-SSE42-NEXT: movq %xmm2, %rax
561 ; X64-SSE42-NEXT: retq
563 ; X64-AVX1-LABEL: test_reduce_v4i64:
564 ; X64-AVX1: ## %bb.0:
565 ; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
566 ; X64-AVX1-NEXT: ## xmm1 = mem[0,0]
567 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm2
568 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
569 ; X64-AVX1-NEXT: vpxor %xmm1, %xmm3, %xmm4
570 ; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
571 ; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm3, %xmm0
572 ; X64-AVX1-NEXT: vshufps {{.*#+}} xmm2 = xmm0[2,3,2,3]
573 ; X64-AVX1-NEXT: vxorpd %xmm1, %xmm0, %xmm3
574 ; X64-AVX1-NEXT: vxorpd %xmm1, %xmm2, %xmm1
575 ; X64-AVX1-NEXT: vpcmpgtq %xmm3, %xmm1, %xmm1
576 ; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
577 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
578 ; X64-AVX1-NEXT: vzeroupper
579 ; X64-AVX1-NEXT: retq
581 ; X64-AVX2-LABEL: test_reduce_v4i64:
582 ; X64-AVX2: ## %bb.0:
583 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
584 ; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
585 ; X64-AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm3
586 ; X64-AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm4
587 ; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
588 ; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
589 ; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
590 ; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
591 ; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
592 ; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
593 ; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
594 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
595 ; X64-AVX2-NEXT: vzeroupper
596 ; X64-AVX2-NEXT: retq
598 ; X64-AVX512-LABEL: test_reduce_v4i64:
599 ; X64-AVX512: ## %bb.0:
600 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
601 ; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
602 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
603 ; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
604 ; X64-AVX512-NEXT: vmovq %xmm0, %rax
605 ; X64-AVX512-NEXT: vzeroupper
606 ; X64-AVX512-NEXT: retq
; Step 1: fold elements 2..3 onto lanes 0..1 and take the lane-wise unsigned minimum.
607 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
608 %2 = icmp ult <4 x i64> %a0, %1
609 %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
; Step 2: fold element 1 of the partial result onto lane 0 and take the minimum again.
610 %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
611 %5 = icmp ult <4 x i64> %3, %4
612 %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
; Lane 0 now holds the minimum of all four input elements.
613 %7 = extractelement <4 x i64> %6, i32 0
; Unsigned-minimum reduction of an <8 x i32> vector, spelled out as three
; shuffle + icmp ult + select halving steps followed by an extract of lane 0.
; The autogenerated assertion comments inside the function record the
; expected llc output for each FileCheck prefix used by this file.
617 define i32 @test_reduce_v8i32(<8 x i32> %a0) {
618 ; X86-SSE2-LABEL: test_reduce_v8i32:
619 ; X86-SSE2: ## %bb.0:
620 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
621 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm4
622 ; X86-SSE2-NEXT: pxor %xmm2, %xmm4
623 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm3
624 ; X86-SSE2-NEXT: pxor %xmm2, %xmm3
625 ; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm3
626 ; X86-SSE2-NEXT: pand %xmm3, %xmm0
627 ; X86-SSE2-NEXT: pandn %xmm1, %xmm3
628 ; X86-SSE2-NEXT: por %xmm0, %xmm3
629 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
630 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm4
631 ; X86-SSE2-NEXT: pxor %xmm2, %xmm4
632 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
633 ; X86-SSE2-NEXT: pxor %xmm2, %xmm0
634 ; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm0
635 ; X86-SSE2-NEXT: pand %xmm0, %xmm3
636 ; X86-SSE2-NEXT: pandn %xmm1, %xmm0
637 ; X86-SSE2-NEXT: por %xmm3, %xmm0
638 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
639 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
640 ; X86-SSE2-NEXT: pxor %xmm2, %xmm3
641 ; X86-SSE2-NEXT: pxor %xmm1, %xmm2
642 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm2
643 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
644 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2
645 ; X86-SSE2-NEXT: por %xmm0, %xmm2
646 ; X86-SSE2-NEXT: movd %xmm2, %eax
647 ; X86-SSE2-NEXT: retl
649 ; X86-SSE42-LABEL: test_reduce_v8i32:
650 ; X86-SSE42: ## %bb.0:
651 ; X86-SSE42-NEXT: pminud %xmm1, %xmm0
652 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
653 ; X86-SSE42-NEXT: pminud %xmm0, %xmm1
654 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
655 ; X86-SSE42-NEXT: pminud %xmm1, %xmm0
656 ; X86-SSE42-NEXT: movd %xmm0, %eax
657 ; X86-SSE42-NEXT: retl
659 ; X86-AVX1-LABEL: test_reduce_v8i32:
660 ; X86-AVX1: ## %bb.0:
661 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
662 ; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
663 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
664 ; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
665 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
666 ; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
667 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
668 ; X86-AVX1-NEXT: vzeroupper
669 ; X86-AVX1-NEXT: retl
671 ; X86-AVX2-LABEL: test_reduce_v8i32:
672 ; X86-AVX2: ## %bb.0:
673 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
674 ; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
675 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
676 ; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
677 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
678 ; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
679 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
680 ; X86-AVX2-NEXT: vzeroupper
681 ; X86-AVX2-NEXT: retl
683 ; X64-SSE2-LABEL: test_reduce_v8i32:
684 ; X64-SSE2: ## %bb.0:
685 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
686 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
687 ; X64-SSE2-NEXT: pxor %xmm2, %xmm3
688 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
689 ; X64-SSE2-NEXT: pxor %xmm2, %xmm4
690 ; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
691 ; X64-SSE2-NEXT: pand %xmm4, %xmm0
692 ; X64-SSE2-NEXT: pandn %xmm1, %xmm4
693 ; X64-SSE2-NEXT: por %xmm0, %xmm4
694 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,2,3]
695 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
696 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
697 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
698 ; X64-SSE2-NEXT: pxor %xmm2, %xmm3
699 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
700 ; X64-SSE2-NEXT: pand %xmm3, %xmm4
701 ; X64-SSE2-NEXT: pandn %xmm0, %xmm3
702 ; X64-SSE2-NEXT: por %xmm4, %xmm3
703 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
704 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm1
705 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
706 ; X64-SSE2-NEXT: pxor %xmm0, %xmm2
707 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
708 ; X64-SSE2-NEXT: pand %xmm2, %xmm3
709 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2
710 ; X64-SSE2-NEXT: por %xmm3, %xmm2
711 ; X64-SSE2-NEXT: movd %xmm2, %eax
712 ; X64-SSE2-NEXT: retq
714 ; X64-SSE42-LABEL: test_reduce_v8i32:
715 ; X64-SSE42: ## %bb.0:
716 ; X64-SSE42-NEXT: pminud %xmm1, %xmm0
717 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
718 ; X64-SSE42-NEXT: pminud %xmm0, %xmm1
719 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
720 ; X64-SSE42-NEXT: pminud %xmm1, %xmm0
721 ; X64-SSE42-NEXT: movd %xmm0, %eax
722 ; X64-SSE42-NEXT: retq
724 ; X64-AVX1-LABEL: test_reduce_v8i32:
725 ; X64-AVX1: ## %bb.0:
726 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
727 ; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
728 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
729 ; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
730 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
731 ; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
732 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
733 ; X64-AVX1-NEXT: vzeroupper
734 ; X64-AVX1-NEXT: retq
736 ; X64-AVX2-LABEL: test_reduce_v8i32:
737 ; X64-AVX2: ## %bb.0:
738 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
739 ; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
740 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
741 ; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
742 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
743 ; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
744 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
745 ; X64-AVX2-NEXT: vzeroupper
746 ; X64-AVX2-NEXT: retq
748 ; X64-AVX512-LABEL: test_reduce_v8i32:
749 ; X64-AVX512: ## %bb.0:
750 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
751 ; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
752 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
753 ; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
754 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
755 ; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
756 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
757 ; X64-AVX512-NEXT: vzeroupper
758 ; X64-AVX512-NEXT: retq
; Step 1: fold elements 4..7 onto lanes 0..3 and take the lane-wise unsigned minimum.
759 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
760 %2 = icmp ult <8 x i32> %a0, %1
761 %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
; Step 2: fold elements 2..3 of the partial result onto lanes 0..1.
762 %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
763 %5 = icmp ult <8 x i32> %3, %4
764 %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
; Step 3: fold element 1 of the partial result onto lane 0.
765 %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
766 %8 = icmp ult <8 x i32> %6, %7
767 %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
; Lane 0 now holds the minimum of all eight input elements.
768 %10 = extractelement <8 x i32> %9, i32 0
; Unsigned-minimum reduction over a <16 x i16> argument, yielding an i16.
; The IR body narrows the active width four times (8, 4, 2, then 1 lane):
; each step shuffles the upper lanes down, compares with icmp ult and selects
; the smaller lane, so lane 0 ends up holding the reduced value.
; The SSE2 check lines expect every min step as psubusw followed by psubw,
; while the SSE4.2 and AVX check lines expect one pminuw followed by a single
; phminposuw.
772 define i16 @test_reduce_v16i16(<16 x i16> %a0) {
773 ; X86-SSE2-LABEL: test_reduce_v16i16:
774 ; X86-SSE2: ## %bb.0:
775 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
776 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
777 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
778 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
779 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
780 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
781 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
782 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
783 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
784 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
785 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
786 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
787 ; X86-SSE2-NEXT: psrld $16, %xmm1
788 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
789 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
790 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
791 ; X86-SSE2-NEXT: movd %xmm0, %eax
792 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
793 ; X86-SSE2-NEXT: retl
795 ; X86-SSE42-LABEL: test_reduce_v16i16:
796 ; X86-SSE42: ## %bb.0:
797 ; X86-SSE42-NEXT: pminuw %xmm1, %xmm0
798 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
799 ; X86-SSE42-NEXT: movd %xmm0, %eax
800 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
801 ; X86-SSE42-NEXT: retl
803 ; X86-AVX1-LABEL: test_reduce_v16i16:
804 ; X86-AVX1: ## %bb.0:
805 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
806 ; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
807 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
808 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
809 ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
810 ; X86-AVX1-NEXT: vzeroupper
811 ; X86-AVX1-NEXT: retl
813 ; X86-AVX2-LABEL: test_reduce_v16i16:
814 ; X86-AVX2: ## %bb.0:
815 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
816 ; X86-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
817 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
818 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
819 ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
820 ; X86-AVX2-NEXT: vzeroupper
821 ; X86-AVX2-NEXT: retl
823 ; X64-SSE2-LABEL: test_reduce_v16i16:
824 ; X64-SSE2: ## %bb.0:
825 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
826 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
827 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
828 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
829 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
830 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
831 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
832 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
833 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
834 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
835 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
836 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
837 ; X64-SSE2-NEXT: psrld $16, %xmm1
838 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
839 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
840 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
841 ; X64-SSE2-NEXT: movd %xmm0, %eax
842 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
843 ; X64-SSE2-NEXT: retq
845 ; X64-SSE42-LABEL: test_reduce_v16i16:
846 ; X64-SSE42: ## %bb.0:
847 ; X64-SSE42-NEXT: pminuw %xmm1, %xmm0
848 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
849 ; X64-SSE42-NEXT: movd %xmm0, %eax
850 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
851 ; X64-SSE42-NEXT: retq
853 ; X64-AVX1-LABEL: test_reduce_v16i16:
854 ; X64-AVX1: ## %bb.0:
855 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
856 ; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
857 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
858 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
859 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
860 ; X64-AVX1-NEXT: vzeroupper
861 ; X64-AVX1-NEXT: retq
863 ; X64-AVX2-LABEL: test_reduce_v16i16:
864 ; X64-AVX2: ## %bb.0:
865 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
866 ; X64-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
867 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
868 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
869 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
870 ; X64-AVX2-NEXT: vzeroupper
871 ; X64-AVX2-NEXT: retq
873 ; X64-AVX512-LABEL: test_reduce_v16i16:
874 ; X64-AVX512: ## %bb.0:
875 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
876 ; X64-AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0
877 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
878 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
879 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
880 ; X64-AVX512-NEXT: vzeroupper
881 ; X64-AVX512-NEXT: retq
; Step 1 - bring lanes 8..15 down to lanes 0..7 and keep the unsigned-smaller
; value per lane.
882 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
883 %2 = icmp ult <16 x i16> %a0, %1
884 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
; Step 2 - bring lanes 4..7 down to lanes 0..3.
885 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
886 %5 = icmp ult <16 x i16> %3, %4
887 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
; Step 3 - bring lanes 2..3 down to lanes 0..1.
888 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
889 %8 = icmp ult <16 x i16> %6, %7
890 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
; Step 4 - bring lane 1 down to lane 0.
891 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
892 %11 = icmp ult <16 x i16> %9, %10
893 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
; Lane 0 is the only lane with a defined value after the final step.
894 %13 = extractelement <16 x i16> %12, i32 0
898 define i8 @test_reduce_v32i8(<32 x i8> %a0) {
899 ; X86-SSE2-LABEL: test_reduce_v32i8:
900 ; X86-SSE2: ## %bb.0:
901 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
902 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
903 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1
904 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
905 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
906 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
907 ; X86-SSE2-NEXT: psrld $16, %xmm1
908 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1
909 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
910 ; X86-SSE2-NEXT: psrlw $8, %xmm0
911 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
912 ; X86-SSE2-NEXT: movd %xmm0, %eax
913 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
914 ; X86-SSE2-NEXT: retl
916 ; X86-SSE42-LABEL: test_reduce_v32i8:
917 ; X86-SSE42: ## %bb.0:
918 ; X86-SSE42-NEXT: pminub %xmm1, %xmm0
919 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
920 ; X86-SSE42-NEXT: psrlw $8, %xmm1
921 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1
922 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
923 ; X86-SSE42-NEXT: movd %xmm0, %eax
924 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
925 ; X86-SSE42-NEXT: retl
927 ; X86-AVX1-LABEL: test_reduce_v32i8:
928 ; X86-AVX1: ## %bb.0:
929 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
930 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
931 ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
932 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
933 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
934 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
935 ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
936 ; X86-AVX1-NEXT: vzeroupper
937 ; X86-AVX1-NEXT: retl
939 ; X86-AVX2-LABEL: test_reduce_v32i8:
940 ; X86-AVX2: ## %bb.0:
941 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
942 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
943 ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
944 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
945 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
946 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
947 ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
948 ; X86-AVX2-NEXT: vzeroupper
949 ; X86-AVX2-NEXT: retl
951 ; X64-SSE2-LABEL: test_reduce_v32i8:
952 ; X64-SSE2: ## %bb.0:
953 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
954 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
955 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1
956 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
957 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
958 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
959 ; X64-SSE2-NEXT: psrld $16, %xmm1
960 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1
961 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
962 ; X64-SSE2-NEXT: psrlw $8, %xmm0
963 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
964 ; X64-SSE2-NEXT: movd %xmm0, %eax
965 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
966 ; X64-SSE2-NEXT: retq
968 ; X64-SSE42-LABEL: test_reduce_v32i8:
969 ; X64-SSE42: ## %bb.0:
970 ; X64-SSE42-NEXT: pminub %xmm1, %xmm0
971 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
972 ; X64-SSE42-NEXT: psrlw $8, %xmm1
973 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1
974 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
975 ; X64-SSE42-NEXT: movd %xmm0, %eax
976 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
977 ; X64-SSE42-NEXT: retq
979 ; X64-AVX1-LABEL: test_reduce_v32i8:
980 ; X64-AVX1: ## %bb.0:
981 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
982 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
983 ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
984 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
985 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
986 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
987 ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
988 ; X64-AVX1-NEXT: vzeroupper
989 ; X64-AVX1-NEXT: retq
991 ; X64-AVX2-LABEL: test_reduce_v32i8:
992 ; X64-AVX2: ## %bb.0:
993 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
994 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
995 ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
996 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
997 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
998 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
999 ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
1000 ; X64-AVX2-NEXT: vzeroupper
1001 ; X64-AVX2-NEXT: retq
1003 ; X64-AVX512-LABEL: test_reduce_v32i8:
1004 ; X64-AVX512: ## %bb.0:
1005 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1006 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
1007 ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
1008 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
1009 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1010 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
1011 ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
1012 ; X64-AVX512-NEXT: vzeroupper
1013 ; X64-AVX512-NEXT: retq
1014 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1015 %2 = icmp ult <32 x i8> %a0, %1
1016 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
1017 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1018 %5 = icmp ult <32 x i8> %3, %4
1019 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
1020 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1021 %8 = icmp ult <32 x i8> %6, %7
1022 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
1023 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1024 %11 = icmp ult <32 x i8> %9, %10
1025 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
1026 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1027 %14 = icmp ult <32 x i8> %12, %13
1028 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
1029 %16 = extractelement <32 x i8> %15, i32 0
; Unsigned-minimum reduction over an <8 x i64> argument, yielding an i64.
; The IR body narrows the active width three times (4, 2, then 1 lane): each
; step shuffles the upper lanes down, compares with icmp ult and selects the
; smaller lane, so lane 0 ends up holding the reduced value.
; Only the AVX512 check lines expect a direct vpminuq per step. The other
; configurations are expected to XOR both operands with a constant that has
; sign bits set, compare with pcmpgtq (SSE4.2/AVX) or a pcmpgtd/pcmpeqd
; sequence (SSE2), and then blend or mask-merge the two inputs.
; On the i686 runs the 64-bit result is expected in the eax/edx pair; on the
; x86_64 runs it is expected in rax.
1037 define i64 @test_reduce_v8i64(<8 x i64> %a0) {
1038 ; X86-SSE2-LABEL: test_reduce_v8i64:
1039 ; X86-SSE2: ## %bb.0:
1040 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1041 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
1042 ; X86-SSE2-NEXT: pxor %xmm4, %xmm5
1043 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
1044 ; X86-SSE2-NEXT: pxor %xmm4, %xmm6
1045 ; X86-SSE2-NEXT: movdqa %xmm6, %xmm7
1046 ; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1047 ; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1048 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
1049 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1050 ; X86-SSE2-NEXT: pand %xmm5, %xmm6
1051 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1052 ; X86-SSE2-NEXT: por %xmm6, %xmm5
1053 ; X86-SSE2-NEXT: pand %xmm5, %xmm1
1054 ; X86-SSE2-NEXT: pandn %xmm3, %xmm5
1055 ; X86-SSE2-NEXT: por %xmm1, %xmm5
1056 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1057 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1
1058 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
1059 ; X86-SSE2-NEXT: pxor %xmm4, %xmm3
1060 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
1061 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm6
1062 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm3
1063 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2]
1064 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1065 ; X86-SSE2-NEXT: pand %xmm1, %xmm3
1066 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
1067 ; X86-SSE2-NEXT: por %xmm3, %xmm1
1068 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
1069 ; X86-SSE2-NEXT: pandn %xmm2, %xmm1
1070 ; X86-SSE2-NEXT: por %xmm0, %xmm1
1071 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
1072 ; X86-SSE2-NEXT: pxor %xmm4, %xmm0
1073 ; X86-SSE2-NEXT: movdqa %xmm5, %xmm2
1074 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2
1075 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
1076 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
1077 ; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2
1078 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1079 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1080 ; X86-SSE2-NEXT: pand %xmm0, %xmm2
1081 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1082 ; X86-SSE2-NEXT: por %xmm2, %xmm0
1083 ; X86-SSE2-NEXT: pand %xmm0, %xmm1
1084 ; X86-SSE2-NEXT: pandn %xmm5, %xmm0
1085 ; X86-SSE2-NEXT: por %xmm1, %xmm0
1086 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1087 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1088 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2
1089 ; X86-SSE2-NEXT: pxor %xmm1, %xmm4
1090 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm3
1091 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1092 ; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1093 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
1094 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1095 ; X86-SSE2-NEXT: pand %xmm2, %xmm4
1096 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1097 ; X86-SSE2-NEXT: por %xmm4, %xmm2
1098 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
1099 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2
1100 ; X86-SSE2-NEXT: por %xmm0, %xmm2
1101 ; X86-SSE2-NEXT: movd %xmm2, %eax
1102 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,1,1]
1103 ; X86-SSE2-NEXT: movd %xmm0, %edx
1104 ; X86-SSE2-NEXT: retl
1106 ; X86-SSE42-LABEL: test_reduce_v8i64:
1107 ; X86-SSE42: ## %bb.0:
1108 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm4
1109 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm5 = [0,2147483648,0,2147483648]
1110 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm6
1111 ; X86-SSE42-NEXT: pxor %xmm5, %xmm6
1112 ; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
1113 ; X86-SSE42-NEXT: pxor %xmm5, %xmm0
1114 ; X86-SSE42-NEXT: pcmpgtq %xmm6, %xmm0
1115 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1116 ; X86-SSE42-NEXT: movdqa %xmm4, %xmm1
1117 ; X86-SSE42-NEXT: pxor %xmm5, %xmm1
1118 ; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
1119 ; X86-SSE42-NEXT: pxor %xmm5, %xmm0
1120 ; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
1121 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1122 ; X86-SSE42-NEXT: movapd %xmm2, %xmm1
1123 ; X86-SSE42-NEXT: xorpd %xmm5, %xmm1
1124 ; X86-SSE42-NEXT: movapd %xmm3, %xmm0
1125 ; X86-SSE42-NEXT: xorpd %xmm5, %xmm0
1126 ; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
1127 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1128 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
1129 ; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
1130 ; X86-SSE42-NEXT: pxor %xmm5, %xmm0
1131 ; X86-SSE42-NEXT: pxor %xmm1, %xmm5
1132 ; X86-SSE42-NEXT: pcmpgtq %xmm0, %xmm5
1133 ; X86-SSE42-NEXT: movdqa %xmm5, %xmm0
1134 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
1135 ; X86-SSE42-NEXT: movd %xmm1, %eax
1136 ; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx
1137 ; X86-SSE42-NEXT: retl
1139 ; X86-AVX1-LABEL: test_reduce_v8i64:
1140 ; X86-AVX1: ## %bb.0:
1141 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1142 ; X86-AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [0,2147483648,0,2147483648]
1143 ; X86-AVX1-NEXT: ## xmm2 = mem[0,0]
1144 ; X86-AVX1-NEXT: vxorps %xmm2, %xmm3, %xmm4
1145 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
1146 ; X86-AVX1-NEXT: vxorps %xmm2, %xmm5, %xmm6
1147 ; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
1148 ; X86-AVX1-NEXT: vblendvpd %xmm4, %xmm3, %xmm5, %xmm3
1149 ; X86-AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm4
1150 ; X86-AVX1-NEXT: vxorps %xmm2, %xmm1, %xmm5
1151 ; X86-AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
1152 ; X86-AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm0
1153 ; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm1
1154 ; X86-AVX1-NEXT: vxorpd %xmm2, %xmm3, %xmm4
1155 ; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
1156 ; X86-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm3, %xmm0
1157 ; X86-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1158 ; X86-AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm3
1159 ; X86-AVX1-NEXT: vxorpd %xmm2, %xmm1, %xmm2
1160 ; X86-AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
1161 ; X86-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1162 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
1163 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
1164 ; X86-AVX1-NEXT: vzeroupper
1165 ; X86-AVX1-NEXT: retl
1167 ; X86-AVX2-LABEL: test_reduce_v8i64:
1168 ; X86-AVX2: ## %bb.0:
1169 ; X86-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [0,2147483648,0,2147483648,0,2147483648,0,2147483648]
1170 ; X86-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
1171 ; X86-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
1172 ; X86-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
1173 ; X86-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
1174 ; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
1175 ; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
1176 ; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
1177 ; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1178 ; X86-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
1179 ; X86-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1180 ; X86-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
1181 ; X86-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
1182 ; X86-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
1183 ; X86-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1184 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
1185 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
1186 ; X86-AVX2-NEXT: vzeroupper
1187 ; X86-AVX2-NEXT: retl
1189 ; X64-SSE2-LABEL: test_reduce_v8i64:
1190 ; X64-SSE2: ## %bb.0:
1191 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
1192 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm5
1193 ; X64-SSE2-NEXT: pxor %xmm4, %xmm5
1194 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
1195 ; X64-SSE2-NEXT: pxor %xmm4, %xmm6
1196 ; X64-SSE2-NEXT: movdqa %xmm6, %xmm7
1197 ; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1198 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1199 ; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1200 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1201 ; X64-SSE2-NEXT: pand %xmm8, %xmm6
1202 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1203 ; X64-SSE2-NEXT: por %xmm6, %xmm5
1204 ; X64-SSE2-NEXT: pand %xmm5, %xmm1
1205 ; X64-SSE2-NEXT: pandn %xmm3, %xmm5
1206 ; X64-SSE2-NEXT: por %xmm1, %xmm5
1207 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1208 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1
1209 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
1210 ; X64-SSE2-NEXT: pxor %xmm4, %xmm3
1211 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
1212 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm6
1213 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1214 ; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm3
1215 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
1216 ; X64-SSE2-NEXT: pand %xmm7, %xmm1
1217 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
1218 ; X64-SSE2-NEXT: por %xmm1, %xmm3
1219 ; X64-SSE2-NEXT: pand %xmm3, %xmm0
1220 ; X64-SSE2-NEXT: pandn %xmm2, %xmm3
1221 ; X64-SSE2-NEXT: por %xmm0, %xmm3
1222 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm0
1223 ; X64-SSE2-NEXT: pxor %xmm4, %xmm0
1224 ; X64-SSE2-NEXT: movdqa %xmm5, %xmm1
1225 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1
1226 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1227 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
1228 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
1229 ; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1230 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1231 ; X64-SSE2-NEXT: pand %xmm6, %xmm0
1232 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1233 ; X64-SSE2-NEXT: por %xmm0, %xmm1
1234 ; X64-SSE2-NEXT: pand %xmm1, %xmm3
1235 ; X64-SSE2-NEXT: pandn %xmm5, %xmm1
1236 ; X64-SSE2-NEXT: por %xmm3, %xmm1
1237 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1238 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1239 ; X64-SSE2-NEXT: pxor %xmm4, %xmm2
1240 ; X64-SSE2-NEXT: pxor %xmm0, %xmm4
1241 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm3
1242 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1243 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1244 ; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1245 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1246 ; X64-SSE2-NEXT: pand %xmm5, %xmm2
1247 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1248 ; X64-SSE2-NEXT: por %xmm2, %xmm3
1249 ; X64-SSE2-NEXT: pand %xmm3, %xmm1
1250 ; X64-SSE2-NEXT: pandn %xmm0, %xmm3
1251 ; X64-SSE2-NEXT: por %xmm1, %xmm3
1252 ; X64-SSE2-NEXT: movq %xmm3, %rax
1253 ; X64-SSE2-NEXT: retq
1255 ; X64-SSE42-LABEL: test_reduce_v8i64:
1256 ; X64-SSE42: ## %bb.0:
1257 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm5
1258 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm4 = [9223372036854775808,9223372036854775808]
1259 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm6
1260 ; X64-SSE42-NEXT: pxor %xmm4, %xmm6
1261 ; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
1262 ; X64-SSE42-NEXT: pxor %xmm4, %xmm0
1263 ; X64-SSE42-NEXT: pcmpgtq %xmm6, %xmm0
1264 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1265 ; X64-SSE42-NEXT: movdqa %xmm5, %xmm1
1266 ; X64-SSE42-NEXT: pxor %xmm4, %xmm1
1267 ; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
1268 ; X64-SSE42-NEXT: pxor %xmm4, %xmm0
1269 ; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
1270 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm5, %xmm2
1271 ; X64-SSE42-NEXT: movapd %xmm2, %xmm1
1272 ; X64-SSE42-NEXT: xorpd %xmm4, %xmm1
1273 ; X64-SSE42-NEXT: movapd %xmm3, %xmm0
1274 ; X64-SSE42-NEXT: xorpd %xmm4, %xmm0
1275 ; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
1276 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1277 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,2,3]
1278 ; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
1279 ; X64-SSE42-NEXT: pxor %xmm4, %xmm0
1280 ; X64-SSE42-NEXT: pxor %xmm1, %xmm4
1281 ; X64-SSE42-NEXT: pcmpgtq %xmm0, %xmm4
1282 ; X64-SSE42-NEXT: movdqa %xmm4, %xmm0
1283 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
1284 ; X64-SSE42-NEXT: movq %xmm1, %rax
1285 ; X64-SSE42-NEXT: retq
1287 ; X64-AVX1-LABEL: test_reduce_v8i64:
1288 ; X64-AVX1: ## %bb.0:
1289 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1290 ; X64-AVX1-NEXT: vmovddup {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808]
1291 ; X64-AVX1-NEXT: ## xmm3 = mem[0,0]
1292 ; X64-AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm4
1293 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
1294 ; X64-AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6
1295 ; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
1296 ; X64-AVX1-NEXT: vblendvpd %xmm4, %xmm2, %xmm5, %xmm2
1297 ; X64-AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
1298 ; X64-AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm5
1299 ; X64-AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
1300 ; X64-AVX1-NEXT: vblendvpd %xmm4, %xmm0, %xmm1, %xmm0
1301 ; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm1
1302 ; X64-AVX1-NEXT: vxorpd %xmm3, %xmm2, %xmm4
1303 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1
1304 ; X64-AVX1-NEXT: vblendvpd %xmm1, %xmm0, %xmm2, %xmm0
1305 ; X64-AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1306 ; X64-AVX1-NEXT: vxorpd %xmm3, %xmm0, %xmm2
1307 ; X64-AVX1-NEXT: vxorpd %xmm3, %xmm1, %xmm3
1308 ; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1309 ; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1310 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
1311 ; X64-AVX1-NEXT: vzeroupper
1312 ; X64-AVX1-NEXT: retq
1314 ; X64-AVX2-LABEL: test_reduce_v8i64:
1315 ; X64-AVX2: ## %bb.0:
1316 ; X64-AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
1317 ; X64-AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm3
1318 ; X64-AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm4
1319 ; X64-AVX2-NEXT: vpcmpgtq %ymm3, %ymm4, %ymm3
1320 ; X64-AVX2-NEXT: vblendvpd %ymm3, %ymm0, %ymm1, %ymm0
1321 ; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
1322 ; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
1323 ; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm4
1324 ; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm4, %xmm3
1325 ; X64-AVX2-NEXT: vblendvpd %xmm3, %xmm0, %xmm1, %xmm0
1326 ; X64-AVX2-NEXT: vshufps {{.*#+}} xmm1 = xmm0[2,3,2,3]
1327 ; X64-AVX2-NEXT: vxorpd %xmm2, %xmm0, %xmm3
1328 ; X64-AVX2-NEXT: vxorpd %xmm2, %xmm1, %xmm2
1329 ; X64-AVX2-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2
1330 ; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
1331 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
1332 ; X64-AVX2-NEXT: vzeroupper
1333 ; X64-AVX2-NEXT: retq
1335 ; X64-AVX512-LABEL: test_reduce_v8i64:
1336 ; X64-AVX512: ## %bb.0:
1337 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1338 ; X64-AVX512-NEXT: vpminuq %zmm1, %zmm0, %zmm0
1339 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1340 ; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
1341 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1342 ; X64-AVX512-NEXT: vpminuq %xmm1, %xmm0, %xmm0
1343 ; X64-AVX512-NEXT: vmovq %xmm0, %rax
1344 ; X64-AVX512-NEXT: vzeroupper
1345 ; X64-AVX512-NEXT: retq
; Step 1 - bring lanes 4..7 down to lanes 0..3 and keep the unsigned-smaller
; value per lane.
1346 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1347 %2 = icmp ult <8 x i64> %a0, %1
1348 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
; Step 2 - bring lanes 2..3 down to lanes 0..1.
1349 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1350 %5 = icmp ult <8 x i64> %3, %4
1351 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
; Step 3 - bring lane 1 down to lane 0.
1352 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1353 %8 = icmp ult <8 x i64> %6, %7
1354 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
; Lane 0 is the only lane with a defined value after the final step.
1355 %10 = extractelement <8 x i64> %9, i32 0
1359 define i32 @test_reduce_v16i32(<16 x i32> %a0) {
1360 ; X86-SSE2-LABEL: test_reduce_v16i32:
1361 ; X86-SSE2: ## %bb.0:
1362 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1363 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm6
1364 ; X86-SSE2-NEXT: pxor %xmm4, %xmm6
1365 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm5
1366 ; X86-SSE2-NEXT: pxor %xmm4, %xmm5
1367 ; X86-SSE2-NEXT: pcmpgtd %xmm6, %xmm5
1368 ; X86-SSE2-NEXT: pand %xmm5, %xmm1
1369 ; X86-SSE2-NEXT: pandn %xmm3, %xmm5
1370 ; X86-SSE2-NEXT: por %xmm1, %xmm5
1371 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
1372 ; X86-SSE2-NEXT: pxor %xmm4, %xmm3
1373 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
1374 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1
1375 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm1
1376 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
1377 ; X86-SSE2-NEXT: pandn %xmm2, %xmm1
1378 ; X86-SSE2-NEXT: por %xmm0, %xmm1
1379 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
1380 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2
1381 ; X86-SSE2-NEXT: movdqa %xmm5, %xmm0
1382 ; X86-SSE2-NEXT: pxor %xmm4, %xmm0
1383 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm0
1384 ; X86-SSE2-NEXT: pand %xmm0, %xmm1
1385 ; X86-SSE2-NEXT: pandn %xmm5, %xmm0
1386 ; X86-SSE2-NEXT: por %xmm1, %xmm0
1387 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
1388 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
1389 ; X86-SSE2-NEXT: pxor %xmm4, %xmm3
1390 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm1
1391 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1
1392 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm1
1393 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
1394 ; X86-SSE2-NEXT: pandn %xmm2, %xmm1
1395 ; X86-SSE2-NEXT: por %xmm0, %xmm1
1396 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
1397 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
1398 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2
1399 ; X86-SSE2-NEXT: pxor %xmm0, %xmm4
1400 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm4
1401 ; X86-SSE2-NEXT: pand %xmm4, %xmm1
1402 ; X86-SSE2-NEXT: pandn %xmm0, %xmm4
1403 ; X86-SSE2-NEXT: por %xmm1, %xmm4
1404 ; X86-SSE2-NEXT: movd %xmm4, %eax
1405 ; X86-SSE2-NEXT: retl
1407 ; X86-SSE42-LABEL: test_reduce_v16i32:
1408 ; X86-SSE42: ## %bb.0:
1409 ; X86-SSE42-NEXT: pminud %xmm3, %xmm1
1410 ; X86-SSE42-NEXT: pminud %xmm2, %xmm0
1411 ; X86-SSE42-NEXT: pminud %xmm1, %xmm0
1412 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1413 ; X86-SSE42-NEXT: pminud %xmm0, %xmm1
1414 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
1415 ; X86-SSE42-NEXT: pminud %xmm1, %xmm0
1416 ; X86-SSE42-NEXT: movd %xmm0, %eax
1417 ; X86-SSE42-NEXT: retl
1419 ; X86-AVX1-LABEL: test_reduce_v16i32:
1420 ; X86-AVX1: ## %bb.0:
1421 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1422 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1423 ; X86-AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1424 ; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1425 ; X86-AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0
1426 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1427 ; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1428 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1429 ; X86-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1430 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
1431 ; X86-AVX1-NEXT: vzeroupper
1432 ; X86-AVX1-NEXT: retl
1434 ; X86-AVX2-LABEL: test_reduce_v16i32:
1435 ; X86-AVX2: ## %bb.0:
1436 ; X86-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
1437 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1438 ; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
1439 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1440 ; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
1441 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1442 ; X86-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
1443 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
1444 ; X86-AVX2-NEXT: vzeroupper
1445 ; X86-AVX2-NEXT: retl
1447 ; X64-SSE2-LABEL: test_reduce_v16i32:
1448 ; X64-SSE2: ## %bb.0:
1449 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648,2147483648,2147483648]
1450 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm5
1451 ; X64-SSE2-NEXT: pxor %xmm4, %xmm5
1452 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
1453 ; X64-SSE2-NEXT: pxor %xmm4, %xmm6
1454 ; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm6
1455 ; X64-SSE2-NEXT: pand %xmm6, %xmm1
1456 ; X64-SSE2-NEXT: pandn %xmm3, %xmm6
1457 ; X64-SSE2-NEXT: por %xmm1, %xmm6
1458 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1459 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1
1460 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
1461 ; X64-SSE2-NEXT: pxor %xmm4, %xmm3
1462 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
1463 ; X64-SSE2-NEXT: pand %xmm3, %xmm0
1464 ; X64-SSE2-NEXT: pandn %xmm2, %xmm3
1465 ; X64-SSE2-NEXT: por %xmm0, %xmm3
1466 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm0
1467 ; X64-SSE2-NEXT: pxor %xmm4, %xmm0
1468 ; X64-SSE2-NEXT: movdqa %xmm6, %xmm1
1469 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1
1470 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm1
1471 ; X64-SSE2-NEXT: pand %xmm1, %xmm3
1472 ; X64-SSE2-NEXT: pandn %xmm6, %xmm1
1473 ; X64-SSE2-NEXT: por %xmm3, %xmm1
1474 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
1475 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1476 ; X64-SSE2-NEXT: pxor %xmm4, %xmm2
1477 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
1478 ; X64-SSE2-NEXT: pxor %xmm4, %xmm3
1479 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1480 ; X64-SSE2-NEXT: pand %xmm3, %xmm1
1481 ; X64-SSE2-NEXT: pandn %xmm0, %xmm3
1482 ; X64-SSE2-NEXT: por %xmm1, %xmm3
1483 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
1484 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm1
1485 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1
1486 ; X64-SSE2-NEXT: pxor %xmm0, %xmm4
1487 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm4
1488 ; X64-SSE2-NEXT: pand %xmm4, %xmm3
1489 ; X64-SSE2-NEXT: pandn %xmm0, %xmm4
1490 ; X64-SSE2-NEXT: por %xmm3, %xmm4
1491 ; X64-SSE2-NEXT: movd %xmm4, %eax
1492 ; X64-SSE2-NEXT: retq
1494 ; X64-SSE42-LABEL: test_reduce_v16i32:
1495 ; X64-SSE42: ## %bb.0:
1496 ; X64-SSE42-NEXT: pminud %xmm3, %xmm1
1497 ; X64-SSE42-NEXT: pminud %xmm2, %xmm0
1498 ; X64-SSE42-NEXT: pminud %xmm1, %xmm0
1499 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1500 ; X64-SSE42-NEXT: pminud %xmm0, %xmm1
1501 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
1502 ; X64-SSE42-NEXT: pminud %xmm1, %xmm0
1503 ; X64-SSE42-NEXT: movd %xmm0, %eax
1504 ; X64-SSE42-NEXT: retq
1506 ; X64-AVX1-LABEL: test_reduce_v16i32:
1507 ; X64-AVX1: ## %bb.0:
1508 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1509 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1510 ; X64-AVX1-NEXT: vpminud %xmm2, %xmm3, %xmm2
1511 ; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1512 ; X64-AVX1-NEXT: vpminud %xmm2, %xmm0, %xmm0
1513 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1514 ; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1515 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1516 ; X64-AVX1-NEXT: vpminud %xmm1, %xmm0, %xmm0
1517 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
1518 ; X64-AVX1-NEXT: vzeroupper
1519 ; X64-AVX1-NEXT: retq
1521 ; X64-AVX2-LABEL: test_reduce_v16i32:
1522 ; X64-AVX2: ## %bb.0:
1523 ; X64-AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm0
1524 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1525 ; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
1526 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1527 ; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
1528 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1529 ; X64-AVX2-NEXT: vpminud %xmm1, %xmm0, %xmm0
1530 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
1531 ; X64-AVX2-NEXT: vzeroupper
1532 ; X64-AVX2-NEXT: retq
1534 ; X64-AVX512-LABEL: test_reduce_v16i32:
1535 ; X64-AVX512: ## %bb.0:
1536 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1537 ; X64-AVX512-NEXT: vpminud %zmm1, %zmm0, %zmm0
1538 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1539 ; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
1540 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1541 ; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
1542 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1543 ; X64-AVX512-NEXT: vpminud %xmm1, %xmm0, %xmm0
1544 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
1545 ; X64-AVX512-NEXT: vzeroupper
1546 ; X64-AVX512-NEXT: retq
1547 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1548 %2 = icmp ult <16 x i32> %a0, %1
1549 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
1550 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1551 %5 = icmp ult <16 x i32> %3, %4
1552 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
1553 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1554 %8 = icmp ult <16 x i32> %6, %7
1555 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
1556 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1557 %11 = icmp ult <16 x i32> %9, %10
1558 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
1559 %13 = extractelement <16 x i32> %12, i32 0
1563 define i16 @test_reduce_v32i16(<32 x i16> %a0) {
1564 ; X86-SSE2-LABEL: test_reduce_v32i16:
1565 ; X86-SSE2: ## %bb.0:
1566 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
1567 ; X86-SSE2-NEXT: psubusw %xmm3, %xmm4
1568 ; X86-SSE2-NEXT: psubw %xmm4, %xmm1
1569 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
1570 ; X86-SSE2-NEXT: psubusw %xmm2, %xmm3
1571 ; X86-SSE2-NEXT: psubw %xmm3, %xmm0
1572 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1573 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
1574 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
1575 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1576 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1577 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
1578 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
1579 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1580 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1581 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
1582 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
1583 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1584 ; X86-SSE2-NEXT: psrld $16, %xmm1
1585 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1586 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
1587 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
1588 ; X86-SSE2-NEXT: movd %xmm0, %eax
1589 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
1590 ; X86-SSE2-NEXT: retl
1592 ; X86-SSE42-LABEL: test_reduce_v32i16:
1593 ; X86-SSE42: ## %bb.0:
1594 ; X86-SSE42-NEXT: pminuw %xmm3, %xmm1
1595 ; X86-SSE42-NEXT: pminuw %xmm2, %xmm0
1596 ; X86-SSE42-NEXT: pminuw %xmm1, %xmm0
1597 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
1598 ; X86-SSE42-NEXT: movd %xmm0, %eax
1599 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
1600 ; X86-SSE42-NEXT: retl
1602 ; X86-AVX1-LABEL: test_reduce_v32i16:
1603 ; X86-AVX1: ## %bb.0:
1604 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1605 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1606 ; X86-AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
1607 ; X86-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1608 ; X86-AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0
1609 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1610 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
1611 ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
1612 ; X86-AVX1-NEXT: vzeroupper
1613 ; X86-AVX1-NEXT: retl
1615 ; X86-AVX2-LABEL: test_reduce_v32i16:
1616 ; X86-AVX2: ## %bb.0:
1617 ; X86-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1618 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1619 ; X86-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1620 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1621 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
1622 ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
1623 ; X86-AVX2-NEXT: vzeroupper
1624 ; X86-AVX2-NEXT: retl
1626 ; X64-SSE2-LABEL: test_reduce_v32i16:
1627 ; X64-SSE2: ## %bb.0:
1628 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
1629 ; X64-SSE2-NEXT: psubusw %xmm3, %xmm4
1630 ; X64-SSE2-NEXT: psubw %xmm4, %xmm1
1631 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
1632 ; X64-SSE2-NEXT: psubusw %xmm2, %xmm3
1633 ; X64-SSE2-NEXT: psubw %xmm3, %xmm0
1634 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1635 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
1636 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
1637 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1638 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1639 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
1640 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
1641 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1642 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1643 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
1644 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
1645 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1646 ; X64-SSE2-NEXT: psrld $16, %xmm1
1647 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1648 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
1649 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
1650 ; X64-SSE2-NEXT: movd %xmm0, %eax
1651 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
1652 ; X64-SSE2-NEXT: retq
1654 ; X64-SSE42-LABEL: test_reduce_v32i16:
1655 ; X64-SSE42: ## %bb.0:
1656 ; X64-SSE42-NEXT: pminuw %xmm3, %xmm1
1657 ; X64-SSE42-NEXT: pminuw %xmm2, %xmm0
1658 ; X64-SSE42-NEXT: pminuw %xmm1, %xmm0
1659 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
1660 ; X64-SSE42-NEXT: movd %xmm0, %eax
1661 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
1662 ; X64-SSE42-NEXT: retq
1664 ; X64-AVX1-LABEL: test_reduce_v32i16:
1665 ; X64-AVX1: ## %bb.0:
1666 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1667 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1668 ; X64-AVX1-NEXT: vpminuw %xmm2, %xmm3, %xmm2
1669 ; X64-AVX1-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1670 ; X64-AVX1-NEXT: vpminuw %xmm2, %xmm0, %xmm0
1671 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1672 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
1673 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
1674 ; X64-AVX1-NEXT: vzeroupper
1675 ; X64-AVX1-NEXT: retq
1677 ; X64-AVX2-LABEL: test_reduce_v32i16:
1678 ; X64-AVX2: ## %bb.0:
1679 ; X64-AVX2-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1680 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1681 ; X64-AVX2-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1682 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1683 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
1684 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
1685 ; X64-AVX2-NEXT: vzeroupper
1686 ; X64-AVX2-NEXT: retq
1688 ; X64-AVX512-LABEL: test_reduce_v32i16:
1689 ; X64-AVX512: ## %bb.0:
1690 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1691 ; X64-AVX512-NEXT: vpminuw %ymm1, %ymm0, %ymm0
1692 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1693 ; X64-AVX512-NEXT: vpminuw %xmm1, %xmm0, %xmm0
1694 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1695 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
1696 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
1697 ; X64-AVX512-NEXT: vzeroupper
1698 ; X64-AVX512-NEXT: retq
1699 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1700 %2 = icmp ult <32 x i16> %a0, %1
1701 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
1702 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1703 %5 = icmp ult <32 x i16> %3, %4
1704 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
1705 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1706 %8 = icmp ult <32 x i16> %6, %7
1707 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
1708 %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1709 %11 = icmp ult <32 x i16> %9, %10
1710 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
1711 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1712 %14 = icmp ult <32 x i16> %12, %13
1713 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
1714 %16 = extractelement <32 x i16> %15, i32 0
1718 define i8 @test_reduce_v64i8(<64 x i8> %a0) {
1719 ; X86-SSE2-LABEL: test_reduce_v64i8:
1720 ; X86-SSE2: ## %bb.0:
1721 ; X86-SSE2-NEXT: pminub %xmm3, %xmm1
1722 ; X86-SSE2-NEXT: pminub %xmm2, %xmm0
1723 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
1724 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1725 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1
1726 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
1727 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
1728 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1729 ; X86-SSE2-NEXT: psrld $16, %xmm1
1730 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1
1731 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
1732 ; X86-SSE2-NEXT: psrlw $8, %xmm0
1733 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
1734 ; X86-SSE2-NEXT: movd %xmm0, %eax
1735 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
1736 ; X86-SSE2-NEXT: retl
1738 ; X86-SSE42-LABEL: test_reduce_v64i8:
1739 ; X86-SSE42: ## %bb.0:
1740 ; X86-SSE42-NEXT: pminub %xmm3, %xmm1
1741 ; X86-SSE42-NEXT: pminub %xmm2, %xmm0
1742 ; X86-SSE42-NEXT: pminub %xmm1, %xmm0
1743 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
1744 ; X86-SSE42-NEXT: psrlw $8, %xmm1
1745 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1
1746 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
1747 ; X86-SSE42-NEXT: movd %xmm0, %eax
1748 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
1749 ; X86-SSE42-NEXT: retl
1751 ; X86-AVX1-LABEL: test_reduce_v64i8:
1752 ; X86-AVX1: ## %bb.0:
1753 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1754 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1755 ; X86-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
1756 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1757 ; X86-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
1758 ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
1759 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1760 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1761 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
1762 ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
1763 ; X86-AVX1-NEXT: vzeroupper
1764 ; X86-AVX1-NEXT: retl
1766 ; X86-AVX2-LABEL: test_reduce_v64i8:
1767 ; X86-AVX2: ## %bb.0:
1768 ; X86-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
1769 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1770 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
1771 ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
1772 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
1773 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1774 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
1775 ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
1776 ; X86-AVX2-NEXT: vzeroupper
1777 ; X86-AVX2-NEXT: retl
1779 ; X64-SSE2-LABEL: test_reduce_v64i8:
1780 ; X64-SSE2: ## %bb.0:
1781 ; X64-SSE2-NEXT: pminub %xmm3, %xmm1
1782 ; X64-SSE2-NEXT: pminub %xmm2, %xmm0
1783 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
1784 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1785 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1
1786 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
1787 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
1788 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1789 ; X64-SSE2-NEXT: psrld $16, %xmm1
1790 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1
1791 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
1792 ; X64-SSE2-NEXT: psrlw $8, %xmm0
1793 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
1794 ; X64-SSE2-NEXT: movd %xmm0, %eax
1795 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
1796 ; X64-SSE2-NEXT: retq
1798 ; X64-SSE42-LABEL: test_reduce_v64i8:
1799 ; X64-SSE42: ## %bb.0:
1800 ; X64-SSE42-NEXT: pminub %xmm3, %xmm1
1801 ; X64-SSE42-NEXT: pminub %xmm2, %xmm0
1802 ; X64-SSE42-NEXT: pminub %xmm1, %xmm0
1803 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
1804 ; X64-SSE42-NEXT: psrlw $8, %xmm1
1805 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1
1806 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
1807 ; X64-SSE42-NEXT: movd %xmm0, %eax
1808 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
1809 ; X64-SSE42-NEXT: retq
1811 ; X64-AVX1-LABEL: test_reduce_v64i8:
1812 ; X64-AVX1: ## %bb.0:
1813 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1814 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1815 ; X64-AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
1816 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1817 ; X64-AVX1-NEXT: vpminub %xmm2, %xmm0, %xmm0
1818 ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
1819 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1820 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1821 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
1822 ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
1823 ; X64-AVX1-NEXT: vzeroupper
1824 ; X64-AVX1-NEXT: retq
1826 ; X64-AVX2-LABEL: test_reduce_v64i8:
1827 ; X64-AVX2: ## %bb.0:
1828 ; X64-AVX2-NEXT: vpminub %ymm1, %ymm0, %ymm0
1829 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1830 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
1831 ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
1832 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
1833 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1834 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
1835 ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
1836 ; X64-AVX2-NEXT: vzeroupper
1837 ; X64-AVX2-NEXT: retq
1839 ; X64-AVX512-LABEL: test_reduce_v64i8:
1840 ; X64-AVX512: ## %bb.0:
1841 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1842 ; X64-AVX512-NEXT: vpminub %ymm1, %ymm0, %ymm0
1843 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1844 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
1845 ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
1846 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
1847 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1848 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
1849 ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
1850 ; X64-AVX512-NEXT: vzeroupper
1851 ; X64-AVX512-NEXT: retq
1852 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1853 %2 = icmp ult <64 x i8> %a0, %1
1854 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
1855 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1856 %5 = icmp ult <64 x i8> %3, %4
1857 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
1858 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1859 %8 = icmp ult <64 x i8> %6, %7
1860 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
1861 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1862 %11 = icmp ult <64 x i8> %9, %10
1863 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
1864 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1865 %14 = icmp ult <64 x i8> %12, %13
1866 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
1867 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1868 %17 = icmp ult <64 x i8> %15, %16
1869 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
1870 %19 = extractelement <64 x i8> %18, i32 0
1875 ; Partial Vector Reductions
; Partial unsigned-min reduction: the argument is <16 x i16>, but the first
; shuffle mask starts at lane 4, so only lanes 0-7 take part in the
; shufflevector / icmp ult / select pyramid (4, 2, then 1 lane) before lane 0
; is extracted.
; Expected codegen, per the autogenerated checks below: only xmm0 of the input
; is read. The SSE2 runs use three psubusw + psubw steps; the SSE4.2 and AVX
; runs use a single (v)phminposuw, with the AVX runs also emitting vzeroupper.
1878 define i16 @test_reduce_v16i16_v8i16(<16 x i16> %a0) {
1879 ; X86-SSE2-LABEL: test_reduce_v16i16_v8i16:
1880 ; X86-SSE2: ## %bb.0:
1881 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1882 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1883 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
1884 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
1885 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1886 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1887 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
1888 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
1889 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1890 ; X86-SSE2-NEXT: psrld $16, %xmm1
1891 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1892 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
1893 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
1894 ; X86-SSE2-NEXT: movd %xmm0, %eax
1895 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
1896 ; X86-SSE2-NEXT: retl
1898 ; X86-SSE42-LABEL: test_reduce_v16i16_v8i16:
1899 ; X86-SSE42: ## %bb.0:
1900 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
1901 ; X86-SSE42-NEXT: movd %xmm0, %eax
1902 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
1903 ; X86-SSE42-NEXT: retl
1905 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
1906 ; X86-AVX: ## %bb.0:
1907 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
1908 ; X86-AVX-NEXT: vmovd %xmm0, %eax
1909 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
1910 ; X86-AVX-NEXT: vzeroupper
1911 ; X86-AVX-NEXT: retl
1913 ; X64-SSE2-LABEL: test_reduce_v16i16_v8i16:
1914 ; X64-SSE2: ## %bb.0:
1915 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1916 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1917 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
1918 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
1919 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1920 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1921 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
1922 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
1923 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1924 ; X64-SSE2-NEXT: psrld $16, %xmm1
1925 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1926 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
1927 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
1928 ; X64-SSE2-NEXT: movd %xmm0, %eax
1929 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
1930 ; X64-SSE2-NEXT: retq
1932 ; X64-SSE42-LABEL: test_reduce_v16i16_v8i16:
1933 ; X64-SSE42: ## %bb.0:
1934 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
1935 ; X64-SSE42-NEXT: movd %xmm0, %eax
1936 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
1937 ; X64-SSE42-NEXT: retq
1939 ; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
1940 ; X64-AVX: ## %bb.0:
1941 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
1942 ; X64-AVX-NEXT: vmovd %xmm0, %eax
1943 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
1944 ; X64-AVX-NEXT: vzeroupper
1945 ; X64-AVX-NEXT: retq
; Step 1 of 3: fold lanes 4-7 onto lanes 0-3 (lanes 8-15 of the input are never selected).
1946 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1947 %2 = icmp ult <16 x i16> %a0, %1
1948 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
; Step 2 of 3: fold lanes 2-3 onto lanes 0-1.
1949 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1950 %5 = icmp ult <16 x i16> %3, %4
1951 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
; Step 3 of 3: fold lane 1 onto lane 0, then read lane 0.
1952 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1953 %8 = icmp ult <16 x i16> %6, %7
1954 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
1955 %10 = extractelement <16 x i16> %9, i32 0
1959 define i16 @test_reduce_v32i16_v8i16(<32 x i16> %a0) {
1960 ; X86-SSE2-LABEL: test_reduce_v32i16_v8i16:
1961 ; X86-SSE2: ## %bb.0:
1962 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1963 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1964 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
1965 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
1966 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
1967 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1968 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
1969 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
1970 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1971 ; X86-SSE2-NEXT: psrld $16, %xmm1
1972 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1973 ; X86-SSE2-NEXT: psubusw %xmm1, %xmm2
1974 ; X86-SSE2-NEXT: psubw %xmm2, %xmm0
1975 ; X86-SSE2-NEXT: movd %xmm0, %eax
1976 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
1977 ; X86-SSE2-NEXT: retl
1979 ; X86-SSE42-LABEL: test_reduce_v32i16_v8i16:
1980 ; X86-SSE42: ## %bb.0:
1981 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
1982 ; X86-SSE42-NEXT: movd %xmm0, %eax
1983 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
1984 ; X86-SSE42-NEXT: retl
1986 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
1987 ; X86-AVX: ## %bb.0:
1988 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
1989 ; X86-AVX-NEXT: vmovd %xmm0, %eax
1990 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
1991 ; X86-AVX-NEXT: vzeroupper
1992 ; X86-AVX-NEXT: retl
1994 ; X64-SSE2-LABEL: test_reduce_v32i16_v8i16:
1995 ; X64-SSE2: ## %bb.0:
1996 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
1997 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1998 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
1999 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
2000 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
2001 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
2002 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
2003 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
2004 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
2005 ; X64-SSE2-NEXT: psrld $16, %xmm1
2006 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
2007 ; X64-SSE2-NEXT: psubusw %xmm1, %xmm2
2008 ; X64-SSE2-NEXT: psubw %xmm2, %xmm0
2009 ; X64-SSE2-NEXT: movd %xmm0, %eax
2010 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
2011 ; X64-SSE2-NEXT: retq
2013 ; X64-SSE42-LABEL: test_reduce_v32i16_v8i16:
2014 ; X64-SSE42: ## %bb.0:
2015 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
2016 ; X64-SSE42-NEXT: movd %xmm0, %eax
2017 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
2018 ; X64-SSE42-NEXT: retq
2020 ; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
2021 ; X64-AVX: ## %bb.0:
2022 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
2023 ; X64-AVX-NEXT: vmovd %xmm0, %eax
2024 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
2025 ; X64-AVX-NEXT: vzeroupper
2026 ; X64-AVX-NEXT: retq
2027 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2028 %2 = icmp ult <32 x i16> %a0, %1
2029 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
2030 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2031 %5 = icmp ult <32 x i16> %3, %4
2032 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
2033 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2034 %8 = icmp ult <32 x i16> %6, %7
2035 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
2036 %10 = extractelement <32 x i16> %9, i32 0
2040 define i8 @test_reduce_v32i8_v16i8(<32 x i8> %a0) {
2041 ; X86-SSE2-LABEL: test_reduce_v32i8_v16i8:
2042 ; X86-SSE2: ## %bb.0:
2043 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2044 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1
2045 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
2046 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
2047 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
2048 ; X86-SSE2-NEXT: psrld $16, %xmm1
2049 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1
2050 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
2051 ; X86-SSE2-NEXT: psrlw $8, %xmm0
2052 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
2053 ; X86-SSE2-NEXT: movd %xmm0, %eax
2054 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
2055 ; X86-SSE2-NEXT: retl
2057 ; X86-SSE42-LABEL: test_reduce_v32i8_v16i8:
2058 ; X86-SSE42: ## %bb.0:
2059 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
2060 ; X86-SSE42-NEXT: psrlw $8, %xmm1
2061 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1
2062 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
2063 ; X86-SSE42-NEXT: movd %xmm0, %eax
2064 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
2065 ; X86-SSE42-NEXT: retl
2067 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
2068 ; X86-AVX: ## %bb.0:
2069 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
2070 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
2071 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
2072 ; X86-AVX-NEXT: vmovd %xmm0, %eax
2073 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
2074 ; X86-AVX-NEXT: vzeroupper
2075 ; X86-AVX-NEXT: retl
2077 ; X64-SSE2-LABEL: test_reduce_v32i8_v16i8:
2078 ; X64-SSE2: ## %bb.0:
2079 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2080 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1
2081 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
2082 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
2083 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
2084 ; X64-SSE2-NEXT: psrld $16, %xmm1
2085 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1
2086 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
2087 ; X64-SSE2-NEXT: psrlw $8, %xmm0
2088 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
2089 ; X64-SSE2-NEXT: movd %xmm0, %eax
2090 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
2091 ; X64-SSE2-NEXT: retq
2093 ; X64-SSE42-LABEL: test_reduce_v32i8_v16i8:
2094 ; X64-SSE42: ## %bb.0:
2095 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
2096 ; X64-SSE42-NEXT: psrlw $8, %xmm1
2097 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1
2098 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
2099 ; X64-SSE42-NEXT: movd %xmm0, %eax
2100 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
2101 ; X64-SSE42-NEXT: retq
2103 ; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
2104 ; X64-AVX: ## %bb.0:
2105 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
2106 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
2107 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
2108 ; X64-AVX-NEXT: vmovd %xmm0, %eax
2109 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
2110 ; X64-AVX-NEXT: vzeroupper
2111 ; X64-AVX-NEXT: retq
2112 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2113 %2 = icmp ult <32 x i8> %a0, %1
2114 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
2115 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2116 %5 = icmp ult <32 x i8> %3, %4
2117 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
2118 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2119 %8 = icmp ult <32 x i8> %6, %7
2120 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
2121 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2122 %11 = icmp ult <32 x i8> %9, %10
2123 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
2124 %13 = extractelement <32 x i8> %12, i32 0
2128 define i8 @test_reduce_v64i8_v16i8(<64 x i8> %a0) {
2129 ; X86-SSE2-LABEL: test_reduce_v64i8_v16i8:
2130 ; X86-SSE2: ## %bb.0:
2131 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2132 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1
2133 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
2134 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
2135 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
2136 ; X86-SSE2-NEXT: psrld $16, %xmm1
2137 ; X86-SSE2-NEXT: pminub %xmm0, %xmm1
2138 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
2139 ; X86-SSE2-NEXT: psrlw $8, %xmm0
2140 ; X86-SSE2-NEXT: pminub %xmm1, %xmm0
2141 ; X86-SSE2-NEXT: movd %xmm0, %eax
2142 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
2143 ; X86-SSE2-NEXT: retl
2145 ; X86-SSE42-LABEL: test_reduce_v64i8_v16i8:
2146 ; X86-SSE42: ## %bb.0:
2147 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
2148 ; X86-SSE42-NEXT: psrlw $8, %xmm1
2149 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1
2150 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
2151 ; X86-SSE42-NEXT: movd %xmm0, %eax
2152 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
2153 ; X86-SSE42-NEXT: retl
2155 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
2156 ; X86-AVX: ## %bb.0:
2157 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
2158 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
2159 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
2160 ; X86-AVX-NEXT: vmovd %xmm0, %eax
2161 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
2162 ; X86-AVX-NEXT: vzeroupper
2163 ; X86-AVX-NEXT: retl
2165 ; X64-SSE2-LABEL: test_reduce_v64i8_v16i8:
2166 ; X64-SSE2: ## %bb.0:
2167 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
2168 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1
2169 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
2170 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
2171 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
2172 ; X64-SSE2-NEXT: psrld $16, %xmm1
2173 ; X64-SSE2-NEXT: pminub %xmm0, %xmm1
2174 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
2175 ; X64-SSE2-NEXT: psrlw $8, %xmm0
2176 ; X64-SSE2-NEXT: pminub %xmm1, %xmm0
2177 ; X64-SSE2-NEXT: movd %xmm0, %eax
2178 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
2179 ; X64-SSE2-NEXT: retq
2181 ; X64-SSE42-LABEL: test_reduce_v64i8_v16i8:
2182 ; X64-SSE42: ## %bb.0:
2183 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
2184 ; X64-SSE42-NEXT: psrlw $8, %xmm1
2185 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1
2186 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
2187 ; X64-SSE42-NEXT: movd %xmm0, %eax
2188 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
2189 ; X64-SSE42-NEXT: retq
2191 ; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
2192 ; X64-AVX: ## %bb.0:
2193 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
2194 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
2195 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
2196 ; X64-AVX-NEXT: vmovd %xmm0, %eax
2197 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
2198 ; X64-AVX-NEXT: vzeroupper
2199 ; X64-AVX-NEXT: retq
2200 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2201 %2 = icmp ult <64 x i8> %a0, %1
2202 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
2203 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2204 %5 = icmp ult <64 x i8> %3, %4
2205 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
2206 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2207 %8 = icmp ult <64 x i8> %6, %7
2208 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
2209 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
2210 %11 = icmp ult <64 x i8> %9, %10
2211 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
2212 %13 = extractelement <64 x i8> %12, i32 0