1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE2
3 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-SSE --check-prefix=X86-SSE42
4 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX1
5 ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX --check-prefix=X86-AVX2
6 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE2
7 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-SSE --check-prefix=X64-SSE42
8 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX1
9 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX2
10 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX --check-prefix=X64-AVX512
; Signed-min ("smin") reduction of <2 x i64>: the IR shuffles the upper element
; down, compares with 'icmp slt' and selects; CHECK lines pin the per-target
; lowering (SSE2 expands the 64-bit signed compare via pxor/pcmpgtd/pcmpeqd,
; SSE4.2/AVX use pcmpgtq + blendvpd, AVX512 uses vpminsq directly).
; NOTE(review): assertions are autogenerated (see file header) — regenerate
; with update_llc_test_checks.py rather than hand-editing CHECK lines.
16 define i64 @test_reduce_v2i64(<2 x i64> %a0) {
17 ; X86-SSE2-LABEL: test_reduce_v2i64:
19 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
20 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
21 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
22 ; X86-SSE2-NEXT: pxor %xmm2, %xmm3
23 ; X86-SSE2-NEXT: pxor %xmm1, %xmm2
24 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
25 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
26 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
27 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
28 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
29 ; X86-SSE2-NEXT: pand %xmm5, %xmm2
30 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
31 ; X86-SSE2-NEXT: por %xmm2, %xmm3
32 ; X86-SSE2-NEXT: pand %xmm3, %xmm0
33 ; X86-SSE2-NEXT: pandn %xmm1, %xmm3
34 ; X86-SSE2-NEXT: por %xmm0, %xmm3
35 ; X86-SSE2-NEXT: movd %xmm3, %eax
36 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,2,3]
37 ; X86-SSE2-NEXT: movd %xmm0, %edx
40 ; X86-SSE42-LABEL: test_reduce_v2i64:
41 ; X86-SSE42: ## %bb.0:
42 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
43 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
44 ; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
45 ; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
46 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
47 ; X86-SSE42-NEXT: movd %xmm2, %eax
48 ; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
49 ; X86-SSE42-NEXT: retl
51 ; X86-AVX-LABEL: test_reduce_v2i64:
53 ; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
54 ; X86-AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
55 ; X86-AVX-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
56 ; X86-AVX-NEXT: vmovd %xmm0, %eax
57 ; X86-AVX-NEXT: vpextrd $1, %xmm0, %edx
60 ; X64-SSE2-LABEL: test_reduce_v2i64:
62 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
63 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
64 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
65 ; X64-SSE2-NEXT: pxor %xmm2, %xmm3
66 ; X64-SSE2-NEXT: pxor %xmm1, %xmm2
67 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm4
68 ; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm4
69 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
70 ; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2
71 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
72 ; X64-SSE2-NEXT: pand %xmm5, %xmm2
73 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
74 ; X64-SSE2-NEXT: por %xmm2, %xmm3
75 ; X64-SSE2-NEXT: pand %xmm3, %xmm0
76 ; X64-SSE2-NEXT: pandn %xmm1, %xmm3
77 ; X64-SSE2-NEXT: por %xmm0, %xmm3
78 ; X64-SSE2-NEXT: movq %xmm3, %rax
81 ; X64-SSE42-LABEL: test_reduce_v2i64:
82 ; X64-SSE42: ## %bb.0:
83 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
84 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
85 ; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
86 ; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
87 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
88 ; X64-SSE42-NEXT: movq %xmm2, %rax
89 ; X64-SSE42-NEXT: retq
91 ; X64-AVX1-LABEL: test_reduce_v2i64:
93 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
94 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
95 ; X64-AVX1-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
96 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
99 ; X64-AVX2-LABEL: test_reduce_v2i64:
100 ; X64-AVX2: ## %bb.0:
101 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
102 ; X64-AVX2-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
103 ; X64-AVX2-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
104 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
105 ; X64-AVX2-NEXT: retq
107 ; X64-AVX512-LABEL: test_reduce_v2i64:
108 ; X64-AVX512: ## %bb.0:
109 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
110 ; X64-AVX512-NEXT: vpminsq %xmm1, %xmm0, %xmm0
111 ; X64-AVX512-NEXT: vmovq %xmm0, %rax
112 ; X64-AVX512-NEXT: retq
; IR: %1 = upper element; icmp slt + select = smin; result read from lane 0.
113 %1 = shufflevector <2 x i64> %a0, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
114 %2 = icmp slt <2 x i64> %a0, %1
115 %3 = select <2 x i1> %2, <2 x i64> %a0, <2 x i64> %1
116 %4 = extractelement <2 x i64> %3, i32 0
; smin reduction of <4 x i32> in two shuffle+min steps. CHECK lines: SSE2 has
; no pminsd, so it expands each step with pcmpgtd/pand/pandn/por; SSE4.2 and
; AVX use pminsd/vpminsd directly.
120 define i32 @test_reduce_v4i32(<4 x i32> %a0) {
121 ; X86-SSE2-LABEL: test_reduce_v4i32:
122 ; X86-SSE2: ## %bb.0:
123 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
124 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
125 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
126 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
127 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2
128 ; X86-SSE2-NEXT: por %xmm0, %xmm2
129 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
130 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
131 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
132 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
133 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
134 ; X86-SSE2-NEXT: por %xmm2, %xmm1
135 ; X86-SSE2-NEXT: movd %xmm1, %eax
136 ; X86-SSE2-NEXT: retl
138 ; X86-SSE42-LABEL: test_reduce_v4i32:
139 ; X86-SSE42: ## %bb.0:
140 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
141 ; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
142 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
143 ; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
144 ; X86-SSE42-NEXT: movd %xmm0, %eax
145 ; X86-SSE42-NEXT: retl
147 ; X86-AVX-LABEL: test_reduce_v4i32:
149 ; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
150 ; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
151 ; X86-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
152 ; X86-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
153 ; X86-AVX-NEXT: vmovd %xmm0, %eax
156 ; X64-SSE2-LABEL: test_reduce_v4i32:
157 ; X64-SSE2: ## %bb.0:
158 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
159 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
160 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
161 ; X64-SSE2-NEXT: pand %xmm2, %xmm0
162 ; X64-SSE2-NEXT: pandn %xmm1, %xmm2
163 ; X64-SSE2-NEXT: por %xmm0, %xmm2
164 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
165 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
166 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
167 ; X64-SSE2-NEXT: pand %xmm1, %xmm2
168 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1
169 ; X64-SSE2-NEXT: por %xmm2, %xmm1
170 ; X64-SSE2-NEXT: movd %xmm1, %eax
171 ; X64-SSE2-NEXT: retq
173 ; X64-SSE42-LABEL: test_reduce_v4i32:
174 ; X64-SSE42: ## %bb.0:
175 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
176 ; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
177 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
178 ; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
179 ; X64-SSE42-NEXT: movd %xmm0, %eax
180 ; X64-SSE42-NEXT: retq
182 ; X64-AVX-LABEL: test_reduce_v4i32:
184 ; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
185 ; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
186 ; X64-AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
187 ; X64-AVX-NEXT: vpminsd %xmm1, %xmm0, %xmm0
188 ; X64-AVX-NEXT: vmovd %xmm0, %eax
; IR: two halving steps (elements 2-3 folded into 0-1, then 1 into 0).
190 %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
191 %2 = icmp slt <4 x i32> %a0, %1
192 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %1
193 %4 = shufflevector <4 x i32> %3, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
194 %5 = icmp slt <4 x i32> %3, %4
195 %6 = select <4 x i1> %5, <4 x i32> %3, <4 x i32> %4
196 %7 = extractelement <4 x i32> %6, i32 0
; smin reduction of <8 x i16>. SSE2 uses native pminsw in three halving steps;
; SSE4.2/AVX instead flip the sign bit (pxor with a constant pool value),
; use the unsigned phminposuw, and flip the sign bit back with xorl $32768.
200 define i16 @test_reduce_v8i16(<8 x i16> %a0) {
201 ; X86-SSE2-LABEL: test_reduce_v8i16:
202 ; X86-SSE2: ## %bb.0:
203 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
204 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
205 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
206 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
207 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
208 ; X86-SSE2-NEXT: psrld $16, %xmm1
209 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
210 ; X86-SSE2-NEXT: movd %xmm1, %eax
211 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
212 ; X86-SSE2-NEXT: retl
214 ; X86-SSE42-LABEL: test_reduce_v8i16:
215 ; X86-SSE42: ## %bb.0:
216 ; X86-SSE42-NEXT: pxor LCPI2_0, %xmm0
217 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
218 ; X86-SSE42-NEXT: movd %xmm0, %eax
219 ; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
220 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
221 ; X86-SSE42-NEXT: retl
223 ; X86-AVX-LABEL: test_reduce_v8i16:
225 ; X86-AVX-NEXT: vpxor LCPI2_0, %xmm0, %xmm0
226 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
227 ; X86-AVX-NEXT: vmovd %xmm0, %eax
228 ; X86-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
229 ; X86-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
232 ; X64-SSE2-LABEL: test_reduce_v8i16:
233 ; X64-SSE2: ## %bb.0:
234 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
235 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
236 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
237 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
238 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
239 ; X64-SSE2-NEXT: psrld $16, %xmm1
240 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
241 ; X64-SSE2-NEXT: movd %xmm1, %eax
242 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
243 ; X64-SSE2-NEXT: retq
245 ; X64-SSE42-LABEL: test_reduce_v8i16:
246 ; X64-SSE42: ## %bb.0:
247 ; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
248 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
249 ; X64-SSE42-NEXT: movd %xmm0, %eax
250 ; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
251 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
252 ; X64-SSE42-NEXT: retq
254 ; X64-AVX-LABEL: test_reduce_v8i16:
256 ; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
257 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
258 ; X64-AVX-NEXT: vmovd %xmm0, %eax
259 ; X64-AVX-NEXT: xorl $32768, %eax ## imm = 0x8000
260 ; X64-AVX-NEXT: ## kill: def $ax killed $ax killed $eax
; IR: three halving steps (4, then 2, then 1 elements folded into lane 0).
262 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
263 %2 = icmp slt <8 x i16> %a0, %1
264 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %1
265 %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
266 %5 = icmp slt <8 x i16> %3, %4
267 %6 = select <8 x i1> %5, <8 x i16> %3, <8 x i16> %4
268 %7 = shufflevector <8 x i16> %6, <8 x i16> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
269 %8 = icmp slt <8 x i16> %6, %7
270 %9 = select <8 x i1> %8, <8 x i16> %6, <8 x i16> %7
271 %10 = extractelement <8 x i16> %9, i32 0
; smin reduction of <16 x i8>. SSE2 expands every step with pcmpgtb/pand/
; pandn/por; SSE4.2/AVX flip the sign bit (pxor), reduce bytes into words
; with psrlw+pminub, finish with phminposuw, and undo the flip (xorb $-128).
275 define i8 @test_reduce_v16i8(<16 x i8> %a0) {
276 ; X86-SSE2-LABEL: test_reduce_v16i8:
277 ; X86-SSE2: ## %bb.0:
278 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
279 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
280 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
281 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
282 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2
283 ; X86-SSE2-NEXT: por %xmm0, %xmm2
284 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
285 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
286 ; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
287 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
288 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
289 ; X86-SSE2-NEXT: por %xmm2, %xmm1
290 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
291 ; X86-SSE2-NEXT: psrld $16, %xmm0
292 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
293 ; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
294 ; X86-SSE2-NEXT: pand %xmm2, %xmm1
295 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2
296 ; X86-SSE2-NEXT: por %xmm1, %xmm2
297 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
298 ; X86-SSE2-NEXT: psrlw $8, %xmm0
299 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
300 ; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
301 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
302 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
303 ; X86-SSE2-NEXT: por %xmm2, %xmm1
304 ; X86-SSE2-NEXT: movd %xmm1, %eax
305 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
306 ; X86-SSE2-NEXT: retl
308 ; X86-SSE42-LABEL: test_reduce_v16i8:
309 ; X86-SSE42: ## %bb.0:
310 ; X86-SSE42-NEXT: pxor LCPI3_0, %xmm0
311 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
312 ; X86-SSE42-NEXT: psrlw $8, %xmm1
313 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1
314 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
315 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
316 ; X86-SSE42-NEXT: xorb $-128, %al
317 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
318 ; X86-SSE42-NEXT: retl
320 ; X86-AVX-LABEL: test_reduce_v16i8:
322 ; X86-AVX-NEXT: vpxor LCPI3_0, %xmm0, %xmm0
323 ; X86-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
324 ; X86-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
325 ; X86-AVX-NEXT: vphminposuw %xmm0, %xmm0
326 ; X86-AVX-NEXT: vpextrb $0, %xmm0, %eax
327 ; X86-AVX-NEXT: xorb $-128, %al
328 ; X86-AVX-NEXT: ## kill: def $al killed $al killed $eax
331 ; X64-SSE2-LABEL: test_reduce_v16i8:
332 ; X64-SSE2: ## %bb.0:
333 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
334 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
335 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
336 ; X64-SSE2-NEXT: pand %xmm2, %xmm0
337 ; X64-SSE2-NEXT: pandn %xmm1, %xmm2
338 ; X64-SSE2-NEXT: por %xmm0, %xmm2
339 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
340 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
341 ; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
342 ; X64-SSE2-NEXT: pand %xmm1, %xmm2
343 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1
344 ; X64-SSE2-NEXT: por %xmm2, %xmm1
345 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
346 ; X64-SSE2-NEXT: psrld $16, %xmm0
347 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
348 ; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
349 ; X64-SSE2-NEXT: pand %xmm2, %xmm1
350 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2
351 ; X64-SSE2-NEXT: por %xmm1, %xmm2
352 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
353 ; X64-SSE2-NEXT: psrlw $8, %xmm0
354 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
355 ; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
356 ; X64-SSE2-NEXT: pand %xmm1, %xmm2
357 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1
358 ; X64-SSE2-NEXT: por %xmm2, %xmm1
359 ; X64-SSE2-NEXT: movd %xmm1, %eax
360 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
361 ; X64-SSE2-NEXT: retq
363 ; X64-SSE42-LABEL: test_reduce_v16i8:
364 ; X64-SSE42: ## %bb.0:
365 ; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
366 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
367 ; X64-SSE42-NEXT: psrlw $8, %xmm1
368 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1
369 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
370 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
371 ; X64-SSE42-NEXT: xorb $-128, %al
372 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
373 ; X64-SSE42-NEXT: retq
375 ; X64-AVX-LABEL: test_reduce_v16i8:
377 ; X64-AVX-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
378 ; X64-AVX-NEXT: vpsrlw $8, %xmm0, %xmm1
379 ; X64-AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0
380 ; X64-AVX-NEXT: vphminposuw %xmm0, %xmm0
381 ; X64-AVX-NEXT: vpextrb $0, %xmm0, %eax
382 ; X64-AVX-NEXT: xorb $-128, %al
383 ; X64-AVX-NEXT: ## kill: def $al killed $al killed $eax
; IR: four halving steps (8, 4, 2, 1 elements folded into lane 0).
385 %1 = shufflevector <16 x i8> %a0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
386 %2 = icmp slt <16 x i8> %a0, %1
387 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %1
388 %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
389 %5 = icmp slt <16 x i8> %3, %4
390 %6 = select <16 x i1> %5, <16 x i8> %3, <16 x i8> %4
391 %7 = shufflevector <16 x i8> %6, <16 x i8> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
392 %8 = icmp slt <16 x i8> %6, %7
393 %9 = select <16 x i1> %8, <16 x i8> %6, <16 x i8> %7
394 %10 = shufflevector <16 x i8> %9, <16 x i8> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
395 %11 = icmp slt <16 x i8> %9, %10
396 %12 = select <16 x i1> %11, <16 x i8> %9, <16 x i8> %10
397 %13 = extractelement <16 x i8> %12, i32 0
; smin reduction of <4 x i64> (256-bit input). SSE2 expands each i64 compare
; via sign-bit-flipped pcmpgtd/pcmpeqd; SSE4.2 uses pcmpgtq+blendvpd; AVX1
; splits the ymm compare into two xmm pcmpgtq; AVX2 compares whole ymm;
; AVX512 uses vpminsq.
405 define i64 @test_reduce_v4i64(<4 x i64> %a0) {
406 ; X86-SSE2-LABEL: test_reduce_v4i64:
407 ; X86-SSE2: ## %bb.0:
408 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,0,2147483648,0]
409 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm3
410 ; X86-SSE2-NEXT: pxor %xmm2, %xmm3
411 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm4
412 ; X86-SSE2-NEXT: pxor %xmm2, %xmm4
413 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm5
414 ; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
415 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
416 ; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
417 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
418 ; X86-SSE2-NEXT: pand %xmm6, %xmm3
419 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
420 ; X86-SSE2-NEXT: por %xmm3, %xmm4
421 ; X86-SSE2-NEXT: pand %xmm4, %xmm0
422 ; X86-SSE2-NEXT: pandn %xmm1, %xmm4
423 ; X86-SSE2-NEXT: por %xmm0, %xmm4
424 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
425 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm1
426 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1
427 ; X86-SSE2-NEXT: pxor %xmm0, %xmm2
428 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
429 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
430 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
431 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
432 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
433 ; X86-SSE2-NEXT: pand %xmm5, %xmm1
434 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
435 ; X86-SSE2-NEXT: por %xmm1, %xmm2
436 ; X86-SSE2-NEXT: pand %xmm2, %xmm4
437 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2
438 ; X86-SSE2-NEXT: por %xmm4, %xmm2
439 ; X86-SSE2-NEXT: movd %xmm2, %eax
440 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
441 ; X86-SSE2-NEXT: movd %xmm0, %edx
442 ; X86-SSE2-NEXT: retl
444 ; X86-SSE42-LABEL: test_reduce_v4i64:
445 ; X86-SSE42: ## %bb.0:
446 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm2
447 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
448 ; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
449 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
450 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
451 ; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
452 ; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
453 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
454 ; X86-SSE42-NEXT: movd %xmm2, %eax
455 ; X86-SSE42-NEXT: pextrd $1, %xmm2, %edx
456 ; X86-SSE42-NEXT: retl
458 ; X86-AVX1-LABEL: test_reduce_v4i64:
459 ; X86-AVX1: ## %bb.0:
460 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
461 ; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
462 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
463 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
464 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
465 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
466 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
467 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
468 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
469 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
470 ; X86-AVX1-NEXT: vzeroupper
471 ; X86-AVX1-NEXT: retl
473 ; X86-AVX2-LABEL: test_reduce_v4i64:
474 ; X86-AVX2: ## %bb.0:
475 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
476 ; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
477 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
478 ; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
479 ; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
480 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
481 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
482 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
483 ; X86-AVX2-NEXT: vzeroupper
484 ; X86-AVX2-NEXT: retl
486 ; X64-SSE2-LABEL: test_reduce_v4i64:
487 ; X64-SSE2: ## %bb.0:
488 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648]
489 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm3
490 ; X64-SSE2-NEXT: pxor %xmm2, %xmm3
491 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm4
492 ; X64-SSE2-NEXT: pxor %xmm2, %xmm4
493 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm5
494 ; X64-SSE2-NEXT: pcmpgtd %xmm3, %xmm5
495 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
496 ; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm4
497 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
498 ; X64-SSE2-NEXT: pand %xmm6, %xmm3
499 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
500 ; X64-SSE2-NEXT: por %xmm3, %xmm4
501 ; X64-SSE2-NEXT: pand %xmm4, %xmm0
502 ; X64-SSE2-NEXT: pandn %xmm1, %xmm4
503 ; X64-SSE2-NEXT: por %xmm0, %xmm4
504 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm4[2,3,0,1]
505 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm1
506 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1
507 ; X64-SSE2-NEXT: pxor %xmm0, %xmm2
508 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
509 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm3
510 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
511 ; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm2
512 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
513 ; X64-SSE2-NEXT: pand %xmm5, %xmm1
514 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
515 ; X64-SSE2-NEXT: por %xmm1, %xmm2
516 ; X64-SSE2-NEXT: pand %xmm2, %xmm4
517 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2
518 ; X64-SSE2-NEXT: por %xmm4, %xmm2
519 ; X64-SSE2-NEXT: movq %xmm2, %rax
520 ; X64-SSE2-NEXT: retq
522 ; X64-SSE42-LABEL: test_reduce_v4i64:
523 ; X64-SSE42: ## %bb.0:
524 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm2
525 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
526 ; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
527 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm1
528 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1]
529 ; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
530 ; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
531 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2
532 ; X64-SSE42-NEXT: movq %xmm2, %rax
533 ; X64-SSE42-NEXT: retq
535 ; X64-AVX1-LABEL: test_reduce_v4i64:
536 ; X64-AVX1: ## %bb.0:
537 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
538 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm2
539 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
540 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
541 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
542 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
543 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
544 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
545 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
546 ; X64-AVX1-NEXT: vzeroupper
547 ; X64-AVX1-NEXT: retq
549 ; X64-AVX2-LABEL: test_reduce_v4i64:
550 ; X64-AVX2: ## %bb.0:
551 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
552 ; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
553 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
554 ; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
555 ; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
556 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
557 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
558 ; X64-AVX2-NEXT: vzeroupper
559 ; X64-AVX2-NEXT: retq
561 ; X64-AVX512-LABEL: test_reduce_v4i64:
562 ; X64-AVX512: ## %bb.0:
563 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
564 ; X64-AVX512-NEXT: vpminsq %ymm1, %ymm0, %ymm0
565 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
566 ; X64-AVX512-NEXT: vpminsq %ymm1, %ymm0, %ymm0
567 ; X64-AVX512-NEXT: vmovq %xmm0, %rax
568 ; X64-AVX512-NEXT: vzeroupper
569 ; X64-AVX512-NEXT: retq
; IR: fold high 128-bit half into low, then element 1 into 0.
570 %1 = shufflevector <4 x i64> %a0, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
571 %2 = icmp slt <4 x i64> %a0, %1
572 %3 = select <4 x i1> %2, <4 x i64> %a0, <4 x i64> %1
573 %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
574 %5 = icmp slt <4 x i64> %3, %4
575 %6 = select <4 x i1> %5, <4 x i64> %3, <4 x i64> %4
576 %7 = extractelement <4 x i64> %6, i32 0
; smin reduction of <8 x i32> (256-bit input). SSE2 expands each min with
; pcmpgtd/pand/pandn/por; SSE4.2 and AVX1 use xmm pminsd after folding the
; halves; AVX2/AVX512 keep working on ymm vpminsd.
580 define i32 @test_reduce_v8i32(<8 x i32> %a0) {
581 ; X86-SSE2-LABEL: test_reduce_v8i32:
582 ; X86-SSE2: ## %bb.0:
583 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
584 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
585 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
586 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2
587 ; X86-SSE2-NEXT: por %xmm0, %xmm2
588 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
589 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
590 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
591 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
592 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
593 ; X86-SSE2-NEXT: por %xmm2, %xmm1
594 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
595 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
596 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
597 ; X86-SSE2-NEXT: pand %xmm2, %xmm1
598 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2
599 ; X86-SSE2-NEXT: por %xmm1, %xmm2
600 ; X86-SSE2-NEXT: movd %xmm2, %eax
601 ; X86-SSE2-NEXT: retl
603 ; X86-SSE42-LABEL: test_reduce_v8i32:
604 ; X86-SSE42: ## %bb.0:
605 ; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
606 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
607 ; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
608 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
609 ; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
610 ; X86-SSE42-NEXT: movd %xmm0, %eax
611 ; X86-SSE42-NEXT: retl
613 ; X86-AVX1-LABEL: test_reduce_v8i32:
614 ; X86-AVX1: ## %bb.0:
615 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
616 ; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
617 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
618 ; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
619 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
620 ; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
621 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
622 ; X86-AVX1-NEXT: vzeroupper
623 ; X86-AVX1-NEXT: retl
625 ; X86-AVX2-LABEL: test_reduce_v8i32:
626 ; X86-AVX2: ## %bb.0:
627 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
628 ; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
629 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
630 ; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
631 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
632 ; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
633 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
634 ; X86-AVX2-NEXT: vzeroupper
635 ; X86-AVX2-NEXT: retl
637 ; X64-SSE2-LABEL: test_reduce_v8i32:
638 ; X64-SSE2: ## %bb.0:
639 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
640 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
641 ; X64-SSE2-NEXT: pand %xmm2, %xmm0
642 ; X64-SSE2-NEXT: pandn %xmm1, %xmm2
643 ; X64-SSE2-NEXT: por %xmm0, %xmm2
644 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
645 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
646 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
647 ; X64-SSE2-NEXT: pand %xmm1, %xmm2
648 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1
649 ; X64-SSE2-NEXT: por %xmm2, %xmm1
650 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
651 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
652 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
653 ; X64-SSE2-NEXT: pand %xmm2, %xmm1
654 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2
655 ; X64-SSE2-NEXT: por %xmm1, %xmm2
656 ; X64-SSE2-NEXT: movd %xmm2, %eax
657 ; X64-SSE2-NEXT: retq
659 ; X64-SSE42-LABEL: test_reduce_v8i32:
660 ; X64-SSE42: ## %bb.0:
661 ; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
662 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
663 ; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
664 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
665 ; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
666 ; X64-SSE42-NEXT: movd %xmm0, %eax
667 ; X64-SSE42-NEXT: retq
669 ; X64-AVX1-LABEL: test_reduce_v8i32:
670 ; X64-AVX1: ## %bb.0:
671 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
672 ; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
673 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
674 ; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
675 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
676 ; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
677 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
678 ; X64-AVX1-NEXT: vzeroupper
679 ; X64-AVX1-NEXT: retq
681 ; X64-AVX2-LABEL: test_reduce_v8i32:
682 ; X64-AVX2: ## %bb.0:
683 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
684 ; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
685 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
686 ; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
687 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
688 ; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
689 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
690 ; X64-AVX2-NEXT: vzeroupper
691 ; X64-AVX2-NEXT: retq
693 ; X64-AVX512-LABEL: test_reduce_v8i32:
694 ; X64-AVX512: ## %bb.0:
695 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
696 ; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
697 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
698 ; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
699 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
700 ; X64-AVX512-NEXT: vpminsd %ymm1, %ymm0, %ymm0
701 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
702 ; X64-AVX512-NEXT: vzeroupper
703 ; X64-AVX512-NEXT: retq
; IR: three halving steps (high 4, then 2, then 1 elements into lane 0).
704 %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
705 %2 = icmp slt <8 x i32> %a0, %1
706 %3 = select <8 x i1> %2, <8 x i32> %a0, <8 x i32> %1
707 %4 = shufflevector <8 x i32> %3, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
708 %5 = icmp slt <8 x i32> %3, %4
709 %6 = select <8 x i1> %5, <8 x i32> %3, <8 x i32> %4
710 %7 = shufflevector <8 x i32> %6, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
711 %8 = icmp slt <8 x i32> %6, %7
712 %9 = select <8 x i1> %8, <8 x i32> %6, <8 x i32> %7
713 %10 = extractelement <8 x i32> %9, i32 0
717 define i16 @test_reduce_v16i16(<16 x i16> %a0) {
718 ; X86-SSE2-LABEL: test_reduce_v16i16:
719 ; X86-SSE2: ## %bb.0:
720 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
721 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
722 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
723 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
724 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
725 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
726 ; X86-SSE2-NEXT: psrld $16, %xmm1
727 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
728 ; X86-SSE2-NEXT: movd %xmm1, %eax
729 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
730 ; X86-SSE2-NEXT: retl
732 ; X86-SSE42-LABEL: test_reduce_v16i16:
733 ; X86-SSE42: ## %bb.0:
734 ; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
735 ; X86-SSE42-NEXT: pxor LCPI6_0, %xmm0
736 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
737 ; X86-SSE42-NEXT: movd %xmm0, %eax
738 ; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
739 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
740 ; X86-SSE42-NEXT: retl
742 ; X86-AVX1-LABEL: test_reduce_v16i16:
743 ; X86-AVX1: ## %bb.0:
744 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
745 ; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
746 ; X86-AVX1-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
747 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
748 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
749 ; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
750 ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
751 ; X86-AVX1-NEXT: vzeroupper
752 ; X86-AVX1-NEXT: retl
754 ; X86-AVX2-LABEL: test_reduce_v16i16:
755 ; X86-AVX2: ## %bb.0:
756 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
757 ; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
758 ; X86-AVX2-NEXT: vpxor LCPI6_0, %xmm0, %xmm0
759 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
760 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
761 ; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
762 ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
763 ; X86-AVX2-NEXT: vzeroupper
764 ; X86-AVX2-NEXT: retl
766 ; X64-SSE2-LABEL: test_reduce_v16i16:
767 ; X64-SSE2: ## %bb.0:
768 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
769 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
770 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
771 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
772 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
773 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
774 ; X64-SSE2-NEXT: psrld $16, %xmm1
775 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
776 ; X64-SSE2-NEXT: movd %xmm1, %eax
777 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
778 ; X64-SSE2-NEXT: retq
780 ; X64-SSE42-LABEL: test_reduce_v16i16:
781 ; X64-SSE42: ## %bb.0:
782 ; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
783 ; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
784 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
785 ; X64-SSE42-NEXT: movd %xmm0, %eax
786 ; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
787 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
788 ; X64-SSE42-NEXT: retq
790 ; X64-AVX1-LABEL: test_reduce_v16i16:
791 ; X64-AVX1: ## %bb.0:
792 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
793 ; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
794 ; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
795 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
796 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
797 ; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
798 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
799 ; X64-AVX1-NEXT: vzeroupper
800 ; X64-AVX1-NEXT: retq
802 ; X64-AVX2-LABEL: test_reduce_v16i16:
803 ; X64-AVX2: ## %bb.0:
804 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
805 ; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
806 ; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
807 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
808 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
809 ; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
810 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
811 ; X64-AVX2-NEXT: vzeroupper
812 ; X64-AVX2-NEXT: retq
814 ; X64-AVX512-LABEL: test_reduce_v16i16:
815 ; X64-AVX512: ## %bb.0:
816 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
817 ; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
818 ; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
819 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
820 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
821 ; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
822 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
823 ; X64-AVX512-NEXT: vzeroupper
824 ; X64-AVX512-NEXT: retq
825 %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
826 %2 = icmp slt <16 x i16> %a0, %1
827 %3 = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
828 %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
829 %5 = icmp slt <16 x i16> %3, %4
830 %6 = select <16 x i1> %5, <16 x i16> %3, <16 x i16> %4
831 %7 = shufflevector <16 x i16> %6, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
832 %8 = icmp slt <16 x i16> %6, %7
833 %9 = select <16 x i1> %8, <16 x i16> %6, <16 x i16> %7
834 %10 = shufflevector <16 x i16> %9, <16 x i16> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
835 %11 = icmp slt <16 x i16> %9, %10
836 %12 = select <16 x i1> %11, <16 x i16> %9, <16 x i16> %10
837 %13 = extractelement <16 x i16> %12, i32 0
841 define i8 @test_reduce_v32i8(<32 x i8> %a0) {
842 ; X86-SSE2-LABEL: test_reduce_v32i8:
843 ; X86-SSE2: ## %bb.0:
844 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm2
845 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
846 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
847 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2
848 ; X86-SSE2-NEXT: por %xmm0, %xmm2
849 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
850 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
851 ; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
852 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
853 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
854 ; X86-SSE2-NEXT: por %xmm2, %xmm1
855 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
856 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
857 ; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
858 ; X86-SSE2-NEXT: pand %xmm2, %xmm1
859 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2
860 ; X86-SSE2-NEXT: por %xmm1, %xmm2
861 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
862 ; X86-SSE2-NEXT: psrld $16, %xmm0
863 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
864 ; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
865 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
866 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
867 ; X86-SSE2-NEXT: por %xmm2, %xmm1
868 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
869 ; X86-SSE2-NEXT: psrlw $8, %xmm0
870 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
871 ; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
872 ; X86-SSE2-NEXT: pand %xmm2, %xmm1
873 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2
874 ; X86-SSE2-NEXT: por %xmm1, %xmm2
875 ; X86-SSE2-NEXT: movd %xmm2, %eax
876 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
877 ; X86-SSE2-NEXT: retl
879 ; X86-SSE42-LABEL: test_reduce_v32i8:
880 ; X86-SSE42: ## %bb.0:
881 ; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
882 ; X86-SSE42-NEXT: pxor LCPI7_0, %xmm0
883 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
884 ; X86-SSE42-NEXT: psrlw $8, %xmm1
885 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1
886 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
887 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
888 ; X86-SSE42-NEXT: xorb $-128, %al
889 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
890 ; X86-SSE42-NEXT: retl
892 ; X86-AVX1-LABEL: test_reduce_v32i8:
893 ; X86-AVX1: ## %bb.0:
894 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
895 ; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
896 ; X86-AVX1-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
897 ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
898 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
899 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
900 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
901 ; X86-AVX1-NEXT: xorb $-128, %al
902 ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
903 ; X86-AVX1-NEXT: vzeroupper
904 ; X86-AVX1-NEXT: retl
906 ; X86-AVX2-LABEL: test_reduce_v32i8:
907 ; X86-AVX2: ## %bb.0:
908 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
909 ; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
910 ; X86-AVX2-NEXT: vpxor LCPI7_0, %xmm0, %xmm0
911 ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
912 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
913 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
914 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
915 ; X86-AVX2-NEXT: xorb $-128, %al
916 ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
917 ; X86-AVX2-NEXT: vzeroupper
918 ; X86-AVX2-NEXT: retl
920 ; X64-SSE2-LABEL: test_reduce_v32i8:
921 ; X64-SSE2: ## %bb.0:
922 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
923 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm2
924 ; X64-SSE2-NEXT: pand %xmm2, %xmm0
925 ; X64-SSE2-NEXT: pandn %xmm1, %xmm2
926 ; X64-SSE2-NEXT: por %xmm0, %xmm2
927 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[2,3,0,1]
928 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
929 ; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
930 ; X64-SSE2-NEXT: pand %xmm1, %xmm2
931 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1
932 ; X64-SSE2-NEXT: por %xmm2, %xmm1
933 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
934 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
935 ; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
936 ; X64-SSE2-NEXT: pand %xmm2, %xmm1
937 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2
938 ; X64-SSE2-NEXT: por %xmm1, %xmm2
939 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
940 ; X64-SSE2-NEXT: psrld $16, %xmm0
941 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
942 ; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
943 ; X64-SSE2-NEXT: pand %xmm1, %xmm2
944 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1
945 ; X64-SSE2-NEXT: por %xmm2, %xmm1
946 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
947 ; X64-SSE2-NEXT: psrlw $8, %xmm0
948 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
949 ; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
950 ; X64-SSE2-NEXT: pand %xmm2, %xmm1
951 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2
952 ; X64-SSE2-NEXT: por %xmm1, %xmm2
953 ; X64-SSE2-NEXT: movd %xmm2, %eax
954 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
955 ; X64-SSE2-NEXT: retq
957 ; X64-SSE42-LABEL: test_reduce_v32i8:
958 ; X64-SSE42: ## %bb.0:
959 ; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
960 ; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
961 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
962 ; X64-SSE42-NEXT: psrlw $8, %xmm1
963 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1
964 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
965 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
966 ; X64-SSE42-NEXT: xorb $-128, %al
967 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
968 ; X64-SSE42-NEXT: retq
970 ; X64-AVX1-LABEL: test_reduce_v32i8:
971 ; X64-AVX1: ## %bb.0:
972 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
973 ; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
974 ; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
975 ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
976 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
977 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
978 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
979 ; X64-AVX1-NEXT: xorb $-128, %al
980 ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
981 ; X64-AVX1-NEXT: vzeroupper
982 ; X64-AVX1-NEXT: retq
984 ; X64-AVX2-LABEL: test_reduce_v32i8:
985 ; X64-AVX2: ## %bb.0:
986 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
987 ; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
988 ; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
989 ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
990 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
991 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
992 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
993 ; X64-AVX2-NEXT: xorb $-128, %al
994 ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
995 ; X64-AVX2-NEXT: vzeroupper
996 ; X64-AVX2-NEXT: retq
998 ; X64-AVX512-LABEL: test_reduce_v32i8:
999 ; X64-AVX512: ## %bb.0:
1000 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1001 ; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1002 ; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
1003 ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
1004 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
1005 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1006 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
1007 ; X64-AVX512-NEXT: xorb $-128, %al
1008 ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
1009 ; X64-AVX512-NEXT: vzeroupper
1010 ; X64-AVX512-NEXT: retq
1011 %1 = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1012 %2 = icmp slt <32 x i8> %a0, %1
1013 %3 = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
1014 %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1015 %5 = icmp slt <32 x i8> %3, %4
1016 %6 = select <32 x i1> %5, <32 x i8> %3, <32 x i8> %4
1017 %7 = shufflevector <32 x i8> %6, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1018 %8 = icmp slt <32 x i8> %6, %7
1019 %9 = select <32 x i1> %8, <32 x i8> %6, <32 x i8> %7
1020 %10 = shufflevector <32 x i8> %9, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1021 %11 = icmp slt <32 x i8> %9, %10
1022 %12 = select <32 x i1> %11, <32 x i8> %9, <32 x i8> %10
1023 %13 = shufflevector <32 x i8> %12, <32 x i8> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1024 %14 = icmp slt <32 x i8> %12, %13
1025 %15 = select <32 x i1> %14, <32 x i8> %12, <32 x i8> %13
1026 %16 = extractelement <32 x i8> %15, i32 0
1034 define i64 @test_reduce_v8i64(<8 x i64> %a0) {
1035 ; X86-SSE2-LABEL: test_reduce_v8i64:
1036 ; X86-SSE2: ## %bb.0:
1037 ; X86-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,0,2147483648,0]
1038 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm5
1039 ; X86-SSE2-NEXT: pxor %xmm4, %xmm5
1040 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
1041 ; X86-SSE2-NEXT: pxor %xmm4, %xmm6
1042 ; X86-SSE2-NEXT: movdqa %xmm6, %xmm7
1043 ; X86-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1044 ; X86-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1045 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[0,0,2,2]
1046 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1047 ; X86-SSE2-NEXT: pand %xmm5, %xmm6
1048 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1049 ; X86-SSE2-NEXT: por %xmm6, %xmm5
1050 ; X86-SSE2-NEXT: pand %xmm5, %xmm1
1051 ; X86-SSE2-NEXT: pandn %xmm3, %xmm5
1052 ; X86-SSE2-NEXT: por %xmm1, %xmm5
1053 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1054 ; X86-SSE2-NEXT: pxor %xmm4, %xmm1
1055 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
1056 ; X86-SSE2-NEXT: pxor %xmm4, %xmm3
1057 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm6
1058 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm6
1059 ; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm3
1060 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[0,0,2,2]
1061 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1062 ; X86-SSE2-NEXT: pand %xmm1, %xmm3
1063 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm6[1,1,3,3]
1064 ; X86-SSE2-NEXT: por %xmm3, %xmm1
1065 ; X86-SSE2-NEXT: pand %xmm1, %xmm0
1066 ; X86-SSE2-NEXT: pandn %xmm2, %xmm1
1067 ; X86-SSE2-NEXT: por %xmm0, %xmm1
1068 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
1069 ; X86-SSE2-NEXT: pxor %xmm4, %xmm0
1070 ; X86-SSE2-NEXT: movdqa %xmm5, %xmm2
1071 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2
1072 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm3
1073 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3
1074 ; X86-SSE2-NEXT: pcmpeqd %xmm0, %xmm2
1075 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[0,0,2,2]
1076 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
1077 ; X86-SSE2-NEXT: pand %xmm0, %xmm2
1078 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,3,3]
1079 ; X86-SSE2-NEXT: por %xmm2, %xmm0
1080 ; X86-SSE2-NEXT: pand %xmm0, %xmm1
1081 ; X86-SSE2-NEXT: pandn %xmm5, %xmm0
1082 ; X86-SSE2-NEXT: por %xmm1, %xmm0
1083 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1084 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1085 ; X86-SSE2-NEXT: pxor %xmm4, %xmm2
1086 ; X86-SSE2-NEXT: pxor %xmm1, %xmm4
1087 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm3
1088 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1089 ; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1090 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,0,2,2]
1091 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
1092 ; X86-SSE2-NEXT: pand %xmm2, %xmm4
1093 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[1,1,3,3]
1094 ; X86-SSE2-NEXT: por %xmm4, %xmm2
1095 ; X86-SSE2-NEXT: pand %xmm2, %xmm0
1096 ; X86-SSE2-NEXT: pandn %xmm1, %xmm2
1097 ; X86-SSE2-NEXT: por %xmm0, %xmm2
1098 ; X86-SSE2-NEXT: movd %xmm2, %eax
1099 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1100 ; X86-SSE2-NEXT: movd %xmm0, %edx
1101 ; X86-SSE2-NEXT: retl
1103 ; X86-SSE42-LABEL: test_reduce_v8i64:
1104 ; X86-SSE42: ## %bb.0:
1105 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm4
1106 ; X86-SSE42-NEXT: movdqa %xmm2, %xmm0
1107 ; X86-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
1108 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1109 ; X86-SSE42-NEXT: movdqa %xmm3, %xmm0
1110 ; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
1111 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1112 ; X86-SSE42-NEXT: movapd %xmm3, %xmm0
1113 ; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
1114 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1115 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
1116 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm0
1117 ; X86-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1118 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
1119 ; X86-SSE42-NEXT: movd %xmm1, %eax
1120 ; X86-SSE42-NEXT: pextrd $1, %xmm1, %edx
1121 ; X86-SSE42-NEXT: retl
1123 ; X86-AVX1-LABEL: test_reduce_v8i64:
1124 ; X86-AVX1: ## %bb.0:
1125 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1126 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1127 ; X86-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1128 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
1129 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1130 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1131 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1132 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1133 ; X86-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
1134 ; X86-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1135 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1136 ; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1137 ; X86-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1138 ; X86-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1139 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
1140 ; X86-AVX1-NEXT: vpextrd $1, %xmm0, %edx
1141 ; X86-AVX1-NEXT: vzeroupper
1142 ; X86-AVX1-NEXT: retl
1144 ; X86-AVX2-LABEL: test_reduce_v8i64:
1145 ; X86-AVX2: ## %bb.0:
1146 ; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1147 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1148 ; X86-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
1149 ; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1150 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1151 ; X86-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1152 ; X86-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1153 ; X86-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1154 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
1155 ; X86-AVX2-NEXT: vpextrd $1, %xmm0, %edx
1156 ; X86-AVX2-NEXT: vzeroupper
1157 ; X86-AVX2-NEXT: retl
1159 ; X64-SSE2-LABEL: test_reduce_v8i64:
1160 ; X64-SSE2: ## %bb.0:
1161 ; X64-SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
1162 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm5
1163 ; X64-SSE2-NEXT: pxor %xmm4, %xmm5
1164 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
1165 ; X64-SSE2-NEXT: pxor %xmm4, %xmm6
1166 ; X64-SSE2-NEXT: movdqa %xmm6, %xmm7
1167 ; X64-SSE2-NEXT: pcmpgtd %xmm5, %xmm7
1168 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm8 = xmm7[0,0,2,2]
1169 ; X64-SSE2-NEXT: pcmpeqd %xmm5, %xmm6
1170 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm6[1,1,3,3]
1171 ; X64-SSE2-NEXT: pand %xmm8, %xmm6
1172 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm7[1,1,3,3]
1173 ; X64-SSE2-NEXT: por %xmm6, %xmm5
1174 ; X64-SSE2-NEXT: pand %xmm5, %xmm1
1175 ; X64-SSE2-NEXT: pandn %xmm3, %xmm5
1176 ; X64-SSE2-NEXT: por %xmm1, %xmm5
1177 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1178 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1
1179 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm3
1180 ; X64-SSE2-NEXT: pxor %xmm4, %xmm3
1181 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm6
1182 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm6
1183 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm7 = xmm6[0,0,2,2]
1184 ; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm3
1185 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
1186 ; X64-SSE2-NEXT: pand %xmm7, %xmm1
1187 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm6[1,1,3,3]
1188 ; X64-SSE2-NEXT: por %xmm1, %xmm3
1189 ; X64-SSE2-NEXT: pand %xmm3, %xmm0
1190 ; X64-SSE2-NEXT: pandn %xmm2, %xmm3
1191 ; X64-SSE2-NEXT: por %xmm0, %xmm3
1192 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm0
1193 ; X64-SSE2-NEXT: pxor %xmm4, %xmm0
1194 ; X64-SSE2-NEXT: movdqa %xmm5, %xmm1
1195 ; X64-SSE2-NEXT: pxor %xmm4, %xmm1
1196 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1197 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm2
1198 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm2[0,0,2,2]
1199 ; X64-SSE2-NEXT: pcmpeqd %xmm0, %xmm1
1200 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
1201 ; X64-SSE2-NEXT: pand %xmm6, %xmm0
1202 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
1203 ; X64-SSE2-NEXT: por %xmm0, %xmm1
1204 ; X64-SSE2-NEXT: pand %xmm1, %xmm3
1205 ; X64-SSE2-NEXT: pandn %xmm5, %xmm1
1206 ; X64-SSE2-NEXT: por %xmm3, %xmm1
1207 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1208 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm2
1209 ; X64-SSE2-NEXT: pxor %xmm4, %xmm2
1210 ; X64-SSE2-NEXT: pxor %xmm0, %xmm4
1211 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm3
1212 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm3
1213 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
1214 ; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm4
1215 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
1216 ; X64-SSE2-NEXT: pand %xmm5, %xmm2
1217 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
1218 ; X64-SSE2-NEXT: por %xmm2, %xmm3
1219 ; X64-SSE2-NEXT: pand %xmm3, %xmm1
1220 ; X64-SSE2-NEXT: pandn %xmm0, %xmm3
1221 ; X64-SSE2-NEXT: por %xmm1, %xmm3
1222 ; X64-SSE2-NEXT: movq %xmm3, %rax
1223 ; X64-SSE2-NEXT: retq
1225 ; X64-SSE42-LABEL: test_reduce_v8i64:
1226 ; X64-SSE42: ## %bb.0:
1227 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm4
1228 ; X64-SSE42-NEXT: movdqa %xmm2, %xmm0
1229 ; X64-SSE42-NEXT: pcmpgtq %xmm4, %xmm0
1230 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm4, %xmm2
1231 ; X64-SSE42-NEXT: movdqa %xmm3, %xmm0
1232 ; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0
1233 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm3
1234 ; X64-SSE42-NEXT: movapd %xmm3, %xmm0
1235 ; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0
1236 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm2, %xmm3
1237 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm3[2,3,0,1]
1238 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm0
1239 ; X64-SSE42-NEXT: pcmpgtq %xmm3, %xmm0
1240 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm3, %xmm1
1241 ; X64-SSE42-NEXT: movq %xmm1, %rax
1242 ; X64-SSE42-NEXT: retq
1244 ; X64-AVX1-LABEL: test_reduce_v8i64:
1245 ; X64-AVX1: ## %bb.0:
1246 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
1247 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
1248 ; X64-AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
1249 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm3
1250 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
1251 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1252 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
1253 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1254 ; X64-AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
1255 ; X64-AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm2
1256 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1257 ; X64-AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1258 ; X64-AVX1-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm2
1259 ; X64-AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1260 ; X64-AVX1-NEXT: vmovq %xmm0, %rax
1261 ; X64-AVX1-NEXT: vzeroupper
1262 ; X64-AVX1-NEXT: retq
1264 ; X64-AVX2-LABEL: test_reduce_v8i64:
1265 ; X64-AVX2: ## %bb.0:
1266 ; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1267 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1268 ; X64-AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1
1269 ; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1270 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1271 ; X64-AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[2,3,0,1]
1272 ; X64-AVX2-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm2
1273 ; X64-AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
1274 ; X64-AVX2-NEXT: vmovq %xmm0, %rax
1275 ; X64-AVX2-NEXT: vzeroupper
1276 ; X64-AVX2-NEXT: retq
1278 ; X64-AVX512-LABEL: test_reduce_v8i64:
1279 ; X64-AVX512: ## %bb.0:
1280 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1281 ; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1282 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1283 ; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1284 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1285 ; X64-AVX512-NEXT: vpminsq %zmm1, %zmm0, %zmm0
1286 ; X64-AVX512-NEXT: vmovq %xmm0, %rax
1287 ; X64-AVX512-NEXT: vzeroupper
1288 ; X64-AVX512-NEXT: retq
1289 %1 = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
1290 %2 = icmp slt <8 x i64> %a0, %1
1291 %3 = select <8 x i1> %2, <8 x i64> %a0, <8 x i64> %1
1292 %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1293 %5 = icmp slt <8 x i64> %3, %4
1294 %6 = select <8 x i1> %5, <8 x i64> %3, <8 x i64> %4
1295 %7 = shufflevector <8 x i64> %6, <8 x i64> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1296 %8 = icmp slt <8 x i64> %6, %7
1297 %9 = select <8 x i1> %8, <8 x i64> %6, <8 x i64> %7
1298 %10 = extractelement <8 x i64> %9, i32 0
1302 define i32 @test_reduce_v16i32(<16 x i32> %a0) {
1303 ; X86-SSE2-LABEL: test_reduce_v16i32:
1304 ; X86-SSE2: ## %bb.0:
1305 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
1306 ; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1307 ; X86-SSE2-NEXT: pand %xmm4, %xmm0
1308 ; X86-SSE2-NEXT: pandn %xmm2, %xmm4
1309 ; X86-SSE2-NEXT: por %xmm0, %xmm4
1310 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm0
1311 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm0
1312 ; X86-SSE2-NEXT: pand %xmm0, %xmm1
1313 ; X86-SSE2-NEXT: pandn %xmm3, %xmm0
1314 ; X86-SSE2-NEXT: por %xmm1, %xmm0
1315 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1316 ; X86-SSE2-NEXT: pcmpgtd %xmm4, %xmm1
1317 ; X86-SSE2-NEXT: pand %xmm1, %xmm4
1318 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1319 ; X86-SSE2-NEXT: por %xmm4, %xmm1
1320 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1321 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1322 ; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1323 ; X86-SSE2-NEXT: pand %xmm2, %xmm1
1324 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2
1325 ; X86-SSE2-NEXT: por %xmm1, %xmm2
1326 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1327 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1328 ; X86-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
1329 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
1330 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1331 ; X86-SSE2-NEXT: por %xmm2, %xmm1
1332 ; X86-SSE2-NEXT: movd %xmm1, %eax
1333 ; X86-SSE2-NEXT: retl
1335 ; X86-SSE42-LABEL: test_reduce_v16i32:
1336 ; X86-SSE42: ## %bb.0:
1337 ; X86-SSE42-NEXT: pminsd %xmm3, %xmm1
1338 ; X86-SSE42-NEXT: pminsd %xmm2, %xmm0
1339 ; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
1340 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1341 ; X86-SSE42-NEXT: pminsd %xmm0, %xmm1
1342 ; X86-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1343 ; X86-SSE42-NEXT: pminsd %xmm1, %xmm0
1344 ; X86-SSE42-NEXT: movd %xmm0, %eax
1345 ; X86-SSE42-NEXT: retl
1347 ; X86-AVX1-LABEL: test_reduce_v16i32:
1348 ; X86-AVX1: ## %bb.0:
1349 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1350 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1351 ; X86-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
1352 ; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1353 ; X86-AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0
1354 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1355 ; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1356 ; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1357 ; X86-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1358 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
1359 ; X86-AVX1-NEXT: vzeroupper
1360 ; X86-AVX1-NEXT: retl
1362 ; X86-AVX2-LABEL: test_reduce_v16i32:
1363 ; X86-AVX2: ## %bb.0:
1364 ; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1365 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1366 ; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1367 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1368 ; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1369 ; X86-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1370 ; X86-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1371 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
1372 ; X86-AVX2-NEXT: vzeroupper
1373 ; X86-AVX2-NEXT: retl
1375 ; X64-SSE2-LABEL: test_reduce_v16i32:
1376 ; X64-SSE2: ## %bb.0:
1377 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm4
1378 ; X64-SSE2-NEXT: pcmpgtd %xmm0, %xmm4
1379 ; X64-SSE2-NEXT: pand %xmm4, %xmm0
1380 ; X64-SSE2-NEXT: pandn %xmm2, %xmm4
1381 ; X64-SSE2-NEXT: por %xmm0, %xmm4
1382 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm0
1383 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm0
1384 ; X64-SSE2-NEXT: pand %xmm0, %xmm1
1385 ; X64-SSE2-NEXT: pandn %xmm3, %xmm0
1386 ; X64-SSE2-NEXT: por %xmm1, %xmm0
1387 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1388 ; X64-SSE2-NEXT: pcmpgtd %xmm4, %xmm1
1389 ; X64-SSE2-NEXT: pand %xmm1, %xmm4
1390 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1391 ; X64-SSE2-NEXT: por %xmm4, %xmm1
1392 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1393 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1394 ; X64-SSE2-NEXT: pcmpgtd %xmm1, %xmm2
1395 ; X64-SSE2-NEXT: pand %xmm2, %xmm1
1396 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2
1397 ; X64-SSE2-NEXT: por %xmm1, %xmm2
1398 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1399 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1400 ; X64-SSE2-NEXT: pcmpgtd %xmm2, %xmm1
1401 ; X64-SSE2-NEXT: pand %xmm1, %xmm2
1402 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1403 ; X64-SSE2-NEXT: por %xmm2, %xmm1
1404 ; X64-SSE2-NEXT: movd %xmm1, %eax
1405 ; X64-SSE2-NEXT: retq
1407 ; X64-SSE42-LABEL: test_reduce_v16i32:
1408 ; X64-SSE42: ## %bb.0:
1409 ; X64-SSE42-NEXT: pminsd %xmm3, %xmm1
1410 ; X64-SSE42-NEXT: pminsd %xmm2, %xmm0
1411 ; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
1412 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1413 ; X64-SSE42-NEXT: pminsd %xmm0, %xmm1
1414 ; X64-SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1415 ; X64-SSE42-NEXT: pminsd %xmm1, %xmm0
1416 ; X64-SSE42-NEXT: movd %xmm0, %eax
1417 ; X64-SSE42-NEXT: retq
1419 ; X64-AVX1-LABEL: test_reduce_v16i32:
1420 ; X64-AVX1: ## %bb.0:
1421 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1422 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1423 ; X64-AVX1-NEXT: vpminsd %xmm2, %xmm3, %xmm2
1424 ; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1425 ; X64-AVX1-NEXT: vpminsd %xmm2, %xmm0, %xmm0
1426 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1427 ; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1428 ; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1429 ; X64-AVX1-NEXT: vpminsd %xmm1, %xmm0, %xmm0
1430 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
1431 ; X64-AVX1-NEXT: vzeroupper
1432 ; X64-AVX1-NEXT: retq
1434 ; X64-AVX2-LABEL: test_reduce_v16i32:
1435 ; X64-AVX2: ## %bb.0:
1436 ; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1437 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1438 ; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1439 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1440 ; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1441 ; X64-AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1442 ; X64-AVX2-NEXT: vpminsd %ymm1, %ymm0, %ymm0
1443 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
1444 ; X64-AVX2-NEXT: vzeroupper
1445 ; X64-AVX2-NEXT: retq
1447 ; X64-AVX512-LABEL: test_reduce_v16i32:
1448 ; X64-AVX512: ## %bb.0:
1449 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1450 ; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
1451 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1452 ; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
1453 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1454 ; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
1455 ; X64-AVX512-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
1456 ; X64-AVX512-NEXT: vpminsd %zmm1, %zmm0, %zmm0
1457 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
1458 ; X64-AVX512-NEXT: vzeroupper
1459 ; X64-AVX512-NEXT: retq
1460 %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1461 %2 = icmp slt <16 x i32> %a0, %1
1462 %3 = select <16 x i1> %2, <16 x i32> %a0, <16 x i32> %1
1463 %4 = shufflevector <16 x i32> %3, <16 x i32> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1464 %5 = icmp slt <16 x i32> %3, %4
1465 %6 = select <16 x i1> %5, <16 x i32> %3, <16 x i32> %4
1466 %7 = shufflevector <16 x i32> %6, <16 x i32> undef, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1467 %8 = icmp slt <16 x i32> %6, %7
1468 %9 = select <16 x i1> %8, <16 x i32> %6, <16 x i32> %7
1469 %10 = shufflevector <16 x i32> %9, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1470 %11 = icmp slt <16 x i32> %9, %10
1471 %12 = select <16 x i1> %11, <16 x i32> %9, <16 x i32> %10
1472 %13 = extractelement <16 x i32> %12, i32 0
1476 define i16 @test_reduce_v32i16(<32 x i16> %a0) {
1477 ; X86-SSE2-LABEL: test_reduce_v32i16:
1478 ; X86-SSE2: ## %bb.0:
1479 ; X86-SSE2-NEXT: pminsw %xmm3, %xmm1
1480 ; X86-SSE2-NEXT: pminsw %xmm2, %xmm0
1481 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
1482 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1483 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
1484 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1485 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm0
1486 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1487 ; X86-SSE2-NEXT: psrld $16, %xmm1
1488 ; X86-SSE2-NEXT: pminsw %xmm0, %xmm1
1489 ; X86-SSE2-NEXT: movd %xmm1, %eax
1490 ; X86-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
1491 ; X86-SSE2-NEXT: retl
1493 ; X86-SSE42-LABEL: test_reduce_v32i16:
1494 ; X86-SSE42: ## %bb.0:
1495 ; X86-SSE42-NEXT: pminsw %xmm3, %xmm1
1496 ; X86-SSE42-NEXT: pminsw %xmm2, %xmm0
1497 ; X86-SSE42-NEXT: pminsw %xmm1, %xmm0
1498 ; X86-SSE42-NEXT: pxor LCPI10_0, %xmm0
1499 ; X86-SSE42-NEXT: phminposuw %xmm0, %xmm0
1500 ; X86-SSE42-NEXT: movd %xmm0, %eax
1501 ; X86-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
1502 ; X86-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
1503 ; X86-SSE42-NEXT: retl
1505 ; X86-AVX1-LABEL: test_reduce_v32i16:
1506 ; X86-AVX1: ## %bb.0:
1507 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1508 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1509 ; X86-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
1510 ; X86-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1511 ; X86-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
1512 ; X86-AVX1-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
1513 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1514 ; X86-AVX1-NEXT: vmovd %xmm0, %eax
1515 ; X86-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
1516 ; X86-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
1517 ; X86-AVX1-NEXT: vzeroupper
1518 ; X86-AVX1-NEXT: retl
1520 ; X86-AVX2-LABEL: test_reduce_v32i16:
1521 ; X86-AVX2: ## %bb.0:
1522 ; X86-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1523 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1524 ; X86-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1525 ; X86-AVX2-NEXT: vpxor LCPI10_0, %xmm0, %xmm0
1526 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1527 ; X86-AVX2-NEXT: vmovd %xmm0, %eax
1528 ; X86-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
1529 ; X86-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
1530 ; X86-AVX2-NEXT: vzeroupper
1531 ; X86-AVX2-NEXT: retl
1533 ; X64-SSE2-LABEL: test_reduce_v32i16:
1534 ; X64-SSE2: ## %bb.0:
1535 ; X64-SSE2-NEXT: pminsw %xmm3, %xmm1
1536 ; X64-SSE2-NEXT: pminsw %xmm2, %xmm0
1537 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
1538 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
1539 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
1540 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
1541 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm0
1542 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1543 ; X64-SSE2-NEXT: psrld $16, %xmm1
1544 ; X64-SSE2-NEXT: pminsw %xmm0, %xmm1
1545 ; X64-SSE2-NEXT: movd %xmm1, %eax
1546 ; X64-SSE2-NEXT: ## kill: def $ax killed $ax killed $eax
1547 ; X64-SSE2-NEXT: retq
1549 ; X64-SSE42-LABEL: test_reduce_v32i16:
1550 ; X64-SSE42: ## %bb.0:
1551 ; X64-SSE42-NEXT: pminsw %xmm3, %xmm1
1552 ; X64-SSE42-NEXT: pminsw %xmm2, %xmm0
1553 ; X64-SSE42-NEXT: pminsw %xmm1, %xmm0
1554 ; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
1555 ; X64-SSE42-NEXT: phminposuw %xmm0, %xmm0
1556 ; X64-SSE42-NEXT: movd %xmm0, %eax
1557 ; X64-SSE42-NEXT: xorl $32768, %eax ## imm = 0x8000
1558 ; X64-SSE42-NEXT: ## kill: def $ax killed $ax killed $eax
1559 ; X64-SSE42-NEXT: retq
1561 ; X64-AVX1-LABEL: test_reduce_v32i16:
1562 ; X64-AVX1: ## %bb.0:
1563 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1564 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1565 ; X64-AVX1-NEXT: vpminsw %xmm2, %xmm3, %xmm2
1566 ; X64-AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1567 ; X64-AVX1-NEXT: vpminsw %xmm2, %xmm0, %xmm0
1568 ; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
1569 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1570 ; X64-AVX1-NEXT: vmovd %xmm0, %eax
1571 ; X64-AVX1-NEXT: xorl $32768, %eax ## imm = 0x8000
1572 ; X64-AVX1-NEXT: ## kill: def $ax killed $ax killed $eax
1573 ; X64-AVX1-NEXT: vzeroupper
1574 ; X64-AVX1-NEXT: retq
1576 ; X64-AVX2-LABEL: test_reduce_v32i16:
1577 ; X64-AVX2: ## %bb.0:
1578 ; X64-AVX2-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1579 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1580 ; X64-AVX2-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1581 ; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
1582 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1583 ; X64-AVX2-NEXT: vmovd %xmm0, %eax
1584 ; X64-AVX2-NEXT: xorl $32768, %eax ## imm = 0x8000
1585 ; X64-AVX2-NEXT: ## kill: def $ax killed $ax killed $eax
1586 ; X64-AVX2-NEXT: vzeroupper
1587 ; X64-AVX2-NEXT: retq
1589 ; X64-AVX512-LABEL: test_reduce_v32i16:
1590 ; X64-AVX512: ## %bb.0:
1591 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1592 ; X64-AVX512-NEXT: vpminsw %ymm1, %ymm0, %ymm0
1593 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1594 ; X64-AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0
1595 ; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
1596 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1597 ; X64-AVX512-NEXT: vmovd %xmm0, %eax
1598 ; X64-AVX512-NEXT: xorl $32768, %eax ## imm = 0x8000
1599 ; X64-AVX512-NEXT: ## kill: def $ax killed $ax killed $eax
1600 ; X64-AVX512-NEXT: vzeroupper
1601 ; X64-AVX512-NEXT: retq
1602 %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1603 %2 = icmp slt <32 x i16> %a0, %1
1604 %3 = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
1605 %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1606 %5 = icmp slt <32 x i16> %3, %4
1607 %6 = select <32 x i1> %5, <32 x i16> %3, <32 x i16> %4
1608 %7 = shufflevector <32 x i16> %6, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1609 %8 = icmp slt <32 x i16> %6, %7
1610 %9 = select <32 x i1> %8, <32 x i16> %6, <32 x i16> %7
1611 %10 = shufflevector <32 x i16> %9, <32 x i16> undef, <32 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1612 %11 = icmp slt <32 x i16> %9, %10
1613 %12 = select <32 x i1> %11, <32 x i16> %9, <32 x i16> %10
1614 %13 = shufflevector <32 x i16> %12, <32 x i16> undef, <32 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1615 %14 = icmp slt <32 x i16> %12, %13
1616 %15 = select <32 x i1> %14, <32 x i16> %12, <32 x i16> %13
1617 %16 = extractelement <32 x i16> %15, i32 0
1621 define i8 @test_reduce_v64i8(<64 x i8> %a0) {
1622 ; X86-SSE2-LABEL: test_reduce_v64i8:
1623 ; X86-SSE2: ## %bb.0:
1624 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm4
1625 ; X86-SSE2-NEXT: pcmpgtb %xmm0, %xmm4
1626 ; X86-SSE2-NEXT: pand %xmm4, %xmm0
1627 ; X86-SSE2-NEXT: pandn %xmm2, %xmm4
1628 ; X86-SSE2-NEXT: por %xmm0, %xmm4
1629 ; X86-SSE2-NEXT: movdqa %xmm3, %xmm0
1630 ; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm0
1631 ; X86-SSE2-NEXT: pand %xmm0, %xmm1
1632 ; X86-SSE2-NEXT: pandn %xmm3, %xmm0
1633 ; X86-SSE2-NEXT: por %xmm1, %xmm0
1634 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1635 ; X86-SSE2-NEXT: pcmpgtb %xmm4, %xmm1
1636 ; X86-SSE2-NEXT: pand %xmm1, %xmm4
1637 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1638 ; X86-SSE2-NEXT: por %xmm4, %xmm1
1639 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1640 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1641 ; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1642 ; X86-SSE2-NEXT: pand %xmm2, %xmm1
1643 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2
1644 ; X86-SSE2-NEXT: por %xmm1, %xmm2
1645 ; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1646 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1647 ; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1648 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
1649 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1650 ; X86-SSE2-NEXT: por %xmm2, %xmm1
1651 ; X86-SSE2-NEXT: movdqa %xmm1, %xmm0
1652 ; X86-SSE2-NEXT: psrld $16, %xmm0
1653 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm2
1654 ; X86-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1655 ; X86-SSE2-NEXT: pand %xmm2, %xmm1
1656 ; X86-SSE2-NEXT: pandn %xmm0, %xmm2
1657 ; X86-SSE2-NEXT: por %xmm1, %xmm2
1658 ; X86-SSE2-NEXT: movdqa %xmm2, %xmm0
1659 ; X86-SSE2-NEXT: psrlw $8, %xmm0
1660 ; X86-SSE2-NEXT: movdqa %xmm0, %xmm1
1661 ; X86-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1662 ; X86-SSE2-NEXT: pand %xmm1, %xmm2
1663 ; X86-SSE2-NEXT: pandn %xmm0, %xmm1
1664 ; X86-SSE2-NEXT: por %xmm2, %xmm1
1665 ; X86-SSE2-NEXT: movd %xmm1, %eax
1666 ; X86-SSE2-NEXT: ## kill: def $al killed $al killed $eax
1667 ; X86-SSE2-NEXT: retl
1669 ; X86-SSE42-LABEL: test_reduce_v64i8:
1670 ; X86-SSE42: ## %bb.0:
1671 ; X86-SSE42-NEXT: pminsb %xmm3, %xmm1
1672 ; X86-SSE42-NEXT: pminsb %xmm2, %xmm0
1673 ; X86-SSE42-NEXT: pminsb %xmm1, %xmm0
1674 ; X86-SSE42-NEXT: pxor LCPI11_0, %xmm0
1675 ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1
1676 ; X86-SSE42-NEXT: psrlw $8, %xmm1
1677 ; X86-SSE42-NEXT: pminub %xmm0, %xmm1
1678 ; X86-SSE42-NEXT: phminposuw %xmm1, %xmm0
1679 ; X86-SSE42-NEXT: pextrb $0, %xmm0, %eax
1680 ; X86-SSE42-NEXT: xorb $-128, %al
1681 ; X86-SSE42-NEXT: ## kill: def $al killed $al killed $eax
1682 ; X86-SSE42-NEXT: retl
1684 ; X86-AVX1-LABEL: test_reduce_v64i8:
1685 ; X86-AVX1: ## %bb.0:
1686 ; X86-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1687 ; X86-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1688 ; X86-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
1689 ; X86-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1690 ; X86-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
1691 ; X86-AVX1-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
1692 ; X86-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
1693 ; X86-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1694 ; X86-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1695 ; X86-AVX1-NEXT: vpextrb $0, %xmm0, %eax
1696 ; X86-AVX1-NEXT: xorb $-128, %al
1697 ; X86-AVX1-NEXT: ## kill: def $al killed $al killed $eax
1698 ; X86-AVX1-NEXT: vzeroupper
1699 ; X86-AVX1-NEXT: retl
1701 ; X86-AVX2-LABEL: test_reduce_v64i8:
1702 ; X86-AVX2: ## %bb.0:
1703 ; X86-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1704 ; X86-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1705 ; X86-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1706 ; X86-AVX2-NEXT: vpxor LCPI11_0, %xmm0, %xmm0
1707 ; X86-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
1708 ; X86-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
1709 ; X86-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1710 ; X86-AVX2-NEXT: vpextrb $0, %xmm0, %eax
1711 ; X86-AVX2-NEXT: xorb $-128, %al
1712 ; X86-AVX2-NEXT: ## kill: def $al killed $al killed $eax
1713 ; X86-AVX2-NEXT: vzeroupper
1714 ; X86-AVX2-NEXT: retl
1716 ; X64-SSE2-LABEL: test_reduce_v64i8:
1717 ; X64-SSE2: ## %bb.0:
1718 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm4
1719 ; X64-SSE2-NEXT: pcmpgtb %xmm0, %xmm4
1720 ; X64-SSE2-NEXT: pand %xmm4, %xmm0
1721 ; X64-SSE2-NEXT: pandn %xmm2, %xmm4
1722 ; X64-SSE2-NEXT: por %xmm0, %xmm4
1723 ; X64-SSE2-NEXT: movdqa %xmm3, %xmm0
1724 ; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm0
1725 ; X64-SSE2-NEXT: pand %xmm0, %xmm1
1726 ; X64-SSE2-NEXT: pandn %xmm3, %xmm0
1727 ; X64-SSE2-NEXT: por %xmm1, %xmm0
1728 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1729 ; X64-SSE2-NEXT: pcmpgtb %xmm4, %xmm1
1730 ; X64-SSE2-NEXT: pand %xmm1, %xmm4
1731 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1732 ; X64-SSE2-NEXT: por %xmm4, %xmm1
1733 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
1734 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1735 ; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1736 ; X64-SSE2-NEXT: pand %xmm2, %xmm1
1737 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2
1738 ; X64-SSE2-NEXT: por %xmm1, %xmm2
1739 ; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,2,3]
1740 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1741 ; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1742 ; X64-SSE2-NEXT: pand %xmm1, %xmm2
1743 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1744 ; X64-SSE2-NEXT: por %xmm2, %xmm1
1745 ; X64-SSE2-NEXT: movdqa %xmm1, %xmm0
1746 ; X64-SSE2-NEXT: psrld $16, %xmm0
1747 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm2
1748 ; X64-SSE2-NEXT: pcmpgtb %xmm1, %xmm2
1749 ; X64-SSE2-NEXT: pand %xmm2, %xmm1
1750 ; X64-SSE2-NEXT: pandn %xmm0, %xmm2
1751 ; X64-SSE2-NEXT: por %xmm1, %xmm2
1752 ; X64-SSE2-NEXT: movdqa %xmm2, %xmm0
1753 ; X64-SSE2-NEXT: psrlw $8, %xmm0
1754 ; X64-SSE2-NEXT: movdqa %xmm0, %xmm1
1755 ; X64-SSE2-NEXT: pcmpgtb %xmm2, %xmm1
1756 ; X64-SSE2-NEXT: pand %xmm1, %xmm2
1757 ; X64-SSE2-NEXT: pandn %xmm0, %xmm1
1758 ; X64-SSE2-NEXT: por %xmm2, %xmm1
1759 ; X64-SSE2-NEXT: movd %xmm1, %eax
1760 ; X64-SSE2-NEXT: ## kill: def $al killed $al killed $eax
1761 ; X64-SSE2-NEXT: retq
1763 ; X64-SSE42-LABEL: test_reduce_v64i8:
1764 ; X64-SSE42: ## %bb.0:
1765 ; X64-SSE42-NEXT: pminsb %xmm3, %xmm1
1766 ; X64-SSE42-NEXT: pminsb %xmm2, %xmm0
1767 ; X64-SSE42-NEXT: pminsb %xmm1, %xmm0
1768 ; X64-SSE42-NEXT: pxor {{.*}}(%rip), %xmm0
1769 ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1
1770 ; X64-SSE42-NEXT: psrlw $8, %xmm1
1771 ; X64-SSE42-NEXT: pminub %xmm0, %xmm1
1772 ; X64-SSE42-NEXT: phminposuw %xmm1, %xmm0
1773 ; X64-SSE42-NEXT: pextrb $0, %xmm0, %eax
1774 ; X64-SSE42-NEXT: xorb $-128, %al
1775 ; X64-SSE42-NEXT: ## kill: def $al killed $al killed $eax
1776 ; X64-SSE42-NEXT: retq
1778 ; X64-AVX1-LABEL: test_reduce_v64i8:
1779 ; X64-AVX1: ## %bb.0:
1780 ; X64-AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
1781 ; X64-AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
1782 ; X64-AVX1-NEXT: vpminsb %xmm2, %xmm3, %xmm2
1783 ; X64-AVX1-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1784 ; X64-AVX1-NEXT: vpminsb %xmm2, %xmm0, %xmm0
1785 ; X64-AVX1-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
1786 ; X64-AVX1-NEXT: vpsrlw $8, %xmm0, %xmm1
1787 ; X64-AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0
1788 ; X64-AVX1-NEXT: vphminposuw %xmm0, %xmm0
1789 ; X64-AVX1-NEXT: vpextrb $0, %xmm0, %eax
1790 ; X64-AVX1-NEXT: xorb $-128, %al
1791 ; X64-AVX1-NEXT: ## kill: def $al killed $al killed $eax
1792 ; X64-AVX1-NEXT: vzeroupper
1793 ; X64-AVX1-NEXT: retq
1795 ; X64-AVX2-LABEL: test_reduce_v64i8:
1796 ; X64-AVX2: ## %bb.0:
1797 ; X64-AVX2-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1798 ; X64-AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
1799 ; X64-AVX2-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1800 ; X64-AVX2-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
1801 ; X64-AVX2-NEXT: vpsrlw $8, %xmm0, %xmm1
1802 ; X64-AVX2-NEXT: vpminub %xmm1, %xmm0, %xmm0
1803 ; X64-AVX2-NEXT: vphminposuw %xmm0, %xmm0
1804 ; X64-AVX2-NEXT: vpextrb $0, %xmm0, %eax
1805 ; X64-AVX2-NEXT: xorb $-128, %al
1806 ; X64-AVX2-NEXT: ## kill: def $al killed $al killed $eax
1807 ; X64-AVX2-NEXT: vzeroupper
1808 ; X64-AVX2-NEXT: retq
1810 ; X64-AVX512-LABEL: test_reduce_v64i8:
1811 ; X64-AVX512: ## %bb.0:
1812 ; X64-AVX512-NEXT: vextracti64x4 $1, %zmm0, %ymm1
1813 ; X64-AVX512-NEXT: vpminsb %ymm1, %ymm0, %ymm0
1814 ; X64-AVX512-NEXT: vextracti128 $1, %ymm0, %xmm1
1815 ; X64-AVX512-NEXT: vpminsb %xmm1, %xmm0, %xmm0
1816 ; X64-AVX512-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
1817 ; X64-AVX512-NEXT: vpsrlw $8, %xmm0, %xmm1
1818 ; X64-AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0
1819 ; X64-AVX512-NEXT: vphminposuw %xmm0, %xmm0
1820 ; X64-AVX512-NEXT: vpextrb $0, %xmm0, %eax
1821 ; X64-AVX512-NEXT: xorb $-128, %al
1822 ; X64-AVX512-NEXT: ## kill: def $al killed $al killed $eax
1823 ; X64-AVX512-NEXT: vzeroupper
1824 ; X64-AVX512-NEXT: retq
1825 %1 = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1826 %2 = icmp slt <64 x i8> %a0, %1
1827 %3 = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1
1828 %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1829 %5 = icmp slt <64 x i8> %3, %4
1830 %6 = select <64 x i1> %5, <64 x i8> %3, <64 x i8> %4
1831 %7 = shufflevector <64 x i8> %6, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1832 %8 = icmp slt <64 x i8> %6, %7
1833 %9 = select <64 x i1> %8, <64 x i8> %6, <64 x i8> %7
1834 %10 = shufflevector <64 x i8> %9, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1835 %11 = icmp slt <64 x i8> %9, %10
1836 %12 = select <64 x i1> %11, <64 x i8> %9, <64 x i8> %10
1837 %13 = shufflevector <64 x i8> %12, <64 x i8> undef, <64 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1838 %14 = icmp slt <64 x i8> %12, %13
1839 %15 = select <64 x i1> %14, <64 x i8> %12, <64 x i8> %13
1840 %16 = shufflevector <64 x i8> %15, <64 x i8> undef, <64 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1841 %17 = icmp slt <64 x i8> %15, %16
1842 %18 = select <64 x i1> %17, <64 x i8> %15, <64 x i8> %16
1843 %19 = extractelement <64 x i8> %18, i32 0