; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
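;
; The CHECK lines below are not maintained by hand; they are typically
; refreshed with the script named in the NOTE above, e.g. (assumed path and
; invocation, adjust to the local checkout):
;   llvm/utils/update_llc_test_checks.py llvm/test/CodeGen/X86/vector-blend.ll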

define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

define <4 x float> @vsel_float2(<4 x float> %v1, <4 x float> %v2) {
; SSE2-LABEL: vsel_float2:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float2:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; SSSE3-NEXT:    movaps %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float2:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %v1, <4 x float> %v2
  ret <4 x float> %vsel
}

define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
; SSE2-LABEL: vsel_4xi8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andnps %xmm1, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_4xi8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,2,5,6,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_4xi8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = <255,255,0,255,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vsel_4xi8:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255]
; AVX1-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vsel_4xi8:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [255,255,0,255,255,255,0,255,255,255,0,255,255,255,0,255]
; AVX2-NEXT:    vpblendvb %xmm2, %xmm0, %xmm1, %xmm0
; AVX2-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
  ret <4 x i8> %vsel
}

define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
; SSE2-LABEL: vsel_4xi16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andnps %xmm1, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_4xi16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,65535,65535,65535,65535,65535]
; SSSE3-NEXT:    andps %xmm2, %xmm0
; SSSE3-NEXT:    andnps %xmm1, %xmm2
; SSSE3-NEXT:    orps %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_4xi16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_4xi16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3,4,5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
  ret <4 x i16> %vsel
}

define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
; SSE2-LABEL: vsel_i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
  ret <4 x i32> %vsel
}

define <2 x double> @vsel_double(<2 x double> %v1, <2 x double> %v2) {
; SSE2-LABEL: vsel_double:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x double> %v1, <2 x double> %v2
  ret <2 x double> %vsel
}

define <2 x i64> @vsel_i64(<2 x i64> %v1, <2 x i64> %v2) {
; SSE2-LABEL: vsel_i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; AVX-NEXT:    retq
entry:
  %vsel = select <2 x i1> <i1 true, i1 false>, <2 x i64> %v1, <2 x i64> %v2
  ret <2 x i64> %vsel
}

define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
; SSE2-LABEL: vsel_8xi16:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    andnps %xmm0, %xmm2
; SSE2-NEXT:    orps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_8xi16:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps {{.*#+}} xmm2 = [0,65535,65535,65535,0,65535,65535,65535]
; SSSE3-NEXT:    andps %xmm2, %xmm1
; SSSE3-NEXT:    andnps %xmm0, %xmm2
; SSSE3-NEXT:    orps %xmm1, %xmm2
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_8xi16:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_8xi16:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
  ret <8 x i16> %vsel
}

define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
; SSE2-LABEL: vsel_i8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    andnps %xmm0, %xmm2
; SSE2-NEXT:    orps %xmm1, %xmm2
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[12],zero,zero,zero
; SSSE3-NEXT:    pshufb {{.*#+}} xmm1 = zero,xmm1[1,2,3],zero,xmm1[5,6,7],zero,xmm1[9,10,11],zero,xmm1[13,14,15]
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm2
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: vsel_i8:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastss {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX1-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: vsel_i8:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [0,255,255,255,0,255,255,255,0,255,255,255,0,255,255,255]
; AVX2-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
entry:
  %vsel = select <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <16 x i8> %v1, <16 x i8> %v2
  ret <16 x i8> %vsel
}

define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
; SSE2-LABEL: vsel_float8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_float8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_float8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_float8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
  ret <8 x float> %vsel
}

define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
; SSE2-LABEL: vsel_i328:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i328:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm1[0],xmm3[1,2,3]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i328:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i328:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
  ret <8 x i32> %vsel
}

define <8 x double> @vsel_double8(<8 x double> %v1, <8 x double> %v2) {
; SSE2-LABEL: vsel_double8:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm7, %xmm3
; SSE2-NEXT:    movaps %xmm5, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double8:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm7, %xmm3
; SSSE3-NEXT:    movaps %xmm5, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double8:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movaps %xmm7, %xmm3
; SSE41-NEXT:    movaps %xmm5, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double8:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x double> %v1, <8 x double> %v2
  ret <8 x double> %vsel
}

define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
; SSE2-LABEL: vsel_i648:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm7, %xmm3
; SSE2-NEXT:    movaps %xmm5, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_i648:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm7, %xmm3
; SSSE3-NEXT:    movaps %xmm5, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_i648:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movaps %xmm7, %xmm3
; SSE41-NEXT:    movaps %xmm5, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm4[2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm2 = xmm2[0,1],xmm6[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_i648:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5,6,7]
; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm3[2,3,4,5,6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i64> %v1, <8 x i64> %v2
  ret <8 x i64> %vsel
}

define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
; SSE2-LABEL: vsel_double4:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: vsel_double4:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: vsel_double4:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: vsel_double4:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX-NEXT:    retq
entry:
  %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
  ret <4 x double> %vsel
}

define <2 x double> @testa(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testa:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    cmplepd %xmm0, %xmm2
; SSE2-NEXT:    andpd %xmm2, %xmm0
; SSE2-NEXT:    andnpd %xmm1, %xmm2
; SSE2-NEXT:    orpd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: testa:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movapd %xmm1, %xmm2
; SSSE3-NEXT:    cmplepd %xmm0, %xmm2
; SSSE3-NEXT:    andpd %xmm2, %xmm0
; SSSE3-NEXT:    andnpd %xmm1, %xmm2
; SSSE3-NEXT:    orpd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: testa:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movapd %xmm0, %xmm2
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    cmplepd %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: testa:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcmplepd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %max_is_x = fcmp oge <2 x double> %x, %y
  %max = select <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %max
}

define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
; SSE2-LABEL: testb:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movapd %xmm1, %xmm2
; SSE2-NEXT:    cmpnlepd %xmm0, %xmm2
; SSE2-NEXT:    andpd %xmm2, %xmm0
; SSE2-NEXT:    andnpd %xmm1, %xmm2
; SSE2-NEXT:    orpd %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: testb:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movapd %xmm1, %xmm2
; SSSE3-NEXT:    cmpnlepd %xmm0, %xmm2
; SSSE3-NEXT:    andpd %xmm2, %xmm0
; SSSE3-NEXT:    andnpd %xmm1, %xmm2
; SSSE3-NEXT:    orpd %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: testb:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movapd %xmm0, %xmm2
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    cmpnlepd %xmm2, %xmm0
; SSE41-NEXT:    blendvpd %xmm0, %xmm2, %xmm1
; SSE41-NEXT:    movapd %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: testb:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vcmpnlepd %xmm0, %xmm1, %xmm2
; AVX-NEXT:    vblendvpd %xmm2, %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %min_is_x = fcmp ult <2 x double> %x, %y
  %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
  ret <2 x double> %min
}

; If we can figure out that a blend has a constant mask, we should emit the
; blend instruction with an immediate mask.
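;
; As an illustrative, hand-written sketch (not part of the autogenerated
; checks below): a select such as
;   %s = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>,
;               <4 x double> %xy, <4 x double> %ab
; needs no variable blendv. With %xy in ymm0 and %ab in ymm1 it can become a
; single immediate-mask blend, e.g. vblendpd $4, %ymm0, %ymm1, %ymm0
; (a set immediate bit takes that lane from %ymm0/%xy; clear bits take
; %ymm1/%ab).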
define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
; SSE2-LABEL: constant_blendvpd_avx:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_blendvpd_avx:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_blendvpd_avx:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movaps %xmm2, %xmm0
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm3[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: constant_blendvpd_avx:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5],ymm1[6,7]
; AVX-NEXT:    retq
entry:
  %select = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
  ret <4 x double> %select
}

define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
; SSE2-LABEL: constant_blendvps_avx:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_blendvps_avx:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,0],xmm2[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,1],xmm0[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,0],xmm3[2,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[2,0]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_blendvps_avx:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1,2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: constant_blendvps_avx:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3],ymm1[4,5,6],ymm0[7]
; AVX-NEXT:    retq
entry:
  %select = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
  ret <8 x float> %select
}

define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
; SSE2-LABEL: constant_pblendvb_avx2:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps {{.*#+}} xmm4 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; SSE2-NEXT:    movaps %xmm4, %xmm5
; SSE2-NEXT:    andnps %xmm0, %xmm5
; SSE2-NEXT:    andps %xmm4, %xmm2
; SSE2-NEXT:    orps %xmm2, %xmm5
; SSE2-NEXT:    andps %xmm4, %xmm3
; SSE2-NEXT:    andnps %xmm1, %xmm4
; SSE2-NEXT:    orps %xmm3, %xmm4
; SSE2-NEXT:    movaps %xmm5, %xmm0
; SSE2-NEXT:    movaps %xmm4, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: constant_pblendvb_avx2:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movdqa {{.*#+}} xmm4 = [128,128,2,128,4,5,6,128,128,128,10,128,12,13,14,128]
; SSSE3-NEXT:    pshufb %xmm4, %xmm0
; SSSE3-NEXT:    movdqa {{.*#+}} xmm5 = [0,1,128,3,128,128,128,7,8,9,128,11,128,128,128,15]
; SSSE3-NEXT:    pshufb %xmm5, %xmm2
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    pshufb %xmm4, %xmm1
; SSSE3-NEXT:    pshufb %xmm5, %xmm3
; SSSE3-NEXT:    por %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: constant_pblendvb_avx2:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movdqa %xmm0, %xmm4
; SSE41-NEXT:    movaps {{.*#+}} xmm0 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm4
; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    retq
;
; AVX1-LABEL: constant_pblendvb_avx2:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vbroadcastsd {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX1-NEXT:    vandnps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: constant_pblendvb_avx2:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255,255,255,0,255,0,0,0,255]
; AVX2-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %select = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd
  ret <32 x i8> %select
}

declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)

;; 4 tests for shufflevectors that optimize to blend + immediate
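;;
;; For instance (an illustrative, hand-written pairing, not autogenerated):
;;   %sel = shufflevector <4 x float> %a, <4 x float> %b,
;;                        <4 x i32> <i32 0, i32 5, i32 2, i32 7>
;; keeps lanes 0 and 2 of %a and lanes 1 and 3 of %b, so with %a in xmm0 and
;; %b in xmm1 it can lower to blendps $0xa, %xmm1, %xmm0 (set immediate bits
;; 1 and 3 select those lanes from %xmm1).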
define <4 x float> @blend_shufflevector_4xfloat(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: blend_shufflevector_4xfloat:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xfloat:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[1,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,2,1,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xfloat:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_4xfloat:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x float> %select
}

define <8 x float> @blend_shufflevector_8xfloat(<8 x float> %a, <8 x float> %b) {
; SSE2-LABEL: blend_shufflevector_8xfloat:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSE2-NEXT:    movaps %xmm2, %xmm0
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_8xfloat:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm3[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm3 = xmm3[0,1],xmm1[0,2]
; SSSE3-NEXT:    movaps %xmm2, %xmm0
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_8xfloat:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm3[0,1],xmm1[2],xmm3[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_8xfloat:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5],ymm0[6],ymm1[7]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 6, i32 15>
  ret <8 x float> %select
}

define <4 x double> @blend_shufflevector_4xdouble(<4 x double> %a, <4 x double> %b) {
; SSE2-LABEL: blend_shufflevector_4xdouble:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xdouble:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xdouble:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_4xdouble:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
  ret <4 x double> %select
}

define <4 x i64> @blend_shufflevector_4xi64(<4 x i64> %a, <4 x i64> %b) {
; SSE2-LABEL: blend_shufflevector_4xi64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps %xmm3, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_shufflevector_4xi64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movaps %xmm3, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_shufflevector_4xi64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movaps %xmm3, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_shufflevector_4xi64:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5,6,7]
; AVX-NEXT:    retq
entry:
  %select = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 7>
  ret <4 x i64> %select
}

define <4 x i32> @blend_logic_v4i32(<4 x i32> %b, <4 x i32> %a, <4 x i32> %c) {
; SSE2-LABEL: blend_logic_v4i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pandn %xmm2, %xmm0
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_logic_v4i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    psrad $31, %xmm0
; SSSE3-NEXT:    pand %xmm0, %xmm1
; SSSE3-NEXT:    pandn %xmm2, %xmm0
; SSSE3-NEXT:    por %xmm1, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_logic_v4i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm1, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: blend_logic_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX-NEXT:    vpblendvb %xmm0, %xmm1, %xmm2, %xmm0
; AVX-NEXT:    retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %c, %0
  %2 = and <4 x i32> %a, %b.lobit
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}

define <8 x i32> @blend_logic_v8i32(<8 x i32> %b, <8 x i32> %a, <8 x i32> %c) {
; SSE2-LABEL: blend_logic_v8i32:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pand %xmm1, %xmm3
; SSE2-NEXT:    pandn %xmm5, %xmm1
; SSE2-NEXT:    por %xmm3, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm2
; SSE2-NEXT:    pandn %xmm4, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: blend_logic_v8i32:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    psrad $31, %xmm0
; SSSE3-NEXT:    psrad $31, %xmm1
; SSSE3-NEXT:    pand %xmm1, %xmm3
; SSSE3-NEXT:    pandn %xmm5, %xmm1
; SSSE3-NEXT:    por %xmm3, %xmm1
; SSSE3-NEXT:    pand %xmm0, %xmm2
; SSSE3-NEXT:    pandn %xmm4, %xmm0
; SSSE3-NEXT:    por %xmm2, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: blend_logic_v8i32:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    psrad $31, %xmm1
; SSE41-NEXT:    psrad $31, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm2, %xmm4
; SSE41-NEXT:    movdqa %xmm1, %xmm0
; SSE41-NEXT:    pblendvb %xmm0, %xmm3, %xmm5
; SSE41-NEXT:    movdqa %xmm4, %xmm0
; SSE41-NEXT:    movdqa %xmm5, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: blend_logic_v8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm3, %ymm0
; AVX1-NEXT:    vandnps %ymm2, %ymm0, %ymm2
; AVX1-NEXT:    vandps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vorps %ymm0, %ymm2, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_logic_v8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpblendvb %ymm0, %ymm1, %ymm2, %ymm0
; AVX2-NEXT:    retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <8 x i32> zeroinitializer, %a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <8 x i32> %c, %0
  %2 = and <8 x i32> %a, %b.lobit
  %cond = or <8 x i32> %1, %2
  ret <8 x i32> %cond
}

define <4 x i32> @blend_neg_logic_v4i32(<4 x i32> %a, <4 x i32> %b) {
; SSE-LABEL: blend_neg_logic_v4i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    psubd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_neg_logic_v4i32:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
entry:
  %b.lobit = ashr <4 x i32> %b, <i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <4 x i32> zeroinitializer, %a
  %0 = xor <4 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <4 x i32> %a, %0
  %2 = and <4 x i32> %b.lobit, %sub
  %cond = or <4 x i32> %1, %2
  ret <4 x i32> %cond
}

define <8 x i32> @blend_neg_logic_v8i32(<8 x i32> %a, <8 x i32> %b) {
; SSE-LABEL: blend_neg_logic_v8i32:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    psrad $31, %xmm3
; SSE-NEXT:    psrad $31, %xmm2
; SSE-NEXT:    pxor %xmm2, %xmm0
; SSE-NEXT:    psubd %xmm2, %xmm0
; SSE-NEXT:    pxor %xmm3, %xmm1
; SSE-NEXT:    psubd %xmm3, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: blend_neg_logic_v8i32:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm2
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpsubd %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpsubd %xmm0, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vandnps %ymm0, %ymm1, %ymm0
; AVX1-NEXT:    vandps %ymm2, %ymm1, %ymm1
; AVX1-NEXT:    vorps %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: blend_neg_logic_v8i32:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpsubd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
entry:
  %b.lobit = ashr <8 x i32> %b, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
  %sub = sub nsw <8 x i32> zeroinitializer, %a
  %0 = xor <8 x i32> %b.lobit, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
  %1 = and <8 x i32> %a, %0
  %2 = and <8 x i32> %b.lobit, %sub
  %cond = or <8 x i32> %1, %2
  ret <8 x i32> %cond
}

define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) {
; SSE-LABEL: blend_neg_logic_v4i32_2:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    psrad $31, %xmm1
; SSE-NEXT:    pxor %xmm1, %xmm0
; SSE-NEXT:    psubd %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: blend_neg_logic_v4i32_2:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vpsrad $31, %xmm1, %xmm1
; AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX-NEXT:    retq
entry:
  %0 = ashr <4 x i32> %c, <i32 31, i32 31, i32 31, i32 31>
  %1 = trunc <4 x i32> %0 to <4 x i1>
  %2 = sub nsw <4 x i32> zeroinitializer, %v
  %3 = select <4 x i1> %1, <4 x i32> %v, <4 x i32> %2
  ret <4 x i32> %3
}