1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE2
3 ; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX2
; and (zext x), (zext y) with both operands widened from <8 x i16> to <8 x i32>.
; The expected code for the SSE2 and AVX2 runs applies the AND to the two
; xmm inputs first (pand / vpand) and zero-extends the single result afterwards
; (unpack against a zeroed register on SSE2, one vpmovzxwd on AVX2).
5 define <8 x i32> @zext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
6 ; SSE2-LABEL: zext_and_v8i32:
8 ; SSE2-NEXT: movdqa %xmm0, %xmm2
9 ; SSE2-NEXT: pand %xmm1, %xmm2
10 ; SSE2-NEXT: pxor %xmm1, %xmm1
11 ; SSE2-NEXT: movdqa %xmm2, %xmm0
12 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
13 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
14 ; SSE2-NEXT: movdqa %xmm2, %xmm1
17 ; AVX2-LABEL: zext_and_v8i32:
19 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
20 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
22 %xz = zext <8 x i16> %x to <8 x i32>
23 %yz = zext <8 x i16> %y to <8 x i32>
24 %r = and <8 x i32> %xz, %yz
; or (zext x), (zext y) with both operands widened from <8 x i16> to <8 x i32>.
; The expected code performs the OR on the xmm inputs first (por / vpor) and
; zero-extends only the combined result (unpack against a zeroed register on
; SSE2, one vpmovzxwd on AVX2).
28 define <8 x i32> @zext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
29 ; SSE2-LABEL: zext_or_v8i32:
31 ; SSE2-NEXT: movdqa %xmm0, %xmm2
32 ; SSE2-NEXT: por %xmm1, %xmm2
33 ; SSE2-NEXT: pxor %xmm1, %xmm1
34 ; SSE2-NEXT: movdqa %xmm2, %xmm0
35 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
36 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
37 ; SSE2-NEXT: movdqa %xmm2, %xmm1
40 ; AVX2-LABEL: zext_or_v8i32:
42 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
43 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
45 %xz = zext <8 x i16> %x to <8 x i32>
46 %yz = zext <8 x i16> %y to <8 x i32>
47 %r = or <8 x i32> %xz, %yz
; xor (zext x), (zext y) with both operands widened from <8 x i16> to <8 x i32>.
; The expected code performs the XOR on the xmm inputs first (pxor / vpxor) and
; zero-extends only the combined result. On SSE2 the second pxor has identical
; source and destination (xmm1), producing the zero register used by the unpacks.
51 define <8 x i32> @zext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
52 ; SSE2-LABEL: zext_xor_v8i32:
54 ; SSE2-NEXT: movdqa %xmm0, %xmm2
55 ; SSE2-NEXT: pxor %xmm1, %xmm2
56 ; SSE2-NEXT: pxor %xmm1, %xmm1
57 ; SSE2-NEXT: movdqa %xmm2, %xmm0
58 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
59 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
60 ; SSE2-NEXT: movdqa %xmm2, %xmm1
63 ; AVX2-LABEL: zext_xor_v8i32:
65 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
66 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
68 %xz = zext <8 x i16> %x to <8 x i32>
69 %yz = zext <8 x i16> %y to <8 x i32>
70 %r = xor <8 x i32> %xz, %yz
; and (sext x), (sext y) with both operands widened from <8 x i16> to <8 x i32>.
; The expected code ANDs the xmm inputs first and sign-extends only the result:
; unpack followed by an arithmetic right shift by 16 on SSE2, a single
; vpmovsxwd on AVX2.
74 define <8 x i32> @sext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
75 ; SSE2-LABEL: sext_and_v8i32:
77 ; SSE2-NEXT: pand %xmm1, %xmm0
78 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
79 ; SSE2-NEXT: psrad $16, %xmm2
80 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
81 ; SSE2-NEXT: psrad $16, %xmm1
82 ; SSE2-NEXT: movdqa %xmm2, %xmm0
85 ; AVX2-LABEL: sext_and_v8i32:
87 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
88 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
90 %xs = sext <8 x i16> %x to <8 x i32>
91 %ys = sext <8 x i16> %y to <8 x i32>
92 %r = and <8 x i32> %xs, %ys
; or (sext x), (sext y) with both operands widened from <8 x i16> to <8 x i32>.
; The expected code ORs the xmm inputs first and sign-extends only the result:
; unpack followed by an arithmetic right shift by 16 on SSE2, a single
; vpmovsxwd on AVX2.
96 define <8 x i32> @sext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
97 ; SSE2-LABEL: sext_or_v8i32:
99 ; SSE2-NEXT: por %xmm1, %xmm0
100 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
101 ; SSE2-NEXT: psrad $16, %xmm2
102 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
103 ; SSE2-NEXT: psrad $16, %xmm1
104 ; SSE2-NEXT: movdqa %xmm2, %xmm0
107 ; AVX2-LABEL: sext_or_v8i32:
109 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
110 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
112 %xs = sext <8 x i16> %x to <8 x i32>
113 %ys = sext <8 x i16> %y to <8 x i32>
114 %r = or <8 x i32> %xs, %ys
; xor (sext x), (sext y) with both operands widened from <8 x i16> to <8 x i32>.
; The expected code XORs the xmm inputs first and sign-extends only the result:
; unpack followed by an arithmetic right shift by 16 on SSE2, a single
; vpmovsxwd on AVX2.
118 define <8 x i32> @sext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
119 ; SSE2-LABEL: sext_xor_v8i32:
121 ; SSE2-NEXT: pxor %xmm1, %xmm0
122 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
123 ; SSE2-NEXT: psrad $16, %xmm2
124 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
125 ; SSE2-NEXT: psrad $16, %xmm1
126 ; SSE2-NEXT: movdqa %xmm2, %xmm0
129 ; AVX2-LABEL: sext_xor_v8i32:
131 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
132 ; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
134 %xs = sext <8 x i16> %x to <8 x i32>
135 %ys = sext <8 x i16> %y to <8 x i32>
136 %r = xor <8 x i32> %xs, %ys
; and (zext x), (zext y) with both operands widened from <8 x i8> to <8 x i16>.
; The expected code is the logic op on the two inputs followed by one AND with
; a RIP-relative constant-pool operand.
; NOTE(review): that constant is presumably the per-lane mask implementing the
; zero-extension; its value is not visible in these checks - confirm.
140 define <8 x i16> @zext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
141 ; SSE2-LABEL: zext_and_v8i16:
143 ; SSE2-NEXT: andps %xmm1, %xmm0
144 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
147 ; AVX2-LABEL: zext_and_v8i16:
149 ; AVX2-NEXT: vandps %xmm1, %xmm0, %xmm0
150 ; AVX2-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
152 %xz = zext <8 x i8> %x to <8 x i16>
153 %yz = zext <8 x i8> %y to <8 x i16>
154 %r = and <8 x i16> %xz, %yz
; or (zext x), (zext y) with both operands widened from <8 x i8> to <8 x i16>.
; The expected code is the OR of the two inputs followed by one AND with a
; RIP-relative constant-pool operand.
; NOTE(review): that constant is presumably the per-lane mask implementing the
; zero-extension; its value is not visible in these checks - confirm.
158 define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
159 ; SSE2-LABEL: zext_or_v8i16:
161 ; SSE2-NEXT: orps %xmm1, %xmm0
162 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
165 ; AVX2-LABEL: zext_or_v8i16:
167 ; AVX2-NEXT: vorps %xmm1, %xmm0, %xmm0
168 ; AVX2-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
170 %xz = zext <8 x i8> %x to <8 x i16>
171 %yz = zext <8 x i8> %y to <8 x i16>
172 %r = or <8 x i16> %xz, %yz
; xor (zext x), (zext y) with both operands widened from <8 x i8> to <8 x i16>.
; The expected code is the XOR of the two inputs followed by one AND with a
; RIP-relative constant-pool operand.
; NOTE(review): that constant is presumably the per-lane mask implementing the
; zero-extension; its value is not visible in these checks - confirm.
176 define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
177 ; SSE2-LABEL: zext_xor_v8i16:
179 ; SSE2-NEXT: xorps %xmm1, %xmm0
180 ; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
183 ; AVX2-LABEL: zext_xor_v8i16:
185 ; AVX2-NEXT: vxorps %xmm1, %xmm0, %xmm0
186 ; AVX2-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
188 %xz = zext <8 x i8> %x to <8 x i16>
189 %yz = zext <8 x i8> %y to <8 x i16>
190 %r = xor <8 x i16> %xz, %yz
; and (sext x), (sext y) with both operands widened from <8 x i8> to <8 x i16>.
; Unlike the <8 x i16> -> <8 x i32> sext cases in this file, the expected code
; extends each input separately (shift left by 8, arithmetic shift right by 8
; on xmm0 and on xmm1) and only then applies the AND.
194 define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
195 ; SSE2-LABEL: sext_and_v8i16:
197 ; SSE2-NEXT: psllw $8, %xmm0
198 ; SSE2-NEXT: psraw $8, %xmm0
199 ; SSE2-NEXT: psllw $8, %xmm1
200 ; SSE2-NEXT: psraw $8, %xmm1
201 ; SSE2-NEXT: pand %xmm1, %xmm0
204 ; AVX2-LABEL: sext_and_v8i16:
206 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm0
207 ; AVX2-NEXT: vpsraw $8, %xmm0, %xmm0
208 ; AVX2-NEXT: vpsllw $8, %xmm1, %xmm1
209 ; AVX2-NEXT: vpsraw $8, %xmm1, %xmm1
210 ; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
212 %xs = sext <8 x i8> %x to <8 x i16>
213 %ys = sext <8 x i8> %y to <8 x i16>
214 %r = and <8 x i16> %xs, %ys
; or (sext x), (sext y) with both operands widened from <8 x i8> to <8 x i16>.
; The expected code extends each input separately (shift left by 8, arithmetic
; shift right by 8 on xmm0 and on xmm1) and only then applies the OR.
218 define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
219 ; SSE2-LABEL: sext_or_v8i16:
221 ; SSE2-NEXT: psllw $8, %xmm0
222 ; SSE2-NEXT: psraw $8, %xmm0
223 ; SSE2-NEXT: psllw $8, %xmm1
224 ; SSE2-NEXT: psraw $8, %xmm1
225 ; SSE2-NEXT: por %xmm1, %xmm0
228 ; AVX2-LABEL: sext_or_v8i16:
230 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm0
231 ; AVX2-NEXT: vpsraw $8, %xmm0, %xmm0
232 ; AVX2-NEXT: vpsllw $8, %xmm1, %xmm1
233 ; AVX2-NEXT: vpsraw $8, %xmm1, %xmm1
234 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
236 %xs = sext <8 x i8> %x to <8 x i16>
237 %ys = sext <8 x i8> %y to <8 x i16>
238 %r = or <8 x i16> %xs, %ys
; xor (sext x), (sext y) with both operands widened from <8 x i8> to <8 x i16>.
; The expected code extends each input separately (shift left by 8, arithmetic
; shift right by 8 on xmm0 and on xmm1) and only then applies the XOR.
242 define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
243 ; SSE2-LABEL: sext_xor_v8i16:
245 ; SSE2-NEXT: psllw $8, %xmm0
246 ; SSE2-NEXT: psraw $8, %xmm0
247 ; SSE2-NEXT: psllw $8, %xmm1
248 ; SSE2-NEXT: psraw $8, %xmm1
249 ; SSE2-NEXT: pxor %xmm1, %xmm0
252 ; AVX2-LABEL: sext_xor_v8i16:
254 ; AVX2-NEXT: vpsllw $8, %xmm0, %xmm0
255 ; AVX2-NEXT: vpsraw $8, %xmm0, %xmm0
256 ; AVX2-NEXT: vpsllw $8, %xmm1, %xmm1
257 ; AVX2-NEXT: vpsraw $8, %xmm1, %xmm1
258 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
260 %xs = sext <8 x i8> %x to <8 x i16>
261 %ys = sext <8 x i8> %y to <8 x i16>
262 %r = xor <8 x i16> %xs, %ys
; and (zext x), (zext y) with both operands widened from <8 x i1> to <8 x i32>.
; In the expected code only xmm1 is ANDed with a constant-pool operand before
; being widened; xmm0 is widened without a mask (unpacked against itself on
; SSE2, vpmovzxwd on AVX2). The final AND is done at the wide width: on ymm
; registers for AVX2, once per 128-bit half for SSE2.
266 define <8 x i32> @bool_zext_and(<8 x i1> %x, <8 x i1> %y) {
267 ; SSE2-LABEL: bool_zext_and:
269 ; SSE2-NEXT: movdqa %xmm0, %xmm3
270 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
271 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
272 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1
273 ; SSE2-NEXT: pxor %xmm4, %xmm4
274 ; SSE2-NEXT: movdqa %xmm1, %xmm2
275 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
276 ; SSE2-NEXT: pand %xmm3, %xmm2
277 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
278 ; SSE2-NEXT: pand %xmm1, %xmm0
279 ; SSE2-NEXT: movdqa %xmm2, %xmm1
282 ; AVX2-LABEL: bool_zext_and:
284 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
285 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
286 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
287 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
289 %xz = zext <8 x i1> %x to <8 x i32>
290 %yz = zext <8 x i1> %y to <8 x i32>
291 %r = and <8 x i32> %xz, %yz
; or (zext x), (zext y) with both operands widened from <8 x i1> to <8 x i32>.
; The two expected sequences differ in shape:
;  - AVX2 ORs the xmm inputs first, masks the result once with a constant-pool
;    operand, and widens it with a single vpmovzxwd.
;  - SSE2 masks each input with a splat of 1, unpacks each against a zeroed
;    register, and ORs the widened values once per 128-bit half.
295 define <8 x i32> @bool_zext_or(<8 x i1> %x, <8 x i1> %y) {
296 ; SSE2-LABEL: bool_zext_or:
298 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
299 ; SSE2-NEXT: pand %xmm2, %xmm0
300 ; SSE2-NEXT: pxor %xmm3, %xmm3
301 ; SSE2-NEXT: movdqa %xmm0, %xmm4
302 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
303 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
304 ; SSE2-NEXT: pand %xmm2, %xmm1
305 ; SSE2-NEXT: movdqa %xmm1, %xmm2
306 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
307 ; SSE2-NEXT: por %xmm4, %xmm2
308 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
309 ; SSE2-NEXT: por %xmm1, %xmm0
310 ; SSE2-NEXT: movdqa %xmm2, %xmm1
313 ; AVX2-LABEL: bool_zext_or:
315 ; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
316 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
317 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
319 %xz = zext <8 x i1> %x to <8 x i32>
320 %yz = zext <8 x i1> %y to <8 x i32>
321 %r = or <8 x i32> %xz, %yz
; xor (zext x), (zext y) with both operands widened from <8 x i1> to <8 x i32>.
; The two expected sequences differ in shape:
;  - AVX2 XORs the xmm inputs first, masks the result once with a constant-pool
;    operand, and widens it with a single vpmovzxwd.
;  - SSE2 masks each input with a splat of 1, unpacks each against a zeroed
;    register, and XORs the widened values once per 128-bit half.
325 define <8 x i32> @bool_zext_xor(<8 x i1> %x, <8 x i1> %y) {
326 ; SSE2-LABEL: bool_zext_xor:
328 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1]
329 ; SSE2-NEXT: pand %xmm2, %xmm0
330 ; SSE2-NEXT: pxor %xmm3, %xmm3
331 ; SSE2-NEXT: movdqa %xmm0, %xmm4
332 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm4 = xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
333 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3]
334 ; SSE2-NEXT: pand %xmm2, %xmm1
335 ; SSE2-NEXT: movdqa %xmm1, %xmm2
336 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
337 ; SSE2-NEXT: pxor %xmm4, %xmm2
338 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
339 ; SSE2-NEXT: pxor %xmm1, %xmm0
340 ; SSE2-NEXT: movdqa %xmm2, %xmm1
343 ; AVX2-LABEL: bool_zext_xor:
345 ; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
346 ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
347 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
349 %xz = zext <8 x i1> %x to <8 x i32>
350 %yz = zext <8 x i1> %y to <8 x i32>
351 %r = xor <8 x i32> %xz, %yz
; and (sext x), (sext y) with both operands widened from <8 x i1> to <8 x i32>.
; In the expected code each input is widened and sign-filled separately (shift
; left by 31, then arithmetic shift right by 31), and the AND is applied to the
; wide values afterwards: on ymm registers for AVX2, once per 128-bit half for
; SSE2. The logic op is not performed on the narrow inputs here.
355 define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
356 ; SSE2-LABEL: bool_sext_and:
358 ; SSE2-NEXT: movdqa %xmm1, %xmm3
359 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
360 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
361 ; SSE2-NEXT: movdqa %xmm0, %xmm2
362 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
363 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
364 ; SSE2-NEXT: pslld $31, %xmm0
365 ; SSE2-NEXT: psrad $31, %xmm0
366 ; SSE2-NEXT: pslld $31, %xmm2
367 ; SSE2-NEXT: psrad $31, %xmm2
368 ; SSE2-NEXT: pslld $31, %xmm1
369 ; SSE2-NEXT: psrad $31, %xmm1
370 ; SSE2-NEXT: pand %xmm0, %xmm1
371 ; SSE2-NEXT: pslld $31, %xmm3
372 ; SSE2-NEXT: psrad $31, %xmm3
373 ; SSE2-NEXT: pand %xmm3, %xmm2
374 ; SSE2-NEXT: movdqa %xmm2, %xmm0
377 ; AVX2-LABEL: bool_sext_and:
379 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
380 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
381 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
382 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
383 ; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
384 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
385 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
387 %xs = sext <8 x i1> %x to <8 x i32>
388 %ys = sext <8 x i1> %y to <8 x i32>
389 %r = and <8 x i32> %xs, %ys
; or (sext x), (sext y) with both operands widened from <8 x i1> to <8 x i32>.
; In the expected code each input is widened and sign-filled separately (shift
; left by 31, then arithmetic shift right by 31), and the OR is applied to the
; wide values afterwards: on ymm registers for AVX2, once per 128-bit half for
; SSE2. The logic op is not performed on the narrow inputs here.
393 define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
394 ; SSE2-LABEL: bool_sext_or:
396 ; SSE2-NEXT: movdqa %xmm1, %xmm3
397 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
398 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
399 ; SSE2-NEXT: movdqa %xmm0, %xmm2
400 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
401 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
402 ; SSE2-NEXT: pslld $31, %xmm0
403 ; SSE2-NEXT: psrad $31, %xmm0
404 ; SSE2-NEXT: pslld $31, %xmm2
405 ; SSE2-NEXT: psrad $31, %xmm2
406 ; SSE2-NEXT: pslld $31, %xmm1
407 ; SSE2-NEXT: psrad $31, %xmm1
408 ; SSE2-NEXT: por %xmm0, %xmm1
409 ; SSE2-NEXT: pslld $31, %xmm3
410 ; SSE2-NEXT: psrad $31, %xmm3
411 ; SSE2-NEXT: por %xmm3, %xmm2
412 ; SSE2-NEXT: movdqa %xmm2, %xmm0
415 ; AVX2-LABEL: bool_sext_or:
417 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
418 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
419 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
420 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
421 ; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
422 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
423 ; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
425 %xs = sext <8 x i1> %x to <8 x i32>
426 %ys = sext <8 x i1> %y to <8 x i32>
427 %r = or <8 x i32> %xs, %ys
; xor (sext x), (sext y) with both operands widened from <8 x i1> to <8 x i32>.
; In the expected code each input is widened and sign-filled separately (shift
; left by 31, then arithmetic shift right by 31), and the XOR is applied to the
; wide values afterwards: on ymm registers for AVX2, once per 128-bit half for
; SSE2. The logic op is not performed on the narrow inputs here.
431 define <8 x i32> @bool_sext_xor(<8 x i1> %x, <8 x i1> %y) {
432 ; SSE2-LABEL: bool_sext_xor:
434 ; SSE2-NEXT: movdqa %xmm1, %xmm3
435 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
436 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
437 ; SSE2-NEXT: movdqa %xmm0, %xmm2
438 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
439 ; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
440 ; SSE2-NEXT: pslld $31, %xmm0
441 ; SSE2-NEXT: psrad $31, %xmm0
442 ; SSE2-NEXT: pslld $31, %xmm2
443 ; SSE2-NEXT: psrad $31, %xmm2
444 ; SSE2-NEXT: pslld $31, %xmm1
445 ; SSE2-NEXT: psrad $31, %xmm1
446 ; SSE2-NEXT: pxor %xmm0, %xmm1
447 ; SSE2-NEXT: pslld $31, %xmm3
448 ; SSE2-NEXT: psrad $31, %xmm3
449 ; SSE2-NEXT: pxor %xmm3, %xmm2
450 ; SSE2-NEXT: movdqa %xmm2, %xmm0
453 ; AVX2-LABEL: bool_sext_xor:
455 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
456 ; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
457 ; AVX2-NEXT: vpslld $31, %ymm0, %ymm0
458 ; AVX2-NEXT: vpsrad $31, %ymm0, %ymm0
459 ; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
460 ; AVX2-NEXT: vpsrad $31, %ymm1, %ymm1
461 ; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
463 %xs = sext <8 x i1> %x to <8 x i32>
464 %ys = sext <8 x i1> %y to <8 x i32>
465 %r = xor <8 x i32> %xs, %ys