; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX2
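
; Tests for and/or/xor where both operands are zext/sext of a narrower
; vector type (i16 -> i32, i8 -> i16, and i1 -> i32 bool vectors).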
define <8 x i32> @zext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pand %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @sext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i16> @zext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    psraw $8, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    psraw $8, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    psraw $8, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i32> @bool_zext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    por %xmm1, %xmm2
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm2
; SSE2-NEXT:    pand {{.*}}(%rip), %xmm2
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pslld $31, %xmm3
; SSE2-NEXT:    psrad $31, %xmm3
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pslld $31, %xmm3
; SSE2-NEXT:    psrad $31, %xmm3
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pslld $31, %xmm3
; SSE2-NEXT:    psrad $31, %xmm3
; SSE2-NEXT:    pxor %xmm3, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    vpslld $31, %ymm1, %ymm1
; AVX2-NEXT:    vpsrad $31, %ymm1, %ymm1
; AVX2-NEXT:    vpxor %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}