1 ; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode() with SystemZ vector intrinsics.
4 ; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
6 declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
7 declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
8 declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)
10 ; PACKS_CC (operand elements are 0): i64 -> i32
; Field 0 (the packed vector) of the {vector, CC} result of vpksgs on two
; all-zero vectors is ANDed with a splat of 1. The checked output is a vgbm
; that zeroes %v24 right after the block label, so no pack is expected first.
11 define <4 x i32> @f0() {
13 ; CHECK-LABEL: # %bb.0:
14 ; CHECK-NEXT: vgbm %v24, 0
16 %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
17 %extr = extractvalue {<4 x i32>, i32} %call, 0
18 %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
22 ; PACKS_CC (operand elements are 1): i64 -> i32
23 ; NOTE: The vector AND is optimized away, but vrepig+vpksgs is used instead
24 ; of vrepif. Similarly for more test cases below.
; Both vpksgs inputs are splats of 1 and field 0 of the result is ANDed with a
; splat of 1. The checked output is the i64 splat (vrepig) followed directly
; by the pack into %v24.
25 define <4 x i32> @f1() {
27 ; CHECK-LABEL: # %bb.0:
28 ; CHECK-NEXT: vrepig %v0, 1
29 ; CHECK-NEXT: vpksgs %v24, %v0, %v0
31 %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
32 %extr = extractvalue {<4 x i32>, i32} %call, 0
33 %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
37 ; PACKS_CC (operand elements are 0): i32 -> i16
; Field 0 of the {vector, CC} result of vpksfs on two all-zero vectors is
; ANDed with a splat of 1. The checked output is a vgbm that zeroes %v24 right
; after the block label, so no pack is expected first.
38 define <8 x i16> @f2() {
40 ; CHECK-LABEL: # %bb.0:
41 ; CHECK-NEXT: vgbm %v24, 0
43 %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
44 <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
45 %extr = extractvalue {<8 x i16>, i32} %call, 0
46 %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
50 ; PACKS_CC (operand elements are 1): i32 -> i16
; Both vpksfs inputs are splats of 1 and field 0 of the result is ANDed with a
; splat of 1. The checked output is the i32 splat (vrepif) followed directly
; by the pack into %v24.
51 define <8 x i16> @f3() {
53 ; CHECK-LABEL: # %bb.0:
54 ; CHECK-NEXT: vrepif %v0, 1
55 ; CHECK-NEXT: vpksfs %v24, %v0, %v0
57 %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
58 <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
59 %extr = extractvalue {<8 x i16>, i32} %call, 0
60 %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
64 ; PACKS_CC (operand elements are 0): i16 -> i8
; Field 0 of the {vector, CC} result of vpkshs on two all-zero vectors is
; ANDed with a splat of 1. The checked output is a vgbm that zeroes %v24 right
; after the block label, so no pack is expected first.
65 define <16 x i8> @f4() {
67 ; CHECK-LABEL: # %bb.0:
68 ; CHECK-NEXT: vgbm %v24, 0
70 %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
71 <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
72 <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
73 %extr = extractvalue {<16 x i8>, i32} %call, 0
74 %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
75 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
79 ; PACKS_CC (operand elements are 1): i16 -> i8
; Both vpkshs inputs are splats of 1 and field 0 of the result is ANDed with a
; splat of 1. The checked output is the i16 splat (vrepih) followed directly
; by the pack into %v24.
80 define <16 x i8> @f5() {
82 ; CHECK-LABEL: # %bb.0:
83 ; CHECK-NEXT: vrepih %v0, 1
84 ; CHECK-NEXT: vpkshs %v24, %v0, %v0
86 %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
87 <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
88 <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
89 %extr = extractvalue {<16 x i8>, i32} %call, 0
90 %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
91 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
95 declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
96 declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
97 declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)
99 ; PACKLS_CC (operand elements are 0): i64 -> i32
; Field 0 of the {vector, CC} result of vpklsgs on two all-zero vectors is
; ANDed with a splat of 1. Expected code is just a vgbm that zeroes %v24
; followed by the return: neither the pack nor the AND is emitted.
100 define <4 x i32> @f6() {
102 ; CHECK-LABEL: # %bb.0:
103 ; CHECK-NEXT: vgbm %v24, 0
104 ; CHECK-NEXT: br %r14
105 %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
106 %extr = extractvalue {<4 x i32>, i32} %call, 0
107 %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
111 ; PACKLS_CC (operand elements are 1): i64 -> i32
; Both vpklsgs inputs are splats of 1 and field 0 of the result is ANDed with
; a splat of 1. Expected code is the i64 splat (vrepig), the pack into %v24
; and the return, with no AND instruction in between.
112 define <4 x i32> @f7() {
114 ; CHECK-LABEL: # %bb.0:
115 ; CHECK-NEXT: vrepig %v0, 1
116 ; CHECK-NEXT: vpklsgs %v24, %v0, %v0
117 ; CHECK-NEXT: br %r14
118 %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
119 %extr = extractvalue {<4 x i32>, i32} %call, 0
120 %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
124 ; PACKLS_CC (operand elements are 0): i32 -> i16
; Field 0 of the {vector, CC} result of vpklsfs on two all-zero vectors is
; ANDed with a splat of 1. Expected code is just a vgbm that zeroes %v24
; followed by the return: neither the pack nor the AND is emitted.
125 define <8 x i16> @f8() {
127 ; CHECK-LABEL: # %bb.0:
128 ; CHECK-NEXT: vgbm %v24, 0
129 ; CHECK-NEXT: br %r14
130 %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
131 <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
132 %extr = extractvalue {<8 x i16>, i32} %call, 0
133 %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
137 ; PACKLS_CC (operand elements are 1): i32 -> i16
; Both vpklsfs inputs are splats of 1 and field 0 of the result is ANDed with
; a splat of 1. Expected code is the i32 splat (vrepif), the pack into %v24
; and the return, with no AND instruction in between.
138 define <8 x i16> @f9() {
140 ; CHECK-LABEL: # %bb.0:
141 ; CHECK-NEXT: vrepif %v0, 1
142 ; CHECK-NEXT: vpklsfs %v24, %v0, %v0
143 ; CHECK-NEXT: br %r14
144 %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
145 <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
146 %extr = extractvalue {<8 x i16>, i32} %call, 0
147 %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
151 ; PACKLS_CC (operand elements are 0): i16 -> i8
; Field 0 of the {vector, CC} result of vpklshs on two all-zero vectors is
; ANDed with a splat of 1. Expected code is just a vgbm that zeroes %v24
; followed by the return: neither the pack nor the AND is emitted.
152 define <16 x i8> @f10() {
154 ; CHECK-LABEL: # %bb.0:
155 ; CHECK-NEXT: vgbm %v24, 0
156 ; CHECK-NEXT: br %r14
157 %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
158 <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
159 <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
160 %extr = extractvalue {<16 x i8>, i32} %call, 0
161 %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
162 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
166 ; PACKLS_CC (operand elements are 1): i16 -> i8
; Both vpklshs inputs are splats of 1 and field 0 of the result is ANDed with
; a splat of 1. Expected code is the i16 splat (vrepih), the pack into %v24
; and the return, with no AND instruction in between.
167 define <16 x i8> @f11() {
169 ; CHECK-LABEL: # %bb.0:
170 ; CHECK-NEXT: vrepih %v0, 1
171 ; CHECK-NEXT: vpklshs %v24, %v0, %v0
172 ; CHECK-NEXT: br %r14
173 %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
174 <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
175 <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
176 %extr = extractvalue {<16 x i8>, i32} %call, 0
177 %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
178 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
182 declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
183 declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
184 declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)
186 ; PACKS (operand elements are 0): i64 -> i32
; The result of vpksg (the variant returning only a vector) on two all-zero
; vectors is ANDed with a splat of 1. Expected code is just a vgbm that zeroes
; %v24 followed by the return: neither the pack nor the AND is emitted.
187 define <4 x i32> @f12() {
189 ; CHECK-LABEL: # %bb.0:
190 ; CHECK-NEXT: vgbm %v24, 0
191 ; CHECK-NEXT: br %r14
192 %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
193 %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
197 ; PACKS (operand elements are 1): i64 -> i32
; Both vpksg inputs are splats of 1 and the result is ANDed with a splat of 1.
; Expected code is the i64 splat (vrepig), the pack into %v24 and the return,
; with no AND instruction in between.
198 define <4 x i32> @f13() {
200 ; CHECK-LABEL: # %bb.0:
201 ; CHECK-NEXT: vrepig %v0, 1
202 ; CHECK-NEXT: vpksg %v24, %v0, %v0
203 ; CHECK-NEXT: br %r14
204 %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
205 %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
209 ; PACKS (operand elements are 0): i32 -> i16
; The result of vpksf on two all-zero vectors is ANDed with a splat of 1.
; Expected code is just a vgbm that zeroes %v24 followed by the return:
; neither the pack nor the AND is emitted.
210 define <8 x i16> @f14() {
212 ; CHECK-LABEL: # %bb.0:
213 ; CHECK-NEXT: vgbm %v24, 0
214 ; CHECK-NEXT: br %r14
215 %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
216 <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
217 %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
221 ; PACKS (operand elements are 1): i32 -> i16
; Both vpksf inputs are splats of 1 and the result is ANDed with a splat of 1.
; Expected code is the i32 splat (vrepif), the pack into %v24 and the return,
; with no AND instruction in between.
222 define <8 x i16> @f15() {
224 ; CHECK-LABEL: # %bb.0:
225 ; CHECK-NEXT: vrepif %v0, 1
226 ; CHECK-NEXT: vpksf %v24, %v0, %v0
227 ; CHECK-NEXT: br %r14
228 %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
229 <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
230 %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
234 ; PACKS (operand elements are 0): i16 -> i8
; The result of vpksh on two all-zero vectors is ANDed with a splat of 1.
; Expected code is just a vgbm that zeroes %v24 followed by the return:
; neither the pack nor the AND is emitted.
235 define <16 x i8> @f16() {
237 ; CHECK-LABEL: # %bb.0:
238 ; CHECK-NEXT: vgbm %v24, 0
239 ; CHECK-NEXT: br %r14
240 %call = call <16 x i8> @llvm.s390.vpksh(
241 <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
242 <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
243 %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
244 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
248 ; PACKS (operand elements are 1): i16 -> i8
; Both vpksh inputs are splats of 1 and the result is ANDed with a splat of 1.
; Expected code is the i16 splat (vrepih), the pack into %v24 and the return,
; with no AND instruction in between.
249 define <16 x i8> @f17() {
251 ; CHECK-LABEL: # %bb.0:
252 ; CHECK-NEXT: vrepih %v0, 1
253 ; CHECK-NEXT: vpksh %v24, %v0, %v0
254 ; CHECK-NEXT: br %r14
255 %call = call <16 x i8> @llvm.s390.vpksh(
256 <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
257 <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
258 %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
259 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
263 declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
264 declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
265 declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)
267 ; PACKLS (operand elements are 0): i64 -> i32
; The result of vpklsg on two all-zero vectors is ANDed with a splat of 1.
; Expected code is just a vgbm that zeroes %v24 followed by the return:
; neither the pack nor the AND is emitted.
268 define <4 x i32> @f18() {
270 ; CHECK-LABEL: # %bb.0:
271 ; CHECK-NEXT: vgbm %v24, 0
272 ; CHECK-NEXT: br %r14
273 %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
274 %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
278 ; PACKLS (operand elements are 1): i64 -> i32
; Both vpklsg inputs are splats of 1 and the result is ANDed with a splat of
; 1. Expected code is the i64 splat (vrepig), the pack into %v24 and the
; return, with no AND instruction in between.
279 define <4 x i32> @f19() {
281 ; CHECK-LABEL: # %bb.0:
282 ; CHECK-NEXT: vrepig %v0, 1
283 ; CHECK-NEXT: vpklsg %v24, %v0, %v0
284 ; CHECK-NEXT: br %r14
285 %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
286 %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
290 ; PACKLS (operand elements are 0): i32 -> i16
; The result of vpklsf on two all-zero vectors is ANDed with a splat of 1.
; Expected code is just a vgbm that zeroes %v24 followed by the return:
; neither the pack nor the AND is emitted.
291 define <8 x i16> @f20() {
293 ; CHECK-LABEL: # %bb.0:
294 ; CHECK-NEXT: vgbm %v24, 0
295 ; CHECK-NEXT: br %r14
296 %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
297 <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
298 %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
302 ; PACKLS (operand elements are 1): i32 -> i16
; Both vpklsf inputs are splats of 1 and the result is ANDed with a splat of
; 1. Expected code is the i32 splat (vrepif), the pack into %v24 and the
; return, with no AND instruction in between.
303 define <8 x i16> @f21() {
305 ; CHECK-LABEL: # %bb.0:
306 ; CHECK-NEXT: vrepif %v0, 1
307 ; CHECK-NEXT: vpklsf %v24, %v0, %v0
308 ; CHECK-NEXT: br %r14
309 %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
310 <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
311 %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
315 ; PACKLS (operand elements are 0): i16 -> i8
; The result of vpklsh on two all-zero vectors is ANDed with a splat of 1.
; Expected code is just a vgbm that zeroes %v24 followed by the return:
; neither the pack nor the AND is emitted.
316 define <16 x i8> @f22() {
318 ; CHECK-LABEL: # %bb.0:
319 ; CHECK-NEXT: vgbm %v24, 0
320 ; CHECK-NEXT: br %r14
321 %call = call <16 x i8> @llvm.s390.vpklsh(
322 <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
323 <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
324 %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
325 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
329 ; PACKLS (operand elements are 1): i16 -> i8
; Both vpklsh inputs are splats of 1 and the result is ANDed with a splat of
; 1. Expected code is the i16 splat (vrepih), the pack into %v24 and the
; return, with no AND instruction in between.
330 define <16 x i8> @f23() {
332 ; CHECK-LABEL: # %bb.0:
333 ; CHECK-NEXT: vrepih %v0, 1
334 ; CHECK-NEXT: vpklsh %v24, %v0, %v0
335 ; CHECK-NEXT: br %r14
336 %call = call <16 x i8> @llvm.s390.vpklsh(
337 <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
338 <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
339 %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
340 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
344 declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)
346 ; VPDI (operand elements are 0):
; vpdi is called on two all-zero <2 x i64> vectors with immediate 0, and the
; result is ANDed with a splat of 1. Expected code is just a vgbm that zeroes
; %v24 followed by the return: neither the vpdi nor the AND is emitted.
347 define <2 x i64> @f24() {
349 ; CHECK-LABEL: # %bb.0:
350 ; CHECK-NEXT: vgbm %v24, 0
351 ; CHECK-NEXT: br %r14
352 %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 0>,
353 <2 x i64> <i64 0, i64 0>, i32 0)
354 %res = and <2 x i64> %perm, <i64 1, i64 1>
358 ; VPDI (operand elements are 1):
; vpdi is called on two splats of 1 with immediate 0, and the result is ANDed
; with a splat of 1. Expected code is the i64 splat (vrepig), the vpdi into
; %v24 and the return, with no AND instruction in between.
359 define <2 x i64> @f25() {
361 ; CHECK-LABEL: # %bb.0:
362 ; CHECK-NEXT: vrepig %v0, 1
363 ; CHECK-NEXT: vpdi %v24, %v0, %v0, 0
364 ; CHECK-NEXT: br %r14
365 %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 1, i64 1>,
366 <2 x i64> <i64 1, i64 1>, i32 0)
367 %res = and <2 x i64> %perm, <i64 1, i64 1>
371 declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)
373 ; VSLDB (operand elements are 0):
; vsldb is called on two all-zero <16 x i8> vectors and the result is ANDed
; with a splat of 1. Expected code is just a vgbm that zeroes %v24 followed by
; the return: neither the vsldb nor the AND is emitted.
374 define <16 x i8> @f26() {
376 ; CHECK-LABEL: # %bb.0:
377 ; CHECK-NEXT: vgbm %v24, 0
378 ; CHECK-NEXT: br %r14
379 %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
380 <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
381 i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>
382 <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
383 i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
386 %res = and <16 x i8> %shfd, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
387 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
391 ; VSLDB (operand elements are 1):
; vsldb is called on two splats of 1 and the result is ANDed with a splat of
; 1. Expected code is the i8 splat (vrepib), the vsldb into %v24 with shift
; amount 1, and the return, with no AND instruction in between.
392 define <16 x i8> @f27() {
394 ; CHECK-LABEL: # %bb.0:
395 ; CHECK-NEXT: vrepib %v0, 1
396 ; CHECK-NEXT: vsldb %v24, %v0, %v0, 1
397 ; CHECK-NEXT: br %r14
398 %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
399 <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
400 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>
401 <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
402 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
405 %res = and <16 x i8> %shfd, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
406 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
410 ; Test that intrinsic CC result is recognized.
; Field 1 (the i32 CC value) of the vpksfs result on arbitrary inputs %a and
; %b is ANDed with -4, i.e. every bit except the low two. Expected code only
; loads the constant 0 into %r2 and returns, so the masked value is folded to
; zero and no pack instruction is emitted.
411 define i32 @f28(<4 x i32> %a, <4 x i32> %b) {
413 ; CHECK-LABEL: # %bb.0:
414 ; CHECK-NEXT: lhi %r2, 0
415 ; CHECK-NEXT: br %r14
416 %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b)
417 %cc = extractvalue {<8 x i16>, i32} %call, 1
418 %res = and i32 %cc, -4
422 declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)
424 ; Test VPERM (operand elements are 0):
; All three vperm operands (the two data vectors and the third operand) are
; all-zero, and the result is ANDed with a splat of 1. Expected code is just a
; vgbm that zeroes %v24 followed by the return: no vperm and no AND.
425 define <16 x i8> @f29() {
427 ; CHECK-LABEL: # %bb.0:
428 ; CHECK-NEXT: vgbm %v24, 0
429 ; CHECK-NEXT: br %r14
430 %perm = call <16 x i8> @llvm.s390.vperm(
431 <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
432 i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
433 <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
434 i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
435 <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
436 i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
437 %res = and <16 x i8> %perm, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
438 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
442 ; Test VPERM (operand elements are 1):
; The first two vperm operands are splats of 1 and the third operand is
; all-zero; the result is ANDed with a splat of 1. Expected code materializes
; the zero vector (vgbm into %v0) and the i8 splat (vrepib into %v1), keeps
; the vperm into %v24, and returns with no AND instruction in between.
443 define <16 x i8> @f30() {
445 ; CHECK-LABEL: # %bb.0:
446 ; CHECK-NEXT: vgbm %v0, 0
447 ; CHECK-NEXT: vrepib %v1, 1
448 ; CHECK-NEXT: vperm %v24, %v1, %v1, %v0
449 ; CHECK-NEXT: br %r14
450 %perm = call <16 x i8> @llvm.s390.vperm(
451 <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
452 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
453 <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
454 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
455 <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
456 i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
457 %res = and <16 x i8> %perm, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
458 i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>