1 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
3 ; Test that no vperm of the vector compare is needed for the extracts.
;
; The IR below loads a <4 x i8>, compares it for equality against
; zeroinitializer, and branches on elements 0 and 1 of the <4 x i1> result.
; The expected output builds the zero operand with vgbm, does one byte
; compare (vceqb), widens the result with vuphb/vuphh, and then reads each
; tested element directly with vlgvf. No vperm appears in the sequence.
6 ; CHECK: # %bb.0: # %bb
7 ; CHECK-NEXT: vlrepf %v0, 0(%r1)
8 ; CHECK-NEXT: vgbm %v1, 0
9 ; CHECK-NEXT: vceqb %v0, %v0, %v1
10 ; CHECK-NEXT: vuphb %v0, %v0
11 ; CHECK-NEXT: vuphh %v0, %v0
; Element 0 of the widened compare result feeds the first conditional branch.
12 ; CHECK-NEXT: vlgvf %r0, %v0, 0
13 ; CHECK-NEXT: tmll %r0, 1
14 ; CHECK-NEXT: je .LBB0_2
15 ; CHECK-NEXT: # %bb.1: # %bb1
16 ; CHECK-NEXT: .LBB0_2: # %bb2
; Element 1 is read from the same register %v0; the compare is not redone.
17 ; CHECK-NEXT: vlgvf %r0, %v0, 1
18 ; CHECK-NEXT: tmll %r0, 1
19 ; CHECK-NEXT: je .LBB0_4
20 ; CHECK-NEXT: # %bb.3: # %bb3
21 ; CHECK-NEXT: .LBB0_4: # %bb4
; The load address is undef; the single vector compare below is shared by
; both extractelement instructions.
23 %tmp = load <4 x i8>, ptr undef
24 %tmp1 = icmp eq <4 x i8> zeroinitializer, %tmp
; First extract: element 0 selects between %bb1 and %bb2.
25 %tmp2 = extractelement <4 x i1> %tmp1, i32 0
26 br i1 %tmp2, label %bb1, label %bb2
; Second extract: element 1 of the same compare selects between %bb3 and %bb4.
32 %tmp3 = extractelement <4 x i1> %tmp1, i32 1
33 br i1 %tmp3, label %bb3, label %bb4
42 ; Test that a zero index in the permute vector is used instead of VGBM, with
43 ; a zero index into the other source operand.
;
; The shuffle mask <1, 2, 0, 3> indexes the concatenation of %arg (elements
; 0-1) and the zero vector (elements 2-3), so result elements 1 and 3 must be
; zero. The expected constant-pool mask is 1, 18, 0, 18. Bytes 1 and 0 pick
; the %arg elements from the first vperm source (%v24). Byte 18 (0x12) picks
; byte 2 of the second vperm source, since vperm indices 16-31 address the
; second source. That second source is the mask register %v0 itself, and
; byte 2 of the mask holds 0, so the mask doubles as the source of zero bytes
; and no separate zero vector has to be built with vgbm.
44 define <4 x i8> @fun1(<2 x i8> %arg) {
45 ; CHECK-LABEL:.LCPI1_0:
46 ; CHECK-NEXT: .byte 1 # 0x1
47 ; CHECK-NEXT: .byte 18 # 0x12
48 ; CHECK-NEXT: .byte 0 # 0x0
49 ; CHECK-NEXT: .byte 18 # 0x12
; The shuffle defines only four result bytes; the other twelve bytes of the
; 16-byte mask are emitted as .space 1 each.
50 ; CHECK-NEXT: .space 1
51 ; CHECK-NEXT: .space 1
52 ; CHECK-NEXT: .space 1
53 ; CHECK-NEXT: .space 1
54 ; CHECK-NEXT: .space 1
55 ; CHECK-NEXT: .space 1
56 ; CHECK-NEXT: .space 1
57 ; CHECK-NEXT: .space 1
58 ; CHECK-NEXT: .space 1
59 ; CHECK-NEXT: .space 1
60 ; CHECK-NEXT: .space 1
61 ; CHECK-NEXT: .space 1
63 ; CHECK-NEXT: .globl fun1
64 ; CHECK-NEXT: .p2align 4
65 ; CHECK-NEXT: .type fun1,@function
66 ; CHECK-NEXT: fun1: # @fun1
67 ; CHECK-NEXT: .cfi_startproc
68 ; CHECK-NEXT: # %bb.0:
; Load the mask from the constant pool into %v0, then permute %v24 in place
; with %v0 serving as both the second source and the selector.
69 ; CHECK-NEXT: larl %r1, .LCPI1_0
70 ; CHECK-NEXT: vl %v0, 0(%r1), 3
71 ; CHECK-NEXT: vperm %v24, %v24, %v0, %v0
; Mask indices 0-1 refer to %arg, indices 2-3 to the zeroinitializer operand.
73 %res = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer,
74 <4 x i32> <i32 1, i32 2, i32 0, i32 3>
78 ; Same, but with the first byte indexing into an element of the zero vector.
;
; The shuffle mask <3, 1, 1, 2> takes result elements 0 and 3 from the zero
; vector and result elements 1 and 2 from %arg element 1. The expected
; constant-pool mask is 0, 17, 17, 0, and the vperm sources are swapped
; relative to fun1: the mask register %v0 is the first source and %arg (%v24)
; is the second. Index 0 therefore reads byte 0 of the mask itself, which
; holds 0, and index 17 (0x11) reads byte 1 of %v24.
79 define <4 x i8> @fun2(<2 x i8> %arg) {
80 ; CHECK-LABEL:.LCPI2_0:
81 ; CHECK-NEXT: .byte 0 # 0x0
82 ; CHECK-NEXT: .byte 17 # 0x11
83 ; CHECK-NEXT: .byte 17 # 0x11
84 ; CHECK-NEXT: .byte 0 # 0x0
; The shuffle defines only four result bytes; the other twelve bytes of the
; 16-byte mask are emitted as .space 1 each.
85 ; CHECK-NEXT: .space 1
86 ; CHECK-NEXT: .space 1
87 ; CHECK-NEXT: .space 1
88 ; CHECK-NEXT: .space 1
89 ; CHECK-NEXT: .space 1
90 ; CHECK-NEXT: .space 1
91 ; CHECK-NEXT: .space 1
92 ; CHECK-NEXT: .space 1
93 ; CHECK-NEXT: .space 1
94 ; CHECK-NEXT: .space 1
95 ; CHECK-NEXT: .space 1
96 ; CHECK-NEXT: .space 1
98 ; CHECK-NEXT: .globl fun2
99 ; CHECK-NEXT: .p2align 4
100 ; CHECK-NEXT: .type fun2,@function
101 ; CHECK-NEXT:fun2: # @fun2
102 ; CHECK-NEXT: .cfi_startproc
103 ; CHECK-NEXT:# %bb.0:
; Load the mask into %v0; here %v0 is the first vperm source and the selector,
; and the incoming argument %v24 is the second source.
104 ; CHECK-NEXT: larl %r1, .LCPI2_0
105 ; CHECK-NEXT: vl %v0, 0(%r1), 3
106 ; CHECK-NEXT: vperm %v24, %v0, %v24, %v0
107 ; CHECK-NEXT: br %r14
; Mask indices 0-1 refer to %arg, indices 2-3 to the zeroinitializer operand.
108 %res = shufflevector <2 x i8> %arg, <2 x i8> zeroinitializer,
109 <4 x i32> <i32 3, i32 1, i32 1, i32 2>