1 ; Test replications of a byte-swapped scalar memory value.
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
5 declare i16 @llvm.bswap.i16(i16)
6 declare i32 @llvm.bswap.i32(i32)
7 declare i64 @llvm.bswap.i64(i64)
8 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
9 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
10 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
12 ; Test a v8i16 replicating load with no offset.
; The i16 at %ptr is loaded, byte-swapped as a scalar, placed in lane 0 and
; then replicated to all eight lanes.  The expected output contains a
; vlbrreph that addresses %r2 with displacement 0.
13 define <8 x i16> @f1(i16 *%ptr) {
15 ; CHECK: vlbrreph %v24, 0(%r2)
17 %scalar = load i16, i16 *%ptr
18 %swap = call i16 @llvm.bswap.i16(i16 %scalar)
19 %val = insertelement <8 x i16> undef, i16 %swap, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
20 %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
21 <8 x i32> zeroinitializer
25 ; Test a v8i16 replicating load with the maximum in-range offset.
; The address is %base plus 2047 i16 elements, i.e. 2047 * 2 = 4094 bytes.
; The expected output encodes that value directly as the displacement of
; vlbrreph.
26 define <8 x i16> @f2(i16 *%base) {
28 ; CHECK: vlbrreph %v24, 4094(%r2)
30 %ptr = getelementptr i16, i16 *%base, i64 2047
31 %scalar = load i16, i16 *%ptr
32 %swap = call i16 @llvm.bswap.i16(i16 %scalar)
33 %val = insertelement <8 x i16> undef, i16 %swap, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
34 %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
35 <8 x i32> zeroinitializer
39 ; Test a v8i16 replicating load with the first out-of-range offset.
; The address is %base plus 2048 i16 elements, i.e. 2048 * 2 = 4096 bytes.
; The expected output adds 4096 to %r2 with aghi first and then issues
; vlbrreph with displacement 0, instead of encoding the offset in the load.
40 define <8 x i16> @f3(i16 *%base) {
42 ; CHECK: aghi %r2, 4096
43 ; CHECK: vlbrreph %v24, 0(%r2)
45 %ptr = getelementptr i16, i16 *%base, i64 2048
46 %scalar = load i16, i16 *%ptr
47 %swap = call i16 @llvm.bswap.i16(i16 %scalar)
48 %val = insertelement <8 x i16> undef, i16 %swap, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
49 %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
50 <8 x i32> zeroinitializer
54 ; Test a v8i16 replicating load using a vector bswap.
; Here the loaded i16 is replicated first and the byte swap is applied to the
; whole <8 x i16> vector afterwards.  The expected output is the same
; vlbrreph form as for the scalar-bswap variant with no offset.
55 define <8 x i16> @f4(i16 *%ptr) {
57 ; CHECK: vlbrreph %v24, 0(%r2)
59 %scalar = load i16, i16 *%ptr
60 %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
61 %rep = shufflevector <8 x i16> %val, <8 x i16> undef,
62 <8 x i32> zeroinitializer
; Byte-swap each lane of the already-replicated vector.
63 %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %rep)
67 ; Test a v4i32 replicating load with no offset.
; The i32 at %ptr is loaded, byte-swapped as a scalar, placed in lane 0 and
; then replicated to all four lanes.  The expected output contains a
; vlbrrepf that addresses %r2 with displacement 0.
68 define <4 x i32> @f5(i32 *%ptr) {
70 ; CHECK: vlbrrepf %v24, 0(%r2)
72 %scalar = load i32, i32 *%ptr
73 %swap = call i32 @llvm.bswap.i32(i32 %scalar)
74 %val = insertelement <4 x i32> undef, i32 %swap, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
75 %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
76 <4 x i32> zeroinitializer
80 ; Test a v4i32 replicating load with the maximum in-range offset.
; The address is %base plus 1023 i32 elements, i.e. 1023 * 4 = 4092 bytes.
; The expected output encodes that value directly as the displacement of
; vlbrrepf.
81 define <4 x i32> @f6(i32 *%base) {
83 ; CHECK: vlbrrepf %v24, 4092(%r2)
85 %ptr = getelementptr i32, i32 *%base, i64 1023
86 %scalar = load i32, i32 *%ptr
87 %swap = call i32 @llvm.bswap.i32(i32 %scalar)
88 %val = insertelement <4 x i32> undef, i32 %swap, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
89 %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
90 <4 x i32> zeroinitializer
94 ; Test a v4i32 replicating load with the first out-of-range offset.
; The address is %base plus 1024 i32 elements, i.e. 1024 * 4 = 4096 bytes.
; The expected output adds 4096 to %r2 with aghi first and then issues
; vlbrrepf with displacement 0, instead of encoding the offset in the load.
95 define <4 x i32> @f7(i32 *%base) {
97 ; CHECK: aghi %r2, 4096
98 ; CHECK: vlbrrepf %v24, 0(%r2)
100 %ptr = getelementptr i32, i32 *%base, i64 1024
101 %scalar = load i32, i32 *%ptr
102 %swap = call i32 @llvm.bswap.i32(i32 %scalar)
103 %val = insertelement <4 x i32> undef, i32 %swap, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
104 %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
105 <4 x i32> zeroinitializer
109 ; Test a v4i32 replicating load using a vector bswap.
; Here the loaded i32 is replicated first and the byte swap is applied to the
; whole <4 x i32> vector afterwards.  The expected output is the same
; vlbrrepf form as for the scalar-bswap variant with no offset.
110 define <4 x i32> @f8(i32 *%ptr) {
112 ; CHECK: vlbrrepf %v24, 0(%r2)
114 %scalar = load i32, i32 *%ptr
115 %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
116 %rep = shufflevector <4 x i32> %val, <4 x i32> undef,
117 <4 x i32> zeroinitializer
; Byte-swap each lane of the already-replicated vector.
118 %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %rep)
122 ; Test a v2i64 replicating load with no offset.
; The i64 at %ptr is loaded, byte-swapped as a scalar, placed in lane 0 and
; then replicated to both lanes.  The expected output contains a vlbrrepg
; that addresses %r2 with displacement 0.
123 define <2 x i64> @f9(i64 *%ptr) {
125 ; CHECK: vlbrrepg %v24, 0(%r2)
127 %scalar = load i64, i64 *%ptr
128 %swap = call i64 @llvm.bswap.i64(i64 %scalar)
129 %val = insertelement <2 x i64> undef, i64 %swap, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
130 %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
131 <2 x i32> zeroinitializer
135 ; Test a v2i64 replicating load with the maximum in-range offset.
; The address is %base plus 511 i64 elements, i.e. 511 * 8 = 4088 bytes.
; The expected output encodes that value directly as the displacement of
; vlbrrepg.  Note that the getelementptr index is typed i32 in this test.
136 define <2 x i64> @f10(i64 *%base) {
138 ; CHECK: vlbrrepg %v24, 4088(%r2)
140 %ptr = getelementptr i64, i64 *%base, i32 511
141 %scalar = load i64, i64 *%ptr
142 %swap = call i64 @llvm.bswap.i64(i64 %scalar)
143 %val = insertelement <2 x i64> undef, i64 %swap, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
144 %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
145 <2 x i32> zeroinitializer
149 ; Test a v2i64 replicating load with the first out-of-range offset.
; The address is %base plus 512 i64 elements, i.e. 512 * 8 = 4096 bytes.
; The expected output adds 4096 to %r2 with aghi first and then issues
; vlbrrepg with displacement 0, instead of encoding the offset in the load.
; Note that the getelementptr index is typed i32 in this test.
150 define <2 x i64> @f11(i64 *%base) {
152 ; CHECK: aghi %r2, 4096
153 ; CHECK: vlbrrepg %v24, 0(%r2)
155 %ptr = getelementptr i64, i64 *%base, i32 512
156 %scalar = load i64, i64 *%ptr
157 %swap = call i64 @llvm.bswap.i64(i64 %scalar)
158 %val = insertelement <2 x i64> undef, i64 %swap, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
159 %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
160 <2 x i32> zeroinitializer
164 ; Test a v2i64 replicating load using a vector bswap.
; Here the loaded i64 is replicated first and the byte swap is applied to the
; whole <2 x i64> vector afterwards.  The expected output is the same
; vlbrrepg form as for the scalar-bswap variant with no offset.
165 define <2 x i64> @f12(i64 *%ptr) {
167 ; CHECK: vlbrrepg %v24, 0(%r2)
169 %scalar = load i64, i64 *%ptr
170 %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
171 %rep = shufflevector <2 x i64> %val, <2 x i64> undef,
172 <2 x i32> zeroinitializer
; Byte-swap each lane of the already-replicated vector.
173 %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %rep)
177 ; Test a v8i16 replicating load with an index.
; The address is %base + %index i16 elements + 1023 i16 elements.  The
; expected output scales the variable index in %r3 by the element size with
; sllg (shift left by 1) and uses the result as the index register, while the
; constant part becomes the displacement 1023 * 2 = 2046 on base %r2.
178 define <8 x i16> @f13(i16 *%base, i64 %index) {
180 ; CHECK: sllg [[REG:%r[1-5]]], %r3, 1
181 ; CHECK: vlbrreph %v24, 2046([[REG]],%r2)
183 %ptr1 = getelementptr i16, i16 *%base, i64 %index
184 %ptr = getelementptr i16, i16 *%ptr1, i64 1023
185 %scalar = load i16, i16 *%ptr
186 %swap = call i16 @llvm.bswap.i16(i16 %scalar)
187 %val = insertelement <8 x i16> undef, i16 %swap, i32 0
; The all-zero shuffle mask selects lane 0 of %val for every result lane.
188 %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
189 <8 x i32> zeroinitializer