1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; Test replication (splat) of a byte-swapped scalar loaded from memory into every vector element.
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
; Byte-swap intrinsics used by the tests in this file.  The scalar forms
; (i16/i32/i64) are applied to the loaded value before it is replicated; the
; vector forms (v8i16/v4i32/v2i64) are applied to the already-replicated
; vector (see f4, f8 and f12).
6 declare i16 @llvm.bswap.i16(i16)
7 declare i32 @llvm.bswap.i32(i32)
8 declare i64 @llvm.bswap.i64(i64)
9 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
10 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
11 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
13 ; Test a v8i16 replicating load with no offset.
; An i16 is loaded from %ptr, byte-swapped with llvm.bswap.i16, inserted into
; element 0 and then broadcast to all eight lanes.  The expected output is a
; single vlbrreph with a zero displacement off %r2.
14 define <8 x i16> @f1(ptr %ptr) {
17 ; CHECK-NEXT: vlbrreph %v24, 0(%r2)
19 %scalar = load i16, ptr %ptr
20 %swap = call i16 @llvm.bswap.i16(i16 %scalar)
21 %val = insertelement <8 x i16> undef, i16 %swap, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
22 %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
23 <8 x i32> zeroinitializer
27 ; Test a v8i16 replicating load with the maximum in-range offset.
; The address is %base plus 2047 i16 elements, i.e. 2047 * 2 = 4094 bytes.
; The expected vlbrreph carries that byte offset directly as its displacement,
; with no separate address arithmetic.
28 define <8 x i16> @f2(ptr %base) {
31 ; CHECK-NEXT: vlbrreph %v24, 4094(%r2)
33 %ptr = getelementptr i16, ptr %base, i64 2047
34 %scalar = load i16, ptr %ptr
35 %swap = call i16 @llvm.bswap.i16(i16 %scalar)
36 %val = insertelement <8 x i16> undef, i16 %swap, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
37 %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
38 <8 x i32> zeroinitializer
42 ; Test a v8i16 replicating load with the first out-of-range offset.
; The address is %base plus 2048 i16 elements, i.e. 2048 * 2 = 4096 bytes.
; The expected output no longer folds the offset into the load: it first adds
; 4096 to %r2 with aghi and then uses vlbrreph with a zero displacement.
43 define <8 x i16> @f3(ptr %base) {
46 ; CHECK-NEXT: aghi %r2, 4096
47 ; CHECK-NEXT: vlbrreph %v24, 0(%r2)
49 %ptr = getelementptr i16, ptr %base, i64 2048
50 %scalar = load i16, ptr %ptr
51 %swap = call i16 @llvm.bswap.i16(i16 %scalar)
52 %val = insertelement <8 x i16> undef, i16 %swap, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
53 %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
54 <8 x i32> zeroinitializer
58 ; Test a v8i16 replicating load using a vector bswap.
; Here the loaded i16 is replicated first (unswapped) and the byte swap is
; applied afterwards to the whole vector via llvm.bswap.v8i16.  The expected
; output is the same single vlbrreph as for the scalar-bswap form in f1.
59 define <8 x i16> @f4(ptr %ptr) {
62 ; CHECK-NEXT: vlbrreph %v24, 0(%r2)
64 %scalar = load i16, ptr %ptr
65 %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
66 %rep = shufflevector <8 x i16> %val, <8 x i16> undef,
67 <8 x i32> zeroinitializer
68 %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %rep)
72 ; Test a v4i32 replicating load with no offset.
; An i32 is loaded from %ptr, byte-swapped with llvm.bswap.i32, inserted into
; element 0 and then broadcast to all four lanes.  The expected output is a
; single vlbrrepf with a zero displacement off %r2.
73 define <4 x i32> @f5(ptr %ptr) {
76 ; CHECK-NEXT: vlbrrepf %v24, 0(%r2)
78 %scalar = load i32, ptr %ptr
79 %swap = call i32 @llvm.bswap.i32(i32 %scalar)
80 %val = insertelement <4 x i32> undef, i32 %swap, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
81 %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
82 <4 x i32> zeroinitializer
86 ; Test a v4i32 replicating load with the maximum in-range offset.
; The address is %base plus 1023 i32 elements, i.e. 1023 * 4 = 4092 bytes.
; The expected vlbrrepf carries that byte offset directly as its displacement.
87 define <4 x i32> @f6(ptr %base) {
90 ; CHECK-NEXT: vlbrrepf %v24, 4092(%r2)
92 %ptr = getelementptr i32, ptr %base, i64 1023
93 %scalar = load i32, ptr %ptr
94 %swap = call i32 @llvm.bswap.i32(i32 %scalar)
95 %val = insertelement <4 x i32> undef, i32 %swap, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
96 %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
97 <4 x i32> zeroinitializer
101 ; Test a v4i32 replicating load with the first out-of-range offset.
; The address is %base plus 1024 i32 elements, i.e. 1024 * 4 = 4096 bytes.
; The expected output adds 4096 to %r2 with aghi first and then uses vlbrrepf
; with a zero displacement, followed by the return branch.
102 define <4 x i32> @f7(ptr %base) {
105 ; CHECK-NEXT: aghi %r2, 4096
106 ; CHECK-NEXT: vlbrrepf %v24, 0(%r2)
107 ; CHECK-NEXT: br %r14
108 %ptr = getelementptr i32, ptr %base, i64 1024
109 %scalar = load i32, ptr %ptr
110 %swap = call i32 @llvm.bswap.i32(i32 %scalar)
111 %val = insertelement <4 x i32> undef, i32 %swap, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
112 %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
113 <4 x i32> zeroinitializer
117 ; Test a v4i32 replicating load using a vector bswap.
; Here the loaded i32 is replicated first (unswapped) and the byte swap is
; applied afterwards to the whole vector via llvm.bswap.v4i32.  The expected
; output is the same single vlbrrepf as for the scalar-bswap form in f5.
118 define <4 x i32> @f8(ptr %ptr) {
121 ; CHECK-NEXT: vlbrrepf %v24, 0(%r2)
122 ; CHECK-NEXT: br %r14
123 %scalar = load i32, ptr %ptr
124 %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
125 %rep = shufflevector <4 x i32> %val, <4 x i32> undef,
126 <4 x i32> zeroinitializer
127 %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %rep)
131 ; Test a v2i64 replicating load with no offset.
; An i64 is loaded from %ptr, byte-swapped with llvm.bswap.i64, inserted into
; element 0 and then broadcast to both lanes.  The expected output is a single
; vlbrrepg with a zero displacement off %r2, followed by the return branch.
132 define <2 x i64> @f9(ptr %ptr) {
135 ; CHECK-NEXT: vlbrrepg %v24, 0(%r2)
136 ; CHECK-NEXT: br %r14
137 %scalar = load i64, ptr %ptr
138 %swap = call i64 @llvm.bswap.i64(i64 %scalar)
139 %val = insertelement <2 x i64> undef, i64 %swap, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
140 %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
141 <2 x i32> zeroinitializer
145 ; Test a v2i64 replicating load with the maximum in-range offset.
; The address is %base plus 511 i64 elements, i.e. 511 * 8 = 4088 bytes.
; The expected vlbrrepg carries that byte offset directly as its displacement.
; Note that the getelementptr index here is an i32 constant rather than i64.
146 define <2 x i64> @f10(ptr %base) {
149 ; CHECK-NEXT: vlbrrepg %v24, 4088(%r2)
150 ; CHECK-NEXT: br %r14
151 %ptr = getelementptr i64, ptr %base, i32 511
152 %scalar = load i64, ptr %ptr
153 %swap = call i64 @llvm.bswap.i64(i64 %scalar)
154 %val = insertelement <2 x i64> undef, i64 %swap, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
155 %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
156 <2 x i32> zeroinitializer
160 ; Test a v2i64 replicating load with the first out-of-range offset.
; The address is %base plus 512 i64 elements, i.e. 512 * 8 = 4096 bytes.
; The expected output adds 4096 to %r2 with aghi first and then uses vlbrrepg
; with a zero displacement, followed by the return branch.
; Note that the getelementptr index here is an i32 constant rather than i64.
161 define <2 x i64> @f11(ptr %base) {
164 ; CHECK-NEXT: aghi %r2, 4096
165 ; CHECK-NEXT: vlbrrepg %v24, 0(%r2)
166 ; CHECK-NEXT: br %r14
167 %ptr = getelementptr i64, ptr %base, i32 512
168 %scalar = load i64, ptr %ptr
169 %swap = call i64 @llvm.bswap.i64(i64 %scalar)
170 %val = insertelement <2 x i64> undef, i64 %swap, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
171 %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
172 <2 x i32> zeroinitializer
176 ; Test a v2i64 replicating load using a vector bswap.
; Here the loaded i64 is replicated first (unswapped) and the byte swap is
; applied afterwards to the whole vector via llvm.bswap.v2i64.  The expected
; output is the same single vlbrrepg as for the scalar-bswap form in f9.
177 define <2 x i64> @f12(ptr %ptr) {
180 ; CHECK-NEXT: vlbrrepg %v24, 0(%r2)
181 ; CHECK-NEXT: br %r14
182 %scalar = load i64, ptr %ptr
183 %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
184 %rep = shufflevector <2 x i64> %val, <2 x i64> undef,
185 <2 x i32> zeroinitializer
186 %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %rep)
190 ; Test a v8i16 replicating load with an index.
; The address is %base + %index i16 elements + 1023 i16 elements.  In the
; expected output the variable index (%r3) is shifted left by 1 with sllg,
; i.e. scaled by the 2-byte element size, and the constant part
; (1023 * 2 = 2046 bytes) becomes the displacement of a base+index vlbrreph.
191 define <8 x i16> @f13(ptr %base, i64 %index) {
194 ; CHECK-NEXT: sllg %r1, %r3, 1
195 ; CHECK-NEXT: vlbrreph %v24, 2046(%r1,%r2)
196 ; CHECK-NEXT: br %r14
197 %ptr1 = getelementptr i16, ptr %base, i64 %index
198 %ptr = getelementptr i16, ptr %ptr1, i64 1023
199 %scalar = load i16, ptr %ptr
200 %swap = call i16 @llvm.bswap.i16(i16 %scalar)
201 %val = insertelement <8 x i16> undef, i16 %swap, i32 0
; An all-zero shuffle mask selects element 0 of %val for every result lane.
202 %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
203 <8 x i32> zeroinitializer