1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; Test vector insertion of byte-swapped memory values.
4 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
6 declare i16 @llvm.bswap.i16(i16)
7 declare i32 @llvm.bswap.i32(i32)
8 declare i64 @llvm.bswap.i64(i64)
9 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
10 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
11 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
13 ; Test v8i16 insertion into the first element.
; A scalar i16 load, a bswap of the loaded value and an insert at constant
; index 0 are expected to be emitted as one vlebrh that reads from 0(%r2)
; and writes element 0 of %v24.
14 define <8 x i16> @f1(<8 x i16> %val, ptr %ptr) {
17 ; CHECK-NEXT: vlebrh %v24, 0(%r2), 0
19 %element = load i16, ptr %ptr
20 %swap = call i16 @llvm.bswap.i16(i16 %element)
21 %ret = insertelement <8 x i16> %val, i16 %swap, i32 0
25 ; Test v8i16 insertion into the last element.
; Same load + bswap + insert pattern as the first-element case, but with
; constant index 7 (the highest index of an <8 x i16>); the expected vlebrh
; carries element index 7.
26 define <8 x i16> @f2(<8 x i16> %val, ptr %ptr) {
29 ; CHECK-NEXT: vlebrh %v24, 0(%r2), 7
31 %element = load i16, ptr %ptr
32 %swap = call i16 @llvm.bswap.i16(i16 %element)
33 %ret = insertelement <8 x i16> %val, i16 %swap, i32 7
37 ; Test v8i16 insertion with the highest in-range offset.
; The load address is %base plus 2047 i16 elements, i.e. 4094 bytes; the
; expected vlebrh encodes that as the displacement 4094(%r2) with no separate
; address computation.
; NOTE(review): presumably 4094 is the largest halfword-aligned value that
; fits the instruction's displacement field - confirm against the ISA manual.
38 define <8 x i16> @f3(<8 x i16> %val, ptr %base) {
41 ; CHECK-NEXT: vlebrh %v24, 4094(%r2), 5
; 2047 * 2 bytes = 4094 bytes past %base.
43 %ptr = getelementptr i16, ptr %base, i32 2047
44 %element = load i16, ptr %ptr
45 %swap = call i16 @llvm.bswap.i16(i16 %element)
46 %ret = insertelement <8 x i16> %val, i16 %swap, i32 5
50 ; Test v8i16 insertion with the first out-of-range offset.
; The load address is %base plus 2048 i16 elements, i.e. 4096 bytes. The
; expected output no longer folds the offset into the vlebrh: it first adds
; 4096 to %r2 with aghi and then uses a zero displacement.
51 define <8 x i16> @f4(<8 x i16> %val, ptr %base) {
54 ; CHECK-NEXT: aghi %r2, 4096
55 ; CHECK-NEXT: vlebrh %v24, 0(%r2), 1
; 2048 * 2 bytes = 4096 bytes past %base.
57 %ptr = getelementptr i16, ptr %base, i32 2048
58 %element = load i16, ptr %ptr
59 %swap = call i16 @llvm.bswap.i16(i16 %element)
60 %ret = insertelement <8 x i16> %val, i16 %swap, i32 1
64 ; Test v8i16 insertion into a variable element.
; The element index is the run-time argument %index rather than a constant.
; The expected output does not use vlebrh here: it loads the halfword into
; %r0 with lrvh and then inserts it with vlvgh, addressing the element
; through %r3.
65 define <8 x i16> @f5(<8 x i16> %val, ptr %ptr, i32 %index) {
68 ; CHECK-NEXT: lrvh %r0, 0(%r2)
69 ; CHECK-NEXT: vlvgh %v24, %r0, 0(%r3)
71 %element = load i16, ptr %ptr
72 %swap = call i16 @llvm.bswap.i16(i16 %element)
73 %ret = insertelement <8 x i16> %val, i16 %swap, i32 %index
77 ; Test v8i16 insertion using a pair of vector bswaps.
; Here the whole vector is byte-swapped, the unswapped loaded element is
; inserted at index 0, and the result is byte-swapped again. The two vector
; bswaps cancel for the untouched elements and leave only the inserted
; element swapped, so the expected output is the same single vlebrh as for a
; scalar bswap of the loaded value.
78 define <8 x i16> @f6(<8 x i16> %val, ptr %ptr) {
81 ; CHECK-NEXT: vlebrh %v24, 0(%r2), 0
83 %element = load i16, ptr %ptr
84 %swapval = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
85 %insert = insertelement <8 x i16> %swapval, i16 %element, i32 0
86 %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert)
90 ; Test v4i32 insertion into the first element.
; A scalar i32 load, a bswap of the loaded value and an insert at constant
; index 0 are expected to be emitted as one vlebrf that reads from 0(%r2)
; and writes element 0 of %v24.
91 define <4 x i32> @f7(<4 x i32> %val, ptr %ptr) {
94 ; CHECK-NEXT: vlebrf %v24, 0(%r2), 0
96 %element = load i32, ptr %ptr
97 %swap = call i32 @llvm.bswap.i32(i32 %element)
98 %ret = insertelement <4 x i32> %val, i32 %swap, i32 0
102 ; Test v4i32 insertion into the last element.
; Same load + bswap + insert pattern as the first-element case, but with
; constant index 3 (the highest index of a <4 x i32>); the expected vlebrf
; carries element index 3.
103 define <4 x i32> @f8(<4 x i32> %val, ptr %ptr) {
106 ; CHECK-NEXT: vlebrf %v24, 0(%r2), 3
107 ; CHECK-NEXT: br %r14
108 %element = load i32, ptr %ptr
109 %swap = call i32 @llvm.bswap.i32(i32 %element)
110 %ret = insertelement <4 x i32> %val, i32 %swap, i32 3
114 ; Test v4i32 insertion with the highest in-range offset.
; The load address is %base plus 1023 i32 elements, i.e. 4092 bytes; the
; expected vlebrf encodes that as the displacement 4092(%r2) with no separate
; address computation.
; NOTE(review): presumably 4092 is the largest word-aligned value that fits
; the instruction's displacement field - confirm against the ISA manual.
115 define <4 x i32> @f9(<4 x i32> %val, ptr %base) {
118 ; CHECK-NEXT: vlebrf %v24, 4092(%r2), 2
119 ; CHECK-NEXT: br %r14
; 1023 * 4 bytes = 4092 bytes past %base.
120 %ptr = getelementptr i32, ptr %base, i32 1023
121 %element = load i32, ptr %ptr
122 %swap = call i32 @llvm.bswap.i32(i32 %element)
123 %ret = insertelement <4 x i32> %val, i32 %swap, i32 2
127 ; Test v4i32 insertion with the first out-of-range offset.
; The load address is %base plus 1024 i32 elements, i.e. 4096 bytes. The
; expected output no longer folds the offset into the vlebrf: it first adds
; 4096 to %r2 with aghi and then uses a zero displacement.
128 define <4 x i32> @f10(<4 x i32> %val, ptr %base) {
131 ; CHECK-NEXT: aghi %r2, 4096
132 ; CHECK-NEXT: vlebrf %v24, 0(%r2), 1
133 ; CHECK-NEXT: br %r14
; 1024 * 4 bytes = 4096 bytes past %base.
134 %ptr = getelementptr i32, ptr %base, i32 1024
135 %element = load i32, ptr %ptr
136 %swap = call i32 @llvm.bswap.i32(i32 %element)
137 %ret = insertelement <4 x i32> %val, i32 %swap, i32 1
141 ; Test v4i32 insertion into a variable element.
; The element index is the run-time argument %index rather than a constant.
; The expected output does not use vlebrf here: it loads the word into %r0
; with lrv and then inserts it with vlvgf, addressing the element through
; %r3.
142 define <4 x i32> @f11(<4 x i32> %val, ptr %ptr, i32 %index) {
145 ; CHECK-NEXT: lrv %r0, 0(%r2)
146 ; CHECK-NEXT: vlvgf %v24, %r0, 0(%r3)
147 ; CHECK-NEXT: br %r14
148 %element = load i32, ptr %ptr
149 %swap = call i32 @llvm.bswap.i32(i32 %element)
150 %ret = insertelement <4 x i32> %val, i32 %swap, i32 %index
154 ; Test v4i32 insertion using a pair of vector bswaps.
; Here the whole vector is byte-swapped, the unswapped loaded element is
; inserted at index 0, and the result is byte-swapped again. The two vector
; bswaps cancel for the untouched elements and leave only the inserted
; element swapped, so the expected output is the same single vlebrf as for a
; scalar bswap of the loaded value.
155 define <4 x i32> @f12(<4 x i32> %val, ptr %ptr) {
158 ; CHECK-NEXT: vlebrf %v24, 0(%r2), 0
159 ; CHECK-NEXT: br %r14
160 %element = load i32, ptr %ptr
161 %swapval = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
162 %insert = insertelement <4 x i32> %swapval, i32 %element, i32 0
163 %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
167 ; Test v2i64 insertion into the first element.
; A scalar i64 load, a bswap of the loaded value and an insert at constant
; index 0 are expected to be emitted as one vlebrg that reads from 0(%r2)
; and writes element 0 of %v24.
168 define <2 x i64> @f13(<2 x i64> %val, ptr %ptr) {
171 ; CHECK-NEXT: vlebrg %v24, 0(%r2), 0
172 ; CHECK-NEXT: br %r14
173 %element = load i64, ptr %ptr
174 %swap = call i64 @llvm.bswap.i64(i64 %element)
175 %ret = insertelement <2 x i64> %val, i64 %swap, i32 0
179 ; Test v2i64 insertion into the last element.
; Same load + bswap + insert pattern as the first-element case, but with
; constant index 1 (the highest index of a <2 x i64>); the expected vlebrg
; carries element index 1.
180 define <2 x i64> @f14(<2 x i64> %val, ptr %ptr) {
183 ; CHECK-NEXT: vlebrg %v24, 0(%r2), 1
184 ; CHECK-NEXT: br %r14
185 %element = load i64, ptr %ptr
186 %swap = call i64 @llvm.bswap.i64(i64 %element)
187 %ret = insertelement <2 x i64> %val, i64 %swap, i32 1
191 ; Test v2i64 insertion with the highest in-range offset.
; The load address is %base plus 511 i64 elements, i.e. 4088 bytes; the
; expected vlebrg encodes that as the displacement 4088(%r2) with no separate
; address computation.
; NOTE(review): presumably 4088 is the largest doubleword-aligned value that
; fits the instruction's displacement field - confirm against the ISA manual.
192 define <2 x i64> @f15(<2 x i64> %val, ptr %base) {
195 ; CHECK-NEXT: vlebrg %v24, 4088(%r2), 1
196 ; CHECK-NEXT: br %r14
; 511 * 8 bytes = 4088 bytes past %base.
197 %ptr = getelementptr i64, ptr %base, i32 511
198 %element = load i64, ptr %ptr
199 %swap = call i64 @llvm.bswap.i64(i64 %element)
200 %ret = insertelement <2 x i64> %val, i64 %swap, i32 1
204 ; Test v2i64 insertion with the first out-of-range offset.
; The load address is %base plus 512 i64 elements, i.e. 4096 bytes. The
; expected output no longer folds the offset into the vlebrg: it first adds
; 4096 to %r2 with aghi and then uses a zero displacement.
205 define <2 x i64> @f16(<2 x i64> %val, ptr %base) {
208 ; CHECK-NEXT: aghi %r2, 4096
209 ; CHECK-NEXT: vlebrg %v24, 0(%r2), 0
210 ; CHECK-NEXT: br %r14
; 512 * 8 bytes = 4096 bytes past %base.
211 %ptr = getelementptr i64, ptr %base, i32 512
212 %element = load i64, ptr %ptr
213 %swap = call i64 @llvm.bswap.i64(i64 %element)
214 %ret = insertelement <2 x i64> %val, i64 %swap, i32 0
218 ; Test v2i64 insertion into a variable element.
; The element index is the run-time argument %index rather than a constant.
; The expected output does not use vlebrg here: it loads the doubleword into
; %r0 with lrvg and then inserts it with vlvgg, addressing the element
; through %r3.
219 define <2 x i64> @f17(<2 x i64> %val, ptr %ptr, i32 %index) {
222 ; CHECK-NEXT: lrvg %r0, 0(%r2)
223 ; CHECK-NEXT: vlvgg %v24, %r0, 0(%r3)
224 ; CHECK-NEXT: br %r14
225 %element = load i64, ptr %ptr
226 %swap = call i64 @llvm.bswap.i64(i64 %element)
227 %ret = insertelement <2 x i64> %val, i64 %swap, i32 %index
231 ; Test v2i64 insertion using a pair of vector bswaps.
; Here the whole vector is byte-swapped, the unswapped loaded element is
; inserted at index 0, and the result is byte-swapped again. The two vector
; bswaps cancel for the untouched elements and leave only the inserted
; element swapped, so the expected output is the same single vlebrg as for a
; scalar bswap of the loaded value.
232 define <2 x i64> @f18(<2 x i64> %val, ptr %ptr) {
235 ; CHECK-NEXT: vlebrg %v24, 0(%r2), 0
236 ; CHECK-NEXT: br %r14
237 %element = load i64, ptr %ptr
238 %swapval = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
239 %insert = insertelement <2 x i64> %swapval, i64 %element, i32 0
240 %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert)