1 ; Test vector extraction of byte-swapped value to memory.
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
; Byte-swap intrinsics used by the functions in this file: the scalar
; i16/i32/i64 forms, followed by the matching 128-bit vector forms
; (8 x i16, 4 x i32, 2 x i64).
5 declare i16 @llvm.bswap.i16(i16)
6 declare i32 @llvm.bswap.i32(i32)
7 declare i64 @llvm.bswap.i64(i64)
8 declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
9 declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
10 declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
12 ; Test v8i16 extraction from the first element.
; f1: extract lane 0 of %val, byte-swap it with the scalar i16 intrinsic and
; store the result through %ptr.
; NOTE(review): the expectation below presumably relies on %val arriving in
; %v24 and %ptr in %r2 under the calling convention - confirm.
13 define void @f1(<8 x i16> %val, i16 *%ptr) {
; Expected output: a vstebrh with displacement 0 and element index 0.
15 ; CHECK: vstebrh %v24, 0(%r2), 0
; Pattern under test: extractelement -> scalar bswap -> store.
17 %element = extractelement <8 x i16> %val, i32 0
18 %swap = call i16 @llvm.bswap.i16(i16 %element)
19 store i16 %swap, i16 *%ptr
23 ; Test v8i16 extraction from the last element.
; f2: same pattern as the lane-0 case, but using index 7, the highest valid
; index of an 8-element vector.
24 define void @f2(<8 x i16> %val, i16 *%ptr) {
; Expected output: a vstebrh with displacement 0 and element index 7.
26 ; CHECK: vstebrh %v24, 0(%r2), 7
; Pattern under test: extractelement -> scalar bswap -> store.
28 %element = extractelement <8 x i16> %val, i32 7
29 %swap = call i16 @llvm.bswap.i16(i16 %element)
30 store i16 %swap, i16 *%ptr
34 ; Test v8i16 extraction of an invalid element. This must compile,
35 ; but we don't care what it does.
; f3: the extract index (8) is one past the last lane of an 8-element vector.
36 define void @f3(<8 x i16> %val, i16 *%ptr) {
; Negative expectation: the only requirement is that no vstebrh carrying the
; out-of-range element index 8 appears in the output.
38 ; CHECK-NOT: vstebrh %v24, 0(%r2), 8
; Index 8 is deliberately outside the 0..7 range of <8 x i16>.
40 %element = extractelement <8 x i16> %val, i32 8
41 %swap = call i16 @llvm.bswap.i16(i16 %element)
42 store i16 %swap, i16 *%ptr
46 ; Test v8i16 extraction with the highest in-range offset.
; f4: store the byte-swapped lane 5 of %val at %base + 2047 i16 elements.
47 define void @f4(<8 x i16> %val, i16 *%base) {
; Expected output: the 4094-byte offset is folded into the vstebrh
; displacement, with element index 5.
49 ; CHECK: vstebrh %v24, 4094(%r2), 5
; 2047 elements * 2 bytes = 4094 bytes past %base.
51 %ptr = getelementptr i16, i16 *%base, i32 2047
52 %element = extractelement <8 x i16> %val, i32 5
53 %swap = call i16 @llvm.bswap.i16(i16 %element)
54 store i16 %swap, i16 *%ptr
58 ; Test v8i16 extraction with the first out-of-range offset.
; f5: store the byte-swapped lane 1 of %val at %base + 2048 i16 elements.
59 define void @f5(<8 x i16> %val, i16 *%base) {
; Expected output: the 4096-byte offset is first added to the base register
; with aghi, and the vstebrh then uses displacement 0 with element index 1.
; NOTE(review): presumably 4096 no longer fits the store's displacement
; field - confirm against the instruction format.
61 ; CHECK: aghi %r2, 4096
62 ; CHECK: vstebrh %v24, 0(%r2), 1
; 2048 elements * 2 bytes = 4096 bytes past %base.
64 %ptr = getelementptr i16, i16 *%base, i32 2048
65 %element = extractelement <8 x i16> %val, i32 1
66 %swap = call i16 @llvm.bswap.i16(i16 %element)
67 store i16 %swap, i16 *%ptr
71 ; Test v8i16 extraction from a variable element.
; f6: the lane is selected by the runtime value %index rather than a constant.
; No expectation directives for this function are visible in this view.
72 define void @f6(<8 x i16> %val, i16 *%ptr, i32 %index) {
; Pattern under test: variable-index extractelement -> scalar bswap -> store.
76 %element = extractelement <8 x i16> %val, i32 %index
77 %swap = call i16 @llvm.bswap.i16(i16 %element)
78 store i16 %swap, i16 *%ptr
82 ; Test v8i16 extraction using a vector bswap.
; f7: byte-swap the whole vector first, then extract lane 0 and store it.
83 define void @f7(<8 x i16> %val, i16 *%ptr) {
; Expected output: the same vstebrh (displacement 0, element index 0) that the
; scalar-bswap form of the lane-0 test expects.
85 ; CHECK: vstebrh %v24, 0(%r2), 0
; Pattern under test: vector bswap -> extractelement -> store.
87 %swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
88 %element = extractelement <8 x i16> %swap, i32 0
89 store i16 %element, i16 *%ptr
93 ; Test v4i32 extraction from the first element.
; f8: extract lane 0 of %val, byte-swap it with the scalar i32 intrinsic and
; store the result through %ptr.
94 define void @f8(<4 x i32> %val, i32 *%ptr) {
; Expected output: a vstebrf with displacement 0 and element index 0.
96 ; CHECK: vstebrf %v24, 0(%r2), 0
; Pattern under test: extractelement -> scalar bswap -> store.
98 %element = extractelement <4 x i32> %val, i32 0
99 %swap = call i32 @llvm.bswap.i32(i32 %element)
100 store i32 %swap, i32 *%ptr
104 ; Test v4i32 extraction from the last element.
; f9: same pattern as the lane-0 case, but using index 3, the highest valid
; index of a 4-element vector.
105 define void @f9(<4 x i32> %val, i32 *%ptr) {
; Expected output: a vstebrf with displacement 0 and element index 3.
107 ; CHECK: vstebrf %v24, 0(%r2), 3
; Pattern under test: extractelement -> scalar bswap -> store.
109 %element = extractelement <4 x i32> %val, i32 3
110 %swap = call i32 @llvm.bswap.i32(i32 %element)
111 store i32 %swap, i32 *%ptr
115 ; Test v4i32 extraction of an invalid element. This must compile,
116 ; but we don't care what it does.
; f10: the extract index (4) is one past the last lane of a 4-element vector.
117 define void @f10(<4 x i32> %val, i32 *%ptr) {
; Negative expectation: the only requirement is that no vstebrf carrying the
; out-of-range element index 4 appears in the output.
119 ; CHECK-NOT: vstebrf %v24, 0(%r2), 4
; Index 4 is deliberately outside the 0..3 range of <4 x i32>.
121 %element = extractelement <4 x i32> %val, i32 4
122 %swap = call i32 @llvm.bswap.i32(i32 %element)
123 store i32 %swap, i32 *%ptr
127 ; Test v4i32 extraction with the highest in-range offset.
; f11: store the byte-swapped lane 2 of %val at %base + 1023 i32 elements.
128 define void @f11(<4 x i32> %val, i32 *%base) {
; Expected output: the 4092-byte offset is folded into the vstebrf
; displacement, with element index 2.
130 ; CHECK: vstebrf %v24, 4092(%r2), 2
; 1023 elements * 4 bytes = 4092 bytes past %base.
132 %ptr = getelementptr i32, i32 *%base, i32 1023
133 %element = extractelement <4 x i32> %val, i32 2
134 %swap = call i32 @llvm.bswap.i32(i32 %element)
135 store i32 %swap, i32 *%ptr
139 ; Test v4i32 extraction with the first out-of-range offset.
; f12: store the byte-swapped lane 1 of %val at %base + 1024 i32 elements.
140 define void @f12(<4 x i32> %val, i32 *%base) {
; Expected output: the 4096-byte offset is first added to the base register
; with aghi, and the vstebrf then uses displacement 0 with element index 1.
; NOTE(review): presumably 4096 no longer fits the store's displacement
; field - confirm against the instruction format.
142 ; CHECK: aghi %r2, 4096
143 ; CHECK: vstebrf %v24, 0(%r2), 1
; 1024 elements * 4 bytes = 4096 bytes past %base.
145 %ptr = getelementptr i32, i32 *%base, i32 1024
146 %element = extractelement <4 x i32> %val, i32 1
147 %swap = call i32 @llvm.bswap.i32(i32 %element)
148 store i32 %swap, i32 *%ptr
152 ; Test v4i32 extraction from a variable element.
; f13: the lane is selected by the runtime value %index rather than a constant.
; No expectation directives for this function are visible in this view.
153 define void @f13(<4 x i32> %val, i32 *%ptr, i32 %index) {
; Pattern under test: variable-index extractelement -> scalar bswap -> store.
157 %element = extractelement <4 x i32> %val, i32 %index
158 %swap = call i32 @llvm.bswap.i32(i32 %element)
159 store i32 %swap, i32 *%ptr
163 ; Test v4i32 extraction using a vector bswap.
; f14: byte-swap the whole vector first, then extract lane 0 and store it.
164 define void @f14(<4 x i32> %val, i32 *%ptr) {
; Expected output: the same vstebrf (displacement 0, element index 0) that the
; scalar-bswap form of the lane-0 test expects.
166 ; CHECK: vstebrf %v24, 0(%r2), 0
; Pattern under test: vector bswap -> extractelement -> store.
168 %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
169 %element = extractelement <4 x i32> %swap, i32 0
170 store i32 %element, i32 *%ptr
174 ; Test v2i64 extraction from the first element.
; f15: extract lane 0 of %val, byte-swap it with the scalar i64 intrinsic and
; store the result through %ptr.
175 define void @f15(<2 x i64> %val, i64 *%ptr) {
; Expected output: a vstebrg with displacement 0 and element index 0.
177 ; CHECK: vstebrg %v24, 0(%r2), 0
; Pattern under test: extractelement -> scalar bswap -> store.
179 %element = extractelement <2 x i64> %val, i32 0
180 %swap = call i64 @llvm.bswap.i64(i64 %element)
181 store i64 %swap, i64 *%ptr
185 ; Test v2i64 extraction from the last element.
; f16: same pattern as the lane-0 case, but using index 1, the highest valid
; index of a 2-element vector.
186 define void @f16(<2 x i64> %val, i64 *%ptr) {
; Expected output: a vstebrg with displacement 0 and element index 1.
188 ; CHECK: vstebrg %v24, 0(%r2), 1
; Pattern under test: extractelement -> scalar bswap -> store.
190 %element = extractelement <2 x i64> %val, i32 1
191 %swap = call i64 @llvm.bswap.i64(i64 %element)
192 store i64 %swap, i64 *%ptr
196 ; Test v2i64 extraction of an invalid element. This must compile,
197 ; but we don't care what it does.
; f17: the extract index (2) is one past the last lane of a 2-element vector.
198 define void @f17(<2 x i64> %val, i64 *%ptr) {
; Negative expectation: the only requirement is that no vstebrg carrying the
; out-of-range element index 2 appears in the output.
200 ; CHECK-NOT: vstebrg %v24, 0(%r2), 2
; Index 2 is deliberately outside the 0..1 range of <2 x i64>.
202 %element = extractelement <2 x i64> %val, i32 2
203 %swap = call i64 @llvm.bswap.i64(i64 %element)
204 store i64 %swap, i64 *%ptr
208 ; Test v2i64 extraction with the highest in-range offset.
; f18: store the byte-swapped lane 1 of %val at %base + 511 i64 elements.
209 define void @f18(<2 x i64> %val, i64 *%base) {
; Expected output: the 4088-byte offset is folded into the vstebrg
; displacement, with element index 1.
211 ; CHECK: vstebrg %v24, 4088(%r2), 1
; 511 elements * 8 bytes = 4088 bytes past %base.
213 %ptr = getelementptr i64, i64 *%base, i32 511
214 %element = extractelement <2 x i64> %val, i32 1
215 %swap = call i64 @llvm.bswap.i64(i64 %element)
216 store i64 %swap, i64 *%ptr
220 ; Test v2i64 extraction with the first out-of-range offset.
; f19: store the byte-swapped lane 0 of %val at %base + 512 i64 elements.
221 define void @f19(<2 x i64> %val, i64 *%base) {
; Expected output: the 4096-byte offset is first added to the base register
; with aghi, and the vstebrg then uses displacement 0 with element index 0.
; NOTE(review): presumably 4096 no longer fits the store's displacement
; field - confirm against the instruction format.
223 ; CHECK: aghi %r2, 4096
224 ; CHECK: vstebrg %v24, 0(%r2), 0
; 512 elements * 8 bytes = 4096 bytes past %base.
226 %ptr = getelementptr i64, i64 *%base, i32 512
227 %element = extractelement <2 x i64> %val, i32 0
228 %swap = call i64 @llvm.bswap.i64(i64 %element)
229 store i64 %swap, i64 *%ptr
233 ; Test v2i64 extraction from a variable element.
; f20: the lane is selected by the runtime value %index rather than a constant.
; No expectation directives for this function are visible in this view.
234 define void @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
; Pattern under test: variable-index extractelement -> scalar bswap -> store.
238 %element = extractelement <2 x i64> %val, i32 %index
239 %swap = call i64 @llvm.bswap.i64(i64 %element)
240 store i64 %swap, i64 *%ptr
244 ; Test v2i64 extraction using a vector bswap.
; f21: byte-swap the whole vector first, then extract lane 0 and store it.
245 define void @f21(<2 x i64> %val, i64 *%ptr) {
; Expected output: the same vstebrg (displacement 0, element index 0) that the
; scalar-bswap form of the lane-0 test expects.
247 ; CHECK: vstebrg %v24, 0(%r2), 0
; Pattern under test: vector bswap -> extractelement -> store.
249 %swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
250 %element = extractelement <2 x i64> %swap, i32 0
251 store i64 %element, i64 *%ptr