; Test stores of vectors whose elements have been swapped into reverse order.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
; Test v16i8 stores.
; The shuffle mask below lists all 16 byte indices in descending order, so
; %swap is %val with its bytes fully reversed.  The expected output is a
; single vstbrq of the incoming vector register to the address in %r2,
; with no separate permute instruction.
define void @f1(<16 x i8> %val, <16 x i8> *%ptr) {
; CHECK-LABEL: f1:
; CHECK: vstbrq %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <16 x i8> %val, <16 x i8> undef,
                        <16 x i32> <i32 15, i32 14, i32 13, i32 12,
                                    i32 11, i32 10, i32 9, i32 8,
                                    i32 7, i32 6, i32 5, i32 4,
                                    i32 3, i32 2, i32 1, i32 0>
  store <16 x i8> %swap, <16 x i8> *%ptr
  ret void
}
; Test v8i16 stores.
; The mask <7..0> reverses the order of the eight halfword elements; the
; expected output is a single vsterh store to the address in %r2.
define void @f2(<8 x i16> %val, <8 x i16> *%ptr) {
; CHECK-LABEL: f2:
; CHECK: vsterh %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <8 x i16> %val, <8 x i16> undef,
                        <8 x i32> <i32 7, i32 6, i32 5, i32 4,
                                   i32 3, i32 2, i32 1, i32 0>
  store <8 x i16> %swap, <8 x i16> *%ptr
  ret void
}
; Test v4i32 stores.
; The mask <3, 2, 1, 0> reverses the order of the four word elements; the
; expected output is a single vsterf store to the address in %r2.
define void @f3(<4 x i32> %val, <4 x i32> *%ptr) {
; CHECK-LABEL: f3:
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}
; Test v2i64 stores.
; The mask <1, 0> exchanges the two doubleword elements; the expected
; output is a single vsterg store to the address in %r2.
define void @f4(<2 x i64> %val, <2 x i64> *%ptr) {
; CHECK-LABEL: f4:
; CHECK: vsterg %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <2 x i64> %val, <2 x i64> undef,
                        <2 x i32> <i32 1, i32 0>
  store <2 x i64> %swap, <2 x i64> *%ptr
  ret void
}
; Test v4f32 stores.
; Same element reversal as the v4i32 case but with float elements; the
; expected output is the same vsterf store as for 32-bit integers.
define void @f5(<4 x float> %val, <4 x float> *%ptr) {
; CHECK-LABEL: f5:
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <4 x float> %val, <4 x float> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x float> %swap, <4 x float> *%ptr
  ret void
}
; Test v2f64 stores.
; Same element exchange as the v2i64 case but with double elements; the
; expected output is the same vsterg store as for 64-bit integers.
define void @f6(<2 x double> %val, <2 x double> *%ptr) {
; CHECK-LABEL: f6:
; CHECK: vsterg %v24, 0(%r2)
; CHECK: br %r14
  %swap = shufflevector <2 x double> %val, <2 x double> undef,
                        <2 x i32> <i32 1, i32 0>
  store <2 x double> %swap, <2 x double> *%ptr
  ret void
}
; Test the highest aligned in-range offset.
; Element 255 of a <4 x i32> array lies 255 * 16 = 4080 bytes past %base,
; and the store is expected to use that value directly as its displacement.
define void @f7(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f7:
; CHECK: vsterf %v24, 4080(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}
; Test the highest unaligned in-range offset.
; The address is formed as a byte offset of 4095 from an i8 pointer, so the
; store carries "align 1"; the displacement is still expected to be folded
; into the store instruction.
define void @f8(<4 x i32> %val, i8 *%base) {
; CHECK-LABEL: f8:
; CHECK: vsterf %v24, 4095(%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 4095
  %ptr = bitcast i8 *%addr to <4 x i32> *
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr, align 1
  ret void
}
; Test the next offset up, which requires separate address logic.
; Element 256 lies 256 * 16 = 4096 bytes past %base, one past the highest
; in-range displacement of 4095, so the base register is expected to be
; adjusted first and the store then uses a zero displacement.
define void @f9(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f9:
; CHECK: aghi %r2, 4096
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}
; Test negative offsets, which also require separate address logic.
; Element -1 lies 16 bytes before %base; the base register is expected to
; be adjusted by -16 first and the store then uses a zero displacement.
define void @f10(<4 x i32> %val, <4 x i32> *%base) {
; CHECK-LABEL: f10:
; CHECK: aghi %r2, -16
; CHECK: vsterf %v24, 0(%r2)
; CHECK: br %r14
  %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr
  ret void
}
; Check that indexes are allowed.
; The address is %base plus a run-time byte %index; both registers are
; expected to appear in the store's address operand, with no separate add.
define void @f11(<4 x i32> %val, i8 *%base, i64 %index) {
; CHECK-LABEL: f11:
; CHECK: vsterf %v24, 0(%r3,%r2)
; CHECK: br %r14
  %addr = getelementptr i8, i8 *%base, i64 %index
  %ptr = bitcast i8 *%addr to <4 x i32> *
  %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
                        <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  store <4 x i32> %swap, <4 x i32> *%ptr, align 1
  ret void
}