1 ; Test loads of vectors whose elements are then put in reversed order.
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
; v16i8 case: load a 16-byte vector and reverse all 16 elements with a
; shufflevector whose mask runs 15..0. The expected output contains
; "vlbrq %v24, 0(%r2)".
6 define <16 x i8> @f1(<16 x i8> *%ptr) {
8 ; CHECK: vlbrq %v24, 0(%r2)
10 %load = load <16 x i8>, <16 x i8> *%ptr
; Second operand is undef because the mask only selects lanes of %load.
11 %ret = shufflevector <16 x i8> %load, <16 x i8> undef,
12 <16 x i32> <i32 15, i32 14, i32 13, i32 12,
13 i32 11, i32 10, i32 9, i32 8,
14 i32 7, i32 6, i32 5, i32 4,
15 i32 3, i32 2, i32 1, i32 0>
; v8i16 case: load eight i16 elements and reverse their order (mask 7..0).
; The expected output contains "vlerh %v24, 0(%r2)".
20 define <8 x i16> @f2(<8 x i16> *%ptr) {
22 ; CHECK: vlerh %v24, 0(%r2)
24 %load = load <8 x i16>, <8 x i16> *%ptr
; Only lanes of %load are selected, so the second shuffle operand is undef.
25 %ret = shufflevector <8 x i16> %load, <8 x i16> undef,
26 <8 x i32> <i32 7, i32 6, i32 5, i32 4,
27 i32 3, i32 2, i32 1, i32 0>
; v4i32 case: load four i32 elements and reverse their order (mask 3..0).
; The expected output contains "vlerf %v24, 0(%r2)".
32 define <4 x i32> @f3(<4 x i32> *%ptr) {
34 ; CHECK: vlerf %v24, 0(%r2)
36 %load = load <4 x i32>, <4 x i32> *%ptr
; Only lanes of %load are selected, so the second shuffle operand is undef.
37 %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
38 <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; v2i64 case: load two i64 elements and swap them (mask 1, 0).
; The expected output contains "vlerg %v24, 0(%r2)".
43 define <2 x i64> @f4(<2 x i64> *%ptr) {
45 ; CHECK: vlerg %v24, 0(%r2)
47 %load = load <2 x i64>, <2 x i64> *%ptr
; Only lanes of %load are selected, so the second shuffle operand is undef.
48 %ret = shufflevector <2 x i64> %load, <2 x i64> undef,
49 <2 x i32> <i32 1, i32 0>
; v4f32 case: same four-element reversal as the <4 x i32> test, but on float
; elements. The expected instruction is the same one: "vlerf %v24, 0(%r2)".
54 define <4 x float> @f5(<4 x float> *%ptr) {
56 ; CHECK: vlerf %v24, 0(%r2)
58 %load = load <4 x float>, <4 x float> *%ptr
; Only lanes of %load are selected, so the second shuffle operand is undef.
59 %ret = shufflevector <4 x float> %load, <4 x float> undef,
60 <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; v2f64 case: same two-element swap as the <2 x i64> test, but on double
; elements. The expected instruction is the same one: "vlerg %v24, 0(%r2)".
65 define <2 x double> @f6(<2 x double> *%ptr) {
67 ; CHECK: vlerg %v24, 0(%r2)
69 %load = load <2 x double>, <2 x double> *%ptr
; Only lanes of %load are selected, so the second shuffle operand is undef.
70 %ret = shufflevector <2 x double> %load, <2 x double> undef,
71 <2 x i32> <i32 1, i32 0>
75 ; Test the highest aligned in-range offset.
76 define <4 x i32> @f7(<4 x i32> *%base) {
78 ; CHECK: vlerf %v24, 4080(%r2)
; Index 255 over 16-byte <4 x i32> elements is byte offset 255 * 16 = 4080,
; which the expected output folds into the instruction's displacement.
80 %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
81 %load = load <4 x i32>, <4 x i32> *%ptr
; Mask 3..0 reverses the order of the four loaded elements.
82 %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
83 <4 x i32> <i32 3, i32 2, i32 1, i32 0>
87 ; Test the highest unaligned in-range offset.
88 define <4 x i32> @f8(i8 *%base) {
90 ; CHECK: vlerf %v24, 4095(%r2)
; The address is built in bytes (i8 getelementptr) so that the offset 4095
; need not be a multiple of the 16-byte vector size; it is then cast to a
; vector pointer. The expected output uses 4095 directly as the displacement.
92 %addr = getelementptr i8, i8 *%base, i64 4095
93 %ptr = bitcast i8 *%addr to <4 x i32> *
94 %load = load <4 x i32>, <4 x i32> *%ptr
; Mask 3..0 reverses the order of the four loaded elements.
95 %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
96 <4 x i32> <i32 3, i32 2, i32 1, i32 0>
100 ; Test the next offset up, which requires separate address logic.
101 define <4 x i32> @f9(<4 x i32> *%base) {
103 ; CHECK: aghi %r2, 4096
104 ; CHECK: vlerf %v24, 0(%r2)
; Index 256 over 16-byte <4 x i32> elements is byte offset 256 * 16 = 4096.
; The expected output adds 4096 to %r2 with a separate aghi and then loads
; with displacement 0, rather than folding the offset into the load.
106 %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
107 %load = load <4 x i32>, <4 x i32> *%ptr
; Mask 3..0 reverses the order of the four loaded elements.
108 %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
109 <4 x i32> <i32 3, i32 2, i32 1, i32 0>
113 ; Test negative offsets, which also require separate address logic.
114 define <4 x i32> @f10(<4 x i32> *%base) {
116 ; CHECK: aghi %r2, -16
117 ; CHECK: vlerf %v24, 0(%r2)
; Index -1 over 16-byte <4 x i32> elements is byte offset -16. The expected
; output applies it to %r2 with a separate aghi and then loads with
; displacement 0.
119 %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
120 %load = load <4 x i32>, <4 x i32> *%ptr
; Mask 3..0 reverses the order of the four loaded elements.
121 %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
122 <4 x i32> <i32 3, i32 2, i32 1, i32 0>
126 ; Check that indexes are allowed.
127 define <4 x i32> @f11(i8 *%base, i64 %index) {
129 ; CHECK: vlerf %v24, 0(%r3,%r2)
; The address is %base plus a run-time byte offset %index. The expected
; output addresses memory with both registers, 0(%r3,%r2), instead of first
; computing the sum separately.
131 %addr = getelementptr i8, i8 *%base, i64 %index
132 %ptr = bitcast i8 *%addr to <4 x i32> *
133 %load = load <4 x i32>, <4 x i32> *%ptr
; Mask 3..0 reverses the order of the four loaded elements.
134 %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
135 <4 x i32> <i32 3, i32 2, i32 1, i32 0>