1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s
4 ; Test storing of replicated values using vector replicate type instructions.
6 ;; Replicated registers
; fun_2x1b: load an i8, zero-extend it to i16 and multiply by 257 (0x0101),
; which places the byte in both bytes of the i16 that is stored to %Dst.
; The assertions expect a vlrepb of the source followed by a vsteh of
; element 0.
8 define void @fun_2x1b(i8* %Src, i16* %Dst) {
9 ; CHECK-LABEL: fun_2x1b:
11 ; CHECK-NEXT: vlrepb %v0, 0(%r2)
12 ; CHECK-NEXT: vsteh %v0, 0(%r3), 0
14 %i = load i8, i8* %Src
15 %ZE = zext i8 %i to i16
16 %Val = mul i16 %ZE, 257
17 store i16 %Val, i16* %Dst
21 ; Test multiple stores of same value.
; fun_4x1b: a zero-extended i8 multiplied by 16843009 (0x01010101) is stored
; as i32 to both %Dst and %Dst2. The assertions expect a single vlrepb whose
; result feeds two vstef stores.
22 define void @fun_4x1b(i8* %Src, i32* %Dst, i32* %Dst2) {
23 ; CHECK-LABEL: fun_4x1b:
25 ; CHECK-NEXT: vlrepb %v0, 0(%r2)
26 ; CHECK-NEXT: vstef %v0, 0(%r3), 0
27 ; CHECK-NEXT: vstef %v0, 0(%r4), 0
29 %i = load i8, i8* %Src
30 %ZE = zext i8 %i to i32
31 %Val = mul i32 %ZE, 16843009
32 store i32 %Val, i32* %Dst
33 store i32 %Val, i32* %Dst2
; fun_8x1b: a zero-extended i8 multiplied by 72340172838076673
; (0x0101010101010101) is stored as i64. The assertions expect vlrepb
; followed by a vsteg of element 0.
37 define void @fun_8x1b(i8* %Src, i64* %Dst) {
38 ; CHECK-LABEL: fun_8x1b:
40 ; CHECK-NEXT: vlrepb %v0, 0(%r2)
41 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0
43 %i = load i8, i8* %Src
44 %ZE = zext i8 %i to i64
45 %Val = mul i64 %ZE, 72340172838076673
46 store i64 %Val, i64* %Dst
50 ; A second, truncated store of the same value.
; fun_8x1b_4x1b: the byte-replicated i64 is stored to %Dst, then truncated to
; i32 and stored to %Dst2. The assertions expect both stores (vsteg and
; vstef) to read the same %v0 produced by one vlrepb.
51 define void @fun_8x1b_4x1b(i8* %Src, i64* %Dst, i32* %Dst2) {
52 ; CHECK-LABEL: fun_8x1b_4x1b:
54 ; CHECK-NEXT: vlrepb %v0, 0(%r2)
55 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0
56 ; CHECK-NEXT: vstef %v0, 0(%r4), 0
58 %i = load i8, i8* %Src
59 %ZE = zext i8 %i to i64
60 %Val = mul i64 %ZE, 72340172838076673
61 store i64 %Val, i64* %Dst
62 %TrVal = trunc i64 %Val to i32
63 store i32 %TrVal, i32* %Dst2
; fun_2x2b: a zero-extended i16 multiplied by 65537 (0x00010001) is stored as
; i32, i.e. the halfword appears twice. The assertions expect vlreph followed
; by a vstef of element 0.
67 define void @fun_2x2b(i16* %Src, i32* %Dst) {
68 ; CHECK-LABEL: fun_2x2b:
70 ; CHECK-NEXT: vlreph %v0, 0(%r2)
71 ; CHECK-NEXT: vstef %v0, 0(%r3), 0
73 %i = load i16, i16* %Src
74 %ZE = zext i16 %i to i32
75 %Val = mul i32 %ZE, 65537
76 store i32 %Val, i32* %Dst
; fun_4x2b: a zero-extended i16 multiplied by 281479271743489
; (0x0001000100010001) is stored as i64, i.e. the halfword appears four
; times. The assertions expect vlreph followed by a vsteg of element 0.
80 define void @fun_4x2b(i16* %Src, i64* %Dst) {
81 ; CHECK-LABEL: fun_4x2b:
83 ; CHECK-NEXT: vlreph %v0, 0(%r2)
84 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0
86 %i = load i16, i16* %Src
87 %ZE = zext i16 %i to i64
88 %Val = mul i64 %ZE, 281479271743489
89 store i64 %Val, i64* %Dst
; fun_2x4b: a zero-extended i32 multiplied by 4294967297
; (0x0000000100000001) is stored as i64, i.e. the word appears twice.
; The assertions expect vlrepf followed by a vsteg of element 0.
93 define void @fun_2x4b(i32* %Src, i64* %Dst) {
94 ; CHECK-LABEL: fun_2x4b:
96 ; CHECK-NEXT: vlrepf %v0, 0(%r2)
97 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0
99 %i = load i32, i32* %Src
100 %ZE = zext i32 %i to i64
101 %Val = mul i64 %ZE, 4294967297
102 store i64 %Val, i64* %Dst
106 ;; Replicated registers already in a vector.
108 ; Test multiple stores of same value.
; fun_2Eltsx8x1b: the byte-replicated i64 is splatted into a <2 x i64>
; (insertelement + zero-mask shufflevector) and stored to %Dst and %Dst2.
; The assertions expect one vlrepb and two full-vector vst stores of %v0.
109 define void @fun_2Eltsx8x1b(i8* %Src, <2 x i64>* %Dst, <2 x i64>* %Dst2) {
110 ; CHECK-LABEL: fun_2Eltsx8x1b:
112 ; CHECK-NEXT: vlrepb %v0, 0(%r2)
113 ; CHECK-NEXT: vst %v0, 0(%r3), 3
114 ; CHECK-NEXT: vst %v0, 0(%r4), 3
115 ; CHECK-NEXT: br %r14
116 %i = load i8, i8* %Src
117 %ZE = zext i8 %i to i64
118 %Mul = mul i64 %ZE, 72340172838076673
119 %tmp = insertelement <2 x i64> undef, i64 %Mul, i32 0
120 %Val = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
121 store <2 x i64> %Val, <2 x i64>* %Dst
122 store <2 x i64> %Val, <2 x i64>* %Dst2
; fun_4Eltsx2x2b: the halfword-replicated i32 (i16 * 65537) is splatted into
; a <4 x i32> and stored. The assertions expect vlreph followed by a single
; vst of %v0.
126 define void @fun_4Eltsx2x2b(i16* %Src, <4 x i32>* %Dst) {
127 ; CHECK-LABEL: fun_4Eltsx2x2b:
129 ; CHECK-NEXT: vlreph %v0, 0(%r2)
130 ; CHECK-NEXT: vst %v0, 0(%r3), 3
131 ; CHECK-NEXT: br %r14
132 %i = load i16, i16* %Src
133 %ZE = zext i16 %i to i32
134 %Mul = mul i32 %ZE, 65537
135 %tmp = insertelement <4 x i32> undef, i32 %Mul, i32 0
136 %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
137 store <4 x i32> %Val, <4 x i32>* %Dst
; fun_6Eltsx2x2b: same splat as the <4 x i32> case but with a <6 x i32>
; destination type. The assertions expect the store to be split into a vsteg
; at offset 16 and a vst at offset 0, both reading the one replicated %v0.
141 define void @fun_6Eltsx2x2b(i16* %Src, <6 x i32>* %Dst) {
142 ; CHECK-LABEL: fun_6Eltsx2x2b:
144 ; CHECK-NEXT: vlreph %v0, 0(%r2)
145 ; CHECK-NEXT: vsteg %v0, 16(%r3), 0
146 ; CHECK-NEXT: vst %v0, 0(%r3), 4
147 ; CHECK-NEXT: br %r14
148 %i = load i16, i16* %Src
149 %ZE = zext i16 %i to i32
150 %Mul = mul i32 %ZE, 65537
151 %tmp = insertelement <6 x i32> undef, i32 %Mul, i32 0
152 %Val = shufflevector <6 x i32> %tmp, <6 x i32> undef, <6 x i32> zeroinitializer
153 store <6 x i32> %Val, <6 x i32>* %Dst
; fun_2Eltsx2x4b: the word-replicated i64 (i32 * 4294967297) is splatted into
; a <2 x i64> and stored. The assertions expect vlrepf followed by a single
; vst of %v0.
157 define void @fun_2Eltsx2x4b(i32* %Src, <2 x i64>* %Dst) {
158 ; CHECK-LABEL: fun_2Eltsx2x4b:
160 ; CHECK-NEXT: vlrepf %v0, 0(%r2)
161 ; CHECK-NEXT: vst %v0, 0(%r3), 3
162 ; CHECK-NEXT: br %r14
163 %i = load i32, i32* %Src
164 %ZE = zext i32 %i to i64
165 %Mul = mul i64 %ZE, 4294967297
166 %tmp = insertelement <2 x i64> undef, i64 %Mul, i32 0
167 %Val = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer
168 store <2 x i64> %Val, <2 x i64>* %Dst
; fun_5Eltsx2x4b: same splat as the <2 x i64> case but with a <5 x i64>
; destination type. The assertions expect the store to be split into a vsteg
; at offset 32 and two vst stores at offsets 16 and 0, all reading the one
; replicated %v0.
172 define void @fun_5Eltsx2x4b(i32* %Src, <5 x i64>* %Dst) {
173 ; CHECK-LABEL: fun_5Eltsx2x4b:
175 ; CHECK-NEXT: vlrepf %v0, 0(%r2)
176 ; CHECK-NEXT: vsteg %v0, 32(%r3), 0
177 ; CHECK-NEXT: vst %v0, 16(%r3), 4
178 ; CHECK-NEXT: vst %v0, 0(%r3), 4
179 ; CHECK-NEXT: br %r14
180 %i = load i32, i32* %Src
181 %ZE = zext i32 %i to i64
182 %Mul = mul i64 %ZE, 4294967297
183 %tmp = insertelement <5 x i64> undef, i64 %Mul, i32 0
184 %Val = shufflevector <5 x i64> %tmp, <5 x i64> undef, <5 x i32> zeroinitializer
185 store <5 x i64> %Val, <5 x i64>* %Dst
189 ; Test replicating an incoming argument.
; fun_8x1b_arg: the replicated byte comes from the i8 argument %Arg rather
; than from a load. The assertions expect the argument register to be moved
; into %v0 with vlvgp, byte element 7 to be replicated with vrepb, and the
; result stored with vsteg.
190 define void @fun_8x1b_arg(i8 %Arg, i64* %Dst) {
191 ; CHECK-LABEL: fun_8x1b_arg:
193 ; CHECK-NEXT: vlvgp %v0, %r2, %r2
194 ; CHECK-NEXT: vrepb %v0, %v0, 7
195 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0
196 ; CHECK-NEXT: br %r14
197 %ZE = zext i8 %Arg to i64
198 %Val = mul i64 %ZE, 72340172838076673
199 store i64 %Val, i64* %Dst
203 ; A replication of a non-local value (ISD::AssertZext case).
; fun_nonlocalval: the i32 is loaded before a conditional branch, while the
; zext, the multiply by 4294967297, the <2 x i64> splat and the store follow
; the branch. The assertions expect llgf + vlvgp + vrepf (element 1) + vst
; under the %bb2 label.
; NOTE(review): the labels targeted by the branch (bb2, bb7) are not visible
; in this view of the file - confirm the block layout against the full test.
204 define void @fun_nonlocalval() {
205 ; CHECK-LABEL: fun_nonlocalval:
207 ; CHECK-NEXT: lhi %r0, 0
208 ; CHECK-NEXT: ciblh %r0, 0, 0(%r14)
209 ; CHECK-NEXT: .LBB13_1: # %bb2
210 ; CHECK-NEXT: llgf %r0, 0(%r1)
211 ; CHECK-NEXT: vlvgp %v0, %r0, %r0
212 ; CHECK-NEXT: vrepf %v0, %v0, 1
213 ; CHECK-NEXT: vst %v0, 0(%r1), 3
214 ; CHECK-NEXT: br %r14
215 %i = load i32, i32* undef, align 4
216 br i1 undef, label %bb2, label %bb7
219 %i3 = zext i32 %i to i64
220 %i4 = mul nuw i64 %i3, 4294967297
221 %i5 = insertelement <2 x i64> poison, i64 %i4, i64 0
222 %i6 = shufflevector <2 x i64> %i5, <2 x i64> poison, <2 x i32> zeroinitializer
223 store <2 x i64> %i6, <2 x i64>* undef, align 8
230 ;; Replicated immediates
232 ; Some cases where a scalar instruction is better.
; fun_8x1i_zero: store of the i64 constant 0. The assertions expect a single
; mvghi with immediate 0 and no vector instructions.
233 define void @fun_8x1i_zero(i64* %Dst) {
234 ; CHECK-LABEL: fun_8x1i_zero:
236 ; CHECK-NEXT: mvghi 0(%r2), 0
237 ; CHECK-NEXT: br %r14
238 store i64 0, i64* %Dst
; fun_4x1i_minus1: store of the i32 constant -1. The assertions expect a
; single mvhi with immediate -1 and no vector instructions.
242 define void @fun_4x1i_minus1(i32* %Dst) {
243 ; CHECK-LABEL: fun_4x1i_minus1:
245 ; CHECK-NEXT: mvhi 0(%r2), -1
246 ; CHECK-NEXT: br %r14
247 store i32 -1, i32* %Dst
; fun_4x1i_allones: the same i32 bit pattern as -1, written as the unsigned
; literal 4294967295. The assertions expect the identical mvhi with
; immediate -1.
251 define void @fun_4x1i_allones(i32* %Dst) {
252 ; CHECK-LABEL: fun_4x1i_allones:
254 ; CHECK-NEXT: mvhi 0(%r2), -1
255 ; CHECK-NEXT: br %r14
256 store i32 4294967295, i32* %Dst
; fun_2i: store of the i16 constant 1. The assertions expect a single mvhhi
; with immediate 1 and no vector instructions.
260 define void @fun_2i(i16* %Dst) {
261 ; CHECK-LABEL: fun_2i:
263 ; CHECK-NEXT: mvhhi 0(%r2), 1
264 ; CHECK-NEXT: br %r14
265 store i16 1, i16* %Dst
; fun_2x2i: store of the i32 constant 65537 (0x00010001, the halfword 1
; twice). The assertions expect vrepih with immediate 1 followed by a vstef
; of element 0.
269 define void @fun_2x2i(i32* %Dst) {
270 ; CHECK-LABEL: fun_2x2i:
272 ; CHECK-NEXT: vrepih %v0, 1
273 ; CHECK-NEXT: vstef %v0, 0(%r2), 0
274 ; CHECK-NEXT: br %r14
275 store i32 65537, i32* %Dst
; fun_4x2i: store of the i64 constant 281479271743489 (0x0001000100010001,
; the halfword 1 four times). The assertions expect vrepih with immediate 1
; followed by a vsteg of element 0.
279 define void @fun_4x2i(i64* %Dst) {
280 ; CHECK-LABEL: fun_4x2i:
282 ; CHECK-NEXT: vrepih %v0, 1
283 ; CHECK-NEXT: vsteg %v0, 0(%r2), 0
284 ; CHECK-NEXT: br %r14
285 store i64 281479271743489, i64* %Dst
; fun_2x4i: store of the i64 constant 4294967297 (0x0000000100000001, the
; word 1 twice). The assertions expect vrepif with immediate 1 followed by a
; vsteg of element 0.
289 define void @fun_2x4i(i64* %Dst) {
290 ; CHECK-LABEL: fun_2x4i:
292 ; CHECK-NEXT: vrepif %v0, 1
293 ; CHECK-NEXT: vsteg %v0, 0(%r2), 0
294 ; CHECK-NEXT: br %r14
295 store i64 4294967297, i64* %Dst
299 ; Store replicated immediate twice using the same vector.
; fun_4x1i: the i32 constant 50529027 (0x03030303, the byte 3 four times) is
; stored to %Dst and %Dst2. The assertions expect one vrepib with immediate 3
; whose result feeds both vstef stores.
300 define void @fun_4x1i(i32* %Dst, i32* %Dst2) {
301 ; CHECK-LABEL: fun_4x1i:
303 ; CHECK-NEXT: vrepib %v0, 3
304 ; CHECK-NEXT: vstef %v0, 0(%r2), 0
305 ; CHECK-NEXT: vstef %v0, 0(%r3), 0
306 ; CHECK-NEXT: br %r14
307 store i32 50529027, i32* %Dst
308 store i32 50529027, i32* %Dst2
; fun_8x1i: the i64 constant 72340172838076673 (0x0101010101010101, the byte
; 1 eight times) is stored to %Dst and %Dst2. The assertions expect one
; vrepib with immediate 1 whose result feeds both vsteg stores.
312 define void @fun_8x1i(i64* %Dst, i64* %Dst2) {
313 ; CHECK-LABEL: fun_8x1i:
315 ; CHECK-NEXT: vrepib %v0, 1
316 ; CHECK-NEXT: vsteg %v0, 0(%r2), 0
317 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0
318 ; CHECK-NEXT: br %r14
319 store i64 72340172838076673, i64* %Dst
320 store i64 72340172838076673, i64* %Dst2
324 ; Similar, but with vectors.
; fun_4Eltsx4x1i_2Eltsx4x1i: the constant 50529027 (0x03030303) is splatted
; into a <4 x i32> stored to %Dst and into a <2 x i32> stored to %Dst2.
; The assertions expect one vrepib with immediate 3 to serve both the
; full-vector vst and the 8-byte vsteg.
325 define void @fun_4Eltsx4x1i_2Eltsx4x1i(<4 x i32>* %Dst, <2 x i32>* %Dst2) {
326 ; CHECK-LABEL: fun_4Eltsx4x1i_2Eltsx4x1i:
328 ; CHECK-NEXT: vrepib %v0, 3
329 ; CHECK-NEXT: vst %v0, 0(%r2), 3
330 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0
331 ; CHECK-NEXT: br %r14
332 %tmp = insertelement <4 x i32> undef, i32 50529027, i32 0
333 %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
334 store <4 x i32> %Val, <4 x i32>* %Dst
335 %tmp2 = insertelement <2 x i32> undef, i32 50529027, i32 0
336 %Val2 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
337 store <2 x i32> %Val2, <2 x i32>* %Dst2
341 ; Same, but 64-bit store is scalar.
; fun_4Eltsx4x1i_8x1i: a <4 x i32> splat of 50529027 (0x03030303) is stored
; to %Dst, and the scalar i64 constant 217020518514230019
; (0x0303030303030303) is stored to %Dst2. The assertions expect the same
; vrepib result to feed both the vst and the vsteg.
342 define void @fun_4Eltsx4x1i_8x1i(<4 x i32>* %Dst, i64* %Dst2) {
343 ; CHECK-LABEL: fun_4Eltsx4x1i_8x1i:
345 ; CHECK-NEXT: vrepib %v0, 3
346 ; CHECK-NEXT: vst %v0, 0(%r2), 3
347 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0
348 ; CHECK-NEXT: br %r14
349 %tmp = insertelement <4 x i32> undef, i32 50529027, i32 0
350 %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer
351 store <4 x i32> %Val, <4 x i32>* %Dst
352 store i64 217020518514230019, i64* %Dst2
; fun_3Eltsx2x4i: the i64 constant 4294967297 (0x0000000100000001) is
; splatted into a <3 x i64> and stored. The assertions expect one vrepif
; with immediate 1, then a vsteg at offset 16 and a vst at offset 0.
356 define void @fun_3Eltsx2x4i(<3 x i64>* %Dst) {
357 ; CHECK-LABEL: fun_3Eltsx2x4i:
359 ; CHECK-NEXT: vrepif %v0, 1
360 ; CHECK-NEXT: vsteg %v0, 16(%r2), 0
361 ; CHECK-NEXT: vst %v0, 0(%r2), 4
362 ; CHECK-NEXT: br %r14
363 %tmp = insertelement <3 x i64> undef, i64 4294967297, i32 0
364 %Val = shufflevector <3 x i64> %tmp, <3 x i64> undef, <3 x i32> zeroinitializer
365 store <3 x i64> %Val, <3 x i64>* %Dst
369 ; i128 replicated '1': not using vrepib, but should compile.
; fun_16x1i: store of an i128 constant. The assertions expect no vector
; instruction here: %r0 is built with llihf/oilf (16843009 = 0x01010101 in
; each half) and written with two stg stores at offsets 8 and 0.
370 define void @fun_16x1i(i128* %Dst) {
371 ; CHECK-LABEL: fun_16x1i:
373 ; CHECK-NEXT: llihf %r0, 16843009
374 ; CHECK-NEXT: oilf %r0, 16843009
375 ; CHECK-NEXT: stg %r0, 8(%r2)
376 ; CHECK-NEXT: stg %r0, 0(%r2)
377 ; CHECK-NEXT: br %r14
378 store i128 1334440654591915542993625911497130241, i128* %Dst