1 ; Test vector insertion of memory values.
3 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
5 ; Test v16i8 insertion into the first element.
6 define <16 x i8> @f1(<16 x i8> %val, i8 *%ptr) {
8 ; CHECK: vleb %v24, 0(%r2), 0
10 %element = load i8, i8 *%ptr
11 %ret = insertelement <16 x i8> %val, i8 %element, i32 0
15 ; Test v16i8 insertion into the last element.
16 define <16 x i8> @f2(<16 x i8> %val, i8 *%ptr) {
18 ; CHECK: vleb %v24, 0(%r2), 15
20 %element = load i8, i8 *%ptr
21 %ret = insertelement <16 x i8> %val, i8 %element, i32 15
25 ; Test v16i8 insertion with the highest in-range offset.
26 define <16 x i8> @f3(<16 x i8> %val, i8 *%base) {
28 ; CHECK: vleb %v24, 4095(%r2), 10
30 %ptr = getelementptr i8, i8 *%base, i32 4095
31 %element = load i8, i8 *%ptr
32 %ret = insertelement <16 x i8> %val, i8 %element, i32 10
36 ; Test v16i8 insertion with the first out-of-range offset.
37 define <16 x i8> @f4(<16 x i8> %val, i8 *%base) {
39 ; CHECK: aghi %r2, 4096
40 ; CHECK: vleb %v24, 0(%r2), 5
42 %ptr = getelementptr i8, i8 *%base, i32 4096
43 %element = load i8, i8 *%ptr
44 %ret = insertelement <16 x i8> %val, i8 %element, i32 5
48 ; Test v16i8 insertion into a variable element.
49 define <16 x i8> @f5(<16 x i8> %val, i8 *%ptr, i32 %index) {
53 %element = load i8, i8 *%ptr
54 %ret = insertelement <16 x i8> %val, i8 %element, i32 %index
58 ; Test v8i16 insertion into the first element.
59 define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) {
61 ; CHECK: vleh %v24, 0(%r2), 0
63 %element = load i16, i16 *%ptr
64 %ret = insertelement <8 x i16> %val, i16 %element, i32 0
68 ; Test v8i16 insertion into the last element.
69 define <8 x i16> @f7(<8 x i16> %val, i16 *%ptr) {
71 ; CHECK: vleh %v24, 0(%r2), 7
73 %element = load i16, i16 *%ptr
74 %ret = insertelement <8 x i16> %val, i16 %element, i32 7
78 ; Test v8i16 insertion with the highest in-range offset.
79 define <8 x i16> @f8(<8 x i16> %val, i16 *%base) {
81 ; CHECK: vleh %v24, 4094(%r2), 5
83 %ptr = getelementptr i16, i16 *%base, i32 2047
84 %element = load i16, i16 *%ptr
85 %ret = insertelement <8 x i16> %val, i16 %element, i32 5
89 ; Test v8i16 insertion with the first out-of-range offset.
90 define <8 x i16> @f9(<8 x i16> %val, i16 *%base) {
92 ; CHECK: aghi %r2, 4096
93 ; CHECK: vleh %v24, 0(%r2), 1
95 %ptr = getelementptr i16, i16 *%base, i32 2048
96 %element = load i16, i16 *%ptr
97 %ret = insertelement <8 x i16> %val, i16 %element, i32 1
101 ; Test v8i16 insertion into a variable element.
102 define <8 x i16> @f10(<8 x i16> %val, i16 *%ptr, i32 %index) {
106 %element = load i16, i16 *%ptr
107 %ret = insertelement <8 x i16> %val, i16 %element, i32 %index
111 ; Test v4i32 insertion into the first element.
112 define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr) {
114 ; CHECK: vlef %v24, 0(%r2), 0
116 %element = load i32, i32 *%ptr
117 %ret = insertelement <4 x i32> %val, i32 %element, i32 0
121 ; Test v4i32 insertion into the last element.
122 define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) {
124 ; CHECK: vlef %v24, 0(%r2), 3
126 %element = load i32, i32 *%ptr
127 %ret = insertelement <4 x i32> %val, i32 %element, i32 3
131 ; Test v4i32 insertion with the highest in-range offset.
132 define <4 x i32> @f13(<4 x i32> %val, i32 *%base) {
134 ; CHECK: vlef %v24, 4092(%r2), 2
136 %ptr = getelementptr i32, i32 *%base, i32 1023
137 %element = load i32, i32 *%ptr
138 %ret = insertelement <4 x i32> %val, i32 %element, i32 2
142 ; Test v4i32 insertion with the first out-of-range offset.
143 define <4 x i32> @f14(<4 x i32> %val, i32 *%base) {
145 ; CHECK: aghi %r2, 4096
146 ; CHECK: vlef %v24, 0(%r2), 1
148 %ptr = getelementptr i32, i32 *%base, i32 1024
149 %element = load i32, i32 *%ptr
150 %ret = insertelement <4 x i32> %val, i32 %element, i32 1
154 ; Test v4i32 insertion into a variable element.
155 define <4 x i32> @f15(<4 x i32> %val, i32 *%ptr, i32 %index) {
159 %element = load i32, i32 *%ptr
160 %ret = insertelement <4 x i32> %val, i32 %element, i32 %index
164 ; Test v2i64 insertion into the first element.
165 define <2 x i64> @f16(<2 x i64> %val, i64 *%ptr) {
167 ; CHECK: vleg %v24, 0(%r2), 0
169 %element = load i64, i64 *%ptr
170 %ret = insertelement <2 x i64> %val, i64 %element, i32 0
174 ; Test v2i64 insertion into the last element.
175 define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr) {
177 ; CHECK: vleg %v24, 0(%r2), 1
179 %element = load i64, i64 *%ptr
180 %ret = insertelement <2 x i64> %val, i64 %element, i32 1
184 ; Test v2i64 insertion with the highest in-range offset.
185 define <2 x i64> @f18(<2 x i64> %val, i64 *%base) {
187 ; CHECK: vleg %v24, 4088(%r2), 1
189 %ptr = getelementptr i64, i64 *%base, i32 511
190 %element = load i64, i64 *%ptr
191 %ret = insertelement <2 x i64> %val, i64 %element, i32 1
195 ; Test v2i64 insertion with the first out-of-range offset.
196 define <2 x i64> @f19(<2 x i64> %val, i64 *%base) {
198 ; CHECK: aghi %r2, 4096
199 ; CHECK: vleg %v24, 0(%r2), 0
201 %ptr = getelementptr i64, i64 *%base, i32 512
202 %element = load i64, i64 *%ptr
203 %ret = insertelement <2 x i64> %val, i64 %element, i32 0
207 ; Test v2i64 insertion into a variable element.
208 define <2 x i64> @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
212 %element = load i64, i64 *%ptr
213 %ret = insertelement <2 x i64> %val, i64 %element, i32 %index
217 ; Test v4f32 insertion into the first element.
218 define <4 x float> @f21(<4 x float> %val, float *%ptr) {
220 ; CHECK: vlef %v24, 0(%r2), 0
222 %element = load float, float *%ptr
223 %ret = insertelement <4 x float> %val, float %element, i32 0
227 ; Test v4f32 insertion into the last element.
228 define <4 x float> @f22(<4 x float> %val, float *%ptr) {
230 ; CHECK: vlef %v24, 0(%r2), 3
232 %element = load float, float *%ptr
233 %ret = insertelement <4 x float> %val, float %element, i32 3
237 ; Test v4f32 insertion with the highest in-range offset.
238 define <4 x float> @f23(<4 x float> %val, float *%base) {
240 ; CHECK: vlef %v24, 4092(%r2), 2
242 %ptr = getelementptr float, float *%base, i32 1023
243 %element = load float, float *%ptr
244 %ret = insertelement <4 x float> %val, float %element, i32 2
248 ; Test v4f32 insertion with the first out-of-range offset.
249 define <4 x float> @f24(<4 x float> %val, float *%base) {
251 ; CHECK: aghi %r2, 4096
252 ; CHECK: vlef %v24, 0(%r2), 1
254 %ptr = getelementptr float, float *%base, i32 1024
255 %element = load float, float *%ptr
256 %ret = insertelement <4 x float> %val, float %element, i32 1
260 ; Test v4f32 insertion into a variable element.
261 define <4 x float> @f25(<4 x float> %val, float *%ptr, i32 %index) {
265 %element = load float, float *%ptr
266 %ret = insertelement <4 x float> %val, float %element, i32 %index
270 ; Test v2f64 insertion into the first element.
271 define <2 x double> @f26(<2 x double> %val, double *%ptr) {
273 ; CHECK: vleg %v24, 0(%r2), 0
275 %element = load double, double *%ptr
276 %ret = insertelement <2 x double> %val, double %element, i32 0
277 ret <2 x double> %ret
280 ; Test v2f64 insertion into the last element.
281 define <2 x double> @f27(<2 x double> %val, double *%ptr) {
283 ; CHECK: vleg %v24, 0(%r2), 1
285 %element = load double, double *%ptr
286 %ret = insertelement <2 x double> %val, double %element, i32 1
287 ret <2 x double> %ret
290 ; Test v2f64 insertion with the highest in-range offset.
291 define <2 x double> @f28(<2 x double> %val, double *%base) {
293 ; CHECK: vleg %v24, 4088(%r2), 1
295 %ptr = getelementptr double, double *%base, i32 511
296 %element = load double, double *%ptr
297 %ret = insertelement <2 x double> %val, double %element, i32 1
298 ret <2 x double> %ret
301 ; Test v2f64 insertion with the first out-of-range offset.
302 define <2 x double> @f29(<2 x double> %val, double *%base) {
304 ; CHECK: aghi %r2, 4096
305 ; CHECK: vleg %v24, 0(%r2), 0
307 %ptr = getelementptr double, double *%base, i32 512
308 %element = load double, double *%ptr
309 %ret = insertelement <2 x double> %val, double %element, i32 0
310 ret <2 x double> %ret
313 ; Test v2f64 insertion into a variable element.
314 define <2 x double> @f30(<2 x double> %val, double *%ptr, i32 %index) {
318 %element = load double, double *%ptr
319 %ret = insertelement <2 x double> %val, double %element, i32 %index
320 ret <2 x double> %ret
323 ; Test a v4i32 gather of the first element.
324 define <4 x i32> @f31(<4 x i32> %val, <4 x i32> %index, i64 %base) {
326 ; CHECK: vgef %v24, 0(%v26,%r2), 0
328 %elem = extractelement <4 x i32> %index, i32 0
329 %ext = zext i32 %elem to i64
330 %add = add i64 %base, %ext
331 %ptr = inttoptr i64 %add to i32 *
332 %element = load i32, i32 *%ptr
333 %ret = insertelement <4 x i32> %val, i32 %element, i32 0
337 ; Test a v4i32 gather of the last element.
338 define <4 x i32> @f32(<4 x i32> %val, <4 x i32> %index, i64 %base) {
340 ; CHECK: vgef %v24, 0(%v26,%r2), 3
342 %elem = extractelement <4 x i32> %index, i32 3
343 %ext = zext i32 %elem to i64
344 %add = add i64 %base, %ext
345 %ptr = inttoptr i64 %add to i32 *
346 %element = load i32, i32 *%ptr
347 %ret = insertelement <4 x i32> %val, i32 %element, i32 3
351 ; Test a v4i32 gather with the highest in-range offset.
352 define <4 x i32> @f33(<4 x i32> %val, <4 x i32> %index, i64 %base) {
354 ; CHECK: vgef %v24, 4095(%v26,%r2), 1
356 %elem = extractelement <4 x i32> %index, i32 1
357 %ext = zext i32 %elem to i64
358 %add1 = add i64 %base, %ext
359 %add2 = add i64 %add1, 4095
360 %ptr = inttoptr i64 %add2 to i32 *
361 %element = load i32, i32 *%ptr
362 %ret = insertelement <4 x i32> %val, i32 %element, i32 1
366 ; Test a v2i64 gather of the first element.
367 define <2 x i64> @f34(<2 x i64> %val, <2 x i64> %index, i64 %base) {
369 ; CHECK: vgeg %v24, 0(%v26,%r2), 0
371 %elem = extractelement <2 x i64> %index, i32 0
372 %add = add i64 %base, %elem
373 %ptr = inttoptr i64 %add to i64 *
374 %element = load i64, i64 *%ptr
375 %ret = insertelement <2 x i64> %val, i64 %element, i32 0
379 ; Test a v2i64 gather of the last element.
380 define <2 x i64> @f35(<2 x i64> %val, <2 x i64> %index, i64 %base) {
382 ; CHECK: vgeg %v24, 0(%v26,%r2), 1
384 %elem = extractelement <2 x i64> %index, i32 1
385 %add = add i64 %base, %elem
386 %ptr = inttoptr i64 %add to i64 *
387 %element = load i64, i64 *%ptr
388 %ret = insertelement <2 x i64> %val, i64 %element, i32 1
392 ; Test a v4f32 gather of the first element.
393 define <4 x float> @f36(<4 x float> %val, <4 x i32> %index, i64 %base) {
395 ; CHECK: vgef %v24, 0(%v26,%r2), 0
397 %elem = extractelement <4 x i32> %index, i32 0
398 %ext = zext i32 %elem to i64
399 %add = add i64 %base, %ext
400 %ptr = inttoptr i64 %add to float *
401 %element = load float, float *%ptr
402 %ret = insertelement <4 x float> %val, float %element, i32 0
406 ; Test a v4f32 gather of the last element.
407 define <4 x float> @f37(<4 x float> %val, <4 x i32> %index, i64 %base) {
409 ; CHECK: vgef %v24, 0(%v26,%r2), 3
411 %elem = extractelement <4 x i32> %index, i32 3
412 %ext = zext i32 %elem to i64
413 %add = add i64 %base, %ext
414 %ptr = inttoptr i64 %add to float *
415 %element = load float, float *%ptr
416 %ret = insertelement <4 x float> %val, float %element, i32 3
420 ; Test a v2f64 gather of the first element.
421 define <2 x double> @f38(<2 x double> %val, <2 x i64> %index, i64 %base) {
423 ; CHECK: vgeg %v24, 0(%v26,%r2), 0
425 %elem = extractelement <2 x i64> %index, i32 0
426 %add = add i64 %base, %elem
427 %ptr = inttoptr i64 %add to double *
428 %element = load double, double *%ptr
429 %ret = insertelement <2 x double> %val, double %element, i32 0
430 ret <2 x double> %ret
433 ; Test a v2f64 gather of the last element.
434 define <2 x double> @f39(<2 x double> %val, <2 x i64> %index, i64 %base) {
436 ; CHECK: vgeg %v24, 0(%v26,%r2), 1
438 %elem = extractelement <2 x i64> %index, i32 1
439 %add = add i64 %base, %elem
440 %ptr = inttoptr i64 %add to double *
441 %element = load double, double *%ptr
442 %ret = insertelement <2 x double> %val, double %element, i32 1
443 ret <2 x double> %ret
446 ; Test a v4i32 gather where the load is chained.
447 define void @f40(<4 x i32> %val, <4 x i32> %index, i64 %base, <4 x i32> *%res) {
449 ; CHECK: vgef %v24, 0(%v26,%r2), 1
450 ; CHECK: vst %v24, 0(%r3)
452 %elem = extractelement <4 x i32> %index, i32 1
453 %ext = zext i32 %elem to i64
454 %add = add i64 %base, %ext
455 %ptr = inttoptr i64 %add to i32 *
456 %element = load i32, i32 *%ptr
457 %ret = insertelement <4 x i32> %val, i32 %element, i32 1
458 store <4 x i32> %ret, <4 x i32> *%res
462 ; Test a v2i64 gather where the load is chained.
463 define void @f41(<2 x i64> %val, <2 x i64> %index, i64 %base, <2 x i64> *%res) {
465 ; CHECK: vgeg %v24, 0(%v26,%r2), 1
466 ; CHECK: vst %v24, 0(%r3)
468 %elem = extractelement <2 x i64> %index, i32 1
469 %add = add i64 %base, %elem
470 %ptr = inttoptr i64 %add to i64 *
471 %element = load i64, i64 *%ptr
472 %ret = insertelement <2 x i64> %val, i64 %element, i32 1
473 store <2 x i64> %ret, <2 x i64> *%res