1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
4 ; Check that an ld1r* instruction is generated to splat a scalar during the load,
5 ; rather than a mov from a scalar to a vector register (which would require the vector unit).
7 ; one-off: ld1r_stack checks that ld1rb works with stack objects.
10 ; types = [i8, i16, i32, i64, half, float, double]
11 ; methods = [direct load, gep upper bound - 1, gep out of range x {neg,pos}, sext..., zext..., unpacked_floats...]
14 @g8 = external global i8
16 ; One-off test for splatted value coming from stack load.
; A byte is loaded (volatile) from @g8 and stored (volatile) to %valp; the splatted
; value is then loaded from %valp + 2. The checks expect the store at [sp, #12] and
; the splat load folded into a single ld1rb at [sp, #14].
; NOTE(review): the definition of %valp is not visible in this excerpt - confirm it
; is the stack object the checks refer to.
17 define <vscale x 16 x i8> @ld1r_stack() {
18 ; CHECK-LABEL: ld1r_stack:
20 ; CHECK-NEXT: sub sp, sp, #16
21 ; CHECK-NEXT: .cfi_def_cfa_offset 16
22 ; CHECK-NEXT: adrp x8, :got:g8
23 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:g8]
24 ; CHECK-NEXT: ptrue p0.b
25 ; CHECK-NEXT: ldrb w8, [x8]
26 ; CHECK-NEXT: strb w8, [sp, #12]
27 ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [sp, #14]
28 ; CHECK-NEXT: add sp, sp, #16
31 %valp2 = load volatile i8, i8* @g8
32 store volatile i8 %valp2, i8* %valp
33 %valp3 = getelementptr i8, i8* %valp, i32 2
34 %val = load i8, i8* %valp3
35 %1 = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
36 %2 = shufflevector <vscale x 16 x i8> %1, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
37 ret <vscale x 16 x i8> %2
; Splat of an i8 loaded straight from %valp: the checks expect one ld1rb into .b elements with no address offset.
40 define <vscale x 16 x i8> @ld1rb(i8* %valp) {
43 ; CHECK-NEXT: ptrue p0.b
44 ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0]
46 %val = load i8, i8* %valp
47 %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
48 %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
49 ret <vscale x 16 x i8> %shf
; i8 element index 63 (byte offset 63): the checks expect the offset folded into the ld1rb immediate.
52 define <vscale x 16 x i8> @ld1rb_gep(i8* %valp) {
53 ; CHECK-LABEL: ld1rb_gep:
55 ; CHECK-NEXT: ptrue p0.b
56 ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x0, #63]
58 %valp2 = getelementptr i8, i8* %valp, i32 63
59 %val = load i8, i8* %valp2
60 %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
61 %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
62 ret <vscale x 16 x i8> %shf
; i8 element index 64 (byte offset 64) is not folded: the checks expect a separate add followed by an unoffset ld1rb.
65 define <vscale x 16 x i8> @ld1rb_gep_out_of_range_up(i8* %valp) {
66 ; CHECK-LABEL: ld1rb_gep_out_of_range_up:
68 ; CHECK-NEXT: add x8, x0, #64
69 ; CHECK-NEXT: ptrue p0.b
70 ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x8]
72 %valp2 = getelementptr i8, i8* %valp, i32 64
73 %val = load i8, i8* %valp2
74 %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
75 %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
76 ret <vscale x 16 x i8> %shf
; i8 element index -1 (byte offset -1) is not folded: the checks expect a separate sub followed by an unoffset ld1rb.
79 define <vscale x 16 x i8> @ld1rb_gep_out_of_range_down(i8* %valp) {
80 ; CHECK-LABEL: ld1rb_gep_out_of_range_down:
82 ; CHECK-NEXT: sub x8, x0, #1
83 ; CHECK-NEXT: ptrue p0.b
84 ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x8]
86 %valp2 = getelementptr i8, i8* %valp, i32 -1
87 %val = load i8, i8* %valp2
88 %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
89 %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
90 ret <vscale x 16 x i8> %shf
; zext i8 -> i16 of the loaded scalar before the splat: the checks expect ld1rb writing .h elements.
93 define <vscale x 8 x i16> @ld1rb_i8_i16_zext(i8* %valp) {
94 ; CHECK-LABEL: ld1rb_i8_i16_zext:
96 ; CHECK-NEXT: ptrue p0.h
97 ; CHECK-NEXT: ld1rb { z0.h }, p0/z, [x0]
99 %val = load i8, i8* %valp
100 %ext = zext i8 %val to i16
101 %ins = insertelement <vscale x 8 x i16> undef, i16 %ext, i32 0
102 %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
103 ret <vscale x 8 x i16> %shf
; sext i8 -> i16 of the loaded scalar before the splat: the checks expect ld1rsb (not ld1rb) writing .h elements.
106 define <vscale x 8 x i16> @ld1rb_i8_i16_sext(i8* %valp) {
107 ; CHECK-LABEL: ld1rb_i8_i16_sext:
109 ; CHECK-NEXT: ptrue p0.h
110 ; CHECK-NEXT: ld1rsb { z0.h }, p0/z, [x0]
112 %val = load i8, i8* %valp
113 %ext = sext i8 %val to i16
114 %ins = insertelement <vscale x 8 x i16> undef, i16 %ext, i32 0
115 %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
116 ret <vscale x 8 x i16> %shf
; zext i8 -> i32 of the loaded scalar before the splat: the checks expect ld1rb writing .s elements.
119 define <vscale x 4 x i32> @ld1rb_i8_i32_zext(i8* %valp) {
120 ; CHECK-LABEL: ld1rb_i8_i32_zext:
122 ; CHECK-NEXT: ptrue p0.s
123 ; CHECK-NEXT: ld1rb { z0.s }, p0/z, [x0]
125 %val = load i8, i8* %valp
126 %ext = zext i8 %val to i32
127 %ins = insertelement <vscale x 4 x i32> undef, i32 %ext, i32 0
128 %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
129 ret <vscale x 4 x i32> %shf
; sext i8 -> i32 of the loaded scalar before the splat: the checks expect ld1rsb (not ld1rb) writing .s elements.
132 define <vscale x 4 x i32> @ld1rb_i8_i32_sext(i8* %valp) {
133 ; CHECK-LABEL: ld1rb_i8_i32_sext:
135 ; CHECK-NEXT: ptrue p0.s
136 ; CHECK-NEXT: ld1rsb { z0.s }, p0/z, [x0]
138 %val = load i8, i8* %valp
139 %ext = sext i8 %val to i32
140 %ins = insertelement <vscale x 4 x i32> undef, i32 %ext, i32 0
141 %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
142 ret <vscale x 4 x i32> %shf
; zext i8 -> i64 of the loaded scalar before the splat: the checks expect ld1rb writing .d elements.
145 define <vscale x 2 x i64> @ld1rb_i8_i64_zext(i8* %valp) {
146 ; CHECK-LABEL: ld1rb_i8_i64_zext:
148 ; CHECK-NEXT: ptrue p0.d
149 ; CHECK-NEXT: ld1rb { z0.d }, p0/z, [x0]
151 %val = load i8, i8* %valp
152 %ext = zext i8 %val to i64
153 %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
154 %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
155 ret <vscale x 2 x i64> %shf
; sext i8 -> i64 of the loaded scalar before the splat: the checks expect ld1rsb (not ld1rb) writing .d elements.
158 define <vscale x 2 x i64> @ld1rb_i8_i64_sext(i8* %valp) {
159 ; CHECK-LABEL: ld1rb_i8_i64_sext:
161 ; CHECK-NEXT: ptrue p0.d
162 ; CHECK-NEXT: ld1rsb { z0.d }, p0/z, [x0]
164 %val = load i8, i8* %valp
165 %ext = sext i8 %val to i64
166 %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
167 %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
168 ret <vscale x 2 x i64> %shf
; Splat of an i16 loaded straight from %valp: the checks expect one ld1rh into .h elements with no address offset.
171 define <vscale x 8 x i16> @ld1rh(i16* %valp) {
172 ; CHECK-LABEL: ld1rh:
174 ; CHECK-NEXT: ptrue p0.h
175 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0]
177 %val = load i16, i16* %valp
178 %ins = insertelement <vscale x 8 x i16> undef, i16 %val, i32 0
179 %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
180 ret <vscale x 8 x i16> %shf
; i16 element index 63 (byte offset 126): the checks expect the offset folded into the ld1rh immediate.
183 define <vscale x 8 x i16> @ld1rh_gep(i16* %valp) {
184 ; CHECK-LABEL: ld1rh_gep:
186 ; CHECK-NEXT: ptrue p0.h
187 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0, #126]
189 %valp2 = getelementptr i16, i16* %valp, i32 63
190 %val = load i16, i16* %valp2
191 %ins = insertelement <vscale x 8 x i16> undef, i16 %val, i32 0
192 %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
193 ret <vscale x 8 x i16> %shf
; i16 element index 64 (byte offset 128) is not folded: the checks expect a separate add followed by an unoffset ld1rh.
196 define <vscale x 8 x i16> @ld1rh_gep_out_of_range_up(i16* %valp) {
197 ; CHECK-LABEL: ld1rh_gep_out_of_range_up:
199 ; CHECK-NEXT: add x8, x0, #128
200 ; CHECK-NEXT: ptrue p0.h
201 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8]
203 %valp2 = getelementptr i16, i16* %valp, i32 64
204 %val = load i16, i16* %valp2
205 %ins = insertelement <vscale x 8 x i16> undef, i16 %val, i32 0
206 %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
207 ret <vscale x 8 x i16> %shf
; i16 element index -1 (byte offset -2) is not folded: the checks expect a separate sub followed by an unoffset ld1rh.
210 define <vscale x 8 x i16> @ld1rh_gep_out_of_range_down(i16* %valp) {
211 ; CHECK-LABEL: ld1rh_gep_out_of_range_down:
213 ; CHECK-NEXT: sub x8, x0, #2
214 ; CHECK-NEXT: ptrue p0.h
215 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8]
217 %valp2 = getelementptr i16, i16* %valp, i32 -1
218 %val = load i16, i16* %valp2
219 %ins = insertelement <vscale x 8 x i16> undef, i16 %val, i32 0
220 %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
221 ret <vscale x 8 x i16> %shf
; zext i16 -> i32 of the loaded scalar before the splat: the checks expect ld1rh writing .s elements.
224 define <vscale x 4 x i32> @ld1rh_i16_i32_zext(i16* %valp) {
225 ; CHECK-LABEL: ld1rh_i16_i32_zext:
227 ; CHECK-NEXT: ptrue p0.s
228 ; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0]
230 %val = load i16, i16* %valp
231 %ext = zext i16 %val to i32
232 %ins = insertelement <vscale x 4 x i32> undef, i32 %ext, i32 0
233 %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
234 ret <vscale x 4 x i32> %shf
; sext i16 -> i32 of the loaded scalar before the splat: the checks expect ld1rsh (not ld1rh) writing .s elements.
237 define <vscale x 4 x i32> @ld1rh_i16_i32_sext(i16* %valp) {
238 ; CHECK-LABEL: ld1rh_i16_i32_sext:
240 ; CHECK-NEXT: ptrue p0.s
241 ; CHECK-NEXT: ld1rsh { z0.s }, p0/z, [x0]
243 %val = load i16, i16* %valp
244 %ext = sext i16 %val to i32
245 %ins = insertelement <vscale x 4 x i32> undef, i32 %ext, i32 0
246 %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
247 ret <vscale x 4 x i32> %shf
; zext i16 -> i64 of the loaded scalar before the splat: the checks expect ld1rh writing .d elements.
250 define <vscale x 2 x i64> @ld1rh_i16_i64_zext(i16* %valp) {
251 ; CHECK-LABEL: ld1rh_i16_i64_zext:
253 ; CHECK-NEXT: ptrue p0.d
254 ; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0]
256 %val = load i16, i16* %valp
257 %ext = zext i16 %val to i64
258 %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
259 %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
260 ret <vscale x 2 x i64> %shf
; sext i16 -> i64 of the loaded scalar before the splat: the checks expect ld1rsh (not ld1rh) writing .d elements.
263 define <vscale x 2 x i64> @ld1rh_i16_i64_sext(i16* %valp) {
264 ; CHECK-LABEL: ld1rh_i16_i64_sext:
266 ; CHECK-NEXT: ptrue p0.d
267 ; CHECK-NEXT: ld1rsh { z0.d }, p0/z, [x0]
269 %val = load i16, i16* %valp
270 %ext = sext i16 %val to i64
271 %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
272 %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
273 ret <vscale x 2 x i64> %shf
; Splat of an i32 loaded straight from %valp: the checks expect one ld1rw into .s elements with no address offset.
276 define <vscale x 4 x i32> @ld1rw(i32* %valp) {
277 ; CHECK-LABEL: ld1rw:
279 ; CHECK-NEXT: ptrue p0.s
280 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0]
282 %val = load i32, i32* %valp
283 %ins = insertelement <vscale x 4 x i32> undef, i32 %val, i32 0
284 %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
285 ret <vscale x 4 x i32> %shf
; i32 element index 63 (byte offset 252): the checks expect the offset folded into the ld1rw immediate.
288 define <vscale x 4 x i32> @ld1rw_gep(i32* %valp) {
289 ; CHECK-LABEL: ld1rw_gep:
291 ; CHECK-NEXT: ptrue p0.s
292 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0, #252]
294 %valp2 = getelementptr i32, i32* %valp, i32 63
295 %val = load i32, i32* %valp2
296 %ins = insertelement <vscale x 4 x i32> undef, i32 %val, i32 0
297 %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
298 ret <vscale x 4 x i32> %shf
; i32 element index 64 (byte offset 256) is not folded: the checks expect a separate add followed by an unoffset ld1rw.
301 define <vscale x 4 x i32> @ld1rw_gep_out_of_range_up(i32* %valp) {
302 ; CHECK-LABEL: ld1rw_gep_out_of_range_up:
304 ; CHECK-NEXT: add x8, x0, #256
305 ; CHECK-NEXT: ptrue p0.s
306 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8]
308 %valp2 = getelementptr i32, i32* %valp, i32 64
309 %val = load i32, i32* %valp2
310 %ins = insertelement <vscale x 4 x i32> undef, i32 %val, i32 0
311 %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
312 ret <vscale x 4 x i32> %shf
; i32 element index -1 (byte offset -4) is not folded: the checks expect a separate sub followed by an unoffset ld1rw.
315 define <vscale x 4 x i32> @ld1rw_gep_out_of_range_down(i32* %valp) {
316 ; CHECK-LABEL: ld1rw_gep_out_of_range_down:
318 ; CHECK-NEXT: sub x8, x0, #4
319 ; CHECK-NEXT: ptrue p0.s
320 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8]
322 %valp2 = getelementptr i32, i32* %valp, i32 -1
323 %val = load i32, i32* %valp2
324 %ins = insertelement <vscale x 4 x i32> undef, i32 %val, i32 0
325 %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
326 ret <vscale x 4 x i32> %shf
; zext i32 -> i64 of the loaded scalar before the splat: the checks expect ld1rw writing .d elements.
329 define <vscale x 2 x i64> @ld1rw_i32_i64_zext(i32* %valp) {
330 ; CHECK-LABEL: ld1rw_i32_i64_zext:
332 ; CHECK-NEXT: ptrue p0.d
333 ; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0]
335 %val = load i32, i32* %valp
336 %ext = zext i32 %val to i64
337 %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
338 %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
339 ret <vscale x 2 x i64> %shf
; sext i32 -> i64 of the loaded scalar before the splat: the checks expect ld1rsw (not ld1rw) writing .d elements.
342 define <vscale x 2 x i64> @ld1rw_i32_i64_sext(i32* %valp) {
343 ; CHECK-LABEL: ld1rw_i32_i64_sext:
345 ; CHECK-NEXT: ptrue p0.d
346 ; CHECK-NEXT: ld1rsw { z0.d }, p0/z, [x0]
348 %val = load i32, i32* %valp
349 %ext = sext i32 %val to i64
350 %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
351 %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
352 ret <vscale x 2 x i64> %shf
; Splat of an i64 loaded straight from %valp: the checks expect one ld1rd into .d elements with no address offset.
355 define <vscale x 2 x i64> @ld1rd(i64* %valp) {
356 ; CHECK-LABEL: ld1rd:
358 ; CHECK-NEXT: ptrue p0.d
359 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0]
361 %val = load i64, i64* %valp
362 %ins = insertelement <vscale x 2 x i64> undef, i64 %val, i32 0
363 %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
364 ret <vscale x 2 x i64> %shf
; i64 element index 63 (byte offset 504): the checks expect the offset folded into the ld1rd immediate.
367 define <vscale x 2 x i64> @ld1rd_gep(i64* %valp) {
368 ; CHECK-LABEL: ld1rd_gep:
370 ; CHECK-NEXT: ptrue p0.d
371 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0, #504]
373 %valp2 = getelementptr i64, i64* %valp, i32 63
374 %val = load i64, i64* %valp2
375 %ins = insertelement <vscale x 2 x i64> undef, i64 %val, i32 0
376 %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
377 ret <vscale x 2 x i64> %shf
; i64 element index 64 (byte offset 512) is not folded: the checks expect a separate add followed by an unoffset ld1rd.
380 define <vscale x 2 x i64> @ld1rd_gep_out_of_range_up(i64* %valp) {
381 ; CHECK-LABEL: ld1rd_gep_out_of_range_up:
383 ; CHECK-NEXT: add x8, x0, #512
384 ; CHECK-NEXT: ptrue p0.d
385 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
387 %valp2 = getelementptr i64, i64* %valp, i32 64
388 %val = load i64, i64* %valp2
389 %ins = insertelement <vscale x 2 x i64> undef, i64 %val, i32 0
390 %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
391 ret <vscale x 2 x i64> %shf
; i64 element index -1 (byte offset -8) is not folded: the checks expect a separate sub followed by an unoffset ld1rd.
394 define <vscale x 2 x i64> @ld1rd_gep_out_of_range_down(i64* %valp) {
395 ; CHECK-LABEL: ld1rd_gep_out_of_range_down:
397 ; CHECK-NEXT: sub x8, x0, #8
398 ; CHECK-NEXT: ptrue p0.d
399 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
401 %valp2 = getelementptr i64, i64* %valp, i32 -1
402 %val = load i64, i64* %valp2
403 %ins = insertelement <vscale x 2 x i64> undef, i64 %val, i32 0
404 %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
405 ret <vscale x 2 x i64> %shf
; Splat of a half loaded straight from %valp into <vscale x 8 x half>: the checks expect one ld1rh into .h elements with no address offset.
408 define <vscale x 8 x half> @ld1rh_half(half* %valp) {
409 ; CHECK-LABEL: ld1rh_half:
411 ; CHECK-NEXT: ptrue p0.h
412 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0]
414 %val = load half, half* %valp
415 %ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
416 %shf = shufflevector <vscale x 8 x half> %ins, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
417 ret <vscale x 8 x half> %shf
; half element index 63 (byte offset 126): the checks expect the offset folded into the ld1rh immediate.
420 define <vscale x 8 x half> @ld1rh_half_gep(half* %valp) {
421 ; CHECK-LABEL: ld1rh_half_gep:
423 ; CHECK-NEXT: ptrue p0.h
424 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x0, #126]
426 %valp2 = getelementptr half, half* %valp, i32 63
427 %val = load half, half* %valp2
428 %ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
429 %shf = shufflevector <vscale x 8 x half> %ins, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
430 ret <vscale x 8 x half> %shf
; half element index 64 (byte offset 128) is not folded: the checks expect a separate add followed by an unoffset ld1rh.
433 define <vscale x 8 x half> @ld1rh_half_gep_out_of_range_up(half* %valp) {
434 ; CHECK-LABEL: ld1rh_half_gep_out_of_range_up:
436 ; CHECK-NEXT: add x8, x0, #128
437 ; CHECK-NEXT: ptrue p0.h
438 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8]
440 %valp2 = getelementptr half, half* %valp, i32 64
441 %val = load half, half* %valp2
442 %ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
443 %shf = shufflevector <vscale x 8 x half> %ins, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
444 ret <vscale x 8 x half> %shf
; half element index -1 (byte offset -2) is not folded: the checks expect a separate sub followed by an unoffset ld1rh.
447 define <vscale x 8 x half> @ld1rh_half_gep_out_of_range_down(half* %valp) {
448 ; CHECK-LABEL: ld1rh_half_gep_out_of_range_down:
450 ; CHECK-NEXT: sub x8, x0, #2
451 ; CHECK-NEXT: ptrue p0.h
452 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8]
454 %valp2 = getelementptr half, half* %valp, i32 -1
455 %val = load half, half* %valp2
456 %ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
457 %shf = shufflevector <vscale x 8 x half> %ins, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
458 ret <vscale x 8 x half> %shf
; Unpacked <vscale x 4 x half> splat of a directly loaded half: the checks expect ld1rh writing .s elements under a .s predicate.
461 define <vscale x 4 x half> @ld1rh_half_unpacked4(half* %valp) {
462 ; CHECK-LABEL: ld1rh_half_unpacked4:
464 ; CHECK-NEXT: ptrue p0.s
465 ; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0]
467 %val = load half, half* %valp
468 %ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
469 %shf = shufflevector <vscale x 4 x half> %ins, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
470 ret <vscale x 4 x half> %shf
; Unpacked <vscale x 4 x half>, half element index 63 (byte offset 126): the checks expect the offset folded into the ld1rh (.s) immediate.
473 define <vscale x 4 x half> @ld1rh_half_unpacked4_gep(half* %valp) {
474 ; CHECK-LABEL: ld1rh_half_unpacked4_gep:
476 ; CHECK-NEXT: ptrue p0.s
477 ; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x0, #126]
479 %valp2 = getelementptr half, half* %valp, i32 63
480 %val = load half, half* %valp2
481 %ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
482 %shf = shufflevector <vscale x 4 x half> %ins, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
483 ret <vscale x 4 x half> %shf
; Unpacked <vscale x 4 x half>, half element index 64 (byte offset 128) is not folded: the checks expect a separate add followed by an unoffset ld1rh (.s).
486 define <vscale x 4 x half> @ld1rh_half_unpacked4_gep_out_of_range_up(half* %valp) {
487 ; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up:
489 ; CHECK-NEXT: add x8, x0, #128
490 ; CHECK-NEXT: ptrue p0.s
491 ; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8]
493 %valp2 = getelementptr half, half* %valp, i32 64
494 %val = load half, half* %valp2
495 %ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
496 %shf = shufflevector <vscale x 4 x half> %ins, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
497 ret <vscale x 4 x half> %shf
; Unpacked <vscale x 4 x half>, half element index -1 (byte offset -2) is not folded: the checks expect a separate sub followed by an unoffset ld1rh (.s).
500 define <vscale x 4 x half> @ld1rh_half_unpacked4_gep_out_of_range_down(half* %valp) {
501 ; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down:
503 ; CHECK-NEXT: sub x8, x0, #2
504 ; CHECK-NEXT: ptrue p0.s
505 ; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8]
507 %valp2 = getelementptr half, half* %valp, i32 -1
508 %val = load half, half* %valp2
509 %ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
510 %shf = shufflevector <vscale x 4 x half> %ins, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
511 ret <vscale x 4 x half> %shf
; Unpacked <vscale x 2 x half> splat of a directly loaded half: the checks expect ld1rh writing .d elements under a .d predicate.
514 define <vscale x 2 x half> @ld1rh_half_unpacked2(half* %valp) {
515 ; CHECK-LABEL: ld1rh_half_unpacked2:
517 ; CHECK-NEXT: ptrue p0.d
518 ; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0]
520 %val = load half, half* %valp
521 %ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
522 %shf = shufflevector <vscale x 2 x half> %ins, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
523 ret <vscale x 2 x half> %shf
; Unpacked <vscale x 2 x half>, half element index 63 (byte offset 126): the checks expect the offset folded into the ld1rh (.d) immediate.
526 define <vscale x 2 x half> @ld1rh_half_unpacked2_gep(half* %valp) {
527 ; CHECK-LABEL: ld1rh_half_unpacked2_gep:
529 ; CHECK-NEXT: ptrue p0.d
530 ; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x0, #126]
532 %valp2 = getelementptr half, half* %valp, i32 63
533 %val = load half, half* %valp2
534 %ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
535 %shf = shufflevector <vscale x 2 x half> %ins, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
536 ret <vscale x 2 x half> %shf
; Unpacked <vscale x 2 x half>, half element index 64 (byte offset 128) is not folded: the checks expect a separate add followed by an unoffset ld1rh (.d).
539 define <vscale x 2 x half> @ld1rh_half_unpacked2_gep_out_of_range_up(half* %valp) {
540 ; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up:
542 ; CHECK-NEXT: add x8, x0, #128
543 ; CHECK-NEXT: ptrue p0.d
544 ; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x8]
546 %valp2 = getelementptr half, half* %valp, i32 64
547 %val = load half, half* %valp2
548 %ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
549 %shf = shufflevector <vscale x 2 x half> %ins, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
550 ret <vscale x 2 x half> %shf
; Unpacked <vscale x 2 x half>, half element index -1 (byte offset -2) is not folded: the checks expect a separate sub followed by an unoffset ld1rh (.d).
553 define <vscale x 2 x half> @ld1rh_half_unpacked2_gep_out_of_range_down(half* %valp) {
554 ; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down:
556 ; CHECK-NEXT: sub x8, x0, #2
557 ; CHECK-NEXT: ptrue p0.d
558 ; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x8]
560 %valp2 = getelementptr half, half* %valp, i32 -1
561 %val = load half, half* %valp2
562 %ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
563 %shf = shufflevector <vscale x 2 x half> %ins, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
564 ret <vscale x 2 x half> %shf
; Splat of a float loaded straight from %valp into <vscale x 4 x float>: the checks expect one ld1rw into .s elements with no address offset.
567 define <vscale x 4 x float> @ld1rw_float(float* %valp) {
568 ; CHECK-LABEL: ld1rw_float:
570 ; CHECK-NEXT: ptrue p0.s
571 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0]
573 %val = load float, float* %valp
574 %ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
575 %shf = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
576 ret <vscale x 4 x float> %shf
; float element index 63 (byte offset 252): the checks expect the offset folded into the ld1rw immediate.
579 define <vscale x 4 x float> @ld1rw_float_gep(float* %valp) {
580 ; CHECK-LABEL: ld1rw_float_gep:
582 ; CHECK-NEXT: ptrue p0.s
583 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x0, #252]
585 %valp2 = getelementptr float, float* %valp, i32 63
586 %val = load float, float* %valp2
587 %ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
588 %shf = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
589 ret <vscale x 4 x float> %shf
; float element index 64 (byte offset 256) is not folded: the checks expect a separate add followed by an unoffset ld1rw.
592 define <vscale x 4 x float> @ld1rw_float_gep_out_of_range_up(float* %valp) {
593 ; CHECK-LABEL: ld1rw_float_gep_out_of_range_up:
595 ; CHECK-NEXT: add x8, x0, #256
596 ; CHECK-NEXT: ptrue p0.s
597 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8]
599 %valp2 = getelementptr float, float* %valp, i32 64
600 %val = load float, float* %valp2
601 %ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
602 %shf = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
603 ret <vscale x 4 x float> %shf
; float element index -1 (byte offset -4) is not folded: the checks expect a separate sub followed by an unoffset ld1rw.
606 define <vscale x 4 x float> @ld1rw_float_gep_out_of_range_down(float* %valp) {
607 ; CHECK-LABEL: ld1rw_float_gep_out_of_range_down:
609 ; CHECK-NEXT: sub x8, x0, #4
610 ; CHECK-NEXT: ptrue p0.s
611 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8]
613 %valp2 = getelementptr float, float* %valp, i32 -1
614 %val = load float, float* %valp2
615 %ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
616 %shf = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
617 ret <vscale x 4 x float> %shf
; Unpacked <vscale x 2 x float> splat of a directly loaded float: the checks expect ld1rw writing .d elements under a .d predicate.
620 define <vscale x 2 x float> @ld1rw_float_unpacked2(float* %valp) {
621 ; CHECK-LABEL: ld1rw_float_unpacked2:
623 ; CHECK-NEXT: ptrue p0.d
624 ; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0]
626 %val = load float, float* %valp
627 %ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
628 %shf = shufflevector <vscale x 2 x float> %ins, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
629 ret <vscale x 2 x float> %shf
; Unpacked <vscale x 2 x float>, float element index 63 (byte offset 252): the checks expect the offset folded into the ld1rw (.d) immediate.
632 define <vscale x 2 x float> @ld1rw_float_unpacked2_gep(float* %valp) {
633 ; CHECK-LABEL: ld1rw_float_unpacked2_gep:
635 ; CHECK-NEXT: ptrue p0.d
636 ; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x0, #252]
638 %valp2 = getelementptr float, float* %valp, i32 63
639 %val = load float, float* %valp2
640 %ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
641 %shf = shufflevector <vscale x 2 x float> %ins, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
642 ret <vscale x 2 x float> %shf
; Unpacked <vscale x 2 x float>, float element index 64 (byte offset 256) is not folded: the checks expect a separate add followed by an unoffset ld1rw (.d).
645 define <vscale x 2 x float> @ld1rw_float_unpacked2_gep_out_of_range_up(float* %valp) {
646 ; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up:
648 ; CHECK-NEXT: add x8, x0, #256
649 ; CHECK-NEXT: ptrue p0.d
650 ; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8]
652 %valp2 = getelementptr float, float* %valp, i32 64
653 %val = load float, float* %valp2
654 %ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
655 %shf = shufflevector <vscale x 2 x float> %ins, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
656 ret <vscale x 2 x float> %shf
; Unpacked <vscale x 2 x float>, float element index -1 (byte offset -4) is not folded: the checks expect a separate sub followed by an unoffset ld1rw (.d).
659 define <vscale x 2 x float> @ld1rw_float_unpacked2_gep_out_of_range_down(float* %valp) {
660 ; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down:
662 ; CHECK-NEXT: sub x8, x0, #4
663 ; CHECK-NEXT: ptrue p0.d
664 ; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8]
666 %valp2 = getelementptr float, float* %valp, i32 -1
667 %val = load float, float* %valp2
668 %ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
669 %shf = shufflevector <vscale x 2 x float> %ins, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
670 ret <vscale x 2 x float> %shf
; Splat of a double loaded straight from %valp: the checks expect one ld1rd into .d elements with no address offset.
673 define <vscale x 2 x double> @ld1rd_double(double* %valp) {
674 ; CHECK-LABEL: ld1rd_double:
676 ; CHECK-NEXT: ptrue p0.d
677 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0]
679 %val = load double, double* %valp
680 %ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
681 %shf = shufflevector <vscale x 2 x double> %ins, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
682 ret <vscale x 2 x double> %shf
; double element index 63 (byte offset 504): the checks expect the offset folded into the ld1rd immediate.
685 define <vscale x 2 x double> @ld1rd_double_gep(double* %valp) {
686 ; CHECK-LABEL: ld1rd_double_gep:
688 ; CHECK-NEXT: ptrue p0.d
689 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x0, #504]
691 %valp2 = getelementptr double, double* %valp, i32 63
692 %val = load double, double* %valp2
693 %ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
694 %shf = shufflevector <vscale x 2 x double> %ins, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
695 ret <vscale x 2 x double> %shf
; double element index 64 (byte offset 512) is not folded: the checks expect a separate add followed by an unoffset ld1rd.
698 define <vscale x 2 x double> @ld1rd_double_gep_out_of_range_up(double* %valp) {
699 ; CHECK-LABEL: ld1rd_double_gep_out_of_range_up:
701 ; CHECK-NEXT: add x8, x0, #512
702 ; CHECK-NEXT: ptrue p0.d
703 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
705 %valp2 = getelementptr double, double* %valp, i32 64
706 %val = load double, double* %valp2
707 %ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
708 %shf = shufflevector <vscale x 2 x double> %ins, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
709 ret <vscale x 2 x double> %shf
; double element index -1 (byte offset -8) is not folded: the checks expect a separate sub followed by an unoffset ld1rd.
712 define <vscale x 2 x double> @ld1rd_double_gep_out_of_range_down(double* %valp) {
713 ; CHECK-LABEL: ld1rd_double_gep_out_of_range_down:
715 ; CHECK-NEXT: sub x8, x0, #8
716 ; CHECK-NEXT: ptrue p0.d
717 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
719 %valp2 = getelementptr double, double* %valp, i32 -1
720 %val = load double, double* %valp2
721 %ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
722 %shf = shufflevector <vscale x 2 x double> %ins, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
723 ret <vscale x 2 x double> %shf