1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
; Named struct types, each wrapping a single vector (128-bit <16 x i8>, 64-bit
; <8 x i8>, 64-bit <4 x i16>). NOTE(review): no line in the visible part of this
; file refers to them by name - confirm whether they are still needed.
6 %type1 = type { <16 x i8> }
7 %type2 = type { <8 x i8> }
8 %type3 = type { <4 x i16> }
; t1: load a pointer from %argtable and store an all-zero <16 x i8> through it.
; Expected code: zero v0 with movi.2d, then one 128-bit store of q0.
11 define hidden fastcc void @t1(ptr %argtable) nounwind {
13 ; CHECK: // %bb.0: // %entry
14 ; CHECK-NEXT: movi.2d v0, #0000000000000000
15 ; CHECK-NEXT: ldr x8, [x0]
16 ; CHECK-NEXT: str q0, [x8]
19 %tmp1 = load ptr, ptr %argtable, align 8
20 store <16 x i8> zeroinitializer, ptr %tmp1, align 16
; t2: load a pointer from %argtable and store an all-zero <8 x i8> through it.
; Expected code: zero v0 with movi.2d, then one 64-bit store of d0.
24 define hidden fastcc void @t2(ptr %argtable) nounwind {
26 ; CHECK: // %bb.0: // %entry
27 ; CHECK-NEXT: movi.2d v0, #0000000000000000
28 ; CHECK-NEXT: ldr x8, [x0]
29 ; CHECK-NEXT: str d0, [x8]
32 %tmp1 = load ptr, ptr %argtable, align 8
33 store <8 x i8> zeroinitializer, ptr %tmp1, align 8
37 ; Add a bunch of tests for rdar://11246289
; Pointer-typed globals, each initialised to null. The fct1_*/fct2_* tests load
; the pointer held in the matching @globalArray* variable and use it as the base
; of the destination array. NOTE(review): the @floatglobalArray* variables are
; not referenced by any line visible in this part of the file.
39 @globalArray64x2 = common global ptr null, align 8
40 @globalArray32x4 = common global ptr null, align 8
41 @globalArray16x8 = common global ptr null, align 8
42 @globalArray8x16 = common global ptr null, align 8
43 @globalArray64x1 = common global ptr null, align 8
44 @globalArray32x2 = common global ptr null, align 8
45 @globalArray16x4 = common global ptr null, align 8
46 @globalArray8x8 = common global ptr null, align 8
47 @floatglobalArray64x2 = common global ptr null, align 8
48 @floatglobalArray32x4 = common global ptr null, align 8
49 @floatglobalArray64x1 = common global ptr null, align 8
50 @floatglobalArray32x2 = common global ptr null, align 8
; fct1_64x2: copy element %offset of a <2 x i64> array to the same index of the
; array whose base pointer is loaded from @globalArray64x2. The variable index
; is expected to be scaled by 16 once (lsl #4) and then used as the register
; offset of both the q-register load and the q-register store.
52 define void @fct1_64x2(ptr nocapture %array, i64 %offset) nounwind ssp {
53 ; CHECK-LABEL: fct1_64x2:
54 ; CHECK: // %bb.0: // %entry
55 ; CHECK-NEXT: adrp x8, :got:globalArray64x2
56 ; CHECK-NEXT: lsl x9, x1, #4
57 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x2]
58 ; CHECK-NEXT: ldr q0, [x0, x9]
59 ; CHECK-NEXT: ldr x8, [x8]
60 ; CHECK-NEXT: str q0, [x8, x9]
63 %arrayidx = getelementptr inbounds <2 x i64>, ptr %array, i64 %offset
64 %tmp = load <2 x i64>, ptr %arrayidx, align 16
65 %tmp1 = load ptr, ptr @globalArray64x2, align 8
66 %arrayidx1 = getelementptr inbounds <2 x i64>, ptr %tmp1, i64 %offset
67 store <2 x i64> %tmp, ptr %arrayidx1, align 16
; fct2_64x2: copy element 3 of a <2 x i64> array to element 5 of the array whose
; base pointer is loaded from @globalArray64x2. The constant indices are
; expected to fold into the immediate offsets #48 and #80 (index * 16 bytes).
71 define void @fct2_64x2(ptr nocapture %array) nounwind ssp {
72 ; CHECK-LABEL: fct2_64x2:
73 ; CHECK: // %bb.0: // %entry
74 ; CHECK-NEXT: adrp x8, :got:globalArray64x2
75 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x2]
76 ; CHECK-NEXT: ldr q0, [x0, #48]
77 ; CHECK-NEXT: ldr x8, [x8]
78 ; CHECK-NEXT: str q0, [x8, #80]
81 %arrayidx = getelementptr inbounds <2 x i64>, ptr %array, i64 3
82 %tmp = load <2 x i64>, ptr %arrayidx, align 16
83 %tmp1 = load ptr, ptr @globalArray64x2, align 8
84 %arrayidx1 = getelementptr inbounds <2 x i64>, ptr %tmp1, i64 5
85 store <2 x i64> %tmp, ptr %arrayidx1, align 16
; fct1_32x4: copy element %offset of a <4 x i32> array to the same index of the
; array whose base pointer is loaded from @globalArray32x4. The variable index
; is expected to be scaled by 16 once (lsl #4) and then used as the register
; offset of both the q-register load and the q-register store.
89 define void @fct1_32x4(ptr nocapture %array, i64 %offset) nounwind ssp {
90 ; CHECK-LABEL: fct1_32x4:
91 ; CHECK: // %bb.0: // %entry
92 ; CHECK-NEXT: adrp x8, :got:globalArray32x4
93 ; CHECK-NEXT: lsl x9, x1, #4
94 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x4]
95 ; CHECK-NEXT: ldr q0, [x0, x9]
96 ; CHECK-NEXT: ldr x8, [x8]
97 ; CHECK-NEXT: str q0, [x8, x9]
100 %arrayidx = getelementptr inbounds <4 x i32>, ptr %array, i64 %offset
101 %tmp = load <4 x i32>, ptr %arrayidx, align 16
102 %tmp1 = load ptr, ptr @globalArray32x4, align 8
103 %arrayidx1 = getelementptr inbounds <4 x i32>, ptr %tmp1, i64 %offset
104 store <4 x i32> %tmp, ptr %arrayidx1, align 16
; fct2_32x4: copy element 3 of a <4 x i32> array to element 5 of the array whose
; base pointer is loaded from @globalArray32x4. The constant indices are
; expected to fold into the immediate offsets #48 and #80 (index * 16 bytes).
108 define void @fct2_32x4(ptr nocapture %array) nounwind ssp {
109 ; CHECK-LABEL: fct2_32x4:
110 ; CHECK: // %bb.0: // %entry
111 ; CHECK-NEXT: adrp x8, :got:globalArray32x4
112 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x4]
113 ; CHECK-NEXT: ldr q0, [x0, #48]
114 ; CHECK-NEXT: ldr x8, [x8]
115 ; CHECK-NEXT: str q0, [x8, #80]
118 %arrayidx = getelementptr inbounds <4 x i32>, ptr %array, i64 3
119 %tmp = load <4 x i32>, ptr %arrayidx, align 16
120 %tmp1 = load ptr, ptr @globalArray32x4, align 8
121 %arrayidx1 = getelementptr inbounds <4 x i32>, ptr %tmp1, i64 5
122 store <4 x i32> %tmp, ptr %arrayidx1, align 16
; fct1_16x8: copy element %offset of a <8 x i16> array to the same index of the
; array whose base pointer is loaded from @globalArray16x8. The variable index
; is expected to be scaled by 16 once (lsl #4) and then used as the register
; offset of both the q-register load and the q-register store.
126 define void @fct1_16x8(ptr nocapture %array, i64 %offset) nounwind ssp {
127 ; CHECK-LABEL: fct1_16x8:
128 ; CHECK: // %bb.0: // %entry
129 ; CHECK-NEXT: adrp x8, :got:globalArray16x8
130 ; CHECK-NEXT: lsl x9, x1, #4
131 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x8]
132 ; CHECK-NEXT: ldr q0, [x0, x9]
133 ; CHECK-NEXT: ldr x8, [x8]
134 ; CHECK-NEXT: str q0, [x8, x9]
137 %arrayidx = getelementptr inbounds <8 x i16>, ptr %array, i64 %offset
138 %tmp = load <8 x i16>, ptr %arrayidx, align 16
139 %tmp1 = load ptr, ptr @globalArray16x8, align 8
140 %arrayidx1 = getelementptr inbounds <8 x i16>, ptr %tmp1, i64 %offset
141 store <8 x i16> %tmp, ptr %arrayidx1, align 16
; fct2_16x8: copy element 3 of a <8 x i16> array to element 5 of the array whose
; base pointer is loaded from @globalArray16x8. The constant indices are
; expected to fold into the immediate offsets #48 and #80 (index * 16 bytes).
145 define void @fct2_16x8(ptr nocapture %array) nounwind ssp {
146 ; CHECK-LABEL: fct2_16x8:
147 ; CHECK: // %bb.0: // %entry
148 ; CHECK-NEXT: adrp x8, :got:globalArray16x8
149 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x8]
150 ; CHECK-NEXT: ldr q0, [x0, #48]
151 ; CHECK-NEXT: ldr x8, [x8]
152 ; CHECK-NEXT: str q0, [x8, #80]
155 %arrayidx = getelementptr inbounds <8 x i16>, ptr %array, i64 3
156 %tmp = load <8 x i16>, ptr %arrayidx, align 16
157 %tmp1 = load ptr, ptr @globalArray16x8, align 8
158 %arrayidx1 = getelementptr inbounds <8 x i16>, ptr %tmp1, i64 5
159 store <8 x i16> %tmp, ptr %arrayidx1, align 16
; fct1_8x16: copy element %offset of a <16 x i8> array to the same index of the
; array whose base pointer is loaded from @globalArray8x16. The variable index
; is expected to be scaled by 16 once (lsl #4) and then used as the register
; offset of both the q-register load and the q-register store.
163 define void @fct1_8x16(ptr nocapture %array, i64 %offset) nounwind ssp {
164 ; CHECK-LABEL: fct1_8x16:
165 ; CHECK: // %bb.0: // %entry
166 ; CHECK-NEXT: adrp x8, :got:globalArray8x16
167 ; CHECK-NEXT: lsl x9, x1, #4
168 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x16]
169 ; CHECK-NEXT: ldr q0, [x0, x9]
170 ; CHECK-NEXT: ldr x8, [x8]
171 ; CHECK-NEXT: str q0, [x8, x9]
174 %arrayidx = getelementptr inbounds <16 x i8>, ptr %array, i64 %offset
175 %tmp = load <16 x i8>, ptr %arrayidx, align 16
176 %tmp1 = load ptr, ptr @globalArray8x16, align 8
177 %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %tmp1, i64 %offset
178 store <16 x i8> %tmp, ptr %arrayidx1, align 16
; fct2_8x16: copy element 3 of a <16 x i8> array to element 5 of the array whose
; base pointer is loaded from @globalArray8x16. The constant indices are
; expected to fold into the immediate offsets #48 and #80 (index * 16 bytes).
182 define void @fct2_8x16(ptr nocapture %array) nounwind ssp {
183 ; CHECK-LABEL: fct2_8x16:
184 ; CHECK: // %bb.0: // %entry
185 ; CHECK-NEXT: adrp x8, :got:globalArray8x16
186 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x16]
187 ; CHECK-NEXT: ldr q0, [x0, #48]
188 ; CHECK-NEXT: ldr x8, [x8]
189 ; CHECK-NEXT: str q0, [x8, #80]
192 %arrayidx = getelementptr inbounds <16 x i8>, ptr %array, i64 3
193 %tmp = load <16 x i8>, ptr %arrayidx, align 16
194 %tmp1 = load ptr, ptr @globalArray8x16, align 8
195 %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %tmp1, i64 5
196 store <16 x i8> %tmp, ptr %arrayidx1, align 16
; fct1_64x1: copy element %offset of a <1 x i64> array to the same index of the
; array whose base pointer is loaded from @globalArray64x1. The variable index
; is expected to fold into the addressing mode as `x1, lsl #3` for both the
; d-register load and the d-register store (no separate shift instruction).
200 define void @fct1_64x1(ptr nocapture %array, i64 %offset) nounwind ssp {
201 ; CHECK-LABEL: fct1_64x1:
202 ; CHECK: // %bb.0: // %entry
203 ; CHECK-NEXT: adrp x8, :got:globalArray64x1
204 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x1]
205 ; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
206 ; CHECK-NEXT: ldr x8, [x8]
207 ; CHECK-NEXT: str d0, [x8, x1, lsl #3]
210 %arrayidx = getelementptr inbounds <1 x i64>, ptr %array, i64 %offset
211 %tmp = load <1 x i64>, ptr %arrayidx, align 8
212 %tmp1 = load ptr, ptr @globalArray64x1, align 8
213 %arrayidx1 = getelementptr inbounds <1 x i64>, ptr %tmp1, i64 %offset
214 store <1 x i64> %tmp, ptr %arrayidx1, align 8
; fct2_64x1: copy element 3 of a <1 x i64> array to element 5 of the array whose
; base pointer is loaded from @globalArray64x1. The constant indices are
; expected to fold into the immediate offsets #24 and #40 (index * 8 bytes).
218 define void @fct2_64x1(ptr nocapture %array) nounwind ssp {
219 ; CHECK-LABEL: fct2_64x1:
220 ; CHECK: // %bb.0: // %entry
221 ; CHECK-NEXT: adrp x8, :got:globalArray64x1
222 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x1]
223 ; CHECK-NEXT: ldr d0, [x0, #24]
224 ; CHECK-NEXT: ldr x8, [x8]
225 ; CHECK-NEXT: str d0, [x8, #40]
228 %arrayidx = getelementptr inbounds <1 x i64>, ptr %array, i64 3
229 %tmp = load <1 x i64>, ptr %arrayidx, align 8
230 %tmp1 = load ptr, ptr @globalArray64x1, align 8
231 %arrayidx1 = getelementptr inbounds <1 x i64>, ptr %tmp1, i64 5
232 store <1 x i64> %tmp, ptr %arrayidx1, align 8
; fct1_32x2: copy element %offset of a <2 x i32> array to the same index of the
; array whose base pointer is loaded from @globalArray32x2. The variable index
; is expected to fold into the addressing mode as `x1, lsl #3` for both the
; d-register load and the d-register store (no separate shift instruction).
236 define void @fct1_32x2(ptr nocapture %array, i64 %offset) nounwind ssp {
237 ; CHECK-LABEL: fct1_32x2:
238 ; CHECK: // %bb.0: // %entry
239 ; CHECK-NEXT: adrp x8, :got:globalArray32x2
240 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x2]
241 ; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
242 ; CHECK-NEXT: ldr x8, [x8]
243 ; CHECK-NEXT: str d0, [x8, x1, lsl #3]
246 %arrayidx = getelementptr inbounds <2 x i32>, ptr %array, i64 %offset
247 %tmp = load <2 x i32>, ptr %arrayidx, align 8
248 %tmp1 = load ptr, ptr @globalArray32x2, align 8
249 %arrayidx1 = getelementptr inbounds <2 x i32>, ptr %tmp1, i64 %offset
250 store <2 x i32> %tmp, ptr %arrayidx1, align 8
; fct2_32x2: copy element 3 of a <2 x i32> array to element 5 of the array whose
; base pointer is loaded from @globalArray32x2. The constant indices are
; expected to fold into the immediate offsets #24 and #40 (index * 8 bytes).
254 define void @fct2_32x2(ptr nocapture %array) nounwind ssp {
255 ; CHECK-LABEL: fct2_32x2:
256 ; CHECK: // %bb.0: // %entry
257 ; CHECK-NEXT: adrp x8, :got:globalArray32x2
258 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x2]
259 ; CHECK-NEXT: ldr d0, [x0, #24]
260 ; CHECK-NEXT: ldr x8, [x8]
261 ; CHECK-NEXT: str d0, [x8, #40]
264 %arrayidx = getelementptr inbounds <2 x i32>, ptr %array, i64 3
265 %tmp = load <2 x i32>, ptr %arrayidx, align 8
266 %tmp1 = load ptr, ptr @globalArray32x2, align 8
267 %arrayidx1 = getelementptr inbounds <2 x i32>, ptr %tmp1, i64 5
268 store <2 x i32> %tmp, ptr %arrayidx1, align 8
; fct1_16x4: copy element %offset of a <4 x i16> array to the same index of the
; array whose base pointer is loaded from @globalArray16x4. The variable index
; is expected to fold into the addressing mode as `x1, lsl #3` for both the
; d-register load and the d-register store (no separate shift instruction).
272 define void @fct1_16x4(ptr nocapture %array, i64 %offset) nounwind ssp {
273 ; CHECK-LABEL: fct1_16x4:
274 ; CHECK: // %bb.0: // %entry
275 ; CHECK-NEXT: adrp x8, :got:globalArray16x4
276 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x4]
277 ; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
278 ; CHECK-NEXT: ldr x8, [x8]
279 ; CHECK-NEXT: str d0, [x8, x1, lsl #3]
282 %arrayidx = getelementptr inbounds <4 x i16>, ptr %array, i64 %offset
283 %tmp = load <4 x i16>, ptr %arrayidx, align 8
284 %tmp1 = load ptr, ptr @globalArray16x4, align 8
285 %arrayidx1 = getelementptr inbounds <4 x i16>, ptr %tmp1, i64 %offset
286 store <4 x i16> %tmp, ptr %arrayidx1, align 8
; fct2_16x4: copy element 3 of a <4 x i16> array to element 5 of the array whose
; base pointer is loaded from @globalArray16x4. The constant indices are
; expected to fold into the immediate offsets #24 and #40 (index * 8 bytes).
290 define void @fct2_16x4(ptr nocapture %array) nounwind ssp {
291 ; CHECK-LABEL: fct2_16x4:
292 ; CHECK: // %bb.0: // %entry
293 ; CHECK-NEXT: adrp x8, :got:globalArray16x4
294 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x4]
295 ; CHECK-NEXT: ldr d0, [x0, #24]
296 ; CHECK-NEXT: ldr x8, [x8]
297 ; CHECK-NEXT: str d0, [x8, #40]
300 %arrayidx = getelementptr inbounds <4 x i16>, ptr %array, i64 3
301 %tmp = load <4 x i16>, ptr %arrayidx, align 8
302 %tmp1 = load ptr, ptr @globalArray16x4, align 8
303 %arrayidx1 = getelementptr inbounds <4 x i16>, ptr %tmp1, i64 5
304 store <4 x i16> %tmp, ptr %arrayidx1, align 8
; fct1_8x8: copy element %offset of a <8 x i8> array to the same index of the
; array whose base pointer is loaded from @globalArray8x8. The variable index
; is expected to fold into the addressing mode as `x1, lsl #3` for both the
; d-register load and the d-register store (no separate shift instruction).
308 define void @fct1_8x8(ptr nocapture %array, i64 %offset) nounwind ssp {
309 ; CHECK-LABEL: fct1_8x8:
310 ; CHECK: // %bb.0: // %entry
311 ; CHECK-NEXT: adrp x8, :got:globalArray8x8
312 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x8]
313 ; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
314 ; CHECK-NEXT: ldr x8, [x8]
315 ; CHECK-NEXT: str d0, [x8, x1, lsl #3]
318 %arrayidx = getelementptr inbounds <8 x i8>, ptr %array, i64 %offset
319 %tmp = load <8 x i8>, ptr %arrayidx, align 8
320 %tmp1 = load ptr, ptr @globalArray8x8, align 8
321 %arrayidx1 = getelementptr inbounds <8 x i8>, ptr %tmp1, i64 %offset
322 store <8 x i8> %tmp, ptr %arrayidx1, align 8
326 ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
327 ; registers for unscaled vector accesses
; fct0: load a <1 x i64> from byte offset 3 of %str. The access is expected to
; be selected as the unscaled `ldur` form on a d register.
329 define <1 x i64> @fct0(ptr %str) nounwind readonly ssp {
331 ; CHECK: // %bb.0: // %entry
332 ; CHECK-NEXT: ldur d0, [x0, #3]
335 %p = getelementptr inbounds i8, ptr %str, i64 3
336 %0 = load <1 x i64>, ptr %p, align 8
; fct1: load a <2 x i32> from byte offset 3 of %str. The access is expected to
; be selected as the unscaled `ldur` form on a d register.
340 define <2 x i32> @fct1(ptr %str) nounwind readonly ssp {
342 ; CHECK: // %bb.0: // %entry
343 ; CHECK-NEXT: ldur d0, [x0, #3]
346 %p = getelementptr inbounds i8, ptr %str, i64 3
347 %0 = load <2 x i32>, ptr %p, align 8
; fct2: load a <4 x i16> from byte offset 3 of %str. The access is expected to
; be selected as the unscaled `ldur` form on a d register.
351 define <4 x i16> @fct2(ptr %str) nounwind readonly ssp {
353 ; CHECK: // %bb.0: // %entry
354 ; CHECK-NEXT: ldur d0, [x0, #3]
357 %p = getelementptr inbounds i8, ptr %str, i64 3
358 %0 = load <4 x i16>, ptr %p, align 8
; fct3: load a <8 x i8> from byte offset 3 of %str. The access is expected to
; be selected as the unscaled `ldur` form on a d register.
362 define <8 x i8> @fct3(ptr %str) nounwind readonly ssp {
364 ; CHECK: // %bb.0: // %entry
365 ; CHECK-NEXT: ldur d0, [x0, #3]
368 %p = getelementptr inbounds i8, ptr %str, i64 3
369 %0 = load <8 x i8>, ptr %p, align 8
; fct4: load a <2 x i64> from byte offset 3 of %str. The access is expected to
; be selected as the unscaled `ldur` form on a q register.
373 define <2 x i64> @fct4(ptr %str) nounwind readonly ssp {
375 ; CHECK: // %bb.0: // %entry
376 ; CHECK-NEXT: ldur q0, [x0, #3]
379 %p = getelementptr inbounds i8, ptr %str, i64 3
380 %0 = load <2 x i64>, ptr %p, align 16
; fct5: load a <4 x i32> from byte offset 3 of %str. The access is expected to
; be selected as the unscaled `ldur` form on a q register.
384 define <4 x i32> @fct5(ptr %str) nounwind readonly ssp {
386 ; CHECK: // %bb.0: // %entry
387 ; CHECK-NEXT: ldur q0, [x0, #3]
390 %p = getelementptr inbounds i8, ptr %str, i64 3
391 %0 = load <4 x i32>, ptr %p, align 16
; fct6: load a <8 x i16> from byte offset 3 of %str. The access is expected to
; be selected as the unscaled `ldur` form on a q register.
395 define <8 x i16> @fct6(ptr %str) nounwind readonly ssp {
397 ; CHECK: // %bb.0: // %entry
398 ; CHECK-NEXT: ldur q0, [x0, #3]
401 %p = getelementptr inbounds i8, ptr %str, i64 3
402 %0 = load <8 x i16>, ptr %p, align 16
; fct7: load a <16 x i8> from byte offset 3 of %str. The access is expected to
; be selected as the unscaled `ldur` form on a q register.
406 define <16 x i8> @fct7(ptr %str) nounwind readonly ssp {
408 ; CHECK: // %bb.0: // %entry
409 ; CHECK-NEXT: ldur q0, [x0, #3]
412 %p = getelementptr inbounds i8, ptr %str, i64 3
413 %0 = load <16 x i8>, ptr %p, align 16
; fct8: load a <1 x i64> from byte offset 3 of %str and store it back at byte
; offset 4. Both accesses are expected to use the unscaled forms (`ldur` /
; `stur`) on d0.
417 define void @fct8(ptr %str) nounwind ssp {
419 ; CHECK: // %bb.0: // %entry
420 ; CHECK-NEXT: ldur d0, [x0, #3]
421 ; CHECK-NEXT: stur d0, [x0, #4]
424 %p = getelementptr inbounds i8, ptr %str, i64 3
425 %0 = load <1 x i64>, ptr %p, align 8
426 %p2 = getelementptr inbounds i8, ptr %str, i64 4
427 store <1 x i64> %0, ptr %p2, align 8
; fct9: load a <2 x i32> from byte offset 3 of %str and store it back at byte
; offset 4. Both accesses are expected to use the unscaled forms (`ldur` /
; `stur`) on d0.
431 define void @fct9(ptr %str) nounwind ssp {
433 ; CHECK: // %bb.0: // %entry
434 ; CHECK-NEXT: ldur d0, [x0, #3]
435 ; CHECK-NEXT: stur d0, [x0, #4]
438 %p = getelementptr inbounds i8, ptr %str, i64 3
439 %0 = load <2 x i32>, ptr %p, align 8
440 %p2 = getelementptr inbounds i8, ptr %str, i64 4
441 store <2 x i32> %0, ptr %p2, align 8
; fct10: load a <4 x i16> from byte offset 3 of %str and store it back at byte
; offset 4. Both accesses are expected to use the unscaled forms (`ldur` /
; `stur`) on d0.
445 define void @fct10(ptr %str) nounwind ssp {
446 ; CHECK-LABEL: fct10:
447 ; CHECK: // %bb.0: // %entry
448 ; CHECK-NEXT: ldur d0, [x0, #3]
449 ; CHECK-NEXT: stur d0, [x0, #4]
452 %p = getelementptr inbounds i8, ptr %str, i64 3
453 %0 = load <4 x i16>, ptr %p, align 8
454 %p2 = getelementptr inbounds i8, ptr %str, i64 4
455 store <4 x i16> %0, ptr %p2, align 8
; fct11: load a <8 x i8> from byte offset 3 of %str and store it back at byte
; offset 4. Both accesses are expected to use the unscaled forms (`ldur` /
; `stur`) on d0.
459 define void @fct11(ptr %str) nounwind ssp {
460 ; CHECK-LABEL: fct11:
461 ; CHECK: // %bb.0: // %entry
462 ; CHECK-NEXT: ldur d0, [x0, #3]
463 ; CHECK-NEXT: stur d0, [x0, #4]
466 %p = getelementptr inbounds i8, ptr %str, i64 3
467 %0 = load <8 x i8>, ptr %p, align 8
468 %p2 = getelementptr inbounds i8, ptr %str, i64 4
469 store <8 x i8> %0, ptr %p2, align 8
; fct12: load a <2 x i64> from byte offset 3 of %str and store it back at byte
; offset 4. Both accesses are expected to use the unscaled forms (`ldur` /
; `stur`) on q0.
473 define void @fct12(ptr %str) nounwind ssp {
474 ; CHECK-LABEL: fct12:
475 ; CHECK: // %bb.0: // %entry
476 ; CHECK-NEXT: ldur q0, [x0, #3]
477 ; CHECK-NEXT: stur q0, [x0, #4]
480 %p = getelementptr inbounds i8, ptr %str, i64 3
481 %0 = load <2 x i64>, ptr %p, align 16
482 %p2 = getelementptr inbounds i8, ptr %str, i64 4
483 store <2 x i64> %0, ptr %p2, align 16
; fct13: load a <4 x i32> from byte offset 3 of %str and store it back at byte
; offset 4. Both accesses are expected to use the unscaled forms (`ldur` /
; `stur`) on q0.
487 define void @fct13(ptr %str) nounwind ssp {
488 ; CHECK-LABEL: fct13:
489 ; CHECK: // %bb.0: // %entry
490 ; CHECK-NEXT: ldur q0, [x0, #3]
491 ; CHECK-NEXT: stur q0, [x0, #4]
494 %p = getelementptr inbounds i8, ptr %str, i64 3
495 %0 = load <4 x i32>, ptr %p, align 16
496 %p2 = getelementptr inbounds i8, ptr %str, i64 4
497 store <4 x i32> %0, ptr %p2, align 16
; fct14: load a <8 x i16> from byte offset 3 of %str and store it back at byte
; offset 4. Both accesses are expected to use the unscaled forms (`ldur` /
; `stur`) on q0.
501 define void @fct14(ptr %str) nounwind ssp {
502 ; CHECK-LABEL: fct14:
503 ; CHECK: // %bb.0: // %entry
504 ; CHECK-NEXT: ldur q0, [x0, #3]
505 ; CHECK-NEXT: stur q0, [x0, #4]
508 %p = getelementptr inbounds i8, ptr %str, i64 3
509 %0 = load <8 x i16>, ptr %p, align 16
510 %p2 = getelementptr inbounds i8, ptr %str, i64 4
511 store <8 x i16> %0, ptr %p2, align 16
; fct15: load a <16 x i8> from byte offset 3 of %str and store it back at byte
; offset 4. Both accesses are expected to use the unscaled forms (`ldur` /
; `stur`) on q0.
515 define void @fct15(ptr %str) nounwind ssp {
516 ; CHECK-LABEL: fct15:
517 ; CHECK: // %bb.0: // %entry
518 ; CHECK-NEXT: ldur q0, [x0, #3]
519 ; CHECK-NEXT: stur q0, [x0, #4]
522 %p = getelementptr inbounds i8, ptr %str, i64 3
523 %0 = load <16 x i8>, ptr %p, align 16
524 %p2 = getelementptr inbounds i8, ptr %str, i64 4
525 store <16 x i8> %0, ptr %p2, align 16
529 ; Check the building of vector from a single loaded value.
530 ; Part of <rdar://problem/14170854>
532 ; Single loads with immediate offset.
; fct16: load one i8 from %sp0 + 1, insert it into lane 0 of an undef <8 x i8>,
; and return the vector multiplied by itself. The scalar is expected to be
; loaded directly into b0 with an immediate-offset `ldr`, followed by mul.8b.
533 define <8 x i8> @fct16(ptr nocapture %sp0) {
534 ; CHECK-LABEL: fct16:
535 ; CHECK: // %bb.0: // %entry
536 ; CHECK-NEXT: ldr b0, [x0, #1]
537 ; CHECK-NEXT: mul.8b v0, v0, v0
540 %addr = getelementptr i8, ptr %sp0, i64 1
541 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
542 %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
543 %vmull.i = mul <8 x i8> %vec, %vec
544 ret <8 x i8> %vmull.i
; fct17: load one i8 from %sp0 + 1, insert it into lane 0 of an undef <16 x i8>,
; and return the vector multiplied by itself. The scalar is expected to be
; loaded directly into b0 with an immediate-offset `ldr`, followed by mul.16b.
547 define <16 x i8> @fct17(ptr nocapture %sp0) {
548 ; CHECK-LABEL: fct17:
549 ; CHECK: // %bb.0: // %entry
550 ; CHECK-NEXT: ldr b0, [x0, #1]
551 ; CHECK-NEXT: mul.16b v0, v0, v0
554 %addr = getelementptr i8, ptr %sp0, i64 1
555 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
556 %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
557 %vmull.i = mul <16 x i8> %vec, %vec
558 ret <16 x i8> %vmull.i
; fct18: load one i16 from element 1 of %sp0 (byte offset 2), insert it into
; lane 0 of an undef <4 x i16>, and return the vector multiplied by itself. The
; scalar is expected to be loaded directly into h0, followed by mul.4h.
561 define <4 x i16> @fct18(ptr nocapture %sp0) {
562 ; CHECK-LABEL: fct18:
563 ; CHECK: // %bb.0: // %entry
564 ; CHECK-NEXT: ldr h0, [x0, #2]
565 ; CHECK-NEXT: mul.4h v0, v0, v0
568 %addr = getelementptr i16, ptr %sp0, i64 1
569 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
570 %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
571 %vmull.i = mul <4 x i16> %vec, %vec
572 ret <4 x i16> %vmull.i
; fct19: load one i16 from element 1 of %sp0 (byte offset 2), insert it into
; lane 0 of an undef <8 x i16>, and return the vector multiplied by itself. The
; scalar is expected to be loaded directly into h0, followed by mul.8h.
575 define <8 x i16> @fct19(ptr nocapture %sp0) {
576 ; CHECK-LABEL: fct19:
577 ; CHECK: // %bb.0: // %entry
578 ; CHECK-NEXT: ldr h0, [x0, #2]
579 ; CHECK-NEXT: mul.8h v0, v0, v0
582 %addr = getelementptr i16, ptr %sp0, i64 1
583 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
584 %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
585 %vmull.i = mul <8 x i16> %vec, %vec
586 ret <8 x i16> %vmull.i
; fct20: load one i32 from element 1 of %sp0 (byte offset 4), insert it into
; lane 0 of an undef <2 x i32>, and return the vector multiplied by itself. The
; scalar is expected to be loaded directly into s0, followed by mul.2s.
589 define <2 x i32> @fct20(ptr nocapture %sp0) {
590 ; CHECK-LABEL: fct20:
591 ; CHECK: // %bb.0: // %entry
592 ; CHECK-NEXT: ldr s0, [x0, #4]
593 ; CHECK-NEXT: mul.2s v0, v0, v0
596 %addr = getelementptr i32, ptr %sp0, i64 1
597 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
598 %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
599 %vmull.i = mul <2 x i32> %vec, %vec
600 ret <2 x i32> %vmull.i
; fct21: load one i32 from element 1 of %sp0 (byte offset 4), insert it into
; lane 0 of an undef <4 x i32>, and return the vector multiplied by itself. The
; scalar is expected to be loaded directly into s0, followed by mul.4s.
603 define <4 x i32> @fct21(ptr nocapture %sp0) {
604 ; CHECK-LABEL: fct21:
605 ; CHECK: // %bb.0: // %entry
606 ; CHECK-NEXT: ldr s0, [x0, #4]
607 ; CHECK-NEXT: mul.4s v0, v0, v0
610 %addr = getelementptr i32, ptr %sp0, i64 1
611 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
612 %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
613 %vmull.i = mul <4 x i32> %vec, %vec
614 ret <4 x i32> %vmull.i
; fct22: load one i64 from element 1 of %sp0 (byte offset 8) and insert it into
; lane 0 of an undef <1 x i64>. The scalar is expected to be loaded directly
; into d0 with an immediate-offset `ldr`; no multiply appears in this variant.
617 define <1 x i64> @fct22(ptr nocapture %sp0) {
618 ; CHECK-LABEL: fct22:
619 ; CHECK: // %bb.0: // %entry
620 ; CHECK-NEXT: ldr d0, [x0, #8]
623 %addr = getelementptr i64, ptr %sp0, i64 1
624 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
625 %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
; fct23: load one i64 from element 1 of %sp0 (byte offset 8) and insert it into
; lane 0 of an undef <2 x i64>. The scalar is expected to be loaded directly
; into d0 with an immediate-offset `ldr`; no multiply appears in this variant.
629 define <2 x i64> @fct23(ptr nocapture %sp0) {
630 ; CHECK-LABEL: fct23:
631 ; CHECK: // %bb.0: // %entry
632 ; CHECK-NEXT: ldr d0, [x0, #8]
635 %addr = getelementptr i64, ptr %sp0, i64 1
636 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
637 %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
642 ; Single loads with register offset.
; fct24: load one i8 from %sp0 + %offset, insert it into lane 0 of an undef
; <8 x i8>, and return the vector multiplied by itself. The scalar is expected
; to be loaded directly into b0 with a register-offset `ldr`, then mul.8b.
643 define <8 x i8> @fct24(ptr nocapture %sp0, i64 %offset) {
644 ; CHECK-LABEL: fct24:
645 ; CHECK: // %bb.0: // %entry
646 ; CHECK-NEXT: ldr b0, [x0, x1]
647 ; CHECK-NEXT: mul.8b v0, v0, v0
650 %addr = getelementptr i8, ptr %sp0, i64 %offset
651 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
652 %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
653 %vmull.i = mul <8 x i8> %vec, %vec
654 ret <8 x i8> %vmull.i
; fct25: load one i8 from %sp0 + %offset, insert it into lane 0 of an undef
; <16 x i8>, and return the vector multiplied by itself. The scalar is expected
; to be loaded directly into b0 with a register-offset `ldr`, then mul.16b.
657 define <16 x i8> @fct25(ptr nocapture %sp0, i64 %offset) {
658 ; CHECK-LABEL: fct25:
659 ; CHECK: // %bb.0: // %entry
660 ; CHECK-NEXT: ldr b0, [x0, x1]
661 ; CHECK-NEXT: mul.16b v0, v0, v0
664 %addr = getelementptr i8, ptr %sp0, i64 %offset
665 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
666 %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
667 %vmull.i = mul <16 x i8> %vec, %vec
668 ret <16 x i8> %vmull.i
; fct26: load one i16 from element %offset of %sp0, insert it into lane 0 of an
; undef <4 x i16>, and return the vector multiplied by itself. The index scaling
; is expected to fold into the load as `x1, lsl #1` on h0, then mul.4h.
671 define <4 x i16> @fct26(ptr nocapture %sp0, i64 %offset) {
672 ; CHECK-LABEL: fct26:
673 ; CHECK: // %bb.0: // %entry
674 ; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
675 ; CHECK-NEXT: mul.4h v0, v0, v0
678 %addr = getelementptr i16, ptr %sp0, i64 %offset
679 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
680 %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
681 %vmull.i = mul <4 x i16> %vec, %vec
682 ret <4 x i16> %vmull.i
; fct27: load one i16 from element %offset of %sp0, insert it into lane 0 of an
; undef <8 x i16>, and return the vector multiplied by itself. The index scaling
; is expected to fold into the load as `x1, lsl #1` on h0, then mul.8h.
685 define <8 x i16> @fct27(ptr nocapture %sp0, i64 %offset) {
686 ; CHECK-LABEL: fct27:
687 ; CHECK: // %bb.0: // %entry
688 ; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
689 ; CHECK-NEXT: mul.8h v0, v0, v0
692 %addr = getelementptr i16, ptr %sp0, i64 %offset
693 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
694 %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
695 %vmull.i = mul <8 x i16> %vec, %vec
696 ret <8 x i16> %vmull.i
; fct28: load one i32 from element %offset of %sp0, insert it into lane 0 of an
; undef <2 x i32>, and return the vector multiplied by itself. The index scaling
; is expected to fold into the load as `x1, lsl #2` on s0, then mul.2s.
699 define <2 x i32> @fct28(ptr nocapture %sp0, i64 %offset) {
700 ; CHECK-LABEL: fct28:
701 ; CHECK: // %bb.0: // %entry
702 ; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
703 ; CHECK-NEXT: mul.2s v0, v0, v0
706 %addr = getelementptr i32, ptr %sp0, i64 %offset
707 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
708 %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
709 %vmull.i = mul <2 x i32> %vec, %vec
710 ret <2 x i32> %vmull.i
; fct29: load one i32 from element %offset of %sp0, insert it into lane 0 of an
; undef <4 x i32>, and return the vector multiplied by itself. The index scaling
; is expected to fold into the load as `x1, lsl #2` on s0, then mul.4s.
713 define <4 x i32> @fct29(ptr nocapture %sp0, i64 %offset) {
714 ; CHECK-LABEL: fct29:
715 ; CHECK: // %bb.0: // %entry
716 ; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
717 ; CHECK-NEXT: mul.4s v0, v0, v0
720 %addr = getelementptr i32, ptr %sp0, i64 %offset
721 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
722 %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
723 %vmull.i = mul <4 x i32> %vec, %vec
724 ret <4 x i32> %vmull.i
; fct30: load one i64 from element %offset of %sp0 and insert it into lane 0 of
; an undef <1 x i64>. The index scaling is expected to fold into the load as
; `x1, lsl #3` on d0; no multiply appears in this variant.
727 define <1 x i64> @fct30(ptr nocapture %sp0, i64 %offset) {
728 ; CHECK-LABEL: fct30:
729 ; CHECK: // %bb.0: // %entry
730 ; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
733 %addr = getelementptr i64, ptr %sp0, i64 %offset
734 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
735 %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
; fct31: load one i64 from element %offset of %sp0 and insert it into lane 0 of
; an undef <2 x i64>. The index scaling is expected to fold into the load as
; `x1, lsl #3` on d0; no multiply appears in this variant.
739 define <2 x i64> @fct31(ptr nocapture %sp0, i64 %offset) {
740 ; CHECK-LABEL: fct31:
741 ; CHECK: // %bb.0: // %entry
742 ; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
745 %addr = getelementptr i64, ptr %sp0, i64 %offset
746 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
747 %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0