1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
; Named struct types, each wrapping a single vector type.
; NOTE(review): no use of %type1/%type2/%type3 is visible in this part of the
; file -- confirm whether they are still needed before touching them.
6 %type1 = type { <16 x i8> }
7 %type2 = type { <8 x i8> }
8 %type3 = type { <4 x i16> }
; t1: load a pointer from %argtable and store an all-zero <16 x i8> through it.
; The checks expect the zero vector to be materialized with movi.2d and written
; with a single 128-bit store (str q0).
11 define hidden fastcc void @t1(ptr %argtable) nounwind {
13 ; CHECK: // %bb.0: // %entry
14 ; CHECK-NEXT: movi.2d v0, #0000000000000000
15 ; CHECK-NEXT: ldr x8, [x0]
16 ; CHECK-NEXT: str q0, [x8]
19 %tmp1 = load ptr, ptr %argtable, align 8
20 store <16 x i8> zeroinitializer, ptr %tmp1, align 16
; t2: same as the <16 x i8> zero-store test, but for a 64-bit <8 x i8> vector.
; The checks expect the zero to be materialized with movi.2d and only the low
; 64 bits to be stored (str d0).
24 define hidden fastcc void @t2(ptr %argtable) nounwind {
26 ; CHECK: // %bb.0: // %entry
27 ; CHECK-NEXT: movi.2d v0, #0000000000000000
28 ; CHECK-NEXT: ldr x8, [x0]
29 ; CHECK-NEXT: str d0, [x8]
32 %tmp1 = load ptr, ptr %argtable, align 8
33 store <8 x i8> zeroinitializer, ptr %tmp1, align 8
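37 ; Add a bunch of tests for rdar://11246289: vector loads and stores addressed
; with a register offset (fct1_*) and with an immediate offset (fct2_*).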
; Pointer-typed globals (common linkage, null-initialized, 8-byte aligned).
; Each @globalArray<elt>x<n> holds the address of the destination array used by
; the fct1_<elt>x<n> / fct2_<elt>x<n> tests, which load the pointer and store a
; vector through it.
; NOTE(review): no user of the @floatglobalArray* globals is visible in this
; part of the file -- confirm they are referenced elsewhere.
39 @globalArray64x2 = common global ptr null, align 8
40 @globalArray32x4 = common global ptr null, align 8
41 @globalArray16x8 = common global ptr null, align 8
42 @globalArray8x16 = common global ptr null, align 8
43 @globalArray64x1 = common global ptr null, align 8
44 @globalArray32x2 = common global ptr null, align 8
45 @globalArray16x4 = common global ptr null, align 8
46 @globalArray8x8 = common global ptr null, align 8
47 @floatglobalArray64x2 = common global ptr null, align 8
48 @floatglobalArray32x4 = common global ptr null, align 8
49 @floatglobalArray64x1 = common global ptr null, align 8
50 @floatglobalArray32x2 = common global ptr null, align 8
; fct1_64x2: copy the <2 x i64> at index %offset of %array to the same index of
; the array whose address is stored in @globalArray64x2.
; The checks expect the index to be scaled once (lsl #4, 16-byte elements) and
; reused as the register offset of both the ldr q0 and the str q0.
52 define void @fct1_64x2(ptr nocapture %array, i64 %offset) nounwind ssp {
53 ; CHECK-LABEL: fct1_64x2:
54 ; CHECK: // %bb.0: // %entry
55 ; CHECK-NEXT: adrp x8, :got:globalArray64x2
56 ; CHECK-NEXT: lsl x9, x1, #4
57 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x2]
58 ; CHECK-NEXT: ldr q0, [x0, x9]
59 ; CHECK-NEXT: ldr x8, [x8]
60 ; CHECK-NEXT: str q0, [x8, x9]
63 %arrayidx = getelementptr inbounds <2 x i64>, ptr %array, i64 %offset
64 %tmp = load <2 x i64>, ptr %arrayidx, align 16
65 %tmp1 = load ptr, ptr @globalArray64x2, align 8
66 %arrayidx1 = getelementptr inbounds <2 x i64>, ptr %tmp1, i64 %offset
67 store <2 x i64> %tmp, ptr %arrayidx1, align 16
; fct2_64x2: copy <2 x i64> element 3 of %array to element 5 of the array whose
; address is stored in @globalArray64x2.
; The constant indices are expected to fold into immediate offsets:
; 3 * 16 = #48 for the load and 5 * 16 = #80 for the store.
71 define void @fct2_64x2(ptr nocapture %array) nounwind ssp {
72 ; CHECK-LABEL: fct2_64x2:
73 ; CHECK: // %bb.0: // %entry
74 ; CHECK-NEXT: adrp x8, :got:globalArray64x2
75 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x2]
76 ; CHECK-NEXT: ldr q0, [x0, #48]
77 ; CHECK-NEXT: ldr x8, [x8]
78 ; CHECK-NEXT: str q0, [x8, #80]
81 %arrayidx = getelementptr inbounds <2 x i64>, ptr %array, i64 3
82 %tmp = load <2 x i64>, ptr %arrayidx, align 16
83 %tmp1 = load ptr, ptr @globalArray64x2, align 8
84 %arrayidx1 = getelementptr inbounds <2 x i64>, ptr %tmp1, i64 5
85 store <2 x i64> %tmp, ptr %arrayidx1, align 16
; fct1_32x4: copy the <4 x i32> at index %offset of %array to the same index of
; the array whose address is stored in @globalArray32x4.
; The checks expect the index to be scaled once (lsl #4, 16-byte elements) and
; reused as the register offset of both the ldr q0 and the str q0.
89 define void @fct1_32x4(ptr nocapture %array, i64 %offset) nounwind ssp {
90 ; CHECK-LABEL: fct1_32x4:
91 ; CHECK: // %bb.0: // %entry
92 ; CHECK-NEXT: adrp x8, :got:globalArray32x4
93 ; CHECK-NEXT: lsl x9, x1, #4
94 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x4]
95 ; CHECK-NEXT: ldr q0, [x0, x9]
96 ; CHECK-NEXT: ldr x8, [x8]
97 ; CHECK-NEXT: str q0, [x8, x9]
100 %arrayidx = getelementptr inbounds <4 x i32>, ptr %array, i64 %offset
101 %tmp = load <4 x i32>, ptr %arrayidx, align 16
102 %tmp1 = load ptr, ptr @globalArray32x4, align 8
103 %arrayidx1 = getelementptr inbounds <4 x i32>, ptr %tmp1, i64 %offset
104 store <4 x i32> %tmp, ptr %arrayidx1, align 16
; fct2_32x4: copy <4 x i32> element 3 of %array to element 5 of the array whose
; address is stored in @globalArray32x4.
; The constant indices are expected to fold into immediate offsets:
; 3 * 16 = #48 for the load and 5 * 16 = #80 for the store.
108 define void @fct2_32x4(ptr nocapture %array) nounwind ssp {
109 ; CHECK-LABEL: fct2_32x4:
110 ; CHECK: // %bb.0: // %entry
111 ; CHECK-NEXT: adrp x8, :got:globalArray32x4
112 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x4]
113 ; CHECK-NEXT: ldr q0, [x0, #48]
114 ; CHECK-NEXT: ldr x8, [x8]
115 ; CHECK-NEXT: str q0, [x8, #80]
118 %arrayidx = getelementptr inbounds <4 x i32>, ptr %array, i64 3
119 %tmp = load <4 x i32>, ptr %arrayidx, align 16
120 %tmp1 = load ptr, ptr @globalArray32x4, align 8
121 %arrayidx1 = getelementptr inbounds <4 x i32>, ptr %tmp1, i64 5
122 store <4 x i32> %tmp, ptr %arrayidx1, align 16
; fct1_16x8: copy the <8 x i16> at index %offset of %array to the same index of
; the array whose address is stored in @globalArray16x8.
; The checks expect the index to be scaled once (lsl #4, 16-byte elements) and
; reused as the register offset of both the ldr q0 and the str q0.
126 define void @fct1_16x8(ptr nocapture %array, i64 %offset) nounwind ssp {
127 ; CHECK-LABEL: fct1_16x8:
128 ; CHECK: // %bb.0: // %entry
129 ; CHECK-NEXT: adrp x8, :got:globalArray16x8
130 ; CHECK-NEXT: lsl x9, x1, #4
131 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x8]
132 ; CHECK-NEXT: ldr q0, [x0, x9]
133 ; CHECK-NEXT: ldr x8, [x8]
134 ; CHECK-NEXT: str q0, [x8, x9]
137 %arrayidx = getelementptr inbounds <8 x i16>, ptr %array, i64 %offset
138 %tmp = load <8 x i16>, ptr %arrayidx, align 16
139 %tmp1 = load ptr, ptr @globalArray16x8, align 8
140 %arrayidx1 = getelementptr inbounds <8 x i16>, ptr %tmp1, i64 %offset
141 store <8 x i16> %tmp, ptr %arrayidx1, align 16
; fct2_16x8: copy <8 x i16> element 3 of %array to element 5 of the array whose
; address is stored in @globalArray16x8.
; The constant indices are expected to fold into immediate offsets:
; 3 * 16 = #48 for the load and 5 * 16 = #80 for the store.
145 define void @fct2_16x8(ptr nocapture %array) nounwind ssp {
146 ; CHECK-LABEL: fct2_16x8:
147 ; CHECK: // %bb.0: // %entry
148 ; CHECK-NEXT: adrp x8, :got:globalArray16x8
149 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x8]
150 ; CHECK-NEXT: ldr q0, [x0, #48]
151 ; CHECK-NEXT: ldr x8, [x8]
152 ; CHECK-NEXT: str q0, [x8, #80]
155 %arrayidx = getelementptr inbounds <8 x i16>, ptr %array, i64 3
156 %tmp = load <8 x i16>, ptr %arrayidx, align 16
157 %tmp1 = load ptr, ptr @globalArray16x8, align 8
158 %arrayidx1 = getelementptr inbounds <8 x i16>, ptr %tmp1, i64 5
159 store <8 x i16> %tmp, ptr %arrayidx1, align 16
; fct1_8x16: copy the <16 x i8> at index %offset of %array to the same index of
; the array whose address is stored in @globalArray8x16.
; The checks expect the index to be scaled once (lsl #4, 16-byte elements) and
; reused as the register offset of both the ldr q0 and the str q0.
163 define void @fct1_8x16(ptr nocapture %array, i64 %offset) nounwind ssp {
164 ; CHECK-LABEL: fct1_8x16:
165 ; CHECK: // %bb.0: // %entry
166 ; CHECK-NEXT: adrp x8, :got:globalArray8x16
167 ; CHECK-NEXT: lsl x9, x1, #4
168 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x16]
169 ; CHECK-NEXT: ldr q0, [x0, x9]
170 ; CHECK-NEXT: ldr x8, [x8]
171 ; CHECK-NEXT: str q0, [x8, x9]
174 %arrayidx = getelementptr inbounds <16 x i8>, ptr %array, i64 %offset
175 %tmp = load <16 x i8>, ptr %arrayidx, align 16
176 %tmp1 = load ptr, ptr @globalArray8x16, align 8
177 %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %tmp1, i64 %offset
178 store <16 x i8> %tmp, ptr %arrayidx1, align 16
; fct2_8x16: copy <16 x i8> element 3 of %array to element 5 of the array whose
; address is stored in @globalArray8x16.
; The constant indices are expected to fold into immediate offsets:
; 3 * 16 = #48 for the load and 5 * 16 = #80 for the store.
182 define void @fct2_8x16(ptr nocapture %array) nounwind ssp {
183 ; CHECK-LABEL: fct2_8x16:
184 ; CHECK: // %bb.0: // %entry
185 ; CHECK-NEXT: adrp x8, :got:globalArray8x16
186 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x16]
187 ; CHECK-NEXT: ldr q0, [x0, #48]
188 ; CHECK-NEXT: ldr x8, [x8]
189 ; CHECK-NEXT: str q0, [x8, #80]
192 %arrayidx = getelementptr inbounds <16 x i8>, ptr %array, i64 3
193 %tmp = load <16 x i8>, ptr %arrayidx, align 16
194 %tmp1 = load ptr, ptr @globalArray8x16, align 8
195 %arrayidx1 = getelementptr inbounds <16 x i8>, ptr %tmp1, i64 5
196 store <16 x i8> %tmp, ptr %arrayidx1, align 16
; fct1_64x1: copy the <1 x i64> at index %offset of %array to the same index of
; the array whose address is stored in @globalArray64x1.
; The checks expect the index to be scaled once (lsl #3, 8-byte elements) and
; reused as the register offset of both the ldr d0 and the str d0.
200 define void @fct1_64x1(ptr nocapture %array, i64 %offset) nounwind ssp {
201 ; CHECK-LABEL: fct1_64x1:
202 ; CHECK: // %bb.0: // %entry
203 ; CHECK-NEXT: adrp x8, :got:globalArray64x1
204 ; CHECK-NEXT: lsl x9, x1, #3
205 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x1]
206 ; CHECK-NEXT: ldr d0, [x0, x9]
207 ; CHECK-NEXT: ldr x8, [x8]
208 ; CHECK-NEXT: str d0, [x8, x9]
211 %arrayidx = getelementptr inbounds <1 x i64>, ptr %array, i64 %offset
212 %tmp = load <1 x i64>, ptr %arrayidx, align 8
213 %tmp1 = load ptr, ptr @globalArray64x1, align 8
214 %arrayidx1 = getelementptr inbounds <1 x i64>, ptr %tmp1, i64 %offset
215 store <1 x i64> %tmp, ptr %arrayidx1, align 8
; fct2_64x1: copy <1 x i64> element 3 of %array to element 5 of the array whose
; address is stored in @globalArray64x1.
; The constant indices are expected to fold into immediate offsets:
; 3 * 8 = #24 for the load and 5 * 8 = #40 for the store.
219 define void @fct2_64x1(ptr nocapture %array) nounwind ssp {
220 ; CHECK-LABEL: fct2_64x1:
221 ; CHECK: // %bb.0: // %entry
222 ; CHECK-NEXT: adrp x8, :got:globalArray64x1
223 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x1]
224 ; CHECK-NEXT: ldr d0, [x0, #24]
225 ; CHECK-NEXT: ldr x8, [x8]
226 ; CHECK-NEXT: str d0, [x8, #40]
229 %arrayidx = getelementptr inbounds <1 x i64>, ptr %array, i64 3
230 %tmp = load <1 x i64>, ptr %arrayidx, align 8
231 %tmp1 = load ptr, ptr @globalArray64x1, align 8
232 %arrayidx1 = getelementptr inbounds <1 x i64>, ptr %tmp1, i64 5
233 store <1 x i64> %tmp, ptr %arrayidx1, align 8
; fct1_32x2: copy the <2 x i32> at index %offset of %array to the same index of
; the array whose address is stored in @globalArray32x2.
; The checks expect the index to be scaled once (lsl #3, 8-byte elements) and
; reused as the register offset of both the ldr d0 and the str d0.
237 define void @fct1_32x2(ptr nocapture %array, i64 %offset) nounwind ssp {
238 ; CHECK-LABEL: fct1_32x2:
239 ; CHECK: // %bb.0: // %entry
240 ; CHECK-NEXT: adrp x8, :got:globalArray32x2
241 ; CHECK-NEXT: lsl x9, x1, #3
242 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x2]
243 ; CHECK-NEXT: ldr d0, [x0, x9]
244 ; CHECK-NEXT: ldr x8, [x8]
245 ; CHECK-NEXT: str d0, [x8, x9]
248 %arrayidx = getelementptr inbounds <2 x i32>, ptr %array, i64 %offset
249 %tmp = load <2 x i32>, ptr %arrayidx, align 8
250 %tmp1 = load ptr, ptr @globalArray32x2, align 8
251 %arrayidx1 = getelementptr inbounds <2 x i32>, ptr %tmp1, i64 %offset
252 store <2 x i32> %tmp, ptr %arrayidx1, align 8
; fct2_32x2: copy <2 x i32> element 3 of %array to element 5 of the array whose
; address is stored in @globalArray32x2.
; The constant indices are expected to fold into immediate offsets:
; 3 * 8 = #24 for the load and 5 * 8 = #40 for the store.
256 define void @fct2_32x2(ptr nocapture %array) nounwind ssp {
257 ; CHECK-LABEL: fct2_32x2:
258 ; CHECK: // %bb.0: // %entry
259 ; CHECK-NEXT: adrp x8, :got:globalArray32x2
260 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x2]
261 ; CHECK-NEXT: ldr d0, [x0, #24]
262 ; CHECK-NEXT: ldr x8, [x8]
263 ; CHECK-NEXT: str d0, [x8, #40]
266 %arrayidx = getelementptr inbounds <2 x i32>, ptr %array, i64 3
267 %tmp = load <2 x i32>, ptr %arrayidx, align 8
268 %tmp1 = load ptr, ptr @globalArray32x2, align 8
269 %arrayidx1 = getelementptr inbounds <2 x i32>, ptr %tmp1, i64 5
270 store <2 x i32> %tmp, ptr %arrayidx1, align 8
; fct1_16x4: copy the <4 x i16> at index %offset of %array to the same index of
; the array whose address is stored in @globalArray16x4.
; The checks expect the index to be scaled once (lsl #3, 8-byte elements) and
; reused as the register offset of both the ldr d0 and the str d0.
274 define void @fct1_16x4(ptr nocapture %array, i64 %offset) nounwind ssp {
275 ; CHECK-LABEL: fct1_16x4:
276 ; CHECK: // %bb.0: // %entry
277 ; CHECK-NEXT: adrp x8, :got:globalArray16x4
278 ; CHECK-NEXT: lsl x9, x1, #3
279 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x4]
280 ; CHECK-NEXT: ldr d0, [x0, x9]
281 ; CHECK-NEXT: ldr x8, [x8]
282 ; CHECK-NEXT: str d0, [x8, x9]
285 %arrayidx = getelementptr inbounds <4 x i16>, ptr %array, i64 %offset
286 %tmp = load <4 x i16>, ptr %arrayidx, align 8
287 %tmp1 = load ptr, ptr @globalArray16x4, align 8
288 %arrayidx1 = getelementptr inbounds <4 x i16>, ptr %tmp1, i64 %offset
289 store <4 x i16> %tmp, ptr %arrayidx1, align 8
; fct2_16x4: copy <4 x i16> element 3 of %array to element 5 of the array whose
; address is stored in @globalArray16x4.
; The constant indices are expected to fold into immediate offsets:
; 3 * 8 = #24 for the load and 5 * 8 = #40 for the store.
293 define void @fct2_16x4(ptr nocapture %array) nounwind ssp {
294 ; CHECK-LABEL: fct2_16x4:
295 ; CHECK: // %bb.0: // %entry
296 ; CHECK-NEXT: adrp x8, :got:globalArray16x4
297 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x4]
298 ; CHECK-NEXT: ldr d0, [x0, #24]
299 ; CHECK-NEXT: ldr x8, [x8]
300 ; CHECK-NEXT: str d0, [x8, #40]
303 %arrayidx = getelementptr inbounds <4 x i16>, ptr %array, i64 3
304 %tmp = load <4 x i16>, ptr %arrayidx, align 8
305 %tmp1 = load ptr, ptr @globalArray16x4, align 8
306 %arrayidx1 = getelementptr inbounds <4 x i16>, ptr %tmp1, i64 5
307 store <4 x i16> %tmp, ptr %arrayidx1, align 8
; fct1_8x8: copy the <8 x i8> at index %offset of %array to the same index of
; the array whose address is stored in @globalArray8x8.
; The checks expect the index to be scaled once (lsl #3, 8-byte elements) and
; reused as the register offset of both the ldr d0 and the str d0.
311 define void @fct1_8x8(ptr nocapture %array, i64 %offset) nounwind ssp {
312 ; CHECK-LABEL: fct1_8x8:
313 ; CHECK: // %bb.0: // %entry
314 ; CHECK-NEXT: adrp x8, :got:globalArray8x8
315 ; CHECK-NEXT: lsl x9, x1, #3
316 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x8]
317 ; CHECK-NEXT: ldr d0, [x0, x9]
318 ; CHECK-NEXT: ldr x8, [x8]
319 ; CHECK-NEXT: str d0, [x8, x9]
322 %arrayidx = getelementptr inbounds <8 x i8>, ptr %array, i64 %offset
323 %tmp = load <8 x i8>, ptr %arrayidx, align 8
324 %tmp1 = load ptr, ptr @globalArray8x8, align 8
325 %arrayidx1 = getelementptr inbounds <8 x i8>, ptr %tmp1, i64 %offset
326 store <8 x i8> %tmp, ptr %arrayidx1, align 8
330 ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
331 ; registers for unscaled vector accesses
; fct0: load a <1 x i64> from byte offset 3 past %str.
; The check expects the unscaled-offset form of the 64-bit load (ldur d0).
333 define <1 x i64> @fct0(ptr %str) nounwind readonly ssp {
335 ; CHECK: // %bb.0: // %entry
336 ; CHECK-NEXT: ldur d0, [x0, #3]
339 %p = getelementptr inbounds i8, ptr %str, i64 3
340 %0 = load <1 x i64>, ptr %p, align 8
; fct1: load a <2 x i32> from byte offset 3 past %str.
; The check expects the unscaled-offset form of the 64-bit load (ldur d0).
344 define <2 x i32> @fct1(ptr %str) nounwind readonly ssp {
346 ; CHECK: // %bb.0: // %entry
347 ; CHECK-NEXT: ldur d0, [x0, #3]
350 %p = getelementptr inbounds i8, ptr %str, i64 3
351 %0 = load <2 x i32>, ptr %p, align 8
; fct2: load a <4 x i16> from byte offset 3 past %str.
; The check expects the unscaled-offset form of the 64-bit load (ldur d0).
355 define <4 x i16> @fct2(ptr %str) nounwind readonly ssp {
357 ; CHECK: // %bb.0: // %entry
358 ; CHECK-NEXT: ldur d0, [x0, #3]
361 %p = getelementptr inbounds i8, ptr %str, i64 3
362 %0 = load <4 x i16>, ptr %p, align 8
; fct3: load an <8 x i8> from byte offset 3 past %str.
; The check expects the unscaled-offset form of the 64-bit load (ldur d0).
366 define <8 x i8> @fct3(ptr %str) nounwind readonly ssp {
368 ; CHECK: // %bb.0: // %entry
369 ; CHECK-NEXT: ldur d0, [x0, #3]
372 %p = getelementptr inbounds i8, ptr %str, i64 3
373 %0 = load <8 x i8>, ptr %p, align 8
; fct4: load a <2 x i64> from byte offset 3 past %str.
; The check expects the unscaled-offset form of the 128-bit load (ldur q0).
377 define <2 x i64> @fct4(ptr %str) nounwind readonly ssp {
379 ; CHECK: // %bb.0: // %entry
380 ; CHECK-NEXT: ldur q0, [x0, #3]
383 %p = getelementptr inbounds i8, ptr %str, i64 3
384 %0 = load <2 x i64>, ptr %p, align 16
; fct5: load a <4 x i32> from byte offset 3 past %str.
; The check expects the unscaled-offset form of the 128-bit load (ldur q0).
388 define <4 x i32> @fct5(ptr %str) nounwind readonly ssp {
390 ; CHECK: // %bb.0: // %entry
391 ; CHECK-NEXT: ldur q0, [x0, #3]
394 %p = getelementptr inbounds i8, ptr %str, i64 3
395 %0 = load <4 x i32>, ptr %p, align 16
; fct6: load an <8 x i16> from byte offset 3 past %str.
; The check expects the unscaled-offset form of the 128-bit load (ldur q0).
399 define <8 x i16> @fct6(ptr %str) nounwind readonly ssp {
401 ; CHECK: // %bb.0: // %entry
402 ; CHECK-NEXT: ldur q0, [x0, #3]
405 %p = getelementptr inbounds i8, ptr %str, i64 3
406 %0 = load <8 x i16>, ptr %p, align 16
; fct7: load a <16 x i8> from byte offset 3 past %str.
; The check expects the unscaled-offset form of the 128-bit load (ldur q0).
410 define <16 x i8> @fct7(ptr %str) nounwind readonly ssp {
412 ; CHECK: // %bb.0: // %entry
413 ; CHECK-NEXT: ldur q0, [x0, #3]
416 %p = getelementptr inbounds i8, ptr %str, i64 3
417 %0 = load <16 x i8>, ptr %p, align 16
; fct8: load a <1 x i64> from byte offset 3 past %str and store it back at byte
; offset 4.
; The checks expect unscaled-offset forms for both accesses (ldur d0 / stur d0).
421 define void @fct8(ptr %str) nounwind ssp {
423 ; CHECK: // %bb.0: // %entry
424 ; CHECK-NEXT: ldur d0, [x0, #3]
425 ; CHECK-NEXT: stur d0, [x0, #4]
428 %p = getelementptr inbounds i8, ptr %str, i64 3
429 %0 = load <1 x i64>, ptr %p, align 8
430 %p2 = getelementptr inbounds i8, ptr %str, i64 4
431 store <1 x i64> %0, ptr %p2, align 8
; fct9: load a <2 x i32> from byte offset 3 past %str and store it back at byte
; offset 4.
; The checks expect unscaled-offset forms for both accesses (ldur d0 / stur d0).
435 define void @fct9(ptr %str) nounwind ssp {
437 ; CHECK: // %bb.0: // %entry
438 ; CHECK-NEXT: ldur d0, [x0, #3]
439 ; CHECK-NEXT: stur d0, [x0, #4]
442 %p = getelementptr inbounds i8, ptr %str, i64 3
443 %0 = load <2 x i32>, ptr %p, align 8
444 %p2 = getelementptr inbounds i8, ptr %str, i64 4
445 store <2 x i32> %0, ptr %p2, align 8
; fct10: load a <4 x i16> from byte offset 3 past %str and store it back at
; byte offset 4.
; The checks expect unscaled-offset forms for both accesses (ldur d0 / stur d0).
449 define void @fct10(ptr %str) nounwind ssp {
450 ; CHECK-LABEL: fct10:
451 ; CHECK: // %bb.0: // %entry
452 ; CHECK-NEXT: ldur d0, [x0, #3]
453 ; CHECK-NEXT: stur d0, [x0, #4]
456 %p = getelementptr inbounds i8, ptr %str, i64 3
457 %0 = load <4 x i16>, ptr %p, align 8
458 %p2 = getelementptr inbounds i8, ptr %str, i64 4
459 store <4 x i16> %0, ptr %p2, align 8
; fct11: load an <8 x i8> from byte offset 3 past %str and store it back at
; byte offset 4.
; The checks expect unscaled-offset forms for both accesses (ldur d0 / stur d0).
463 define void @fct11(ptr %str) nounwind ssp {
464 ; CHECK-LABEL: fct11:
465 ; CHECK: // %bb.0: // %entry
466 ; CHECK-NEXT: ldur d0, [x0, #3]
467 ; CHECK-NEXT: stur d0, [x0, #4]
470 %p = getelementptr inbounds i8, ptr %str, i64 3
471 %0 = load <8 x i8>, ptr %p, align 8
472 %p2 = getelementptr inbounds i8, ptr %str, i64 4
473 store <8 x i8> %0, ptr %p2, align 8
; fct12: load a <2 x i64> from byte offset 3 past %str and store it back at
; byte offset 4.
; The checks expect unscaled-offset forms for both accesses (ldur q0 / stur q0).
477 define void @fct12(ptr %str) nounwind ssp {
478 ; CHECK-LABEL: fct12:
479 ; CHECK: // %bb.0: // %entry
480 ; CHECK-NEXT: ldur q0, [x0, #3]
481 ; CHECK-NEXT: stur q0, [x0, #4]
484 %p = getelementptr inbounds i8, ptr %str, i64 3
485 %0 = load <2 x i64>, ptr %p, align 16
486 %p2 = getelementptr inbounds i8, ptr %str, i64 4
487 store <2 x i64> %0, ptr %p2, align 16
; fct13: load a <4 x i32> from byte offset 3 past %str and store it back at
; byte offset 4.
; The checks expect unscaled-offset forms for both accesses (ldur q0 / stur q0).
491 define void @fct13(ptr %str) nounwind ssp {
492 ; CHECK-LABEL: fct13:
493 ; CHECK: // %bb.0: // %entry
494 ; CHECK-NEXT: ldur q0, [x0, #3]
495 ; CHECK-NEXT: stur q0, [x0, #4]
498 %p = getelementptr inbounds i8, ptr %str, i64 3
499 %0 = load <4 x i32>, ptr %p, align 16
500 %p2 = getelementptr inbounds i8, ptr %str, i64 4
501 store <4 x i32> %0, ptr %p2, align 16
; fct14: load an <8 x i16> from byte offset 3 past %str and store it back at
; byte offset 4.
; The checks expect unscaled-offset forms for both accesses (ldur q0 / stur q0).
505 define void @fct14(ptr %str) nounwind ssp {
506 ; CHECK-LABEL: fct14:
507 ; CHECK: // %bb.0: // %entry
508 ; CHECK-NEXT: ldur q0, [x0, #3]
509 ; CHECK-NEXT: stur q0, [x0, #4]
512 %p = getelementptr inbounds i8, ptr %str, i64 3
513 %0 = load <8 x i16>, ptr %p, align 16
514 %p2 = getelementptr inbounds i8, ptr %str, i64 4
515 store <8 x i16> %0, ptr %p2, align 16
; fct15: load a <16 x i8> from byte offset 3 past %str and store it back at
; byte offset 4.
; The checks expect unscaled-offset forms for both accesses (ldur q0 / stur q0).
519 define void @fct15(ptr %str) nounwind ssp {
520 ; CHECK-LABEL: fct15:
521 ; CHECK: // %bb.0: // %entry
522 ; CHECK-NEXT: ldur q0, [x0, #3]
523 ; CHECK-NEXT: stur q0, [x0, #4]
526 %p = getelementptr inbounds i8, ptr %str, i64 3
527 %0 = load <16 x i8>, ptr %p, align 16
528 %p2 = getelementptr inbounds i8, ptr %str, i64 4
529 store <16 x i8> %0, ptr %p2, align 16
533 ; Check the building of vector from a single loaded value.
534 ; Part of <rdar://problem/14170854>
536 ; Single loads with immediate offset.
; fct16: load the i8 at index 1 of %sp0, insert it into lane 0 of an otherwise
; undef <8 x i8>, and return that vector multiplied by itself.
; The checks expect the scalar to be loaded straight into the vector register
; (ldr b0, [x0, #1]) followed by mul.8b.
537 define <8 x i8> @fct16(ptr nocapture %sp0) {
538 ; CHECK-LABEL: fct16:
539 ; CHECK: // %bb.0: // %entry
540 ; CHECK-NEXT: ldr b0, [x0, #1]
541 ; CHECK-NEXT: mul.8b v0, v0, v0
544 %addr = getelementptr i8, ptr %sp0, i64 1
545 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
546 %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
547 %vmull.i = mul <8 x i8> %vec, %vec
548 ret <8 x i8> %vmull.i
; fct17: load the i8 at index 1 of %sp0, insert it into lane 0 of an otherwise
; undef <16 x i8>, and return that vector multiplied by itself.
; The checks expect the scalar to be loaded straight into the vector register
; (ldr b0, [x0, #1]) followed by mul.16b.
551 define <16 x i8> @fct17(ptr nocapture %sp0) {
552 ; CHECK-LABEL: fct17:
553 ; CHECK: // %bb.0: // %entry
554 ; CHECK-NEXT: ldr b0, [x0, #1]
555 ; CHECK-NEXT: mul.16b v0, v0, v0
558 %addr = getelementptr i8, ptr %sp0, i64 1
559 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
560 %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
561 %vmull.i = mul <16 x i8> %vec, %vec
562 ret <16 x i8> %vmull.i
; fct18: load the i16 at index 1 of %sp0 (byte offset 2, load marked align 1),
; insert it into lane 0 of an otherwise undef <4 x i16>, and return that vector
; multiplied by itself.
; The checks expect a direct ldr h0, [x0, #2] followed by mul.4h.
565 define <4 x i16> @fct18(ptr nocapture %sp0) {
566 ; CHECK-LABEL: fct18:
567 ; CHECK: // %bb.0: // %entry
568 ; CHECK-NEXT: ldr h0, [x0, #2]
569 ; CHECK-NEXT: mul.4h v0, v0, v0
572 %addr = getelementptr i16, ptr %sp0, i64 1
573 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
574 %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
575 %vmull.i = mul <4 x i16> %vec, %vec
576 ret <4 x i16> %vmull.i
; fct19: load the i16 at index 1 of %sp0 (byte offset 2, load marked align 1),
; insert it into lane 0 of an otherwise undef <8 x i16>, and return that vector
; multiplied by itself.
; The checks expect a direct ldr h0, [x0, #2] followed by mul.8h.
579 define <8 x i16> @fct19(ptr nocapture %sp0) {
580 ; CHECK-LABEL: fct19:
581 ; CHECK: // %bb.0: // %entry
582 ; CHECK-NEXT: ldr h0, [x0, #2]
583 ; CHECK-NEXT: mul.8h v0, v0, v0
586 %addr = getelementptr i16, ptr %sp0, i64 1
587 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
588 %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
589 %vmull.i = mul <8 x i16> %vec, %vec
590 ret <8 x i16> %vmull.i
; fct20: load the i32 at index 1 of %sp0 (byte offset 4, load marked align 1),
; insert it into lane 0 of an otherwise undef <2 x i32>, and return that vector
; multiplied by itself.
; The checks expect a direct ldr s0, [x0, #4] followed by mul.2s.
593 define <2 x i32> @fct20(ptr nocapture %sp0) {
594 ; CHECK-LABEL: fct20:
595 ; CHECK: // %bb.0: // %entry
596 ; CHECK-NEXT: ldr s0, [x0, #4]
597 ; CHECK-NEXT: mul.2s v0, v0, v0
600 %addr = getelementptr i32, ptr %sp0, i64 1
601 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
602 %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
603 %vmull.i = mul <2 x i32> %vec, %vec
604 ret <2 x i32> %vmull.i
; fct21: load the i32 at index 1 of %sp0 (byte offset 4, load marked align 1),
; insert it into lane 0 of an otherwise undef <4 x i32>, and return that vector
; multiplied by itself.
; The checks expect a direct ldr s0, [x0, #4] followed by mul.4s.
607 define <4 x i32> @fct21(ptr nocapture %sp0) {
608 ; CHECK-LABEL: fct21:
609 ; CHECK: // %bb.0: // %entry
610 ; CHECK-NEXT: ldr s0, [x0, #4]
611 ; CHECK-NEXT: mul.4s v0, v0, v0
614 %addr = getelementptr i32, ptr %sp0, i64 1
615 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
616 %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
617 %vmull.i = mul <4 x i32> %vec, %vec
618 ret <4 x i32> %vmull.i
; fct22: load the i64 at index 1 of %sp0 (byte offset 8, load marked align 1)
; and insert it into lane 0 of a <1 x i64>.
; The check expects a direct load into the vector register: ldr d0, [x0, #8].
621 define <1 x i64> @fct22(ptr nocapture %sp0) {
622 ; CHECK-LABEL: fct22:
623 ; CHECK: // %bb.0: // %entry
624 ; CHECK-NEXT: ldr d0, [x0, #8]
627 %addr = getelementptr i64, ptr %sp0, i64 1
628 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
629 %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
; fct23: load the i64 at index 1 of %sp0 (byte offset 8, load marked align 1)
; and insert it into lane 0 of an otherwise undef <2 x i64>.
; The check expects a direct load into the vector register: ldr d0, [x0, #8].
633 define <2 x i64> @fct23(ptr nocapture %sp0) {
634 ; CHECK-LABEL: fct23:
635 ; CHECK: // %bb.0: // %entry
636 ; CHECK-NEXT: ldr d0, [x0, #8]
639 %addr = getelementptr i64, ptr %sp0, i64 1
640 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
641 %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
646 ; Single loads with register offset.
; fct24: load the i8 at index %offset of %sp0, insert it into lane 0 of an
; otherwise undef <8 x i8>, and return that vector multiplied by itself.
; The checks expect a register-offset load straight into the vector register
; (ldr b0, [x0, x1]) followed by mul.8b.
647 define <8 x i8> @fct24(ptr nocapture %sp0, i64 %offset) {
648 ; CHECK-LABEL: fct24:
649 ; CHECK: // %bb.0: // %entry
650 ; CHECK-NEXT: ldr b0, [x0, x1]
651 ; CHECK-NEXT: mul.8b v0, v0, v0
654 %addr = getelementptr i8, ptr %sp0, i64 %offset
655 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
656 %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
657 %vmull.i = mul <8 x i8> %vec, %vec
658 ret <8 x i8> %vmull.i
; fct25: load the i8 at index %offset of %sp0, insert it into lane 0 of an
; otherwise undef <16 x i8>, and return that vector multiplied by itself.
; The checks expect a register-offset load straight into the vector register
; (ldr b0, [x0, x1]) followed by mul.16b.
661 define <16 x i8> @fct25(ptr nocapture %sp0, i64 %offset) {
662 ; CHECK-LABEL: fct25:
663 ; CHECK: // %bb.0: // %entry
664 ; CHECK-NEXT: ldr b0, [x0, x1]
665 ; CHECK-NEXT: mul.16b v0, v0, v0
668 %addr = getelementptr i8, ptr %sp0, i64 %offset
669 %pix_sp0.0.copyload = load i8, ptr %addr, align 1
670 %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
671 %vmull.i = mul <16 x i8> %vec, %vec
672 ret <16 x i8> %vmull.i
; fct26: load the i16 at index %offset of %sp0 (load marked align 1), insert it
; into lane 0 of an otherwise undef <4 x i16>, and return that vector
; multiplied by itself.
; The checks expect the index scaling to fold into the load's addressing mode
; (ldr h0, [x0, x1, lsl #1]) followed by mul.4h.
675 define <4 x i16> @fct26(ptr nocapture %sp0, i64 %offset) {
676 ; CHECK-LABEL: fct26:
677 ; CHECK: // %bb.0: // %entry
678 ; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
679 ; CHECK-NEXT: mul.4h v0, v0, v0
682 %addr = getelementptr i16, ptr %sp0, i64 %offset
683 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
684 %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
685 %vmull.i = mul <4 x i16> %vec, %vec
686 ret <4 x i16> %vmull.i
; fct27: load the i16 at index %offset of %sp0 (load marked align 1), insert it
; into lane 0 of an otherwise undef <8 x i16>, and return that vector
; multiplied by itself.
; The checks expect the index scaling to fold into the load's addressing mode
; (ldr h0, [x0, x1, lsl #1]) followed by mul.8h.
689 define <8 x i16> @fct27(ptr nocapture %sp0, i64 %offset) {
690 ; CHECK-LABEL: fct27:
691 ; CHECK: // %bb.0: // %entry
692 ; CHECK-NEXT: ldr h0, [x0, x1, lsl #1]
693 ; CHECK-NEXT: mul.8h v0, v0, v0
696 %addr = getelementptr i16, ptr %sp0, i64 %offset
697 %pix_sp0.0.copyload = load i16, ptr %addr, align 1
698 %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
699 %vmull.i = mul <8 x i16> %vec, %vec
700 ret <8 x i16> %vmull.i
; fct28: load the i32 at index %offset of %sp0 (load marked align 1), insert it
; into lane 0 of an otherwise undef <2 x i32>, and return that vector
; multiplied by itself.
; The checks expect the index scaling to fold into the load's addressing mode
; (ldr s0, [x0, x1, lsl #2]) followed by mul.2s.
703 define <2 x i32> @fct28(ptr nocapture %sp0, i64 %offset) {
704 ; CHECK-LABEL: fct28:
705 ; CHECK: // %bb.0: // %entry
706 ; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
707 ; CHECK-NEXT: mul.2s v0, v0, v0
710 %addr = getelementptr i32, ptr %sp0, i64 %offset
711 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
712 %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
713 %vmull.i = mul <2 x i32> %vec, %vec
714 ret <2 x i32> %vmull.i
; fct29: load the i32 at index %offset of %sp0 (load marked align 1), insert it
; into lane 0 of an otherwise undef <4 x i32>, and return that vector
; multiplied by itself.
; The checks expect the index scaling to fold into the load's addressing mode
; (ldr s0, [x0, x1, lsl #2]) followed by mul.4s.
717 define <4 x i32> @fct29(ptr nocapture %sp0, i64 %offset) {
718 ; CHECK-LABEL: fct29:
719 ; CHECK: // %bb.0: // %entry
720 ; CHECK-NEXT: ldr s0, [x0, x1, lsl #2]
721 ; CHECK-NEXT: mul.4s v0, v0, v0
724 %addr = getelementptr i32, ptr %sp0, i64 %offset
725 %pix_sp0.0.copyload = load i32, ptr %addr, align 1
726 %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
727 %vmull.i = mul <4 x i32> %vec, %vec
728 ret <4 x i32> %vmull.i
; fct30: load the i64 at index %offset of %sp0 (load marked align 1) and insert
; it into lane 0 of a <1 x i64>.
; The check expects the index scaling to fold into the load's addressing mode:
; ldr d0, [x0, x1, lsl #3].
731 define <1 x i64> @fct30(ptr nocapture %sp0, i64 %offset) {
732 ; CHECK-LABEL: fct30:
733 ; CHECK: // %bb.0: // %entry
734 ; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
737 %addr = getelementptr i64, ptr %sp0, i64 %offset
738 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
739 %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
743 define <2 x i64> @fct31(ptr nocapture %sp0, i64 %offset) {
744 ; CHECK-LABEL: fct31:
745 ; CHECK: // %bb.0: // %entry
746 ; CHECK-NEXT: ldr d0, [x0, x1, lsl #3]
749 %addr = getelementptr i64, ptr %sp0, i64 %offset
750 %pix_sp0.0.copyload = load i64, ptr %addr, align 1
751 %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0