1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
; Single-field wrapper structs around one vector each. @t1 stores through a
; pointer to %type1 (128-bit vector) and @t2 through a pointer to %type2
; (64-bit vector).
5 %type1 = type { <16 x i8> }
6 %type2 = type { <8 x i8> }
; NOTE(review): %type3 is not referenced by any function visible in this section.
7 %type3 = type { <4 x i16> }
; t1: loads a %type1* from %argtable and stores an all-zero <16 x i8> into
; field 0 of the struct it points to. Expected code: the pointer is loaded
; from [x0] and q0 is stored through that same register.
10 define hidden fastcc void @t1(%type1** %argtable) nounwind {
13 ; CHECK: ldr x[[REG:[0-9]+]], [x0]
14 ; CHECK: str q0, [x[[REG]]]
15 %tmp1 = load %type1*, %type1** %argtable, align 8
16 %tmp2 = getelementptr inbounds %type1, %type1* %tmp1, i64 0, i32 0
17 store <16 x i8> zeroinitializer, <16 x i8>* %tmp2, align 16
; t2: 64-bit counterpart of @t1. Loads a %type2* from %argtable and stores an
; all-zero <8 x i8> into field 0. Expected code: the pointer is loaded from
; [x0] and d0 is stored through that same register.
21 define hidden fastcc void @t2(%type2** %argtable) nounwind {
24 ; CHECK: ldr x[[REG:[0-9]+]], [x0]
25 ; CHECK: str d0, [x[[REG]]]
26 %tmp1 = load %type2*, %type2** %argtable, align 8
27 %tmp2 = getelementptr inbounds %type2, %type2* %tmp1, i64 0, i32 0
28 store <8 x i8> zeroinitializer, <8 x i8>* %tmp2, align 8
32 ; Add a bunch of tests for rdar://11246289.
; Global pointer slots, one per vector type. Each fct1_<type>/fct2_<type>
; test loads its destination base pointer from the matching global.
; 128-bit integer vectors:
34 @globalArray64x2 = common global <2 x i64>* null, align 8
35 @globalArray32x4 = common global <4 x i32>* null, align 8
36 @globalArray16x8 = common global <8 x i16>* null, align 8
37 @globalArray8x16 = common global <16 x i8>* null, align 8
; 64-bit integer vectors:
38 @globalArray64x1 = common global <1 x i64>* null, align 8
39 @globalArray32x2 = common global <2 x i32>* null, align 8
40 @globalArray16x4 = common global <4 x i16>* null, align 8
41 @globalArray8x8 = common global <8 x i8>* null, align 8
; Floating-point vectors.
; NOTE(review): none of the floatglobalArray* globals is referenced by a
; function visible in this section.
42 @floatglobalArray64x2 = common global <2 x double>* null, align 8
43 @floatglobalArray32x4 = common global <4 x float>* null, align 8
44 @floatglobalArray64x1 = common global <1 x double>* null, align 8
45 @floatglobalArray32x2 = common global <2 x float>* null, align 8
; fct1_64x2: copies the <2 x i64> at %array[%offset] to
; (*@globalArray64x2)[%offset]. Expected code: the index is scaled once
; (lsl #4, 16-byte elements) and the scaled register is reused as the register
; offset of both the q-register load and the q-register store.
; The load pattern is closed with its final ']' so that it pins the complete
; address operand, exactly like the sibling fct1_* tests.
47 define void @fct1_64x2(<2 x i64>* nocapture %array, i64 %offset) nounwind ssp {
49 ; CHECK-LABEL: fct1_64x2:
50 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
51 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
52 ; CHECK: ldr [[BASE:x[0-9]+]],
53 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
54 %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 %offset
55 %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
56 %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
57 %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 %offset
58 store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
; fct2_64x2: copies element 3 of %array to element 5 of *@globalArray64x2.
; With 16-byte <2 x i64> elements the expected immediate byte offsets are
; #48 for the load and #80 for the store.
62 define void @fct2_64x2(<2 x i64>* nocapture %array) nounwind ssp {
64 ; CHECK-LABEL: fct2_64x2:
65 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
66 ; CHECK: ldr [[BASE:x[0-9]+]],
67 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
68 %arrayidx = getelementptr inbounds <2 x i64>, <2 x i64>* %array, i64 3
69 %tmp = load <2 x i64>, <2 x i64>* %arrayidx, align 16
70 %tmp1 = load <2 x i64>*, <2 x i64>** @globalArray64x2, align 8
71 %arrayidx1 = getelementptr inbounds <2 x i64>, <2 x i64>* %tmp1, i64 5
72 store <2 x i64> %tmp, <2 x i64>* %arrayidx1, align 16
; fct1_32x4: copies the <4 x i32> at %array[%offset] to
; (*@globalArray32x4)[%offset]. Expected code: one lsl #4 of the index, reused
; as the register offset of the q-register load and store.
76 define void @fct1_32x4(<4 x i32>* nocapture %array, i64 %offset) nounwind ssp {
78 ; CHECK-LABEL: fct1_32x4:
79 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
80 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
81 ; CHECK: ldr [[BASE:x[0-9]+]],
82 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
83 %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 %offset
84 %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
85 %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
86 %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 %offset
87 store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
; fct2_32x4: copies element 3 of %array to element 5 of *@globalArray32x4.
; With 16-byte <4 x i32> elements the expected immediate byte offsets are
; #48 for the load and #80 for the store.
91 define void @fct2_32x4(<4 x i32>* nocapture %array) nounwind ssp {
93 ; CHECK-LABEL: fct2_32x4:
94 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
95 ; CHECK: ldr [[BASE:x[0-9]+]],
96 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
97 %arrayidx = getelementptr inbounds <4 x i32>, <4 x i32>* %array, i64 3
98 %tmp = load <4 x i32>, <4 x i32>* %arrayidx, align 16
99 %tmp1 = load <4 x i32>*, <4 x i32>** @globalArray32x4, align 8
100 %arrayidx1 = getelementptr inbounds <4 x i32>, <4 x i32>* %tmp1, i64 5
101 store <4 x i32> %tmp, <4 x i32>* %arrayidx1, align 16
; fct1_16x8: copies the <8 x i16> at %array[%offset] to
; (*@globalArray16x8)[%offset]. Expected code: one lsl #4 of the index, reused
; as the register offset of the q-register load and store.
105 define void @fct1_16x8(<8 x i16>* nocapture %array, i64 %offset) nounwind ssp {
107 ; CHECK-LABEL: fct1_16x8:
108 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
109 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
110 ; CHECK: ldr [[BASE:x[0-9]+]],
111 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
112 %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 %offset
113 %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
114 %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
115 %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 %offset
116 store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
; fct2_16x8: copies element 3 of %array to element 5 of *@globalArray16x8.
; With 16-byte <8 x i16> elements the expected immediate byte offsets are
; #48 for the load and #80 for the store.
120 define void @fct2_16x8(<8 x i16>* nocapture %array) nounwind ssp {
122 ; CHECK-LABEL: fct2_16x8:
123 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
124 ; CHECK: ldr [[BASE:x[0-9]+]],
125 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
126 %arrayidx = getelementptr inbounds <8 x i16>, <8 x i16>* %array, i64 3
127 %tmp = load <8 x i16>, <8 x i16>* %arrayidx, align 16
128 %tmp1 = load <8 x i16>*, <8 x i16>** @globalArray16x8, align 8
129 %arrayidx1 = getelementptr inbounds <8 x i16>, <8 x i16>* %tmp1, i64 5
130 store <8 x i16> %tmp, <8 x i16>* %arrayidx1, align 16
; fct1_8x16: copies the <16 x i8> at %array[%offset] to
; (*@globalArray8x16)[%offset]. Expected code: one lsl #4 of the index, reused
; as the register offset of the q-register load and store.
134 define void @fct1_8x16(<16 x i8>* nocapture %array, i64 %offset) nounwind ssp {
136 ; CHECK-LABEL: fct1_8x16:
137 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #4
138 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
139 ; CHECK: ldr [[BASE:x[0-9]+]],
140 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
141 %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 %offset
142 %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
143 %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
144 %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 %offset
145 store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
; fct2_8x16: copies element 3 of %array to element 5 of *@globalArray8x16.
; With 16-byte <16 x i8> elements the expected immediate byte offsets are
; #48 for the load and #80 for the store.
149 define void @fct2_8x16(<16 x i8>* nocapture %array) nounwind ssp {
151 ; CHECK-LABEL: fct2_8x16:
152 ; CHECK: ldr [[DEST:q[0-9]+]], [x0, #48]
153 ; CHECK: ldr [[BASE:x[0-9]+]],
154 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #80]
155 %arrayidx = getelementptr inbounds <16 x i8>, <16 x i8>* %array, i64 3
156 %tmp = load <16 x i8>, <16 x i8>* %arrayidx, align 16
157 %tmp1 = load <16 x i8>*, <16 x i8>** @globalArray8x16, align 8
158 %arrayidx1 = getelementptr inbounds <16 x i8>, <16 x i8>* %tmp1, i64 5
159 store <16 x i8> %tmp, <16 x i8>* %arrayidx1, align 16
; fct1_64x1: copies the <1 x i64> at %array[%offset] to
; (*@globalArray64x1)[%offset]. Expected code: one lsl #3 of the index
; (8-byte elements), reused as the register offset of the d-register load and
; store.
163 define void @fct1_64x1(<1 x i64>* nocapture %array, i64 %offset) nounwind ssp {
165 ; CHECK-LABEL: fct1_64x1:
166 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
167 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
168 ; CHECK: ldr [[BASE:x[0-9]+]],
169 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
170 %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 %offset
171 %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
172 %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
173 %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 %offset
174 store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
; fct2_64x1: copies element 3 of %array to element 5 of *@globalArray64x1.
; With 8-byte <1 x i64> elements the expected immediate byte offsets are
; #24 for the load and #40 for the store.
178 define void @fct2_64x1(<1 x i64>* nocapture %array) nounwind ssp {
180 ; CHECK-LABEL: fct2_64x1:
181 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
182 ; CHECK: ldr [[BASE:x[0-9]+]],
183 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
184 %arrayidx = getelementptr inbounds <1 x i64>, <1 x i64>* %array, i64 3
185 %tmp = load <1 x i64>, <1 x i64>* %arrayidx, align 8
186 %tmp1 = load <1 x i64>*, <1 x i64>** @globalArray64x1, align 8
187 %arrayidx1 = getelementptr inbounds <1 x i64>, <1 x i64>* %tmp1, i64 5
188 store <1 x i64> %tmp, <1 x i64>* %arrayidx1, align 8
; fct1_32x2: copies the <2 x i32> at %array[%offset] to
; (*@globalArray32x2)[%offset]. Expected code: one lsl #3 of the index, reused
; as the register offset of the d-register load and store.
192 define void @fct1_32x2(<2 x i32>* nocapture %array, i64 %offset) nounwind ssp {
194 ; CHECK-LABEL: fct1_32x2:
195 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
196 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
197 ; CHECK: ldr [[BASE:x[0-9]+]],
198 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
199 %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 %offset
200 %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
201 %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
202 %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 %offset
203 store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
; fct2_32x2: copies element 3 of %array to element 5 of *@globalArray32x2.
; With 8-byte <2 x i32> elements the expected immediate byte offsets are
; #24 for the load and #40 for the store.
207 define void @fct2_32x2(<2 x i32>* nocapture %array) nounwind ssp {
209 ; CHECK-LABEL: fct2_32x2:
210 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
211 ; CHECK: ldr [[BASE:x[0-9]+]],
212 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
213 %arrayidx = getelementptr inbounds <2 x i32>, <2 x i32>* %array, i64 3
214 %tmp = load <2 x i32>, <2 x i32>* %arrayidx, align 8
215 %tmp1 = load <2 x i32>*, <2 x i32>** @globalArray32x2, align 8
216 %arrayidx1 = getelementptr inbounds <2 x i32>, <2 x i32>* %tmp1, i64 5
217 store <2 x i32> %tmp, <2 x i32>* %arrayidx1, align 8
; fct1_16x4: copies the <4 x i16> at %array[%offset] to
; (*@globalArray16x4)[%offset]. Expected code: one lsl #3 of the index, reused
; as the register offset of the d-register load and store.
221 define void @fct1_16x4(<4 x i16>* nocapture %array, i64 %offset) nounwind ssp {
223 ; CHECK-LABEL: fct1_16x4:
224 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
225 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
226 ; CHECK: ldr [[BASE:x[0-9]+]],
227 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
228 %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 %offset
229 %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
230 %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
231 %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 %offset
232 store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
; fct2_16x4: copies element 3 of %array to element 5 of *@globalArray16x4.
; With 8-byte <4 x i16> elements the expected immediate byte offsets are
; #24 for the load and #40 for the store.
236 define void @fct2_16x4(<4 x i16>* nocapture %array) nounwind ssp {
238 ; CHECK-LABEL: fct2_16x4:
239 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, #24]
240 ; CHECK: ldr [[BASE:x[0-9]+]],
241 ; CHECK: str [[DEST]], {{\[}}[[BASE]], #40]
242 %arrayidx = getelementptr inbounds <4 x i16>, <4 x i16>* %array, i64 3
243 %tmp = load <4 x i16>, <4 x i16>* %arrayidx, align 8
244 %tmp1 = load <4 x i16>*, <4 x i16>** @globalArray16x4, align 8
245 %arrayidx1 = getelementptr inbounds <4 x i16>, <4 x i16>* %tmp1, i64 5
246 store <4 x i16> %tmp, <4 x i16>* %arrayidx1, align 8
; fct1_8x8: copies the <8 x i8> at %array[%offset] to
; (*@globalArray8x8)[%offset]. Expected code: one lsl #3 of the index, reused
; as the register offset of the d-register load and store.
250 define void @fct1_8x8(<8 x i8>* nocapture %array, i64 %offset) nounwind ssp {
252 ; CHECK-LABEL: fct1_8x8:
253 ; CHECK: lsl [[SHIFTEDOFFSET:x[0-9]+]], x1, #3
254 ; CHECK: ldr [[DEST:d[0-9]+]], [x0, [[SHIFTEDOFFSET]]]
255 ; CHECK: ldr [[BASE:x[0-9]+]],
256 ; CHECK: str [[DEST]], {{\[}}[[BASE]], [[SHIFTEDOFFSET]]]
257 %arrayidx = getelementptr inbounds <8 x i8>, <8 x i8>* %array, i64 %offset
258 %tmp = load <8 x i8>, <8 x i8>* %arrayidx, align 8
259 %tmp1 = load <8 x i8>*, <8 x i8>** @globalArray8x8, align 8
260 %arrayidx1 = getelementptr inbounds <8 x i8>, <8 x i8>* %tmp1, i64 %offset
261 store <8 x i8> %tmp, <8 x i8>* %arrayidx1, align 8
265 ; Add a bunch of tests for rdar://13258794: Match LDUR/STUR for D and Q
266 ; registers for unscaled vector accesses
; fct0: loads a <1 x i64> from %str + 3 bytes. The byte offset is not a
; multiple of the 8-byte access size, so an unscaled ldur into a d register
; with immediate #3 is expected.
268 define <1 x i64> @fct0(i8* %str) nounwind readonly ssp {
271 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
272 %p = getelementptr inbounds i8, i8* %str, i64 3
273 %q = bitcast i8* %p to <1 x i64>*
274 %0 = load <1 x i64>, <1 x i64>* %q, align 8
; fct1: loads a <2 x i32> from %str + 3 bytes; an unscaled ldur into a
; d register with immediate #3 is expected.
278 define <2 x i32> @fct1(i8* %str) nounwind readonly ssp {
281 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
282 %p = getelementptr inbounds i8, i8* %str, i64 3
283 %q = bitcast i8* %p to <2 x i32>*
284 %0 = load <2 x i32>, <2 x i32>* %q, align 8
; fct2: loads a <4 x i16> from %str + 3 bytes; an unscaled ldur into a
; d register with immediate #3 is expected.
288 define <4 x i16> @fct2(i8* %str) nounwind readonly ssp {
291 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
292 %p = getelementptr inbounds i8, i8* %str, i64 3
293 %q = bitcast i8* %p to <4 x i16>*
294 %0 = load <4 x i16>, <4 x i16>* %q, align 8
; fct3: loads a <8 x i8> from %str + 3 bytes; an unscaled ldur into a
; d register with immediate #3 is expected.
298 define <8 x i8> @fct3(i8* %str) nounwind readonly ssp {
301 ; CHECK: ldur {{d[0-9]+}}, [{{x[0-9]+}}, #3]
302 %p = getelementptr inbounds i8, i8* %str, i64 3
303 %q = bitcast i8* %p to <8 x i8>*
304 %0 = load <8 x i8>, <8 x i8>* %q, align 8
; fct4: loads a <2 x i64> from %str + 3 bytes. The byte offset is not a
; multiple of the 16-byte access size, so an unscaled ldur into a q register
; with immediate #3 is expected.
308 define <2 x i64> @fct4(i8* %str) nounwind readonly ssp {
311 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
312 %p = getelementptr inbounds i8, i8* %str, i64 3
313 %q = bitcast i8* %p to <2 x i64>*
314 %0 = load <2 x i64>, <2 x i64>* %q, align 16
; fct5: loads a <4 x i32> from %str + 3 bytes; an unscaled ldur into a
; q register with immediate #3 is expected.
318 define <4 x i32> @fct5(i8* %str) nounwind readonly ssp {
321 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
322 %p = getelementptr inbounds i8, i8* %str, i64 3
323 %q = bitcast i8* %p to <4 x i32>*
324 %0 = load <4 x i32>, <4 x i32>* %q, align 16
; fct6: loads a <8 x i16> from %str + 3 bytes; an unscaled ldur into a
; q register with immediate #3 is expected.
328 define <8 x i16> @fct6(i8* %str) nounwind readonly ssp {
331 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
332 %p = getelementptr inbounds i8, i8* %str, i64 3
333 %q = bitcast i8* %p to <8 x i16>*
334 %0 = load <8 x i16>, <8 x i16>* %q, align 16
; fct7: loads a <16 x i8> from %str + 3 bytes; an unscaled ldur into a
; q register with immediate #3 is expected.
338 define <16 x i8> @fct7(i8* %str) nounwind readonly ssp {
341 ; CHECK: ldur {{q[0-9]+}}, [{{x[0-9]+}}, #3]
342 %p = getelementptr inbounds i8, i8* %str, i64 3
343 %q = bitcast i8* %p to <16 x i8>*
344 %0 = load <16 x i8>, <16 x i8>* %q, align 16
; fct8: loads a <1 x i64> from %str + 3 and stores it to %str + 4. Expected
; code: an unscaled ldur (#3) into a d register, then an unscaled stur (#4)
; of that same register through the same base register.
348 define void @fct8(i8* %str) nounwind ssp {
351 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
352 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
353 %p = getelementptr inbounds i8, i8* %str, i64 3
354 %q = bitcast i8* %p to <1 x i64>*
355 %0 = load <1 x i64>, <1 x i64>* %q, align 8
356 %p2 = getelementptr inbounds i8, i8* %str, i64 4
357 %q2 = bitcast i8* %p2 to <1 x i64>*
358 store <1 x i64> %0, <1 x i64>* %q2, align 8
; fct9: loads a <2 x i32> from %str + 3 and stores it to %str + 4. Expected
; code: ldur (#3) into a d register, then stur (#4) of that same register
; through the same base register.
362 define void @fct9(i8* %str) nounwind ssp {
365 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
366 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
367 %p = getelementptr inbounds i8, i8* %str, i64 3
368 %q = bitcast i8* %p to <2 x i32>*
369 %0 = load <2 x i32>, <2 x i32>* %q, align 8
370 %p2 = getelementptr inbounds i8, i8* %str, i64 4
371 %q2 = bitcast i8* %p2 to <2 x i32>*
372 store <2 x i32> %0, <2 x i32>* %q2, align 8
; fct10: loads a <4 x i16> from %str + 3 and stores it to %str + 4. Expected
; code: ldur (#3) into a d register, then stur (#4) of that same register
; through the same base register.
376 define void @fct10(i8* %str) nounwind ssp {
378 ; CHECK-LABEL: fct10:
379 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
380 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
381 %p = getelementptr inbounds i8, i8* %str, i64 3
382 %q = bitcast i8* %p to <4 x i16>*
383 %0 = load <4 x i16>, <4 x i16>* %q, align 8
384 %p2 = getelementptr inbounds i8, i8* %str, i64 4
385 %q2 = bitcast i8* %p2 to <4 x i16>*
386 store <4 x i16> %0, <4 x i16>* %q2, align 8
; fct11: loads a <8 x i8> from %str + 3 and stores it to %str + 4. Expected
; code: ldur (#3) into a d register, then stur (#4) of that same register
; through the same base register.
390 define void @fct11(i8* %str) nounwind ssp {
392 ; CHECK-LABEL: fct11:
393 ; CHECK: ldur [[DESTREG:d[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
394 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
395 %p = getelementptr inbounds i8, i8* %str, i64 3
396 %q = bitcast i8* %p to <8 x i8>*
397 %0 = load <8 x i8>, <8 x i8>* %q, align 8
398 %p2 = getelementptr inbounds i8, i8* %str, i64 4
399 %q2 = bitcast i8* %p2 to <8 x i8>*
400 store <8 x i8> %0, <8 x i8>* %q2, align 8
; fct12: loads a <2 x i64> from %str + 3 and stores it to %str + 4. Expected
; code: ldur (#3) into a q register, then stur (#4) of that same register
; through the same base register.
404 define void @fct12(i8* %str) nounwind ssp {
406 ; CHECK-LABEL: fct12:
407 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
408 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
409 %p = getelementptr inbounds i8, i8* %str, i64 3
410 %q = bitcast i8* %p to <2 x i64>*
411 %0 = load <2 x i64>, <2 x i64>* %q, align 16
412 %p2 = getelementptr inbounds i8, i8* %str, i64 4
413 %q2 = bitcast i8* %p2 to <2 x i64>*
414 store <2 x i64> %0, <2 x i64>* %q2, align 16
; fct13: loads a <4 x i32> from %str + 3 and stores it to %str + 4. Expected
; code: ldur (#3) into a q register, then stur (#4) of that same register
; through the same base register.
418 define void @fct13(i8* %str) nounwind ssp {
420 ; CHECK-LABEL: fct13:
421 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
422 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
423 %p = getelementptr inbounds i8, i8* %str, i64 3
424 %q = bitcast i8* %p to <4 x i32>*
425 %0 = load <4 x i32>, <4 x i32>* %q, align 16
426 %p2 = getelementptr inbounds i8, i8* %str, i64 4
427 %q2 = bitcast i8* %p2 to <4 x i32>*
428 store <4 x i32> %0, <4 x i32>* %q2, align 16
; fct14: loads a <8 x i16> from %str + 3 and stores it to %str + 4. Expected
; code: ldur (#3) into a q register, then stur (#4) of that same register
; through the same base register.
432 define void @fct14(i8* %str) nounwind ssp {
434 ; CHECK-LABEL: fct14:
435 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
436 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
437 %p = getelementptr inbounds i8, i8* %str, i64 3
438 %q = bitcast i8* %p to <8 x i16>*
439 %0 = load <8 x i16>, <8 x i16>* %q, align 16
440 %p2 = getelementptr inbounds i8, i8* %str, i64 4
441 %q2 = bitcast i8* %p2 to <8 x i16>*
442 store <8 x i16> %0, <8 x i16>* %q2, align 16
; fct15: loads a <16 x i8> from %str + 3 and stores it to %str + 4. Expected
; code: ldur (#3) into a q register, then stur (#4) of that same register
; through the same base register.
446 define void @fct15(i8* %str) nounwind ssp {
448 ; CHECK-LABEL: fct15:
449 ; CHECK: ldur [[DESTREG:q[0-9]+]], {{\[}}[[BASEREG:x[0-9]+]], #3]
450 ; CHECK: stur [[DESTREG]], {{\[}}[[BASEREG]], #4]
451 %p = getelementptr inbounds i8, i8* %str, i64 3
452 %q = bitcast i8* %p to <16 x i8>*
453 %0 = load <16 x i8>, <16 x i8>* %q, align 16
454 %p2 = getelementptr inbounds i8, i8* %str, i64 4
455 %q2 = bitcast i8* %p2 to <16 x i8>*
456 store <16 x i8> %0, <16 x i8>* %q2, align 16
460 ; Check the building of vector from a single loaded value.
461 ; Part of <rdar://problem/14170854>
463 ; Single loads with immediate offset.
; fct16: loads the i8 at %sp0[1], inserts it into lane 0 of an otherwise undef
; <8 x i8> and multiplies that vector by itself. Expected code: the scalar is
; loaded straight into a b register (byte offset #1) and the very next
; instruction is the mul.8b that uses it for both operands.
464 define <8 x i8> @fct16(i8* nocapture %sp0) {
465 ; CHECK-LABEL: fct16:
466 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
467 ; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
469 %addr = getelementptr i8, i8* %sp0, i64 1
470 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
471 %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
472 %vmull.i = mul <8 x i8> %vec, %vec
473 ret <8 x i8> %vmull.i
; fct17: 128-bit variant of @fct16. Loads the i8 at %sp0[1] into lane 0 of an
; undef <16 x i8> and squares the vector. Expected code: ldr into a b register
; (#1) immediately followed by mul.16b.
476 define <16 x i8> @fct17(i8* nocapture %sp0) {
477 ; CHECK-LABEL: fct17:
478 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, #1]
479 ; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
481 %addr = getelementptr i8, i8* %sp0, i64 1
482 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
483 %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
484 %vmull.i = mul <16 x i8> %vec, %vec
485 ret <16 x i8> %vmull.i
; fct18: loads the i16 at %sp0[1] (byte offset 2) into lane 0 of an undef
; <4 x i16> and squares the vector. Expected code: ldr into an h register
; (#2) immediately followed by mul.4h.
488 define <4 x i16> @fct18(i16* nocapture %sp0) {
489 ; CHECK-LABEL: fct18:
490 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
491 ; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
493 %addr = getelementptr i16, i16* %sp0, i64 1
494 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
495 %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
496 %vmull.i = mul <4 x i16> %vec, %vec
497 ret <4 x i16> %vmull.i
; fct19: 128-bit variant of @fct18. Loads the i16 at %sp0[1] into lane 0 of an
; undef <8 x i16> and squares the vector. Expected code: ldr into an h
; register (#2) immediately followed by mul.8h.
500 define <8 x i16> @fct19(i16* nocapture %sp0) {
501 ; CHECK-LABEL: fct19:
502 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, #2]
503 ; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
505 %addr = getelementptr i16, i16* %sp0, i64 1
506 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
507 %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
508 %vmull.i = mul <8 x i16> %vec, %vec
509 ret <8 x i16> %vmull.i
; fct20: loads the i32 at %sp0[1] (byte offset 4) into lane 0 of an undef
; <2 x i32> and squares the vector. Expected code: ldr into an s register
; (#4) immediately followed by mul.2s.
512 define <2 x i32> @fct20(i32* nocapture %sp0) {
513 ; CHECK-LABEL: fct20:
514 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
515 ; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
517 %addr = getelementptr i32, i32* %sp0, i64 1
518 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
519 %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
520 %vmull.i = mul <2 x i32> %vec, %vec
521 ret <2 x i32> %vmull.i
; fct21: 128-bit variant of @fct20. Loads the i32 at %sp0[1] into lane 0 of an
; undef <4 x i32> and squares the vector. Expected code: ldr into an s
; register (#4) immediately followed by mul.4s.
524 define <4 x i32> @fct21(i32* nocapture %sp0) {
525 ; CHECK-LABEL: fct21:
526 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, #4]
527 ; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
529 %addr = getelementptr i32, i32* %sp0, i64 1
530 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
531 %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
532 %vmull.i = mul <4 x i32> %vec, %vec
533 ret <4 x i32> %vmull.i
; fct22: loads the i64 at %sp0[1] (byte offset 8) and inserts it into lane 0
; of a <1 x i64>. Expected code: a single ldr straight into d0 with
; immediate #8.
536 define <1 x i64> @fct22(i64* nocapture %sp0) {
537 ; CHECK-LABEL: fct22:
538 ; CHECK: ldr d0, [x0, #8]
540 %addr = getelementptr i64, i64* %sp0, i64 1
541 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
542 %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
; fct23: loads the i64 at %sp0[1] (byte offset 8) and inserts it into lane 0
; of an otherwise undef <2 x i64>. Expected code: ldr into a d register with
; immediate #8 (the register number is captured, not fixed).
546 define <2 x i64> @fct23(i64* nocapture %sp0) {
547 ; CHECK-LABEL: fct23:
548 ; CHECK: ldr d[[REGNUM:[0-9]+]], [x0, #8]
550 %addr = getelementptr i64, i64* %sp0, i64 1
551 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
552 %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
557 ; Single loads with register offset.
; fct24: register-offset variant of @fct16. Loads the i8 at %sp0[%offset] into
; lane 0 of an undef <8 x i8> and squares the vector. Expected code: ldr into
; a b register addressed as [x0, x1], immediately followed by mul.8b.
558 define <8 x i8> @fct24(i8* nocapture %sp0, i64 %offset) {
559 ; CHECK-LABEL: fct24:
560 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
561 ; CHECK-NEXT: mul.8b v0, v[[REGNUM]], v[[REGNUM]]
563 %addr = getelementptr i8, i8* %sp0, i64 %offset
564 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
565 %vec = insertelement <8 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
566 %vmull.i = mul <8 x i8> %vec, %vec
567 ret <8 x i8> %vmull.i
; fct25: register-offset variant of @fct17. Loads the i8 at %sp0[%offset] into
; lane 0 of an undef <16 x i8> and squares the vector. Expected code: ldr into
; a b register addressed as [x0, x1], immediately followed by mul.16b.
570 define <16 x i8> @fct25(i8* nocapture %sp0, i64 %offset) {
571 ; CHECK-LABEL: fct25:
572 ; CHECK: ldr b[[REGNUM:[0-9]+]], [x0, x1]
573 ; CHECK-NEXT: mul.16b v0, v[[REGNUM]], v[[REGNUM]]
575 %addr = getelementptr i8, i8* %sp0, i64 %offset
576 %pix_sp0.0.copyload = load i8, i8* %addr, align 1
577 %vec = insertelement <16 x i8> undef, i8 %pix_sp0.0.copyload, i32 0
578 %vmull.i = mul <16 x i8> %vec, %vec
579 ret <16 x i8> %vmull.i
; fct26: register-offset variant of @fct18. Loads the i16 at %sp0[%offset]
; into lane 0 of an undef <4 x i16> and squares the vector. Expected code: the
; index scaling is folded into the load (lsl #1) into an h register,
; immediately followed by mul.4h.
582 define <4 x i16> @fct26(i16* nocapture %sp0, i64 %offset) {
583 ; CHECK-LABEL: fct26:
584 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
585 ; CHECK-NEXT: mul.4h v0, v[[REGNUM]], v[[REGNUM]]
587 %addr = getelementptr i16, i16* %sp0, i64 %offset
588 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
589 %vec = insertelement <4 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
590 %vmull.i = mul <4 x i16> %vec, %vec
591 ret <4 x i16> %vmull.i
; fct27: register-offset variant of @fct19. Loads the i16 at %sp0[%offset]
; into lane 0 of an undef <8 x i16> and squares the vector. Expected code: ldr
; into an h register with the index shifted by lsl #1, immediately followed by
; mul.8h.
594 define <8 x i16> @fct27(i16* nocapture %sp0, i64 %offset) {
595 ; CHECK-LABEL: fct27:
596 ; CHECK: ldr h[[REGNUM:[0-9]+]], [x0, x1, lsl #1]
597 ; CHECK-NEXT: mul.8h v0, v[[REGNUM]], v[[REGNUM]]
599 %addr = getelementptr i16, i16* %sp0, i64 %offset
600 %pix_sp0.0.copyload = load i16, i16* %addr, align 1
601 %vec = insertelement <8 x i16> undef, i16 %pix_sp0.0.copyload, i32 0
602 %vmull.i = mul <8 x i16> %vec, %vec
603 ret <8 x i16> %vmull.i
; fct28: register-offset variant of @fct20. Loads the i32 at %sp0[%offset]
; into lane 0 of an undef <2 x i32> and squares the vector. Expected code: ldr
; into an s register with the index shifted by lsl #2, immediately followed by
; mul.2s.
606 define <2 x i32> @fct28(i32* nocapture %sp0, i64 %offset) {
607 ; CHECK-LABEL: fct28:
608 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
609 ; CHECK-NEXT: mul.2s v0, v[[REGNUM]], v[[REGNUM]]
611 %addr = getelementptr i32, i32* %sp0, i64 %offset
612 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
613 %vec = insertelement <2 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
614 %vmull.i = mul <2 x i32> %vec, %vec
615 ret <2 x i32> %vmull.i
; fct29: register-offset variant of @fct21. Loads the i32 at %sp0[%offset]
; into lane 0 of an undef <4 x i32> and squares the vector. Expected code: ldr
; into an s register with the index shifted by lsl #2, immediately followed by
; mul.4s.
618 define <4 x i32> @fct29(i32* nocapture %sp0, i64 %offset) {
619 ; CHECK-LABEL: fct29:
620 ; CHECK: ldr s[[REGNUM:[0-9]+]], [x0, x1, lsl #2]
621 ; CHECK-NEXT: mul.4s v0, v[[REGNUM]], v[[REGNUM]]
623 %addr = getelementptr i32, i32* %sp0, i64 %offset
624 %pix_sp0.0.copyload = load i32, i32* %addr, align 1
625 %vec = insertelement <4 x i32> undef, i32 %pix_sp0.0.copyload, i32 0
626 %vmull.i = mul <4 x i32> %vec, %vec
627 ret <4 x i32> %vmull.i
; fct30: register-offset variant of @fct22. Loads the i64 at %sp0[%offset] and
; inserts it into lane 0 of a <1 x i64>. Expected code: a single ldr straight
; into d0 with the index shifted by lsl #3.
630 define <1 x i64> @fct30(i64* nocapture %sp0, i64 %offset) {
631 ; CHECK-LABEL: fct30:
632 ; CHECK: ldr d0, [x0, x1, lsl #3]
634 %addr = getelementptr i64, i64* %sp0, i64 %offset
635 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
636 %vec = insertelement <1 x i64> undef, i64 %pix_sp0.0.copyload, i32 0
640 define <2 x i64> @fct31(i64* nocapture %sp0, i64 %offset) {
641 ; CHECK-LABEL: fct31:
642 ; CHECK: ldr d0, [x0, x1, lsl #3]
644 %addr = getelementptr i64, i64* %sp0, i64 %offset
645 %pix_sp0.0.copyload = load i64, i64* %addr, align 1
646 %vec = insertelement <2 x i64> undef, i64 %pix_sp0.0.copyload, i32 0