1 ; RUN: llc -mtriple=arm64-apple-ios7.0 -disable-post-ra -o - %s | FileCheck %s
; Pre-indexed <8 x i8> load: loads from %addr + 5 vectors (5 * 8 = 40 bytes) and stores that address to @ptr; a single writeback ldr is expected.
5 define <8 x i8> @test_v8i8_pre_load(<8 x i8>* %addr) {
6 ; CHECK-LABEL: test_v8i8_pre_load:
7 ; CHECK: ldr d0, [x0, #40]!
8 %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
9 %val = load <8 x i8>, <8 x i8>* %newaddr, align 8
10 store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
; Post-indexed <8 x i8> load: loads from %addr and stores %addr + 5 vectors (5 * 8 = 40 bytes) to @ptr; a single post-increment ldr is expected.
14 define <8 x i8> @test_v8i8_post_load(<8 x i8>* %addr) {
15 ; CHECK-LABEL: test_v8i8_post_load:
16 ; CHECK: ldr d0, [x0], #40
17 %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
18 %val = load <8 x i8>, <8 x i8>* %addr, align 8
19 store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
; Pre-indexed <8 x i8> store: writes %in to %addr + 5 vectors (5 * 8 = 40 bytes) and stores that address to @ptr; a single writeback str is expected.
23 define void @test_v8i8_pre_store(<8 x i8> %in, <8 x i8>* %addr) {
24 ; CHECK-LABEL: test_v8i8_pre_store:
25 ; CHECK: str d0, [x0, #40]!
26 %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
27 store <8 x i8> %in, <8 x i8>* %newaddr, align 8
28 store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
; Post-indexed <8 x i8> store: writes %in to %addr and stores %addr + 5 vectors (5 * 8 = 40 bytes) to @ptr; a single post-increment str is expected.
32 define void @test_v8i8_post_store(<8 x i8> %in, <8 x i8>* %addr) {
33 ; CHECK-LABEL: test_v8i8_post_store:
34 ; CHECK: str d0, [x0], #40
35 %newaddr = getelementptr <8 x i8>, <8 x i8>* %addr, i32 5
36 store <8 x i8> %in, <8 x i8>* %addr, align 8
37 store <8 x i8>* %newaddr, <8 x i8>** bitcast(i8** @ptr to <8 x i8>**)
; Pre-indexed <4 x i16> load: loads from %addr + 5 vectors (5 * 8 = 40 bytes) and stores that address to @ptr; a single writeback ldr is expected.
41 define <4 x i16> @test_v4i16_pre_load(<4 x i16>* %addr) {
42 ; CHECK-LABEL: test_v4i16_pre_load:
43 ; CHECK: ldr d0, [x0, #40]!
44 %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
45 %val = load <4 x i16>, <4 x i16>* %newaddr, align 8
46 store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
; Post-indexed <4 x i16> load: loads from %addr and stores %addr + 5 vectors (5 * 8 = 40 bytes) to @ptr; a single post-increment ldr is expected.
50 define <4 x i16> @test_v4i16_post_load(<4 x i16>* %addr) {
51 ; CHECK-LABEL: test_v4i16_post_load:
52 ; CHECK: ldr d0, [x0], #40
53 %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
54 %val = load <4 x i16>, <4 x i16>* %addr, align 8
55 store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
; Pre-indexed <4 x i16> store: writes %in to %addr + 5 vectors (5 * 8 = 40 bytes) and stores that address to @ptr; a single writeback str is expected.
59 define void @test_v4i16_pre_store(<4 x i16> %in, <4 x i16>* %addr) {
60 ; CHECK-LABEL: test_v4i16_pre_store:
61 ; CHECK: str d0, [x0, #40]!
62 %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
63 store <4 x i16> %in, <4 x i16>* %newaddr, align 8
64 store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
; Post-indexed <4 x i16> store: writes %in to %addr and stores %addr + 5 vectors (5 * 8 = 40 bytes) to @ptr; a single post-increment str is expected.
68 define void @test_v4i16_post_store(<4 x i16> %in, <4 x i16>* %addr) {
69 ; CHECK-LABEL: test_v4i16_post_store:
70 ; CHECK: str d0, [x0], #40
71 %newaddr = getelementptr <4 x i16>, <4 x i16>* %addr, i32 5
72 store <4 x i16> %in, <4 x i16>* %addr, align 8
73 store <4 x i16>* %newaddr, <4 x i16>** bitcast(i8** @ptr to <4 x i16>**)
; Pre-indexed <2 x i32> load: loads from %addr + 5 vectors (5 * 8 = 40 bytes) and stores that address to @ptr; a single writeback ldr is expected.
77 define <2 x i32> @test_v2i32_pre_load(<2 x i32>* %addr) {
78 ; CHECK-LABEL: test_v2i32_pre_load:
79 ; CHECK: ldr d0, [x0, #40]!
80 %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
81 %val = load <2 x i32>, <2 x i32>* %newaddr, align 8
82 store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
; Post-indexed <2 x i32> load: loads from %addr and stores %addr + 5 vectors (5 * 8 = 40 bytes) to @ptr; a single post-increment ldr is expected.
86 define <2 x i32> @test_v2i32_post_load(<2 x i32>* %addr) {
87 ; CHECK-LABEL: test_v2i32_post_load:
88 ; CHECK: ldr d0, [x0], #40
89 %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
90 %val = load <2 x i32>, <2 x i32>* %addr, align 8
91 store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
; Pre-indexed <2 x i32> store: writes %in to %addr + 5 vectors (5 * 8 = 40 bytes) and stores that address to @ptr; a single writeback str is expected.
95 define void @test_v2i32_pre_store(<2 x i32> %in, <2 x i32>* %addr) {
96 ; CHECK-LABEL: test_v2i32_pre_store:
97 ; CHECK: str d0, [x0, #40]!
98 %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
99 store <2 x i32> %in, <2 x i32>* %newaddr, align 8
100 store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
; Post-indexed <2 x i32> store: writes %in to %addr and stores %addr + 5 vectors (5 * 8 = 40 bytes) to @ptr; a single post-increment str is expected.
104 define void @test_v2i32_post_store(<2 x i32> %in, <2 x i32>* %addr) {
105 ; CHECK-LABEL: test_v2i32_post_store:
106 ; CHECK: str d0, [x0], #40
107 %newaddr = getelementptr <2 x i32>, <2 x i32>* %addr, i32 5
108 store <2 x i32> %in, <2 x i32>* %addr, align 8
109 store <2 x i32>* %newaddr, <2 x i32>** bitcast(i8** @ptr to <2 x i32>**)
; Pre-indexed <2 x float> load: loads from %addr + 5 vectors (5 * 8 = 40 bytes) and stores that address to @ptr; a single writeback ldr is expected.
113 define <2 x float> @test_v2f32_pre_load(<2 x float>* %addr) {
114 ; CHECK-LABEL: test_v2f32_pre_load:
115 ; CHECK: ldr d0, [x0, #40]!
116 %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
117 %val = load <2 x float>, <2 x float>* %newaddr, align 8
118 store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
; Post-indexed <2 x float> load: loads from %addr and stores %addr + 5 vectors (5 * 8 = 40 bytes) to @ptr; a single post-increment ldr is expected.
122 define <2 x float> @test_v2f32_post_load(<2 x float>* %addr) {
123 ; CHECK-LABEL: test_v2f32_post_load:
124 ; CHECK: ldr d0, [x0], #40
125 %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
126 %val = load <2 x float>, <2 x float>* %addr, align 8
127 store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
; Pre-indexed <2 x float> store: writes %in to %addr + 5 vectors (5 * 8 = 40 bytes) and stores that address to @ptr; a single writeback str is expected.
131 define void @test_v2f32_pre_store(<2 x float> %in, <2 x float>* %addr) {
132 ; CHECK-LABEL: test_v2f32_pre_store:
133 ; CHECK: str d0, [x0, #40]!
134 %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
135 store <2 x float> %in, <2 x float>* %newaddr, align 8
136 store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
; Post-indexed <2 x float> store: writes %in to %addr and stores %addr + 5 vectors (5 * 8 = 40 bytes) to @ptr; a single post-increment str is expected.
140 define void @test_v2f32_post_store(<2 x float> %in, <2 x float>* %addr) {
141 ; CHECK-LABEL: test_v2f32_post_store:
142 ; CHECK: str d0, [x0], #40
143 %newaddr = getelementptr <2 x float>, <2 x float>* %addr, i32 5
144 store <2 x float> %in, <2 x float>* %addr, align 8
145 store <2 x float>* %newaddr, <2 x float>** bitcast(i8** @ptr to <2 x float>**)
; Pre-indexed <1 x i64> load: loads from %addr + 5 vectors (5 * 8 = 40 bytes) and stores that address to @ptr; a single writeback ldr is expected.
149 define <1 x i64> @test_v1i64_pre_load(<1 x i64>* %addr) {
150 ; CHECK-LABEL: test_v1i64_pre_load:
151 ; CHECK: ldr d0, [x0, #40]!
152 %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
153 %val = load <1 x i64>, <1 x i64>* %newaddr, align 8
154 store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
; Post-indexed <1 x i64> load: loads from %addr and stores %addr + 5 vectors (5 * 8 = 40 bytes) to @ptr; a single post-increment ldr is expected.
158 define <1 x i64> @test_v1i64_post_load(<1 x i64>* %addr) {
159 ; CHECK-LABEL: test_v1i64_post_load:
160 ; CHECK: ldr d0, [x0], #40
161 %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
162 %val = load <1 x i64>, <1 x i64>* %addr, align 8
163 store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
; Pre-indexed <1 x i64> store: writes %in to %addr + 5 vectors (5 * 8 = 40 bytes) and stores that address to @ptr; a single writeback str is expected.
167 define void @test_v1i64_pre_store(<1 x i64> %in, <1 x i64>* %addr) {
168 ; CHECK-LABEL: test_v1i64_pre_store:
169 ; CHECK: str d0, [x0, #40]!
170 %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
171 store <1 x i64> %in, <1 x i64>* %newaddr, align 8
172 store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
; Post-indexed <1 x i64> store: writes %in to %addr and stores %addr + 5 vectors (5 * 8 = 40 bytes) to @ptr; a single post-increment str is expected.
176 define void @test_v1i64_post_store(<1 x i64> %in, <1 x i64>* %addr) {
177 ; CHECK-LABEL: test_v1i64_post_store:
178 ; CHECK: str d0, [x0], #40
179 %newaddr = getelementptr <1 x i64>, <1 x i64>* %addr, i32 5
180 store <1 x i64> %in, <1 x i64>* %addr, align 8
181 store <1 x i64>* %newaddr, <1 x i64>** bitcast(i8** @ptr to <1 x i64>**)
; Pre-indexed <16 x i8> load: loads from %addr + 5 vectors (5 * 16 = 80 bytes) and stores that address to @ptr; a single writeback ldr of a q register is expected.
185 define <16 x i8> @test_v16i8_pre_load(<16 x i8>* %addr) {
186 ; CHECK-LABEL: test_v16i8_pre_load:
187 ; CHECK: ldr q0, [x0, #80]!
188 %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
189 %val = load <16 x i8>, <16 x i8>* %newaddr, align 8
190 store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
; Post-indexed <16 x i8> load: loads from %addr and stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr; a single post-increment ldr of a q register is expected.
194 define <16 x i8> @test_v16i8_post_load(<16 x i8>* %addr) {
195 ; CHECK-LABEL: test_v16i8_post_load:
196 ; CHECK: ldr q0, [x0], #80
197 %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
198 %val = load <16 x i8>, <16 x i8>* %addr, align 8
199 store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
; Pre-indexed <16 x i8> store: writes %in to %addr + 5 vectors (5 * 16 = 80 bytes) and stores that address to @ptr; a single writeback str of a q register is expected.
203 define void @test_v16i8_pre_store(<16 x i8> %in, <16 x i8>* %addr) {
204 ; CHECK-LABEL: test_v16i8_pre_store:
205 ; CHECK: str q0, [x0, #80]!
206 %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
207 store <16 x i8> %in, <16 x i8>* %newaddr, align 8
208 store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
; Post-indexed <16 x i8> store: writes %in to %addr and stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr; a single post-increment str of a q register is expected.
212 define void @test_v16i8_post_store(<16 x i8> %in, <16 x i8>* %addr) {
213 ; CHECK-LABEL: test_v16i8_post_store:
214 ; CHECK: str q0, [x0], #80
215 %newaddr = getelementptr <16 x i8>, <16 x i8>* %addr, i32 5
216 store <16 x i8> %in, <16 x i8>* %addr, align 8
217 store <16 x i8>* %newaddr, <16 x i8>** bitcast(i8** @ptr to <16 x i8>**)
; Pre-indexed <8 x i16> load: loads from %addr + 5 vectors (5 * 16 = 80 bytes) and stores that address to @ptr; a single writeback ldr of a q register is expected.
221 define <8 x i16> @test_v8i16_pre_load(<8 x i16>* %addr) {
222 ; CHECK-LABEL: test_v8i16_pre_load:
223 ; CHECK: ldr q0, [x0, #80]!
224 %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
225 %val = load <8 x i16>, <8 x i16>* %newaddr, align 8
226 store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
; Post-indexed <8 x i16> load: loads from %addr and stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr; a single post-increment ldr of a q register is expected.
230 define <8 x i16> @test_v8i16_post_load(<8 x i16>* %addr) {
231 ; CHECK-LABEL: test_v8i16_post_load:
232 ; CHECK: ldr q0, [x0], #80
233 %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
234 %val = load <8 x i16>, <8 x i16>* %addr, align 8
235 store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
; Pre-indexed <8 x i16> store: writes %in to %addr + 5 vectors (5 * 16 = 80 bytes) and stores that address to @ptr; a single writeback str of a q register is expected.
239 define void @test_v8i16_pre_store(<8 x i16> %in, <8 x i16>* %addr) {
240 ; CHECK-LABEL: test_v8i16_pre_store:
241 ; CHECK: str q0, [x0, #80]!
242 %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
243 store <8 x i16> %in, <8 x i16>* %newaddr, align 8
244 store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
; Post-indexed <8 x i16> store: writes %in to %addr and stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr; a single post-increment str of a q register is expected.
248 define void @test_v8i16_post_store(<8 x i16> %in, <8 x i16>* %addr) {
249 ; CHECK-LABEL: test_v8i16_post_store:
250 ; CHECK: str q0, [x0], #80
251 %newaddr = getelementptr <8 x i16>, <8 x i16>* %addr, i32 5
252 store <8 x i16> %in, <8 x i16>* %addr, align 8
253 store <8 x i16>* %newaddr, <8 x i16>** bitcast(i8** @ptr to <8 x i16>**)
; Pre-indexed <4 x i32> load: loads from %addr + 5 vectors (5 * 16 = 80 bytes) and stores that address to @ptr; a single writeback ldr of a q register is expected.
257 define <4 x i32> @test_v4i32_pre_load(<4 x i32>* %addr) {
258 ; CHECK-LABEL: test_v4i32_pre_load:
259 ; CHECK: ldr q0, [x0, #80]!
260 %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
261 %val = load <4 x i32>, <4 x i32>* %newaddr, align 8
262 store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
; Post-indexed <4 x i32> load: loads from %addr and stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr; a single post-increment ldr of a q register is expected.
266 define <4 x i32> @test_v4i32_post_load(<4 x i32>* %addr) {
267 ; CHECK-LABEL: test_v4i32_post_load:
268 ; CHECK: ldr q0, [x0], #80
269 %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
270 %val = load <4 x i32>, <4 x i32>* %addr, align 8
271 store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
; Pre-indexed <4 x i32> store: writes %in to %addr + 5 vectors (5 * 16 = 80 bytes) and stores that address to @ptr; a single writeback str of a q register is expected.
275 define void @test_v4i32_pre_store(<4 x i32> %in, <4 x i32>* %addr) {
276 ; CHECK-LABEL: test_v4i32_pre_store:
277 ; CHECK: str q0, [x0, #80]!
278 %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
279 store <4 x i32> %in, <4 x i32>* %newaddr, align 8
280 store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
; Post-indexed <4 x i32> store: writes %in to %addr and stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr; a single post-increment str of a q register is expected.
284 define void @test_v4i32_post_store(<4 x i32> %in, <4 x i32>* %addr) {
285 ; CHECK-LABEL: test_v4i32_post_store:
286 ; CHECK: str q0, [x0], #80
287 %newaddr = getelementptr <4 x i32>, <4 x i32>* %addr, i32 5
288 store <4 x i32> %in, <4 x i32>* %addr, align 8
289 store <4 x i32>* %newaddr, <4 x i32>** bitcast(i8** @ptr to <4 x i32>**)
; Pre-indexed <4 x float> load: loads from %addr + 5 vectors (5 * 16 = 80 bytes) and stores that address to @ptr; a single writeback ldr of a q register is expected.
294 define <4 x float> @test_v4f32_pre_load(<4 x float>* %addr) {
295 ; CHECK-LABEL: test_v4f32_pre_load:
296 ; CHECK: ldr q0, [x0, #80]!
297 %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
298 %val = load <4 x float>, <4 x float>* %newaddr, align 8
299 store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
; Post-indexed <4 x float> load: loads from %addr and stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr; a single post-increment ldr of a q register is expected.
303 define <4 x float> @test_v4f32_post_load(<4 x float>* %addr) {
304 ; CHECK-LABEL: test_v4f32_post_load:
305 ; CHECK: ldr q0, [x0], #80
306 %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
307 %val = load <4 x float>, <4 x float>* %addr, align 8
308 store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
; Pre-indexed <4 x float> store: writes %in to %addr + 5 vectors (5 * 16 = 80 bytes) and stores that address to @ptr; a single writeback str of a q register is expected.
312 define void @test_v4f32_pre_store(<4 x float> %in, <4 x float>* %addr) {
313 ; CHECK-LABEL: test_v4f32_pre_store:
314 ; CHECK: str q0, [x0, #80]!
315 %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
316 store <4 x float> %in, <4 x float>* %newaddr, align 8
317 store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
; Post-indexed <4 x float> store: writes %in to %addr and stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr; a single post-increment str of a q register is expected.
321 define void @test_v4f32_post_store(<4 x float> %in, <4 x float>* %addr) {
322 ; CHECK-LABEL: test_v4f32_post_store:
323 ; CHECK: str q0, [x0], #80
324 %newaddr = getelementptr <4 x float>, <4 x float>* %addr, i32 5
325 store <4 x float> %in, <4 x float>* %addr, align 8
326 store <4 x float>* %newaddr, <4 x float>** bitcast(i8** @ptr to <4 x float>**)
; Pre-indexed <2 x i64> load: loads from %addr + 5 vectors (5 * 16 = 80 bytes) and stores that address to @ptr; a single writeback ldr of a q register is expected.
331 define <2 x i64> @test_v2i64_pre_load(<2 x i64>* %addr) {
332 ; CHECK-LABEL: test_v2i64_pre_load:
333 ; CHECK: ldr q0, [x0, #80]!
334 %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
335 %val = load <2 x i64>, <2 x i64>* %newaddr, align 8
336 store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
; Post-indexed <2 x i64> load: loads from %addr and stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr; a single post-increment ldr of a q register is expected.
340 define <2 x i64> @test_v2i64_post_load(<2 x i64>* %addr) {
341 ; CHECK-LABEL: test_v2i64_post_load:
342 ; CHECK: ldr q0, [x0], #80
343 %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
344 %val = load <2 x i64>, <2 x i64>* %addr, align 8
345 store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
; Pre-indexed <2 x i64> store: writes %in to %addr + 5 vectors (5 * 16 = 80 bytes) and stores that address to @ptr; a single writeback str of a q register is expected.
349 define void @test_v2i64_pre_store(<2 x i64> %in, <2 x i64>* %addr) {
350 ; CHECK-LABEL: test_v2i64_pre_store:
351 ; CHECK: str q0, [x0, #80]!
352 %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
353 store <2 x i64> %in, <2 x i64>* %newaddr, align 8
354 store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
; Post-indexed <2 x i64> store: writes %in to %addr and stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr; a single post-increment str of a q register is expected.
358 define void @test_v2i64_post_store(<2 x i64> %in, <2 x i64>* %addr) {
359 ; CHECK-LABEL: test_v2i64_post_store:
360 ; CHECK: str q0, [x0], #80
361 %newaddr = getelementptr <2 x i64>, <2 x i64>* %addr, i32 5
362 store <2 x i64> %in, <2 x i64>* %addr, align 8
363 store <2 x i64>* %newaddr, <2 x i64>** bitcast(i8** @ptr to <2 x i64>**)
; Pre-indexed <2 x double> load: loads from %addr + 5 vectors (5 * 16 = 80 bytes), stores that address to @ptr and returns the loaded value; a single writeback ldr of a q register is expected.
368 define <2 x double> @test_v2f64_pre_load(<2 x double>* %addr) {
369 ; CHECK-LABEL: test_v2f64_pre_load:
370 ; CHECK: ldr q0, [x0, #80]!
371 %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
372 %val = load <2 x double>, <2 x double>* %newaddr, align 8
373 store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
374 ret <2 x double> %val
; Post-indexed <2 x double> load: loads from %addr, stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr and returns the loaded value; a single post-increment ldr of a q register is expected.
377 define <2 x double> @test_v2f64_post_load(<2 x double>* %addr) {
378 ; CHECK-LABEL: test_v2f64_post_load:
379 ; CHECK: ldr q0, [x0], #80
380 %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
381 %val = load <2 x double>, <2 x double>* %addr, align 8
382 store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
383 ret <2 x double> %val
; Pre-indexed <2 x double> store: writes %in to %addr + 5 vectors (5 * 16 = 80 bytes) and stores that address to @ptr; a single writeback str of a q register is expected.
386 define void @test_v2f64_pre_store(<2 x double> %in, <2 x double>* %addr) {
387 ; CHECK-LABEL: test_v2f64_pre_store:
388 ; CHECK: str q0, [x0, #80]!
389 %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
390 store <2 x double> %in, <2 x double>* %newaddr, align 8
391 store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
; Post-indexed <2 x double> store: writes %in to %addr and stores %addr + 5 vectors (5 * 16 = 80 bytes) to @ptr; a single post-increment str of a q register is expected.
395 define void @test_v2f64_post_store(<2 x double> %in, <2 x double>* %addr) {
396 ; CHECK-LABEL: test_v2f64_post_store:
397 ; CHECK: str q0, [x0], #80
398 %newaddr = getelementptr <2 x double>, <2 x double>* %addr, i32 5
399 store <2 x double> %in, <2 x double>* %addr, align 8
400 store <2 x double>* %newaddr, <2 x double>** bitcast(i8** @ptr to <2 x double>**)
; Stores lane 3 of a <16 x i8> to %addr and computes %addr + 1 element (1 byte); st1.b with an immediate post-increment of 1 is expected.
404 define i8* @test_v16i8_post_imm_st1_lane(<16 x i8> %in, i8* %addr) {
405 ; CHECK-LABEL: test_v16i8_post_imm_st1_lane:
406 ; CHECK: st1.b { v0 }[3], [x0], #1
407 %elt = extractelement <16 x i8> %in, i32 3
408 store i8 %elt, i8* %addr
410 %newaddr = getelementptr i8, i8* %addr, i32 1
; Stores lane 3 of a <16 x i8> to %addr and computes %addr + 2 elements (2 bytes); the offset is expected in a register (mov #2) used as the st1.b post-increment.
414 define i8* @test_v16i8_post_reg_st1_lane(<16 x i8> %in, i8* %addr) {
415 ; CHECK-LABEL: test_v16i8_post_reg_st1_lane:
416 ; CHECK: mov w[[OFFSET:[0-9]+]], #2
417 ; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
418 %elt = extractelement <16 x i8> %in, i32 3
419 store i8 %elt, i8* %addr
421 %newaddr = getelementptr i8, i8* %addr, i32 2
; Stores lane 3 of an <8 x i16> to %addr and computes %addr + 1 element (2 bytes); st1.h with an immediate post-increment of 2 is expected.
426 define i16* @test_v8i16_post_imm_st1_lane(<8 x i16> %in, i16* %addr) {
427 ; CHECK-LABEL: test_v8i16_post_imm_st1_lane:
428 ; CHECK: st1.h { v0 }[3], [x0], #2
429 %elt = extractelement <8 x i16> %in, i32 3
430 store i16 %elt, i16* %addr
432 %newaddr = getelementptr i16, i16* %addr, i32 1
; Stores lane 3 of an <8 x i16> to %addr and computes %addr + 2 elements (4 bytes); the offset is expected in a register (mov #4) used as the st1.h post-increment.
436 define i16* @test_v8i16_post_reg_st1_lane(<8 x i16> %in, i16* %addr) {
437 ; CHECK-LABEL: test_v8i16_post_reg_st1_lane:
438 ; CHECK: mov w[[OFFSET:[0-9]+]], #4
439 ; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
440 %elt = extractelement <8 x i16> %in, i32 3
441 store i16 %elt, i16* %addr
443 %newaddr = getelementptr i16, i16* %addr, i32 2
; Stores lane 3 of a <4 x i32> to %addr and computes %addr + 1 element (4 bytes); st1.s with an immediate post-increment of 4 is expected.
447 define i32* @test_v4i32_post_imm_st1_lane(<4 x i32> %in, i32* %addr) {
448 ; CHECK-LABEL: test_v4i32_post_imm_st1_lane:
449 ; CHECK: st1.s { v0 }[3], [x0], #4
450 %elt = extractelement <4 x i32> %in, i32 3
451 store i32 %elt, i32* %addr
453 %newaddr = getelementptr i32, i32* %addr, i32 1
; Stores lane 3 of a <4 x i32> to %addr and computes %addr + 2 elements (8 bytes); the offset is expected in a register (mov #8) used as the st1.s post-increment.
457 define i32* @test_v4i32_post_reg_st1_lane(<4 x i32> %in, i32* %addr) {
458 ; CHECK-LABEL: test_v4i32_post_reg_st1_lane:
459 ; CHECK: mov w[[OFFSET:[0-9]+]], #8
460 ; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
461 %elt = extractelement <4 x i32> %in, i32 3
462 store i32 %elt, i32* %addr
464 %newaddr = getelementptr i32, i32* %addr, i32 2
; Stores lane 3 of a <4 x float> to %addr and computes %addr + 1 element (4 bytes); st1.s with an immediate post-increment of 4 is expected.
468 define float* @test_v4f32_post_imm_st1_lane(<4 x float> %in, float* %addr) {
469 ; CHECK-LABEL: test_v4f32_post_imm_st1_lane:
470 ; CHECK: st1.s { v0 }[3], [x0], #4
471 %elt = extractelement <4 x float> %in, i32 3
472 store float %elt, float* %addr
474 %newaddr = getelementptr float, float* %addr, i32 1
; Stores lane 3 of a <4 x float> to %addr and computes %addr + 2 elements (8 bytes); the offset is expected in a register (mov #8) used as the st1.s post-increment.
478 define float* @test_v4f32_post_reg_st1_lane(<4 x float> %in, float* %addr) {
479 ; CHECK-LABEL: test_v4f32_post_reg_st1_lane:
480 ; CHECK: mov w[[OFFSET:[0-9]+]], #8
481 ; CHECK: st1.s { v0 }[3], [x0], x[[OFFSET]]
482 %elt = extractelement <4 x float> %in, i32 3
483 store float %elt, float* %addr
485 %newaddr = getelementptr float, float* %addr, i32 2
; Stores lane 1 of a <2 x i64> to %addr and computes %addr + 1 element (8 bytes); st1.d with an immediate post-increment of 8 is expected.
489 define i64* @test_v2i64_post_imm_st1_lane(<2 x i64> %in, i64* %addr) {
490 ; CHECK-LABEL: test_v2i64_post_imm_st1_lane:
491 ; CHECK: st1.d { v0 }[1], [x0], #8
492 %elt = extractelement <2 x i64> %in, i64 1
493 store i64 %elt, i64* %addr
495 %newaddr = getelementptr i64, i64* %addr, i64 1
; Stores lane 1 of a <2 x i64> to %addr and computes %addr + 2 elements (16 bytes); the offset is expected in a register (mov #16) used as the st1.d post-increment.
499 define i64* @test_v2i64_post_reg_st1_lane(<2 x i64> %in, i64* %addr) {
500 ; CHECK-LABEL: test_v2i64_post_reg_st1_lane:
501 ; CHECK: mov w[[OFFSET:[0-9]+]], #16
502 ; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
503 %elt = extractelement <2 x i64> %in, i64 1
504 store i64 %elt, i64* %addr
506 %newaddr = getelementptr i64, i64* %addr, i64 2
; Stores lane 1 of a <2 x double> to %addr and computes %addr + 1 element (8 bytes); st1.d with an immediate post-increment of 8 is expected.
510 define double* @test_v2f64_post_imm_st1_lane(<2 x double> %in, double* %addr) {
511 ; CHECK-LABEL: test_v2f64_post_imm_st1_lane:
512 ; CHECK: st1.d { v0 }[1], [x0], #8
513 %elt = extractelement <2 x double> %in, i32 1
514 store double %elt, double* %addr
516 %newaddr = getelementptr double, double* %addr, i32 1
; Stores lane 1 of a <2 x double> to %addr and computes %addr + 2 elements (16 bytes); the offset is expected in a register (mov #16) used as the st1.d post-increment.
520 define double* @test_v2f64_post_reg_st1_lane(<2 x double> %in, double* %addr) {
521 ; CHECK-LABEL: test_v2f64_post_reg_st1_lane:
522 ; CHECK: mov w[[OFFSET:[0-9]+]], #16
523 ; CHECK: st1.d { v0 }[1], [x0], x[[OFFSET]]
524 %elt = extractelement <2 x double> %in, i32 1
525 store double %elt, double* %addr
527 %newaddr = getelementptr double, double* %addr, i32 2
; Stores lane 3 of an <8 x i8> to %addr and computes %addr + 1 element (1 byte); st1.b with an immediate post-increment of 1 is expected.
531 define i8* @test_v8i8_post_imm_st1_lane(<8 x i8> %in, i8* %addr) {
532 ; CHECK-LABEL: test_v8i8_post_imm_st1_lane:
533 ; CHECK: st1.b { v0 }[3], [x0], #1
534 %elt = extractelement <8 x i8> %in, i32 3
535 store i8 %elt, i8* %addr
537 %newaddr = getelementptr i8, i8* %addr, i32 1
; Stores lane 3 of an <8 x i8> to %addr and computes %addr + 2 elements (2 bytes); the offset is expected in a register (mov #2) used as the st1.b post-increment.
541 define i8* @test_v8i8_post_reg_st1_lane(<8 x i8> %in, i8* %addr) {
542 ; CHECK-LABEL: test_v8i8_post_reg_st1_lane:
543 ; CHECK: mov w[[OFFSET:[0-9]+]], #2
544 ; CHECK: st1.b { v0 }[3], [x0], x[[OFFSET]]
545 %elt = extractelement <8 x i8> %in, i32 3
546 store i8 %elt, i8* %addr
548 %newaddr = getelementptr i8, i8* %addr, i32 2
; Stores lane 3 of a <4 x i16> to %addr and computes %addr + 1 element (2 bytes); st1.h with an immediate post-increment of 2 is expected.
552 define i16* @test_v4i16_post_imm_st1_lane(<4 x i16> %in, i16* %addr) {
553 ; CHECK-LABEL: test_v4i16_post_imm_st1_lane:
554 ; CHECK: st1.h { v0 }[3], [x0], #2
555 %elt = extractelement <4 x i16> %in, i32 3
556 store i16 %elt, i16* %addr
558 %newaddr = getelementptr i16, i16* %addr, i32 1
; Stores lane 3 of a <4 x i16> to %addr and computes %addr + 2 elements (4 bytes); the offset is expected in a register (mov #4) used as the st1.h post-increment.
562 define i16* @test_v4i16_post_reg_st1_lane(<4 x i16> %in, i16* %addr) {
563 ; CHECK-LABEL: test_v4i16_post_reg_st1_lane:
564 ; CHECK: mov w[[OFFSET:[0-9]+]], #4
565 ; CHECK: st1.h { v0 }[3], [x0], x[[OFFSET]]
566 %elt = extractelement <4 x i16> %in, i32 3
567 store i16 %elt, i16* %addr
569 %newaddr = getelementptr i16, i16* %addr, i32 2
; Stores lane 1 of a <2 x i32> to %addr and computes %addr + 1 element (4 bytes); st1.s with an immediate post-increment of 4 is expected.
573 define i32* @test_v2i32_post_imm_st1_lane(<2 x i32> %in, i32* %addr) {
574 ; CHECK-LABEL: test_v2i32_post_imm_st1_lane:
575 ; CHECK: st1.s { v0 }[1], [x0], #4
576 %elt = extractelement <2 x i32> %in, i32 1
577 store i32 %elt, i32* %addr
579 %newaddr = getelementptr i32, i32* %addr, i32 1
; Stores lane 1 of a <2 x i32> to %addr and computes %addr + 2 elements (8 bytes); the offset is expected in a register (mov #8) used as the st1.s post-increment.
583 define i32* @test_v2i32_post_reg_st1_lane(<2 x i32> %in, i32* %addr) {
584 ; CHECK-LABEL: test_v2i32_post_reg_st1_lane:
585 ; CHECK: mov w[[OFFSET:[0-9]+]], #8
586 ; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
587 %elt = extractelement <2 x i32> %in, i32 1
588 store i32 %elt, i32* %addr
590 %newaddr = getelementptr i32, i32* %addr, i32 2
; Stores lane 1 of a <2 x float> to %addr and computes %addr + 1 element (4 bytes); st1.s with an immediate post-increment of 4 is expected.
594 define float* @test_v2f32_post_imm_st1_lane(<2 x float> %in, float* %addr) {
595 ; CHECK-LABEL: test_v2f32_post_imm_st1_lane:
596 ; CHECK: st1.s { v0 }[1], [x0], #4
597 %elt = extractelement <2 x float> %in, i32 1
598 store float %elt, float* %addr
600 %newaddr = getelementptr float, float* %addr, i32 1
; Stores lane 1 of a <2 x float> to %addr and computes %addr + 2 elements (8 bytes); the offset is expected in a register (mov #8) used as the st1.s post-increment.
604 define float* @test_v2f32_post_reg_st1_lane(<2 x float> %in, float* %addr) {
605 ; CHECK-LABEL: test_v2f32_post_reg_st1_lane:
606 ; CHECK: mov w[[OFFSET:[0-9]+]], #8
607 ; CHECK: st1.s { v0 }[1], [x0], x[[OFFSET]]
608 %elt = extractelement <2 x float> %in, i32 1
609 store float %elt, float* %addr
611 %newaddr = getelementptr float, float* %addr, i32 2
; ld2 of two <16 x i8> vectors from %A; %A + 32 bytes is stored to %ptr, so ld2.16b with a #32 post-increment is expected.
615 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2(i8* %A, i8** %ptr) {
616 ;CHECK-LABEL: test_v16i8_post_imm_ld2:
617 ;CHECK: ld2.16b { v0, v1 }, [x0], #32
618 %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
619 %tmp = getelementptr i8, i8* %A, i32 32
620 store i8* %tmp, i8** %ptr
621 ret { <16 x i8>, <16 x i8> } %ld2
; ld2 of two <16 x i8> vectors from %A; %A + %inc elements is stored to %ptr, so ld2.16b with a register post-increment is expected.
624 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
625 ;CHECK-LABEL: test_v16i8_post_reg_ld2:
626 ;CHECK: ld2.16b { v0, v1 }, [x0], x{{[0-9]+}}
627 %ld2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8* %A)
628 %tmp = getelementptr i8, i8* %A, i64 %inc
629 store i8* %tmp, i8** %ptr
630 ret { <16 x i8>, <16 x i8> } %ld2
633 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2.v16i8.p0i8(i8*)
; ld2 of two <8 x i8> vectors from %A; %A + 16 bytes is stored to %ptr, so ld2.8b with a #16 post-increment is expected.
636 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2(i8* %A, i8** %ptr) {
637 ;CHECK-LABEL: test_v8i8_post_imm_ld2:
638 ;CHECK: ld2.8b { v0, v1 }, [x0], #16
639 %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
640 %tmp = getelementptr i8, i8* %A, i32 16
641 store i8* %tmp, i8** %ptr
642 ret { <8 x i8>, <8 x i8> } %ld2
; ld2 of two <8 x i8> vectors from %A; %A + %inc elements is stored to %ptr, so ld2.8b with a register post-increment is expected.
645 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2(i8* %A, i8** %ptr, i64 %inc) {
646 ;CHECK-LABEL: test_v8i8_post_reg_ld2:
647 ;CHECK: ld2.8b { v0, v1 }, [x0], x{{[0-9]+}}
648 %ld2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8* %A)
649 %tmp = getelementptr i8, i8* %A, i64 %inc
650 store i8* %tmp, i8** %ptr
651 ret { <8 x i8>, <8 x i8> } %ld2
654 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0i8(i8*)
; ld2 of two <8 x i16> vectors from %A; %A + 16 i16 elements (32 bytes) is stored to %ptr, so ld2.8h with a #32 post-increment is expected.
657 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2(i16* %A, i16** %ptr) {
658 ;CHECK-LABEL: test_v8i16_post_imm_ld2:
659 ;CHECK: ld2.8h { v0, v1 }, [x0], #32
660 %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
661 %tmp = getelementptr i16, i16* %A, i32 16
662 store i16* %tmp, i16** %ptr
663 ret { <8 x i16>, <8 x i16> } %ld2
; ld2 of two <8 x i16> vectors from %A; %A + %inc elements is stored to %ptr, so ld2.8h with a register post-increment is expected.
666 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
667 ;CHECK-LABEL: test_v8i16_post_reg_ld2:
668 ;CHECK: ld2.8h { v0, v1 }, [x0], x{{[0-9]+}}
669 %ld2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16* %A)
670 %tmp = getelementptr i16, i16* %A, i64 %inc
671 store i16* %tmp, i16** %ptr
672 ret { <8 x i16>, <8 x i16> } %ld2
675 declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2.v8i16.p0i16(i16*)
; ld2 of two <4 x i16> vectors from %A; %A + 8 i16 elements (16 bytes) is stored to %ptr, so ld2.4h with a #16 post-increment is expected.
678 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2(i16* %A, i16** %ptr) {
679 ;CHECK-LABEL: test_v4i16_post_imm_ld2:
680 ;CHECK: ld2.4h { v0, v1 }, [x0], #16
681 %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
682 %tmp = getelementptr i16, i16* %A, i32 8
683 store i16* %tmp, i16** %ptr
684 ret { <4 x i16>, <4 x i16> } %ld2
; ld2 of two <4 x i16> vectors from %A; %A + %inc elements is stored to %ptr, so ld2.4h with a register post-increment is expected.
687 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2(i16* %A, i16** %ptr, i64 %inc) {
688 ;CHECK-LABEL: test_v4i16_post_reg_ld2:
689 ;CHECK: ld2.4h { v0, v1 }, [x0], x{{[0-9]+}}
690 %ld2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16* %A)
691 %tmp = getelementptr i16, i16* %A, i64 %inc
692 store i16* %tmp, i16** %ptr
693 ret { <4 x i16>, <4 x i16> } %ld2
696 declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2.v4i16.p0i16(i16*)
; ld2 of two <4 x i32> vectors from %A; %A + 8 i32 elements (32 bytes) is stored to %ptr, so ld2.4s with a #32 post-increment is expected.
699 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2(i32* %A, i32** %ptr) {
700 ;CHECK-LABEL: test_v4i32_post_imm_ld2:
701 ;CHECK: ld2.4s { v0, v1 }, [x0], #32
702 %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
703 %tmp = getelementptr i32, i32* %A, i32 8
704 store i32* %tmp, i32** %ptr
705 ret { <4 x i32>, <4 x i32> } %ld2
; ld2 of two <4 x i32> vectors from %A; %A + %inc elements is stored to %ptr, so ld2.4s with a register post-increment is expected.
708 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
709 ;CHECK-LABEL: test_v4i32_post_reg_ld2:
710 ;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
711 %ld2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32* %A)
712 %tmp = getelementptr i32, i32* %A, i64 %inc
713 store i32* %tmp, i32** %ptr
714 ret { <4 x i32>, <4 x i32> } %ld2
717 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0i32(i32*)
; ld2 of two <2 x i32> vectors from %A; %A + 4 i32 elements (16 bytes) is stored to %ptr, so ld2.2s with a #16 post-increment is expected.
720 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2(i32* %A, i32** %ptr) {
721 ;CHECK-LABEL: test_v2i32_post_imm_ld2:
722 ;CHECK: ld2.2s { v0, v1 }, [x0], #16
723 %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
724 %tmp = getelementptr i32, i32* %A, i32 4
725 store i32* %tmp, i32** %ptr
726 ret { <2 x i32>, <2 x i32> } %ld2
; ld2 of two <2 x i32> vectors from %A; %A + %inc elements is stored to %ptr, so ld2.2s with a register post-increment is expected.
729 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2(i32* %A, i32** %ptr, i64 %inc) {
730 ;CHECK-LABEL: test_v2i32_post_reg_ld2:
731 ;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
732 %ld2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32* %A)
733 %tmp = getelementptr i32, i32* %A, i64 %inc
734 store i32* %tmp, i32** %ptr
735 ret { <2 x i32>, <2 x i32> } %ld2
738 declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2.v2i32.p0i32(i32*)
; ld2 of two <2 x i64> vectors from %A; %A + 4 i64 elements (32 bytes) is stored to %ptr, so ld2.2d with a #32 post-increment is expected.
741 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2(i64* %A, i64** %ptr) {
742 ;CHECK-LABEL: test_v2i64_post_imm_ld2:
743 ;CHECK: ld2.2d { v0, v1 }, [x0], #32
744 %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
745 %tmp = getelementptr i64, i64* %A, i32 4
746 store i64* %tmp, i64** %ptr
747 ret { <2 x i64>, <2 x i64> } %ld2
; ld2 of two <2 x i64> vectors from %A; %A + %inc elements is stored to %ptr, so ld2.2d with a register post-increment is expected.
750 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
751 ;CHECK-LABEL: test_v2i64_post_reg_ld2:
752 ;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
753 %ld2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64* %A)
754 %tmp = getelementptr i64, i64* %A, i64 %inc
755 store i64* %tmp, i64** %ptr
756 ret { <2 x i64>, <2 x i64> } %ld2
759 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0i64(i64*)
; ld2 intrinsic on two <1 x i64> vectors from %A; %A + 2 i64 elements (16 bytes) is stored to %ptr. The expected instruction is a two-register ld1.1d (not ld2) with a #16 post-increment.
762 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2(i64* %A, i64** %ptr) {
763 ;CHECK-LABEL: test_v1i64_post_imm_ld2:
764 ;CHECK: ld1.1d { v0, v1 }, [x0], #16
765 %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
766 %tmp = getelementptr i64, i64* %A, i32 2
767 store i64* %tmp, i64** %ptr
768 ret { <1 x i64>, <1 x i64> } %ld2
; ld2 intrinsic on two <1 x i64> vectors from %A; %A + %inc elements is stored to %ptr. The expected instruction is a two-register ld1.1d (not ld2) with a register post-increment.
771 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2(i64* %A, i64** %ptr, i64 %inc) {
772 ;CHECK-LABEL: test_v1i64_post_reg_ld2:
773 ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
774 %ld2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64* %A)
775 %tmp = getelementptr i64, i64* %A, i64 %inc
776 store i64* %tmp, i64** %ptr
777 ret { <1 x i64>, <1 x i64> } %ld2
780 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0i64(i64*)
; ld2 of two <4 x float> vectors from %A; %A + 8 float elements (32 bytes) is stored to %ptr, so ld2.4s with a #32 post-increment is expected.
783 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2(float* %A, float** %ptr) {
784 ;CHECK-LABEL: test_v4f32_post_imm_ld2:
785 ;CHECK: ld2.4s { v0, v1 }, [x0], #32
786 %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
787 %tmp = getelementptr float, float* %A, i32 8
788 store float* %tmp, float** %ptr
789 ret { <4 x float>, <4 x float> } %ld2
; ld2 of two <4 x float> vectors from %A; %A + %inc elements is stored to %ptr, so ld2.4s with a register post-increment is expected.
792 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
793 ;CHECK-LABEL: test_v4f32_post_reg_ld2:
794 ;CHECK: ld2.4s { v0, v1 }, [x0], x{{[0-9]+}}
795 %ld2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float* %A)
796 %tmp = getelementptr float, float* %A, i64 %inc
797 store float* %tmp, float** %ptr
798 ret { <4 x float>, <4 x float> } %ld2
801 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0f32(float*)
; ld2 of two <2 x float> vectors from %A; %A + 4 float elements (16 bytes) is stored to %ptr, so ld2.2s with a #16 post-increment is expected.
804 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2(float* %A, float** %ptr) {
805 ;CHECK-LABEL: test_v2f32_post_imm_ld2:
806 ;CHECK: ld2.2s { v0, v1 }, [x0], #16
807 %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
808 %tmp = getelementptr float, float* %A, i32 4
809 store float* %tmp, float** %ptr
810 ret { <2 x float>, <2 x float> } %ld2
; ld2 of two <2 x float> vectors from %A; %A + %inc elements is stored to %ptr, so ld2.2s with a register post-increment is expected.
813 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2(float* %A, float** %ptr, i64 %inc) {
814 ;CHECK-LABEL: test_v2f32_post_reg_ld2:
815 ;CHECK: ld2.2s { v0, v1 }, [x0], x{{[0-9]+}}
816 %ld2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float* %A)
817 %tmp = getelementptr float, float* %A, i64 %inc
818 store float* %tmp, float** %ptr
819 ret { <2 x float>, <2 x float> } %ld2
822 declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2.v2f32.p0f32(float*)
; ld2 of two <2 x double> vectors from %A; %A + 4 double elements (32 bytes) is stored to %ptr, so ld2.2d with a #32 post-increment is expected.
825 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2(double* %A, double** %ptr) {
826 ;CHECK-LABEL: test_v2f64_post_imm_ld2:
827 ;CHECK: ld2.2d { v0, v1 }, [x0], #32
828 %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
829 %tmp = getelementptr double, double* %A, i32 4
830 store double* %tmp, double** %ptr
831 ret { <2 x double>, <2 x double> } %ld2
; ld2 of two <2 x double> vectors from %A; %A + %inc elements is stored to %ptr, so ld2.2d with a register post-increment is expected.
834 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
835 ;CHECK-LABEL: test_v2f64_post_reg_ld2:
836 ;CHECK: ld2.2d { v0, v1 }, [x0], x{{[0-9]+}}
837 %ld2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double* %A)
838 %tmp = getelementptr double, double* %A, i64 %inc
839 store double* %tmp, double** %ptr
840 ret { <2 x double>, <2 x double> } %ld2
843 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2.v2f64.p0f64(double*)
; ld2 intrinsic on two <1 x double> vectors from %A; %A + 2 double elements (16 bytes) is stored to %ptr. The expected instruction is a two-register ld1.1d (not ld2) with a #16 post-increment.
846 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2(double* %A, double** %ptr) {
847 ;CHECK-LABEL: test_v1f64_post_imm_ld2:
848 ;CHECK: ld1.1d { v0, v1 }, [x0], #16
849 %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
850 %tmp = getelementptr double, double* %A, i32 2
851 store double* %tmp, double** %ptr
852 ret { <1 x double>, <1 x double> } %ld2
; ld2 intrinsic on two <1 x double> vectors from %A; %A + %inc elements is stored to %ptr. The expected instruction is a two-register ld1.1d (not ld2) with a register post-increment.
855 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2(double* %A, double** %ptr, i64 %inc) {
856 ;CHECK-LABEL: test_v1f64_post_reg_ld2:
857 ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
858 %ld2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double* %A)
859 %tmp = getelementptr double, double* %A, i64 %inc
860 store double* %tmp, double** %ptr
861 ret { <1 x double>, <1 x double> } %ld2
864 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2.v1f64.p0f64(double*)
; ld3 of three <16 x i8> vectors from %A; %A + 48 bytes is stored to %ptr, so ld3.16b with a #48 post-increment is expected.
867 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3(i8* %A, i8** %ptr) {
868 ;CHECK-LABEL: test_v16i8_post_imm_ld3:
869 ;CHECK: ld3.16b { v0, v1, v2 }, [x0], #48
870 %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
871 %tmp = getelementptr i8, i8* %A, i32 48
872 store i8* %tmp, i8** %ptr
873 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
; ld3 of three <16 x i8> vectors with a register post-increment: %A advanced by
; %inc i8 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
876 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
877 ;CHECK-LABEL: test_v16i8_post_reg_ld3:
878 ;CHECK: ld3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
879 %ld3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8* %A)
880 %tmp = getelementptr i8, i8* %A, i64 %inc
881 store i8* %tmp, i8** %ptr
882 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
885 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0i8(i8*)
; ld3 of three <8 x i8> vectors with an immediate post-increment: %A advanced
; by 24 i8 elements (24 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #24 on the load.
888 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3(i8* %A, i8** %ptr) {
889 ;CHECK-LABEL: test_v8i8_post_imm_ld3:
890 ;CHECK: ld3.8b { v0, v1, v2 }, [x0], #24
891 %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
892 %tmp = getelementptr i8, i8* %A, i32 24
893 store i8* %tmp, i8** %ptr
894 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
; ld3 of three <8 x i8> vectors with a register post-increment: %A advanced by
; %inc i8 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
897 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3(i8* %A, i8** %ptr, i64 %inc) {
898 ;CHECK-LABEL: test_v8i8_post_reg_ld3:
899 ;CHECK: ld3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
900 %ld3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8* %A)
901 %tmp = getelementptr i8, i8* %A, i64 %inc
902 store i8* %tmp, i8** %ptr
903 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
906 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0i8(i8*)
; ld3 of three <8 x i16> vectors with an immediate post-increment: %A advanced
; by 24 i16 elements (48 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #48 on the load.
909 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3(i16* %A, i16** %ptr) {
910 ;CHECK-LABEL: test_v8i16_post_imm_ld3:
911 ;CHECK: ld3.8h { v0, v1, v2 }, [x0], #48
912 %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
913 %tmp = getelementptr i16, i16* %A, i32 24
914 store i16* %tmp, i16** %ptr
915 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
; ld3 of three <8 x i16> vectors with a register post-increment: %A advanced by
; %inc i16 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
918 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
919 ;CHECK-LABEL: test_v8i16_post_reg_ld3:
920 ;CHECK: ld3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
921 %ld3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16* %A)
922 %tmp = getelementptr i16, i16* %A, i64 %inc
923 store i16* %tmp, i16** %ptr
924 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
927 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3.v8i16.p0i16(i16*)
; ld3 of three <4 x i16> vectors with an immediate post-increment: %A advanced
; by 12 i16 elements (24 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #24 on the load.
930 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3(i16* %A, i16** %ptr) {
931 ;CHECK-LABEL: test_v4i16_post_imm_ld3:
932 ;CHECK: ld3.4h { v0, v1, v2 }, [x0], #24
933 %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
934 %tmp = getelementptr i16, i16* %A, i32 12
935 store i16* %tmp, i16** %ptr
936 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
; ld3 of three <4 x i16> vectors with a register post-increment: %A advanced by
; %inc i16 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
939 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3(i16* %A, i16** %ptr, i64 %inc) {
940 ;CHECK-LABEL: test_v4i16_post_reg_ld3:
941 ;CHECK: ld3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
942 %ld3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16* %A)
943 %tmp = getelementptr i16, i16* %A, i64 %inc
944 store i16* %tmp, i16** %ptr
945 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
948 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3.v4i16.p0i16(i16*)
; ld3 of three <4 x i32> vectors with an immediate post-increment: %A advanced
; by 12 i32 elements (48 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #48 on the load.
951 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3(i32* %A, i32** %ptr) {
952 ;CHECK-LABEL: test_v4i32_post_imm_ld3:
953 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
954 %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
955 %tmp = getelementptr i32, i32* %A, i32 12
956 store i32* %tmp, i32** %ptr
957 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
; ld3 of three <4 x i32> vectors with a register post-increment: %A advanced by
; %inc i32 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
960 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
961 ;CHECK-LABEL: test_v4i32_post_reg_ld3:
962 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
963 %ld3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32* %A)
964 %tmp = getelementptr i32, i32* %A, i64 %inc
965 store i32* %tmp, i32** %ptr
966 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
969 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3.v4i32.p0i32(i32*)
; ld3 of three <2 x i32> vectors with an immediate post-increment: %A advanced
; by 6 i32 elements (24 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #24 on the load.
972 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3(i32* %A, i32** %ptr) {
973 ;CHECK-LABEL: test_v2i32_post_imm_ld3:
974 ;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
975 %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
976 %tmp = getelementptr i32, i32* %A, i32 6
977 store i32* %tmp, i32** %ptr
978 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
; ld3 of three <2 x i32> vectors with a register post-increment: %A advanced by
; %inc i32 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
981 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3(i32* %A, i32** %ptr, i64 %inc) {
982 ;CHECK-LABEL: test_v2i32_post_reg_ld3:
983 ;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
984 %ld3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32* %A)
985 %tmp = getelementptr i32, i32* %A, i64 %inc
986 store i32* %tmp, i32** %ptr
987 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
990 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3.v2i32.p0i32(i32*)
; ld3 of three <2 x i64> vectors with an immediate post-increment: %A advanced
; by 6 i64 elements (48 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #48 on the load.
993 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3(i64* %A, i64** %ptr) {
994 ;CHECK-LABEL: test_v2i64_post_imm_ld3:
995 ;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
996 %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
997 %tmp = getelementptr i64, i64* %A, i32 6
998 store i64* %tmp, i64** %ptr
999 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
; ld3 of three <2 x i64> vectors with a register post-increment: %A advanced by
; %inc i64 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1002 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
1003 ;CHECK-LABEL: test_v2i64_post_reg_ld3:
1004 ;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
1005 %ld3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64* %A)
1006 %tmp = getelementptr i64, i64* %A, i64 %inc
1007 store i64* %tmp, i64** %ptr
1008 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
1011 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0i64(i64*)
; ld3 intrinsic on three <1 x i64> vectors with an immediate post-increment:
; %A advanced by 3 i64 elements (24 bytes) is stored to %ptr and expected as
; the post-index immediate #24.
; Note: the expected mnemonic is ld1.1d with a three-register list, not ld3.
1014 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3(i64* %A, i64** %ptr) {
1015 ;CHECK-LABEL: test_v1i64_post_imm_ld3:
1016 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
1017 %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
1018 %tmp = getelementptr i64, i64* %A, i32 3
1019 store i64* %tmp, i64** %ptr
1020 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
; ld3 intrinsic on three <1 x i64> vectors with a register post-increment:
; %A advanced by %inc i64 elements is stored to %ptr, and the pattern accepts
; any x-register as the post-index operand.
; Note: the expected mnemonic is ld1.1d with a three-register list, not ld3.
1023 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3(i64* %A, i64** %ptr, i64 %inc) {
1024 ;CHECK-LABEL: test_v1i64_post_reg_ld3:
1025 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
1026 %ld3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64* %A)
1027 %tmp = getelementptr i64, i64* %A, i64 %inc
1028 store i64* %tmp, i64** %ptr
1029 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
1032 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0i64(i64*)
; ld3 of three <4 x float> vectors with an immediate post-increment: %A
; advanced by 12 float elements (48 bytes) is stored to %ptr, and the pattern
; expects that advance as the post-index immediate #48 on the load.
1035 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3(float* %A, float** %ptr) {
1036 ;CHECK-LABEL: test_v4f32_post_imm_ld3:
1037 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], #48
1038 %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
1039 %tmp = getelementptr float, float* %A, i32 12
1040 store float* %tmp, float** %ptr
1041 ret { <4 x float>, <4 x float>, <4 x float> } %ld3
; ld3 of three <4 x float> vectors with a register post-increment: %A advanced
; by %inc float elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1044 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
1045 ;CHECK-LABEL: test_v4f32_post_reg_ld3:
1046 ;CHECK: ld3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
1047 %ld3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float* %A)
1048 %tmp = getelementptr float, float* %A, i64 %inc
1049 store float* %tmp, float** %ptr
1050 ret { <4 x float>, <4 x float>, <4 x float> } %ld3
1053 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3.v4f32.p0f32(float*)
; ld3 of three <2 x float> vectors with an immediate post-increment: %A
; advanced by 6 float elements (24 bytes) is stored to %ptr, and the pattern
; expects that advance as the post-index immediate #24 on the load.
1056 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3(float* %A, float** %ptr) {
1057 ;CHECK-LABEL: test_v2f32_post_imm_ld3:
1058 ;CHECK: ld3.2s { v0, v1, v2 }, [x0], #24
1059 %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
1060 %tmp = getelementptr float, float* %A, i32 6
1061 store float* %tmp, float** %ptr
1062 ret { <2 x float>, <2 x float>, <2 x float> } %ld3
; ld3 of three <2 x float> vectors with a register post-increment: %A advanced
; by %inc float elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1065 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3(float* %A, float** %ptr, i64 %inc) {
1066 ;CHECK-LABEL: test_v2f32_post_reg_ld3:
1067 ;CHECK: ld3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
1068 %ld3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float* %A)
1069 %tmp = getelementptr float, float* %A, i64 %inc
1070 store float* %tmp, float** %ptr
1071 ret { <2 x float>, <2 x float>, <2 x float> } %ld3
1074 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3.v2f32.p0f32(float*)
; ld3 of three <2 x double> vectors with an immediate post-increment: %A
; advanced by 6 double elements (48 bytes) is stored to %ptr, and the pattern
; expects that advance as the post-index immediate #48 on the load.
1077 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3(double* %A, double** %ptr) {
1078 ;CHECK-LABEL: test_v2f64_post_imm_ld3:
1079 ;CHECK: ld3.2d { v0, v1, v2 }, [x0], #48
1080 %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
1081 %tmp = getelementptr double, double* %A, i32 6
1082 store double* %tmp, double** %ptr
1083 ret { <2 x double>, <2 x double>, <2 x double> } %ld3
; ld3 of three <2 x double> vectors with a register post-increment: %A advanced
; by %inc double elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1086 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
1087 ;CHECK-LABEL: test_v2f64_post_reg_ld3:
1088 ;CHECK: ld3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
1089 %ld3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double* %A)
1090 %tmp = getelementptr double, double* %A, i64 %inc
1091 store double* %tmp, double** %ptr
1092 ret { <2 x double>, <2 x double>, <2 x double> } %ld3
1095 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3.v2f64.p0f64(double*)
; ld3 intrinsic on three <1 x double> vectors with an immediate post-increment:
; %A advanced by 3 double elements (24 bytes) is stored to %ptr and expected as
; the post-index immediate #24.
; Note: the expected mnemonic is ld1.1d with a three-register list, not ld3.
1098 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3(double* %A, double** %ptr) {
1099 ;CHECK-LABEL: test_v1f64_post_imm_ld3:
1100 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
1101 %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
1102 %tmp = getelementptr double, double* %A, i32 3
1103 store double* %tmp, double** %ptr
1104 ret { <1 x double>, <1 x double>, <1 x double> } %ld3
; ld3 intrinsic on three <1 x double> vectors with a register post-increment:
; %A advanced by %inc double elements is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand.
; Note: the expected mnemonic is ld1.1d with a three-register list, not ld3.
1107 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3(double* %A, double** %ptr, i64 %inc) {
1108 ;CHECK-LABEL: test_v1f64_post_reg_ld3:
1109 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
1110 %ld3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double* %A)
1111 %tmp = getelementptr double, double* %A, i64 %inc
1112 store double* %tmp, double** %ptr
1113 ret { <1 x double>, <1 x double>, <1 x double> } %ld3
1116 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3.v1f64.p0f64(double*)
; ld4 of four <16 x i8> vectors with an immediate post-increment: %A advanced
; by 64 i8 elements (64 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #64 on the load.
1119 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4(i8* %A, i8** %ptr) {
1120 ;CHECK-LABEL: test_v16i8_post_imm_ld4:
1121 ;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], #64
1122 %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
1123 %tmp = getelementptr i8, i8* %A, i32 64
1124 store i8* %tmp, i8** %ptr
1125 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
; ld4 of four <16 x i8> vectors with a register post-increment: %A advanced by
; %inc i8 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1128 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
1129 ;CHECK-LABEL: test_v16i8_post_reg_ld4:
1130 ;CHECK: ld4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1131 %ld4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8* %A)
1132 %tmp = getelementptr i8, i8* %A, i64 %inc
1133 store i8* %tmp, i8** %ptr
1134 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
1137 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0i8(i8*)
; ld4 of four <8 x i8> vectors with an immediate post-increment: %A advanced
; by 32 i8 elements (32 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #32 on the load.
1140 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4(i8* %A, i8** %ptr) {
1141 ;CHECK-LABEL: test_v8i8_post_imm_ld4:
1142 ;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], #32
1143 %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
1144 %tmp = getelementptr i8, i8* %A, i32 32
1145 store i8* %tmp, i8** %ptr
1146 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
; ld4 of four <8 x i8> vectors with a register post-increment: %A advanced by
; %inc i8 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1149 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4(i8* %A, i8** %ptr, i64 %inc) {
1150 ;CHECK-LABEL: test_v8i8_post_reg_ld4:
1151 ;CHECK: ld4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1152 %ld4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8* %A)
1153 %tmp = getelementptr i8, i8* %A, i64 %inc
1154 store i8* %tmp, i8** %ptr
1155 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
1158 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4.v8i8.p0i8(i8*)
; ld4 of four <8 x i16> vectors with an immediate post-increment: %A advanced
; by 32 i16 elements (64 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #64 on the load.
1161 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4(i16* %A, i16** %ptr) {
1162 ;CHECK-LABEL: test_v8i16_post_imm_ld4:
1163 ;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], #64
1164 %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
1165 %tmp = getelementptr i16, i16* %A, i32 32
1166 store i16* %tmp, i16** %ptr
1167 ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
; ld4 of four <8 x i16> vectors with a register post-increment: %A advanced by
; %inc i16 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1170 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
1171 ;CHECK-LABEL: test_v8i16_post_reg_ld4:
1172 ;CHECK: ld4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1173 %ld4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16* %A)
1174 %tmp = getelementptr i16, i16* %A, i64 %inc
1175 store i16* %tmp, i16** %ptr
1176 ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
1179 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4.v8i16.p0i16(i16*)
; ld4 of four <4 x i16> vectors with an immediate post-increment: %A advanced
; by 16 i16 elements (32 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #32 on the load.
1182 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4(i16* %A, i16** %ptr) {
1183 ;CHECK-LABEL: test_v4i16_post_imm_ld4:
1184 ;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], #32
1185 %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
1186 %tmp = getelementptr i16, i16* %A, i32 16
1187 store i16* %tmp, i16** %ptr
1188 ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
; ld4 of four <4 x i16> vectors with a register post-increment: %A advanced by
; %inc i16 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1191 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4(i16* %A, i16** %ptr, i64 %inc) {
1192 ;CHECK-LABEL: test_v4i16_post_reg_ld4:
1193 ;CHECK: ld4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1194 %ld4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16* %A)
1195 %tmp = getelementptr i16, i16* %A, i64 %inc
1196 store i16* %tmp, i16** %ptr
1197 ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
1200 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4.v4i16.p0i16(i16*)
; ld4 of four <4 x i32> vectors with an immediate post-increment: %A advanced
; by 16 i32 elements (64 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #64 on the load.
1203 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4(i32* %A, i32** %ptr) {
1204 ;CHECK-LABEL: test_v4i32_post_imm_ld4:
1205 ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
1206 %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
1207 %tmp = getelementptr i32, i32* %A, i32 16
1208 store i32* %tmp, i32** %ptr
1209 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
; ld4 of four <4 x i32> vectors with a register post-increment: %A advanced by
; %inc i32 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1212 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
1213 ;CHECK-LABEL: test_v4i32_post_reg_ld4:
1214 ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1215 %ld4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32* %A)
1216 %tmp = getelementptr i32, i32* %A, i64 %inc
1217 store i32* %tmp, i32** %ptr
1218 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
1221 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4.v4i32.p0i32(i32*)
; ld4 of four <2 x i32> vectors with an immediate post-increment: %A advanced
; by 8 i32 elements (32 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #32 on the load.
1224 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4(i32* %A, i32** %ptr) {
1225 ;CHECK-LABEL: test_v2i32_post_imm_ld4:
1226 ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
1227 %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
1228 %tmp = getelementptr i32, i32* %A, i32 8
1229 store i32* %tmp, i32** %ptr
1230 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
; ld4 of four <2 x i32> vectors with a register post-increment: %A advanced by
; %inc i32 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1233 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4(i32* %A, i32** %ptr, i64 %inc) {
1234 ;CHECK-LABEL: test_v2i32_post_reg_ld4:
1235 ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1236 %ld4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32* %A)
1237 %tmp = getelementptr i32, i32* %A, i64 %inc
1238 store i32* %tmp, i32** %ptr
1239 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
1242 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4.v2i32.p0i32(i32*)
; ld4 of four <2 x i64> vectors with an immediate post-increment: %A advanced
; by 8 i64 elements (64 bytes) is stored to %ptr, and the pattern expects that
; advance as the post-index immediate #64 on the load.
1245 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4(i64* %A, i64** %ptr) {
1246 ;CHECK-LABEL: test_v2i64_post_imm_ld4:
1247 ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
1248 %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
1249 %tmp = getelementptr i64, i64* %A, i32 8
1250 store i64* %tmp, i64** %ptr
1251 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
; ld4 of four <2 x i64> vectors with a register post-increment: %A advanced by
; %inc i64 elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1254 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
1255 ;CHECK-LABEL: test_v2i64_post_reg_ld4:
1256 ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1257 %ld4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64* %A)
1258 %tmp = getelementptr i64, i64* %A, i64 %inc
1259 store i64* %tmp, i64** %ptr
1260 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
1263 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0i64(i64*)
; ld4 intrinsic on four <1 x i64> vectors with an immediate post-increment:
; %A advanced by 4 i64 elements (32 bytes) is stored to %ptr and expected as
; the post-index immediate #32.
; Note: the expected mnemonic is ld1.1d with a four-register list, not ld4.
1266 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4(i64* %A, i64** %ptr) {
1267 ;CHECK-LABEL: test_v1i64_post_imm_ld4:
1268 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
1269 %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
1270 %tmp = getelementptr i64, i64* %A, i32 4
1271 store i64* %tmp, i64** %ptr
1272 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
; ld4 intrinsic on four <1 x i64> vectors with a register post-increment:
; %A advanced by %inc i64 elements is stored to %ptr, and the pattern accepts
; any x-register as the post-index operand.
; Note: the expected mnemonic is ld1.1d with a four-register list, not ld4.
1275 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4(i64* %A, i64** %ptr, i64 %inc) {
1276 ;CHECK-LABEL: test_v1i64_post_reg_ld4:
1277 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1278 %ld4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64* %A)
1279 %tmp = getelementptr i64, i64* %A, i64 %inc
1280 store i64* %tmp, i64** %ptr
1281 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
1284 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0i64(i64*)
; ld4 of four <4 x float> vectors with an immediate post-increment: %A advanced
; by 16 float elements (64 bytes) is stored to %ptr, and the pattern expects
; that advance as the post-index immediate #64 on the load.
1287 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4(float* %A, float** %ptr) {
1288 ;CHECK-LABEL: test_v4f32_post_imm_ld4:
1289 ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], #64
1290 %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
1291 %tmp = getelementptr float, float* %A, i32 16
1292 store float* %tmp, float** %ptr
1293 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
; ld4 of four <4 x float> vectors with a register post-increment: %A advanced
; by %inc float elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1296 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
1297 ;CHECK-LABEL: test_v4f32_post_reg_ld4:
1298 ;CHECK: ld4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1299 %ld4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float* %A)
1300 %tmp = getelementptr float, float* %A, i64 %inc
1301 store float* %tmp, float** %ptr
1302 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
1305 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4.v4f32.p0f32(float*)
; ld4 of four <2 x float> vectors with an immediate post-increment: %A advanced
; by 8 float elements (32 bytes) is stored to %ptr, and the pattern expects
; that advance as the post-index immediate #32 on the load.
1308 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4(float* %A, float** %ptr) {
1309 ;CHECK-LABEL: test_v2f32_post_imm_ld4:
1310 ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], #32
1311 %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
1312 %tmp = getelementptr float, float* %A, i32 8
1313 store float* %tmp, float** %ptr
1314 ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
; ld4 of four <2 x float> vectors with a register post-increment: %A advanced
; by %inc float elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1317 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4(float* %A, float** %ptr, i64 %inc) {
1318 ;CHECK-LABEL: test_v2f32_post_reg_ld4:
1319 ;CHECK: ld4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1320 %ld4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float* %A)
1321 %tmp = getelementptr float, float* %A, i64 %inc
1322 store float* %tmp, float** %ptr
1323 ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
1326 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4.v2f32.p0f32(float*)
; ld4 of four <2 x double> vectors with an immediate post-increment: %A
; advanced by 8 double elements (64 bytes) is stored to %ptr, and the pattern
; expects that advance as the post-index immediate #64 on the load.
1329 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4(double* %A, double** %ptr) {
1330 ;CHECK-LABEL: test_v2f64_post_imm_ld4:
1331 ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], #64
1332 %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
1333 %tmp = getelementptr double, double* %A, i32 8
1334 store double* %tmp, double** %ptr
1335 ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
; ld4 of four <2 x double> vectors with a register post-increment: %A advanced
; by %inc double elements (a run-time count) is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand of the load.
1338 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
1339 ;CHECK-LABEL: test_v2f64_post_reg_ld4:
1340 ;CHECK: ld4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1341 %ld4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double* %A)
1342 %tmp = getelementptr double, double* %A, i64 %inc
1343 store double* %tmp, double** %ptr
1344 ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
1347 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4.v2f64.p0f64(double*)
; ld4 intrinsic on four <1 x double> vectors with an immediate post-increment:
; %A advanced by 4 double elements (32 bytes) is stored to %ptr and expected as
; the post-index immediate #32.
; Note: the expected mnemonic is ld1.1d with a four-register list, not ld4.
1350 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4(double* %A, double** %ptr) {
1351 ;CHECK-LABEL: test_v1f64_post_imm_ld4:
1352 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
1353 %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
1354 %tmp = getelementptr double, double* %A, i32 4
1355 store double* %tmp, double** %ptr
1356 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
; ld4 intrinsic on four <1 x double> vectors with a register post-increment:
; %A advanced by %inc double elements is stored to %ptr, and the pattern
; accepts any x-register as the post-index operand.
; Note: the expected mnemonic is ld1.1d with a four-register list, not ld4.
1359 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4(double* %A, double** %ptr, i64 %inc) {
1360 ;CHECK-LABEL: test_v1f64_post_reg_ld4:
1361 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1362 %ld4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double* %A)
1363 %tmp = getelementptr double, double* %A, i64 %inc
1364 store double* %tmp, double** %ptr
1365 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
1368 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4.v1f64.p0f64(double*)
; ld1x2 intrinsic (two <16 x i8> vectors) with an immediate post-increment:
; %A advanced by 32 i8 elements (32 bytes) is stored to %ptr, and the pattern
; expects a two-register ld1.16b with the post-index immediate #32.
1370 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
1371 ;CHECK-LABEL: test_v16i8_post_imm_ld1x2:
1372 ;CHECK: ld1.16b { v0, v1 }, [x0], #32
1373 %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
1374 %tmp = getelementptr i8, i8* %A, i32 32
1375 store i8* %tmp, i8** %ptr
1376 ret { <16 x i8>, <16 x i8> } %ld1x2
; ld1x2 intrinsic (two <16 x i8> vectors) with a register post-increment: %A
; advanced by %inc i8 elements (a run-time count) is stored to %ptr, and the
; pattern expects a two-register ld1.16b post-indexed by some x-register.
1379 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
1380 ;CHECK-LABEL: test_v16i8_post_reg_ld1x2:
1381 ;CHECK: ld1.16b { v0, v1 }, [x0], x{{[0-9]+}}
1382 %ld1x2 = tail call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8* %A)
1383 %tmp = getelementptr i8, i8* %A, i64 %inc
1384 store i8* %tmp, i8** %ptr
1385 ret { <16 x i8>, <16 x i8> } %ld1x2
1388 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x2.v16i8.p0i8(i8*)
; ld1x2 intrinsic (two <8 x i8> vectors) with an immediate post-increment:
; %A advanced by 16 i8 elements (16 bytes) is stored to %ptr, and the pattern
; expects a two-register ld1.8b with the post-index immediate #16.
1391 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x2(i8* %A, i8** %ptr) {
1392 ;CHECK-LABEL: test_v8i8_post_imm_ld1x2:
1393 ;CHECK: ld1.8b { v0, v1 }, [x0], #16
1394 %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
1395 %tmp = getelementptr i8, i8* %A, i32 16
1396 store i8* %tmp, i8** %ptr
1397 ret { <8 x i8>, <8 x i8> } %ld1x2
; ld1x2 intrinsic (two <8 x i8> vectors) with a register post-increment: %A
; advanced by %inc i8 elements (a run-time count) is stored to %ptr, and the
; pattern expects a two-register ld1.8b post-indexed by some x-register.
1400 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x2(i8* %A, i8** %ptr, i64 %inc) {
1401 ;CHECK-LABEL: test_v8i8_post_reg_ld1x2:
1402 ;CHECK: ld1.8b { v0, v1 }, [x0], x{{[0-9]+}}
1403 %ld1x2 = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8* %A)
1404 %tmp = getelementptr i8, i8* %A, i64 %inc
1405 store i8* %tmp, i8** %ptr
1406 ret { <8 x i8>, <8 x i8> } %ld1x2
1409 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x2.v8i8.p0i8(i8*)
; ld1x2 intrinsic (two <8 x i16> vectors) with an immediate post-increment:
; %A advanced by 16 i16 elements (32 bytes) is stored to %ptr, and the pattern
; expects a two-register ld1.8h with the post-index immediate #32.
1412 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
1413 ;CHECK-LABEL: test_v8i16_post_imm_ld1x2:
1414 ;CHECK: ld1.8h { v0, v1 }, [x0], #32
1415 %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
1416 %tmp = getelementptr i16, i16* %A, i32 16
1417 store i16* %tmp, i16** %ptr
1418 ret { <8 x i16>, <8 x i16> } %ld1x2
; ld1x2 intrinsic (two <8 x i16> vectors) with a register post-increment: %A
; advanced by %inc i16 elements (a run-time count) is stored to %ptr, and the
; pattern expects a two-register ld1.8h post-indexed by some x-register.
1421 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
1422 ;CHECK-LABEL: test_v8i16_post_reg_ld1x2:
1423 ;CHECK: ld1.8h { v0, v1 }, [x0], x{{[0-9]+}}
1424 %ld1x2 = tail call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16* %A)
1425 %tmp = getelementptr i16, i16* %A, i64 %inc
1426 store i16* %tmp, i16** %ptr
1427 ret { <8 x i16>, <8 x i16> } %ld1x2
1430 declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x2.v8i16.p0i16(i16*)
; ld1x2 intrinsic (two <4 x i16> vectors) with an immediate post-increment:
; %A advanced by 8 i16 elements (16 bytes) is stored to %ptr, and the pattern
; expects a two-register ld1.4h with the post-index immediate #16.
1433 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x2(i16* %A, i16** %ptr) {
1434 ;CHECK-LABEL: test_v4i16_post_imm_ld1x2:
1435 ;CHECK: ld1.4h { v0, v1 }, [x0], #16
1436 %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
1437 %tmp = getelementptr i16, i16* %A, i32 8
1438 store i16* %tmp, i16** %ptr
1439 ret { <4 x i16>, <4 x i16> } %ld1x2
; ld1x2 intrinsic (two <4 x i16> vectors) with a register post-increment: %A
; advanced by %inc i16 elements (a run-time count) is stored to %ptr, and the
; pattern expects a two-register ld1.4h post-indexed by some x-register.
1442 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x2(i16* %A, i16** %ptr, i64 %inc) {
1443 ;CHECK-LABEL: test_v4i16_post_reg_ld1x2:
1444 ;CHECK: ld1.4h { v0, v1 }, [x0], x{{[0-9]+}}
1445 %ld1x2 = tail call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16* %A)
1446 %tmp = getelementptr i16, i16* %A, i64 %inc
1447 store i16* %tmp, i16** %ptr
1448 ret { <4 x i16>, <4 x i16> } %ld1x2
1451 declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x2.v4i16.p0i16(i16*)
; ld1x2 intrinsic (two <4 x i32> vectors) with an immediate post-increment:
; %A advanced by 8 i32 elements (32 bytes) is stored to %ptr, and the pattern
; expects a two-register ld1.4s with the post-index immediate #32.
1454 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
1455 ;CHECK-LABEL: test_v4i32_post_imm_ld1x2:
1456 ;CHECK: ld1.4s { v0, v1 }, [x0], #32
1457 %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
1458 %tmp = getelementptr i32, i32* %A, i32 8
1459 store i32* %tmp, i32** %ptr
1460 ret { <4 x i32>, <4 x i32> } %ld1x2
; ld1x2 intrinsic (two <4 x i32> vectors) with a register post-increment: %A
; advanced by %inc i32 elements (a run-time count) is stored to %ptr, and the
; pattern expects a two-register ld1.4s post-indexed by some x-register.
1463 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
1464 ;CHECK-LABEL: test_v4i32_post_reg_ld1x2:
1465 ;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
1466 %ld1x2 = tail call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32* %A)
1467 %tmp = getelementptr i32, i32* %A, i64 %inc
1468 store i32* %tmp, i32** %ptr
1469 ret { <4 x i32>, <4 x i32> } %ld1x2
1472 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x2.v4i32.p0i32(i32*)
; ld1x2 intrinsic (two <2 x i32> vectors) with an immediate post-increment:
; %A advanced by 4 i32 elements (16 bytes) is stored to %ptr, and the pattern
; expects a two-register ld1.2s with the post-index immediate #16.
1475 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x2(i32* %A, i32** %ptr) {
1476 ;CHECK-LABEL: test_v2i32_post_imm_ld1x2:
1477 ;CHECK: ld1.2s { v0, v1 }, [x0], #16
1478 %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
1479 %tmp = getelementptr i32, i32* %A, i32 4
1480 store i32* %tmp, i32** %ptr
1481 ret { <2 x i32>, <2 x i32> } %ld1x2
; ld1x2 intrinsic (two <2 x i32> vectors) with a register post-increment: %A
; advanced by %inc i32 elements (a run-time count) is stored to %ptr, and the
; pattern expects a two-register ld1.2s post-indexed by some x-register.
1484 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x2(i32* %A, i32** %ptr, i64 %inc) {
1485 ;CHECK-LABEL: test_v2i32_post_reg_ld1x2:
1486 ;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
1487 %ld1x2 = tail call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32* %A)
1488 %tmp = getelementptr i32, i32* %A, i64 %inc
1489 store i32* %tmp, i32** %ptr
1490 ret { <2 x i32>, <2 x i32> } %ld1x2
1493 declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x2.v2i32.p0i32(i32*)
; ld1x2 intrinsic (two <2 x i64> vectors) with an immediate post-increment:
; %A advanced by 4 i64 elements (32 bytes) is stored to %ptr, and the pattern
; expects a two-register ld1.2d with the post-index immediate #32.
1496 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
1497 ;CHECK-LABEL: test_v2i64_post_imm_ld1x2:
1498 ;CHECK: ld1.2d { v0, v1 }, [x0], #32
1499 %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
1500 %tmp = getelementptr i64, i64* %A, i32 4
1501 store i64* %tmp, i64** %ptr
1502 ret { <2 x i64>, <2 x i64> } %ld1x2
; ld1x2 intrinsic (two <2 x i64> vectors) with a register post-increment: %A
; advanced by %inc i64 elements (a run-time count) is stored to %ptr, and the
; pattern expects a two-register ld1.2d post-indexed by some x-register.
1505 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
1506 ;CHECK-LABEL: test_v2i64_post_reg_ld1x2:
1507 ;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
1508 %ld1x2 = tail call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64* %A)
1509 %tmp = getelementptr i64, i64* %A, i64 %inc
1510 store i64* %tmp, i64** %ptr
1511 ret { <2 x i64>, <2 x i64> } %ld1x2
1514 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x2.v2i64.p0i64(i64*)
; ld1x2 intrinsic (two <1 x i64> vectors) with an immediate post-increment:
; %A advanced by 2 i64 elements (16 bytes) is stored to %ptr, and the pattern
; expects a two-register ld1.1d with the post-index immediate #16.
1517 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x2(i64* %A, i64** %ptr) {
1518 ;CHECK-LABEL: test_v1i64_post_imm_ld1x2:
1519 ;CHECK: ld1.1d { v0, v1 }, [x0], #16
1520 %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
1521 %tmp = getelementptr i64, i64* %A, i32 2
1522 store i64* %tmp, i64** %ptr
1523 ret { <1 x i64>, <1 x i64> } %ld1x2
; ld1x2 intrinsic (two <1 x i64> vectors) with a register post-increment: %A
; advanced by %inc i64 elements (a run-time count) is stored to %ptr, and the
; pattern expects a two-register ld1.1d post-indexed by some x-register.
1526 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x2(i64* %A, i64** %ptr, i64 %inc) {
1527 ;CHECK-LABEL: test_v1i64_post_reg_ld1x2:
1528 ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
1529 %ld1x2 = tail call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64* %A)
1530 %tmp = getelementptr i64, i64* %A, i64 %inc
1531 store i64* %tmp, i64** %ptr
1532 ret { <1 x i64>, <1 x i64> } %ld1x2
1535 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x2.v1i64.p0i64(i64*)
; ld1x2 intrinsic (two <4 x float> vectors) with an immediate post-increment:
; %A advanced by 8 float elements (32 bytes) is stored to %ptr, and the pattern
; expects a two-register ld1.4s with the post-index immediate #32.
1538 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x2(float* %A, float** %ptr) {
1539 ;CHECK-LABEL: test_v4f32_post_imm_ld1x2:
1540 ;CHECK: ld1.4s { v0, v1 }, [x0], #32
1541 %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
1542 %tmp = getelementptr float, float* %A, i32 8
1543 store float* %tmp, float** %ptr
1544 ret { <4 x float>, <4 x float> } %ld1x2
; ld1x2 intrinsic (two <4 x float> vectors) with a register post-increment: %A
; advanced by %inc float elements (a run-time count) is stored to %ptr, and the
; pattern expects a two-register ld1.4s post-indexed by some x-register.
1547 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
1548 ;CHECK-LABEL: test_v4f32_post_reg_ld1x2:
1549 ;CHECK: ld1.4s { v0, v1 }, [x0], x{{[0-9]+}}
1550 %ld1x2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float* %A)
1551 %tmp = getelementptr float, float* %A, i64 %inc
1552 store float* %tmp, float** %ptr
1553 ret { <4 x float>, <4 x float> } %ld1x2
1556 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x2.v4f32.p0f32(float*)
; Post-increment by immediate: ld1x2 of <2 x float> from %A, then %A + 4 x float (16 bytes)
; is stored to %ptr; the expected instruction is ld1.2s with immediate writeback.
1559 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x2(float* %A, float** %ptr) {
1560 ;CHECK-LABEL: test_v2f32_post_imm_ld1x2:
1561 ;CHECK: ld1.2s { v0, v1 }, [x0], #16
1562 %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
1563 %tmp = getelementptr float, float* %A, i32 4
1564 store float* %tmp, float** %ptr
1565 ret { <2 x float>, <2 x float> } %ld1x2
; Post-increment by register: ld1x2 of <2 x float> from %A, then %A + %inc x float
; is stored to %ptr; the expected instruction is ld1.2s with register writeback.
1568 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x2(float* %A, float** %ptr, i64 %inc) {
1569 ;CHECK-LABEL: test_v2f32_post_reg_ld1x2:
1570 ;CHECK: ld1.2s { v0, v1 }, [x0], x{{[0-9]+}}
1571 %ld1x2 = tail call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float* %A)
1572 %tmp = getelementptr float, float* %A, i64 %inc
1573 store float* %tmp, float** %ptr
1574 ret { <2 x float>, <2 x float> } %ld1x2
1577 declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x2.v2f32.p0f32(float*)
; Post-increment by immediate: ld1x2 of <2 x double> from %A, then %A + 4 x double (32 bytes)
; is stored to %ptr; the expected instruction is ld1.2d with immediate writeback.
1580 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x2(double* %A, double** %ptr) {
1581 ;CHECK-LABEL: test_v2f64_post_imm_ld1x2:
1582 ;CHECK: ld1.2d { v0, v1 }, [x0], #32
1583 %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
1584 %tmp = getelementptr double, double* %A, i32 4
1585 store double* %tmp, double** %ptr
1586 ret { <2 x double>, <2 x double> } %ld1x2
; Post-increment by register: ld1x2 of <2 x double> from %A, then %A + %inc x double
; is stored to %ptr; the expected instruction is ld1.2d with register writeback.
1589 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
1590 ;CHECK-LABEL: test_v2f64_post_reg_ld1x2:
1591 ;CHECK: ld1.2d { v0, v1 }, [x0], x{{[0-9]+}}
1592 %ld1x2 = tail call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double* %A)
1593 %tmp = getelementptr double, double* %A, i64 %inc
1594 store double* %tmp, double** %ptr
1595 ret { <2 x double>, <2 x double> } %ld1x2
1598 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x2.v2f64.p0f64(double*)
; Post-increment by immediate: ld1x2 of <1 x double> from %A, then %A + 2 x double (16 bytes)
; is stored to %ptr; the expected instruction is ld1.1d with immediate writeback.
1601 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x2(double* %A, double** %ptr) {
1602 ;CHECK-LABEL: test_v1f64_post_imm_ld1x2:
1603 ;CHECK: ld1.1d { v0, v1 }, [x0], #16
1604 %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
1605 %tmp = getelementptr double, double* %A, i32 2
1606 store double* %tmp, double** %ptr
1607 ret { <1 x double>, <1 x double> } %ld1x2
; Post-increment by register: ld1x2 of <1 x double> from %A, then %A + %inc x double
; is stored to %ptr; the expected instruction is ld1.1d with register writeback.
1610 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x2(double* %A, double** %ptr, i64 %inc) {
1611 ;CHECK-LABEL: test_v1f64_post_reg_ld1x2:
1612 ;CHECK: ld1.1d { v0, v1 }, [x0], x{{[0-9]+}}
1613 %ld1x2 = tail call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double* %A)
1614 %tmp = getelementptr double, double* %A, i64 %inc
1615 store double* %tmp, double** %ptr
1616 ret { <1 x double>, <1 x double> } %ld1x2
1619 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x2.v1f64.p0f64(double*)
; Post-increment by immediate: ld1x3 of <16 x i8> from %A, then %A + 48 x i8 (48 bytes)
; is stored to %ptr; the expected instruction is ld1.16b with immediate writeback.
1622 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
1623 ;CHECK-LABEL: test_v16i8_post_imm_ld1x3:
1624 ;CHECK: ld1.16b { v0, v1, v2 }, [x0], #48
1625 %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
1626 %tmp = getelementptr i8, i8* %A, i32 48
1627 store i8* %tmp, i8** %ptr
1628 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
; Post-increment by register: ld1x3 of <16 x i8> from %A, then %A + %inc x i8
; is stored to %ptr; the expected instruction is ld1.16b with register writeback.
1631 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
1632 ;CHECK-LABEL: test_v16i8_post_reg_ld1x3:
1633 ;CHECK: ld1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
1634 %ld1x3 = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8* %A)
1635 %tmp = getelementptr i8, i8* %A, i64 %inc
1636 store i8* %tmp, i8** %ptr
1637 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld1x3
1640 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x3.v16i8.p0i8(i8*)
; Post-increment by immediate: ld1x3 of <8 x i8> from %A, then %A + 24 x i8 (24 bytes)
; is stored to %ptr; the expected instruction is ld1.8b with immediate writeback.
1643 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x3(i8* %A, i8** %ptr) {
1644 ;CHECK-LABEL: test_v8i8_post_imm_ld1x3:
1645 ;CHECK: ld1.8b { v0, v1, v2 }, [x0], #24
1646 %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
1647 %tmp = getelementptr i8, i8* %A, i32 24
1648 store i8* %tmp, i8** %ptr
1649 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
; Post-increment by register: ld1x3 of <8 x i8> from %A, then %A + %inc x i8
; is stored to %ptr; the expected instruction is ld1.8b with register writeback.
1652 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x3(i8* %A, i8** %ptr, i64 %inc) {
1653 ;CHECK-LABEL: test_v8i8_post_reg_ld1x3:
1654 ;CHECK: ld1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
1655 %ld1x3 = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8* %A)
1656 %tmp = getelementptr i8, i8* %A, i64 %inc
1657 store i8* %tmp, i8** %ptr
1658 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld1x3
1661 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x3.v8i8.p0i8(i8*)
; Post-increment by immediate: ld1x3 of <8 x i16> from %A, then %A + 24 x i16 (48 bytes)
; is stored to %ptr; the expected instruction is ld1.8h with immediate writeback.
1664 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
1665 ;CHECK-LABEL: test_v8i16_post_imm_ld1x3:
1666 ;CHECK: ld1.8h { v0, v1, v2 }, [x0], #48
1667 %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
1668 %tmp = getelementptr i16, i16* %A, i32 24
1669 store i16* %tmp, i16** %ptr
1670 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
; Post-increment by register: ld1x3 of <8 x i16> from %A, then %A + %inc x i16
; is stored to %ptr; the expected instruction is ld1.8h with register writeback.
1673 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
1674 ;CHECK-LABEL: test_v8i16_post_reg_ld1x3:
1675 ;CHECK: ld1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
1676 %ld1x3 = tail call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16* %A)
1677 %tmp = getelementptr i16, i16* %A, i64 %inc
1678 store i16* %tmp, i16** %ptr
1679 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld1x3
1682 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x3.v8i16.p0i16(i16*)
; Post-increment by immediate: ld1x3 of <4 x i16> from %A, then %A + 12 x i16 (24 bytes)
; is stored to %ptr; the expected instruction is ld1.4h with immediate writeback.
1685 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x3(i16* %A, i16** %ptr) {
1686 ;CHECK-LABEL: test_v4i16_post_imm_ld1x3:
1687 ;CHECK: ld1.4h { v0, v1, v2 }, [x0], #24
1688 %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
1689 %tmp = getelementptr i16, i16* %A, i32 12
1690 store i16* %tmp, i16** %ptr
1691 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
; Post-increment by register: ld1x3 of <4 x i16> from %A, then %A + %inc x i16
; is stored to %ptr; the expected instruction is ld1.4h with register writeback.
1694 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x3(i16* %A, i16** %ptr, i64 %inc) {
1695 ;CHECK-LABEL: test_v4i16_post_reg_ld1x3:
1696 ;CHECK: ld1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
1697 %ld1x3 = tail call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16* %A)
1698 %tmp = getelementptr i16, i16* %A, i64 %inc
1699 store i16* %tmp, i16** %ptr
1700 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld1x3
1703 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x3.v4i16.p0i16(i16*)
; Post-increment by immediate: ld1x3 of <4 x i32> from %A, then %A + 12 x i32 (48 bytes)
; is stored to %ptr; the expected instruction is ld1.4s with immediate writeback.
1706 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
1707 ;CHECK-LABEL: test_v4i32_post_imm_ld1x3:
1708 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
1709 %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
1710 %tmp = getelementptr i32, i32* %A, i32 12
1711 store i32* %tmp, i32** %ptr
1712 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
; Post-increment by register: ld1x3 of <4 x i32> from %A, then %A + %inc x i32
; is stored to %ptr; the expected instruction is ld1.4s with register writeback.
1715 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
1716 ;CHECK-LABEL: test_v4i32_post_reg_ld1x3:
1717 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
1718 %ld1x3 = tail call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32* %A)
1719 %tmp = getelementptr i32, i32* %A, i64 %inc
1720 store i32* %tmp, i32** %ptr
1721 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld1x3
1724 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x3.v4i32.p0i32(i32*)
; Post-increment by immediate: ld1x3 of <2 x i32> from %A, then %A + 6 x i32 (24 bytes)
; is stored to %ptr; the expected instruction is ld1.2s with immediate writeback.
1727 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x3(i32* %A, i32** %ptr) {
1728 ;CHECK-LABEL: test_v2i32_post_imm_ld1x3:
1729 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
1730 %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
1731 %tmp = getelementptr i32, i32* %A, i32 6
1732 store i32* %tmp, i32** %ptr
1733 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
; Post-increment by register: ld1x3 of <2 x i32> from %A, then %A + %inc x i32
; is stored to %ptr; the expected instruction is ld1.2s with register writeback.
1736 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x3(i32* %A, i32** %ptr, i64 %inc) {
1737 ;CHECK-LABEL: test_v2i32_post_reg_ld1x3:
1738 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
1739 %ld1x3 = tail call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32* %A)
1740 %tmp = getelementptr i32, i32* %A, i64 %inc
1741 store i32* %tmp, i32** %ptr
1742 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld1x3
1745 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x3.v2i32.p0i32(i32*)
; Post-increment by immediate: ld1x3 of <2 x i64> from %A, then %A + 6 x i64 (48 bytes)
; is stored to %ptr; the expected instruction is ld1.2d with immediate writeback.
1748 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
1749 ;CHECK-LABEL: test_v2i64_post_imm_ld1x3:
1750 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
1751 %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
1752 %tmp = getelementptr i64, i64* %A, i32 6
1753 store i64* %tmp, i64** %ptr
1754 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
; Post-increment by register: ld1x3 of <2 x i64> from %A, then %A + %inc x i64
; is stored to %ptr; the expected instruction is ld1.2d with register writeback.
1757 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
1758 ;CHECK-LABEL: test_v2i64_post_reg_ld1x3:
1759 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
1760 %ld1x3 = tail call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64* %A)
1761 %tmp = getelementptr i64, i64* %A, i64 %inc
1762 store i64* %tmp, i64** %ptr
1763 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld1x3
1766 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x3.v2i64.p0i64(i64*)
; Post-increment by immediate: ld1x3 of <1 x i64> from %A, then %A + 3 x i64 (24 bytes)
; is stored to %ptr; the expected instruction is ld1.1d with immediate writeback.
1769 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x3(i64* %A, i64** %ptr) {
1770 ;CHECK-LABEL: test_v1i64_post_imm_ld1x3:
1771 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
1772 %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
1773 %tmp = getelementptr i64, i64* %A, i32 3
1774 store i64* %tmp, i64** %ptr
1775 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
; Post-increment by register: ld1x3 of <1 x i64> from %A, then %A + %inc x i64
; is stored to %ptr; the expected instruction is ld1.1d with register writeback.
1778 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x3(i64* %A, i64** %ptr, i64 %inc) {
1779 ;CHECK-LABEL: test_v1i64_post_reg_ld1x3:
1780 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
1781 %ld1x3 = tail call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64* %A)
1782 %tmp = getelementptr i64, i64* %A, i64 %inc
1783 store i64* %tmp, i64** %ptr
1784 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld1x3
1787 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x3.v1i64.p0i64(i64*)
; Post-increment by immediate: ld1x3 of <4 x float> from %A, then %A + 12 x float (48 bytes)
; is stored to %ptr; the expected instruction is ld1.4s with immediate writeback.
1790 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x3(float* %A, float** %ptr) {
1791 ;CHECK-LABEL: test_v4f32_post_imm_ld1x3:
1792 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], #48
1793 %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
1794 %tmp = getelementptr float, float* %A, i32 12
1795 store float* %tmp, float** %ptr
1796 ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
; Post-increment by register: ld1x3 of <4 x float> from %A, then %A + %inc x float
; is stored to %ptr; the expected instruction is ld1.4s with register writeback.
1799 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
1800 ;CHECK-LABEL: test_v4f32_post_reg_ld1x3:
1801 ;CHECK: ld1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
1802 %ld1x3 = tail call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float* %A)
1803 %tmp = getelementptr float, float* %A, i64 %inc
1804 store float* %tmp, float** %ptr
1805 ret { <4 x float>, <4 x float>, <4 x float> } %ld1x3
1808 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x3.v4f32.p0f32(float*)
; Post-increment by immediate: ld1x3 of <2 x float> from %A, then %A + 6 x float (24 bytes)
; is stored to %ptr; the expected instruction is ld1.2s with immediate writeback.
1811 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x3(float* %A, float** %ptr) {
1812 ;CHECK-LABEL: test_v2f32_post_imm_ld1x3:
1813 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], #24
1814 %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
1815 %tmp = getelementptr float, float* %A, i32 6
1816 store float* %tmp, float** %ptr
1817 ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
; Post-increment by register: ld1x3 of <2 x float> from %A, then %A + %inc x float
; is stored to %ptr; the expected instruction is ld1.2s with register writeback.
1820 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x3(float* %A, float** %ptr, i64 %inc) {
1821 ;CHECK-LABEL: test_v2f32_post_reg_ld1x3:
1822 ;CHECK: ld1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
1823 %ld1x3 = tail call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float* %A)
1824 %tmp = getelementptr float, float* %A, i64 %inc
1825 store float* %tmp, float** %ptr
1826 ret { <2 x float>, <2 x float>, <2 x float> } %ld1x3
1829 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x3.v2f32.p0f32(float*)
; Post-increment by immediate: ld1x3 of <2 x double> from %A, then %A + 6 x double (48 bytes)
; is stored to %ptr; the expected instruction is ld1.2d with immediate writeback.
1832 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x3(double* %A, double** %ptr) {
1833 ;CHECK-LABEL: test_v2f64_post_imm_ld1x3:
1834 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], #48
1835 %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
1836 %tmp = getelementptr double, double* %A, i32 6
1837 store double* %tmp, double** %ptr
1838 ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
; Post-increment by register: ld1x3 of <2 x double> from %A, then %A + %inc x double
; is stored to %ptr; the expected instruction is ld1.2d with register writeback.
1841 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
1842 ;CHECK-LABEL: test_v2f64_post_reg_ld1x3:
1843 ;CHECK: ld1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
1844 %ld1x3 = tail call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double* %A)
1845 %tmp = getelementptr double, double* %A, i64 %inc
1846 store double* %tmp, double** %ptr
1847 ret { <2 x double>, <2 x double>, <2 x double> } %ld1x3
1850 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x3.v2f64.p0f64(double*)
; Post-increment by immediate: ld1x3 of <1 x double> from %A, then %A + 3 x double (24 bytes)
; is stored to %ptr; the expected instruction is ld1.1d with immediate writeback.
1853 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x3(double* %A, double** %ptr) {
1854 ;CHECK-LABEL: test_v1f64_post_imm_ld1x3:
1855 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], #24
1856 %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
1857 %tmp = getelementptr double, double* %A, i32 3
1858 store double* %tmp, double** %ptr
1859 ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
; Post-increment by register: ld1x3 of <1 x double> from %A, then %A + %inc x double
; is stored to %ptr; the expected instruction is ld1.1d with register writeback.
1862 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x3(double* %A, double** %ptr, i64 %inc) {
1863 ;CHECK-LABEL: test_v1f64_post_reg_ld1x3:
1864 ;CHECK: ld1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
1865 %ld1x3 = tail call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double* %A)
1866 %tmp = getelementptr double, double* %A, i64 %inc
1867 store double* %tmp, double** %ptr
1868 ret { <1 x double>, <1 x double>, <1 x double> } %ld1x3
1871 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x3.v1f64.p0f64(double*)
; Post-increment by immediate: ld1x4 of <16 x i8> from %A, then %A + 64 x i8 (64 bytes)
; is stored to %ptr; the expected instruction is ld1.16b with immediate writeback.
1874 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
1875 ;CHECK-LABEL: test_v16i8_post_imm_ld1x4:
1876 ;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], #64
1877 %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
1878 %tmp = getelementptr i8, i8* %A, i32 64
1879 store i8* %tmp, i8** %ptr
1880 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
; Post-increment by register: ld1x4 of <16 x i8> from %A, then %A + %inc x i8
; is stored to %ptr; the expected instruction is ld1.16b with register writeback.
1883 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
1884 ;CHECK-LABEL: test_v16i8_post_reg_ld1x4:
1885 ;CHECK: ld1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1886 %ld1x4 = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8* %A)
1887 %tmp = getelementptr i8, i8* %A, i64 %inc
1888 store i8* %tmp, i8** %ptr
1889 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld1x4
1892 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld1x4.v16i8.p0i8(i8*)
; Post-increment by immediate: ld1x4 of <8 x i8> from %A, then %A + 32 x i8 (32 bytes)
; is stored to %ptr; the expected instruction is ld1.8b with immediate writeback.
1895 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld1x4(i8* %A, i8** %ptr) {
1896 ;CHECK-LABEL: test_v8i8_post_imm_ld1x4:
1897 ;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], #32
1898 %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
1899 %tmp = getelementptr i8, i8* %A, i32 32
1900 store i8* %tmp, i8** %ptr
1901 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
; Post-increment by register: ld1x4 of <8 x i8> from %A, then %A + %inc x i8
; is stored to %ptr; the expected instruction is ld1.8b with register writeback.
1904 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld1x4(i8* %A, i8** %ptr, i64 %inc) {
1905 ;CHECK-LABEL: test_v8i8_post_reg_ld1x4:
1906 ;CHECK: ld1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1907 %ld1x4 = tail call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8* %A)
1908 %tmp = getelementptr i8, i8* %A, i64 %inc
1909 store i8* %tmp, i8** %ptr
1910 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld1x4
1913 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld1x4.v8i8.p0i8(i8*)
; Post-increment by immediate: ld1x4 of <8 x i16> from %A, then %A + 32 x i16 (64 bytes)
; is stored to %ptr; the expected instruction is ld1.8h with immediate writeback.
1916 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
1917 ;CHECK-LABEL: test_v8i16_post_imm_ld1x4:
1918 ;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], #64
1919 %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
1920 %tmp = getelementptr i16, i16* %A, i32 32
1921 store i16* %tmp, i16** %ptr
1922 ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
; Post-increment by register: ld1x4 of <8 x i16> from %A, then %A + %inc x i16
; is stored to %ptr; the expected instruction is ld1.8h with register writeback.
1925 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
1926 ;CHECK-LABEL: test_v8i16_post_reg_ld1x4:
1927 ;CHECK: ld1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1928 %ld1x4 = tail call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16* %A)
1929 %tmp = getelementptr i16, i16* %A, i64 %inc
1930 store i16* %tmp, i16** %ptr
1931 ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld1x4
1934 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld1x4.v8i16.p0i16(i16*)
; Post-increment by immediate: ld1x4 of <4 x i16> from %A, then %A + 16 x i16 (32 bytes)
; is stored to %ptr; the expected instruction is ld1.4h with immediate writeback.
1937 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld1x4(i16* %A, i16** %ptr) {
1938 ;CHECK-LABEL: test_v4i16_post_imm_ld1x4:
1939 ;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], #32
1940 %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
1941 %tmp = getelementptr i16, i16* %A, i32 16
1942 store i16* %tmp, i16** %ptr
1943 ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
; Post-increment by register: ld1x4 of <4 x i16> from %A, then %A + %inc x i16
; is stored to %ptr; the expected instruction is ld1.4h with register writeback.
1946 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld1x4(i16* %A, i16** %ptr, i64 %inc) {
1947 ;CHECK-LABEL: test_v4i16_post_reg_ld1x4:
1948 ;CHECK: ld1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1949 %ld1x4 = tail call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16* %A)
1950 %tmp = getelementptr i16, i16* %A, i64 %inc
1951 store i16* %tmp, i16** %ptr
1952 ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld1x4
1955 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld1x4.v4i16.p0i16(i16*)
; Post-increment by immediate: ld1x4 of <4 x i32> from %A, then %A + 16 x i32 (64 bytes)
; is stored to %ptr; the expected instruction is ld1.4s with immediate writeback.
1958 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
1959 ;CHECK-LABEL: test_v4i32_post_imm_ld1x4:
1960 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
1961 %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
1962 %tmp = getelementptr i32, i32* %A, i32 16
1963 store i32* %tmp, i32** %ptr
1964 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
; Post-increment by register: ld1x4 of <4 x i32> from %A, then %A + %inc x i32
; is stored to %ptr; the expected instruction is ld1.4s with register writeback.
1967 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
1968 ;CHECK-LABEL: test_v4i32_post_reg_ld1x4:
1969 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1970 %ld1x4 = tail call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32* %A)
1971 %tmp = getelementptr i32, i32* %A, i64 %inc
1972 store i32* %tmp, i32** %ptr
1973 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld1x4
1976 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld1x4.v4i32.p0i32(i32*)
; Post-increment by immediate: ld1x4 of <2 x i32> from %A, then %A + 8 x i32 (32 bytes)
; is stored to %ptr; the expected instruction is ld1.2s with immediate writeback.
1979 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld1x4(i32* %A, i32** %ptr) {
1980 ;CHECK-LABEL: test_v2i32_post_imm_ld1x4:
1981 ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
1982 %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
1983 %tmp = getelementptr i32, i32* %A, i32 8
1984 store i32* %tmp, i32** %ptr
1985 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
; Post-increment by register: ld1x4 of <2 x i32> from %A, then %A + %inc x i32
; is stored to %ptr; the expected instruction is ld1.2s with register writeback.
1988 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld1x4(i32* %A, i32** %ptr, i64 %inc) {
1989 ;CHECK-LABEL: test_v2i32_post_reg_ld1x4:
1990 ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
1991 %ld1x4 = tail call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32* %A)
1992 %tmp = getelementptr i32, i32* %A, i64 %inc
1993 store i32* %tmp, i32** %ptr
1994 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld1x4
1997 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld1x4.v2i32.p0i32(i32*)
; Post-increment by immediate: ld1x4 of <2 x i64> from %A, then %A + 8 x i64 (64 bytes)
; is stored to %ptr; the expected instruction is ld1.2d with immediate writeback.
2000 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
2001 ;CHECK-LABEL: test_v2i64_post_imm_ld1x4:
2002 ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
2003 %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
2004 %tmp = getelementptr i64, i64* %A, i32 8
2005 store i64* %tmp, i64** %ptr
2006 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
; Post-increment by register: ld1x4 of <2 x i64> from %A, then %A + %inc x i64
; is stored to %ptr; the expected instruction is ld1.2d with register writeback.
2009 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
2010 ;CHECK-LABEL: test_v2i64_post_reg_ld1x4:
2011 ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2012 %ld1x4 = tail call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64* %A)
2013 %tmp = getelementptr i64, i64* %A, i64 %inc
2014 store i64* %tmp, i64** %ptr
2015 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld1x4
2018 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld1x4.v2i64.p0i64(i64*)
; Post-increment by immediate: ld1x4 of <1 x i64> from %A, then %A + 4 x i64 (32 bytes)
; is stored to %ptr; the expected instruction is ld1.1d with immediate writeback.
2021 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld1x4(i64* %A, i64** %ptr) {
2022 ;CHECK-LABEL: test_v1i64_post_imm_ld1x4:
2023 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
2024 %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
2025 %tmp = getelementptr i64, i64* %A, i32 4
2026 store i64* %tmp, i64** %ptr
2027 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
; Post-increment by register: ld1x4 of <1 x i64> from %A, then %A + %inc x i64
; is stored to %ptr; the expected instruction is ld1.1d with register writeback.
2030 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld1x4(i64* %A, i64** %ptr, i64 %inc) {
2031 ;CHECK-LABEL: test_v1i64_post_reg_ld1x4:
2032 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2033 %ld1x4 = tail call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64* %A)
2034 %tmp = getelementptr i64, i64* %A, i64 %inc
2035 store i64* %tmp, i64** %ptr
2036 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld1x4
2039 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld1x4.v1i64.p0i64(i64*)
; Post-increment by immediate: ld1x4 of <4 x float> from %A, then %A + 16 x float (64 bytes)
; is stored to %ptr; the expected instruction is ld1.4s with immediate writeback.
2042 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld1x4(float* %A, float** %ptr) {
2043 ;CHECK-LABEL: test_v4f32_post_imm_ld1x4:
2044 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], #64
2045 %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
2046 %tmp = getelementptr float, float* %A, i32 16
2047 store float* %tmp, float** %ptr
2048 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
; Post-increment by register: ld1x4 of <4 x float> from %A, then %A + %inc x float
; is stored to %ptr; the expected instruction is ld1.4s with register writeback.
2051 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
2052 ;CHECK-LABEL: test_v4f32_post_reg_ld1x4:
2053 ;CHECK: ld1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2054 %ld1x4 = tail call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float* %A)
2055 %tmp = getelementptr float, float* %A, i64 %inc
2056 store float* %tmp, float** %ptr
2057 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld1x4
2060 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld1x4.v4f32.p0f32(float*)
; Post-increment by immediate: ld1x4 of <2 x float> from %A, then %A + 8 x float (32 bytes)
; is stored to %ptr; the expected instruction is ld1.2s with immediate writeback.
2063 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld1x4(float* %A, float** %ptr) {
2064 ;CHECK-LABEL: test_v2f32_post_imm_ld1x4:
2065 ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], #32
2066 %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
2067 %tmp = getelementptr float, float* %A, i32 8
2068 store float* %tmp, float** %ptr
2069 ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
; Post-increment by register: ld1x4 of <2 x float> from %A, then %A + %inc x float
; is stored to %ptr; the expected instruction is ld1.2s with register writeback.
2072 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld1x4(float* %A, float** %ptr, i64 %inc) {
2073 ;CHECK-LABEL: test_v2f32_post_reg_ld1x4:
2074 ;CHECK: ld1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2075 %ld1x4 = tail call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float* %A)
2076 %tmp = getelementptr float, float* %A, i64 %inc
2077 store float* %tmp, float** %ptr
2078 ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld1x4
2081 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld1x4.v2f32.p0f32(float*)
; Post-increment by immediate: ld1x4 of <2 x double> from %A, then %A + 8 x double (64 bytes)
; is stored to %ptr; the expected instruction is ld1.2d with immediate writeback.
2084 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld1x4(double* %A, double** %ptr) {
2085 ;CHECK-LABEL: test_v2f64_post_imm_ld1x4:
2086 ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], #64
2087 %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
2088 %tmp = getelementptr double, double* %A, i32 8
2089 store double* %tmp, double** %ptr
2090 ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
; Post-increment by register: ld1x4 of <2 x double> from %A, then %A + %inc x double
; is stored to %ptr; the expected instruction is ld1.2d with register writeback.
2093 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
2094 ;CHECK-LABEL: test_v2f64_post_reg_ld1x4:
2095 ;CHECK: ld1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2096 %ld1x4 = tail call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double* %A)
2097 %tmp = getelementptr double, double* %A, i64 %inc
2098 store double* %tmp, double** %ptr
2099 ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld1x4
2102 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld1x4.v2f64.p0f64(double*)
; Post-increment by immediate: ld1x4 of <1 x double> from %A, then %A + 4 x double (32 bytes)
; is stored to %ptr; the expected instruction is ld1.1d with immediate writeback.
2105 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld1x4(double* %A, double** %ptr) {
2106 ;CHECK-LABEL: test_v1f64_post_imm_ld1x4:
2107 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], #32
2108 %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
2109 %tmp = getelementptr double, double* %A, i32 4
2110 store double* %tmp, double** %ptr
2111 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
; Post-increment by register: ld1x4 of <1 x double> from %A, then %A + %inc x double
; is stored to %ptr; the expected instruction is ld1.1d with register writeback.
2114 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld1x4(double* %A, double** %ptr, i64 %inc) {
2115 ;CHECK-LABEL: test_v1f64_post_reg_ld1x4:
2116 ;CHECK: ld1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2117 %ld1x4 = tail call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double* %A)
2118 %tmp = getelementptr double, double* %A, i64 %inc
2119 store double* %tmp, double** %ptr
2120 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld1x4
2123 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld1x4.v1f64.p0f64(double*)
; Post-increment by immediate: ld2r producing two <16 x i8> results from %A, then
; %A + 2 x i8 (2 bytes) is stored to %ptr; ld2r.16b with immediate writeback is expected.
2126 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
2127 ;CHECK-LABEL: test_v16i8_post_imm_ld2r:
2128 ;CHECK: ld2r.16b { v0, v1 }, [x0], #2
2129 %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
2130 %tmp = getelementptr i8, i8* %A, i32 2
2131 store i8* %tmp, i8** %ptr
2132 ret { <16 x i8>, <16 x i8> } %ld2
; Post-increment by register: ld2r producing two <16 x i8> results from %A, then
; %A + %inc x i8 is stored to %ptr; ld2r.16b with register writeback is expected.
2135 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2136 ;CHECK-LABEL: test_v16i8_post_reg_ld2r:
2137 ;CHECK: ld2r.16b { v0, v1 }, [x0], x{{[0-9]+}}
2138 %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8* %A)
2139 %tmp = getelementptr i8, i8* %A, i64 %inc
2140 store i8* %tmp, i8** %ptr
2141 ret { <16 x i8>, <16 x i8> } %ld2
2144 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2r.v16i8.p0i8(i8*) nounwind readonly
; Post-increment by immediate: ld2r producing two <8 x i8> results from %A, then
; %A + 2 x i8 (2 bytes) is stored to %ptr; ld2r.8b with immediate writeback is expected.
2147 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2r(i8* %A, i8** %ptr) nounwind {
2148 ;CHECK-LABEL: test_v8i8_post_imm_ld2r:
2149 ;CHECK: ld2r.8b { v0, v1 }, [x0], #2
2150 %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
2151 %tmp = getelementptr i8, i8* %A, i32 2
2152 store i8* %tmp, i8** %ptr
2153 ret { <8 x i8>, <8 x i8> } %ld2
; Post-increment by register: ld2r producing two <8 x i8> results from %A, then
; %A + %inc x i8 is stored to %ptr; ld2r.8b with register writeback is expected.
2156 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2157 ;CHECK-LABEL: test_v8i8_post_reg_ld2r:
2158 ;CHECK: ld2r.8b { v0, v1 }, [x0], x{{[0-9]+}}
2159 %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8* %A)
2160 %tmp = getelementptr i8, i8* %A, i64 %inc
2161 store i8* %tmp, i8** %ptr
2162 ret { <8 x i8>, <8 x i8> } %ld2
2165 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2r.v8i8.p0i8(i8*) nounwind readonly
; Post-increment by immediate: ld2r producing two <8 x i16> results from %A, then
; %A + 2 x i16 (4 bytes) is stored to %ptr; ld2r.8h with immediate writeback is expected.
2168 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
2169 ;CHECK-LABEL: test_v8i16_post_imm_ld2r:
2170 ;CHECK: ld2r.8h { v0, v1 }, [x0], #4
2171 %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
2172 %tmp = getelementptr i16, i16* %A, i32 2
2173 store i16* %tmp, i16** %ptr
2174 ret { <8 x i16>, <8 x i16> } %ld2
; Post-increment by register: ld2r producing two <8 x i16> results from %A, then
; %A + %inc x i16 is stored to %ptr; ld2r.8h with register writeback is expected.
2177 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2178 ;CHECK-LABEL: test_v8i16_post_reg_ld2r:
2179 ;CHECK: ld2r.8h { v0, v1 }, [x0], x{{[0-9]+}}
2180 %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16* %A)
2181 %tmp = getelementptr i16, i16* %A, i64 %inc
2182 store i16* %tmp, i16** %ptr
2183 ret { <8 x i16>, <8 x i16> } %ld2
2186 declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2r.v8i16.p0i16(i16*) nounwind readonly
; Post-increment by immediate: ld2r producing two <4 x i16> results from %A, then
; %A + 2 x i16 (4 bytes) is stored to %ptr; ld2r.4h with immediate writeback is expected.
2189 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2r(i16* %A, i16** %ptr) nounwind {
2190 ;CHECK-LABEL: test_v4i16_post_imm_ld2r:
2191 ;CHECK: ld2r.4h { v0, v1 }, [x0], #4
2192 %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
2193 %tmp = getelementptr i16, i16* %A, i32 2
2194 store i16* %tmp, i16** %ptr
2195 ret { <4 x i16>, <4 x i16> } %ld2
; Post-increment by register: ld2r producing two <4 x i16> results from %A, then
; %A + %inc x i16 is stored to %ptr; ld2r.4h with register writeback is expected.
2198 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2199 ;CHECK-LABEL: test_v4i16_post_reg_ld2r:
2200 ;CHECK: ld2r.4h { v0, v1 }, [x0], x{{[0-9]+}}
2201 %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16* %A)
2202 %tmp = getelementptr i16, i16* %A, i64 %inc
2203 store i16* %tmp, i16** %ptr
2204 ret { <4 x i16>, <4 x i16> } %ld2
2207 declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2r.v4i16.p0i16(i16*) nounwind readonly
; Post-increment by immediate: ld2r producing two <4 x i32> results from %A, then
; %A + 2 x i32 (8 bytes) is stored to %ptr; ld2r.4s with immediate writeback is expected.
2210 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
2211 ;CHECK-LABEL: test_v4i32_post_imm_ld2r:
2212 ;CHECK: ld2r.4s { v0, v1 }, [x0], #8
2213 %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
2214 %tmp = getelementptr i32, i32* %A, i32 2
2215 store i32* %tmp, i32** %ptr
2216 ret { <4 x i32>, <4 x i32> } %ld2
; Post-increment by register: ld2r producing two <4 x i32> results from %A, then
; %A + %inc x i32 is stored to %ptr; ld2r.4s with register writeback is expected.
2219 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2220 ;CHECK-LABEL: test_v4i32_post_reg_ld2r:
2221 ;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
2222 %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32* %A)
2223 %tmp = getelementptr i32, i32* %A, i64 %inc
2224 store i32* %tmp, i32** %ptr
2225 ret { <4 x i32>, <4 x i32> } %ld2
2228 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2r.v4i32.p0i32(i32*) nounwind readonly
; ld2r (two <2 x i32> results) on %A with a constant step: %A + 2 x i32 (8 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #8.
2230 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2r(i32* %A, i32** %ptr) nounwind {
2231 ;CHECK-LABEL: test_v2i32_post_imm_ld2r:
2232 ;CHECK: ld2r.2s { v0, v1 }, [x0], #8
2233 %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
2234 %tmp = getelementptr i32, i32* %A, i32 2
2235 store i32* %tmp, i32** %ptr
2236 ret { <2 x i32>, <2 x i32> } %ld2
; ld2r (two <2 x i32> results) on %A with a variable step: %A + %inc x i32 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2239 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2240 ;CHECK-LABEL: test_v2i32_post_reg_ld2r:
2241 ;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
2242 %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32* %A)
2243 %tmp = getelementptr i32, i32* %A, i64 %inc
2244 store i32* %tmp, i32** %ptr
2245 ret { <2 x i32>, <2 x i32> } %ld2
2248 declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2r.v2i32.p0i32(i32*) nounwind readonly
; ld2r (two <2 x i64> results) on %A with a constant step: %A + 2 x i64 (16 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #16.
2251 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
2252 ;CHECK-LABEL: test_v2i64_post_imm_ld2r:
2253 ;CHECK: ld2r.2d { v0, v1 }, [x0], #16
2254 %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
2255 %tmp = getelementptr i64, i64* %A, i32 2
2256 store i64* %tmp, i64** %ptr
2257 ret { <2 x i64>, <2 x i64> } %ld2
; ld2r (two <2 x i64> results) on %A with a variable step: %A + %inc x i64 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2260 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2261 ;CHECK-LABEL: test_v2i64_post_reg_ld2r:
2262 ;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
2263 %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64* %A)
2264 %tmp = getelementptr i64, i64* %A, i64 %inc
2265 store i64* %tmp, i64** %ptr
2266 ret { <2 x i64>, <2 x i64> } %ld2
2269 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2r.v2i64.p0i64(i64*) nounwind readonly
; ld2r (two <1 x i64> results) on %A with a constant step: %A + 2 x i64 (16 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #16.
2271 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2r(i64* %A, i64** %ptr) nounwind {
2272 ;CHECK-LABEL: test_v1i64_post_imm_ld2r:
2273 ;CHECK: ld2r.1d { v0, v1 }, [x0], #16
2274 %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
2275 %tmp = getelementptr i64, i64* %A, i32 2
2276 store i64* %tmp, i64** %ptr
2277 ret { <1 x i64>, <1 x i64> } %ld2
; ld2r (two <1 x i64> results) on %A with a variable step: %A + %inc x i64 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2280 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2281 ;CHECK-LABEL: test_v1i64_post_reg_ld2r:
2282 ;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
2283 %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64* %A)
2284 %tmp = getelementptr i64, i64* %A, i64 %inc
2285 store i64* %tmp, i64** %ptr
2286 ret { <1 x i64>, <1 x i64> } %ld2
2289 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2r.v1i64.p0i64(i64*) nounwind readonly
; ld2r (two <4 x float> results) on %A with a constant step: %A + 2 x float (8 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #8.
2292 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
2293 ;CHECK-LABEL: test_v4f32_post_imm_ld2r:
2294 ;CHECK: ld2r.4s { v0, v1 }, [x0], #8
2295 %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
2296 %tmp = getelementptr float, float* %A, i32 2
2297 store float* %tmp, float** %ptr
2298 ret { <4 x float>, <4 x float> } %ld2
; ld2r (two <4 x float> results) on %A with a variable step: %A + %inc x float is stored
; to %ptr, and the expected instruction is post-indexed with an x-register offset.
2301 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
2302 ;CHECK-LABEL: test_v4f32_post_reg_ld2r:
2303 ;CHECK: ld2r.4s { v0, v1 }, [x0], x{{[0-9]+}}
2304 %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float* %A)
2305 %tmp = getelementptr float, float* %A, i64 %inc
2306 store float* %tmp, float** %ptr
2307 ret { <4 x float>, <4 x float> } %ld2
2310 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2r.v4f32.p0f32(float*) nounwind readonly
; ld2r (two <2 x float> results) on %A with a constant step: %A + 2 x float (8 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #8.
2312 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2r(float* %A, float** %ptr) nounwind {
2313 ;CHECK-LABEL: test_v2f32_post_imm_ld2r:
2314 ;CHECK: ld2r.2s { v0, v1 }, [x0], #8
2315 %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
2316 %tmp = getelementptr float, float* %A, i32 2
2317 store float* %tmp, float** %ptr
2318 ret { <2 x float>, <2 x float> } %ld2
; ld2r (two <2 x float> results) on %A with a variable step: %A + %inc x float is stored
; to %ptr, and the expected instruction is post-indexed with an x-register offset.
2321 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2r(float* %A, float** %ptr, i64 %inc) nounwind {
2322 ;CHECK-LABEL: test_v2f32_post_reg_ld2r:
2323 ;CHECK: ld2r.2s { v0, v1 }, [x0], x{{[0-9]+}}
2324 %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float* %A)
2325 %tmp = getelementptr float, float* %A, i64 %inc
2326 store float* %tmp, float** %ptr
2327 ret { <2 x float>, <2 x float> } %ld2
2330 declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2r.v2f32.p0f32(float*) nounwind readonly
; ld2r (two <2 x double> results) on %A with a constant step: %A + 2 x double (16 bytes)
; is stored to %ptr, and the expected instruction is post-indexed with immediate #16.
2333 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
2334 ;CHECK-LABEL: test_v2f64_post_imm_ld2r:
2335 ;CHECK: ld2r.2d { v0, v1 }, [x0], #16
2336 %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
2337 %tmp = getelementptr double, double* %A, i32 2
2338 store double* %tmp, double** %ptr
2339 ret { <2 x double>, <2 x double> } %ld2
; ld2r (two <2 x double> results) on %A with a variable step: %A + %inc x double is stored
; to %ptr, and the expected instruction is post-indexed with an x-register offset.
2342 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
2343 ;CHECK-LABEL: test_v2f64_post_reg_ld2r:
2344 ;CHECK: ld2r.2d { v0, v1 }, [x0], x{{[0-9]+}}
2345 %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double* %A)
2346 %tmp = getelementptr double, double* %A, i64 %inc
2347 store double* %tmp, double** %ptr
2348 ret { <2 x double>, <2 x double> } %ld2
2351 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2r.v2f64.p0f64(double*) nounwind readonly
; ld2r (two <1 x double> results) on %A with a constant step: %A + 2 x double (16 bytes)
; is stored to %ptr, and the expected instruction is post-indexed with immediate #16.
2353 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2r(double* %A, double** %ptr) nounwind {
2354 ;CHECK-LABEL: test_v1f64_post_imm_ld2r:
2355 ;CHECK: ld2r.1d { v0, v1 }, [x0], #16
2356 %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
2357 %tmp = getelementptr double, double* %A, i32 2
2358 store double* %tmp, double** %ptr
2359 ret { <1 x double>, <1 x double> } %ld2
; ld2r (two <1 x double> results) on %A with a variable step: %A + %inc x double is stored
; to %ptr, and the expected instruction is post-indexed with an x-register offset.
2362 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2r(double* %A, double** %ptr, i64 %inc) nounwind {
2363 ;CHECK-LABEL: test_v1f64_post_reg_ld2r:
2364 ;CHECK: ld2r.1d { v0, v1 }, [x0], x{{[0-9]+}}
2365 %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double* %A)
2366 %tmp = getelementptr double, double* %A, i64 %inc
2367 store double* %tmp, double** %ptr
2368 ret { <1 x double>, <1 x double> } %ld2
2371 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2r.v1f64.p0f64(double*) nounwind readonly
; ld3r (three <16 x i8> results) on %A with a constant step: %A + 3 x i8 (3 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #3.
2374 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
2375 ;CHECK-LABEL: test_v16i8_post_imm_ld3r:
2376 ;CHECK: ld3r.16b { v0, v1, v2 }, [x0], #3
2377 %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
2378 %tmp = getelementptr i8, i8* %A, i32 3
2379 store i8* %tmp, i8** %ptr
2380 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
; ld3r (three <16 x i8> results) on %A with a variable step: %A + %inc x i8 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2383 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2384 ;CHECK-LABEL: test_v16i8_post_reg_ld3r:
2385 ;CHECK: ld3r.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
2386 %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8* %A)
2387 %tmp = getelementptr i8, i8* %A, i64 %inc
2388 store i8* %tmp, i8** %ptr
2389 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
2392 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3r.v16i8.p0i8(i8*) nounwind readonly
; ld3r (three <8 x i8> results) on %A with a constant step: %A + 3 x i8 (3 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #3.
2395 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3r(i8* %A, i8** %ptr) nounwind {
2396 ;CHECK-LABEL: test_v8i8_post_imm_ld3r:
2397 ;CHECK: ld3r.8b { v0, v1, v2 }, [x0], #3
2398 %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
2399 %tmp = getelementptr i8, i8* %A, i32 3
2400 store i8* %tmp, i8** %ptr
2401 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
; ld3r (three <8 x i8> results) on %A with a variable step: %A + %inc x i8 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2404 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2405 ;CHECK-LABEL: test_v8i8_post_reg_ld3r:
2406 ;CHECK: ld3r.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
2407 %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8* %A)
2408 %tmp = getelementptr i8, i8* %A, i64 %inc
2409 store i8* %tmp, i8** %ptr
2410 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
2413 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3r.v8i8.p0i8(i8*) nounwind readonly
; ld3r (three <8 x i16> results) on %A with a constant step: %A + 3 x i16 (6 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #6.
2416 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
2417 ;CHECK-LABEL: test_v8i16_post_imm_ld3r:
2418 ;CHECK: ld3r.8h { v0, v1, v2 }, [x0], #6
2419 %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
2420 %tmp = getelementptr i16, i16* %A, i32 3
2421 store i16* %tmp, i16** %ptr
2422 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
; ld3r (three <8 x i16> results) on %A with a variable step: %A + %inc x i16 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2425 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2426 ;CHECK-LABEL: test_v8i16_post_reg_ld3r:
2427 ;CHECK: ld3r.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
2428 %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16* %A)
2429 %tmp = getelementptr i16, i16* %A, i64 %inc
2430 store i16* %tmp, i16** %ptr
2431 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
2434 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3r.v8i16.p0i16(i16*) nounwind readonly
; ld3r (three <4 x i16> results) on %A with a constant step: %A + 3 x i16 (6 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #6.
2437 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3r(i16* %A, i16** %ptr) nounwind {
2438 ;CHECK-LABEL: test_v4i16_post_imm_ld3r:
2439 ;CHECK: ld3r.4h { v0, v1, v2 }, [x0], #6
2440 %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
2441 %tmp = getelementptr i16, i16* %A, i32 3
2442 store i16* %tmp, i16** %ptr
2443 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
; ld3r (three <4 x i16> results) on %A with a variable step: %A + %inc x i16 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2446 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2447 ;CHECK-LABEL: test_v4i16_post_reg_ld3r:
2448 ;CHECK: ld3r.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
2449 %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16* %A)
2450 %tmp = getelementptr i16, i16* %A, i64 %inc
2451 store i16* %tmp, i16** %ptr
2452 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
2455 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3r.v4i16.p0i16(i16*) nounwind readonly
; ld3r (three <4 x i32> results) on %A with a constant step: %A + 3 x i32 (12 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #12.
2458 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
2459 ;CHECK-LABEL: test_v4i32_post_imm_ld3r:
2460 ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
2461 %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
2462 %tmp = getelementptr i32, i32* %A, i32 3
2463 store i32* %tmp, i32** %ptr
2464 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
; ld3r (three <4 x i32> results) on %A with a variable step: %A + %inc x i32 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2467 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2468 ;CHECK-LABEL: test_v4i32_post_reg_ld3r:
2469 ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
2470 %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32* %A)
2471 %tmp = getelementptr i32, i32* %A, i64 %inc
2472 store i32* %tmp, i32** %ptr
2473 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
2476 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3r.v4i32.p0i32(i32*) nounwind readonly
; ld3r (three <2 x i32> results) on %A with a constant step: %A + 3 x i32 (12 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #12.
2478 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3r(i32* %A, i32** %ptr) nounwind {
2479 ;CHECK-LABEL: test_v2i32_post_imm_ld3r:
2480 ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
2481 %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
2482 %tmp = getelementptr i32, i32* %A, i32 3
2483 store i32* %tmp, i32** %ptr
2484 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
; ld3r (three <2 x i32> results) on %A with a variable step: %A + %inc x i32 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2487 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2488 ;CHECK-LABEL: test_v2i32_post_reg_ld3r:
2489 ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
2490 %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32* %A)
2491 %tmp = getelementptr i32, i32* %A, i64 %inc
2492 store i32* %tmp, i32** %ptr
2493 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
2496 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3r.v2i32.p0i32(i32*) nounwind readonly
; ld3r (three <2 x i64> results) on %A with a constant step: %A + 3 x i64 (24 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #24.
2499 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
2500 ;CHECK-LABEL: test_v2i64_post_imm_ld3r:
2501 ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
2502 %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
2503 %tmp = getelementptr i64, i64* %A, i32 3
2504 store i64* %tmp, i64** %ptr
2505 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
; ld3r (three <2 x i64> results) on %A with a variable step: %A + %inc x i64 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2508 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2509 ;CHECK-LABEL: test_v2i64_post_reg_ld3r:
2510 ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
2511 %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64* %A)
2512 %tmp = getelementptr i64, i64* %A, i64 %inc
2513 store i64* %tmp, i64** %ptr
2514 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
2517 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3r.v2i64.p0i64(i64*) nounwind readonly
; ld3r (three <1 x i64> results) on %A with a constant step: %A + 3 x i64 (24 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #24.
2519 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3r(i64* %A, i64** %ptr) nounwind {
2520 ;CHECK-LABEL: test_v1i64_post_imm_ld3r:
2521 ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
2522 %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
2523 %tmp = getelementptr i64, i64* %A, i32 3
2524 store i64* %tmp, i64** %ptr
2525 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
; ld3r (three <1 x i64> results) on %A with a variable step: %A + %inc x i64 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2528 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2529 ;CHECK-LABEL: test_v1i64_post_reg_ld3r:
2530 ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
2531 %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64* %A)
2532 %tmp = getelementptr i64, i64* %A, i64 %inc
2533 store i64* %tmp, i64** %ptr
2534 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
2537 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3r.v1i64.p0i64(i64*) nounwind readonly
; ld3r (three <4 x float> results) on %A with a constant step: %A + 3 x float (12 bytes)
; is stored to %ptr, and the expected instruction is post-indexed with immediate #12.
2540 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
2541 ;CHECK-LABEL: test_v4f32_post_imm_ld3r:
2542 ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], #12
2543 %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
2544 %tmp = getelementptr float, float* %A, i32 3
2545 store float* %tmp, float** %ptr
2546 ret { <4 x float>, <4 x float>, <4 x float> } %ld3
; ld3r (three <4 x float> results) on %A with a variable step: %A + %inc x float is stored
; to %ptr, and the expected instruction is post-indexed with an x-register offset.
2549 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
2550 ;CHECK-LABEL: test_v4f32_post_reg_ld3r:
2551 ;CHECK: ld3r.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
2552 %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float* %A)
2553 %tmp = getelementptr float, float* %A, i64 %inc
2554 store float* %tmp, float** %ptr
2555 ret { <4 x float>, <4 x float>, <4 x float> } %ld3
2558 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3r.v4f32.p0f32(float*) nounwind readonly
; ld3r (three <2 x float> results) on %A with a constant step: %A + 3 x float (12 bytes)
; is stored to %ptr, and the expected instruction is post-indexed with immediate #12.
2560 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3r(float* %A, float** %ptr) nounwind {
2561 ;CHECK-LABEL: test_v2f32_post_imm_ld3r:
2562 ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], #12
2563 %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
2564 %tmp = getelementptr float, float* %A, i32 3
2565 store float* %tmp, float** %ptr
2566 ret { <2 x float>, <2 x float>, <2 x float> } %ld3
; ld3r (three <2 x float> results) on %A with a variable step: %A + %inc x float is stored
; to %ptr, and the expected instruction is post-indexed with an x-register offset.
2569 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3r(float* %A, float** %ptr, i64 %inc) nounwind {
2570 ;CHECK-LABEL: test_v2f32_post_reg_ld3r:
2571 ;CHECK: ld3r.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
2572 %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float* %A)
2573 %tmp = getelementptr float, float* %A, i64 %inc
2574 store float* %tmp, float** %ptr
2575 ret { <2 x float>, <2 x float>, <2 x float> } %ld3
2578 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3r.v2f32.p0f32(float*) nounwind readonly
; ld3r (three <2 x double> results) on %A with a constant step: %A + 3 x double (24 bytes)
; is stored to %ptr, and the expected instruction is post-indexed with immediate #24.
2581 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
2582 ;CHECK-LABEL: test_v2f64_post_imm_ld3r:
2583 ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], #24
2584 %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
2585 %tmp = getelementptr double, double* %A, i32 3
2586 store double* %tmp, double** %ptr
2587 ret { <2 x double>, <2 x double>, <2 x double> } %ld3
; ld3r (three <2 x double> results) on %A with a variable step: %A + %inc x double is
; stored to %ptr, and the expected instruction is post-indexed with an x-register offset.
2590 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
2591 ;CHECK-LABEL: test_v2f64_post_reg_ld3r:
2592 ;CHECK: ld3r.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
2593 %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double* %A)
2594 %tmp = getelementptr double, double* %A, i64 %inc
2595 store double* %tmp, double** %ptr
2596 ret { <2 x double>, <2 x double>, <2 x double> } %ld3
2599 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3r.v2f64.p0f64(double*) nounwind readonly
; ld3r (three <1 x double> results) on %A with a constant step: %A + 3 x double (24 bytes)
; is stored to %ptr, and the expected instruction is post-indexed with immediate #24.
2601 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3r(double* %A, double** %ptr) nounwind {
2602 ;CHECK-LABEL: test_v1f64_post_imm_ld3r:
2603 ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], #24
2604 %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
2605 %tmp = getelementptr double, double* %A, i32 3
2606 store double* %tmp, double** %ptr
2607 ret { <1 x double>, <1 x double>, <1 x double> } %ld3
; ld3r (three <1 x double> results) on %A with a variable step: %A + %inc x double is
; stored to %ptr, and the expected instruction is post-indexed with an x-register offset.
2610 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3r(double* %A, double** %ptr, i64 %inc) nounwind {
2611 ;CHECK-LABEL: test_v1f64_post_reg_ld3r:
2612 ;CHECK: ld3r.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
2613 %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double* %A)
2614 %tmp = getelementptr double, double* %A, i64 %inc
2615 store double* %tmp, double** %ptr
2616 ret { <1 x double>, <1 x double>, <1 x double> } %ld3
2619 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3r.v1f64.p0f64(double*) nounwind readonly
; ld4r (four <16 x i8> results) on %A with a constant step: %A + 4 x i8 (4 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #4.
2622 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
2623 ;CHECK-LABEL: test_v16i8_post_imm_ld4r:
2624 ;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], #4
2625 %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
2626 %tmp = getelementptr i8, i8* %A, i32 4
2627 store i8* %tmp, i8** %ptr
2628 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
; ld4r (four <16 x i8> results) on %A with a variable step: %A + %inc x i8 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2631 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2632 ;CHECK-LABEL: test_v16i8_post_reg_ld4r:
2633 ;CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2634 %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8* %A)
2635 %tmp = getelementptr i8, i8* %A, i64 %inc
2636 store i8* %tmp, i8** %ptr
2637 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
2640 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4r.v16i8.p0i8(i8*) nounwind readonly
; ld4r (four <8 x i8> results) on %A with a constant step: %A + 4 x i8 (4 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #4.
2643 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4r(i8* %A, i8** %ptr) nounwind {
2644 ;CHECK-LABEL: test_v8i8_post_imm_ld4r:
2645 ;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], #4
2646 %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
2647 %tmp = getelementptr i8, i8* %A, i32 4
2648 store i8* %tmp, i8** %ptr
2649 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
; ld4r (four <8 x i8> results) on %A with a variable step: %A + %inc x i8 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2652 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4r(i8* %A, i8** %ptr, i64 %inc) nounwind {
2653 ;CHECK-LABEL: test_v8i8_post_reg_ld4r:
2654 ;CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2655 %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8* %A)
2656 %tmp = getelementptr i8, i8* %A, i64 %inc
2657 store i8* %tmp, i8** %ptr
2658 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
2661 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4r.v8i8.p0i8(i8*) nounwind readonly
; ld4r (four <8 x i16> results) on %A with a constant step: %A + 4 x i16 (8 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #8.
2664 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
2665 ;CHECK-LABEL: test_v8i16_post_imm_ld4r:
2666 ;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], #8
2667 %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
2668 %tmp = getelementptr i16, i16* %A, i32 4
2669 store i16* %tmp, i16** %ptr
2670 ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
; ld4r (four <8 x i16> results) on %A with a variable step: %A + %inc x i16 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2673 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2674 ;CHECK-LABEL: test_v8i16_post_reg_ld4r:
2675 ;CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2676 %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16* %A)
2677 %tmp = getelementptr i16, i16* %A, i64 %inc
2678 store i16* %tmp, i16** %ptr
2679 ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
2682 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4r.v8i16.p0i16(i16*) nounwind readonly
; ld4r (four <4 x i16> results) on %A with a constant step: %A + 4 x i16 (8 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #8.
2685 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4r(i16* %A, i16** %ptr) nounwind {
2686 ;CHECK-LABEL: test_v4i16_post_imm_ld4r:
2687 ;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], #8
2688 %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
2689 %tmp = getelementptr i16, i16* %A, i32 4
2690 store i16* %tmp, i16** %ptr
2691 ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
; ld4r (four <4 x i16> results) on %A with a variable step: %A + %inc x i16 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2694 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4r(i16* %A, i16** %ptr, i64 %inc) nounwind {
2695 ;CHECK-LABEL: test_v4i16_post_reg_ld4r:
2696 ;CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2697 %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16* %A)
2698 %tmp = getelementptr i16, i16* %A, i64 %inc
2699 store i16* %tmp, i16** %ptr
2700 ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
2703 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4r.v4i16.p0i16(i16*) nounwind readonly
; ld4r (four <4 x i32> results) on %A with a constant step: %A + 4 x i32 (16 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #16.
2706 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
2707 ;CHECK-LABEL: test_v4i32_post_imm_ld4r:
2708 ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
2709 %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
2710 %tmp = getelementptr i32, i32* %A, i32 4
2711 store i32* %tmp, i32** %ptr
2712 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
; ld4r (four <4 x i32> results) on %A with a variable step: %A + %inc x i32 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2715 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2716 ;CHECK-LABEL: test_v4i32_post_reg_ld4r:
2717 ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2718 %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32* %A)
2719 %tmp = getelementptr i32, i32* %A, i64 %inc
2720 store i32* %tmp, i32** %ptr
2721 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
2724 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4r.v4i32.p0i32(i32*) nounwind readonly
; ld4r (four <2 x i32> results) on %A with a constant step: %A + 4 x i32 (16 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #16.
2726 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4r(i32* %A, i32** %ptr) nounwind {
2727 ;CHECK-LABEL: test_v2i32_post_imm_ld4r:
2728 ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
2729 %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
2730 %tmp = getelementptr i32, i32* %A, i32 4
2731 store i32* %tmp, i32** %ptr
2732 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
; ld4r (four <2 x i32> results) on %A with a variable step: %A + %inc x i32 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2735 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4r(i32* %A, i32** %ptr, i64 %inc) nounwind {
2736 ;CHECK-LABEL: test_v2i32_post_reg_ld4r:
2737 ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2738 %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32* %A)
2739 %tmp = getelementptr i32, i32* %A, i64 %inc
2740 store i32* %tmp, i32** %ptr
2741 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
2744 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4r.v2i32.p0i32(i32*) nounwind readonly
; ld4r (four <2 x i64> results) on %A with a constant step: %A + 4 x i64 (32 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #32.
2747 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
2748 ;CHECK-LABEL: test_v2i64_post_imm_ld4r:
2749 ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
2750 %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
2751 %tmp = getelementptr i64, i64* %A, i32 4
2752 store i64* %tmp, i64** %ptr
2753 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
; ld4r (four <2 x i64> results) on %A with a variable step: %A + %inc x i64 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2756 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2757 ;CHECK-LABEL: test_v2i64_post_reg_ld4r:
2758 ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2759 %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64* %A)
2760 %tmp = getelementptr i64, i64* %A, i64 %inc
2761 store i64* %tmp, i64** %ptr
2762 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
2765 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4r.v2i64.p0i64(i64*) nounwind readonly
; ld4r (four <1 x i64> results) on %A with a constant step: %A + 4 x i64 (32 bytes) is
; stored to %ptr, and the expected instruction is post-indexed with immediate #32.
2767 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4r(i64* %A, i64** %ptr) nounwind {
2768 ;CHECK-LABEL: test_v1i64_post_imm_ld4r:
2769 ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
2770 %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
2771 %tmp = getelementptr i64, i64* %A, i32 4
2772 store i64* %tmp, i64** %ptr
2773 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
; ld4r (four <1 x i64> results) on %A with a variable step: %A + %inc x i64 is stored to
; %ptr, and the expected instruction is post-indexed with an x-register offset.
2776 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4r(i64* %A, i64** %ptr, i64 %inc) nounwind {
2777 ;CHECK-LABEL: test_v1i64_post_reg_ld4r:
2778 ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2779 %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64* %A)
2780 %tmp = getelementptr i64, i64* %A, i64 %inc
2781 store i64* %tmp, i64** %ptr
2782 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
2785 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4r.v1i64.p0i64(i64*) nounwind readonly
; ld4r (four <4 x float> results) on %A with a constant step: %A + 4 x float (16 bytes)
; is stored to %ptr, and the expected instruction is post-indexed with immediate #16.
2788 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
2789 ;CHECK-LABEL: test_v4f32_post_imm_ld4r:
2790 ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], #16
2791 %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
2792 %tmp = getelementptr float, float* %A, i32 4
2793 store float* %tmp, float** %ptr
2794 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
; ld4r (four <4 x float> results) on %A with a variable step: %A + %inc x float is stored
; to %ptr, and the expected instruction is post-indexed with an x-register offset.
2797 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
2798 ;CHECK-LABEL: test_v4f32_post_reg_ld4r:
2799 ;CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2800 %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float* %A)
2801 %tmp = getelementptr float, float* %A, i64 %inc
2802 store float* %tmp, float** %ptr
2803 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
2806 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4r.v4f32.p0f32(float*) nounwind readonly
; ld4r (four <2 x float> results) on %A with a constant step: %A + 4 x float (16 bytes)
; is stored to %ptr, and the expected instruction is post-indexed with immediate #16.
2808 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4r(float* %A, float** %ptr) nounwind {
2809 ;CHECK-LABEL: test_v2f32_post_imm_ld4r:
2810 ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], #16
2811 %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
2812 %tmp = getelementptr float, float* %A, i32 4
2813 store float* %tmp, float** %ptr
2814 ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
; ld4r (four <2 x float> results) on %A with a variable step: %A + %inc x float is stored
; to %ptr, and the expected instruction is post-indexed with an x-register offset.
2817 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4r(float* %A, float** %ptr, i64 %inc) nounwind {
2818 ;CHECK-LABEL: test_v2f32_post_reg_ld4r:
2819 ;CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2820 %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float* %A)
2821 %tmp = getelementptr float, float* %A, i64 %inc
2822 store float* %tmp, float** %ptr
2823 ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
2826 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4r.v2f32.p0f32(float*) nounwind readonly
; ld4r (four <2 x double> results) on %A with a constant step: %A + 4 x double (32 bytes)
; is stored to %ptr, and the expected instruction is post-indexed with immediate #32.
2829 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
2830 ;CHECK-LABEL: test_v2f64_post_imm_ld4r:
2831 ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], #32
2832 %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
2833 %tmp = getelementptr double, double* %A, i32 4
2834 store double* %tmp, double** %ptr
2835 ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
; ld4r (four <2 x double> results) on %A with a variable step: %A + %inc x double is
; stored to %ptr, and the expected instruction is post-indexed with an x-register offset.
2838 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
2839 ;CHECK-LABEL: test_v2f64_post_reg_ld4r:
2840 ;CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2841 %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double* %A)
2842 %tmp = getelementptr double, double* %A, i64 %inc
2843 store double* %tmp, double** %ptr
2844 ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
2847 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4r.v2f64.p0f64(double*) nounwind readonly
; ld4r (four <1 x double> results) on %A with a constant step: %A + 4 x double (32 bytes)
; is stored to %ptr, and the expected instruction is post-indexed with immediate #32.
2849 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4r(double* %A, double** %ptr) nounwind {
2850 ;CHECK-LABEL: test_v1f64_post_imm_ld4r:
2851 ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], #32
2852 %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
2853 %tmp = getelementptr double, double* %A, i32 4
2854 store double* %tmp, double** %ptr
2855 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
; ld4r (four <1 x double> results) on %A with a variable step: %A + %inc x double is
; stored to %ptr, and the expected instruction is post-indexed with an x-register offset.
2858 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4r(double* %A, double** %ptr, i64 %inc) nounwind {
2859 ;CHECK-LABEL: test_v1f64_post_reg_ld4r:
2860 ;CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
2861 %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double* %A)
2862 %tmp = getelementptr double, double* %A, i64 %inc
2863 store double* %tmp, double** %ptr
2864 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
2867 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4r.v1f64.p0f64(double*) nounwind readonly
; ld2lane on %A (vector operands %B and %C, i64 0 operand; the expected instruction names
; lane [0]) with a constant step: %A + 2 x i8 (2 bytes) is stored to %ptr, and the
; expected instruction is post-indexed with immediate #2.
2870 define { <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
2871 ;CHECK-LABEL: test_v16i8_post_imm_ld2lane:
2872 ;CHECK: ld2.b { v0, v1 }[0], [x0], #2
2873 %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
2874 %tmp = getelementptr i8, i8* %A, i32 2
2875 store i8* %tmp, i8** %ptr
2876 ret { <16 x i8>, <16 x i8> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <16 x i8>, but
; the new address is %A + %inc x i8, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
2879 define { <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C) nounwind {
2880 ;CHECK-LABEL: test_v16i8_post_reg_ld2lane:
2881 ;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
2882 %ld2 = call { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
2883 %tmp = getelementptr i8, i8* %A, i64 %inc
2884 store i8* %tmp, i8** %ptr
2885 ret { <16 x i8>, <16 x i8> } %ld2
; ld2lane intrinsic for <16 x i8>: operands are the two input vectors, an i64
; lane number (the tests above pass 0) and the i8* address.
2888 declare { <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
; Post-indexed ld2lane, immediate writeback: loads lane 0 of two <8 x i8>
; vectors from %A, then stores %A + 2 x i8 (= 2 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#2".
2891 define { <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
2892 ;CHECK-LABEL: test_v8i8_post_imm_ld2lane:
2893 ;CHECK: ld2.b { v0, v1 }[0], [x0], #2
2894 %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
2895 %tmp = getelementptr i8, i8* %A, i32 2
2896 store i8* %tmp, i8** %ptr
2897 ret { <8 x i8>, <8 x i8> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <8 x i8>, but
; the new address is %A + %inc x i8, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
2900 define { <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld2lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C) nounwind {
2901 ;CHECK-LABEL: test_v8i8_post_reg_ld2lane:
2902 ;CHECK: ld2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
2903 %ld2 = call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
2904 %tmp = getelementptr i8, i8* %A, i64 %inc
2905 store i8* %tmp, i8** %ptr
2906 ret { <8 x i8>, <8 x i8> } %ld2
; ld2lane intrinsic for <8 x i8>: operands are the two input vectors, an i64
; lane number (the tests above pass 0) and the i8* address.
2909 declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
; Post-indexed ld2lane, immediate writeback: loads lane 0 of two <8 x i16>
; vectors from %A, then stores %A + 2 x i16 (= 4 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#4".
2912 define { <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
2913 ;CHECK-LABEL: test_v8i16_post_imm_ld2lane:
2914 ;CHECK: ld2.h { v0, v1 }[0], [x0], #4
2915 %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
2916 %tmp = getelementptr i16, i16* %A, i32 2
2917 store i16* %tmp, i16** %ptr
2918 ret { <8 x i16>, <8 x i16> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <8 x i16>, but
; the new address is %A + %inc x i16, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
2921 define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind {
2922 ;CHECK-LABEL: test_v8i16_post_reg_ld2lane:
2923 ;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
2924 %ld2 = call { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
2925 %tmp = getelementptr i16, i16* %A, i64 %inc
2926 store i16* %tmp, i16** %ptr
2927 ret { <8 x i16>, <8 x i16> } %ld2
; ld2lane intrinsic for <8 x i16>: operands are the two input vectors, an i64
; lane number (the tests above pass 0) and the i16* address.
2930 declare { <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
; Post-indexed ld2lane, immediate writeback: loads lane 0 of two <4 x i16>
; vectors from %A, then stores %A + 2 x i16 (= 4 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#4".
2933 define { <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
2934 ;CHECK-LABEL: test_v4i16_post_imm_ld2lane:
2935 ;CHECK: ld2.h { v0, v1 }[0], [x0], #4
2936 %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
2937 %tmp = getelementptr i16, i16* %A, i32 2
2938 store i16* %tmp, i16** %ptr
2939 ret { <4 x i16>, <4 x i16> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <4 x i16>, but
; the new address is %A + %inc x i16, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
2942 define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind {
2943 ;CHECK-LABEL: test_v4i16_post_reg_ld2lane:
2944 ;CHECK: ld2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
2945 %ld2 = call { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
2946 %tmp = getelementptr i16, i16* %A, i64 %inc
2947 store i16* %tmp, i16** %ptr
2948 ret { <4 x i16>, <4 x i16> } %ld2
; ld2lane intrinsic for <4 x i16>: operands are the two input vectors, an i64
; lane number (the tests above pass 0) and the i16* address.
2951 declare { <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
; Post-indexed ld2lane, immediate writeback: loads lane 0 of two <4 x i32>
; vectors from %A, then stores %A + 2 x i32 (= 8 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#8".
2954 define { <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
2955 ;CHECK-LABEL: test_v4i32_post_imm_ld2lane:
2956 ;CHECK: ld2.s { v0, v1 }[0], [x0], #8
2957 %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
2958 %tmp = getelementptr i32, i32* %A, i32 2
2959 store i32* %tmp, i32** %ptr
2960 ret { <4 x i32>, <4 x i32> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <4 x i32>, but
; the new address is %A + %inc x i32, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
2963 define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind {
2964 ;CHECK-LABEL: test_v4i32_post_reg_ld2lane:
2965 ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
2966 %ld2 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
2967 %tmp = getelementptr i32, i32* %A, i64 %inc
2968 store i32* %tmp, i32** %ptr
2969 ret { <4 x i32>, <4 x i32> } %ld2
; ld2lane intrinsic for <4 x i32>: operands are the two input vectors, an i64
; lane number (the tests above pass 0) and the i32* address.
2972 declare { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
; Post-indexed ld2lane, immediate writeback: loads lane 0 of two <2 x i32>
; vectors from %A, then stores %A + 2 x i32 (= 8 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#8".
2975 define { <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
2976 ;CHECK-LABEL: test_v2i32_post_imm_ld2lane:
2977 ;CHECK: ld2.s { v0, v1 }[0], [x0], #8
2978 %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
2979 %tmp = getelementptr i32, i32* %A, i32 2
2980 store i32* %tmp, i32** %ptr
2981 ret { <2 x i32>, <2 x i32> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <2 x i32>, but
; the new address is %A + %inc x i32, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
2984 define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind {
2985 ;CHECK-LABEL: test_v2i32_post_reg_ld2lane:
2986 ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
2987 %ld2 = call { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
2988 %tmp = getelementptr i32, i32* %A, i64 %inc
2989 store i32* %tmp, i32** %ptr
2990 ret { <2 x i32>, <2 x i32> } %ld2
; ld2lane intrinsic for <2 x i32>: operands are the two input vectors, an i64
; lane number (the tests above pass 0) and the i32* address.
2993 declare { <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
; Post-indexed ld2lane, immediate writeback: loads lane 0 of two <2 x i64>
; vectors from %A, then stores %A + 2 x i64 (= 16 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#16".
2996 define { <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
2997 ;CHECK-LABEL: test_v2i64_post_imm_ld2lane:
2998 ;CHECK: ld2.d { v0, v1 }[0], [x0], #16
2999 %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
3000 %tmp = getelementptr i64, i64* %A, i32 2
3001 store i64* %tmp, i64** %ptr
3002 ret { <2 x i64>, <2 x i64> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <2 x i64>, but
; the new address is %A + %inc x i64, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3005 define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind {
3006 ;CHECK-LABEL: test_v2i64_post_reg_ld2lane:
3007 ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3008 %ld2 = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
3009 %tmp = getelementptr i64, i64* %A, i64 %inc
3010 store i64* %tmp, i64** %ptr
3011 ret { <2 x i64>, <2 x i64> } %ld2
; ld2lane intrinsic for <2 x i64>: operands are the two input vectors, an i64
; lane number (the tests above pass 0) and the i64* address.
3014 declare { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
; Post-indexed ld2lane, immediate writeback: loads lane 0 of two <1 x i64>
; vectors from %A, then stores %A + 2 x i64 (= 16 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#16".
3017 define { <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
3018 ;CHECK-LABEL: test_v1i64_post_imm_ld2lane:
3019 ;CHECK: ld2.d { v0, v1 }[0], [x0], #16
3020 %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
3021 %tmp = getelementptr i64, i64* %A, i32 2
3022 store i64* %tmp, i64** %ptr
3023 ret { <1 x i64>, <1 x i64> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <1 x i64>, but
; the new address is %A + %inc x i64, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3026 define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind {
3027 ;CHECK-LABEL: test_v1i64_post_reg_ld2lane:
3028 ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3029 %ld2 = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
3030 %tmp = getelementptr i64, i64* %A, i64 %inc
3031 store i64* %tmp, i64** %ptr
3032 ret { <1 x i64>, <1 x i64> } %ld2
; ld2lane intrinsic for <1 x i64>: operands are the two input vectors, an i64
; lane number (the tests above pass 0) and the i64* address.
3035 declare { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
; Post-indexed ld2lane, immediate writeback: loads lane 0 of two <4 x float>
; vectors from %A, then stores %A + 2 x float (= 8 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#8".
3038 define { <4 x float>, <4 x float> } @test_v4f32_post_imm_ld2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
3039 ;CHECK-LABEL: test_v4f32_post_imm_ld2lane:
3040 ;CHECK: ld2.s { v0, v1 }[0], [x0], #8
3041 %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
3042 %tmp = getelementptr float, float* %A, i32 2
3043 store float* %tmp, float** %ptr
3044 ret { <4 x float>, <4 x float> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <4 x float>,
; but the new address is %A + %inc x float, so the expected instruction takes
; its post-increment from an x register instead of an immediate.
3047 define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind {
3048 ;CHECK-LABEL: test_v4f32_post_reg_ld2lane:
3049 ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
3050 %ld2 = call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
3051 %tmp = getelementptr float, float* %A, i64 %inc
3052 store float* %tmp, float** %ptr
3053 ret { <4 x float>, <4 x float> } %ld2
; ld2lane intrinsic for <4 x float>: operands are the two input vectors, an
; i64 lane number (the tests above pass 0) and the float* address.
3056 declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) nounwind readonly
; Post-indexed ld2lane, immediate writeback: loads lane 0 of two <2 x float>
; vectors from %A, then stores %A + 2 x float (= 8 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#8".
3059 define { <2 x float>, <2 x float> } @test_v2f32_post_imm_ld2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
3060 ;CHECK-LABEL: test_v2f32_post_imm_ld2lane:
3061 ;CHECK: ld2.s { v0, v1 }[0], [x0], #8
3062 %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
3063 %tmp = getelementptr float, float* %A, i32 2
3064 store float* %tmp, float** %ptr
3065 ret { <2 x float>, <2 x float> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <2 x float>,
; but the new address is %A + %inc x float, so the expected instruction takes
; its post-increment from an x register instead of an immediate.
3068 define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind {
3069 ;CHECK-LABEL: test_v2f32_post_reg_ld2lane:
3070 ;CHECK: ld2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
3071 %ld2 = call { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
3072 %tmp = getelementptr float, float* %A, i64 %inc
3073 store float* %tmp, float** %ptr
3074 ret { <2 x float>, <2 x float> } %ld2
; ld2lane intrinsic for <2 x float>: operands are the two input vectors, an
; i64 lane number (the tests above pass 0) and the float* address.
3077 declare { <2 x float>, <2 x float> } @llvm.aarch64.neon.ld2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) nounwind readonly
; Post-indexed ld2lane, immediate writeback: loads lane 0 of two <2 x double>
; vectors from %A, then stores %A + 2 x double (= 16 bytes) to %ptr. The
; expected instruction folds that increment into the load as "#16".
3080 define { <2 x double>, <2 x double> } @test_v2f64_post_imm_ld2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
3081 ;CHECK-LABEL: test_v2f64_post_imm_ld2lane:
3082 ;CHECK: ld2.d { v0, v1 }[0], [x0], #16
3083 %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
3084 %tmp = getelementptr double, double* %A, i32 2
3085 store double* %tmp, double** %ptr
3086 ret { <2 x double>, <2 x double> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <2 x double>,
; but the new address is %A + %inc x double, so the expected instruction takes
; its post-increment from an x register instead of an immediate.
3089 define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind {
3090 ;CHECK-LABEL: test_v2f64_post_reg_ld2lane:
3091 ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3092 %ld2 = call { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
3093 %tmp = getelementptr double, double* %A, i64 %inc
3094 store double* %tmp, double** %ptr
3095 ret { <2 x double>, <2 x double> } %ld2
; ld2lane intrinsic for <2 x double>: operands are the two input vectors, an
; i64 lane number (the tests above pass 0) and the double* address.
3098 declare { <2 x double>, <2 x double> } @llvm.aarch64.neon.ld2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) nounwind readonly
; Post-indexed ld2lane, immediate writeback: loads lane 0 of two <1 x double>
; vectors from %A, then stores %A + 2 x double (= 16 bytes) to %ptr. The
; expected instruction folds that increment into the load as "#16".
3101 define { <1 x double>, <1 x double> } @test_v1f64_post_imm_ld2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
3102 ;CHECK-LABEL: test_v1f64_post_imm_ld2lane:
3103 ;CHECK: ld2.d { v0, v1 }[0], [x0], #16
3104 %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
3105 %tmp = getelementptr double, double* %A, i32 2
3106 store double* %tmp, double** %ptr
3107 ret { <1 x double>, <1 x double> } %ld2
; Post-indexed ld2lane, register writeback: same lane-0 load on <1 x double>,
; but the new address is %A + %inc x double, so the expected instruction takes
; its post-increment from an x register instead of an immediate.
3110 define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind {
3111 ;CHECK-LABEL: test_v1f64_post_reg_ld2lane:
3112 ;CHECK: ld2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
3113 %ld2 = call { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
3114 %tmp = getelementptr double, double* %A, i64 %inc
3115 store double* %tmp, double** %ptr
3116 ret { <1 x double>, <1 x double> } %ld2
; ld2lane intrinsic for <1 x double>: operands are the two input vectors, an
; i64 lane number (the tests above pass 0) and the double* address.
3119 declare { <1 x double>, <1 x double> } @llvm.aarch64.neon.ld2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three <16 x i8>
; vectors from %A, then stores %A + 3 x i8 (= 3 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#3".
3122 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
3123 ;CHECK-LABEL: test_v16i8_post_imm_ld3lane:
3124 ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
3125 %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
3126 %tmp = getelementptr i8, i8* %A, i32 3
3127 store i8* %tmp, i8** %ptr
3128 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <16 x i8>, but
; the new address is %A + %inc x i8, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3131 define { <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
3132 ;CHECK-LABEL: test_v16i8_post_reg_ld3lane:
3133 ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3134 %ld3 = call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
3135 %tmp = getelementptr i8, i8* %A, i64 %inc
3136 store i8* %tmp, i8** %ptr
3137 ret { <16 x i8>, <16 x i8>, <16 x i8> } %ld3
; ld3lane intrinsic for <16 x i8>: operands are the three input vectors, an
; i64 lane number (the tests above pass 0) and the i8* address.
3140 declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three <8 x i8>
; vectors from %A, then stores %A + 3 x i8 (= 3 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#3".
3143 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
3144 ;CHECK-LABEL: test_v8i8_post_imm_ld3lane:
3145 ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], #3
3146 %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
3147 %tmp = getelementptr i8, i8* %A, i32 3
3148 store i8* %tmp, i8** %ptr
3149 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <8 x i8>, but
; the new address is %A + %inc x i8, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3152 define { <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld3lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
3153 ;CHECK-LABEL: test_v8i8_post_reg_ld3lane:
3154 ;CHECK: ld3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3155 %ld3 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
3156 %tmp = getelementptr i8, i8* %A, i64 %inc
3157 store i8* %tmp, i8** %ptr
3158 ret { <8 x i8>, <8 x i8>, <8 x i8> } %ld3
; ld3lane intrinsic for <8 x i8>: operands are the three input vectors, an i64
; lane number (the tests above pass 0) and the i8* address.
3161 declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three <8 x i16>
; vectors from %A, then stores %A + 3 x i16 (= 6 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#6".
3164 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
3165 ;CHECK-LABEL: test_v8i16_post_imm_ld3lane:
3166 ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
3167 %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
3168 %tmp = getelementptr i16, i16* %A, i32 3
3169 store i16* %tmp, i16** %ptr
3170 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <8 x i16>, but
; the new address is %A + %inc x i16, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3173 define { <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
3174 ;CHECK-LABEL: test_v8i16_post_reg_ld3lane:
3175 ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3176 %ld3 = call { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
3177 %tmp = getelementptr i16, i16* %A, i64 %inc
3178 store i16* %tmp, i16** %ptr
3179 ret { <8 x i16>, <8 x i16>, <8 x i16> } %ld3
; ld3lane intrinsic for <8 x i16>: operands are the three input vectors, an
; i64 lane number (the tests above pass 0) and the i16* address.
3182 declare { <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three <4 x i16>
; vectors from %A, then stores %A + 3 x i16 (= 6 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#6".
3185 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
3186 ;CHECK-LABEL: test_v4i16_post_imm_ld3lane:
3187 ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], #6
3188 %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
3189 %tmp = getelementptr i16, i16* %A, i32 3
3190 store i16* %tmp, i16** %ptr
3191 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <4 x i16>, but
; the new address is %A + %inc x i16, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3194 define { <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld3lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
3195 ;CHECK-LABEL: test_v4i16_post_reg_ld3lane:
3196 ;CHECK: ld3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3197 %ld3 = call { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
3198 %tmp = getelementptr i16, i16* %A, i64 %inc
3199 store i16* %tmp, i16** %ptr
3200 ret { <4 x i16>, <4 x i16>, <4 x i16> } %ld3
; ld3lane intrinsic for <4 x i16>: operands are the three input vectors, an
; i64 lane number (the tests above pass 0) and the i16* address.
3203 declare { <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three <4 x i32>
; vectors from %A, then stores %A + 3 x i32 (= 12 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#12".
3206 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
3207 ;CHECK-LABEL: test_v4i32_post_imm_ld3lane:
3208 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3209 %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
3210 %tmp = getelementptr i32, i32* %A, i32 3
3211 store i32* %tmp, i32** %ptr
3212 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <4 x i32>, but
; the new address is %A + %inc x i32, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3215 define { <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
3216 ;CHECK-LABEL: test_v4i32_post_reg_ld3lane:
3217 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3218 %ld3 = call { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
3219 %tmp = getelementptr i32, i32* %A, i64 %inc
3220 store i32* %tmp, i32** %ptr
3221 ret { <4 x i32>, <4 x i32>, <4 x i32> } %ld3
; ld3lane intrinsic for <4 x i32>: operands are the three input vectors, an
; i64 lane number (the tests above pass 0) and the i32* address.
3224 declare { <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three <2 x i32>
; vectors from %A, then stores %A + 3 x i32 (= 12 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#12".
3227 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
3228 ;CHECK-LABEL: test_v2i32_post_imm_ld3lane:
3229 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3230 %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
3231 %tmp = getelementptr i32, i32* %A, i32 3
3232 store i32* %tmp, i32** %ptr
3233 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <2 x i32>, but
; the new address is %A + %inc x i32, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3236 define { <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld3lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
3237 ;CHECK-LABEL: test_v2i32_post_reg_ld3lane:
3238 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3239 %ld3 = call { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
3240 %tmp = getelementptr i32, i32* %A, i64 %inc
3241 store i32* %tmp, i32** %ptr
3242 ret { <2 x i32>, <2 x i32>, <2 x i32> } %ld3
; ld3lane intrinsic for <2 x i32>: operands are the three input vectors, an
; i64 lane number (the tests above pass 0) and the i32* address.
3245 declare { <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three <2 x i64>
; vectors from %A, then stores %A + 3 x i64 (= 24 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#24".
3248 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
3249 ;CHECK-LABEL: test_v2i64_post_imm_ld3lane:
3250 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
3251 %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
3252 %tmp = getelementptr i64, i64* %A, i32 3
3253 store i64* %tmp, i64** %ptr
3254 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <2 x i64>, but
; the new address is %A + %inc x i64, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3257 define { <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
3258 ;CHECK-LABEL: test_v2i64_post_reg_ld3lane:
3259 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3260 %ld3 = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
3261 %tmp = getelementptr i64, i64* %A, i64 %inc
3262 store i64* %tmp, i64** %ptr
3263 ret { <2 x i64>, <2 x i64>, <2 x i64> } %ld3
; ld3lane intrinsic for <2 x i64>: operands are the three input vectors, an
; i64 lane number (the tests above pass 0) and the i64* address.
3266 declare { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three <1 x i64>
; vectors from %A, then stores %A + 3 x i64 (= 24 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#24".
3269 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
3270 ;CHECK-LABEL: test_v1i64_post_imm_ld3lane:
3271 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
3272 %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
3273 %tmp = getelementptr i64, i64* %A, i32 3
3274 store i64* %tmp, i64** %ptr
3275 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <1 x i64>, but
; the new address is %A + %inc x i64, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3278 define { <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld3lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
3279 ;CHECK-LABEL: test_v1i64_post_reg_ld3lane:
3280 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3281 %ld3 = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
3282 %tmp = getelementptr i64, i64* %A, i64 %inc
3283 store i64* %tmp, i64** %ptr
3284 ret { <1 x i64>, <1 x i64>, <1 x i64> } %ld3
; ld3lane intrinsic for <1 x i64>: operands are the three input vectors, an
; i64 lane number (the tests above pass 0) and the i64* address.
3287 declare { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three <4 x float>
; vectors from %A, then stores %A + 3 x float (= 12 bytes) to %ptr. The
; expected instruction folds that increment into the load as "#12".
3290 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
3291 ;CHECK-LABEL: test_v4f32_post_imm_ld3lane:
3292 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3293 %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
3294 %tmp = getelementptr float, float* %A, i32 3
3295 store float* %tmp, float** %ptr
3296 ret { <4 x float>, <4 x float>, <4 x float> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <4 x float>,
; but the new address is %A + %inc x float, so the expected instruction takes
; its post-increment from an x register instead of an immediate.
3299 define { <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
3300 ;CHECK-LABEL: test_v4f32_post_reg_ld3lane:
3301 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3302 %ld3 = call { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
3303 %tmp = getelementptr float, float* %A, i64 %inc
3304 store float* %tmp, float** %ptr
3305 ret { <4 x float>, <4 x float>, <4 x float> } %ld3
; ld3lane intrinsic for <4 x float>: operands are the three input vectors, an
; i64 lane number (the tests above pass 0) and the float* address.
3308 declare { <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three <2 x float>
; vectors from %A, then stores %A + 3 x float (= 12 bytes) to %ptr. The
; expected instruction folds that increment into the load as "#12".
3311 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
3312 ;CHECK-LABEL: test_v2f32_post_imm_ld3lane:
3313 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], #12
3314 %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
3315 %tmp = getelementptr float, float* %A, i32 3
3316 store float* %tmp, float** %ptr
3317 ret { <2 x float>, <2 x float>, <2 x float> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <2 x float>,
; but the new address is %A + %inc x float, so the expected instruction takes
; its post-increment from an x register instead of an immediate.
3320 define { <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld3lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
3321 ;CHECK-LABEL: test_v2f32_post_reg_ld3lane:
3322 ;CHECK: ld3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3323 %ld3 = call { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
3324 %tmp = getelementptr float, float* %A, i64 %inc
3325 store float* %tmp, float** %ptr
3326 ret { <2 x float>, <2 x float>, <2 x float> } %ld3
; ld3lane intrinsic for <2 x float>: operands are the three input vectors, an
; i64 lane number (the tests above pass 0) and the float* address.
3329 declare { <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three
; <2 x double> vectors from %A, then stores %A + 3 x double (= 24 bytes) to
; %ptr. The expected instruction folds that increment into the load as "#24".
3332 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
3333 ;CHECK-LABEL: test_v2f64_post_imm_ld3lane:
3334 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
3335 %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
3336 %tmp = getelementptr double, double* %A, i32 3
3337 store double* %tmp, double** %ptr
3338 ret { <2 x double>, <2 x double>, <2 x double> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <2 x double>,
; but the new address is %A + %inc x double, so the expected instruction takes
; its post-increment from an x register instead of an immediate.
3341 define { <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
3342 ;CHECK-LABEL: test_v2f64_post_reg_ld3lane:
3343 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3344 %ld3 = call { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
3345 %tmp = getelementptr double, double* %A, i64 %inc
3346 store double* %tmp, double** %ptr
3347 ret { <2 x double>, <2 x double>, <2 x double> } %ld3
; ld3lane intrinsic for <2 x double>: operands are the three input vectors, an
; i64 lane number (the tests above pass 0) and the double* address.
3350 declare { <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly
; Post-indexed ld3lane, immediate writeback: loads lane 0 of three
; <1 x double> vectors from %A, then stores %A + 3 x double (= 24 bytes) to
; %ptr. The expected instruction folds that increment into the load as "#24".
3353 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
3354 ;CHECK-LABEL: test_v1f64_post_imm_ld3lane:
3355 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], #24
3356 %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
3357 %tmp = getelementptr double, double* %A, i32 3
3358 store double* %tmp, double** %ptr
3359 ret { <1 x double>, <1 x double>, <1 x double> } %ld3
; Post-indexed ld3lane, register writeback: same lane-0 load on <1 x double>,
; but the new address is %A + %inc x double, so the expected instruction takes
; its post-increment from an x register instead of an immediate.
3362 define { <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld3lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
3363 ;CHECK-LABEL: test_v1f64_post_reg_ld3lane:
3364 ;CHECK: ld3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
3365 %ld3 = call { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
3366 %tmp = getelementptr double, double* %A, i64 %inc
3367 store double* %tmp, double** %ptr
3368 ret { <1 x double>, <1 x double>, <1 x double> } %ld3
; ld3lane intrinsic for <1 x double>: operands are the three input vectors, an
; i64 lane number (the tests above pass 0) and the double* address.
3371 declare { <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly
; Post-indexed ld4lane, immediate writeback: loads lane 0 of four <16 x i8>
; vectors from %A, then stores %A + 4 x i8 (= 4 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#4".
3374 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_imm_ld4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
3375 ;CHECK-LABEL: test_v16i8_post_imm_ld4lane:
3376 ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
3377 %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
3378 %tmp = getelementptr i8, i8* %A, i32 4
3379 store i8* %tmp, i8** %ptr
3380 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
; Post-indexed ld4lane, register writeback: same lane-0 load on <16 x i8>, but
; the new address is %A + %inc x i8, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3383 define { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @test_v16i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
3384 ;CHECK-LABEL: test_v16i8_post_reg_ld4lane:
3385 ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3386 %ld4 = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
3387 %tmp = getelementptr i8, i8* %A, i64 %inc
3388 store i8* %tmp, i8** %ptr
3389 ret { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %ld4
; ld4lane intrinsic for <16 x i8>: operands are the four input vectors, an i64
; lane number (the tests above pass 0) and the i8* address.
3392 declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind readonly
; Post-indexed ld4lane, immediate writeback: loads lane 0 of four <8 x i8>
; vectors from %A, then stores %A + 4 x i8 (= 4 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#4".
3395 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_imm_ld4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
3396 ;CHECK-LABEL: test_v8i8_post_imm_ld4lane:
3397 ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], #4
3398 %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
3399 %tmp = getelementptr i8, i8* %A, i32 4
3400 store i8* %tmp, i8** %ptr
3401 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
; Post-indexed ld4lane, register writeback: same lane-0 load on <8 x i8>, but
; the new address is %A + %inc x i8, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3404 define { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @test_v8i8_post_reg_ld4lane(i8* %A, i8** %ptr, i64 %inc, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
3405 ;CHECK-LABEL: test_v8i8_post_reg_ld4lane:
3406 ;CHECK: ld4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3407 %ld4 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
3408 %tmp = getelementptr i8, i8* %A, i64 %inc
3409 store i8* %tmp, i8** %ptr
3410 ret { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %ld4
; ld4lane intrinsic for <8 x i8>: operands are the four input vectors, an i64
; lane number (the tests above pass 0) and the i8* address.
3413 declare { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) nounwind readonly
; Post-indexed ld4lane, immediate writeback: loads lane 0 of four <8 x i16>
; vectors from %A, then stores %A + 4 x i16 (= 8 bytes) to %ptr. The expected
; instruction folds that increment into the load as "#8".
3416 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_imm_ld4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
3417 ;CHECK-LABEL: test_v8i16_post_imm_ld4lane:
3418 ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
3419 %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
3420 %tmp = getelementptr i16, i16* %A, i32 4
3421 store i16* %tmp, i16** %ptr
3422 ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
; Post-indexed ld4lane, register writeback: same lane-0 load on <8 x i16>, but
; the new address is %A + %inc x i16, so the expected instruction takes its
; post-increment from an x register instead of an immediate.
3425 define { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
3426 ;CHECK-LABEL: test_v8i16_post_reg_ld4lane:
3427 ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3428 %ld4 = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
3429 %tmp = getelementptr i16, i16* %A, i64 %inc
3430 store i16* %tmp, i16** %ptr
3431 ret { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } %ld4
3434 declare { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } @llvm.aarch64.neon.ld4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind readonly
; ld4lane (lane 0) into four <4 x i16> vectors from %A, then %A advanced by 4
; i16 elements and stored to %ptr; expected as one ld4.h with post-index #8.
3437 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_imm_ld4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
3438 ;CHECK-LABEL: test_v4i16_post_imm_ld4lane:
3439 ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], #8
3440 %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
3441 %tmp = getelementptr i16, i16* %A, i32 4
3442 store i16* %tmp, i16** %ptr
3443 ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
; ld4lane (lane 0) into four <4 x i16> vectors from %A, then %A advanced by %inc
; i16 elements and stored to %ptr; expected as one ld4.h with a register post-index.
3446 define { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld4lane(i16* %A, i16** %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
3447 ;CHECK-LABEL: test_v4i16_post_reg_ld4lane:
3448 ;CHECK: ld4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3449 %ld4 = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
3450 %tmp = getelementptr i16, i16* %A, i64 %inc
3451 store i16* %tmp, i16** %ptr
3452 ret { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } %ld4
3455 declare { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } @llvm.aarch64.neon.ld4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) nounwind readonly
; ld4lane (lane 0) into four <4 x i32> vectors from %A, then %A advanced by 4
; i32 elements and stored to %ptr; expected as one ld4.s with post-index #16.
3458 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_imm_ld4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
3459 ;CHECK-LABEL: test_v4i32_post_imm_ld4lane:
3460 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
3461 %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
3462 %tmp = getelementptr i32, i32* %A, i32 4
3463 store i32* %tmp, i32** %ptr
3464 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
; ld4lane (lane 0) into four <4 x i32> vectors from %A, then %A advanced by %inc
; i32 elements and stored to %ptr; expected as one ld4.s with a register post-index.
3467 define { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
3468 ;CHECK-LABEL: test_v4i32_post_reg_ld4lane:
3469 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3470 %ld4 = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
3471 %tmp = getelementptr i32, i32* %A, i64 %inc
3472 store i32* %tmp, i32** %ptr
3473 ret { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } %ld4
3476 declare { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind readonly
; ld4lane (lane 0) into four <2 x i32> vectors from %A, then %A advanced by 4
; i32 elements and stored to %ptr; expected as one ld4.s with post-index #16.
3479 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_imm_ld4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
3480 ;CHECK-LABEL: test_v2i32_post_imm_ld4lane:
3481 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
3482 %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
3483 %tmp = getelementptr i32, i32* %A, i32 4
3484 store i32* %tmp, i32** %ptr
3485 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
; ld4lane (lane 0) into four <2 x i32> vectors from %A, then %A advanced by %inc
; i32 elements and stored to %ptr; expected as one ld4.s with a register post-index.
3488 define { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld4lane(i32* %A, i32** %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
3489 ;CHECK-LABEL: test_v2i32_post_reg_ld4lane:
3490 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3491 %ld4 = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
3492 %tmp = getelementptr i32, i32* %A, i64 %inc
3493 store i32* %tmp, i32** %ptr
3494 ret { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } %ld4
3497 declare { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } @llvm.aarch64.neon.ld4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) nounwind readonly
; ld4lane (lane 0) into four <2 x i64> vectors from %A, then %A advanced by 4
; i64 elements and stored to %ptr; expected as one ld4.d with post-index #32.
3500 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_imm_ld4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
3501 ;CHECK-LABEL: test_v2i64_post_imm_ld4lane:
3502 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
3503 %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
3504 %tmp = getelementptr i64, i64* %A, i32 4
3505 store i64* %tmp, i64** %ptr
3506 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
; ld4lane (lane 0) into four <2 x i64> vectors from %A, then %A advanced by %inc
; i64 elements and stored to %ptr; expected as one ld4.d with a register post-index.
3509 define { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
3510 ;CHECK-LABEL: test_v2i64_post_reg_ld4lane:
3511 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3512 %ld4 = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
3513 %tmp = getelementptr i64, i64* %A, i64 %inc
3514 store i64* %tmp, i64** %ptr
3515 ret { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } %ld4
3518 declare { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind readonly
; ld4lane (lane 0) into four <1 x i64> vectors from %A, then %A advanced by 4
; i64 elements and stored to %ptr; expected as one ld4.d with post-index #32.
3521 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_imm_ld4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
3522 ;CHECK-LABEL: test_v1i64_post_imm_ld4lane:
3523 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
3524 %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
3525 %tmp = getelementptr i64, i64* %A, i32 4
3526 store i64* %tmp, i64** %ptr
3527 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
; ld4lane (lane 0) into four <1 x i64> vectors from %A, then %A advanced by %inc
; i64 elements and stored to %ptr; expected as one ld4.d with a register post-index.
3530 define { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld4lane(i64* %A, i64** %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
3531 ;CHECK-LABEL: test_v1i64_post_reg_ld4lane:
3532 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3533 %ld4 = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
3534 %tmp = getelementptr i64, i64* %A, i64 %inc
3535 store i64* %tmp, i64** %ptr
3536 ret { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } %ld4
3539 declare { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) nounwind readonly
; ld4lane (lane 0) into four <4 x float> vectors from %A, then %A advanced by 4
; float elements and stored to %ptr; expected as one ld4.s with post-index #16.
3542 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_imm_ld4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
3543 ;CHECK-LABEL: test_v4f32_post_imm_ld4lane:
3544 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
3545 %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
3546 %tmp = getelementptr float, float* %A, i32 4
3547 store float* %tmp, float** %ptr
3548 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
; ld4lane (lane 0) into four <4 x float> vectors from %A, then %A advanced by %inc
; float elements and stored to %ptr; expected as one ld4.s with a register post-index.
3551 define { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @test_v4f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
3552 ;CHECK-LABEL: test_v4f32_post_reg_ld4lane:
3553 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3554 %ld4 = call { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
3555 %tmp = getelementptr float, float* %A, i64 %inc
3556 store float* %tmp, float** %ptr
3557 ret { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %ld4
3560 declare { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @llvm.aarch64.neon.ld4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*) nounwind readonly
; ld4lane (lane 0) into four <2 x float> vectors from %A, then %A advanced by 4
; float elements and stored to %ptr; expected as one ld4.s with post-index #16.
3563 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_imm_ld4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
3564 ;CHECK-LABEL: test_v2f32_post_imm_ld4lane:
3565 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], #16
3566 %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
3567 %tmp = getelementptr float, float* %A, i32 4
3568 store float* %tmp, float** %ptr
3569 ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
; ld4lane (lane 0) into four <2 x float> vectors from %A, then %A advanced by %inc
; float elements and stored to %ptr; expected as one ld4.s with a register post-index.
3572 define { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @test_v2f32_post_reg_ld4lane(float* %A, float** %ptr, i64 %inc, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
3573 ;CHECK-LABEL: test_v2f32_post_reg_ld4lane:
3574 ;CHECK: ld4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3575 %ld4 = call { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
3576 %tmp = getelementptr float, float* %A, i64 %inc
3577 store float* %tmp, float** %ptr
3578 ret { <2 x float>, <2 x float>, <2 x float>, <2 x float> } %ld4
3581 declare { <2 x float>, <2 x float>, <2 x float>, <2 x float> } @llvm.aarch64.neon.ld4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*) nounwind readonly
; ld4lane (lane 0) into four <2 x double> vectors from %A, then %A advanced by 4
; double elements and stored to %ptr; expected as one ld4.d with post-index #32.
3584 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_imm_ld4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
3585 ;CHECK-LABEL: test_v2f64_post_imm_ld4lane:
3586 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
3587 %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
3588 %tmp = getelementptr double, double* %A, i32 4
3589 store double* %tmp, double** %ptr
3590 ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
; ld4lane (lane 0) into four <2 x double> vectors from %A, then %A advanced by %inc
; double elements and stored to %ptr; expected as one ld4.d with a register post-index.
3593 define { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @test_v2f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
3594 ;CHECK-LABEL: test_v2f64_post_reg_ld4lane:
3595 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3596 %ld4 = call { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
3597 %tmp = getelementptr double, double* %A, i64 %inc
3598 store double* %tmp, double** %ptr
3599 ret { <2 x double>, <2 x double>, <2 x double>, <2 x double> } %ld4
3602 declare { <2 x double>, <2 x double>, <2 x double>, <2 x double> } @llvm.aarch64.neon.ld4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*) nounwind readonly
; ld4lane (lane 0) into four <1 x double> vectors from %A, then %A advanced by 4
; double elements and stored to %ptr; expected as one ld4.d with post-index #32.
3605 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_imm_ld4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
3606 ;CHECK-LABEL: test_v1f64_post_imm_ld4lane:
3607 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], #32
3608 %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
3609 %tmp = getelementptr double, double* %A, i32 4
3610 store double* %tmp, double** %ptr
3611 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
; ld4lane (lane 0) into four <1 x double> vectors from %A, then %A advanced by %inc
; double elements and stored to %ptr; expected as one ld4.d with a register post-index.
3614 define { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @test_v1f64_post_reg_ld4lane(double* %A, double** %ptr, i64 %inc, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
3615 ;CHECK-LABEL: test_v1f64_post_reg_ld4lane:
3616 ;CHECK: ld4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
3617 %ld4 = call { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
3618 %tmp = getelementptr double, double* %A, i64 %inc
3619 store double* %tmp, double** %ptr
3620 ret { <1 x double>, <1 x double>, <1 x double>, <1 x double> } %ld4
3623 declare { <1 x double>, <1 x double>, <1 x double>, <1 x double> } @llvm.aarch64.neon.ld4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*) nounwind readonly
; st2 of two <16 x i8> vectors to %A alongside a GEP of %A by 32 i8 elements;
; expected as a single st2.16b with post-index #32.
3626 define i8* @test_v16i8_post_imm_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
3627 ;CHECK-LABEL: test_v16i8_post_imm_st2:
3628 ;CHECK: st2.16b { v0, v1 }, [x0], #32
3629 call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
3630 %tmp = getelementptr i8, i8* %A, i32 32
; st2 of two <16 x i8> vectors to %A alongside a GEP of %A by %inc i8 elements;
; expected as a single st2.16b with a register post-index.
3634 define i8* @test_v16i8_post_reg_st2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
3635 ;CHECK-LABEL: test_v16i8_post_reg_st2:
3636 ;CHECK: st2.16b { v0, v1 }, [x0], x{{[0-9]+}}
3637 call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
3638 %tmp = getelementptr i8, i8* %A, i64 %inc
3642 declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
; st2 of two <8 x i8> vectors to %A alongside a GEP of %A by 16 i8 elements;
; expected as a single st2.8b with post-index #16.
3645 define i8* @test_v8i8_post_imm_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
3646 ;CHECK-LABEL: test_v8i8_post_imm_st2:
3647 ;CHECK: st2.8b { v0, v1 }, [x0], #16
3648 call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
3649 %tmp = getelementptr i8, i8* %A, i32 16
; st2 of two <8 x i8> vectors to %A alongside a GEP of %A by %inc i8 elements;
; expected as a single st2.8b with a register post-index.
3653 define i8* @test_v8i8_post_reg_st2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
3654 ;CHECK-LABEL: test_v8i8_post_reg_st2:
3655 ;CHECK: st2.8b { v0, v1 }, [x0], x{{[0-9]+}}
3656 call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
3657 %tmp = getelementptr i8, i8* %A, i64 %inc
3661 declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
; st2 of two <8 x i16> vectors to %A alongside a GEP of %A by 16 i16 elements;
; expected as a single st2.8h with post-index #32.
3664 define i16* @test_v8i16_post_imm_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
3665 ;CHECK-LABEL: test_v8i16_post_imm_st2:
3666 ;CHECK: st2.8h { v0, v1 }, [x0], #32
3667 call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
3668 %tmp = getelementptr i16, i16* %A, i32 16
; st2 of two <8 x i16> vectors to %A alongside a GEP of %A by %inc i16 elements;
; expected as a single st2.8h with a register post-index.
3672 define i16* @test_v8i16_post_reg_st2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
3673 ;CHECK-LABEL: test_v8i16_post_reg_st2:
3674 ;CHECK: st2.8h { v0, v1 }, [x0], x{{[0-9]+}}
3675 call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
3676 %tmp = getelementptr i16, i16* %A, i64 %inc
3680 declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
; st2 of two <4 x i16> vectors to %A alongside a GEP of %A by 8 i16 elements;
; expected as a single st2.4h with post-index #16.
3683 define i16* @test_v4i16_post_imm_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
3684 ;CHECK-LABEL: test_v4i16_post_imm_st2:
3685 ;CHECK: st2.4h { v0, v1 }, [x0], #16
3686 call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
3687 %tmp = getelementptr i16, i16* %A, i32 8
; st2 of two <4 x i16> vectors to %A alongside a GEP of %A by %inc i16 elements;
; expected as a single st2.4h with a register post-index.
3691 define i16* @test_v4i16_post_reg_st2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
3692 ;CHECK-LABEL: test_v4i16_post_reg_st2:
3693 ;CHECK: st2.4h { v0, v1 }, [x0], x{{[0-9]+}}
3694 call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
3695 %tmp = getelementptr i16, i16* %A, i64 %inc
3699 declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
; st2 of two <4 x i32> vectors to %A alongside a GEP of %A by 8 i32 elements;
; expected as a single st2.4s with post-index #32.
3702 define i32* @test_v4i32_post_imm_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
3703 ;CHECK-LABEL: test_v4i32_post_imm_st2:
3704 ;CHECK: st2.4s { v0, v1 }, [x0], #32
3705 call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
3706 %tmp = getelementptr i32, i32* %A, i32 8
; st2 of two <4 x i32> vectors to %A alongside a GEP of %A by %inc i32 elements;
; expected as a single st2.4s with a register post-index.
3710 define i32* @test_v4i32_post_reg_st2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
3711 ;CHECK-LABEL: test_v4i32_post_reg_st2:
3712 ;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
3713 call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
3714 %tmp = getelementptr i32, i32* %A, i64 %inc
3718 declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
; st2 of two <2 x i32> vectors to %A alongside a GEP of %A by 4 i32 elements;
; expected as a single st2.2s with post-index #16.
3721 define i32* @test_v2i32_post_imm_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
3722 ;CHECK-LABEL: test_v2i32_post_imm_st2:
3723 ;CHECK: st2.2s { v0, v1 }, [x0], #16
3724 call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
3725 %tmp = getelementptr i32, i32* %A, i32 4
; st2 of two <2 x i32> vectors to %A alongside a GEP of %A by %inc i32 elements;
; expected as a single st2.2s with a register post-index.
3729 define i32* @test_v2i32_post_reg_st2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
3730 ;CHECK-LABEL: test_v2i32_post_reg_st2:
3731 ;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
3732 call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
3733 %tmp = getelementptr i32, i32* %A, i64 %inc
3737 declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
; st2 of two <2 x i64> vectors to %A alongside a GEP of %A by 4 i64 elements;
; expected as a single st2.2d with post-index #32.
3740 define i64* @test_v2i64_post_imm_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
3741 ;CHECK-LABEL: test_v2i64_post_imm_st2:
3742 ;CHECK: st2.2d { v0, v1 }, [x0], #32
3743 call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
3744 %tmp = getelementptr i64, i64* %A, i64 4
; st2 of two <2 x i64> vectors to %A alongside a GEP of %A by %inc i64 elements;
; expected as a single st2.2d with a register post-index.
3748 define i64* @test_v2i64_post_reg_st2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
3749 ;CHECK-LABEL: test_v2i64_post_reg_st2:
3750 ;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
3751 call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
3752 %tmp = getelementptr i64, i64* %A, i64 %inc
3756 declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
; st2 of two <1 x i64> vectors to %A alongside a GEP of %A by 2 i64 elements.
; Note the expected mnemonic is st1.1d with a two-register list (not st2), post-index #16.
3759 define i64* @test_v1i64_post_imm_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
3760 ;CHECK-LABEL: test_v1i64_post_imm_st2:
3761 ;CHECK: st1.1d { v0, v1 }, [x0], #16
3762 call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
3763 %tmp = getelementptr i64, i64* %A, i64 2
; st2 of two <1 x i64> vectors to %A alongside a GEP of %A by %inc i64 elements.
; Note the expected mnemonic is st1.1d with a two-register list (not st2), register post-index.
3767 define i64* @test_v1i64_post_reg_st2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
3768 ;CHECK-LABEL: test_v1i64_post_reg_st2:
3769 ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
3770 call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
3771 %tmp = getelementptr i64, i64* %A, i64 %inc
3775 declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
; st2 of two <4 x float> vectors to %A alongside a GEP of %A by 8 float elements;
; expected as a single st2.4s with post-index #32.
3778 define float* @test_v4f32_post_imm_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
3779 ;CHECK-LABEL: test_v4f32_post_imm_st2:
3780 ;CHECK: st2.4s { v0, v1 }, [x0], #32
3781 call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
3782 %tmp = getelementptr float, float* %A, i32 8
; st2 of two <4 x float> vectors to %A alongside a GEP of %A by %inc float elements;
; expected as a single st2.4s with a register post-index.
3786 define float* @test_v4f32_post_reg_st2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
3787 ;CHECK-LABEL: test_v4f32_post_reg_st2:
3788 ;CHECK: st2.4s { v0, v1 }, [x0], x{{[0-9]+}}
3789 call void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
3790 %tmp = getelementptr float, float* %A, i64 %inc
3794 declare void @llvm.aarch64.neon.st2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
; st2 of two <2 x float> vectors to %A alongside a GEP of %A by 4 float elements;
; expected as a single st2.2s with post-index #16.
3797 define float* @test_v2f32_post_imm_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
3798 ;CHECK-LABEL: test_v2f32_post_imm_st2:
3799 ;CHECK: st2.2s { v0, v1 }, [x0], #16
3800 call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
3801 %tmp = getelementptr float, float* %A, i32 4
; st2 of two <2 x float> vectors to %A alongside a GEP of %A by %inc float elements;
; expected as a single st2.2s with a register post-index.
3805 define float* @test_v2f32_post_reg_st2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
3806 ;CHECK-LABEL: test_v2f32_post_reg_st2:
3807 ;CHECK: st2.2s { v0, v1 }, [x0], x{{[0-9]+}}
3808 call void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
3809 %tmp = getelementptr float, float* %A, i64 %inc
3813 declare void @llvm.aarch64.neon.st2.v2f32.p0f32(<2 x float>, <2 x float>, float*)
; st2 of two <2 x double> vectors to %A alongside a GEP of %A by 4 double elements;
; expected as a single st2.2d with post-index #32.
3816 define double* @test_v2f64_post_imm_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
3817 ;CHECK-LABEL: test_v2f64_post_imm_st2:
3818 ;CHECK: st2.2d { v0, v1 }, [x0], #32
3819 call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
3820 %tmp = getelementptr double, double* %A, i64 4
; st2 of two <2 x double> vectors to %A alongside a GEP of %A by %inc double elements;
; expected as a single st2.2d with a register post-index.
3824 define double* @test_v2f64_post_reg_st2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
3825 ;CHECK-LABEL: test_v2f64_post_reg_st2:
3826 ;CHECK: st2.2d { v0, v1 }, [x0], x{{[0-9]+}}
3827 call void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
3828 %tmp = getelementptr double, double* %A, i64 %inc
3832 declare void @llvm.aarch64.neon.st2.v2f64.p0f64(<2 x double>, <2 x double>, double*)
; st2 of two <1 x double> vectors to %A alongside a GEP of %A by 2 double elements.
; Note the expected mnemonic is st1.1d with a two-register list (not st2), post-index #16.
3835 define double* @test_v1f64_post_imm_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
3836 ;CHECK-LABEL: test_v1f64_post_imm_st2:
3837 ;CHECK: st1.1d { v0, v1 }, [x0], #16
3838 call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
3839 %tmp = getelementptr double, double* %A, i64 2
; st2 of two <1 x double> vectors to %A alongside a GEP of %A by %inc double elements.
; Note the expected mnemonic is st1.1d with a two-register list (not st2), register post-index.
3843 define double* @test_v1f64_post_reg_st2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
3844 ;CHECK-LABEL: test_v1f64_post_reg_st2:
3845 ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
3846 call void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
3847 %tmp = getelementptr double, double* %A, i64 %inc
3851 declare void @llvm.aarch64.neon.st2.v1f64.p0f64(<1 x double>, <1 x double>, double*)
; st3 of three <16 x i8> vectors to %A alongside a GEP of %A by 48 i8 elements;
; expected as a single st3.16b with post-index #48.
3854 define i8* @test_v16i8_post_imm_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
3855 ;CHECK-LABEL: test_v16i8_post_imm_st3:
3856 ;CHECK: st3.16b { v0, v1, v2 }, [x0], #48
3857 call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
3858 %tmp = getelementptr i8, i8* %A, i32 48
; st3 of three <16 x i8> vectors to %A alongside a GEP of %A by %inc i8 elements;
; expected as a single st3.16b with a register post-index.
3862 define i8* @test_v16i8_post_reg_st3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
3863 ;CHECK-LABEL: test_v16i8_post_reg_st3:
3864 ;CHECK: st3.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
3865 call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
3866 %tmp = getelementptr i8, i8* %A, i64 %inc
3870 declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
; st3 of three <8 x i8> vectors to %A alongside a GEP of %A by 24 i8 elements;
; expected as a single st3.8b with post-index #24.
3873 define i8* @test_v8i8_post_imm_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
3874 ;CHECK-LABEL: test_v8i8_post_imm_st3:
3875 ;CHECK: st3.8b { v0, v1, v2 }, [x0], #24
3876 call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
3877 %tmp = getelementptr i8, i8* %A, i32 24
; st3 of three <8 x i8> vectors to %A alongside a GEP of %A by %inc i8 elements;
; expected as a single st3.8b with a register post-index.
3881 define i8* @test_v8i8_post_reg_st3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
3882 ;CHECK-LABEL: test_v8i8_post_reg_st3:
3883 ;CHECK: st3.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
3884 call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
3885 %tmp = getelementptr i8, i8* %A, i64 %inc
3889 declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
; st3 of three <8 x i16> vectors to %A alongside a GEP of %A by 24 i16 elements;
; expected as a single st3.8h with post-index #48.
3892 define i16* @test_v8i16_post_imm_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
3893 ;CHECK-LABEL: test_v8i16_post_imm_st3:
3894 ;CHECK: st3.8h { v0, v1, v2 }, [x0], #48
3895 call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
3896 %tmp = getelementptr i16, i16* %A, i32 24
; st3 of three <8 x i16> vectors to %A alongside a GEP of %A by %inc i16 elements;
; expected as a single st3.8h with a register post-index.
3900 define i16* @test_v8i16_post_reg_st3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
3901 ;CHECK-LABEL: test_v8i16_post_reg_st3:
3902 ;CHECK: st3.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
3903 call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
3904 %tmp = getelementptr i16, i16* %A, i64 %inc
3908 declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)
; st3 of three <4 x i16> vectors to %A alongside a GEP of %A by 12 i16 elements;
; expected as a single st3.4h with post-index #24.
3911 define i16* @test_v4i16_post_imm_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
3912 ;CHECK-LABEL: test_v4i16_post_imm_st3:
3913 ;CHECK: st3.4h { v0, v1, v2 }, [x0], #24
3914 call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
3915 %tmp = getelementptr i16, i16* %A, i32 12
; st3 of three <4 x i16> vectors to %A alongside a GEP of %A by %inc i16 elements;
; expected as a single st3.4h with a register post-index.
3919 define i16* @test_v4i16_post_reg_st3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
3920 ;CHECK-LABEL: test_v4i16_post_reg_st3:
3921 ;CHECK: st3.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
3922 call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
3923 %tmp = getelementptr i16, i16* %A, i64 %inc
3927 declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)
; st3 of three <4 x i32> vectors to %A alongside a GEP of %A by 12 i32 elements;
; expected as a single st3.4s with post-index #48.
3930 define i32* @test_v4i32_post_imm_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
3931 ;CHECK-LABEL: test_v4i32_post_imm_st3:
3932 ;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
3933 call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
3934 %tmp = getelementptr i32, i32* %A, i32 12
; st3 of three <4 x i32> vectors to %A alongside a GEP of %A by %inc i32 elements;
; expected as a single st3.4s with a register post-index.
3938 define i32* @test_v4i32_post_reg_st3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
3939 ;CHECK-LABEL: test_v4i32_post_reg_st3:
3940 ;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
3941 call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
3942 %tmp = getelementptr i32, i32* %A, i64 %inc
3946 declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)
; st3 of three <2 x i32> vectors to %A alongside a GEP of %A by 6 i32 elements;
; expected as a single st3.2s with post-index #24.
3949 define i32* @test_v2i32_post_imm_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
3950 ;CHECK-LABEL: test_v2i32_post_imm_st3:
3951 ;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
3952 call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
3953 %tmp = getelementptr i32, i32* %A, i32 6
; st3 of three <2 x i32> vectors to %A alongside a GEP of %A by %inc i32 elements;
; expected as a single st3.2s with a register post-index.
3957 define i32* @test_v2i32_post_reg_st3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
3958 ;CHECK-LABEL: test_v2i32_post_reg_st3:
3959 ;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
3960 call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
3961 %tmp = getelementptr i32, i32* %A, i64 %inc
3965 declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)
; st3 of three <2 x i64> vectors to %A alongside a GEP of %A by 6 i64 elements;
; expected as a single st3.2d with post-index #48.
3968 define i64* @test_v2i64_post_imm_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
3969 ;CHECK-LABEL: test_v2i64_post_imm_st3:
3970 ;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
3971 call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
3972 %tmp = getelementptr i64, i64* %A, i64 6
; st3 of three <2 x i64> vectors to %A alongside a GEP of %A by %inc i64 elements;
; expected as a single st3.2d with a register post-index.
3976 define i64* @test_v2i64_post_reg_st3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
3977 ;CHECK-LABEL: test_v2i64_post_reg_st3:
3978 ;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
3979 call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
3980 %tmp = getelementptr i64, i64* %A, i64 %inc
3984 declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)
; st3 of three <1 x i64> vectors to %A alongside a GEP of %A by 3 i64 elements.
; Note the expected mnemonic is st1.1d with a three-register list (not st3), post-index #24.
3987 define i64* @test_v1i64_post_imm_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
3988 ;CHECK-LABEL: test_v1i64_post_imm_st3:
3989 ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
3990 call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
3991 %tmp = getelementptr i64, i64* %A, i64 3
; st3 of three <1 x i64> vectors to %A alongside a GEP of %A by %inc i64 elements.
; Note the expected mnemonic is st1.1d with a three-register list (not st3), register post-index.
3995 define i64* @test_v1i64_post_reg_st3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
3996 ;CHECK-LABEL: test_v1i64_post_reg_st3:
3997 ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
3998 call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
3999 %tmp = getelementptr i64, i64* %A, i64 %inc
4003 declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)
; st3 of three <4 x float> vectors to %A alongside a GEP of %A by 12 float elements;
; expected as a single st3.4s with post-index #48.
4006 define float* @test_v4f32_post_imm_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
4007 ;CHECK-LABEL: test_v4f32_post_imm_st3:
4008 ;CHECK: st3.4s { v0, v1, v2 }, [x0], #48
4009 call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
4010 %tmp = getelementptr float, float* %A, i32 12
; st3 of three <4 x float> vectors to %A alongside a GEP of %A by %inc float elements;
; expected as a single st3.4s with a register post-index.
4014 define float* @test_v4f32_post_reg_st3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
4015 ;CHECK-LABEL: test_v4f32_post_reg_st3:
4016 ;CHECK: st3.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4017 call void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
4018 %tmp = getelementptr float, float* %A, i64 %inc
4022 declare void @llvm.aarch64.neon.st3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
; st3 of three <2 x float> vectors to %A alongside a GEP of %A by 6 float elements;
; expected as a single st3.2s with post-index #24.
4025 define float* @test_v2f32_post_imm_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
4026 ;CHECK-LABEL: test_v2f32_post_imm_st3:
4027 ;CHECK: st3.2s { v0, v1, v2 }, [x0], #24
4028 call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
4029 %tmp = getelementptr float, float* %A, i32 6
; st3 of three <2 x float> vectors to %A alongside a GEP of %A by %inc float elements;
; expected as a single st3.2s with a register post-index.
4033 define float* @test_v2f32_post_reg_st3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
4034 ;CHECK-LABEL: test_v2f32_post_reg_st3:
4035 ;CHECK: st3.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4036 call void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
4037 %tmp = getelementptr float, float* %A, i64 %inc
4041 declare void @llvm.aarch64.neon.st3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)
; st3 of three <2 x double> vectors to %A alongside a GEP of %A by 6 double elements;
; expected as a single st3.2d with post-index #48.
4044 define double* @test_v2f64_post_imm_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
4045 ;CHECK-LABEL: test_v2f64_post_imm_st3:
4046 ;CHECK: st3.2d { v0, v1, v2 }, [x0], #48
4047 call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
4048 %tmp = getelementptr double, double* %A, i64 6
; st3 of three <2 x double> vectors to %A alongside a GEP of %A by %inc double elements;
; expected as a single st3.2d with a register post-index.
4052 define double* @test_v2f64_post_reg_st3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
4053 ;CHECK-LABEL: test_v2f64_post_reg_st3:
4054 ;CHECK: st3.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4055 call void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
4056 %tmp = getelementptr double, double* %A, i64 %inc
4060 declare void @llvm.aarch64.neon.st3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)
; st3 of three <1 x double> vectors to %A alongside a GEP of %A by 3 double elements.
; Note the expected mnemonic is st1.1d with a three-register list (not st3), post-index #24.
4063 define double* @test_v1f64_post_imm_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
4064 ;CHECK-LABEL: test_v1f64_post_imm_st3:
4065 ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
4066 call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
4067 %tmp = getelementptr double, double* %A, i64 3
; st3 of three <1 x double> vectors to %A alongside a GEP of %A by %inc double elements.
; Note the expected mnemonic is st1.1d with a three-register list (not st3), register post-index.
4071 define double* @test_v1f64_post_reg_st3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
4072 ;CHECK-LABEL: test_v1f64_post_reg_st3:
4073 ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4074 call void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
4075 %tmp = getelementptr double, double* %A, i64 %inc
; st3 overload used by the v1f64 tests: three <1 x double> operands followed by the double* address.
4079 declare void @llvm.aarch64.neon.st3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)
; Immediate post-index case for st4 on four <16 x i8> vectors: the GEP steps %A by 64 i8
; elements = 64 bytes, the total size stored, so "#64" is the expected post-index.
4082 define i8* @test_v16i8_post_imm_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
4083 ;CHECK-LABEL: test_v16i8_post_imm_st4:
4084 ;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], #64
4085 call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
4086 %tmp = getelementptr i8, i8* %A, i32 64
; Register post-index case for st4 on four <16 x i8> vectors: the GEP steps %A by the runtime
; %inc (i8 elements), so the expected post-index operand is an x register (any number).
4090 define i8* @test_v16i8_post_reg_st4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
4091 ;CHECK-LABEL: test_v16i8_post_reg_st4:
4092 ;CHECK: st4.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4093 call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
4094 %tmp = getelementptr i8, i8* %A, i64 %inc
; st4 overload used by the v16i8 tests: four <16 x i8> operands followed by the i8* address.
4098 declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
; Immediate post-index case for st4 on four <8 x i8> vectors: the GEP steps %A by 32 i8
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
4101 define i8* @test_v8i8_post_imm_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
4102 ;CHECK-LABEL: test_v8i8_post_imm_st4:
4103 ;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], #32
4104 call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
4105 %tmp = getelementptr i8, i8* %A, i32 32
; Register post-index case for st4 on four <8 x i8> vectors: the GEP steps %A by the runtime
; %inc (i8 elements), so the expected post-index operand is an x register (any number).
4109 define i8* @test_v8i8_post_reg_st4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
4110 ;CHECK-LABEL: test_v8i8_post_reg_st4:
4111 ;CHECK: st4.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4112 call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
4113 %tmp = getelementptr i8, i8* %A, i64 %inc
; st4 overload used by the v8i8 tests: four <8 x i8> operands followed by the i8* address.
4117 declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*)
; Immediate post-index case for st4 on four <8 x i16> vectors: the GEP steps %A by 32 i16
; elements = 64 bytes, the total size stored, so "#64" is the expected post-index.
4120 define i16* @test_v8i16_post_imm_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
4121 ;CHECK-LABEL: test_v8i16_post_imm_st4:
4122 ;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], #64
4123 call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
4124 %tmp = getelementptr i16, i16* %A, i32 32
; Register post-index case for st4 on four <8 x i16> vectors: the GEP steps %A by the runtime
; %inc (i16 elements), so the expected post-index operand is an x register (any number).
4128 define i16* @test_v8i16_post_reg_st4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
4129 ;CHECK-LABEL: test_v8i16_post_reg_st4:
4130 ;CHECK: st4.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4131 call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
4132 %tmp = getelementptr i16, i16* %A, i64 %inc
; st4 overload used by the v8i16 tests: four <8 x i16> operands followed by the i16* address.
4136 declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*)
; Immediate post-index case for st4 on four <4 x i16> vectors: the GEP steps %A by 16 i16
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
4139 define i16* @test_v4i16_post_imm_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
4140 ;CHECK-LABEL: test_v4i16_post_imm_st4:
4141 ;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], #32
4142 call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
4143 %tmp = getelementptr i16, i16* %A, i32 16
; Register post-index case for st4 on four <4 x i16> vectors: the GEP steps %A by the runtime
; %inc (i16 elements), so the expected post-index operand is an x register (any number).
4147 define i16* @test_v4i16_post_reg_st4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
4148 ;CHECK-LABEL: test_v4i16_post_reg_st4:
4149 ;CHECK: st4.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4150 call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
4151 %tmp = getelementptr i16, i16* %A, i64 %inc
; st4 overload used by the v4i16 tests: four <4 x i16> operands followed by the i16* address.
4155 declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*)
; Immediate post-index case for st4 on four <4 x i32> vectors: the GEP steps %A by 16 i32
; elements = 64 bytes, the total size stored, so "#64" is the expected post-index.
4158 define i32* @test_v4i32_post_imm_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
4159 ;CHECK-LABEL: test_v4i32_post_imm_st4:
4160 ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
4161 call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
4162 %tmp = getelementptr i32, i32* %A, i32 16
; Register post-index case for st4 on four <4 x i32> vectors: the GEP steps %A by the runtime
; %inc (i32 elements), so the expected post-index operand is an x register (any number).
4166 define i32* @test_v4i32_post_reg_st4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
4167 ;CHECK-LABEL: test_v4i32_post_reg_st4:
4168 ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4169 call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
4170 %tmp = getelementptr i32, i32* %A, i64 %inc
; st4 overload used by the v4i32 tests: four <4 x i32> operands followed by the i32* address.
4174 declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*)
; Immediate post-index case for st4 on four <2 x i32> vectors: the GEP steps %A by 8 i32
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
4177 define i32* @test_v2i32_post_imm_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
4178 ;CHECK-LABEL: test_v2i32_post_imm_st4:
4179 ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
4180 call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
4181 %tmp = getelementptr i32, i32* %A, i32 8
; Register post-index case for st4 on four <2 x i32> vectors: the GEP steps %A by the runtime
; %inc (i32 elements), so the expected post-index operand is an x register (any number).
4185 define i32* @test_v2i32_post_reg_st4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
4186 ;CHECK-LABEL: test_v2i32_post_reg_st4:
4187 ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4188 call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
4189 %tmp = getelementptr i32, i32* %A, i64 %inc
; st4 overload used by the v2i32 tests: four <2 x i32> operands followed by the i32* address.
4193 declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*)
; Immediate post-index case for st4 on four <2 x i64> vectors: the GEP steps %A by 8 i64
; elements = 64 bytes, the total size stored, so "#64" is the expected post-index.
4196 define i64* @test_v2i64_post_imm_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
4197 ;CHECK-LABEL: test_v2i64_post_imm_st4:
4198 ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
4199 call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
4200 %tmp = getelementptr i64, i64* %A, i64 8
; Register post-index case for st4 on four <2 x i64> vectors: the GEP steps %A by the runtime
; %inc (i64 elements), so the expected post-index operand is an x register (any number).
4204 define i64* @test_v2i64_post_reg_st4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
4205 ;CHECK-LABEL: test_v2i64_post_reg_st4:
4206 ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4207 call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
4208 %tmp = getelementptr i64, i64* %A, i64 %inc
; st4 overload used by the v2i64 tests: four <2 x i64> operands followed by the i64* address.
4212 declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*)
; Immediate post-index case for st4 on four <1 x i64> vectors: the GEP steps %A by 4 i64
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
; The expected mnemonic here is st1.1d rather than st4.
4215 define i64* @test_v1i64_post_imm_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
4216 ;CHECK-LABEL: test_v1i64_post_imm_st4:
4217 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
4218 call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
4219 %tmp = getelementptr i64, i64* %A, i64 4
; Register post-index case for st4 on four <1 x i64> vectors: the GEP steps %A by the runtime
; %inc (i64 elements), so the expected post-index operand is an x register (any number).
; The expected mnemonic here is st1.1d rather than st4.
4223 define i64* @test_v1i64_post_reg_st4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
4224 ;CHECK-LABEL: test_v1i64_post_reg_st4:
4225 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4226 call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
4227 %tmp = getelementptr i64, i64* %A, i64 %inc
; st4 overload used by the v1i64 tests: four <1 x i64> operands followed by the i64* address.
4231 declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*)
; Immediate post-index case for st4 on four <4 x float> vectors: the GEP steps %A by 16 float
; elements = 64 bytes, the total size stored, so "#64" is the expected post-index.
4234 define float* @test_v4f32_post_imm_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
4235 ;CHECK-LABEL: test_v4f32_post_imm_st4:
4236 ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], #64
4237 call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
4238 %tmp = getelementptr float, float* %A, i32 16
; Register post-index case for st4 on four <4 x float> vectors: the GEP steps %A by the runtime
; %inc (float elements), so the expected post-index operand is an x register (any number).
4242 define float* @test_v4f32_post_reg_st4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
4243 ;CHECK-LABEL: test_v4f32_post_reg_st4:
4244 ;CHECK: st4.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4245 call void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
4246 %tmp = getelementptr float, float* %A, i64 %inc
; st4 overload used by the v4f32 tests: four <4 x float> operands followed by the float* address.
4250 declare void @llvm.aarch64.neon.st4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*)
; Immediate post-index case for st4 on four <2 x float> vectors: the GEP steps %A by 8 float
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
4253 define float* @test_v2f32_post_imm_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
4254 ;CHECK-LABEL: test_v2f32_post_imm_st4:
4255 ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], #32
4256 call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
4257 %tmp = getelementptr float, float* %A, i32 8
; Register post-index case for st4 on four <2 x float> vectors: the GEP steps %A by the runtime
; %inc (float elements), so the expected post-index operand is an x register (any number).
4261 define float* @test_v2f32_post_reg_st4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
4262 ;CHECK-LABEL: test_v2f32_post_reg_st4:
4263 ;CHECK: st4.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4264 call void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
4265 %tmp = getelementptr float, float* %A, i64 %inc
; st4 overload used by the v2f32 tests: four <2 x float> operands followed by the float* address.
4269 declare void @llvm.aarch64.neon.st4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*)
; Immediate post-index case for st4 on four <2 x double> vectors: the GEP steps %A by 8 double
; elements = 64 bytes, the total size stored, so "#64" is the expected post-index.
4272 define double* @test_v2f64_post_imm_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
4273 ;CHECK-LABEL: test_v2f64_post_imm_st4:
4274 ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], #64
4275 call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
4276 %tmp = getelementptr double, double* %A, i64 8
; Register post-index case for st4 on four <2 x double> vectors: the GEP steps %A by the runtime
; %inc (double elements), so the expected post-index operand is an x register (any number).
4280 define double* @test_v2f64_post_reg_st4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
4281 ;CHECK-LABEL: test_v2f64_post_reg_st4:
4282 ;CHECK: st4.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4283 call void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
4284 %tmp = getelementptr double, double* %A, i64 %inc
; st4 overload used by the v2f64 tests: four <2 x double> operands followed by the double* address.
4288 declare void @llvm.aarch64.neon.st4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*)
; Immediate post-index case for st4 on four <1 x double> vectors: the GEP steps %A by 4 double
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
; The expected mnemonic here is st1.1d rather than st4.
4291 define double* @test_v1f64_post_imm_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
4292 ;CHECK-LABEL: test_v1f64_post_imm_st4:
4293 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
4294 call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
4295 %tmp = getelementptr double, double* %A, i64 4
; Register post-index case for st4 on four <1 x double> vectors: the GEP steps %A by the runtime
; %inc (double elements), so the expected post-index operand is an x register (any number).
; The expected mnemonic here is st1.1d rather than st4.
4299 define double* @test_v1f64_post_reg_st4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
4300 ;CHECK-LABEL: test_v1f64_post_reg_st4:
4301 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4302 call void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
4303 %tmp = getelementptr double, double* %A, i64 %inc
; st4 overload used by the v1f64 tests: four <1 x double> operands followed by the double* address.
4307 declare void @llvm.aarch64.neon.st4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*)
; Immediate post-index case for st1x2 on two <16 x i8> vectors: the GEP steps %A by 32 i8
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
4310 define i8* @test_v16i8_post_imm_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
4311 ;CHECK-LABEL: test_v16i8_post_imm_st1x2:
4312 ;CHECK: st1.16b { v0, v1 }, [x0], #32
4313 call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
4314 %tmp = getelementptr i8, i8* %A, i32 32
; Register post-index case for st1x2 on two <16 x i8> vectors: the GEP steps %A by the runtime
; %inc (i8 elements), so the expected post-index operand is an x register (any number).
4318 define i8* @test_v16i8_post_reg_st1x2(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
4319 ;CHECK-LABEL: test_v16i8_post_reg_st1x2:
4320 ;CHECK: st1.16b { v0, v1 }, [x0], x{{[0-9]+}}
4321 call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i8* %A)
4322 %tmp = getelementptr i8, i8* %A, i64 %inc
; st1x2 overload used by the v16i8 tests: two <16 x i8> operands followed by the i8* address.
4326 declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*)
; Immediate post-index case for st1x2 on two <8 x i8> vectors: the GEP steps %A by 16 i8
; elements = 16 bytes, the total size stored, so "#16" is the expected post-index.
4329 define i8* @test_v8i8_post_imm_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
4330 ;CHECK-LABEL: test_v8i8_post_imm_st1x2:
4331 ;CHECK: st1.8b { v0, v1 }, [x0], #16
4332 call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
4333 %tmp = getelementptr i8, i8* %A, i32 16
; Register post-index case for st1x2 on two <8 x i8> vectors: the GEP steps %A by the runtime
; %inc (i8 elements), so the expected post-index operand is an x register (any number).
4337 define i8* @test_v8i8_post_reg_st1x2(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
4338 ;CHECK-LABEL: test_v8i8_post_reg_st1x2:
4339 ;CHECK: st1.8b { v0, v1 }, [x0], x{{[0-9]+}}
4340 call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i8* %A)
4341 %tmp = getelementptr i8, i8* %A, i64 %inc
; st1x2 overload used by the v8i8 tests: two <8 x i8> operands followed by the i8* address.
4345 declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*)
; Immediate post-index case for st1x2 on two <8 x i16> vectors: the GEP steps %A by 16 i16
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
4348 define i16* @test_v8i16_post_imm_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
4349 ;CHECK-LABEL: test_v8i16_post_imm_st1x2:
4350 ;CHECK: st1.8h { v0, v1 }, [x0], #32
4351 call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
4352 %tmp = getelementptr i16, i16* %A, i32 16
; Register post-index case for st1x2 on two <8 x i16> vectors: the GEP steps %A by the runtime
; %inc (i16 elements), so the expected post-index operand is an x register (any number).
4356 define i16* @test_v8i16_post_reg_st1x2(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
4357 ;CHECK-LABEL: test_v8i16_post_reg_st1x2:
4358 ;CHECK: st1.8h { v0, v1 }, [x0], x{{[0-9]+}}
4359 call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i16* %A)
4360 %tmp = getelementptr i16, i16* %A, i64 %inc
; st1x2 overload used by the v8i16 tests: two <8 x i16> operands followed by the i16* address.
4364 declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*)
; Immediate post-index case for st1x2 on two <4 x i16> vectors: the GEP steps %A by 8 i16
; elements = 16 bytes, the total size stored, so "#16" is the expected post-index.
4367 define i16* @test_v4i16_post_imm_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
4368 ;CHECK-LABEL: test_v4i16_post_imm_st1x2:
4369 ;CHECK: st1.4h { v0, v1 }, [x0], #16
4370 call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
4371 %tmp = getelementptr i16, i16* %A, i32 8
; Register post-index case for st1x2 on two <4 x i16> vectors: the GEP steps %A by the runtime
; %inc (i16 elements), so the expected post-index operand is an x register (any number).
4375 define i16* @test_v4i16_post_reg_st1x2(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
4376 ;CHECK-LABEL: test_v4i16_post_reg_st1x2:
4377 ;CHECK: st1.4h { v0, v1 }, [x0], x{{[0-9]+}}
4378 call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i16* %A)
4379 %tmp = getelementptr i16, i16* %A, i64 %inc
; st1x2 overload used by the v4i16 tests: two <4 x i16> operands followed by the i16* address.
4383 declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*)
; Immediate post-index case for st1x2 on two <4 x i32> vectors: the GEP steps %A by 8 i32
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
4386 define i32* @test_v4i32_post_imm_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
4387 ;CHECK-LABEL: test_v4i32_post_imm_st1x2:
4388 ;CHECK: st1.4s { v0, v1 }, [x0], #32
4389 call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
4390 %tmp = getelementptr i32, i32* %A, i32 8
; Register post-index case for st1x2 on two <4 x i32> vectors: the GEP steps %A by the runtime
; %inc (i32 elements), so the expected post-index operand is an x register (any number).
4394 define i32* @test_v4i32_post_reg_st1x2(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
4395 ;CHECK-LABEL: test_v4i32_post_reg_st1x2:
4396 ;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
4397 call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i32* %A)
4398 %tmp = getelementptr i32, i32* %A, i64 %inc
; st1x2 overload used by the v4i32 tests: two <4 x i32> operands followed by the i32* address.
4402 declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*)
; Immediate post-index case for st1x2 on two <2 x i32> vectors: the GEP steps %A by 4 i32
; elements = 16 bytes, the total size stored, so "#16" is the expected post-index.
4405 define i32* @test_v2i32_post_imm_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
4406 ;CHECK-LABEL: test_v2i32_post_imm_st1x2:
4407 ;CHECK: st1.2s { v0, v1 }, [x0], #16
4408 call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
4409 %tmp = getelementptr i32, i32* %A, i32 4
; Register post-index case for st1x2 on two <2 x i32> vectors: the GEP steps %A by the runtime
; %inc (i32 elements), so the expected post-index operand is an x register (any number).
4413 define i32* @test_v2i32_post_reg_st1x2(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
4414 ;CHECK-LABEL: test_v2i32_post_reg_st1x2:
4415 ;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
4416 call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i32* %A)
4417 %tmp = getelementptr i32, i32* %A, i64 %inc
; st1x2 overload used by the v2i32 tests: two <2 x i32> operands followed by the i32* address.
4421 declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*)
; Immediate post-index case for st1x2 on two <2 x i64> vectors: the GEP steps %A by 4 i64
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
4424 define i64* @test_v2i64_post_imm_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
4425 ;CHECK-LABEL: test_v2i64_post_imm_st1x2:
4426 ;CHECK: st1.2d { v0, v1 }, [x0], #32
4427 call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
4428 %tmp = getelementptr i64, i64* %A, i64 4
; Register post-index case for st1x2 on two <2 x i64> vectors: the GEP steps %A by the runtime
; %inc (i64 elements), so the expected post-index operand is an x register (any number).
4432 define i64* @test_v2i64_post_reg_st1x2(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
4433 ;CHECK-LABEL: test_v2i64_post_reg_st1x2:
4434 ;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
4435 call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64* %A)
4436 %tmp = getelementptr i64, i64* %A, i64 %inc
; st1x2 overload used by the v2i64 tests: two <2 x i64> operands followed by the i64* address.
4440 declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*)
; Immediate post-index case for st1x2 on two <1 x i64> vectors: the GEP steps %A by 2 i64
; elements = 16 bytes, the total size stored, so "#16" is the expected post-index.
4443 define i64* @test_v1i64_post_imm_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
4444 ;CHECK-LABEL: test_v1i64_post_imm_st1x2:
4445 ;CHECK: st1.1d { v0, v1 }, [x0], #16
4446 call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
4447 %tmp = getelementptr i64, i64* %A, i64 2
; Register post-index case for st1x2 on two <1 x i64> vectors: the GEP steps %A by the runtime
; %inc (i64 elements), so the expected post-index operand is an x register (any number).
4451 define i64* @test_v1i64_post_reg_st1x2(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
4452 ;CHECK-LABEL: test_v1i64_post_reg_st1x2:
4453 ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
4454 call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64* %A)
4455 %tmp = getelementptr i64, i64* %A, i64 %inc
; st1x2 overload used by the v1i64 tests: two <1 x i64> operands followed by the i64* address.
4459 declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*)
; Immediate post-index case for st1x2 on two <4 x float> vectors: the GEP steps %A by 8 float
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
4462 define float* @test_v4f32_post_imm_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
4463 ;CHECK-LABEL: test_v4f32_post_imm_st1x2:
4464 ;CHECK: st1.4s { v0, v1 }, [x0], #32
4465 call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
4466 %tmp = getelementptr float, float* %A, i32 8
; Register post-index case for st1x2 on two <4 x float> vectors: the GEP steps %A by the runtime
; %inc (float elements), so the expected post-index operand is an x register (any number).
4470 define float* @test_v4f32_post_reg_st1x2(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
4471 ;CHECK-LABEL: test_v4f32_post_reg_st1x2:
4472 ;CHECK: st1.4s { v0, v1 }, [x0], x{{[0-9]+}}
4473 call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %B, <4 x float> %C, float* %A)
4474 %tmp = getelementptr float, float* %A, i64 %inc
; st1x2 overload used by the v4f32 tests: two <4 x float> operands followed by the float* address.
4478 declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*)
; Immediate post-index case for st1x2 on two <2 x float> vectors: the GEP steps %A by 4 float
; elements = 16 bytes, the total size stored, so "#16" is the expected post-index.
4481 define float* @test_v2f32_post_imm_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
4482 ;CHECK-LABEL: test_v2f32_post_imm_st1x2:
4483 ;CHECK: st1.2s { v0, v1 }, [x0], #16
4484 call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
4485 %tmp = getelementptr float, float* %A, i32 4
; Register post-index case for st1x2 on two <2 x float> vectors: the GEP steps %A by the runtime
; %inc (float elements), so the expected post-index operand is an x register (any number).
4489 define float* @test_v2f32_post_reg_st1x2(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
4490 ;CHECK-LABEL: test_v2f32_post_reg_st1x2:
4491 ;CHECK: st1.2s { v0, v1 }, [x0], x{{[0-9]+}}
4492 call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %B, <2 x float> %C, float* %A)
4493 %tmp = getelementptr float, float* %A, i64 %inc
; st1x2 overload used by the v2f32 tests: two <2 x float> operands followed by the float* address.
4497 declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*)
; Immediate post-index case for st1x2 on two <2 x double> vectors: the GEP steps %A by 4 double
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
4500 define double* @test_v2f64_post_imm_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
4501 ;CHECK-LABEL: test_v2f64_post_imm_st1x2:
4502 ;CHECK: st1.2d { v0, v1 }, [x0], #32
4503 call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
4504 %tmp = getelementptr double, double* %A, i64 4
; Register post-index case for st1x2 on two <2 x double> vectors: the GEP steps %A by the runtime
; %inc (double elements), so the expected post-index operand is an x register (any number).
4508 define double* @test_v2f64_post_reg_st1x2(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
4509 ;CHECK-LABEL: test_v2f64_post_reg_st1x2:
4510 ;CHECK: st1.2d { v0, v1 }, [x0], x{{[0-9]+}}
4511 call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %B, <2 x double> %C, double* %A)
4512 %tmp = getelementptr double, double* %A, i64 %inc
; st1x2 overload used by the v2f64 tests: two <2 x double> operands followed by the double* address.
4516 declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*)
; Immediate post-index case for st1x2 on two <1 x double> vectors: the GEP steps %A by 2 double
; elements = 16 bytes, the total size stored, so "#16" is the expected post-index.
4519 define double* @test_v1f64_post_imm_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
4520 ;CHECK-LABEL: test_v1f64_post_imm_st1x2:
4521 ;CHECK: st1.1d { v0, v1 }, [x0], #16
4522 call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
4523 %tmp = getelementptr double, double* %A, i64 2
; Register post-index case for st1x2 on two <1 x double> vectors: the GEP steps %A by the runtime
; %inc (double elements), so the expected post-index operand is an x register (any number).
4527 define double* @test_v1f64_post_reg_st1x2(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
4528 ;CHECK-LABEL: test_v1f64_post_reg_st1x2:
4529 ;CHECK: st1.1d { v0, v1 }, [x0], x{{[0-9]+}}
4530 call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %B, <1 x double> %C, double* %A)
4531 %tmp = getelementptr double, double* %A, i64 %inc
; st1x2 overload used by the v1f64 tests: two <1 x double> operands followed by the double* address.
4535 declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*)
; Immediate post-index case for st1x3 on three <16 x i8> vectors: the GEP steps %A by 48 i8
; elements = 48 bytes, the total size stored, so "#48" is the expected post-index.
4538 define i8* @test_v16i8_post_imm_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
4539 ;CHECK-LABEL: test_v16i8_post_imm_st1x3:
4540 ;CHECK: st1.16b { v0, v1, v2 }, [x0], #48
4541 call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
4542 %tmp = getelementptr i8, i8* %A, i32 48
; Register post-index case for st1x3 on three <16 x i8> vectors: the GEP steps %A by the runtime
; %inc (i8 elements), so the expected post-index operand is an x register (any number).
4546 define i8* @test_v16i8_post_reg_st1x3(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
4547 ;CHECK-LABEL: test_v16i8_post_reg_st1x3:
4548 ;CHECK: st1.16b { v0, v1, v2 }, [x0], x{{[0-9]+}}
4549 call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %A)
4550 %tmp = getelementptr i8, i8* %A, i64 %inc
; st1x3 overload used by the v16i8 tests: three <16 x i8> operands followed by the i8* address.
4554 declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*)
; Immediate post-index case for st1x3 on three <8 x i8> vectors: the GEP steps %A by 24 i8
; elements = 24 bytes, the total size stored, so "#24" is the expected post-index.
4557 define i8* @test_v8i8_post_imm_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
4558 ;CHECK-LABEL: test_v8i8_post_imm_st1x3:
4559 ;CHECK: st1.8b { v0, v1, v2 }, [x0], #24
4560 call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
4561 %tmp = getelementptr i8, i8* %A, i32 24
; Register post-index case for st1x3 on three <8 x i8> vectors: the GEP steps %A by the runtime
; %inc (i8 elements), so the expected post-index operand is an x register (any number).
4565 define i8* @test_v8i8_post_reg_st1x3(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
4566 ;CHECK-LABEL: test_v8i8_post_reg_st1x3:
4567 ;CHECK: st1.8b { v0, v1, v2 }, [x0], x{{[0-9]+}}
4568 call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %A)
4569 %tmp = getelementptr i8, i8* %A, i64 %inc
; st1x3 overload used by the v8i8 tests: three <8 x i8> operands followed by the i8* address.
4573 declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*)
; Immediate post-index case for st1x3 on three <8 x i16> vectors: the GEP steps %A by 24 i16
; elements = 48 bytes, the total size stored, so "#48" is the expected post-index.
4576 define i16* @test_v8i16_post_imm_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
4577 ;CHECK-LABEL: test_v8i16_post_imm_st1x3:
4578 ;CHECK: st1.8h { v0, v1, v2 }, [x0], #48
4579 call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
4580 %tmp = getelementptr i16, i16* %A, i32 24
; Register post-index case for st1x3 on three <8 x i16> vectors: the GEP steps %A by the runtime
; %inc (i16 elements), so the expected post-index operand is an x register (any number).
4584 define i16* @test_v8i16_post_reg_st1x3(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
4585 ;CHECK-LABEL: test_v8i16_post_reg_st1x3:
4586 ;CHECK: st1.8h { v0, v1, v2 }, [x0], x{{[0-9]+}}
4587 call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %A)
4588 %tmp = getelementptr i16, i16* %A, i64 %inc
; st1x3 overload used by the v8i16 tests: three <8 x i16> operands followed by the i16* address.
4592 declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*)
; Immediate post-index case for st1x3 on three <4 x i16> vectors: the GEP steps %A by 12 i16
; elements = 24 bytes, the total size stored, so "#24" is the expected post-index.
4595 define i16* @test_v4i16_post_imm_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
4596 ;CHECK-LABEL: test_v4i16_post_imm_st1x3:
4597 ;CHECK: st1.4h { v0, v1, v2 }, [x0], #24
4598 call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
4599 %tmp = getelementptr i16, i16* %A, i32 12
; Register post-index case for st1x3 on three <4 x i16> vectors: the GEP steps %A by the runtime
; %inc (i16 elements), so the expected post-index operand is an x register (any number).
4603 define i16* @test_v4i16_post_reg_st1x3(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
4604 ;CHECK-LABEL: test_v4i16_post_reg_st1x3:
4605 ;CHECK: st1.4h { v0, v1, v2 }, [x0], x{{[0-9]+}}
4606 call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %A)
4607 %tmp = getelementptr i16, i16* %A, i64 %inc
; st1x3 overload used by the v4i16 tests: three <4 x i16> operands followed by the i16* address.
4611 declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*)
; Immediate post-index case for st1x3 on three <4 x i32> vectors: the GEP steps %A by 12 i32
; elements = 48 bytes, the total size stored, so "#48" is the expected post-index.
4614 define i32* @test_v4i32_post_imm_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
4615 ;CHECK-LABEL: test_v4i32_post_imm_st1x3:
4616 ;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
4617 call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
4618 %tmp = getelementptr i32, i32* %A, i32 12
; Register post-index case for st1x3 on three <4 x i32> vectors: the GEP steps %A by the runtime
; %inc (i32 elements), so the expected post-index operand is an x register (any number).
4622 define i32* @test_v4i32_post_reg_st1x3(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
4623 ;CHECK-LABEL: test_v4i32_post_reg_st1x3:
4624 ;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4625 call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %A)
4626 %tmp = getelementptr i32, i32* %A, i64 %inc
; st1x3 overload used by the v4i32 tests: three <4 x i32> operands followed by the i32* address.
4630 declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*)
; Immediate post-index case for st1x3 on three <2 x i32> vectors: the GEP steps %A by 6 i32
; elements = 24 bytes, the total size stored, so "#24" is the expected post-index.
4633 define i32* @test_v2i32_post_imm_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
4634 ;CHECK-LABEL: test_v2i32_post_imm_st1x3:
4635 ;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
4636 call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
4637 %tmp = getelementptr i32, i32* %A, i32 6
; Register post-index case for st1x3 on three <2 x i32> vectors: the GEP steps %A by the runtime
; %inc (i32 elements), so the expected post-index operand is an x register (any number).
4641 define i32* @test_v2i32_post_reg_st1x3(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
4642 ;CHECK-LABEL: test_v2i32_post_reg_st1x3:
4643 ;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4644 call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %A)
4645 %tmp = getelementptr i32, i32* %A, i64 %inc
; st1x3 overload used by the v2i32 tests: three <2 x i32> operands followed by the i32* address.
4649 declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*)
; Immediate post-index case for st1x3 on three <2 x i64> vectors: the GEP steps %A by 6 i64
; elements = 48 bytes, the total size stored, so "#48" is the expected post-index.
4652 define i64* @test_v2i64_post_imm_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
4653 ;CHECK-LABEL: test_v2i64_post_imm_st1x3:
4654 ;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
4655 call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
4656 %tmp = getelementptr i64, i64* %A, i64 6
; Register post-index case for st1x3 on three <2 x i64> vectors: the GEP steps %A by the runtime
; %inc (i64 elements), so the expected post-index operand is an x register (any number).
4660 define i64* @test_v2i64_post_reg_st1x3(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
4661 ;CHECK-LABEL: test_v2i64_post_reg_st1x3:
4662 ;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4663 call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %A)
4664 %tmp = getelementptr i64, i64* %A, i64 %inc
; st1x3 overload used by the v2i64 tests: three <2 x i64> operands followed by the i64* address.
4668 declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*)
; Immediate post-index case for st1x3 on three <1 x i64> vectors: the GEP steps %A by 3 i64
; elements = 24 bytes, the total size stored, so "#24" is the expected post-index.
4671 define i64* @test_v1i64_post_imm_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
4672 ;CHECK-LABEL: test_v1i64_post_imm_st1x3:
4673 ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
4674 call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
4675 %tmp = getelementptr i64, i64* %A, i64 3
; Register post-index case for st1x3 on three <1 x i64> vectors: the GEP steps %A by the runtime
; %inc (i64 elements), so the expected post-index operand is an x register (any number).
4679 define i64* @test_v1i64_post_reg_st1x3(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
4680 ;CHECK-LABEL: test_v1i64_post_reg_st1x3:
4681 ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4682 call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %A)
4683 %tmp = getelementptr i64, i64* %A, i64 %inc
; st1x3 overload used by the v1i64 tests: three <1 x i64> operands followed by the i64* address.
4687 declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*)
; Immediate post-index case for st1x3 on three <4 x float> vectors: the GEP steps %A by 12 float
; elements = 48 bytes, the total size stored, so "#48" is the expected post-index.
4690 define float* @test_v4f32_post_imm_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
4691 ;CHECK-LABEL: test_v4f32_post_imm_st1x3:
4692 ;CHECK: st1.4s { v0, v1, v2 }, [x0], #48
4693 call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
4694 %tmp = getelementptr float, float* %A, i32 12
; Register post-index case for st1x3 on three <4 x float> vectors: the GEP steps %A by the runtime
; %inc (float elements), so the expected post-index operand is an x register (any number).
4698 define float* @test_v4f32_post_reg_st1x3(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
4699 ;CHECK-LABEL: test_v4f32_post_reg_st1x3:
4700 ;CHECK: st1.4s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4701 call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, float* %A)
4702 %tmp = getelementptr float, float* %A, i64 %inc
; st1x3 overload used by the v4f32 tests: three <4 x float> operands followed by the float* address.
4706 declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*)
; Immediate post-index case for st1x3 on three <2 x float> vectors: the GEP steps %A by 6 float
; elements = 24 bytes, the total size stored, so "#24" is the expected post-index.
4709 define float* @test_v2f32_post_imm_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
4710 ;CHECK-LABEL: test_v2f32_post_imm_st1x3:
4711 ;CHECK: st1.2s { v0, v1, v2 }, [x0], #24
4712 call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
4713 %tmp = getelementptr float, float* %A, i32 6
; Register post-index case for st1x3 on three <2 x float> vectors: the GEP steps %A by the runtime
; %inc (float elements), so the expected post-index operand is an x register (any number).
4717 define float* @test_v2f32_post_reg_st1x3(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
4718 ;CHECK-LABEL: test_v2f32_post_reg_st1x3:
4719 ;CHECK: st1.2s { v0, v1, v2 }, [x0], x{{[0-9]+}}
4720 call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, float* %A)
4721 %tmp = getelementptr float, float* %A, i64 %inc
; st1x3 overload used by the v2f32 tests: three <2 x float> operands followed by the float* address.
4725 declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*)
; Immediate post-index case for st1x3 on three <2 x double> vectors: the GEP steps %A by 6 double
; elements = 48 bytes, the total size stored, so "#48" is the expected post-index.
4728 define double* @test_v2f64_post_imm_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
4729 ;CHECK-LABEL: test_v2f64_post_imm_st1x3:
4730 ;CHECK: st1.2d { v0, v1, v2 }, [x0], #48
4731 call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
4732 %tmp = getelementptr double, double* %A, i64 6
; Register post-index case for st1x3 on three <2 x double> vectors: the GEP steps %A by the runtime
; %inc (double elements), so the expected post-index operand is an x register (any number).
4736 define double* @test_v2f64_post_reg_st1x3(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
4737 ;CHECK-LABEL: test_v2f64_post_reg_st1x3:
4738 ;CHECK: st1.2d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4739 call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, double* %A)
4740 %tmp = getelementptr double, double* %A, i64 %inc
; st1x3 overload used by the v2f64 tests: three <2 x double> operands followed by the double* address.
4744 declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*)
; Immediate post-index case for st1x3 on three <1 x double> vectors: the GEP steps %A by 3 double
; elements = 24 bytes, the total size stored, so "#24" is the expected post-index.
4747 define double* @test_v1f64_post_imm_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
4748 ;CHECK-LABEL: test_v1f64_post_imm_st1x3:
4749 ;CHECK: st1.1d { v0, v1, v2 }, [x0], #24
4750 call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
4751 %tmp = getelementptr double, double* %A, i64 3
; Register post-index case for st1x3 on three <1 x double> vectors: the GEP steps %A by the runtime
; %inc (double elements), so the expected post-index operand is an x register (any number).
4755 define double* @test_v1f64_post_reg_st1x3(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
4756 ;CHECK-LABEL: test_v1f64_post_reg_st1x3:
4757 ;CHECK: st1.1d { v0, v1, v2 }, [x0], x{{[0-9]+}}
4758 call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, double* %A)
4759 %tmp = getelementptr double, double* %A, i64 %inc
; st1x3 overload used by the v1f64 tests: three <1 x double> operands followed by the double* address.
4763 declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*)
; Immediate post-index case for st1x4 on four <16 x i8> vectors: the GEP steps %A by 64 i8
; elements = 64 bytes, the total size stored, so "#64" is the expected post-index.
4766 define i8* @test_v16i8_post_imm_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
4767 ;CHECK-LABEL: test_v16i8_post_imm_st1x4:
4768 ;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], #64
4769 call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
4770 %tmp = getelementptr i8, i8* %A, i32 64
; Register post-index case for st1x4 on four <16 x i8> vectors: the GEP steps %A by the runtime
; %inc (i8 elements), so the expected post-index operand is an x register (any number).
4774 define i8* @test_v16i8_post_reg_st1x4(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
4775 ;CHECK-LABEL: test_v16i8_post_reg_st1x4:
4776 ;CHECK: st1.16b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4777 call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i8* %A)
4778 %tmp = getelementptr i8, i8* %A, i64 %inc
; st1x4 overload used by the v16i8 tests: four <16 x i8> operands followed by the i8* address.
4782 declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*)
; Immediate post-index case for st1x4 on four <8 x i8> vectors: the GEP steps %A by 32 i8
; elements = 32 bytes, the total size stored, so "#32" is the expected post-index.
4785 define i8* @test_v8i8_post_imm_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
4786 ;CHECK-LABEL: test_v8i8_post_imm_st1x4:
4787 ;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], #32
4788 call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
4789 %tmp = getelementptr i8, i8* %A, i32 32
4793 define i8* @test_v8i8_post_reg_st1x4(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
4794 ;CHECK-LABEL: test_v8i8_post_reg_st1x4:
4795 ;CHECK: st1.8b { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4796 call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i8* %A)
4797 %tmp = getelementptr i8, i8* %A, i64 %inc
4801 declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) ; Callee of the v8i8 st1x4 post-index tests above (expected: st1.8b { v0, v1, v2, v3 }).
4804 define i16* @test_v8i16_post_imm_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
4805 ;CHECK-LABEL: test_v8i16_post_imm_st1x4:
4806 ;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], #64
4807 call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
4808 %tmp = getelementptr i16, i16* %A, i32 32
4812 define i16* @test_v8i16_post_reg_st1x4(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
4813 ;CHECK-LABEL: test_v8i16_post_reg_st1x4:
4814 ;CHECK: st1.8h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4815 call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i16* %A)
4816 %tmp = getelementptr i16, i16* %A, i64 %inc
4820 declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) ; Callee of the v8i16 st1x4 post-index tests above (expected: st1.8h { v0, v1, v2, v3 }).
4823 define i16* @test_v4i16_post_imm_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
4824 ;CHECK-LABEL: test_v4i16_post_imm_st1x4:
4825 ;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], #32
4826 call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
4827 %tmp = getelementptr i16, i16* %A, i32 16
4831 define i16* @test_v4i16_post_reg_st1x4(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
4832 ;CHECK-LABEL: test_v4i16_post_reg_st1x4:
4833 ;CHECK: st1.4h { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4834 call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i16* %A)
4835 %tmp = getelementptr i16, i16* %A, i64 %inc
4839 declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) ; Callee of the v4i16 st1x4 post-index tests above (expected: st1.4h { v0, v1, v2, v3 }).
4842 define i32* @test_v4i32_post_imm_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
4843 ;CHECK-LABEL: test_v4i32_post_imm_st1x4:
4844 ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
4845 call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
4846 %tmp = getelementptr i32, i32* %A, i32 16
4850 define i32* @test_v4i32_post_reg_st1x4(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
4851 ;CHECK-LABEL: test_v4i32_post_reg_st1x4:
4852 ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4853 call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i32* %A)
4854 %tmp = getelementptr i32, i32* %A, i64 %inc
4858 declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) ; Callee of the v4i32 st1x4 post-index tests above (expected: st1.4s { v0, v1, v2, v3 }).
4861 define i32* @test_v2i32_post_imm_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
4862 ;CHECK-LABEL: test_v2i32_post_imm_st1x4:
4863 ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
4864 call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
4865 %tmp = getelementptr i32, i32* %A, i32 8
4869 define i32* @test_v2i32_post_reg_st1x4(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
4870 ;CHECK-LABEL: test_v2i32_post_reg_st1x4:
4871 ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4872 call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i32* %A)
4873 %tmp = getelementptr i32, i32* %A, i64 %inc
4877 declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) ; Callee of the v2i32 st1x4 post-index tests above (expected: st1.2s { v0, v1, v2, v3 }).
4880 define i64* @test_v2i64_post_imm_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
4881 ;CHECK-LABEL: test_v2i64_post_imm_st1x4:
4882 ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
4883 call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
4884 %tmp = getelementptr i64, i64* %A, i64 8
4888 define i64* @test_v2i64_post_reg_st1x4(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
4889 ;CHECK-LABEL: test_v2i64_post_reg_st1x4:
4890 ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4891 call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64* %A)
4892 %tmp = getelementptr i64, i64* %A, i64 %inc
4896 declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) ; Callee of the v2i64 st1x4 post-index tests above (expected: st1.2d { v0, v1, v2, v3 }).
4899 define i64* @test_v1i64_post_imm_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
4900 ;CHECK-LABEL: test_v1i64_post_imm_st1x4:
4901 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
4902 call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
4903 %tmp = getelementptr i64, i64* %A, i64 4
4907 define i64* @test_v1i64_post_reg_st1x4(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
4908 ;CHECK-LABEL: test_v1i64_post_reg_st1x4:
4909 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4910 call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64* %A)
4911 %tmp = getelementptr i64, i64* %A, i64 %inc
4915 declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) ; Callee of the v1i64 st1x4 post-index tests above (expected: st1.1d { v0, v1, v2, v3 }).
4918 define float* @test_v4f32_post_imm_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
4919 ;CHECK-LABEL: test_v4f32_post_imm_st1x4:
4920 ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], #64
4921 call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
4922 %tmp = getelementptr float, float* %A, i32 16
4926 define float* @test_v4f32_post_reg_st1x4(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
4927 ;CHECK-LABEL: test_v4f32_post_reg_st1x4:
4928 ;CHECK: st1.4s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4929 call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, float* %A)
4930 %tmp = getelementptr float, float* %A, i64 %inc
4934 declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) ; Callee of the v4f32 st1x4 post-index tests above (expected: st1.4s { v0, v1, v2, v3 }).
4937 define float* @test_v2f32_post_imm_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
4938 ;CHECK-LABEL: test_v2f32_post_imm_st1x4:
4939 ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], #32
4940 call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
4941 %tmp = getelementptr float, float* %A, i32 8
4945 define float* @test_v2f32_post_reg_st1x4(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
4946 ;CHECK-LABEL: test_v2f32_post_reg_st1x4:
4947 ;CHECK: st1.2s { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4948 call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, float* %A)
4949 %tmp = getelementptr float, float* %A, i64 %inc
4953 declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) ; Callee of the v2f32 st1x4 post-index tests above (expected: st1.2s { v0, v1, v2, v3 }).
4956 define double* @test_v2f64_post_imm_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
4957 ;CHECK-LABEL: test_v2f64_post_imm_st1x4:
4958 ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], #64
4959 call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
4960 %tmp = getelementptr double, double* %A, i64 8
4964 define double* @test_v2f64_post_reg_st1x4(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
4965 ;CHECK-LABEL: test_v2f64_post_reg_st1x4:
4966 ;CHECK: st1.2d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4967 call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, double* %A)
4968 %tmp = getelementptr double, double* %A, i64 %inc
4972 declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) ; Callee of the v2f64 st1x4 post-index tests above (expected: st1.2d { v0, v1, v2, v3 }).
4975 define double* @test_v1f64_post_imm_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
4976 ;CHECK-LABEL: test_v1f64_post_imm_st1x4:
4977 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], #32
4978 call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
4979 %tmp = getelementptr double, double* %A, i64 4
4983 define double* @test_v1f64_post_reg_st1x4(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
4984 ;CHECK-LABEL: test_v1f64_post_reg_st1x4:
4985 ;CHECK: st1.1d { v0, v1, v2, v3 }, [x0], x{{[0-9]+}}
4986 call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, double* %A)
4987 %tmp = getelementptr double, double* %A, i64 %inc
4991 declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) ; Callee of the v1f64 st1x4 post-index tests above (expected: st1.1d { v0, v1, v2, v3 }).
4994 define i8* @test_v16i8_post_imm_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) {
4995 call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
4996 %tmp = getelementptr i8, i8* %A, i32 2
5000 define i8* @test_v16i8_post_reg_st2lanelane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) {
5001 call void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i64 1, i8* %A)
5002 %tmp = getelementptr i8, i8* %A, i64 %inc
5006 declare void @llvm.aarch64.neon.st2lanelane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i64, i8*) nounwind readnone ; NOTE(review): returns void and is marked readnone although it takes a pointer, and its two callers above carry no FileCheck patterns - confirm this name and these attributes are intentional.
5009 define i8* @test_v16i8_post_imm_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C) nounwind {
5010 ;CHECK-LABEL: test_v16i8_post_imm_st2lane:
5011 ;CHECK: st2.b { v0, v1 }[0], [x0], #2
5012 call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
5013 %tmp = getelementptr i8, i8* %A, i32 2
5017 define i8* @test_v16i8_post_reg_st2lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, i64 %inc) nounwind {
5018 ;CHECK-LABEL: test_v16i8_post_reg_st2lane:
5019 ;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
5020 call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, i64 0, i8* %A)
5021 %tmp = getelementptr i8, i8* %A, i64 %inc
5025 declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) ; Callee of the v16i8 st2lane post-index tests above; both pass lane 0 (expected: st2.b { v0, v1 }[0]).
5028 define i8* @test_v8i8_post_imm_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C) nounwind {
5029 ;CHECK-LABEL: test_v8i8_post_imm_st2lane:
5030 ;CHECK: st2.b { v0, v1 }[0], [x0], #2
5031 call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
5032 %tmp = getelementptr i8, i8* %A, i32 2
5036 define i8* @test_v8i8_post_reg_st2lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, i64 %inc) nounwind {
5037 ;CHECK-LABEL: test_v8i8_post_reg_st2lane:
5038 ;CHECK: st2.b { v0, v1 }[0], [x0], x{{[0-9]+}}
5039 call void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, i64 0, i8* %A)
5040 %tmp = getelementptr i8, i8* %A, i64 %inc
5044 declare void @llvm.aarch64.neon.st2lane.v8i8.p0i8(<8 x i8>, <8 x i8>, i64, i8*) ; Callee of the v8i8 st2lane post-index tests above; both pass lane 0 (expected: st2.b { v0, v1 }[0]).
5047 define i16* @test_v8i16_post_imm_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C) nounwind {
5048 ;CHECK-LABEL: test_v8i16_post_imm_st2lane:
5049 ;CHECK: st2.h { v0, v1 }[0], [x0], #4
5050 call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
5051 %tmp = getelementptr i16, i16* %A, i32 2
5055 define i16* @test_v8i16_post_reg_st2lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, i64 %inc) nounwind {
5056 ;CHECK-LABEL: test_v8i16_post_reg_st2lane:
5057 ;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
5058 call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, i64 0, i16* %A)
5059 %tmp = getelementptr i16, i16* %A, i64 %inc
5063 declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) ; Callee of the v8i16 st2lane post-index tests above; both pass lane 0 (expected: st2.h { v0, v1 }[0]).
5066 define i16* @test_v4i16_post_imm_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C) nounwind {
5067 ;CHECK-LABEL: test_v4i16_post_imm_st2lane:
5068 ;CHECK: st2.h { v0, v1 }[0], [x0], #4
5069 call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
5070 %tmp = getelementptr i16, i16* %A, i32 2
5074 define i16* @test_v4i16_post_reg_st2lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, i64 %inc) nounwind {
5075 ;CHECK-LABEL: test_v4i16_post_reg_st2lane:
5076 ;CHECK: st2.h { v0, v1 }[0], [x0], x{{[0-9]+}}
5077 call void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, i64 0, i16* %A)
5078 %tmp = getelementptr i16, i16* %A, i64 %inc
5082 declare void @llvm.aarch64.neon.st2lane.v4i16.p0i16(<4 x i16>, <4 x i16>, i64, i16*) ; Callee of the v4i16 st2lane post-index tests above; both pass lane 0 (expected: st2.h { v0, v1 }[0]).
5085 define i32* @test_v4i32_post_imm_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C) nounwind {
5086 ;CHECK-LABEL: test_v4i32_post_imm_st2lane:
5087 ;CHECK: st2.s { v0, v1 }[0], [x0], #8
5088 call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
5089 %tmp = getelementptr i32, i32* %A, i32 2
5093 define i32* @test_v4i32_post_reg_st2lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, i64 %inc) nounwind {
5094 ;CHECK-LABEL: test_v4i32_post_reg_st2lane:
5095 ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
5096 call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, i64 0, i32* %A)
5097 %tmp = getelementptr i32, i32* %A, i64 %inc
5101 declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) ; Callee of the v4i32 st2lane post-index tests above; both pass lane 0 (expected: st2.s { v0, v1 }[0]).
5104 define i32* @test_v2i32_post_imm_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C) nounwind {
5105 ;CHECK-LABEL: test_v2i32_post_imm_st2lane:
5106 ;CHECK: st2.s { v0, v1 }[0], [x0], #8
5107 call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
5108 %tmp = getelementptr i32, i32* %A, i32 2
5112 define i32* @test_v2i32_post_reg_st2lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, i64 %inc) nounwind {
5113 ;CHECK-LABEL: test_v2i32_post_reg_st2lane:
5114 ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
5115 call void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, i64 0, i32* %A)
5116 %tmp = getelementptr i32, i32* %A, i64 %inc
5120 declare void @llvm.aarch64.neon.st2lane.v2i32.p0i32(<2 x i32>, <2 x i32>, i64, i32*) ; Callee of the v2i32 st2lane post-index tests above; both pass lane 0 (expected: st2.s { v0, v1 }[0]).
5123 define i64* @test_v2i64_post_imm_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C) nounwind {
5124 ;CHECK-LABEL: test_v2i64_post_imm_st2lane:
5125 ;CHECK: st2.d { v0, v1 }[0], [x0], #16
5126 call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
5127 %tmp = getelementptr i64, i64* %A, i64 2
5131 define i64* @test_v2i64_post_reg_st2lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, i64 %inc) nounwind {
5132 ;CHECK-LABEL: test_v2i64_post_reg_st2lane:
5133 ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
5134 call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, i64 0, i64* %A)
5135 %tmp = getelementptr i64, i64* %A, i64 %inc
5139 declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) ; Callee of the v2i64 st2lane post-index tests above; both pass lane 0 (expected: st2.d { v0, v1 }[0]).
5142 define i64* @test_v1i64_post_imm_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C) nounwind {
5143 ;CHECK-LABEL: test_v1i64_post_imm_st2lane:
5144 ;CHECK: st2.d { v0, v1 }[0], [x0], #16
5145 call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
5146 %tmp = getelementptr i64, i64* %A, i64 2
5150 define i64* @test_v1i64_post_reg_st2lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, i64 %inc) nounwind {
5151 ;CHECK-LABEL: test_v1i64_post_reg_st2lane:
5152 ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
5153 call void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, i64 0, i64* %A)
5154 %tmp = getelementptr i64, i64* %A, i64 %inc
5158 declare void @llvm.aarch64.neon.st2lane.v1i64.p0i64(<1 x i64>, <1 x i64>, i64, i64*) ; Callee of the v1i64 st2lane post-index tests above; both pass lane 0 (expected: st2.d { v0, v1 }[0]).
5161 define float* @test_v4f32_post_imm_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C) nounwind {
5162 ;CHECK-LABEL: test_v4f32_post_imm_st2lane:
5163 ;CHECK: st2.s { v0, v1 }[0], [x0], #8
5164 call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
5165 %tmp = getelementptr float, float* %A, i32 2
5169 define float* @test_v4f32_post_reg_st2lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, i64 %inc) nounwind {
5170 ;CHECK-LABEL: test_v4f32_post_reg_st2lane:
5171 ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
5172 call void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, i64 0, float* %A)
5173 %tmp = getelementptr float, float* %A, i64 %inc
5177 declare void @llvm.aarch64.neon.st2lane.v4f32.p0f32(<4 x float>, <4 x float>, i64, float*) ; Callee of the v4f32 st2lane post-index tests above; both pass lane 0 (expected: st2.s { v0, v1 }[0]).
5180 define float* @test_v2f32_post_imm_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C) nounwind {
5181 ;CHECK-LABEL: test_v2f32_post_imm_st2lane:
5182 ;CHECK: st2.s { v0, v1 }[0], [x0], #8
5183 call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
5184 %tmp = getelementptr float, float* %A, i32 2
5188 define float* @test_v2f32_post_reg_st2lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, i64 %inc) nounwind {
5189 ;CHECK-LABEL: test_v2f32_post_reg_st2lane:
5190 ;CHECK: st2.s { v0, v1 }[0], [x0], x{{[0-9]+}}
5191 call void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, i64 0, float* %A)
5192 %tmp = getelementptr float, float* %A, i64 %inc
5196 declare void @llvm.aarch64.neon.st2lane.v2f32.p0f32(<2 x float>, <2 x float>, i64, float*) ; Callee of the v2f32 st2lane post-index tests above; both pass lane 0 (expected: st2.s { v0, v1 }[0]).
5199 define double* @test_v2f64_post_imm_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C) nounwind {
5200 ;CHECK-LABEL: test_v2f64_post_imm_st2lane:
5201 ;CHECK: st2.d { v0, v1 }[0], [x0], #16
5202 call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
5203 %tmp = getelementptr double, double* %A, i64 2
5207 define double* @test_v2f64_post_reg_st2lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, i64 %inc) nounwind {
5208 ;CHECK-LABEL: test_v2f64_post_reg_st2lane:
5209 ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
5210 call void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, i64 0, double* %A)
5211 %tmp = getelementptr double, double* %A, i64 %inc
5215 declare void @llvm.aarch64.neon.st2lane.v2f64.p0f64(<2 x double>, <2 x double>, i64, double*) ; Callee of the v2f64 st2lane post-index tests above; both pass lane 0 (expected: st2.d { v0, v1 }[0]).
5218 define double* @test_v1f64_post_imm_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C) nounwind {
5219 ;CHECK-LABEL: test_v1f64_post_imm_st2lane:
5220 ;CHECK: st2.d { v0, v1 }[0], [x0], #16
5221 call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
5222 %tmp = getelementptr double, double* %A, i64 2
5226 define double* @test_v1f64_post_reg_st2lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, i64 %inc) nounwind {
5227 ;CHECK-LABEL: test_v1f64_post_reg_st2lane:
5228 ;CHECK: st2.d { v0, v1 }[0], [x0], x{{[0-9]+}}
5229 call void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, i64 0, double* %A)
5230 %tmp = getelementptr double, double* %A, i64 %inc
5234 declare void @llvm.aarch64.neon.st2lane.v1f64.p0f64(<1 x double>, <1 x double>, i64, double*) ; Callee of the v1f64 st2lane post-index tests above; both pass lane 0 (expected: st2.d { v0, v1 }[0]).
5237 define i8* @test_v16i8_post_imm_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D) nounwind {
5238 ;CHECK-LABEL: test_v16i8_post_imm_st3lane:
5239 ;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
5240 call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
5241 %tmp = getelementptr i8, i8* %A, i32 3
5245 define i8* @test_v16i8_post_reg_st3lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 %inc) nounwind {
5246 ;CHECK-LABEL: test_v16i8_post_reg_st3lane:
5247 ;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5248 call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 0, i8* %A)
5249 %tmp = getelementptr i8, i8* %A, i64 %inc
5253 declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) ; Callee of the v16i8 st3lane post-index tests above; both pass lane 0 (expected: st3.b { v0, v1, v2 }[0]).
5256 define i8* @test_v8i8_post_imm_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D) nounwind {
5257 ;CHECK-LABEL: test_v8i8_post_imm_st3lane:
5258 ;CHECK: st3.b { v0, v1, v2 }[0], [x0], #3
5259 call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
5260 %tmp = getelementptr i8, i8* %A, i32 3
5264 define i8* @test_v8i8_post_reg_st3lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 %inc) nounwind {
5265 ;CHECK-LABEL: test_v8i8_post_reg_st3lane:
5266 ;CHECK: st3.b { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5267 call void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i64 0, i8* %A)
5268 %tmp = getelementptr i8, i8* %A, i64 %inc
5272 declare void @llvm.aarch64.neon.st3lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i64, i8*) ; Callee of the v8i8 st3lane post-index tests above; both pass lane 0 (expected: st3.b { v0, v1, v2 }[0]).
5275 define i16* @test_v8i16_post_imm_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D) nounwind {
5276 ;CHECK-LABEL: test_v8i16_post_imm_st3lane:
5277 ;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
5278 call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
5279 %tmp = getelementptr i16, i16* %A, i32 3
5283 define i16* @test_v8i16_post_reg_st3lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind {
5284 ;CHECK-LABEL: test_v8i16_post_reg_st3lane:
5285 ;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5286 call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 0, i16* %A)
5287 %tmp = getelementptr i16, i16* %A, i64 %inc
5291 declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) ; Callee of the v8i16 st3lane post-index tests above; both pass lane 0 (expected: st3.h { v0, v1, v2 }[0]).
5294 define i16* @test_v4i16_post_imm_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D) nounwind {
5295 ;CHECK-LABEL: test_v4i16_post_imm_st3lane:
5296 ;CHECK: st3.h { v0, v1, v2 }[0], [x0], #6
5297 call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
5298 %tmp = getelementptr i16, i16* %A, i32 3
5302 define i16* @test_v4i16_post_reg_st3lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind {
5303 ;CHECK-LABEL: test_v4i16_post_reg_st3lane:
5304 ;CHECK: st3.h { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5305 call void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 0, i16* %A)
5306 %tmp = getelementptr i16, i16* %A, i64 %inc
5310 declare void @llvm.aarch64.neon.st3lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i64, i16*) ; Callee of the v4i16 st3lane post-index tests above; both pass lane 0 (expected: st3.h { v0, v1, v2 }[0]).
5313 define i32* @test_v4i32_post_imm_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D) nounwind {
5314 ;CHECK-LABEL: test_v4i32_post_imm_st3lane:
5315 ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
5316 call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
5317 %tmp = getelementptr i32, i32* %A, i32 3
5321 define i32* @test_v4i32_post_reg_st3lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind {
5322 ;CHECK-LABEL: test_v4i32_post_reg_st3lane:
5323 ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5324 call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 0, i32* %A)
5325 %tmp = getelementptr i32, i32* %A, i64 %inc
5329 declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) ; Callee of the v4i32 st3lane post-index tests above; both pass lane 0 (expected: st3.s { v0, v1, v2 }[0]).
5332 define i32* @test_v2i32_post_imm_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D) nounwind {
5333 ;CHECK-LABEL: test_v2i32_post_imm_st3lane:
5334 ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
5335 call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
5336 %tmp = getelementptr i32, i32* %A, i32 3
5340 define i32* @test_v2i32_post_reg_st3lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind {
5341 ;CHECK-LABEL: test_v2i32_post_reg_st3lane:
5342 ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5343 call void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 0, i32* %A)
5344 %tmp = getelementptr i32, i32* %A, i64 %inc
5348 declare void @llvm.aarch64.neon.st3lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i64, i32*) ; Callee of the v2i32 st3lane post-index tests above; both pass lane 0 (expected: st3.s { v0, v1, v2 }[0]).
5351 define i64* @test_v2i64_post_imm_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D) nounwind {
5352 ;CHECK-LABEL: test_v2i64_post_imm_st3lane:
5353 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5354 call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
5355 %tmp = getelementptr i64, i64* %A, i64 3
5359 define i64* @test_v2i64_post_reg_st3lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind {
5360 ;CHECK-LABEL: test_v2i64_post_reg_st3lane:
5361 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5362 call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 0, i64* %A)
5363 %tmp = getelementptr i64, i64* %A, i64 %inc
5367 declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) ; Callee of the v2i64 st3lane post-index tests above; both pass lane 0 (expected: st3.d { v0, v1, v2 }[0]).
5370 define i64* @test_v1i64_post_imm_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D) nounwind {
5371 ;CHECK-LABEL: test_v1i64_post_imm_st3lane:
5372 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5373 call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
5374 %tmp = getelementptr i64, i64* %A, i64 3
5378 define i64* @test_v1i64_post_reg_st3lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind {
5379 ;CHECK-LABEL: test_v1i64_post_reg_st3lane:
5380 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5381 call void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 0, i64* %A)
5382 %tmp = getelementptr i64, i64* %A, i64 %inc
5386 declare void @llvm.aarch64.neon.st3lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64, i64*) ; Callee of the v1i64 st3lane post-index tests above; both pass lane 0 (expected: st3.d { v0, v1, v2 }[0]).
5389 define float* @test_v4f32_post_imm_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D) nounwind {
5390 ;CHECK-LABEL: test_v4f32_post_imm_st3lane:
5391 ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
5392 call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
5393 %tmp = getelementptr float, float* %A, i32 3
5397 define float* @test_v4f32_post_reg_st3lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind {
5398 ;CHECK-LABEL: test_v4f32_post_reg_st3lane:
5399 ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5400 call void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, i64 0, float* %A)
5401 %tmp = getelementptr float, float* %A, i64 %inc
5405 declare void @llvm.aarch64.neon.st3lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, i64, float*) ; Callee of the v4f32 st3lane post-index tests above; both pass lane 0 (expected: st3.s { v0, v1, v2 }[0]).
5408 define float* @test_v2f32_post_imm_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D) nounwind {
5409 ;CHECK-LABEL: test_v2f32_post_imm_st3lane:
5410 ;CHECK: st3.s { v0, v1, v2 }[0], [x0], #12
5411 call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
5412 %tmp = getelementptr float, float* %A, i32 3
5416 define float* @test_v2f32_post_reg_st3lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind {
5417 ;CHECK-LABEL: test_v2f32_post_reg_st3lane:
5418 ;CHECK: st3.s { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5419 call void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, i64 0, float* %A)
5420 %tmp = getelementptr float, float* %A, i64 %inc
5424 declare void @llvm.aarch64.neon.st3lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, i64, float*) ; Callee of the v2f32 st3lane post-index tests above; both pass lane 0 (expected: st3.s { v0, v1, v2 }[0]).
5427 define double* @test_v2f64_post_imm_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D) nounwind {
5428 ;CHECK-LABEL: test_v2f64_post_imm_st3lane:
5429 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5430 call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
5431 %tmp = getelementptr double, double* %A, i64 3
5435 define double* @test_v2f64_post_reg_st3lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind {
5436 ;CHECK-LABEL: test_v2f64_post_reg_st3lane:
5437 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5438 call void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, i64 0, double* %A)
5439 %tmp = getelementptr double, double* %A, i64 %inc
5443 declare void @llvm.aarch64.neon.st3lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, i64, double*) ; Callee of the v2f64 st3lane post-index tests above; both pass lane 0 (expected: st3.d { v0, v1, v2 }[0]).
5446 define double* @test_v1f64_post_imm_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D) nounwind {
5447 ;CHECK-LABEL: test_v1f64_post_imm_st3lane:
5448 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], #24
5449 call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
5450 %tmp = getelementptr double, double* %A, i64 3
5454 define double* @test_v1f64_post_reg_st3lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind {
5455 ;CHECK-LABEL: test_v1f64_post_reg_st3lane:
5456 ;CHECK: st3.d { v0, v1, v2 }[0], [x0], x{{[0-9]+}}
5457 call void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, i64 0, double* %A)
5458 %tmp = getelementptr double, double* %A, i64 %inc
5462 declare void @llvm.aarch64.neon.st3lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, i64, double*) ; Callee of the v1f64 st3lane post-index tests above; both pass lane 0 (expected: st3.d { v0, v1, v2 }[0]).
5465 define i8* @test_v16i8_post_imm_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E) nounwind {
5466 ;CHECK-LABEL: test_v16i8_post_imm_st4lane:
5467 ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
5468 call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
5469 %tmp = getelementptr i8, i8* %A, i32 4
5473 define i8* @test_v16i8_post_reg_st4lane(i8* %A, i8** %ptr, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 %inc) nounwind {
5474 ;CHECK-LABEL: test_v16i8_post_reg_st4lane:
5475 ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5476 call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %B, <16 x i8> %C, <16 x i8> %D, <16 x i8> %E, i64 0, i8* %A)
5477 %tmp = getelementptr i8, i8* %A, i64 %inc
5481 declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) ; Callee of the v16i8 st4lane post-index tests above; both pass lane 0 (expected: st4.b { v0, v1, v2, v3 }[0]).
; Immediate post-increment form of st4lane on <8 x i8>.
; The intrinsic is called on four vectors with index operand 0 and address %A,
; then %A is advanced by 4 i8 elements (4 bytes). The expected st4.b stores
; lane [0] and carries that advance as a #4 post-index on x0.
5484 define i8* @test_v8i8_post_imm_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E) nounwind {
5485 ;CHECK-LABEL: test_v8i8_post_imm_st4lane:
5486 ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], #4
5487 call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
5488 %tmp = getelementptr i8, i8* %A, i32 4
; Register post-increment form of st4lane on <8 x i8>.
; The address is advanced by the runtime value %inc (counted in i8 elements), so
; the expected st4.b takes its post-index from an x register.
5492 define i8* @test_v8i8_post_reg_st4lane(i8* %A, i8** %ptr, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 %inc) nounwind {
5493 ;CHECK-LABEL: test_v8i8_post_reg_st4lane:
5494 ;CHECK: st4.b { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5495 call void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8> %B, <8 x i8> %C, <8 x i8> %D, <8 x i8> %E, i64 0, i8* %A)
5496 %tmp = getelementptr i8, i8* %A, i64 %inc
5500 declare void @llvm.aarch64.neon.st4lane.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i64, i8*)
; Immediate post-increment form of st4lane on <8 x i16>.
; The intrinsic is called on four vectors with index operand 0 and address %A,
; then %A is advanced by 4 i16 elements (8 bytes). The expected st4.h stores
; lane [0] and carries that advance as a #8 post-index on x0.
5503 define i16* @test_v8i16_post_imm_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E) nounwind {
5504 ;CHECK-LABEL: test_v8i16_post_imm_st4lane:
5505 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
5506 call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
5507 %tmp = getelementptr i16, i16* %A, i32 4
; Register post-increment form of st4lane on <8 x i16>.
; The address is advanced by the runtime value %inc (counted in i16 elements), so
; the expected st4.h takes its post-index from an x register.
5511 define i16* @test_v8i16_post_reg_st4lane(i16* %A, i16** %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 %inc) nounwind {
5512 ;CHECK-LABEL: test_v8i16_post_reg_st4lane:
5513 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5514 call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %B, <8 x i16> %C, <8 x i16> %D, <8 x i16> %E, i64 0, i16* %A)
5515 %tmp = getelementptr i16, i16* %A, i64 %inc
5519 declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*)
; Immediate post-increment form of st4lane on <4 x i16>.
; The intrinsic is called on four vectors with index operand 0 and address %A,
; then %A is advanced by 4 i16 elements (8 bytes). The expected st4.h stores
; lane [0] and carries that advance as a #8 post-index on x0.
5522 define i16* @test_v4i16_post_imm_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E) nounwind {
5523 ;CHECK-LABEL: test_v4i16_post_imm_st4lane:
5524 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], #8
5525 call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
5526 %tmp = getelementptr i16, i16* %A, i32 4
; Register post-increment form of st4lane on <4 x i16>.
; The address is advanced by the runtime value %inc (counted in i16 elements), so
; the expected st4.h takes its post-index from an x register.
5530 define i16* @test_v4i16_post_reg_st4lane(i16* %A, i16** %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 %inc) nounwind {
5531 ;CHECK-LABEL: test_v4i16_post_reg_st4lane:
5532 ;CHECK: st4.h { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5533 call void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16> %B, <4 x i16> %C, <4 x i16> %D, <4 x i16> %E, i64 0, i16* %A)
5534 %tmp = getelementptr i16, i16* %A, i64 %inc
5538 declare void @llvm.aarch64.neon.st4lane.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i64, i16*)
; Immediate post-increment form of st4lane on <4 x i32>.
; The intrinsic is called on four vectors with index operand 0 and address %A,
; then %A is advanced by 4 i32 elements (16 bytes). The expected st4.s stores
; lane [0] and carries that advance as a #16 post-index on x0.
5541 define i32* @test_v4i32_post_imm_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E) nounwind {
5542 ;CHECK-LABEL: test_v4i32_post_imm_st4lane:
5543 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5544 call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
5545 %tmp = getelementptr i32, i32* %A, i32 4
; Register post-increment form of st4lane on <4 x i32>.
; The address is advanced by the runtime value %inc (counted in i32 elements), so
; the expected st4.s takes its post-index from an x register.
5549 define i32* @test_v4i32_post_reg_st4lane(i32* %A, i32** %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 %inc) nounwind {
5550 ;CHECK-LABEL: test_v4i32_post_reg_st4lane:
5551 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5552 call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %B, <4 x i32> %C, <4 x i32> %D, <4 x i32> %E, i64 0, i32* %A)
5553 %tmp = getelementptr i32, i32* %A, i64 %inc
5557 declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*)
; Immediate post-increment form of st4lane on <2 x i32>.
; The intrinsic is called on four vectors with index operand 0 and address %A,
; then %A is advanced by 4 i32 elements (16 bytes). The expected st4.s stores
; lane [0] and carries that advance as a #16 post-index on x0.
5560 define i32* @test_v2i32_post_imm_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E) nounwind {
5561 ;CHECK-LABEL: test_v2i32_post_imm_st4lane:
5562 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5563 call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
5564 %tmp = getelementptr i32, i32* %A, i32 4
; Register post-increment form of st4lane on <2 x i32>.
; The address is advanced by the runtime value %inc (counted in i32 elements), so
; the expected st4.s takes its post-index from an x register.
5568 define i32* @test_v2i32_post_reg_st4lane(i32* %A, i32** %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 %inc) nounwind {
5569 ;CHECK-LABEL: test_v2i32_post_reg_st4lane:
5570 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5571 call void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32> %B, <2 x i32> %C, <2 x i32> %D, <2 x i32> %E, i64 0, i32* %A)
5572 %tmp = getelementptr i32, i32* %A, i64 %inc
5576 declare void @llvm.aarch64.neon.st4lane.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i64, i32*)
; Immediate post-increment form of st4lane on <2 x i64>.
; The intrinsic is called on four vectors with index operand 0 and address %A,
; then %A is advanced by 4 i64 elements (32 bytes). The expected st4.d stores
; lane [0] and carries that advance as a #32 post-index on x0.
5579 define i64* @test_v2i64_post_imm_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E) nounwind {
5580 ;CHECK-LABEL: test_v2i64_post_imm_st4lane:
5581 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5582 call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
5583 %tmp = getelementptr i64, i64* %A, i64 4
; Register post-increment form of st4lane on <2 x i64>.
; The address is advanced by the runtime value %inc (counted in i64 elements), so
; the expected st4.d takes its post-index from an x register.
5587 define i64* @test_v2i64_post_reg_st4lane(i64* %A, i64** %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 %inc) nounwind {
5588 ;CHECK-LABEL: test_v2i64_post_reg_st4lane:
5589 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5590 call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %B, <2 x i64> %C, <2 x i64> %D, <2 x i64> %E, i64 0, i64* %A)
5591 %tmp = getelementptr i64, i64* %A, i64 %inc
5595 declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*)
; Immediate post-increment form of st4lane on <1 x i64>.
; The intrinsic is called on four vectors with index operand 0 and address %A,
; then %A is advanced by 4 i64 elements (32 bytes). The expected st4.d stores
; lane [0] and carries that advance as a #32 post-index on x0.
5598 define i64* @test_v1i64_post_imm_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E) nounwind {
5599 ;CHECK-LABEL: test_v1i64_post_imm_st4lane:
5600 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5601 call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
5602 %tmp = getelementptr i64, i64* %A, i64 4
; Register post-increment form of st4lane on <1 x i64>.
; The address is advanced by the runtime value %inc (counted in i64 elements), so
; the expected st4.d takes its post-index from an x register.
5606 define i64* @test_v1i64_post_reg_st4lane(i64* %A, i64** %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 %inc) nounwind {
5607 ;CHECK-LABEL: test_v1i64_post_reg_st4lane:
5608 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5609 call void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64> %B, <1 x i64> %C, <1 x i64> %D, <1 x i64> %E, i64 0, i64* %A)
5610 %tmp = getelementptr i64, i64* %A, i64 %inc
5614 declare void @llvm.aarch64.neon.st4lane.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64, i64*)
; Immediate post-increment form of st4lane on <4 x float>.
; The intrinsic is called on four vectors with index operand 0 and address %A,
; then %A is advanced by 4 floats (16 bytes). The expected st4.s stores
; lane [0] and carries that advance as a #16 post-index on x0.
5617 define float* @test_v4f32_post_imm_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E) nounwind {
5618 ;CHECK-LABEL: test_v4f32_post_imm_st4lane:
5619 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5620 call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
5621 %tmp = getelementptr float, float* %A, i32 4
; Register post-increment form of st4lane on <4 x float>.
; The address is advanced by the runtime value %inc (counted in floats), so
; the expected st4.s takes its post-index from an x register.
5625 define float* @test_v4f32_post_reg_st4lane(float* %A, float** %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 %inc) nounwind {
5626 ;CHECK-LABEL: test_v4f32_post_reg_st4lane:
5627 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5628 call void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float> %B, <4 x float> %C, <4 x float> %D, <4 x float> %E, i64 0, float* %A)
5629 %tmp = getelementptr float, float* %A, i64 %inc
5633 declare void @llvm.aarch64.neon.st4lane.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, i64, float*)
; Immediate post-increment form of st4lane on <2 x float>.
; The intrinsic is called on four vectors with index operand 0 and address %A,
; then %A is advanced by 4 floats (16 bytes). The expected st4.s stores
; lane [0] and carries that advance as a #16 post-index on x0.
5636 define float* @test_v2f32_post_imm_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E) nounwind {
5637 ;CHECK-LABEL: test_v2f32_post_imm_st4lane:
5638 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], #16
5639 call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
5640 %tmp = getelementptr float, float* %A, i32 4
; Register post-increment form of st4lane on <2 x float>.
; The address is advanced by the runtime value %inc (counted in floats), so
; the expected st4.s takes its post-index from an x register.
5644 define float* @test_v2f32_post_reg_st4lane(float* %A, float** %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 %inc) nounwind {
5645 ;CHECK-LABEL: test_v2f32_post_reg_st4lane:
5646 ;CHECK: st4.s { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5647 call void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float> %B, <2 x float> %C, <2 x float> %D, <2 x float> %E, i64 0, float* %A)
5648 %tmp = getelementptr float, float* %A, i64 %inc
5652 declare void @llvm.aarch64.neon.st4lane.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, i64, float*)
; Immediate post-increment form of st4lane on <2 x double>.
; The intrinsic is called on four vectors with index operand 0 and address %A,
; then %A is advanced by 4 doubles (32 bytes). The expected st4.d stores
; lane [0] and carries that advance as a #32 post-index on x0.
5655 define double* @test_v2f64_post_imm_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E) nounwind {
5656 ;CHECK-LABEL: test_v2f64_post_imm_st4lane:
5657 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5658 call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
5659 %tmp = getelementptr double, double* %A, i64 4
; Register post-increment form of st4lane on <2 x double>.
; The address is advanced by the runtime value %inc (counted in doubles), so
; the expected st4.d takes its post-index from an x register.
5663 define double* @test_v2f64_post_reg_st4lane(double* %A, double** %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 %inc) nounwind {
5664 ;CHECK-LABEL: test_v2f64_post_reg_st4lane:
5665 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5666 call void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double> %B, <2 x double> %C, <2 x double> %D, <2 x double> %E, i64 0, double* %A)
5667 %tmp = getelementptr double, double* %A, i64 %inc
5671 declare void @llvm.aarch64.neon.st4lane.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, i64, double*)
; Immediate post-increment form of st4lane on <1 x double>.
; The intrinsic is called on four vectors with index operand 0 and address %A,
; then %A is advanced by 4 doubles (32 bytes). The expected st4.d stores
; lane [0] and carries that advance as a #32 post-index on x0.
5674 define double* @test_v1f64_post_imm_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E) nounwind {
5675 ;CHECK-LABEL: test_v1f64_post_imm_st4lane:
5676 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], #32
5677 call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
5678 %tmp = getelementptr double, double* %A, i64 4
; Register post-increment form of st4lane on <1 x double>.
; The address is advanced by the runtime value %inc (counted in doubles), so
; the expected st4.d takes its post-index from an x register.
5682 define double* @test_v1f64_post_reg_st4lane(double* %A, double** %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 %inc) nounwind {
5683 ;CHECK-LABEL: test_v1f64_post_reg_st4lane:
5684 ;CHECK: st4.d { v0, v1, v2, v3 }[0], [x0], x{{[0-9]+}}
5685 call void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double> %B, <1 x double> %C, <1 x double> %D, <1 x double> %E, i64 0, double* %A)
5686 %tmp = getelementptr double, double* %A, i64 %inc
5690 declare void @llvm.aarch64.neon.st4lane.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, i64, double*)
; Load-and-replicate with immediate post-increment, <16 x i8>.
; One i8 is loaded from %bar and inserted into every one of the 16 lanes
; (starting from an all-undef vector); %bar + 1 is then stored to %ptr.
; The expected output is a single ld1r.16b with a #1 post-index on x0.
5692 define <16 x i8> @test_v16i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
5693 ; CHECK-LABEL: test_v16i8_post_imm_ld1r:
5694 ; CHECK: ld1r.16b { v0 }, [x0], #1
5695 %tmp1 = load i8, i8* %bar
5696 %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5697 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
5698 %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
5699 %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
5700 %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
5701 %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
5702 %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
5703 %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
5704 %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
5705 %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
5706 %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
5707 %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
5708 %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
5709 %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
5710 %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
5711 %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
5712 %tmp18 = getelementptr i8, i8* %bar, i64 1
5713 store i8* %tmp18, i8** %ptr
5714 ret <16 x i8> %tmp17
; Load-and-replicate with register post-increment, <16 x i8>.
; One i8 is loaded from %bar and inserted into every one of the 16 lanes;
; %bar advanced by the runtime value %inc (in i8 elements) is stored to %ptr.
; The expected ld1r.16b takes its post-index from an x register.
5717 define <16 x i8> @test_v16i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
5718 ; CHECK-LABEL: test_v16i8_post_reg_ld1r:
5719 ; CHECK: ld1r.16b { v0 }, [x0], x{{[0-9]+}}
5720 %tmp1 = load i8, i8* %bar
5721 %tmp2 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5722 %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
5723 %tmp4 = insertelement <16 x i8> %tmp3, i8 %tmp1, i32 2
5724 %tmp5 = insertelement <16 x i8> %tmp4, i8 %tmp1, i32 3
5725 %tmp6 = insertelement <16 x i8> %tmp5, i8 %tmp1, i32 4
5726 %tmp7 = insertelement <16 x i8> %tmp6, i8 %tmp1, i32 5
5727 %tmp8 = insertelement <16 x i8> %tmp7, i8 %tmp1, i32 6
5728 %tmp9 = insertelement <16 x i8> %tmp8, i8 %tmp1, i32 7
5729 %tmp10 = insertelement <16 x i8> %tmp9, i8 %tmp1, i32 8
5730 %tmp11 = insertelement <16 x i8> %tmp10, i8 %tmp1, i32 9
5731 %tmp12 = insertelement <16 x i8> %tmp11, i8 %tmp1, i32 10
5732 %tmp13 = insertelement <16 x i8> %tmp12, i8 %tmp1, i32 11
5733 %tmp14 = insertelement <16 x i8> %tmp13, i8 %tmp1, i32 12
5734 %tmp15 = insertelement <16 x i8> %tmp14, i8 %tmp1, i32 13
5735 %tmp16 = insertelement <16 x i8> %tmp15, i8 %tmp1, i32 14
5736 %tmp17 = insertelement <16 x i8> %tmp16, i8 %tmp1, i32 15
5737 %tmp18 = getelementptr i8, i8* %bar, i64 %inc
5738 store i8* %tmp18, i8** %ptr
5739 ret <16 x i8> %tmp17
; Load-and-replicate with immediate post-increment, <8 x i8>.
; One i8 is loaded from %bar and inserted into every one of the 8 lanes
; (starting from an all-undef vector); %bar + 1 is then stored to %ptr.
; The expected output is a single ld1r.8b with a #1 post-index on x0.
5742 define <8 x i8> @test_v8i8_post_imm_ld1r(i8* %bar, i8** %ptr) {
5743 ; CHECK-LABEL: test_v8i8_post_imm_ld1r:
5744 ; CHECK: ld1r.8b { v0 }, [x0], #1
5745 %tmp1 = load i8, i8* %bar
5746 %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5747 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
5748 %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
5749 %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
5750 %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
5751 %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
5752 %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
5753 %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
5754 %tmp10 = getelementptr i8, i8* %bar, i64 1
5755 store i8* %tmp10, i8** %ptr
; Load-and-replicate with register post-increment, <8 x i8>.
; One i8 is loaded from %bar and inserted into every one of the 8 lanes;
; %bar advanced by the runtime value %inc (in i8 elements) is stored to %ptr.
; The expected ld1r.8b takes its post-index from an x register.
5759 define <8 x i8> @test_v8i8_post_reg_ld1r(i8* %bar, i8** %ptr, i64 %inc) {
5760 ; CHECK-LABEL: test_v8i8_post_reg_ld1r:
5761 ; CHECK: ld1r.8b { v0 }, [x0], x{{[0-9]+}}
5762 %tmp1 = load i8, i8* %bar
5763 %tmp2 = insertelement <8 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, i8 %tmp1, i32 0
5764 %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
5765 %tmp4 = insertelement <8 x i8> %tmp3, i8 %tmp1, i32 2
5766 %tmp5 = insertelement <8 x i8> %tmp4, i8 %tmp1, i32 3
5767 %tmp6 = insertelement <8 x i8> %tmp5, i8 %tmp1, i32 4
5768 %tmp7 = insertelement <8 x i8> %tmp6, i8 %tmp1, i32 5
5769 %tmp8 = insertelement <8 x i8> %tmp7, i8 %tmp1, i32 6
5770 %tmp9 = insertelement <8 x i8> %tmp8, i8 %tmp1, i32 7
5771 %tmp10 = getelementptr i8, i8* %bar, i64 %inc
5772 store i8* %tmp10, i8** %ptr
; Load-and-replicate with immediate post-increment, <8 x i16>.
; One i16 is loaded from %bar and inserted into every one of the 8 lanes
; (starting from an all-undef vector); %bar + 1 element (2 bytes) is then
; stored to %ptr. The expected output is a single ld1r.8h with a #2 post-index.
5776 define <8 x i16> @test_v8i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
5777 ; CHECK-LABEL: test_v8i16_post_imm_ld1r:
5778 ; CHECK: ld1r.8h { v0 }, [x0], #2
5779 %tmp1 = load i16, i16* %bar
5780 %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5781 %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
5782 %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
5783 %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
5784 %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
5785 %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
5786 %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
5787 %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
5788 %tmp10 = getelementptr i16, i16* %bar, i64 1
5789 store i16* %tmp10, i16** %ptr
; Load-and-replicate with register post-increment, <8 x i16>.
; One i16 is loaded from %bar and inserted into every one of the 8 lanes;
; %bar advanced by the runtime value %inc (in i16 elements) is stored to %ptr.
; The expected ld1r.8h takes its post-index from an x register.
5793 define <8 x i16> @test_v8i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
5794 ; CHECK-LABEL: test_v8i16_post_reg_ld1r:
5795 ; CHECK: ld1r.8h { v0 }, [x0], x{{[0-9]+}}
5796 %tmp1 = load i16, i16* %bar
5797 %tmp2 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5798 %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
5799 %tmp4 = insertelement <8 x i16> %tmp3, i16 %tmp1, i32 2
5800 %tmp5 = insertelement <8 x i16> %tmp4, i16 %tmp1, i32 3
5801 %tmp6 = insertelement <8 x i16> %tmp5, i16 %tmp1, i32 4
5802 %tmp7 = insertelement <8 x i16> %tmp6, i16 %tmp1, i32 5
5803 %tmp8 = insertelement <8 x i16> %tmp7, i16 %tmp1, i32 6
5804 %tmp9 = insertelement <8 x i16> %tmp8, i16 %tmp1, i32 7
5805 %tmp10 = getelementptr i16, i16* %bar, i64 %inc
5806 store i16* %tmp10, i16** %ptr
; Load-and-replicate with immediate post-increment, <4 x i16>.
; One i16 is loaded from %bar and inserted into every one of the 4 lanes
; (starting from an all-undef vector); %bar + 1 element (2 bytes) is then
; stored to %ptr. The expected output is a single ld1r.4h with a #2 post-index.
5810 define <4 x i16> @test_v4i16_post_imm_ld1r(i16* %bar, i16** %ptr) {
5811 ; CHECK-LABEL: test_v4i16_post_imm_ld1r:
5812 ; CHECK: ld1r.4h { v0 }, [x0], #2
5813 %tmp1 = load i16, i16* %bar
5814 %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5815 %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
5816 %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
5817 %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
5818 %tmp6 = getelementptr i16, i16* %bar, i64 1
5819 store i16* %tmp6, i16** %ptr
; Load-and-replicate with register post-increment, <4 x i16>.
; One i16 is loaded from %bar and inserted into every one of the 4 lanes;
; %bar advanced by the runtime value %inc (in i16 elements) is stored to %ptr.
; The expected ld1r.4h takes its post-index from an x register.
5823 define <4 x i16> @test_v4i16_post_reg_ld1r(i16* %bar, i16** %ptr, i64 %inc) {
5824 ; CHECK-LABEL: test_v4i16_post_reg_ld1r:
5825 ; CHECK: ld1r.4h { v0 }, [x0], x{{[0-9]+}}
5826 %tmp1 = load i16, i16* %bar
5827 %tmp2 = insertelement <4 x i16> <i16 undef, i16 undef, i16 undef, i16 undef>, i16 %tmp1, i32 0
5828 %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
5829 %tmp4 = insertelement <4 x i16> %tmp3, i16 %tmp1, i32 2
5830 %tmp5 = insertelement <4 x i16> %tmp4, i16 %tmp1, i32 3
5831 %tmp6 = getelementptr i16, i16* %bar, i64 %inc
5832 store i16* %tmp6, i16** %ptr
; Load-and-replicate with immediate post-increment, <4 x i32>.
; One i32 is loaded from %bar and inserted into every one of the 4 lanes
; (starting from an all-undef vector); %bar + 1 element (4 bytes) is then
; stored to %ptr. The expected output is a single ld1r.4s with a #4 post-index.
5836 define <4 x i32> @test_v4i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
5837 ; CHECK-LABEL: test_v4i32_post_imm_ld1r:
5838 ; CHECK: ld1r.4s { v0 }, [x0], #4
5839 %tmp1 = load i32, i32* %bar
5840 %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
5841 %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
5842 %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
5843 %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
5844 %tmp6 = getelementptr i32, i32* %bar, i64 1
5845 store i32* %tmp6, i32** %ptr
; Load-and-replicate with register post-increment, <4 x i32>.
; One i32 is loaded from %bar and inserted into every one of the 4 lanes;
; %bar advanced by the runtime value %inc (in i32 elements) is stored to %ptr.
; The expected ld1r.4s takes its post-index from an x register.
5849 define <4 x i32> @test_v4i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
5850 ; CHECK-LABEL: test_v4i32_post_reg_ld1r:
5851 ; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
5852 %tmp1 = load i32, i32* %bar
5853 %tmp2 = insertelement <4 x i32> <i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp1, i32 0
5854 %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
5855 %tmp4 = insertelement <4 x i32> %tmp3, i32 %tmp1, i32 2
5856 %tmp5 = insertelement <4 x i32> %tmp4, i32 %tmp1, i32 3
5857 %tmp6 = getelementptr i32, i32* %bar, i64 %inc
5858 store i32* %tmp6, i32** %ptr
; Load-and-replicate with immediate post-increment, <2 x i32>.
; One i32 is loaded from %bar and inserted into both lanes (starting from an
; all-undef vector); %bar + 1 element (4 bytes) is then stored to %ptr.
; The expected output is a single ld1r.2s with a #4 post-index on x0.
5862 define <2 x i32> @test_v2i32_post_imm_ld1r(i32* %bar, i32** %ptr) {
5863 ; CHECK-LABEL: test_v2i32_post_imm_ld1r:
5864 ; CHECK: ld1r.2s { v0 }, [x0], #4
5865 %tmp1 = load i32, i32* %bar
5866 %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
5867 %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
5868 %tmp4 = getelementptr i32, i32* %bar, i64 1
5869 store i32* %tmp4, i32** %ptr
; Load-and-replicate with register post-increment, <2 x i32>.
; One i32 is loaded from %bar and inserted into both lanes; %bar advanced by
; the runtime value %inc (in i32 elements) is stored to %ptr.
; The expected ld1r.2s takes its post-index from an x register.
5873 define <2 x i32> @test_v2i32_post_reg_ld1r(i32* %bar, i32** %ptr, i64 %inc) {
5874 ; CHECK-LABEL: test_v2i32_post_reg_ld1r:
5875 ; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
5876 %tmp1 = load i32, i32* %bar
5877 %tmp2 = insertelement <2 x i32> <i32 undef, i32 undef>, i32 %tmp1, i32 0
5878 %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
5879 %tmp4 = getelementptr i32, i32* %bar, i64 %inc
5880 store i32* %tmp4, i32** %ptr
; Load-and-replicate with immediate post-increment, <2 x i64>.
; One i64 is loaded from %bar and inserted into both lanes (starting from an
; all-undef vector); %bar + 1 element (8 bytes) is then stored to %ptr.
; The expected output is a single ld1r.2d with a #8 post-index on x0.
5884 define <2 x i64> @test_v2i64_post_imm_ld1r(i64* %bar, i64** %ptr) {
5885 ; CHECK-LABEL: test_v2i64_post_imm_ld1r:
5886 ; CHECK: ld1r.2d { v0 }, [x0], #8
5887 %tmp1 = load i64, i64* %bar
5888 %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
5889 %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
5890 %tmp4 = getelementptr i64, i64* %bar, i64 1
5891 store i64* %tmp4, i64** %ptr
; Load-and-replicate with register post-increment, <2 x i64>.
; One i64 is loaded from %bar and inserted into both lanes; %bar advanced by
; the runtime value %inc (in i64 elements) is stored to %ptr.
; The expected ld1r.2d takes its post-index from an x register.
5895 define <2 x i64> @test_v2i64_post_reg_ld1r(i64* %bar, i64** %ptr, i64 %inc) {
5896 ; CHECK-LABEL: test_v2i64_post_reg_ld1r:
5897 ; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
5898 %tmp1 = load i64, i64* %bar
5899 %tmp2 = insertelement <2 x i64> <i64 undef, i64 undef>, i64 %tmp1, i32 0
5900 %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
5901 %tmp4 = getelementptr i64, i64* %bar, i64 %inc
5902 store i64* %tmp4, i64** %ptr
; Load-and-replicate with immediate post-increment, <4 x float>.
; One float is loaded from %bar and inserted into every one of the 4 lanes
; (starting from an all-undef vector); %bar + 1 element (4 bytes) is then
; stored to %ptr. The expected output is a single ld1r.4s with a #4 post-index.
5906 define <4 x float> @test_v4f32_post_imm_ld1r(float* %bar, float** %ptr) {
5907 ; CHECK-LABEL: test_v4f32_post_imm_ld1r:
5908 ; CHECK: ld1r.4s { v0 }, [x0], #4
5909 %tmp1 = load float, float* %bar
5910 %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
5911 %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
5912 %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
5913 %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
5914 %tmp6 = getelementptr float, float* %bar, i64 1
5915 store float* %tmp6, float** %ptr
5916 ret <4 x float> %tmp5
; Load-and-replicate with register post-increment, <4 x float>.
; One float is loaded from %bar and inserted into every one of the 4 lanes;
; %bar advanced by the runtime value %inc (in floats) is stored to %ptr.
; The expected ld1r.4s takes its post-index from an x register.
5919 define <4 x float> @test_v4f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
5920 ; CHECK-LABEL: test_v4f32_post_reg_ld1r:
5921 ; CHECK: ld1r.4s { v0 }, [x0], x{{[0-9]+}}
5922 %tmp1 = load float, float* %bar
5923 %tmp2 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %tmp1, i32 0
5924 %tmp3 = insertelement <4 x float> %tmp2, float %tmp1, i32 1
5925 %tmp4 = insertelement <4 x float> %tmp3, float %tmp1, i32 2
5926 %tmp5 = insertelement <4 x float> %tmp4, float %tmp1, i32 3
5927 %tmp6 = getelementptr float, float* %bar, i64 %inc
5928 store float* %tmp6, float** %ptr
5929 ret <4 x float> %tmp5
; Load-and-replicate with immediate post-increment, <2 x float>.
; One float is loaded from %bar and inserted into both lanes (starting from an
; all-undef vector); %bar + 1 element (4 bytes) is then stored to %ptr.
; The expected output is a single ld1r.2s with a #4 post-index on x0.
5932 define <2 x float> @test_v2f32_post_imm_ld1r(float* %bar, float** %ptr) {
5933 ; CHECK-LABEL: test_v2f32_post_imm_ld1r:
5934 ; CHECK: ld1r.2s { v0 }, [x0], #4
5935 %tmp1 = load float, float* %bar
5936 %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
5937 %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
5938 %tmp4 = getelementptr float, float* %bar, i64 1
5939 store float* %tmp4, float** %ptr
5940 ret <2 x float> %tmp3
; Load-and-replicate with register post-increment, <2 x float>.
; One float is loaded from %bar and inserted into both lanes; %bar advanced by
; the runtime value %inc (in floats) is stored to %ptr.
; The expected ld1r.2s takes its post-index from an x register.
5943 define <2 x float> @test_v2f32_post_reg_ld1r(float* %bar, float** %ptr, i64 %inc) {
5944 ; CHECK-LABEL: test_v2f32_post_reg_ld1r:
5945 ; CHECK: ld1r.2s { v0 }, [x0], x{{[0-9]+}}
5946 %tmp1 = load float, float* %bar
5947 %tmp2 = insertelement <2 x float> <float undef, float undef>, float %tmp1, i32 0
5948 %tmp3 = insertelement <2 x float> %tmp2, float %tmp1, i32 1
5949 %tmp4 = getelementptr float, float* %bar, i64 %inc
5950 store float* %tmp4, float** %ptr
5951 ret <2 x float> %tmp3
; Load-and-replicate with immediate post-increment, <2 x double>.
; One double is loaded from %bar and inserted into both lanes (starting from an
; all-undef vector); %bar + 1 element (8 bytes) is then stored to %ptr.
; The expected output is a single ld1r.2d with a #8 post-index on x0.
5954 define <2 x double> @test_v2f64_post_imm_ld1r(double* %bar, double** %ptr) {
5955 ; CHECK-LABEL: test_v2f64_post_imm_ld1r:
5956 ; CHECK: ld1r.2d { v0 }, [x0], #8
5957 %tmp1 = load double, double* %bar
5958 %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
5959 %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
5960 %tmp4 = getelementptr double, double* %bar, i64 1
5961 store double* %tmp4, double** %ptr
5962 ret <2 x double> %tmp3
; Load-and-replicate with register post-increment, <2 x double>.
; One double is loaded from %bar and inserted into both lanes; %bar advanced by
; the runtime value %inc (in doubles) is stored to %ptr.
; The expected ld1r.2d takes its post-index from an x register.
5965 define <2 x double> @test_v2f64_post_reg_ld1r(double* %bar, double** %ptr, i64 %inc) {
5966 ; CHECK-LABEL: test_v2f64_post_reg_ld1r:
5967 ; CHECK: ld1r.2d { v0 }, [x0], x{{[0-9]+}}
5968 %tmp1 = load double, double* %bar
5969 %tmp2 = insertelement <2 x double> <double undef, double undef>, double %tmp1, i32 0
5970 %tmp3 = insertelement <2 x double> %tmp2, double %tmp1, i32 1
5971 %tmp4 = getelementptr double, double* %bar, i64 %inc
5972 store double* %tmp4, double** %ptr
5973 ret <2 x double> %tmp3
; Single-lane load with immediate post-increment, <16 x i8>.
; One i8 is loaded from %bar and inserted into lane 1 of the incoming vector %A;
; %bar + 1 is then stored to %ptr. The expected output is ld1.b into lane [1]
; of v0 with a #1 post-index on x0.
5976 define <16 x i8> @test_v16i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <16 x i8> %A) {
5977 ; CHECK-LABEL: test_v16i8_post_imm_ld1lane:
5978 ; CHECK: ld1.b { v0 }[1], [x0], #1
5979 %tmp1 = load i8, i8* %bar
5980 %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
5981 %tmp3 = getelementptr i8, i8* %bar, i64 1
5982 store i8* %tmp3, i8** %ptr
; Single-lane load with register post-increment, <16 x i8>.
; One i8 is loaded from %bar and inserted into lane 1 of %A; %bar advanced by
; the runtime value %inc (in i8 elements) is stored to %ptr. The expected ld1.b
; into lane [1] takes its post-index from an x register.
5986 define <16 x i8> @test_v16i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <16 x i8> %A) {
5987 ; CHECK-LABEL: test_v16i8_post_reg_ld1lane:
5988 ; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
5989 %tmp1 = load i8, i8* %bar
5990 %tmp2 = insertelement <16 x i8> %A, i8 %tmp1, i32 1
5991 %tmp3 = getelementptr i8, i8* %bar, i64 %inc
5992 store i8* %tmp3, i8** %ptr
; Single-lane load with immediate post-increment, <8 x i8>.
; One i8 is loaded from %bar and inserted into lane 1 of the incoming vector %A;
; %bar + 1 is then stored to %ptr. The expected output is ld1.b into lane [1]
; of v0 with a #1 post-index on x0.
5996 define <8 x i8> @test_v8i8_post_imm_ld1lane(i8* %bar, i8** %ptr, <8 x i8> %A) {
5997 ; CHECK-LABEL: test_v8i8_post_imm_ld1lane:
5998 ; CHECK: ld1.b { v0 }[1], [x0], #1
5999 %tmp1 = load i8, i8* %bar
6000 %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
6001 %tmp3 = getelementptr i8, i8* %bar, i64 1
6002 store i8* %tmp3, i8** %ptr
; Single-lane load with register post-increment, <8 x i8>.
; One i8 is loaded from %bar and inserted into lane 1 of %A; %bar advanced by
; the runtime value %inc (in i8 elements) is stored to %ptr. The expected ld1.b
; into lane [1] takes its post-index from an x register.
6006 define <8 x i8> @test_v8i8_post_reg_ld1lane(i8* %bar, i8** %ptr, i64 %inc, <8 x i8> %A) {
6007 ; CHECK-LABEL: test_v8i8_post_reg_ld1lane:
6008 ; CHECK: ld1.b { v0 }[1], [x0], x{{[0-9]+}}
6009 %tmp1 = load i8, i8* %bar
6010 %tmp2 = insertelement <8 x i8> %A, i8 %tmp1, i32 1
6011 %tmp3 = getelementptr i8, i8* %bar, i64 %inc
6012 store i8* %tmp3, i8** %ptr
; Single-lane load with immediate post-increment, <8 x i16>.
; One i16 is loaded from %bar and inserted into lane 1 of the incoming vector
; %A; %bar + 1 element (2 bytes) is then stored to %ptr. The expected output is
; ld1.h into lane [1] of v0 with a #2 post-index on x0.
6016 define <8 x i16> @test_v8i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <8 x i16> %A) {
6017 ; CHECK-LABEL: test_v8i16_post_imm_ld1lane:
6018 ; CHECK: ld1.h { v0 }[1], [x0], #2
6019 %tmp1 = load i16, i16* %bar
6020 %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
6021 %tmp3 = getelementptr i16, i16* %bar, i64 1
6022 store i16* %tmp3, i16** %ptr
; Single-lane load with register post-increment, <8 x i16>.
; One i16 is loaded from %bar and inserted into lane 1 of %A; %bar advanced by
; the runtime value %inc (in i16 elements) is stored to %ptr. The expected ld1.h
; into lane [1] takes its post-index from an x register.
6026 define <8 x i16> @test_v8i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <8 x i16> %A) {
6027 ; CHECK-LABEL: test_v8i16_post_reg_ld1lane:
6028 ; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
6029 %tmp1 = load i16, i16* %bar
6030 %tmp2 = insertelement <8 x i16> %A, i16 %tmp1, i32 1
6031 %tmp3 = getelementptr i16, i16* %bar, i64 %inc
6032 store i16* %tmp3, i16** %ptr
; Single-lane load with immediate post-increment, <4 x i16>.
; One i16 is loaded from %bar and inserted into lane 1 of the incoming vector
; %A; %bar + 1 element (2 bytes) is then stored to %ptr. The expected output is
; ld1.h into lane [1] of v0 with a #2 post-index on x0.
6036 define <4 x i16> @test_v4i16_post_imm_ld1lane(i16* %bar, i16** %ptr, <4 x i16> %A) {
6037 ; CHECK-LABEL: test_v4i16_post_imm_ld1lane:
6038 ; CHECK: ld1.h { v0 }[1], [x0], #2
6039 %tmp1 = load i16, i16* %bar
6040 %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
6041 %tmp3 = getelementptr i16, i16* %bar, i64 1
6042 store i16* %tmp3, i16** %ptr
; Single-lane load with register post-increment, <4 x i16>.
; One i16 is loaded from %bar and inserted into lane 1 of %A; %bar advanced by
; the runtime value %inc (in i16 elements) is stored to %ptr. The expected ld1.h
; into lane [1] takes its post-index from an x register.
6046 define <4 x i16> @test_v4i16_post_reg_ld1lane(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A) {
6047 ; CHECK-LABEL: test_v4i16_post_reg_ld1lane:
6048 ; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
6049 %tmp1 = load i16, i16* %bar
6050 %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
6051 %tmp3 = getelementptr i16, i16* %bar, i64 %inc
6052 store i16* %tmp3, i16** %ptr
; Single-lane load with immediate post-increment, <4 x i32>.
; One i32 is loaded from %bar and inserted into lane 1 of the incoming vector
; %A; %bar + 1 element (4 bytes) is then stored to %ptr. The expected output is
; ld1.s into lane [1] of v0 with a #4 post-index on x0.
6056 define <4 x i32> @test_v4i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <4 x i32> %A) {
6057 ; CHECK-LABEL: test_v4i32_post_imm_ld1lane:
6058 ; CHECK: ld1.s { v0 }[1], [x0], #4
6059 %tmp1 = load i32, i32* %bar
6060 %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
6061 %tmp3 = getelementptr i32, i32* %bar, i64 1
6062 store i32* %tmp3, i32** %ptr
; Single-lane load with register post-increment, <4 x i32>.
; One i32 is loaded from %bar and inserted into lane 1 of %A; %bar advanced by
; the runtime value %inc (in i32 elements) is stored to %ptr. The expected ld1.s
; into lane [1] takes its post-index from an x register.
6066 define <4 x i32> @test_v4i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <4 x i32> %A) {
6067 ; CHECK-LABEL: test_v4i32_post_reg_ld1lane:
6068 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
6069 %tmp1 = load i32, i32* %bar
6070 %tmp2 = insertelement <4 x i32> %A, i32 %tmp1, i32 1
6071 %tmp3 = getelementptr i32, i32* %bar, i64 %inc
6072 store i32* %tmp3, i32** %ptr
; Single-lane load with immediate post-increment, <2 x i32>.
; One i32 is loaded from %bar and inserted into lane 1 of the incoming vector
; %A; %bar + 1 element (4 bytes) is then stored to %ptr. The expected output is
; ld1.s into lane [1] of v0 with a #4 post-index on x0.
6076 define <2 x i32> @test_v2i32_post_imm_ld1lane(i32* %bar, i32** %ptr, <2 x i32> %A) {
6077 ; CHECK-LABEL: test_v2i32_post_imm_ld1lane:
6078 ; CHECK: ld1.s { v0 }[1], [x0], #4
6079 %tmp1 = load i32, i32* %bar
6080 %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
6081 %tmp3 = getelementptr i32, i32* %bar, i64 1
6082 store i32* %tmp3, i32** %ptr
; Single-lane load with register post-increment, <2 x i32>.
; One i32 is loaded from %bar and inserted into lane 1 of %A; %bar advanced by
; the runtime value %inc (in i32 elements) is stored to %ptr. The expected ld1.s
; into lane [1] takes its post-index from an x register.
6086 define <2 x i32> @test_v2i32_post_reg_ld1lane(i32* %bar, i32** %ptr, i64 %inc, <2 x i32> %A) {
6087 ; CHECK-LABEL: test_v2i32_post_reg_ld1lane:
6088 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
6089 %tmp1 = load i32, i32* %bar
6090 %tmp2 = insertelement <2 x i32> %A, i32 %tmp1, i32 1
6091 %tmp3 = getelementptr i32, i32* %bar, i64 %inc
6092 store i32* %tmp3, i32** %ptr
; Single-lane load with immediate post-increment, <2 x i64>.
; One i64 is loaded from %bar and inserted into lane 1 of the incoming vector
; %A; %bar + 1 element (8 bytes) is then stored to %ptr. The expected output is
; ld1.d into lane [1] of v0 with a #8 post-index on x0.
6096 define <2 x i64> @test_v2i64_post_imm_ld1lane(i64* %bar, i64** %ptr, <2 x i64> %A) {
6097 ; CHECK-LABEL: test_v2i64_post_imm_ld1lane:
6098 ; CHECK: ld1.d { v0 }[1], [x0], #8
6099 %tmp1 = load i64, i64* %bar
6100 %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
6101 %tmp3 = getelementptr i64, i64* %bar, i64 1
6102 store i64* %tmp3, i64** %ptr
; Single-lane load with register post-increment, <2 x i64>.
; One i64 is loaded from %bar and inserted into lane 1 of %A; %bar advanced by
; the runtime value %inc (in i64 elements) is stored to %ptr. The expected ld1.d
; into lane [1] takes its post-index from an x register.
6106 define <2 x i64> @test_v2i64_post_reg_ld1lane(i64* %bar, i64** %ptr, i64 %inc, <2 x i64> %A) {
6107 ; CHECK-LABEL: test_v2i64_post_reg_ld1lane:
6108 ; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
6109 %tmp1 = load i64, i64* %bar
6110 %tmp2 = insertelement <2 x i64> %A, i64 %tmp1, i32 1
6111 %tmp3 = getelementptr i64, i64* %bar, i64 %inc
6112 store i64* %tmp3, i64** %ptr
; Single-lane load with immediate post-increment, <4 x float>.
; One float is loaded from %bar and inserted into lane 1 of the incoming vector
; %A; %bar + 1 element (4 bytes) is then stored to %ptr. The expected output is
; ld1.s into lane [1] of v0 with a #4 post-index on x0.
6116 define <4 x float> @test_v4f32_post_imm_ld1lane(float* %bar, float** %ptr, <4 x float> %A) {
6117 ; CHECK-LABEL: test_v4f32_post_imm_ld1lane:
6118 ; CHECK: ld1.s { v0 }[1], [x0], #4
6119 %tmp1 = load float, float* %bar
6120 %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
6121 %tmp3 = getelementptr float, float* %bar, i64 1
6122 store float* %tmp3, float** %ptr
6123 ret <4 x float> %tmp2
; Single-lane load with register post-increment, <4 x float>.
; One float is loaded from %bar and inserted into lane 1 of %A; %bar advanced by
; the runtime value %inc (in floats) is stored to %ptr. The expected ld1.s
; into lane [1] takes its post-index from an x register.
6126 define <4 x float> @test_v4f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <4 x float> %A) {
6127 ; CHECK-LABEL: test_v4f32_post_reg_ld1lane:
6128 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
6129 %tmp1 = load float, float* %bar
6130 %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
6131 %tmp3 = getelementptr float, float* %bar, i64 %inc
6132 store float* %tmp3, float** %ptr
6133 ret <4 x float> %tmp2
; Single-lane load with immediate post-increment, <2 x float>.
; One float is loaded from %bar and inserted into lane 1 of the incoming vector
; %A; %bar + 1 element (4 bytes) is then stored to %ptr. The expected output is
; ld1.s into lane [1] of v0 with a #4 post-index on x0.
6136 define <2 x float> @test_v2f32_post_imm_ld1lane(float* %bar, float** %ptr, <2 x float> %A) {
6137 ; CHECK-LABEL: test_v2f32_post_imm_ld1lane:
6138 ; CHECK: ld1.s { v0 }[1], [x0], #4
6139 %tmp1 = load float, float* %bar
6140 %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
6141 %tmp3 = getelementptr float, float* %bar, i64 1
6142 store float* %tmp3, float** %ptr
6143 ret <2 x float> %tmp2
; Single-lane load with register post-increment, <2 x float>.
; One float is loaded from %bar and inserted into lane 1 of %A; %bar advanced by
; the runtime value %inc (in floats) is stored to %ptr. The expected ld1.s
; into lane [1] takes its post-index from an x register.
6146 define <2 x float> @test_v2f32_post_reg_ld1lane(float* %bar, float** %ptr, i64 %inc, <2 x float> %A) {
6147 ; CHECK-LABEL: test_v2f32_post_reg_ld1lane:
6148 ; CHECK: ld1.s { v0 }[1], [x0], x{{[0-9]+}}
6149 %tmp1 = load float, float* %bar
6150 %tmp2 = insertelement <2 x float> %A, float %tmp1, i32 1
6151 %tmp3 = getelementptr float, float* %bar, i64 %inc
6152 store float* %tmp3, float** %ptr
6153 ret <2 x float> %tmp2
; Single-lane load with immediate post-increment, <2 x double>.
; One double is loaded from %bar and inserted into lane 1 of the incoming vector
; %A; %bar + 1 element (8 bytes) is then stored to %ptr. The expected output is
; ld1.d into lane [1] of v0 with a #8 post-index on x0.
6156 define <2 x double> @test_v2f64_post_imm_ld1lane(double* %bar, double** %ptr, <2 x double> %A) {
6157 ; CHECK-LABEL: test_v2f64_post_imm_ld1lane:
6158 ; CHECK: ld1.d { v0 }[1], [x0], #8
6159 %tmp1 = load double, double* %bar
6160 %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
6161 %tmp3 = getelementptr double, double* %bar, i64 1
6162 store double* %tmp3, double** %ptr
6163 ret <2 x double> %tmp2
; Single-lane load with register post-increment, <2 x double>.
; One double is loaded from %bar and inserted into lane 1 of %A; %bar advanced
; by the runtime value %inc (in doubles) is stored to %ptr. The expected ld1.d
; into lane [1] takes its post-index from an x register.
6166 define <2 x double> @test_v2f64_post_reg_ld1lane(double* %bar, double** %ptr, i64 %inc, <2 x double> %A) {
6167 ; CHECK-LABEL: test_v2f64_post_reg_ld1lane:
6168 ; CHECK: ld1.d { v0 }[1], [x0], x{{[0-9]+}}
6169 %tmp1 = load double, double* %bar
6170 %tmp2 = insertelement <2 x double> %A, double %tmp1, i32 1
6171 %tmp3 = getelementptr double, double* %bar, i64 %inc
6172 store double* %tmp3, double** %ptr
6173 ret <2 x double> %tmp2
6176 ; Check for dependencies between the vector and the scalar load.
; Negative case for the post-indexed lane load.
; The vector that receives the inserted lane (%A) is itself loaded from
; %dep_ptr_2 after a vector store to %dep_ptr_1, and both memory operations sit
; between the scalar load of %bar and the insertelement. The expected sequence
; therefore keeps a plain scalar ldr, the str/ldr of the q registers in program
; order, a mov.s lane insert, and a separate add (x2 scaled by 4) for the
; pointer update, instead of a single ld1.s with post-index.
6177 define <4 x float> @test_v4f32_post_reg_ld1lane_dep_vec_on_load(float* %bar, float** %ptr, i64 %inc, <4 x float>* %dep_ptr_1, <4 x float>* %dep_ptr_2, <4 x float> %vec) {
6178 ; CHECK-LABEL: test_v4f32_post_reg_ld1lane_dep_vec_on_load:
6180 ; CHECK-NEXT: ldr s[[LD:[0-9]+]], [x0]
6181 ; CHECK-NEXT: str q0, [x3]
6182 ; CHECK-NEXT: ldr q0, [x4]
6183 ; CHECK-NEXT: mov.s v0[1], v[[LD]][0]
6184 ; CHECK-NEXT: add [[POST:x[0-9]]], x0, x2, lsl #2
6185 ; CHECK-NEXT: str [[POST]], [x1]
6187 %tmp1 = load float, float* %bar
6188 store <4 x float> %vec, <4 x float>* %dep_ptr_1, align 16
6189 %A = load <4 x float>, <4 x float>* %dep_ptr_2, align 16
6190 %tmp2 = insertelement <4 x float> %A, float %tmp1, i32 1
6191 %tmp3 = getelementptr float, float* %bar, i64 %inc
6192 store float* %tmp3, float** %ptr
6193 ret <4 x float> %tmp2
6196 ; Make sure that we test the narrow V64 code path.
6197 ; The tests above don't, because there, 64-bit insert_vector_elt nodes will be
6198 ; widened to 128-bit before the LD1LANEpost combine has the chance to run,
6199 ; making it avoid narrow vector types.
6200 ; One way to trick that combine into running early is to force the vector ops
6201 ; legalizer to run. We achieve that using the ctpop.
6203 define <4 x i16> @test_v4i16_post_reg_ld1lane_forced_narrow(i16* %bar, i16** %ptr, i64 %inc, <4 x i16> %A, <2 x i32>* %d) {
6204 ; CHECK-LABEL: test_v4i16_post_reg_ld1lane_forced_narrow:
6205 ; CHECK: ld1.h { v0 }[1], [x0], x{{[0-9]+}}
; The pattern under test: scalar i16 load inserted into lane 1 of the 64-bit
; vector %A, with %bar advanced by %inc elements and stored through %ptr.
6206 %tmp1 = load i16, i16* %bar
6207 %tmp2 = insertelement <4 x i16> %A, i16 %tmp1, i32 1
6208 %tmp3 = getelementptr i16, i16* %bar, i64 %inc
6209 store i16* %tmp3, i16** %ptr
; The ctpop round-trip through %d does not feed the lane load; per the note
; above the function it is only here to make the vector ops legalizer run.
6210 %dl = load <2 x i32>, <2 x i32>* %d
6211 %dr = call <2 x i32> @llvm.ctpop.v2i32(<2 x i32> %dl)
6212 store <2 x i32> %dr, <2 x i32>* %d
; Population-count intrinsic called by the forced-narrow test.
6216 declare <2 x i32> @llvm.ctpop.v2i32(<2 x i32>)
; Build two <2 x i32> vectors out of four scalar loads and subtract them.
; For each vector, lane 0 is expected to come from an ldr and lane 1 from a
; lane load into the SAME register, so the lane-load directives reuse the
; register number captured by the matching ldr instead of capturing a new one.
6218 ; CHECK-LABEL: test_ld1lane_build:
6219 ; CHECK-DAG: ldr s[[REGNUM0:[0-9]+]], [x0]
6220 ; CHECK-DAG: ld1.s { v[[REGNUM0]] }[1], [x1]
6221 ; CHECK-DAG: ldr s[[REGNUM1:[0-9]+]], [x2]
6222 ; CHECK-DAG: ld1.s { v[[REGNUM1]] }[1], [x3]
6223 ; CHECK: sub.2s v[[REGNUM2:[0-9]+]], v[[REGNUM0]], v[[REGNUM1]]
6224 ; CHECK-NEXT: str d[[REGNUM2]], [x4]
6226 define void @test_ld1lane_build(i32* %ptr0, i32* %ptr1, i32* %ptr2, i32* %ptr3, <2 x i32>* %out) {
; First operand: { *%ptr0, *%ptr1 }.
6227 %load0 = load i32, i32* %ptr0, align 4
6228 %load1 = load i32, i32* %ptr1, align 4
6229 %vec0_0 = insertelement <2 x i32> undef, i32 %load0, i32 0
6230 %vec0_1 = insertelement <2 x i32> %vec0_0, i32 %load1, i32 1
; Second operand: { *%ptr2, *%ptr3 }.
6232 %load2 = load i32, i32* %ptr2, align 4
6233 %load3 = load i32, i32* %ptr3, align 4
6234 %vec1_0 = insertelement <2 x i32> undef, i32 %load2, i32 0
6235 %vec1_1 = insertelement <2 x i32> %vec1_0, i32 %load3, i32 1
; Result is stored so that both built vectors stay live.
6237 %sub = sub nsw <2 x i32> %vec0_1, %vec1_1
6238 store <2 x i32> %sub, <2 x i32>* %out, align 16
; Build a <4 x i16> from four scalar loads and subtract %e from it.
; Lane 0 is expected to come from an ldr and lanes 1-3 from lane loads into
; that same register; the subtraction then uses it against v0.
6242 ; CHECK-LABEL: test_ld1lane_build_i16:
6243 ; CHECK-DAG: ldr h[[REGNUM1:[0-9]+]], [x0]
6244 ; CHECK-DAG: ld1.h { v[[REGNUM1]] }[1], [x1]
6245 ; CHECK-DAG: ld1.h { v[[REGNUM1]] }[2], [x2]
6246 ; CHECK-DAG: ld1.h { v[[REGNUM1]] }[3], [x3]
6247 ; CHECK: sub.4h v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0
6248 ; CHECK-NEXT: str d[[REGNUM2]], [x4]
6250 define void @test_ld1lane_build_i16(i16* %a, i16* %b, i16* %c, i16* %d, <4 x i16> %e, <4 x i16>* %p) {
; One scalar load per lane.
6251 %ld.a = load i16, i16* %a
6252 %ld.b = load i16, i16* %b
6253 %ld.c = load i16, i16* %c
6254 %ld.d = load i16, i16* %d
; Chain of inserts starting from undef assembles the full vector.
6255 %v.a = insertelement <4 x i16> undef, i16 %ld.a, i64 0
6256 %v.b = insertelement <4 x i16> %v.a, i16 %ld.b, i64 1
6257 %v.c = insertelement <4 x i16> %v.b, i16 %ld.c, i64 2
6258 %v = insertelement <4 x i16> %v.c, i16 %ld.d, i64 3
6259 %sub = sub nsw <4 x i16> %v, %e
6260 store <4 x i16> %sub, <4 x i16>* %p
; Build a <4 x half> from four scalar loads and subtract %e from it.
; Lane 0 is expected to come from an ldr and lanes 1-3 from lane loads into
; that same register. The subtraction is expected in single precision: both
; operands are widened with fcvtl, subtracted with fsub.4s, and the result is
; narrowed with fcvtn. The final store must therefore use the register written
; by fcvtn, not the one written by fsub.
6264 ; CHECK-LABEL: test_ld1lane_build_half:
6265 ; CHECK-DAG: ldr h[[REGNUM1:[0-9]+]], [x0]
6266 ; CHECK-DAG: ld1.h { v[[REGNUM1]] }[1], [x1]
6267 ; CHECK-DAG: ld1.h { v[[REGNUM1]] }[2], [x2]
6268 ; CHECK-DAG: ld1.h { v[[REGNUM1]] }[3], [x3]
6269 ; CHECK-DAG: fcvtl v[[REGNUM01:[0-9]+]].4s, v0.4h
6270 ; CHECK-DAG: fcvtl v[[REGNUM11:[0-9]+]].4s, v[[REGNUM1]].4h
6271 ; CHECK: fsub.4s v[[REGNUM2:[0-9]+]], v[[REGNUM11]], v[[REGNUM01]]
6272 ; CHECK-DAG: fcvtn v[[REGNUM3:[0-9]+]].4h, v[[REGNUM2]].4s
6273 ; CHECK-NEXT: str d[[REGNUM3]], [x4]
6275 define void @test_ld1lane_build_half(half* %a, half* %b, half* %c, half* %d, <4 x half> %e, <4 x half>* %p) {
; One scalar load per lane.
6276 %ld.a = load half, half* %a
6277 %ld.b = load half, half* %b
6278 %ld.c = load half, half* %c
6279 %ld.d = load half, half* %d
; Chain of inserts starting from undef assembles the full vector.
6280 %v.a = insertelement <4 x half> undef, half %ld.a, i64 0
6281 %v.b = insertelement <4 x half> %v.a, half %ld.b, i64 1
6282 %v.c = insertelement <4 x half> %v.b, half %ld.c, i64 2
6283 %v = insertelement <4 x half> %v.c, half %ld.d, i64 3
6284 %sub = fsub <4 x half> %v, %e
6285 store <4 x half> %sub, <4 x half>* %p
; Build an <8 x i8> from eight scalar loads and subtract %v from it.
; Lane 0 is expected to come from an ldr and lanes 1-7 from lane loads into
; that same register. The eight source pointers occupy x0-x7 in the
; directives below, so the register that ends up holding %p is not pinned;
; the store address is matched with a register regex.
6289 ; CHECK-LABEL: test_ld1lane_build_i8:
6290 ; CHECK-DAG: ldr b[[REGNUM1:[0-9]+]], [x0]
6291 ; CHECK-DAG: ld1.b { v[[REGNUM1]] }[1], [x1]
6292 ; CHECK-DAG: ld1.b { v[[REGNUM1]] }[2], [x2]
6293 ; CHECK-DAG: ld1.b { v[[REGNUM1]] }[3], [x3]
6294 ; CHECK-DAG: ld1.b { v[[REGNUM1]] }[4], [x4]
6295 ; CHECK-DAG: ld1.b { v[[REGNUM1]] }[5], [x5]
6296 ; CHECK-DAG: ld1.b { v[[REGNUM1]] }[6], [x6]
6297 ; CHECK-DAG: ld1.b { v[[REGNUM1]] }[7], [x7]
6298 ; CHECK: sub.8b v[[REGNUM2:[0-9]+]], v[[REGNUM1]], v0
6299 ; CHECK-NEXT: str d[[REGNUM2]], [x{{[0-9]+}}]
6301 define void @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8* %f, i8* %g, i8* %h, <8 x i8> %v, <8 x i8>* %p) {
; One scalar load per lane.
6302 %ld.a = load i8, i8* %a
6303 %ld.b = load i8, i8* %b
6304 %ld.c = load i8, i8* %c
6305 %ld.d = load i8, i8* %d
6306 %ld.e = load i8, i8* %e
6307 %ld.f = load i8, i8* %f
6308 %ld.g = load i8, i8* %g
6309 %ld.h = load i8, i8* %h
; Chain of inserts starting from undef assembles the full vector.
6310 %v.a = insertelement <8 x i8> undef, i8 %ld.a, i64 0
6311 %v.b = insertelement <8 x i8> %v.a, i8 %ld.b, i64 1
6312 %v.c = insertelement <8 x i8> %v.b, i8 %ld.c, i64 2
6313 %v.d = insertelement <8 x i8> %v.c, i8 %ld.d, i64 3
6314 %v.e = insertelement <8 x i8> %v.d, i8 %ld.e, i64 4
6315 %v.f = insertelement <8 x i8> %v.e, i8 %ld.f, i64 5
6316 %v.g = insertelement <8 x i8> %v.f, i8 %ld.g, i64 6
6317 %v1 = insertelement <8 x i8> %v.g, i8 %ld.h, i64 7
6318 %sub = sub nsw <8 x i8> %v1, %v
6319 store <8 x i8> %sub, <8 x i8>* %p