1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
2 ; RUN: llc < %s -global-isel -global-isel-abort=1 -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
3 ; The instruction latencies of Exynos-M3 trigger the transform that is checked under the EXYNOS prefix.
4 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m3 | FileCheck --check-prefix=EXYNOS %s
; Single-lane stores from <16 x i8>: each function extracts one i8 lane and
; stores it at %D plus a constant (+1 or -1) or variable (%offset) index.
; Every variant here, lane 0 included, is expected to select st1.b; the
; variable-offset ("_ro") variants expect the address to be computed by a
; separate add into a register that st1.b then uses.
6 define void @st1lane_16b(<16 x i8> %A, ptr %D) {
7 ; CHECK-LABEL: st1lane_16b
8 ; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
9 %ptr = getelementptr i8, ptr %D, i64 1
10 %tmp = extractelement <16 x i8> %A, i32 1
11 store i8 %tmp, ptr %ptr
15 define void @st1lane0_16b(<16 x i8> %A, ptr %D) {
16 ; CHECK-LABEL: st1lane0_16b
17 ; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
18 %ptr = getelementptr i8, ptr %D, i64 1
19 %tmp = extractelement <16 x i8> %A, i32 0
20 store i8 %tmp, ptr %ptr
24 define void @st1lane0u_16b(<16 x i8> %A, ptr %D) {
25 ; CHECK-LABEL: st1lane0u_16b
26 ; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
27 %ptr = getelementptr i8, ptr %D, i64 -1
28 %tmp = extractelement <16 x i8> %A, i32 0
29 store i8 %tmp, ptr %ptr
33 define void @st1lane_ro_16b(<16 x i8> %A, ptr %D, i64 %offset) {
34 ; CHECK-LABEL: st1lane_ro_16b
35 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
36 ; CHECK: st1.b { v0 }[1], [x[[XREG]]]
37 %ptr = getelementptr i8, ptr %D, i64 %offset
38 %tmp = extractelement <16 x i8> %A, i32 1
39 store i8 %tmp, ptr %ptr
43 define void @st1lane0_ro_16b(<16 x i8> %A, ptr %D, i64 %offset) {
44 ; CHECK-LABEL: st1lane0_ro_16b
45 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
46 ; CHECK: st1.b { v0 }[0], [x[[XREG]]]
47 %ptr = getelementptr i8, ptr %D, i64 %offset
48 %tmp = extractelement <16 x i8> %A, i32 0
49 store i8 %tmp, ptr %ptr
; Single-lane stores from <8 x i16>. Lane 1 is expected to select st1.h (with
; a separate add for the variable-offset "_ro" variant). Lane 0 is expected to
; be stored as the scalar h0 instead: str with an immediate for index +1, stur
; for index -1, and a register-offset str (lsl #1) for %offset.
53 define void @st1lane_8h(<8 x i16> %A, ptr %D) {
54 ; CHECK-LABEL: st1lane_8h
55 ; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
56 %ptr = getelementptr i16, ptr %D, i64 1
57 %tmp = extractelement <8 x i16> %A, i32 1
58 store i16 %tmp, ptr %ptr
62 define void @st1lane0_8h(<8 x i16> %A, ptr %D) {
63 ; CHECK-LABEL: st1lane0_8h
64 ; CHECK: str h0, [x0, #2]
65 %ptr = getelementptr i16, ptr %D, i64 1
66 %tmp = extractelement <8 x i16> %A, i32 0
67 store i16 %tmp, ptr %ptr
71 define void @st1lane0u_8h(<8 x i16> %A, ptr %D) {
72 ; CHECK-LABEL: st1lane0u_8h
73 ; CHECK: stur h0, [x0, #-2]
74 %ptr = getelementptr i16, ptr %D, i64 -1
75 %tmp = extractelement <8 x i16> %A, i32 0
76 store i16 %tmp, ptr %ptr
80 define void @st1lane_ro_8h(<8 x i16> %A, ptr %D, i64 %offset) {
81 ; CHECK-LABEL: st1lane_ro_8h
82 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
83 ; CHECK: st1.h { v0 }[1], [x[[XREG]]]
84 %ptr = getelementptr i16, ptr %D, i64 %offset
85 %tmp = extractelement <8 x i16> %A, i32 1
86 store i16 %tmp, ptr %ptr
90 define void @st1lane0_ro_8h(<8 x i16> %A, ptr %D, i64 %offset) {
91 ; CHECK-LABEL: st1lane0_ro_8h
92 ; CHECK: str h0, [x0, x1, lsl #1]
93 %ptr = getelementptr i16, ptr %D, i64 %offset
94 %tmp = extractelement <8 x i16> %A, i32 0
95 store i16 %tmp, ptr %ptr
; Single-lane stores from <4 x i32>. Lane 1 is expected to select st1.s (with
; a separate add for the "_ro" variant). Lane 0 is expected to be stored as
; the scalar s0: str #4 for index +1, stur #-4 for index -1, and a
; register-offset str (lsl #2) for %offset.
99 define void @st1lane_4s(<4 x i32> %A, ptr %D) {
100 ; CHECK-LABEL: st1lane_4s
101 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
102 %ptr = getelementptr i32, ptr %D, i64 1
103 %tmp = extractelement <4 x i32> %A, i32 1
104 store i32 %tmp, ptr %ptr
108 define void @st1lane0_4s(<4 x i32> %A, ptr %D) {
109 ; CHECK-LABEL: st1lane0_4s
110 ; CHECK: str s0, [x0, #4]
111 %ptr = getelementptr i32, ptr %D, i64 1
112 %tmp = extractelement <4 x i32> %A, i32 0
113 store i32 %tmp, ptr %ptr
117 define void @st1lane0u_4s(<4 x i32> %A, ptr %D) {
118 ; CHECK-LABEL: st1lane0u_4s
119 ; CHECK: stur s0, [x0, #-4]
120 %ptr = getelementptr i32, ptr %D, i64 -1
121 %tmp = extractelement <4 x i32> %A, i32 0
122 store i32 %tmp, ptr %ptr
126 define void @st1lane_ro_4s(<4 x i32> %A, ptr %D, i64 %offset) {
127 ; CHECK-LABEL: st1lane_ro_4s
128 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
129 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
130 %ptr = getelementptr i32, ptr %D, i64 %offset
131 %tmp = extractelement <4 x i32> %A, i32 1
132 store i32 %tmp, ptr %ptr
136 define void @st1lane0_ro_4s(<4 x i32> %A, ptr %D, i64 %offset) {
137 ; CHECK-LABEL: st1lane0_ro_4s
138 ; CHECK: str s0, [x0, x1, lsl #2]
139 %ptr = getelementptr i32, ptr %D, i64 %offset
140 %tmp = extractelement <4 x i32> %A, i32 0
141 store i32 %tmp, ptr %ptr
; Single-lane stores from <4 x float>. The expected instructions match the
; <4 x i32> tests: st1.s for lane 1 (after an add in the "_ro" variant), and
; str / stur / register-offset str of s0 for lane 0.
145 define void @st1lane_4s_float(<4 x float> %A, ptr %D) {
146 ; CHECK-LABEL: st1lane_4s_float
147 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
148 %ptr = getelementptr float, ptr %D, i64 1
149 %tmp = extractelement <4 x float> %A, i32 1
150 store float %tmp, ptr %ptr
154 define void @st1lane0_4s_float(<4 x float> %A, ptr %D) {
155 ; CHECK-LABEL: st1lane0_4s_float
156 ; CHECK: str s0, [x0, #4]
157 %ptr = getelementptr float, ptr %D, i64 1
158 %tmp = extractelement <4 x float> %A, i32 0
159 store float %tmp, ptr %ptr
163 define void @st1lane0u_4s_float(<4 x float> %A, ptr %D) {
164 ; CHECK-LABEL: st1lane0u_4s_float
165 ; CHECK: stur s0, [x0, #-4]
166 %ptr = getelementptr float, ptr %D, i64 -1
167 %tmp = extractelement <4 x float> %A, i32 0
168 store float %tmp, ptr %ptr
172 define void @st1lane_ro_4s_float(<4 x float> %A, ptr %D, i64 %offset) {
173 ; CHECK-LABEL: st1lane_ro_4s_float
174 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
175 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
176 %ptr = getelementptr float, ptr %D, i64 %offset
177 %tmp = extractelement <4 x float> %A, i32 1
178 store float %tmp, ptr %ptr
182 define void @st1lane0_ro_4s_float(<4 x float> %A, ptr %D, i64 %offset) {
183 ; CHECK-LABEL: st1lane0_ro_4s_float
184 ; CHECK: str s0, [x0, x1, lsl #2]
185 %ptr = getelementptr float, ptr %D, i64 %offset
186 %tmp = extractelement <4 x float> %A, i32 0
187 store float %tmp, ptr %ptr
; Single-lane stores from <2 x i64>. Lane 1 is expected to select st1.d (with
; a separate add for the "_ro" variant). Lane 0 is expected to be stored as
; the scalar d0: str #8 for index +1, stur #-8 for index -1, and a
; register-offset str (lsl #3) for %offset.
191 define void @st1lane_2d(<2 x i64> %A, ptr %D) {
192 ; CHECK-LABEL: st1lane_2d
193 ; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
194 %ptr = getelementptr i64, ptr %D, i64 1
195 %tmp = extractelement <2 x i64> %A, i32 1
196 store i64 %tmp, ptr %ptr
200 define void @st1lane0_2d(<2 x i64> %A, ptr %D) {
201 ; CHECK-LABEL: st1lane0_2d
202 ; CHECK: str d0, [x0, #8]
203 %ptr = getelementptr i64, ptr %D, i64 1
204 %tmp = extractelement <2 x i64> %A, i32 0
205 store i64 %tmp, ptr %ptr
209 define void @st1lane0u_2d(<2 x i64> %A, ptr %D) {
210 ; CHECK-LABEL: st1lane0u_2d
211 ; CHECK: stur d0, [x0, #-8]
212 %ptr = getelementptr i64, ptr %D, i64 -1
213 %tmp = extractelement <2 x i64> %A, i32 0
214 store i64 %tmp, ptr %ptr
218 define void @st1lane_ro_2d(<2 x i64> %A, ptr %D, i64 %offset) {
219 ; CHECK-LABEL: st1lane_ro_2d
220 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
221 ; CHECK: st1.d { v0 }[1], [x[[XREG]]]
222 %ptr = getelementptr i64, ptr %D, i64 %offset
223 %tmp = extractelement <2 x i64> %A, i32 1
224 store i64 %tmp, ptr %ptr
228 define void @st1lane0_ro_2d(<2 x i64> %A, ptr %D, i64 %offset) {
229 ; CHECK-LABEL: st1lane0_ro_2d
230 ; CHECK: str d0, [x0, x1, lsl #3]
231 %ptr = getelementptr i64, ptr %D, i64 %offset
232 %tmp = extractelement <2 x i64> %A, i32 0
233 store i64 %tmp, ptr %ptr
; Single-lane stores from <2 x double>. The expected instructions match the
; <2 x i64> tests: st1.d for lane 1 (after an add in the "_ro" variant), and
; str / stur / register-offset str of d0 for lane 0.
237 define void @st1lane_2d_double(<2 x double> %A, ptr %D) {
238 ; CHECK-LABEL: st1lane_2d_double
239 ; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
240 %ptr = getelementptr double, ptr %D, i64 1
241 %tmp = extractelement <2 x double> %A, i32 1
242 store double %tmp, ptr %ptr
246 define void @st1lane0_2d_double(<2 x double> %A, ptr %D) {
247 ; CHECK-LABEL: st1lane0_2d_double
248 ; CHECK: str d0, [x0, #8]
249 %ptr = getelementptr double, ptr %D, i64 1
250 %tmp = extractelement <2 x double> %A, i32 0
251 store double %tmp, ptr %ptr
255 define void @st1lane0u_2d_double(<2 x double> %A, ptr %D) {
256 ; CHECK-LABEL: st1lane0u_2d_double
257 ; CHECK: stur d0, [x0, #-8]
258 %ptr = getelementptr double, ptr %D, i64 -1
259 %tmp = extractelement <2 x double> %A, i32 0
260 store double %tmp, ptr %ptr
264 define void @st1lane_ro_2d_double(<2 x double> %A, ptr %D, i64 %offset) {
265 ; CHECK-LABEL: st1lane_ro_2d_double
266 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
267 ; CHECK: st1.d { v0 }[1], [x[[XREG]]]
268 %ptr = getelementptr double, ptr %D, i64 %offset
269 %tmp = extractelement <2 x double> %A, i32 1
270 store double %tmp, ptr %ptr
274 define void @st1lane0_ro_2d_double(<2 x double> %A, ptr %D, i64 %offset) {
275 ; CHECK-LABEL: st1lane0_ro_2d_double
276 ; CHECK: str d0, [x0, x1, lsl #3]
277 %ptr = getelementptr double, ptr %D, i64 %offset
278 %tmp = extractelement <2 x double> %A, i32 0
279 store double %tmp, ptr %ptr
; Single-lane stores from <8 x i8>. All three variants (lane 1 with constant
; index, lane 1 and lane 0 with the variable %offset) are expected to select
; st1.b; the "_ro" variants expect a separate add to form the address.
283 define void @st1lane_8b(<8 x i8> %A, ptr %D) {
284 ; CHECK-LABEL: st1lane_8b
285 ; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
286 %ptr = getelementptr i8, ptr %D, i64 1
287 %tmp = extractelement <8 x i8> %A, i32 1
288 store i8 %tmp, ptr %ptr
292 define void @st1lane_ro_8b(<8 x i8> %A, ptr %D, i64 %offset) {
293 ; CHECK-LABEL: st1lane_ro_8b
294 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
295 ; CHECK: st1.b { v0 }[1], [x[[XREG]]]
296 %ptr = getelementptr i8, ptr %D, i64 %offset
297 %tmp = extractelement <8 x i8> %A, i32 1
298 store i8 %tmp, ptr %ptr
302 define void @st1lane0_ro_8b(<8 x i8> %A, ptr %D, i64 %offset) {
303 ; CHECK-LABEL: st1lane0_ro_8b
304 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
305 ; CHECK: st1.b { v0 }[0], [x[[XREG]]]
306 %ptr = getelementptr i8, ptr %D, i64 %offset
307 %tmp = extractelement <8 x i8> %A, i32 0
308 store i8 %tmp, ptr %ptr
; Single-lane stores from <4 x i16>. The expected instructions match the
; <8 x i16> tests: st1.h for lane 1 (after an add in the "_ro" variant), and
; str #2 / stur #-2 / register-offset str (lsl #1) of h0 for lane 0.
312 define void @st1lane_4h(<4 x i16> %A, ptr %D) {
313 ; CHECK-LABEL: st1lane_4h
314 ; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
315 %ptr = getelementptr i16, ptr %D, i64 1
316 %tmp = extractelement <4 x i16> %A, i32 1
317 store i16 %tmp, ptr %ptr
321 define void @st1lane0_4h(<4 x i16> %A, ptr %D) {
322 ; CHECK-LABEL: st1lane0_4h
323 ; CHECK: str h0, [x0, #2]
324 %ptr = getelementptr i16, ptr %D, i64 1
325 %tmp = extractelement <4 x i16> %A, i32 0
326 store i16 %tmp, ptr %ptr
330 define void @st1lane0u_4h(<4 x i16> %A, ptr %D) {
331 ; CHECK-LABEL: st1lane0u_4h
332 ; CHECK: stur h0, [x0, #-2]
333 %ptr = getelementptr i16, ptr %D, i64 -1
334 %tmp = extractelement <4 x i16> %A, i32 0
335 store i16 %tmp, ptr %ptr
339 define void @st1lane_ro_4h(<4 x i16> %A, ptr %D, i64 %offset) {
340 ; CHECK-LABEL: st1lane_ro_4h
341 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
342 ; CHECK: st1.h { v0 }[1], [x[[XREG]]]
343 %ptr = getelementptr i16, ptr %D, i64 %offset
344 %tmp = extractelement <4 x i16> %A, i32 1
345 store i16 %tmp, ptr %ptr
349 define void @st1lane0_ro_4h(<4 x i16> %A, ptr %D, i64 %offset) {
350 ; CHECK-LABEL: st1lane0_ro_4h
351 ; CHECK: str h0, [x0, x1, lsl #1]
352 %ptr = getelementptr i16, ptr %D, i64 %offset
353 %tmp = extractelement <4 x i16> %A, i32 0
354 store i16 %tmp, ptr %ptr
; Single-lane stores from <2 x i32>. The expected instructions match the
; <4 x i32> tests: st1.s for lane 1 (after an add in the "_ro" variant), and
; str #4 / stur #-4 / register-offset str (lsl #2) of s0 for lane 0.
358 define void @st1lane_2s(<2 x i32> %A, ptr %D) {
359 ; CHECK-LABEL: st1lane_2s
360 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
361 %ptr = getelementptr i32, ptr %D, i64 1
362 %tmp = extractelement <2 x i32> %A, i32 1
363 store i32 %tmp, ptr %ptr
367 define void @st1lane0_2s(<2 x i32> %A, ptr %D) {
368 ; CHECK-LABEL: st1lane0_2s
369 ; CHECK: str s0, [x0, #4]
370 %ptr = getelementptr i32, ptr %D, i64 1
371 %tmp = extractelement <2 x i32> %A, i32 0
372 store i32 %tmp, ptr %ptr
376 define void @st1lane0u_2s(<2 x i32> %A, ptr %D) {
377 ; CHECK-LABEL: st1lane0u_2s
378 ; CHECK: stur s0, [x0, #-4]
379 %ptr = getelementptr i32, ptr %D, i64 -1
380 %tmp = extractelement <2 x i32> %A, i32 0
381 store i32 %tmp, ptr %ptr
385 define void @st1lane_ro_2s(<2 x i32> %A, ptr %D, i64 %offset) {
386 ; CHECK-LABEL: st1lane_ro_2s
387 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
388 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
389 %ptr = getelementptr i32, ptr %D, i64 %offset
390 %tmp = extractelement <2 x i32> %A, i32 1
391 store i32 %tmp, ptr %ptr
395 define void @st1lane0_ro_2s(<2 x i32> %A, ptr %D, i64 %offset) {
396 ; CHECK-LABEL: st1lane0_ro_2s
397 ; CHECK: str s0, [x0, x1, lsl #2]
398 %ptr = getelementptr i32, ptr %D, i64 %offset
399 %tmp = extractelement <2 x i32> %A, i32 0
400 store i32 %tmp, ptr %ptr
; Single-lane stores from <2 x float>. The expected instructions match the
; <2 x i32> tests: st1.s for lane 1 (after an add in the "_ro" variant), and
; str / stur / register-offset str of s0 for lane 0.
404 define void @st1lane_2s_float(<2 x float> %A, ptr %D) {
405 ; CHECK-LABEL: st1lane_2s_float
406 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
407 %ptr = getelementptr float, ptr %D, i64 1
408 %tmp = extractelement <2 x float> %A, i32 1
409 store float %tmp, ptr %ptr
413 define void @st1lane0_2s_float(<2 x float> %A, ptr %D) {
414 ; CHECK-LABEL: st1lane0_2s_float
415 ; CHECK: str s0, [x0, #4]
416 %ptr = getelementptr float, ptr %D, i64 1
417 %tmp = extractelement <2 x float> %A, i32 0
418 store float %tmp, ptr %ptr
422 define void @st1lane0u_2s_float(<2 x float> %A, ptr %D) {
423 ; CHECK-LABEL: st1lane0u_2s_float
424 ; CHECK: stur s0, [x0, #-4]
425 %ptr = getelementptr float, ptr %D, i64 -1
426 %tmp = extractelement <2 x float> %A, i32 0
427 store float %tmp, ptr %ptr
431 define void @st1lane_ro_2s_float(<2 x float> %A, ptr %D, i64 %offset) {
432 ; CHECK-LABEL: st1lane_ro_2s_float
433 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
434 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
435 %ptr = getelementptr float, ptr %D, i64 %offset
436 %tmp = extractelement <2 x float> %A, i32 1
437 store float %tmp, ptr %ptr
441 define void @st1lane0_ro_2s_float(<2 x float> %A, ptr %D, i64 %offset) {
442 ; CHECK-LABEL: st1lane0_ro_2s_float
443 ; CHECK: str s0, [x0, x1, lsl #2]
444 %ptr = getelementptr float, ptr %D, i64 %offset
445 %tmp = extractelement <2 x float> %A, i32 0
446 store float %tmp, ptr %ptr
; Stores of the only lane of <1 x i64>. The element is expected to be stored
; as the scalar d0: str #8 for index +1, stur #-8 for index -1, and a
; register-offset str (lsl #3) for the variable %offset.
450 define void @st1lane0_1d(<1 x i64> %A, ptr %D) {
451 ; CHECK-LABEL: st1lane0_1d
452 ; CHECK: str d0, [x0, #8]
453 %ptr = getelementptr i64, ptr %D, i64 1
454 %tmp = extractelement <1 x i64> %A, i32 0
455 store i64 %tmp, ptr %ptr
459 define void @st1lane0u_1d(<1 x i64> %A, ptr %D) {
460 ; CHECK-LABEL: st1lane0u_1d
461 ; CHECK: stur d0, [x0, #-8]
462 %ptr = getelementptr i64, ptr %D, i64 -1
463 %tmp = extractelement <1 x i64> %A, i32 0
464 store i64 %tmp, ptr %ptr
468 define void @st1lane0_ro_1d(<1 x i64> %A, ptr %D, i64 %offset) {
469 ; CHECK-LABEL: st1lane0_ro_1d
470 ; CHECK: str d0, [x0, x1, lsl #3]
471 %ptr = getelementptr i64, ptr %D, i64 %offset
472 %tmp = extractelement <1 x i64> %A, i32 0
473 store i64 %tmp, ptr %ptr
; Stores of the only lane of <1 x double>. The expected instructions match
; the <1 x i64> tests: str #8, stur #-8, and register-offset str (lsl #3) of
; d0.
477 define void @st1lane0_1d_double(<1 x double> %A, ptr %D) {
478 ; CHECK-LABEL: st1lane0_1d_double
479 ; CHECK: str d0, [x0, #8]
480 %ptr = getelementptr double, ptr %D, i64 1
481 %tmp = extractelement <1 x double> %A, i32 0
482 store double %tmp, ptr %ptr
486 define void @st1lane0u_1d_double(<1 x double> %A, ptr %D) {
487 ; CHECK-LABEL: st1lane0u_1d_double
488 ; CHECK: stur d0, [x0, #-8]
489 %ptr = getelementptr double, ptr %D, i64 -1
490 %tmp = extractelement <1 x double> %A, i32 0
491 store double %tmp, ptr %ptr
495 define void @st1lane0_ro_1d_double(<1 x double> %A, ptr %D, i64 %offset) {
496 ; CHECK-LABEL: st1lane0_ro_1d_double
497 ; CHECK: str d0, [x0, x1, lsl #3]
498 %ptr = getelementptr double, ptr %D, i64 %offset
499 %tmp = extractelement <1 x double> %A, i32 0
500 store double %tmp, ptr %ptr
; st2lane intrinsic tests for 128-bit vectors: each function forwards its two
; vector arguments, the constant lane index 1, and the destination pointer %D
; to the llvm.aarch64.neon.st2lane overload for its element type.
504 define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, ptr %D) {
505 ; CHECK-LABEL: st2lane_16b
507 call void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8> %A, <16 x i8> %B, i64 1, ptr %D)
511 define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, ptr %D) {
512 ; CHECK-LABEL: st2lane_8h
514 call void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16> %A, <8 x i16> %B, i64 1, ptr %D)
518 define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, ptr %D) {
519 ; CHECK-LABEL: st2lane_4s
521 call void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32> %A, <4 x i32> %B, i64 1, ptr %D)
525 define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, ptr %D) {
526 ; CHECK-LABEL: st2lane_2d
528 call void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64> %A, <2 x i64> %B, i64 1, ptr %D)
; The st2lane intrinsics store to memory through their ptr operand, so they
; are declared with nounwind only and carry no memory-effect attribute.
532 declare void @llvm.aarch64.neon.st2lane.v16i8.p0(<16 x i8>, <16 x i8>, i64, ptr) nounwind
533 declare void @llvm.aarch64.neon.st2lane.v8i16.p0(<8 x i16>, <8 x i16>, i64, ptr) nounwind
534 declare void @llvm.aarch64.neon.st2lane.v4i32.p0(<4 x i32>, <4 x i32>, i64, ptr) nounwind
535 declare void @llvm.aarch64.neon.st2lane.v2i64.p0(<2 x i64>, <2 x i64>, i64, ptr) nounwind
; st3lane intrinsic tests for 128-bit vectors: each function forwards its
; three vector arguments, the constant lane index 1, and the destination
; pointer %D to the llvm.aarch64.neon.st3lane overload for its element type.
537 define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %D) {
538 ; CHECK-LABEL: st3lane_16b
540 call void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, ptr %D)
544 define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %D) {
545 ; CHECK-LABEL: st3lane_8h
547 call void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, ptr %D)
551 define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %D) {
552 ; CHECK-LABEL: st3lane_4s
554 call void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, ptr %D)
558 define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %D) {
559 ; CHECK-LABEL: st3lane_2d
561 call void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, ptr %D)
; The st3lane intrinsics store to memory through their ptr operand, so they
; are declared with nounwind only and carry no memory-effect attribute.
565 declare void @llvm.aarch64.neon.st3lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind
566 declare void @llvm.aarch64.neon.st3lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind
567 declare void @llvm.aarch64.neon.st3lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind
568 declare void @llvm.aarch64.neon.st3lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind
; st4lane intrinsic tests for 128-bit vectors: each function forwards its
; four vector arguments, the constant lane index 1, and the destination
; pointer %E to the llvm.aarch64.neon.st4lane overload for its element type.
570 define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %E) {
571 ; CHECK-LABEL: st4lane_16b
573 call void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, ptr %E)
577 define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %E) {
578 ; CHECK-LABEL: st4lane_8h
580 call void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, ptr %E)
584 define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %E) {
585 ; CHECK-LABEL: st4lane_4s
587 call void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, ptr %E)
591 define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %E) {
592 ; CHECK-LABEL: st4lane_2d
594 call void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, ptr %E)
; The st4lane intrinsics store to memory through their ptr operand, so they
; are declared with nounwind only and carry no memory-effect attribute.
598 declare void @llvm.aarch64.neon.st4lane.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, ptr) nounwind
599 declare void @llvm.aarch64.neon.st4lane.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, ptr) nounwind
600 declare void @llvm.aarch64.neon.st4lane.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, ptr) nounwind
601 declare void @llvm.aarch64.neon.st4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind
; st2/st3/st4 intrinsic tests for <8 x i8>: each function forwards its vector
; arguments and %P to the matching intrinsic. The st2 and st4 tests also carry
; an Exynos label line; the st3 test does not.
604 define void @st2_8b(<8 x i8> %A, <8 x i8> %B, ptr %P) nounwind {
605 ; CHECK-LABEL: st2_8b
607 ; EXYNOS-LABEL: st2_8b
611 call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %A, <8 x i8> %B, ptr %P)
615 define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P) nounwind {
616 ; CHECK-LABEL: st3_8b
618 call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %P)
622 define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %P) nounwind {
623 ; CHECK-LABEL: st4_8b
625 ; EXYNOS-LABEL: st4_8b
636 call void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %P)
; The st2/st3/st4 intrinsics store to memory through their ptr operand, so
; they are declared with nounwind only and carry no memory-effect attribute.
640 declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) nounwind
641 declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind
642 declare void @llvm.aarch64.neon.st4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind
; st2/st3/st4 intrinsic tests for <16 x i8>: each function forwards its vector
; arguments and %P to the matching intrinsic. The st2 and st4 tests also carry
; an Exynos label line; the st3 test does not.
644 define void @st2_16b(<16 x i8> %A, <16 x i8> %B, ptr %P) nounwind {
645 ; CHECK-LABEL: st2_16b
647 ; EXYNOS-LABEL: st2_16b
651 call void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8> %A, <16 x i8> %B, ptr %P)
655 define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %P) nounwind {
656 ; CHECK-LABEL: st3_16b
658 call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %P)
662 define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %P) nounwind {
663 ; CHECK-LABEL: st4_16b
665 ; EXYNOS-LABEL: st4_16b
676 call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %P)
; The st2/st3/st4 intrinsics store to memory through their ptr operand, so
; they are declared with nounwind only and carry no memory-effect attribute.
680 declare void @llvm.aarch64.neon.st2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) nounwind
681 declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind
682 declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind
; st2/st3/st4 intrinsic tests for <4 x i16>: each function forwards its vector
; arguments and %P to the matching intrinsic. The st2 and st4 tests also carry
; an Exynos label line; the st3 test does not.
684 define void @st2_4h(<4 x i16> %A, <4 x i16> %B, ptr %P) nounwind {
685 ; CHECK-LABEL: st2_4h
687 ; EXYNOS-LABEL: st2_4h
691 call void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16> %A, <4 x i16> %B, ptr %P)
695 define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %P) nounwind {
696 ; CHECK-LABEL: st3_4h
698 call void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %P)
702 define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %P) nounwind {
703 ; CHECK-LABEL: st4_4h
705 ; EXYNOS-LABEL: st4_4h
716 call void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %P)
; The st2/st3/st4 intrinsics store to memory through their ptr operand, so
; they are declared with nounwind only and carry no memory-effect attribute.
720 declare void @llvm.aarch64.neon.st2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) nounwind
721 declare void @llvm.aarch64.neon.st3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind
722 declare void @llvm.aarch64.neon.st4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind
; st2/st3/st4 intrinsic tests for <8 x i16>: each function forwards its vector
; arguments and %P to the matching intrinsic. The st2 and st4 tests also carry
; an Exynos label line; the st3 test does not.
724 define void @st2_8h(<8 x i16> %A, <8 x i16> %B, ptr %P) nounwind {
725 ; CHECK-LABEL: st2_8h
727 ; EXYNOS-LABEL: st2_8h
731 call void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16> %A, <8 x i16> %B, ptr %P)
735 define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %P) nounwind {
736 ; CHECK-LABEL: st3_8h
738 call void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %P)
742 define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %P) nounwind {
743 ; CHECK-LABEL: st4_8h
745 ; EXYNOS-LABEL: st4_8h
756 call void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %P)
; The st2/st3/st4 intrinsics store to memory through their ptr operand, so
; they are declared with nounwind only and carry no memory-effect attribute.
760 declare void @llvm.aarch64.neon.st2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) nounwind
761 declare void @llvm.aarch64.neon.st3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind
762 declare void @llvm.aarch64.neon.st4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind
; st2/st3/st4 intrinsic tests for <2 x i32>: each function forwards its vector
; arguments and %P to the matching intrinsic. The st2 and st4 tests also carry
; an Exynos label line; the st3 test does not.
764 define void @st2_2s(<2 x i32> %A, <2 x i32> %B, ptr %P) nounwind {
765 ; CHECK-LABEL: st2_2s
767 ; EXYNOS-LABEL: st2_2s
771 call void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32> %A, <2 x i32> %B, ptr %P)
775 define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %P) nounwind {
776 ; CHECK-LABEL: st3_2s
778 call void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %P)
782 define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %P) nounwind {
783 ; CHECK-LABEL: st4_2s
785 ; EXYNOS-LABEL: st4_2s
796 call void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %P)
; The st2/st3/st4 intrinsics store to memory through their ptr operand, so
; they are declared with nounwind only and carry no memory-effect attribute.
800 declare void @llvm.aarch64.neon.st2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) nounwind
801 declare void @llvm.aarch64.neon.st3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind
802 declare void @llvm.aarch64.neon.st4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind
; st2/st3/st4 intrinsic tests for <4 x i32>: each function forwards its vector
; arguments and %P to the matching intrinsic. The st2 and st4 tests also carry
; an Exynos label line; the st3 test does not.
804 define void @st2_4s(<4 x i32> %A, <4 x i32> %B, ptr %P) nounwind {
805 ; CHECK-LABEL: st2_4s
807 ; EXYNOS-LABEL: st2_4s
811 call void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32> %A, <4 x i32> %B, ptr %P)
815 define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %P) nounwind {
816 ; CHECK-LABEL: st3_4s
818 call void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %P)
822 define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %P) nounwind {
823 ; CHECK-LABEL: st4_4s
825 ; EXYNOS-LABEL: st4_4s
836 call void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %P)
; The st2/st3/st4 intrinsics store to memory through their ptr operand, so
; they are declared with nounwind only and carry no memory-effect attribute.
840 declare void @llvm.aarch64.neon.st2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) nounwind
841 declare void @llvm.aarch64.neon.st3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind
842 declare void @llvm.aarch64.neon.st4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind
844 ; If there's only one element, st2/3/4 don't make much sense; stick to st1.
; st2/st3/st4 intrinsic tests for <1 x i64>: each function forwards its vector
; arguments and %P to the matching intrinsic. None of these tests carries an
; Exynos label line.
845 define void @st2_1d(<1 x i64> %A, <1 x i64> %B, ptr %P) nounwind {
846 ; CHECK-LABEL: st2_1d
848 call void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64> %A, <1 x i64> %B, ptr %P)
852 define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %P) nounwind {
853 ; CHECK-LABEL: st3_1d
855 call void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %P)
859 define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %P) nounwind {
860 ; CHECK-LABEL: st4_1d
862 call void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %P)
; The st2/st3/st4 intrinsics store to memory through their ptr operand, so
; they are declared with nounwind only and carry no memory-effect attribute.
866 declare void @llvm.aarch64.neon.st2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) nounwind
867 declare void @llvm.aarch64.neon.st3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind
868 declare void @llvm.aarch64.neon.st4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind
; st2/st3/st4 intrinsic tests for <2 x i64>: each function forwards its vector
; arguments and %P to the matching intrinsic. The st2 and st4 tests also carry
; an Exynos label line; the st3 test does not.
870 define void @st2_2d(<2 x i64> %A, <2 x i64> %B, ptr %P) nounwind {
871 ; CHECK-LABEL: st2_2d
873 ; EXYNOS-LABEL: st2_2d
877 call void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64> %A, <2 x i64> %B, ptr %P)
881 define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P) nounwind {
882 ; CHECK-LABEL: st3_2d
884 call void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %P)
888 define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %P) nounwind {
889 ; CHECK-LABEL: st4_2d
891 ; EXYNOS-LABEL: st4_2d
902 call void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %P)
; The st2/st3/st4 intrinsics store to memory through their ptr operand, so
; they are declared with nounwind only and carry no memory-effect attribute.
906 declare void @llvm.aarch64.neon.st2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) nounwind
907 declare void @llvm.aarch64.neon.st3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind
908 declare void @llvm.aarch64.neon.st4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind
; The st1x2 intrinsics store to memory through their ptr operand, so they are
; declared with nounwind only and carry no memory-effect attribute.
910 declare void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8>, <8 x i8>, ptr) nounwind
911 declare void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16>, <4 x i16>, ptr) nounwind
912 declare void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32>, <2 x i32>, ptr) nounwind
913 declare void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float>, <2 x float>, ptr) nounwind
914 declare void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64>, <1 x i64>, ptr) nounwind
915 declare void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double>, <1 x double>, ptr) nounwind
; st1x2 intrinsic tests for 64-bit vectors: each function passes two vectors
; and %addr to the st1x2 overload for its element type. The expected output is
; a single two-register st1 (.8b, .4h, .2s or .1d arrangement) storing to [x0].
917 define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, ptr %addr) {
918 ; CHECK-LABEL: st1_x2_v8i8:
919 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
920 call void @llvm.aarch64.neon.st1x2.v8i8.p0(<8 x i8> %A, <8 x i8> %B, ptr %addr)
924 define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, ptr %addr) {
925 ; CHECK-LABEL: st1_x2_v4i16:
926 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
927 call void @llvm.aarch64.neon.st1x2.v4i16.p0(<4 x i16> %A, <4 x i16> %B, ptr %addr)
931 define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, ptr %addr) {
932 ; CHECK-LABEL: st1_x2_v2i32:
933 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
934 call void @llvm.aarch64.neon.st1x2.v2i32.p0(<2 x i32> %A, <2 x i32> %B, ptr %addr)
938 define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, ptr %addr) {
939 ; CHECK-LABEL: st1_x2_v2f32:
940 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
941 call void @llvm.aarch64.neon.st1x2.v2f32.p0(<2 x float> %A, <2 x float> %B, ptr %addr)
945 define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, ptr %addr) {
946 ; CHECK-LABEL: st1_x2_v1i64:
947 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
948 call void @llvm.aarch64.neon.st1x2.v1i64.p0(<1 x i64> %A, <1 x i64> %B, ptr %addr)
952 define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, ptr %addr) {
953 ; CHECK-LABEL: st1_x2_v1f64:
954 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
955 call void @llvm.aarch64.neon.st1x2.v1f64.p0(<1 x double> %A, <1 x double> %B, ptr %addr)
; The st1x2 intrinsics store to memory through their ptr operand, so they are
; declared with nounwind only and carry no memory-effect attribute.
959 declare void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8>, <16 x i8>, ptr) nounwind
960 declare void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16>, <8 x i16>, ptr) nounwind
961 declare void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32>, <4 x i32>, ptr) nounwind
962 declare void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float>, <4 x float>, ptr) nounwind
963 declare void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64>, <2 x i64>, ptr) nounwind
964 declare void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double>, <2 x double>, ptr) nounwind
; st1x2 intrinsic tests for 128-bit vectors: each function passes two vectors
; and %addr to the st1x2 overload for its element type. The expected output is
; a single two-register st1 (.16b, .8h, .4s or .2d arrangement) storing to
; [x0].
966 define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, ptr %addr) {
967 ; CHECK-LABEL: st1_x2_v16i8:
968 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
969 call void @llvm.aarch64.neon.st1x2.v16i8.p0(<16 x i8> %A, <16 x i8> %B, ptr %addr)
973 define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, ptr %addr) {
974 ; CHECK-LABEL: st1_x2_v8i16:
975 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
976 call void @llvm.aarch64.neon.st1x2.v8i16.p0(<8 x i16> %A, <8 x i16> %B, ptr %addr)
980 define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, ptr %addr) {
981 ; CHECK-LABEL: st1_x2_v4i32:
982 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
983 call void @llvm.aarch64.neon.st1x2.v4i32.p0(<4 x i32> %A, <4 x i32> %B, ptr %addr)
987 define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, ptr %addr) {
988 ; CHECK-LABEL: st1_x2_v4f32:
989 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
990 call void @llvm.aarch64.neon.st1x2.v4f32.p0(<4 x float> %A, <4 x float> %B, ptr %addr)
994 define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, ptr %addr) {
995 ; CHECK-LABEL: st1_x2_v2i64:
996 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
997 call void @llvm.aarch64.neon.st1x2.v2i64.p0(<2 x i64> %A, <2 x i64> %B, ptr %addr)
1001 define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, ptr %addr) {
1002 ; CHECK-LABEL: st1_x2_v2f64:
1003 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1004 call void @llvm.aarch64.neon.st1x2.v2f64.p0(<2 x double> %A, <2 x double> %B, ptr %addr)
; The st1x3 intrinsics store to memory through their ptr operand, so they are
; declared with nounwind only and carry no memory-effect attribute.
1008 declare void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind
1009 declare void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind
1010 declare void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind
1011 declare void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, ptr) nounwind
1012 declare void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind
1013 declare void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, ptr) nounwind
1015 define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %addr) {
1016 ; CHECK-LABEL: st1_x3_v8i8:
1017 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1018 call void @llvm.aarch64.neon.st1x3.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, ptr %addr)
; Passes three <4 x i16> vectors and %addr to the st1x3 intrinsic.
; FileCheck expects a single st1.4h with a three-register list stored via [x0].
1022 define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %addr) {
1023 ; CHECK-LABEL: st1_x3_v4i16:
1024 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1025 call void @llvm.aarch64.neon.st1x3.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, ptr %addr)
; Passes three <2 x i32> vectors and %addr to the st1x3 intrinsic.
; FileCheck expects a single st1.2s with a three-register list stored via [x0].
1029 define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %addr) {
1030 ; CHECK-LABEL: st1_x3_v2i32:
1031 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1032 call void @llvm.aarch64.neon.st1x3.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, ptr %addr)
; Passes three <2 x float> vectors and %addr to the st1x3 intrinsic.
; FileCheck expects the same st1.2s three-register form as the <2 x i32> case.
1036 define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, ptr %addr) {
1037 ; CHECK-LABEL: st1_x3_v2f32:
1038 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1039 call void @llvm.aarch64.neon.st1x3.v2f32.p0(<2 x float> %A, <2 x float> %B, <2 x float> %C, ptr %addr)
; Passes three <1 x i64> vectors and %addr to the st1x3 intrinsic.
; FileCheck expects a single st1.1d with a three-register list stored via [x0].
1043 define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %addr) {
1044 ; CHECK-LABEL: st1_x3_v1i64:
1045 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1046 call void @llvm.aarch64.neon.st1x3.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, ptr %addr)
; Passes three <1 x double> vectors and %addr to the st1x3 intrinsic.
; FileCheck expects the same st1.1d three-register form as the <1 x i64> case.
1050 define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, ptr %addr) {
1051 ; CHECK-LABEL: st1_x3_v1f64:
1052 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1053 call void @llvm.aarch64.neon.st1x3.v1f64.p0(<1 x double> %A, <1 x double> %B, <1 x double> %C, ptr %addr)
; Declarations of the three-vector st1x3 intrinsics for 128-bit vector types.
; The tests below expect these calls to become st1 stores, i.e. the intrinsics
; write through their pointer operand, so they are declared nounwind only
; (a readonly attribute would contradict the store).
1057 declare void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind
1058 declare void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind
1059 declare void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind
1060 declare void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, ptr) nounwind
1061 declare void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind
1062 declare void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, ptr) nounwind
; Passes three <16 x i8> vectors and %addr to the st1x3 intrinsic.
; FileCheck expects a single st1.16b with a three-register list stored via [x0].
1064 define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %addr) {
1065 ; CHECK-LABEL: st1_x3_v16i8:
1066 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1067 call void @llvm.aarch64.neon.st1x3.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, ptr %addr)
; Passes three <8 x i16> vectors and %addr to the st1x3 intrinsic.
; FileCheck expects a single st1.8h with a three-register list stored via [x0].
1071 define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %addr) {
1072 ; CHECK-LABEL: st1_x3_v8i16:
1073 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1074 call void @llvm.aarch64.neon.st1x3.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, ptr %addr)
; Passes three <4 x i32> vectors and %addr to the st1x3 intrinsic.
; FileCheck expects a single st1.4s with a three-register list stored via [x0].
1078 define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %addr) {
1079 ; CHECK-LABEL: st1_x3_v4i32:
1080 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1081 call void @llvm.aarch64.neon.st1x3.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, ptr %addr)
; Passes three <4 x float> vectors and %addr to the st1x3 intrinsic.
; FileCheck expects the same st1.4s three-register form as the <4 x i32> case.
1085 define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, ptr %addr) {
1086 ; CHECK-LABEL: st1_x3_v4f32:
1087 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1088 call void @llvm.aarch64.neon.st1x3.v4f32.p0(<4 x float> %A, <4 x float> %B, <4 x float> %C, ptr %addr)
; Passes three <2 x i64> vectors and %addr to the st1x3 intrinsic.
; FileCheck expects a single st1.2d with a three-register list stored via [x0].
1092 define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %addr) {
1093 ; CHECK-LABEL: st1_x3_v2i64:
1094 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1095 call void @llvm.aarch64.neon.st1x3.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, ptr %addr)
; Passes three <2 x double> vectors and %addr to the st1x3 intrinsic.
; FileCheck expects the same st1.2d three-register form as the <2 x i64> case.
1099 define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, ptr %addr) {
1100 ; CHECK-LABEL: st1_x3_v2f64:
1101 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1102 call void @llvm.aarch64.neon.st1x3.v2f64.p0(<2 x double> %A, <2 x double> %B, <2 x double> %C, ptr %addr)
; Declarations of the four-vector st1x4 intrinsics for 64-bit vector types.
; The tests below expect these calls to become st1 stores, i.e. the intrinsics
; write through their pointer operand, so they are declared nounwind only
; (a readonly attribute would contradict the store).
1107 declare void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, ptr) nounwind
1108 declare void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, ptr) nounwind
1109 declare void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, ptr) nounwind
1110 declare void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float>, <2 x float>, <2 x float>, <2 x float>, ptr) nounwind
1111 declare void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, ptr) nounwind
1112 declare void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double>, <1 x double>, <1 x double>, <1 x double>, ptr) nounwind
; Passes four <8 x i8> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects a single st1.8b with a four-register list stored via [x0].
1114 define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %addr) {
1115 ; CHECK-LABEL: st1_x4_v8i8:
1116 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1117 call void @llvm.aarch64.neon.st1x4.v8i8.p0(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, ptr %addr)
; Passes four <4 x i16> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects a single st1.4h with a four-register list stored via [x0].
1121 define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %addr) {
1122 ; CHECK-LABEL: st1_x4_v4i16:
1123 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1124 call void @llvm.aarch64.neon.st1x4.v4i16.p0(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, ptr %addr)
; Passes four <2 x i32> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects a single st1.2s with a four-register list stored via [x0].
1128 define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %addr) {
1129 ; CHECK-LABEL: st1_x4_v2i32:
1130 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1131 call void @llvm.aarch64.neon.st1x4.v2i32.p0(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, ptr %addr)
; Passes four <2 x float> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects the same st1.2s four-register form as the <2 x i32> case.
1135 define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %addr) {
1136 ; CHECK-LABEL: st1_x4_v2f32:
1137 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1138 call void @llvm.aarch64.neon.st1x4.v2f32.p0(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, ptr %addr)
; Passes four <1 x i64> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects a single st1.1d with a four-register list stored via [x0].
1142 define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %addr) {
1143 ; CHECK-LABEL: st1_x4_v1i64:
1144 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1145 call void @llvm.aarch64.neon.st1x4.v1i64.p0(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, ptr %addr)
; Passes four <1 x double> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects the same st1.1d four-register form as the <1 x i64> case.
1149 define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %addr) {
1150 ; CHECK-LABEL: st1_x4_v1f64:
1151 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1152 call void @llvm.aarch64.neon.st1x4.v1f64.p0(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, ptr %addr)
; Declarations of the four-vector st1x4 intrinsics for 128-bit vector types.
; The tests below expect these calls to become st1 stores, i.e. the intrinsics
; write through their pointer operand, so they are declared nounwind only
; (a readonly attribute would contradict the store).
1156 declare void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, ptr) nounwind
1157 declare void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, ptr) nounwind
1158 declare void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, ptr) nounwind
1159 declare void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float>, <4 x float>, <4 x float>, <4 x float>, ptr) nounwind
1160 declare void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, ptr) nounwind
1161 declare void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double>, <2 x double>, <2 x double>, <2 x double>, ptr) nounwind
; Passes four <16 x i8> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects a single st1.16b with a four-register list stored via [x0].
1163 define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %addr) {
1164 ; CHECK-LABEL: st1_x4_v16i8:
1165 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1166 call void @llvm.aarch64.neon.st1x4.v16i8.p0(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, ptr %addr)
; Passes four <8 x i16> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects a single st1.8h with a four-register list stored via [x0].
1170 define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %addr) {
1171 ; CHECK-LABEL: st1_x4_v8i16:
1172 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1173 call void @llvm.aarch64.neon.st1x4.v8i16.p0(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, ptr %addr)
; Passes four <4 x i32> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects a single st1.4s with a four-register list stored via [x0].
1177 define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %addr) {
1178 ; CHECK-LABEL: st1_x4_v4i32:
1179 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1180 call void @llvm.aarch64.neon.st1x4.v4i32.p0(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, ptr %addr)
; Passes four <4 x float> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects the same st1.4s four-register form as the <4 x i32> case.
1184 define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %addr) {
1185 ; CHECK-LABEL: st1_x4_v4f32:
1186 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1187 call void @llvm.aarch64.neon.st1x4.v4f32.p0(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, ptr %addr)
; Passes four <2 x i64> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects a single st1.2d with a four-register list stored via [x0].
1191 define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %addr) {
1192 ; CHECK-LABEL: st1_x4_v2i64:
1193 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1194 call void @llvm.aarch64.neon.st1x4.v2i64.p0(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, ptr %addr)
; Passes four <2 x double> vectors and %addr to the st1x4 intrinsic.
; FileCheck expects the same st1.2d four-register form as the <2 x i64> case.
1198 define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %addr) {
1199 ; CHECK-LABEL: st1_x4_v2f64:
1200 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1201 call void @llvm.aarch64.neon.st1x4.v2f64.p0(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, ptr %addr)