1 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs | FileCheck %s
2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -mcpu=exynos-m1 | FileCheck --check-prefix=EXYNOS %s
3 ; The instruction latencies of Exynos-M1 (second RUN line, -mcpu=exynos-m1) trigger the transform verified under the EXYNOS prefix.
5 define void @st1lane_16b(<16 x i8> %A, i8* %D) {
6 ; CHECK-LABEL: st1lane_16b
7 ; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
8 %ptr = getelementptr i8, i8* %D, i64 1
9 %tmp = extractelement <16 x i8> %A, i32 1
10 store i8 %tmp, i8* %ptr
14 define void @st1lane0_16b(<16 x i8> %A, i8* %D) {
15 ; CHECK-LABEL: st1lane0_16b
16 ; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
17 %ptr = getelementptr i8, i8* %D, i64 1
18 %tmp = extractelement <16 x i8> %A, i32 0
19 store i8 %tmp, i8* %ptr
23 define void @st1lane0u_16b(<16 x i8> %A, i8* %D) {
24 ; CHECK-LABEL: st1lane0u_16b
25 ; CHECK: st1.b { v0 }[0], [x{{[0-9]+}}]
26 %ptr = getelementptr i8, i8* %D, i64 -1
27 %tmp = extractelement <16 x i8> %A, i32 0
28 store i8 %tmp, i8* %ptr
32 define void @st1lane_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
33 ; CHECK-LABEL: st1lane_ro_16b
34 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
35 ; CHECK: st1.b { v0 }[1], [x[[XREG]]]
36 %ptr = getelementptr i8, i8* %D, i64 %offset
37 %tmp = extractelement <16 x i8> %A, i32 1
38 store i8 %tmp, i8* %ptr
42 define void @st1lane0_ro_16b(<16 x i8> %A, i8* %D, i64 %offset) {
43 ; CHECK-LABEL: st1lane0_ro_16b
44 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
45 ; CHECK: st1.b { v0 }[0], [x[[XREG]]]
46 %ptr = getelementptr i8, i8* %D, i64 %offset
47 %tmp = extractelement <16 x i8> %A, i32 0
48 store i8 %tmp, i8* %ptr
52 define void @st1lane_8h(<8 x i16> %A, i16* %D) {
53 ; CHECK-LABEL: st1lane_8h
54 ; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
55 %ptr = getelementptr i16, i16* %D, i64 1
56 %tmp = extractelement <8 x i16> %A, i32 1
57 store i16 %tmp, i16* %ptr
61 define void @st1lane0_8h(<8 x i16> %A, i16* %D) {
62 ; CHECK-LABEL: st1lane0_8h
63 ; CHECK: str h0, [x0, #2]
64 %ptr = getelementptr i16, i16* %D, i64 1
65 %tmp = extractelement <8 x i16> %A, i32 0
66 store i16 %tmp, i16* %ptr
70 define void @st1lane0u_8h(<8 x i16> %A, i16* %D) {
71 ; CHECK-LABEL: st1lane0u_8h
72 ; CHECK: stur h0, [x0, #-2]
73 %ptr = getelementptr i16, i16* %D, i64 -1
74 %tmp = extractelement <8 x i16> %A, i32 0
75 store i16 %tmp, i16* %ptr
79 define void @st1lane_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
80 ; CHECK-LABEL: st1lane_ro_8h
81 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
82 ; CHECK: st1.h { v0 }[1], [x[[XREG]]]
83 %ptr = getelementptr i16, i16* %D, i64 %offset
84 %tmp = extractelement <8 x i16> %A, i32 1
85 store i16 %tmp, i16* %ptr
89 define void @st1lane0_ro_8h(<8 x i16> %A, i16* %D, i64 %offset) {
90 ; CHECK-LABEL: st1lane0_ro_8h
91 ; CHECK: str h0, [x0, x1, lsl #1]
92 %ptr = getelementptr i16, i16* %D, i64 %offset
93 %tmp = extractelement <8 x i16> %A, i32 0
94 store i16 %tmp, i16* %ptr
98 define void @st1lane_4s(<4 x i32> %A, i32* %D) {
99 ; CHECK-LABEL: st1lane_4s
100 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
101 %ptr = getelementptr i32, i32* %D, i64 1
102 %tmp = extractelement <4 x i32> %A, i32 1
103 store i32 %tmp, i32* %ptr
107 define void @st1lane0_4s(<4 x i32> %A, i32* %D) {
108 ; CHECK-LABEL: st1lane0_4s
109 ; CHECK: str s0, [x0, #4]
110 %ptr = getelementptr i32, i32* %D, i64 1
111 %tmp = extractelement <4 x i32> %A, i32 0
112 store i32 %tmp, i32* %ptr
116 define void @st1lane0u_4s(<4 x i32> %A, i32* %D) {
117 ; CHECK-LABEL: st1lane0u_4s
118 ; CHECK: stur s0, [x0, #-4]
119 %ptr = getelementptr i32, i32* %D, i64 -1
120 %tmp = extractelement <4 x i32> %A, i32 0
121 store i32 %tmp, i32* %ptr
125 define void @st1lane_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
126 ; CHECK-LABEL: st1lane_ro_4s
127 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
128 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
129 %ptr = getelementptr i32, i32* %D, i64 %offset
130 %tmp = extractelement <4 x i32> %A, i32 1
131 store i32 %tmp, i32* %ptr
135 define void @st1lane0_ro_4s(<4 x i32> %A, i32* %D, i64 %offset) {
136 ; CHECK-LABEL: st1lane0_ro_4s
137 ; CHECK: str s0, [x0, x1, lsl #2]
138 %ptr = getelementptr i32, i32* %D, i64 %offset
139 %tmp = extractelement <4 x i32> %A, i32 0
140 store i32 %tmp, i32* %ptr
144 define void @st1lane_4s_float(<4 x float> %A, float* %D) {
145 ; CHECK-LABEL: st1lane_4s_float
146 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
147 %ptr = getelementptr float, float* %D, i64 1
148 %tmp = extractelement <4 x float> %A, i32 1
149 store float %tmp, float* %ptr
153 define void @st1lane0_4s_float(<4 x float> %A, float* %D) {
154 ; CHECK-LABEL: st1lane0_4s_float
155 ; CHECK: str s0, [x0, #4]
156 %ptr = getelementptr float, float* %D, i64 1
157 %tmp = extractelement <4 x float> %A, i32 0
158 store float %tmp, float* %ptr
162 define void @st1lane0u_4s_float(<4 x float> %A, float* %D) {
163 ; CHECK-LABEL: st1lane0u_4s_float
164 ; CHECK: stur s0, [x0, #-4]
165 %ptr = getelementptr float, float* %D, i64 -1
166 %tmp = extractelement <4 x float> %A, i32 0
167 store float %tmp, float* %ptr
171 define void @st1lane_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
172 ; CHECK-LABEL: st1lane_ro_4s_float
173 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
174 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
175 %ptr = getelementptr float, float* %D, i64 %offset
176 %tmp = extractelement <4 x float> %A, i32 1
177 store float %tmp, float* %ptr
181 define void @st1lane0_ro_4s_float(<4 x float> %A, float* %D, i64 %offset) {
182 ; CHECK-LABEL: st1lane0_ro_4s_float
183 ; CHECK: str s0, [x0, x1, lsl #2]
184 %ptr = getelementptr float, float* %D, i64 %offset
185 %tmp = extractelement <4 x float> %A, i32 0
186 store float %tmp, float* %ptr
190 define void @st1lane_2d(<2 x i64> %A, i64* %D) {
191 ; CHECK-LABEL: st1lane_2d
192 ; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
193 %ptr = getelementptr i64, i64* %D, i64 1
194 %tmp = extractelement <2 x i64> %A, i32 1
195 store i64 %tmp, i64* %ptr
199 define void @st1lane0_2d(<2 x i64> %A, i64* %D) {
200 ; CHECK-LABEL: st1lane0_2d
201 ; CHECK: str d0, [x0, #8]
202 %ptr = getelementptr i64, i64* %D, i64 1
203 %tmp = extractelement <2 x i64> %A, i32 0
204 store i64 %tmp, i64* %ptr
208 define void @st1lane0u_2d(<2 x i64> %A, i64* %D) {
209 ; CHECK-LABEL: st1lane0u_2d
210 ; CHECK: stur d0, [x0, #-8]
211 %ptr = getelementptr i64, i64* %D, i64 -1
212 %tmp = extractelement <2 x i64> %A, i32 0
213 store i64 %tmp, i64* %ptr
217 define void @st1lane_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
218 ; CHECK-LABEL: st1lane_ro_2d
219 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
220 ; CHECK: st1.d { v0 }[1], [x[[XREG]]]
221 %ptr = getelementptr i64, i64* %D, i64 %offset
222 %tmp = extractelement <2 x i64> %A, i32 1
223 store i64 %tmp, i64* %ptr
227 define void @st1lane0_ro_2d(<2 x i64> %A, i64* %D, i64 %offset) {
228 ; CHECK-LABEL: st1lane0_ro_2d
229 ; CHECK: str d0, [x0, x1, lsl #3]
230 %ptr = getelementptr i64, i64* %D, i64 %offset
231 %tmp = extractelement <2 x i64> %A, i32 0
232 store i64 %tmp, i64* %ptr
236 define void @st1lane_2d_double(<2 x double> %A, double* %D) {
237 ; CHECK-LABEL: st1lane_2d_double
238 ; CHECK: st1.d { v0 }[1], [x{{[0-9]+}}]
239 %ptr = getelementptr double, double* %D, i64 1
240 %tmp = extractelement <2 x double> %A, i32 1
241 store double %tmp, double* %ptr
245 define void @st1lane0_2d_double(<2 x double> %A, double* %D) {
246 ; CHECK-LABEL: st1lane0_2d_double
247 ; CHECK: str d0, [x0, #8]
248 %ptr = getelementptr double, double* %D, i64 1
249 %tmp = extractelement <2 x double> %A, i32 0
250 store double %tmp, double* %ptr
254 define void @st1lane0u_2d_double(<2 x double> %A, double* %D) {
255 ; CHECK-LABEL: st1lane0u_2d_double
256 ; CHECK: stur d0, [x0, #-8]
257 %ptr = getelementptr double, double* %D, i64 -1
258 %tmp = extractelement <2 x double> %A, i32 0
259 store double %tmp, double* %ptr
263 define void @st1lane_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
264 ; CHECK-LABEL: st1lane_ro_2d_double
265 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
266 ; CHECK: st1.d { v0 }[1], [x[[XREG]]]
267 %ptr = getelementptr double, double* %D, i64 %offset
268 %tmp = extractelement <2 x double> %A, i32 1
269 store double %tmp, double* %ptr
273 define void @st1lane0_ro_2d_double(<2 x double> %A, double* %D, i64 %offset) {
274 ; CHECK-LABEL: st1lane0_ro_2d_double
275 ; CHECK: str d0, [x0, x1, lsl #3]
276 %ptr = getelementptr double, double* %D, i64 %offset
277 %tmp = extractelement <2 x double> %A, i32 0
278 store double %tmp, double* %ptr
282 define void @st1lane_8b(<8 x i8> %A, i8* %D) {
283 ; CHECK-LABEL: st1lane_8b
284 ; CHECK: st1.b { v0 }[1], [x{{[0-9]+}}]
285 %ptr = getelementptr i8, i8* %D, i64 1
286 %tmp = extractelement <8 x i8> %A, i32 1
287 store i8 %tmp, i8* %ptr
291 define void @st1lane_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
292 ; CHECK-LABEL: st1lane_ro_8b
293 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
294 ; CHECK: st1.b { v0 }[1], [x[[XREG]]]
295 %ptr = getelementptr i8, i8* %D, i64 %offset
296 %tmp = extractelement <8 x i8> %A, i32 1
297 store i8 %tmp, i8* %ptr
301 define void @st1lane0_ro_8b(<8 x i8> %A, i8* %D, i64 %offset) {
302 ; CHECK-LABEL: st1lane0_ro_8b
303 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
304 ; CHECK: st1.b { v0 }[0], [x[[XREG]]]
305 %ptr = getelementptr i8, i8* %D, i64 %offset
306 %tmp = extractelement <8 x i8> %A, i32 0
307 store i8 %tmp, i8* %ptr
311 define void @st1lane_4h(<4 x i16> %A, i16* %D) {
312 ; CHECK-LABEL: st1lane_4h
313 ; CHECK: st1.h { v0 }[1], [x{{[0-9]+}}]
314 %ptr = getelementptr i16, i16* %D, i64 1
315 %tmp = extractelement <4 x i16> %A, i32 1
316 store i16 %tmp, i16* %ptr
320 define void @st1lane0_4h(<4 x i16> %A, i16* %D) {
321 ; CHECK-LABEL: st1lane0_4h
322 ; CHECK: str h0, [x0, #2]
323 %ptr = getelementptr i16, i16* %D, i64 1
324 %tmp = extractelement <4 x i16> %A, i32 0
325 store i16 %tmp, i16* %ptr
329 define void @st1lane0u_4h(<4 x i16> %A, i16* %D) {
330 ; CHECK-LABEL: st1lane0u_4h
331 ; CHECK: stur h0, [x0, #-2]
332 %ptr = getelementptr i16, i16* %D, i64 -1
333 %tmp = extractelement <4 x i16> %A, i32 0
334 store i16 %tmp, i16* %ptr
338 define void @st1lane_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
339 ; CHECK-LABEL: st1lane_ro_4h
340 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
341 ; CHECK: st1.h { v0 }[1], [x[[XREG]]]
342 %ptr = getelementptr i16, i16* %D, i64 %offset
343 %tmp = extractelement <4 x i16> %A, i32 1
344 store i16 %tmp, i16* %ptr
348 define void @st1lane0_ro_4h(<4 x i16> %A, i16* %D, i64 %offset) {
349 ; CHECK-LABEL: st1lane0_ro_4h
350 ; CHECK: str h0, [x0, x1, lsl #1]
351 %ptr = getelementptr i16, i16* %D, i64 %offset
352 %tmp = extractelement <4 x i16> %A, i32 0
353 store i16 %tmp, i16* %ptr
357 define void @st1lane_2s(<2 x i32> %A, i32* %D) {
358 ; CHECK-LABEL: st1lane_2s
359 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
360 %ptr = getelementptr i32, i32* %D, i64 1
361 %tmp = extractelement <2 x i32> %A, i32 1
362 store i32 %tmp, i32* %ptr
366 define void @st1lane0_2s(<2 x i32> %A, i32* %D) {
367 ; CHECK-LABEL: st1lane0_2s
368 ; CHECK: str s0, [x0, #4]
369 %ptr = getelementptr i32, i32* %D, i64 1
370 %tmp = extractelement <2 x i32> %A, i32 0
371 store i32 %tmp, i32* %ptr
375 define void @st1lane0u_2s(<2 x i32> %A, i32* %D) {
376 ; CHECK-LABEL: st1lane0u_2s
377 ; CHECK: stur s0, [x0, #-4]
378 %ptr = getelementptr i32, i32* %D, i64 -1
379 %tmp = extractelement <2 x i32> %A, i32 0
380 store i32 %tmp, i32* %ptr
384 define void @st1lane_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
385 ; CHECK-LABEL: st1lane_ro_2s
386 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
387 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
388 %ptr = getelementptr i32, i32* %D, i64 %offset
389 %tmp = extractelement <2 x i32> %A, i32 1
390 store i32 %tmp, i32* %ptr
394 define void @st1lane0_ro_2s(<2 x i32> %A, i32* %D, i64 %offset) {
395 ; CHECK-LABEL: st1lane0_ro_2s
396 ; CHECK: str s0, [x0, x1, lsl #2]
397 %ptr = getelementptr i32, i32* %D, i64 %offset
398 %tmp = extractelement <2 x i32> %A, i32 0
399 store i32 %tmp, i32* %ptr
403 define void @st1lane_2s_float(<2 x float> %A, float* %D) {
404 ; CHECK-LABEL: st1lane_2s_float
405 ; CHECK: st1.s { v0 }[1], [x{{[0-9]+}}]
406 %ptr = getelementptr float, float* %D, i64 1
407 %tmp = extractelement <2 x float> %A, i32 1
408 store float %tmp, float* %ptr
412 define void @st1lane0_2s_float(<2 x float> %A, float* %D) {
413 ; CHECK-LABEL: st1lane0_2s_float
414 ; CHECK: str s0, [x0, #4]
415 %ptr = getelementptr float, float* %D, i64 1
416 %tmp = extractelement <2 x float> %A, i32 0
417 store float %tmp, float* %ptr
421 define void @st1lane0u_2s_float(<2 x float> %A, float* %D) {
422 ; CHECK-LABEL: st1lane0u_2s_float
423 ; CHECK: stur s0, [x0, #-4]
424 %ptr = getelementptr float, float* %D, i64 -1
425 %tmp = extractelement <2 x float> %A, i32 0
426 store float %tmp, float* %ptr
430 define void @st1lane_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
431 ; CHECK-LABEL: st1lane_ro_2s_float
432 ; CHECK: add x[[XREG:[0-9]+]], x0, x1
433 ; CHECK: st1.s { v0 }[1], [x[[XREG]]]
434 %ptr = getelementptr float, float* %D, i64 %offset
435 %tmp = extractelement <2 x float> %A, i32 1
436 store float %tmp, float* %ptr
440 define void @st1lane0_ro_2s_float(<2 x float> %A, float* %D, i64 %offset) {
441 ; CHECK-LABEL: st1lane0_ro_2s_float
442 ; CHECK: str s0, [x0, x1, lsl #2]
443 %ptr = getelementptr float, float* %D, i64 %offset
444 %tmp = extractelement <2 x float> %A, i32 0
445 store float %tmp, float* %ptr
449 define void @st1lane0_1d(<1 x i64> %A, i64* %D) {
450 ; CHECK-LABEL: st1lane0_1d
451 ; CHECK: str d0, [x0, #8]
452 %ptr = getelementptr i64, i64* %D, i64 1
453 %tmp = extractelement <1 x i64> %A, i32 0
454 store i64 %tmp, i64* %ptr
458 define void @st1lane0u_1d(<1 x i64> %A, i64* %D) {
459 ; CHECK-LABEL: st1lane0u_1d
460 ; CHECK: stur d0, [x0, #-8]
461 %ptr = getelementptr i64, i64* %D, i64 -1
462 %tmp = extractelement <1 x i64> %A, i32 0
463 store i64 %tmp, i64* %ptr
467 define void @st1lane0_ro_1d(<1 x i64> %A, i64* %D, i64 %offset) {
468 ; CHECK-LABEL: st1lane0_ro_1d
469 ; CHECK: str d0, [x0, x1, lsl #3]
470 %ptr = getelementptr i64, i64* %D, i64 %offset
471 %tmp = extractelement <1 x i64> %A, i32 0
472 store i64 %tmp, i64* %ptr
476 define void @st1lane0_1d_double(<1 x double> %A, double* %D) {
477 ; CHECK-LABEL: st1lane0_1d_double
478 ; CHECK: str d0, [x0, #8]
479 %ptr = getelementptr double, double* %D, i64 1
480 %tmp = extractelement <1 x double> %A, i32 0
481 store double %tmp, double* %ptr
485 define void @st1lane0u_1d_double(<1 x double> %A, double* %D) {
486 ; CHECK-LABEL: st1lane0u_1d_double
487 ; CHECK: stur d0, [x0, #-8]
488 %ptr = getelementptr double, double* %D, i64 -1
489 %tmp = extractelement <1 x double> %A, i32 0
490 store double %tmp, double* %ptr
494 define void @st1lane0_ro_1d_double(<1 x double> %A, double* %D, i64 %offset) {
495 ; CHECK-LABEL: st1lane0_ro_1d_double
496 ; CHECK: str d0, [x0, x1, lsl #3]
497 %ptr = getelementptr double, double* %D, i64 %offset
498 %tmp = extractelement <1 x double> %A, i32 0
499 store double %tmp, double* %ptr
503 define void @st2lane_16b(<16 x i8> %A, <16 x i8> %B, i8* %D) {
504 ; CHECK-LABEL: st2lane_16b
506 call void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i64 1, i8* %D)
510 define void @st2lane_8h(<8 x i16> %A, <8 x i16> %B, i16* %D) {
511 ; CHECK-LABEL: st2lane_8h
513 call void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i64 1, i16* %D)
517 define void @st2lane_4s(<4 x i32> %A, <4 x i32> %B, i32* %D) {
518 ; CHECK-LABEL: st2lane_4s
520 call void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i64 1, i32* %D)
524 define void @st2lane_2d(<2 x i64> %A, <2 x i64> %B, i64* %D) {
525 ; CHECK-LABEL: st2lane_2d
527 call void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64 1, i64* %D)
531 declare void @llvm.aarch64.neon.st2lane.v16i8.p0i8(<16 x i8>, <16 x i8>, i64, i8*) nounwind ; store intrinsic (writes via the pointer operand): no readnone/readonly here
532 declare void @llvm.aarch64.neon.st2lane.v8i16.p0i16(<8 x i16>, <8 x i16>, i64, i16*) nounwind
533 declare void @llvm.aarch64.neon.st2lane.v4i32.p0i32(<4 x i32>, <4 x i32>, i64, i32*) nounwind
534 declare void @llvm.aarch64.neon.st2lane.v2i64.p0i64(<2 x i64>, <2 x i64>, i64, i64*) nounwind
536 define void @st3lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %D) {
537 ; CHECK-LABEL: st3lane_16b
539 call void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i64 1, i8* %D)
543 define void @st3lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %D) {
544 ; CHECK-LABEL: st3lane_8h
546 call void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i64 1, i16* %D)
550 define void @st3lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %D) {
551 ; CHECK-LABEL: st3lane_4s
553 call void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i64 1, i32* %D)
557 define void @st3lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %D) {
558 ; CHECK-LABEL: st3lane_2d
560 call void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64 1, i64* %D)
564 declare void @llvm.aarch64.neon.st3lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind ; store intrinsic (writes via the pointer operand): no readnone/readonly here
565 declare void @llvm.aarch64.neon.st3lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
566 declare void @llvm.aarch64.neon.st3lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
567 declare void @llvm.aarch64.neon.st3lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind
569 define void @st4lane_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %E) {
570 ; CHECK-LABEL: st4lane_16b
572 call void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i64 1, i8* %E)
576 define void @st4lane_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %E) {
577 ; CHECK-LABEL: st4lane_8h
579 call void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 1, i16* %E)
583 define void @st4lane_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %E) {
584 ; CHECK-LABEL: st4lane_4s
586 call void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 1, i32* %E)
590 define void @st4lane_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %E) {
591 ; CHECK-LABEL: st4lane_2d
593 call void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 1, i64* %E)
597 declare void @llvm.aarch64.neon.st4lane.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i64, i8*) nounwind ; store intrinsic (writes via the pointer operand): no readnone/readonly here
598 declare void @llvm.aarch64.neon.st4lane.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i64, i16*) nounwind
599 declare void @llvm.aarch64.neon.st4lane.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i64, i32*) nounwind
600 declare void @llvm.aarch64.neon.st4lane.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, i64*) nounwind
603 define void @st2_8b(<8 x i8> %A, <8 x i8> %B, i8* %P) nounwind {
604 ; CHECK-LABEL: st2_8b
606 ; EXYNOS-LABEL: st2_8b
610 call void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %P)
614 define void @st3_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P) nounwind {
615 ; CHECK-LABEL: st3_8b
617 call void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %P)
621 define void @st4_8b(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P) nounwind {
622 ; CHECK-LABEL: st4_8b
624 ; EXYNOS-LABEL: st4_8b
635 call void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %P)
639 declare void @llvm.aarch64.neon.st2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind ; store intrinsic (writes via the pointer operand): no readonly/readnone here
640 declare void @llvm.aarch64.neon.st3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
641 declare void @llvm.aarch64.neon.st4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
643 define void @st2_16b(<16 x i8> %A, <16 x i8> %B, i8* %P) nounwind {
644 ; CHECK-LABEL: st2_16b
646 ; EXYNOS-LABEL: st2_16b
650 call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %P)
654 define void @st3_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P) nounwind {
655 ; CHECK-LABEL: st3_16b
657 call void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %P)
661 define void @st4_16b(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P) nounwind {
662 ; CHECK-LABEL: st4_16b
664 ; EXYNOS-LABEL: st4_16b
675 call void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %P)
679 declare void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind ; store intrinsic (writes via the pointer operand): no readonly/readnone here
680 declare void @llvm.aarch64.neon.st3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
681 declare void @llvm.aarch64.neon.st4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
683 define void @st2_4h(<4 x i16> %A, <4 x i16> %B, i16* %P) nounwind {
684 ; CHECK-LABEL: st2_4h
686 ; EXYNOS-LABEL: st2_4h
690 call void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %P)
694 define void @st3_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P) nounwind {
695 ; CHECK-LABEL: st3_4h
697 call void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %P)
701 define void @st4_4h(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P) nounwind {
702 ; CHECK-LABEL: st4_4h
704 ; EXYNOS-LABEL: st4_4h
715 call void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %P)
719 declare void @llvm.aarch64.neon.st2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind ; store intrinsic (writes via the pointer operand): no readonly/readnone here
720 declare void @llvm.aarch64.neon.st3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
721 declare void @llvm.aarch64.neon.st4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
723 define void @st2_8h(<8 x i16> %A, <8 x i16> %B, i16* %P) nounwind {
724 ; CHECK-LABEL: st2_8h
726 ; EXYNOS-LABEL: st2_8h
730 call void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %P)
734 define void @st3_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P) nounwind {
735 ; CHECK-LABEL: st3_8h
737 call void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %P)
741 define void @st4_8h(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P) nounwind {
742 ; CHECK-LABEL: st4_8h
744 ; EXYNOS-LABEL: st4_8h
755 call void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %P)
759 declare void @llvm.aarch64.neon.st2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind ; store intrinsic (writes via the pointer operand): no readonly/readnone here
760 declare void @llvm.aarch64.neon.st3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
761 declare void @llvm.aarch64.neon.st4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
763 define void @st2_2s(<2 x i32> %A, <2 x i32> %B, i32* %P) nounwind {
764 ; CHECK-LABEL: st2_2s
766 ; EXYNOS-LABEL: st2_2s
770 call void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %P)
774 define void @st3_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P) nounwind {
775 ; CHECK-LABEL: st3_2s
777 call void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %P)
781 define void @st4_2s(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P) nounwind {
782 ; CHECK-LABEL: st4_2s
784 ; EXYNOS-LABEL: st4_2s
795 call void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %P)
799 declare void @llvm.aarch64.neon.st2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind ; store intrinsic (writes via the pointer operand): no readonly/readnone here
800 declare void @llvm.aarch64.neon.st3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
801 declare void @llvm.aarch64.neon.st4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
803 define void @st2_4s(<4 x i32> %A, <4 x i32> %B, i32* %P) nounwind {
804 ; CHECK-LABEL: st2_4s
806 ; EXYNOS-LABEL: st2_4s
810 call void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %P)
814 define void @st3_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P) nounwind {
815 ; CHECK-LABEL: st3_4s
817 call void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %P)
821 define void @st4_4s(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P) nounwind {
822 ; CHECK-LABEL: st4_4s
824 ; EXYNOS-LABEL: st4_4s
835 call void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %P)
839 declare void @llvm.aarch64.neon.st2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind ; store intrinsic (writes via the pointer operand): no readonly/readnone here
840 declare void @llvm.aarch64.neon.st3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
841 declare void @llvm.aarch64.neon.st4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
843 ; If there's only one element, st2/3/4 don't make much sense; stick to st1.
844 define void @st2_1d(<1 x i64> %A, <1 x i64> %B, i64* %P) nounwind {
845 ; CHECK-LABEL: st2_1d
847 call void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %P)
851 define void @st3_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P) nounwind {
852 ; CHECK-LABEL: st3_1d
854 call void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %P)
858 define void @st4_1d(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P) nounwind {
859 ; CHECK-LABEL: st4_1d
861 call void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %P)
865 declare void @llvm.aarch64.neon.st2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind ; store intrinsic (writes via the pointer operand): no readonly/readnone here
866 declare void @llvm.aarch64.neon.st3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
867 declare void @llvm.aarch64.neon.st4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
869 define void @st2_2d(<2 x i64> %A, <2 x i64> %B, i64* %P) nounwind {
870 ; CHECK-LABEL: st2_2d
872 ; EXYNOS-LABEL: st2_2d
876 call void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %P)
880 define void @st3_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P) nounwind {
881 ; CHECK-LABEL: st3_2d
883 call void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %P)
887 define void @st4_2d(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P) nounwind {
888 ; CHECK-LABEL: st4_2d
890 ; EXYNOS-LABEL: st4_2d
901 call void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %P)
905 declare void @llvm.aarch64.neon.st2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind ; store intrinsic (writes via the pointer operand): no readonly/readnone here
906 declare void @llvm.aarch64.neon.st3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
907 declare void @llvm.aarch64.neon.st4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
909 declare void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8>, <8 x i8>, i8*) nounwind ; store intrinsic (writes via the pointer operand): no readonly/readnone here
910 declare void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16>, <4 x i16>, i16*) nounwind
911 declare void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32>, <2 x i32>, i32*) nounwind
912 declare void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float>, <2 x float>, float*) nounwind
913 declare void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64>, <1 x i64>, i64*) nounwind
914 declare void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double>, <1 x double>, double*) nounwind
916 define void @st1_x2_v8i8(<8 x i8> %A, <8 x i8> %B, i8* %addr) {
917 ; CHECK-LABEL: st1_x2_v8i8:
918 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
919 call void @llvm.aarch64.neon.st1x2.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, i8* %addr)
923 define void @st1_x2_v4i16(<4 x i16> %A, <4 x i16> %B, i16* %addr) {
924 ; CHECK-LABEL: st1_x2_v4i16:
925 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
926 call void @llvm.aarch64.neon.st1x2.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, i16* %addr)
930 define void @st1_x2_v2i32(<2 x i32> %A, <2 x i32> %B, i32* %addr) {
931 ; CHECK-LABEL: st1_x2_v2i32:
932 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
933 call void @llvm.aarch64.neon.st1x2.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, i32* %addr)
937 define void @st1_x2_v2f32(<2 x float> %A, <2 x float> %B, float* %addr) {
938 ; CHECK-LABEL: st1_x2_v2f32:
939 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
940 call void @llvm.aarch64.neon.st1x2.v2f32.p0f32(<2 x float> %A, <2 x float> %B, float* %addr)
944 define void @st1_x2_v1i64(<1 x i64> %A, <1 x i64> %B, i64* %addr) {
945 ; CHECK-LABEL: st1_x2_v1i64:
946 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
947 call void @llvm.aarch64.neon.st1x2.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, i64* %addr)
951 define void @st1_x2_v1f64(<1 x double> %A, <1 x double> %B, double* %addr) {
952 ; CHECK-LABEL: st1_x2_v1f64:
953 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
954 call void @llvm.aarch64.neon.st1x2.v1f64.p0f64(<1 x double> %A, <1 x double> %B, double* %addr)
958 declare void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8>, <16 x i8>, i8*) nounwind ; store intrinsic (writes via the pointer operand): no readonly/readnone here
959 declare void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16>, <8 x i16>, i16*) nounwind
960 declare void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32>, <4 x i32>, i32*) nounwind
961 declare void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float>, <4 x float>, float*) nounwind
962 declare void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64>, <2 x i64>, i64*) nounwind
963 declare void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double>, <2 x double>, double*) nounwind
965 define void @st1_x2_v16i8(<16 x i8> %A, <16 x i8> %B, i8* %addr) {
966 ; CHECK-LABEL: st1_x2_v16i8:
967 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
968 call void @llvm.aarch64.neon.st1x2.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, i8* %addr)
972 define void @st1_x2_v8i16(<8 x i16> %A, <8 x i16> %B, i16* %addr) {
973 ; CHECK-LABEL: st1_x2_v8i16:
974 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
975 call void @llvm.aarch64.neon.st1x2.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, i16* %addr)
979 define void @st1_x2_v4i32(<4 x i32> %A, <4 x i32> %B, i32* %addr) {
980 ; CHECK-LABEL: st1_x2_v4i32:
981 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
982 call void @llvm.aarch64.neon.st1x2.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, i32* %addr)
986 define void @st1_x2_v4f32(<4 x float> %A, <4 x float> %B, float* %addr) {
987 ; CHECK-LABEL: st1_x2_v4f32:
988 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
989 call void @llvm.aarch64.neon.st1x2.v4f32.p0f32(<4 x float> %A, <4 x float> %B, float* %addr)
993 define void @st1_x2_v2i64(<2 x i64> %A, <2 x i64> %B, i64* %addr) {
994 ; CHECK-LABEL: st1_x2_v2i64:
995 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
996 call void @llvm.aarch64.neon.st1x2.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, i64* %addr)
1000 define void @st1_x2_v2f64(<2 x double> %A, <2 x double> %B, double* %addr) {
1001 ; CHECK-LABEL: st1_x2_v2f64:
1002 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1003 call void @llvm.aarch64.neon.st1x2.v2f64.p0f64(<2 x double> %A, <2 x double> %B, double* %addr)
1007 declare void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind ; store intrinsic (writes via the pointer operand): no readonly/readnone here
1008 declare void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
1009 declare void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
1010 declare void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, float*) nounwind
1011 declare void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
1012 declare void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, double*) nounwind
; Passes three <8 x i8> vectors and %addr to the st1x3 intrinsic; the expected
; output is a single st1.8b with a three-register list addressed through x0.
1014 define void @st1_x3_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr) {
1015 ; CHECK-LABEL: st1_x3_v8i8:
1016 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1017 call void @llvm.aarch64.neon.st1x3.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, i8* %addr)
; Passes three <4 x i16> vectors and %addr to the st1x3 intrinsic; the expected
; output is a single st1.4h with a three-register list addressed through x0.
1021 define void @st1_x3_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr) {
1022 ; CHECK-LABEL: st1_x3_v4i16:
1023 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1024 call void @llvm.aarch64.neon.st1x3.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, i16* %addr)
; Passes three <2 x i32> vectors and %addr to the st1x3 intrinsic; the expected
; output is a single st1.2s with a three-register list addressed through x0.
1028 define void @st1_x3_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr) {
1029 ; CHECK-LABEL: st1_x3_v2i32:
1030 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1031 call void @llvm.aarch64.neon.st1x3.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, i32* %addr)
; Passes three <2 x float> vectors and %addr to the st1x3 intrinsic; the
; expected output uses the same st1.2s three-register form as <2 x i32>.
1035 define void @st1_x3_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr) {
1036 ; CHECK-LABEL: st1_x3_v2f32:
1037 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1038 call void @llvm.aarch64.neon.st1x3.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, float* %addr)
; Passes three <1 x i64> vectors and %addr to the st1x3 intrinsic; the expected
; output is a single st1.1d with a three-register list addressed through x0.
1042 define void @st1_x3_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr) {
1043 ; CHECK-LABEL: st1_x3_v1i64:
1044 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1045 call void @llvm.aarch64.neon.st1x3.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, i64* %addr)
; Passes three <1 x double> vectors and %addr to the st1x3 intrinsic; the
; expected output uses the same st1.1d three-register form as <1 x i64>.
1049 define void @st1_x3_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr) {
1050 ; CHECK-LABEL: st1_x3_v1f64:
1051 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1052 call void @llvm.aarch64.neon.st1x3.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, double* %addr)
; Three-register st1x3 store intrinsics for the 128-bit vector types used below.
; They return void and store through the pointer operand, so they are declared
; with nounwind only; a readonly attribute would contradict the store.
1056 declare void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
1057 declare void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
1058 declare void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
1059 declare void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, float*) nounwind
1060 declare void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
1061 declare void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, double*) nounwind
; Passes three <16 x i8> vectors and %addr to the st1x3 intrinsic; the expected
; output is a single st1.16b with a three-register list addressed through x0.
1063 define void @st1_x3_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr) {
1064 ; CHECK-LABEL: st1_x3_v16i8:
1065 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1066 call void @llvm.aarch64.neon.st1x3.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, i8* %addr)
; Passes three <8 x i16> vectors and %addr to the st1x3 intrinsic; the expected
; output is a single st1.8h with a three-register list addressed through x0.
1070 define void @st1_x3_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr) {
1071 ; CHECK-LABEL: st1_x3_v8i16:
1072 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1073 call void @llvm.aarch64.neon.st1x3.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, i16* %addr)
; Passes three <4 x i32> vectors and %addr to the st1x3 intrinsic; the expected
; output is a single st1.4s with a three-register list addressed through x0.
1077 define void @st1_x3_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr) {
1078 ; CHECK-LABEL: st1_x3_v4i32:
1079 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1080 call void @llvm.aarch64.neon.st1x3.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, i32* %addr)
; Passes three <4 x float> vectors and %addr to the st1x3 intrinsic; the
; expected output uses the same st1.4s three-register form as <4 x i32>.
1084 define void @st1_x3_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr) {
1085 ; CHECK-LABEL: st1_x3_v4f32:
1086 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1087 call void @llvm.aarch64.neon.st1x3.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, float* %addr)
; Passes three <2 x i64> vectors and %addr to the st1x3 intrinsic; the expected
; output is a single st1.2d with a three-register list addressed through x0.
1091 define void @st1_x3_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr) {
1092 ; CHECK-LABEL: st1_x3_v2i64:
1093 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1094 call void @llvm.aarch64.neon.st1x3.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, i64* %addr)
; Passes three <2 x double> vectors and %addr to the st1x3 intrinsic; the
; expected output uses the same st1.2d three-register form as <2 x i64>.
1098 define void @st1_x3_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr) {
1099 ; CHECK-LABEL: st1_x3_v2f64:
1100 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1101 call void @llvm.aarch64.neon.st1x3.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, double* %addr)
; Four-register st1x4 store intrinsics for the 64-bit vector types used below.
; They return void and store through the pointer operand, so they are declared
; with nounwind only; a readonly attribute would contradict the store.
1106 declare void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, i8*) nounwind
1107 declare void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>, i16*) nounwind
1108 declare void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32*) nounwind
1109 declare void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float>, <2 x float>, <2 x float>, <2 x float>, float*) nounwind
1110 declare void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>, i64*) nounwind
1111 declare void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double>, <1 x double>, <1 x double>, <1 x double>, double*) nounwind
; Passes four <8 x i8> vectors and %addr to the st1x4 intrinsic; the expected
; output is a single st1.8b with a four-register list addressed through x0.
1113 define void @st1_x4_v8i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr) {
1114 ; CHECK-LABEL: st1_x4_v8i8:
1115 ; CHECK: st1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1116 call void @llvm.aarch64.neon.st1x4.v8i8.p0i8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C, <8 x i8> %D, i8* %addr)
; Passes four <4 x i16> vectors and %addr to the st1x4 intrinsic; the expected
; output is a single st1.4h with a four-register list addressed through x0.
1120 define void @st1_x4_v4i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr) {
1121 ; CHECK-LABEL: st1_x4_v4i16:
1122 ; CHECK: st1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1123 call void @llvm.aarch64.neon.st1x4.v4i16.p0i16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i16* %addr)
; Passes four <2 x i32> vectors and %addr to the st1x4 intrinsic; the expected
; output is a single st1.2s with a four-register list addressed through x0.
1127 define void @st1_x4_v2i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr) {
1128 ; CHECK-LABEL: st1_x4_v2i32:
1129 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1130 call void @llvm.aarch64.neon.st1x4.v2i32.p0i32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i32* %addr)
; Passes four <2 x float> vectors and %addr to the st1x4 intrinsic; the
; expected output uses the same st1.2s four-register form as <2 x i32>.
1134 define void @st1_x4_v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr) {
1135 ; CHECK-LABEL: st1_x4_v2f32:
1136 ; CHECK: st1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1137 call void @llvm.aarch64.neon.st1x4.v2f32.p0f32(<2 x float> %A, <2 x float> %B, <2 x float> %C, <2 x float> %D, float* %addr)
; Passes four <1 x i64> vectors and %addr to the st1x4 intrinsic; the expected
; output is a single st1.1d with a four-register list addressed through x0.
1141 define void @st1_x4_v1i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr) {
1142 ; CHECK-LABEL: st1_x4_v1i64:
1143 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1144 call void @llvm.aarch64.neon.st1x4.v1i64.p0i64(<1 x i64> %A, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64* %addr)
; Passes four <1 x double> vectors and %addr to the st1x4 intrinsic; the
; expected output uses the same st1.1d four-register form as <1 x i64>.
1148 define void @st1_x4_v1f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr) {
1149 ; CHECK-LABEL: st1_x4_v1f64:
1150 ; CHECK: st1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1151 call void @llvm.aarch64.neon.st1x4.v1f64.p0f64(<1 x double> %A, <1 x double> %B, <1 x double> %C, <1 x double> %D, double* %addr)
; Four-register st1x4 store intrinsics for the 128-bit vector types used below.
; They return void and store through the pointer operand, so they are declared
; with nounwind only; a readonly attribute would contradict the store.
1155 declare void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, i8*) nounwind
1156 declare void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>, i16*) nounwind
1157 declare void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, i32*) nounwind
1158 declare void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float>, <4 x float>, <4 x float>, <4 x float>, float*) nounwind
1159 declare void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64*) nounwind
1160 declare void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double>, <2 x double>, <2 x double>, <2 x double>, double*) nounwind
; Passes four <16 x i8> vectors and %addr to the st1x4 intrinsic; the expected
; output is a single st1.16b with a four-register list addressed through x0.
1162 define void @st1_x4_v16i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr) {
1163 ; CHECK-LABEL: st1_x4_v16i8:
1164 ; CHECK: st1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1165 call void @llvm.aarch64.neon.st1x4.v16i8.p0i8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C, <16 x i8> %D, i8* %addr)
; Passes four <8 x i16> vectors and %addr to the st1x4 intrinsic; the expected
; output is a single st1.8h with a four-register list addressed through x0.
1169 define void @st1_x4_v8i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr) {
1170 ; CHECK-LABEL: st1_x4_v8i16:
1171 ; CHECK: st1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1172 call void @llvm.aarch64.neon.st1x4.v8i16.p0i16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i16* %addr)
; Passes four <4 x i32> vectors and %addr to the st1x4 intrinsic; the expected
; output is a single st1.4s with a four-register list addressed through x0.
1176 define void @st1_x4_v4i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr) {
1177 ; CHECK-LABEL: st1_x4_v4i32:
1178 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1179 call void @llvm.aarch64.neon.st1x4.v4i32.p0i32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i32* %addr)
; Passes four <4 x float> vectors and %addr to the st1x4 intrinsic; the
; expected output uses the same st1.4s four-register form as <4 x i32>.
1183 define void @st1_x4_v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr) {
1184 ; CHECK-LABEL: st1_x4_v4f32:
1185 ; CHECK: st1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1186 call void @llvm.aarch64.neon.st1x4.v4f32.p0f32(<4 x float> %A, <4 x float> %B, <4 x float> %C, <4 x float> %D, float* %addr)
; Passes four <2 x i64> vectors and %addr to the st1x4 intrinsic; the expected
; output is a single st1.2d with a four-register list addressed through x0.
1190 define void @st1_x4_v2i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr) {
1191 ; CHECK-LABEL: st1_x4_v2i64:
1192 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1193 call void @llvm.aarch64.neon.st1x4.v2i64.p0i64(<2 x i64> %A, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64* %addr)
; Passes four <2 x double> vectors and %addr to the st1x4 intrinsic; the
; expected output uses the same st1.2d four-register form as <2 x i64>.
1197 define void @st1_x4_v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr) {
1198 ; CHECK-LABEL: st1_x4_v2f64:
1199 ; CHECK: st1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
1200 call void @llvm.aarch64.neon.st1x4.v2f64.p0f64(<2 x double> %A, <2 x double> %B, <2 x double> %C, <2 x double> %D, double* %addr)