1 ; RUN: llc < %s -mtriple aarch64-apple-darwin -asm-verbose=false -disable-post-ra | FileCheck %s
; Nontemporal (per the !nontemporal metadata) store of a <4 x i64> through %p
; with align 1. Expected output: lane 1 of v1 and of v0 is moved into scratch
; d registers, then two stnp instructions write to [x0, #16] and [x0].
3 define void @test_stnp_v4i64(<4 x i64>* %p, <4 x i64> %v) #0 {
4 ; CHECK-LABEL: test_stnp_v4i64:
5 ; CHECK-NEXT: mov d[[HI1:[0-9]+]], v1[1]
6 ; CHECK-NEXT: mov d[[HI0:[0-9]+]], v0[1]
7 ; CHECK-NEXT: stnp d1, d[[HI1]], [x0, #16]
8 ; CHECK-NEXT: stnp d0, d[[HI0]], [x0]
10 store <4 x i64> %v, <4 x i64>* %p, align 1, !nontemporal !0
; Nontemporal store of a <4 x i32> through %p with align 1.
; Expected output: v0[1] is moved into a scratch d register and a single
; stnp writes d0 and that register to [x0].
14 define void @test_stnp_v4i32(<4 x i32>* %p, <4 x i32> %v) #0 {
15 ; CHECK-LABEL: test_stnp_v4i32:
16 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
17 ; CHECK-NEXT: stnp d0, d[[HI]], [x0]
19 store <4 x i32> %v, <4 x i32>* %p, align 1, !nontemporal !0
; Nontemporal store of a <8 x i16> through %p with align 1.
; Expected output matches the other 128-bit vector cases in this file:
; one mov of v0[1] into a d register, then stnp d0, dN, [x0].
23 define void @test_stnp_v8i16(<8 x i16>* %p, <8 x i16> %v) #0 {
24 ; CHECK-LABEL: test_stnp_v8i16:
25 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
26 ; CHECK-NEXT: stnp d0, d[[HI]], [x0]
28 store <8 x i16> %v, <8 x i16>* %p, align 1, !nontemporal !0
; Nontemporal store of a <16 x i8> through %p with align 1.
; Expected output: mov of v0[1] into a scratch d register followed by
; stnp d0, dN, [x0].
32 define void @test_stnp_v16i8(<16 x i8>* %p, <16 x i8> %v) #0 {
33 ; CHECK-LABEL: test_stnp_v16i8:
34 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
35 ; CHECK-NEXT: stnp d0, d[[HI]], [x0]
37 store <16 x i8> %v, <16 x i8>* %p, align 1, !nontemporal !0
; Nontemporal store of a <2 x i32> (64 bits total) through %p with align 1.
; Expected output uses s registers: v0[1] is moved into a scratch s register
; and stnp writes s0 and that register to [x0].
41 define void @test_stnp_v2i32(<2 x i32>* %p, <2 x i32> %v) #0 {
42 ; CHECK-LABEL: test_stnp_v2i32:
43 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
44 ; CHECK-NEXT: stnp s0, s[[HI]], [x0]
46 store <2 x i32> %v, <2 x i32>* %p, align 1, !nontemporal !0
; Nontemporal store of a <4 x i16> through %p with align 1.
; Expected output is the s-register pair form: mov of v0[1] into a scratch
; s register, then stnp s0, sN, [x0].
50 define void @test_stnp_v4i16(<4 x i16>* %p, <4 x i16> %v) #0 {
51 ; CHECK-LABEL: test_stnp_v4i16:
52 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
53 ; CHECK-NEXT: stnp s0, s[[HI]], [x0]
55 store <4 x i16> %v, <4 x i16>* %p, align 1, !nontemporal !0
; Nontemporal store of a <8 x i8> through %p with align 1.
; Expected output: mov of v0[1] into a scratch s register, then
; stnp s0, sN, [x0].
59 define void @test_stnp_v8i8(<8 x i8>* %p, <8 x i8> %v) #0 {
60 ; CHECK-LABEL: test_stnp_v8i8:
61 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
62 ; CHECK-NEXT: stnp s0, s[[HI]], [x0]
64 store <8 x i8> %v, <8 x i8>* %p, align 1, !nontemporal !0
; Floating-point variant: nontemporal store of a <2 x double> through %p with
; align 1. Expected output: mov of v0[1] into a scratch d register, then
; stnp d0, dN, [x0].
68 define void @test_stnp_v2f64(<2 x double>* %p, <2 x double> %v) #0 {
69 ; CHECK-LABEL: test_stnp_v2f64:
70 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
71 ; CHECK-NEXT: stnp d0, d[[HI]], [x0]
73 store <2 x double> %v, <2 x double>* %p, align 1, !nontemporal !0
; Nontemporal store of a <4 x float> through %p with align 1.
; Expected output: mov of v0[1] into a scratch d register, then
; stnp d0, dN, [x0] with no immediate offset.
77 define void @test_stnp_v4f32(<4 x float>* %p, <4 x float> %v) #0 {
78 ; CHECK-LABEL: test_stnp_v4f32:
79 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
80 ; CHECK-NEXT: stnp d0, d[[HI]], [x0]
82 store <4 x float> %v, <4 x float>* %p, align 1, !nontemporal !0
; Nontemporal store of a <2 x float> through %p with align 1.
; Expected output: mov of v0[1] into a scratch s register, then
; stnp s0, sN, [x0] with no immediate offset.
86 define void @test_stnp_v2f32(<2 x float>* %p, <2 x float> %v) #0 {
87 ; CHECK-LABEL: test_stnp_v2f32:
88 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
89 ; CHECK-NEXT: stnp s0, s[[HI]], [x0]
91 store <2 x float> %v, <2 x float>* %p, align 1, !nontemporal !0
; Single-element vector case: nontemporal store of a <1 x double> through %p
; with align 1. The expected output still emits a pair store, splitting the
; value across s0 and a scratch s register loaded from v0[1].
95 define void @test_stnp_v1f64(<1 x double>* %p, <1 x double> %v) #0 {
96 ; CHECK-LABEL: test_stnp_v1f64:
97 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
98 ; CHECK-NEXT: stnp s0, s[[HI]], [x0]
100 store <1 x double> %v, <1 x double>* %p, align 1, !nontemporal !0
; Single-element integer vector case: nontemporal store of a <1 x i64>
; through %p with align 1. The expected output is the same s-register pair
; sequence as the <1 x double> case (mov of v0[1], then stnp s0, sN, [x0]).
104 define void @test_stnp_v1i64(<1 x i64>* %p, <1 x i64> %v) #0 {
105 ; CHECK-LABEL: test_stnp_v1i64:
106 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
107 ; CHECK-NEXT: stnp s0, s[[HI]], [x0]
109 store <1 x i64> %v, <1 x i64>* %p, align 1, !nontemporal !0
; Scalar case: nontemporal store of an i64 through %p with align 1.
; Expected output: x1 is shifted right by 32 into a scratch register, then
; stnp writes w1 and the w view of that scratch register to [x0].
113 define void @test_stnp_i64(i64* %p, i64 %v) #0 {
114 ; CHECK-LABEL: test_stnp_i64:
115 ; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
116 ; CHECK-NEXT: stnp w1, w[[HI]], [x0]
118 store i64 %v, i64* %p, align 1, !nontemporal !0
; Nontemporal store of a <2 x double> one element past %p (getelementptr
; index 1). Expected output folds the displacement into the pair store as
; [x0, #16] rather than computing the address separately.
123 define void @test_stnp_v2f64_offset(<2 x double>* %p, <2 x double> %v) #0 {
124 ; CHECK-LABEL: test_stnp_v2f64_offset:
125 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
126 ; CHECK-NEXT: stnp d0, d[[HI]], [x0, #16]
128 %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 1
129 store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0
; Negative-offset counterpart: the store targets one <2 x double> element
; before %p (getelementptr index -1). Expected output folds the displacement
; as [x0, #-16].
133 define void @test_stnp_v2f64_offset_neg(<2 x double>* %p, <2 x double> %v) #0 {
134 ; CHECK-LABEL: test_stnp_v2f64_offset_neg:
135 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
136 ; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-16]
138 %tmp0 = getelementptr <2 x double>, <2 x double>* %p, i32 -1
139 store <2 x double> %v, <2 x double>* %tmp0, align 1, !nontemporal !0
; Nontemporal store of a <2 x float> one element past %p (getelementptr
; index 1). Expected output is the s-register pair store with the
; displacement folded as [x0, #8].
143 define void @test_stnp_v2f32_offset(<2 x float>* %p, <2 x float> %v) #0 {
144 ; CHECK-LABEL: test_stnp_v2f32_offset:
145 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
146 ; CHECK-NEXT: stnp s0, s[[HI]], [x0, #8]
148 %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 1
149 store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0
; Negative-offset counterpart for <2 x float>: the store targets one element
; before %p (getelementptr index -1). Expected output folds the displacement
; as [x0, #-8].
153 define void @test_stnp_v2f32_offset_neg(<2 x float>* %p, <2 x float> %v) #0 {
154 ; CHECK-LABEL: test_stnp_v2f32_offset_neg:
155 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
156 ; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-8]
158 %tmp0 = getelementptr <2 x float>, <2 x float>* %p, i32 -1
159 store <2 x float> %v, <2 x float>* %tmp0, align 1, !nontemporal !0
; Scalar i64 nontemporal store one element past %p (getelementptr index 1).
; Expected output: lsr of x1 by 32 into a scratch register, then the w-register
; pair store with the displacement folded as [x0, #8].
163 define void @test_stnp_i64_offset(i64* %p, i64 %v) #0 {
164 ; CHECK-LABEL: test_stnp_i64_offset:
165 ; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
166 ; CHECK-NEXT: stnp w1, w[[HI]], [x0, #8]
168 %tmp0 = getelementptr i64, i64* %p, i32 1
169 store i64 %v, i64* %tmp0, align 1, !nontemporal !0
; Scalar i64 nontemporal store one element before %p (getelementptr index -1).
; Expected output: lsr of x1 by 32 into a scratch register, then the w-register
; pair store with the displacement folded as [x0, #-8].
173 define void @test_stnp_i64_offset_neg(i64* %p, i64 %v) #0 {
174 ; CHECK-LABEL: test_stnp_i64_offset_neg:
175 ; CHECK-NEXT: lsr x[[HI:[0-9]+]], x1, #32
176 ; CHECK-NEXT: stnp w1, w[[HI]], [x0, #-8]
178 %tmp0 = getelementptr i64, i64* %p, i32 -1
179 store i64 %v, i64* %tmp0, align 1, !nontemporal !0
; Byte offset of 4 applied to an i8* before bitcasting to <4 x float>*.
; Expected output does NOT fold the offset: the address is first computed with
; add xN, x0, #4 and the stnp then uses [xN] with no immediate.
; NOTE(review): presumably 4 is rejected because it is not a multiple of the
; 8-byte scale of the d-register stnp immediate - confirm against the ISA.
183 define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 {
184 ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4:
185 ; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #4
186 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
187 ; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
189 %tmp0 = getelementptr i8, i8* %p, i32 4
190 %tmp1 = bitcast i8* %tmp0 to <4 x float>*
191 store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
; Byte offset of -4 applied to an i8* before bitcasting to <4 x float>*.
; Expected output does NOT fold the offset: the address is first computed with
; sub xN, x0, #4 and the stnp then uses [xN] with no immediate.
; NOTE(review): presumably -4 is rejected for the same scaling reason as the
; +4 case - confirm against the ISA.
195 define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 {
196 ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4:
197 ; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #4
198 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
199 ; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
201 %tmp0 = getelementptr i8, i8* %p, i32 -4
202 %tmp1 = bitcast i8* %tmp0 to <4 x float>*
203 store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
; Byte offset of 512 applied to an i8* before bitcasting to <4 x float>*.
; Expected output materializes the address with add xN, x0, #512 and stores
; through [xN] instead of folding the offset into the stnp.
; NOTE(review): presumably 512 is just past the largest encodable positive
; immediate for this stnp form (the 504 case in this file is folded) - confirm.
207 define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 {
208 ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512:
209 ; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #512
210 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
211 ; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
213 %tmp0 = getelementptr i8, i8* %p, i32 512
214 %tmp1 = bitcast i8* %tmp0 to <4 x float>*
215 store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
; Byte offset of 504 applied to an i8* before bitcasting to <4 x float>*.
; Expected output folds the offset directly into the pair store as
; [x0, #504], with no separate address computation.
; NOTE(review): 504 looks like the upper boundary of the foldable range for
; the d-register form, given that 508 and 512 are not folded in this file.
219 define void @test_stnp_v4f32_offset_504(i8* %p, <4 x float> %v) #0 {
220 ; CHECK-LABEL: test_stnp_v4f32_offset_504:
221 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
222 ; CHECK-NEXT: stnp d0, d[[HI]], [x0, #504]
224 %tmp0 = getelementptr i8, i8* %p, i32 504
225 %tmp1 = bitcast i8* %tmp0 to <4 x float>*
226 store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
; Byte offset of 508 applied to an i8* before bitcasting to <4 x float>*.
; Expected output materializes the address with add xN, x0, #508 and stores
; through [xN] instead of folding the offset.
; NOTE(review): presumably rejected because 508 is not a multiple of 8 -
; confirm against the ISA.
230 define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 {
231 ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508:
232 ; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #508
233 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
234 ; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
236 %tmp0 = getelementptr i8, i8* %p, i32 508
237 %tmp1 = bitcast i8* %tmp0 to <4 x float>*
238 store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
; Byte offset of -520 applied to an i8* before bitcasting to <4 x float>*.
; Expected output materializes the address with sub xN, x0, #520 and stores
; through [xN] instead of folding the offset.
; NOTE(review): presumably -520 is just below the smallest encodable immediate
; for this stnp form (the -512 case in this file is folded) - confirm.
242 define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 x float> %v) #0 {
243 ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520:
244 ; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #520
245 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
246 ; CHECK-NEXT: stnp d0, d[[HI]], [x[[PTR]]]
248 %tmp0 = getelementptr i8, i8* %p, i32 -520
249 %tmp1 = bitcast i8* %tmp0 to <4 x float>*
250 store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
; Byte offset of -512 applied to an i8* before bitcasting to <4 x float>*.
; Expected output folds the offset directly into the pair store as
; [x0, #-512], with no separate address computation.
; NOTE(review): -512 looks like the lower boundary of the foldable range for
; the d-register form, given that -520 is not folded in this file.
254 define void @test_stnp_v4f32_offset_neg_512(i8* %p, <4 x float> %v) #0 {
255 ; CHECK-LABEL: test_stnp_v4f32_offset_neg_512:
256 ; CHECK-NEXT: mov d[[HI:[0-9]+]], v0[1]
257 ; CHECK-NEXT: stnp d0, d[[HI]], [x0, #-512]
259 %tmp0 = getelementptr i8, i8* %p, i32 -512
260 %tmp1 = bitcast i8* %tmp0 to <4 x float>*
261 store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
; Byte offset of 256 applied to an i8* before bitcasting to <2 x float>*.
; Expected output materializes the address with add xN, x0, #256 and the
; s-register stnp stores through [xN] instead of folding the offset.
; NOTE(review): presumably 256 is just past the largest encodable positive
; immediate for the s-register form (252 is folded in this file) - confirm.
266 define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 {
267 ; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256:
268 ; CHECK-NEXT: add x[[PTR:[0-9]+]], x0, #256
269 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
270 ; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]]
272 %tmp0 = getelementptr i8, i8* %p, i32 256
273 %tmp1 = bitcast i8* %tmp0 to <2 x float>*
274 store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
; Byte offset of 252 applied to an i8* before bitcasting to <2 x float>*.
; Expected output folds the offset directly into the s-register pair store as
; [x0, #252].
; NOTE(review): 252 looks like the upper boundary of the foldable range for
; the s-register form, given that 256 is not folded in this file.
278 define void @test_stnp_v2f32_offset_252(i8* %p, <2 x float> %v) #0 {
279 ; CHECK-LABEL: test_stnp_v2f32_offset_252:
280 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
281 ; CHECK-NEXT: stnp s0, s[[HI]], [x0, #252]
283 %tmp0 = getelementptr i8, i8* %p, i32 252
284 %tmp1 = bitcast i8* %tmp0 to <2 x float>*
285 store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
; Byte offset of -260 applied to an i8* before bitcasting to <2 x float>*.
; Expected output materializes the address with sub xN, x0, #260 and the
; s-register stnp stores through [xN] instead of folding the offset.
; NOTE(review): presumably -260 is just below the smallest encodable immediate
; for the s-register form (-256 is folded in this file) - confirm.
289 define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 {
290 ; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260:
291 ; CHECK-NEXT: sub x[[PTR:[0-9]+]], x0, #260
292 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
293 ; CHECK-NEXT: stnp s0, s[[HI]], [x[[PTR]]]
295 %tmp0 = getelementptr i8, i8* %p, i32 -260
296 %tmp1 = bitcast i8* %tmp0 to <2 x float>*
297 store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
; Byte offset of -256 applied to an i8* before bitcasting to <2 x float>*.
; Expected output folds the offset directly into the s-register pair store as
; [x0, #-256].
; NOTE(review): -256 looks like the lower boundary of the foldable range for
; the s-register form, given that -260 is not folded in this file.
301 define void @test_stnp_v2f32_offset_neg_256(i8* %p, <2 x float> %v) #0 {
302 ; CHECK-LABEL: test_stnp_v2f32_offset_neg_256:
303 ; CHECK-NEXT: mov s[[HI:[0-9]+]], v0[1]
304 ; CHECK-NEXT: stnp s0, s[[HI]], [x0, #-256]
306 %tmp0 = getelementptr i8, i8* %p, i32 -256
307 %tmp1 = bitcast i8* %tmp0 to <2 x float>*
308 store <2 x float> %v, <2 x float>* %tmp1, align 1, !nontemporal !0
; External function taking a <4 x float>*; declared without a body. The
; alloca-based tests in this file pass their stack slot to it, and the
; expected output references it as _dummy.
312 declare void @dummy(<4 x float>*)
; Nontemporal store of a <4 x float> into a stack slot (alloca), whose address
; is then passed to @dummy. Expected output: a stnp of d0 and another d
; register addressed directly off sp, immediately followed by mov x0, sp and
; the call. The stnp pattern is a plain (non -NEXT) match, so other
; instructions may appear between the label and the store.
314 define void @test_stnp_v4f32_offset_alloca(<4 x float> %v) #0 {
315 ; CHECK-LABEL: test_stnp_v4f32_offset_alloca:
316 ; CHECK: stnp d0, d{{.*}}, [sp]
317 ; CHECK-NEXT: mov x0, sp
318 ; CHECK-NEXT: bl _dummy
319 %tmp0 = alloca <4 x float>
320 store <4 x float> %v, <4 x float>* %tmp0, align 1, !nontemporal !0
321 call void @dummy(<4 x float>* %tmp0)
; Allocates two <4 x float> stack slots and nontemporally stores %v into the
; second one (getelementptr index 1); the base of the allocation is passed to
; @dummy. Expected output: the stnp addresses the slot as [sp, #16], followed
; immediately by mov x0, sp and the call. The stnp pattern is a plain
; (non -NEXT) match, so other instructions may precede it after the label.
325 define void @test_stnp_v4f32_offset_alloca_2(<4 x float> %v) #0 {
326 ; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2:
327 ; CHECK: stnp d0, d{{.*}}, [sp, #16]
328 ; CHECK-NEXT: mov x0, sp
329 ; CHECK-NEXT: bl _dummy
330 %tmp0 = alloca <4 x float>, i32 2
331 %tmp1 = getelementptr <4 x float>, <4 x float>* %tmp0, i32 1
332 store <4 x float> %v, <4 x float>* %tmp1, align 1, !nontemporal !0
333 call void @dummy(<4 x float>* %tmp0)
; Attribute group #0, attached to every function definition visible in this
; file: marks them nounwind.
339 attributes #0 = { nounwind }