1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mattr=+sve -force-streaming-compatible-sve < %s | FileCheck %s
5 target triple = "aarch64-unknown-linux-gnu"
; Masked store of an all-zero <4 x i8> to %dst, selected per lane by %mask.
; Expected code: the mask in d0 is handled as .h lanes (ptrue p0.h, vl4),
; turned into a predicate with lsl #15 / asr #15 / cmpne against zero, and the
; zero vector is written with a predicated st1b from .h lanes.
; The CHECK lines are autogenerated (see the note at the top of the file).
11 define void @masked_store_v4i8(ptr %dst, <4 x i1> %mask) {
12 ; CHECK-LABEL: masked_store_v4i8:
14 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
15 ; CHECK-NEXT: ptrue p0.h, vl4
16 ; CHECK-NEXT: lsl z0.h, z0.h, #15
17 ; CHECK-NEXT: asr z0.h, z0.h, #15
18 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
19 ; CHECK-NEXT: mov z0.h, #0 // =0x0
20 ; CHECK-NEXT: st1b { z0.h }, p0, [x0]
22 call void @llvm.masked.store.v4i8(<4 x i8> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
; Masked store of an all-zero <8 x i8> to %dst, selected per lane by %mask.
; Expected code: the mask in d0 is handled as .b lanes (ptrue p0.b, vl8),
; turned into a predicate with lsl #7 / asr #7 / cmpne against zero, and the
; zero vector is written with a single predicated st1b.
; The CHECK lines are autogenerated (see the note at the top of the file).
26 define void @masked_store_v8i8(ptr %dst, <8 x i1> %mask) {
27 ; CHECK-LABEL: masked_store_v8i8:
29 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
30 ; CHECK-NEXT: ptrue p0.b, vl8
31 ; CHECK-NEXT: lsl z0.b, z0.b, #7
32 ; CHECK-NEXT: asr z0.b, z0.b, #7
33 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
34 ; CHECK-NEXT: mov z0.b, #0 // =0x0
35 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
37 call void @llvm.masked.store.v8i8(<8 x i8> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
; Masked store of an all-zero <16 x i8> to %dst, selected per lane by %mask.
; Expected code: the mask in q0 is handled as .b lanes (ptrue p0.b, vl16),
; turned into a predicate with lsl #7 / asr #7 / cmpne against zero, and the
; zero vector is written with a single predicated st1b.
; The CHECK lines are autogenerated (see the note at the top of the file).
41 define void @masked_store_v16i8(ptr %dst, <16 x i1> %mask) {
42 ; CHECK-LABEL: masked_store_v16i8:
44 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
45 ; CHECK-NEXT: ptrue p0.b, vl16
46 ; CHECK-NEXT: lsl z0.b, z0.b, #7
47 ; CHECK-NEXT: asr z0.b, z0.b, #7
48 ; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0
49 ; CHECK-NEXT: mov z0.b, #0 // =0x0
50 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
52 call void @llvm.masked.store.v16i8(<16 x i8> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask)
; Masked store of an all-zero <32 x i8> to %dst, selected per lane by %mask.
; Expected code: the 32 mask elements are read from w1-w7 and from stack slots
; (ldr w8/w9/w10, [sp, #N]) and written one byte each into a 32-byte stack
; buffer (strb ..., [sp] through [sp, #31]). The buffer is reloaded as q1/q0
; with ldp, each half is turned into a .b predicate with lsl #7 / asr #7 /
; cmpne, and the zero vector is written with two predicated st1b stores: one
; at [x0, x8] with x8 = 16 and one at [x0].
; The CHECK lines are autogenerated (see the note at the top of the file).
56 define void @masked_store_v32i8(ptr %dst, <32 x i1> %mask) {
57 ; CHECK-LABEL: masked_store_v32i8:
59 ; CHECK-NEXT: sub sp, sp, #32
60 ; CHECK-NEXT: .cfi_def_cfa_offset 32
61 ; CHECK-NEXT: ldr w8, [sp, #96]
62 ; CHECK-NEXT: ldr w9, [sp, #88]
63 ; CHECK-NEXT: ptrue p0.b, vl16
64 ; CHECK-NEXT: ldr w10, [sp, #120]
65 ; CHECK-NEXT: strb w7, [sp, #6]
66 ; CHECK-NEXT: strb w8, [sp, #15]
67 ; CHECK-NEXT: ldr w8, [sp, #80]
68 ; CHECK-NEXT: strb w9, [sp, #14]
69 ; CHECK-NEXT: ldr w9, [sp, #72]
70 ; CHECK-NEXT: strb w8, [sp, #13]
71 ; CHECK-NEXT: ldr w8, [sp, #64]
72 ; CHECK-NEXT: strb w9, [sp, #12]
73 ; CHECK-NEXT: ldr w9, [sp, #56]
74 ; CHECK-NEXT: strb w8, [sp, #11]
75 ; CHECK-NEXT: ldr w8, [sp, #48]
76 ; CHECK-NEXT: strb w9, [sp, #10]
77 ; CHECK-NEXT: ldr w9, [sp, #40]
78 ; CHECK-NEXT: strb w8, [sp, #9]
79 ; CHECK-NEXT: ldr w8, [sp, #32]
80 ; CHECK-NEXT: strb w9, [sp, #8]
81 ; CHECK-NEXT: ldr w9, [sp, #216]
82 ; CHECK-NEXT: strb w8, [sp, #7]
83 ; CHECK-NEXT: ldr w8, [sp, #224]
84 ; CHECK-NEXT: strb w9, [sp, #30]
85 ; CHECK-NEXT: ldr w9, [sp, #200]
86 ; CHECK-NEXT: strb w8, [sp, #31]
87 ; CHECK-NEXT: ldr w8, [sp, #208]
88 ; CHECK-NEXT: strb w9, [sp, #28]
89 ; CHECK-NEXT: ldr w9, [sp, #184]
90 ; CHECK-NEXT: strb w8, [sp, #29]
91 ; CHECK-NEXT: ldr w8, [sp, #192]
92 ; CHECK-NEXT: strb w9, [sp, #26]
93 ; CHECK-NEXT: ldr w9, [sp, #168]
94 ; CHECK-NEXT: strb w8, [sp, #27]
95 ; CHECK-NEXT: ldr w8, [sp, #176]
96 ; CHECK-NEXT: strb w9, [sp, #24]
97 ; CHECK-NEXT: ldr w9, [sp, #152]
98 ; CHECK-NEXT: strb w8, [sp, #25]
99 ; CHECK-NEXT: ldr w8, [sp, #160]
100 ; CHECK-NEXT: strb w9, [sp, #22]
101 ; CHECK-NEXT: ldr w9, [sp, #136]
102 ; CHECK-NEXT: strb w8, [sp, #23]
103 ; CHECK-NEXT: ldr w8, [sp, #144]
104 ; CHECK-NEXT: strb w9, [sp, #20]
105 ; CHECK-NEXT: ldr w9, [sp, #112]
106 ; CHECK-NEXT: strb w8, [sp, #21]
107 ; CHECK-NEXT: ldr w8, [sp, #128]
108 ; CHECK-NEXT: strb w6, [sp, #5]
109 ; CHECK-NEXT: strb w8, [sp, #19]
110 ; CHECK-NEXT: ldr w8, [sp, #104]
111 ; CHECK-NEXT: strb w5, [sp, #4]
112 ; CHECK-NEXT: strb w4, [sp, #3]
113 ; CHECK-NEXT: strb w3, [sp, #2]
114 ; CHECK-NEXT: strb w2, [sp, #1]
115 ; CHECK-NEXT: strb w1, [sp]
116 ; CHECK-NEXT: strb w10, [sp, #18]
117 ; CHECK-NEXT: strb w9, [sp, #17]
118 ; CHECK-NEXT: strb w8, [sp, #16]
119 ; CHECK-NEXT: mov w8, #16 // =0x10
120 ; CHECK-NEXT: ldp q1, q0, [sp]
121 ; CHECK-NEXT: lsl z0.b, z0.b, #7
122 ; CHECK-NEXT: lsl z1.b, z1.b, #7
123 ; CHECK-NEXT: asr z0.b, z0.b, #7
124 ; CHECK-NEXT: asr z1.b, z1.b, #7
125 ; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0
126 ; CHECK-NEXT: mov z0.b, #0 // =0x0
127 ; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
128 ; CHECK-NEXT: st1b { z0.b }, p1, [x0, x8]
129 ; CHECK-NEXT: st1b { z0.b }, p0, [x0]
130 ; CHECK-NEXT: add sp, sp, #32
132 call void @llvm.masked.store.v32i8(<32 x i8> zeroinitializer, ptr %dst, i32 8, <32 x i1> %mask)
; Masked store of an all-zero <2 x half> to %dst, selected per lane by %mask.
; Expected code: the two mask lanes are taken from z0.s[0] and z0.s[1] via
; fmov and written as halfwords to [sp, #8] and [sp, #10]; the word at
; [sp, #12] is zeroed (str wzr). The 8-byte slot is reloaded into d0, turned
; into a .h predicate (ptrue p0.h, vl4; lsl #15 / asr #15 / cmpne), and the
; zero vector is written with a predicated st1h.
; The CHECK lines are autogenerated (see the note at the top of the file).
136 define void @masked_store_v2f16(ptr %dst, <2 x i1> %mask) {
137 ; CHECK-LABEL: masked_store_v2f16:
139 ; CHECK-NEXT: sub sp, sp, #16
140 ; CHECK-NEXT: .cfi_def_cfa_offset 16
141 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
142 ; CHECK-NEXT: mov z1.s, z0.s[1]
143 ; CHECK-NEXT: fmov w8, s0
144 ; CHECK-NEXT: str wzr, [sp, #12]
145 ; CHECK-NEXT: ptrue p0.h, vl4
146 ; CHECK-NEXT: strh w8, [sp, #8]
147 ; CHECK-NEXT: fmov w8, s1
148 ; CHECK-NEXT: strh w8, [sp, #10]
149 ; CHECK-NEXT: ldr d0, [sp, #8]
150 ; CHECK-NEXT: lsl z0.h, z0.h, #15
151 ; CHECK-NEXT: asr z0.h, z0.h, #15
152 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
153 ; CHECK-NEXT: mov z0.h, #0 // =0x0
154 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
155 ; CHECK-NEXT: add sp, sp, #16
157 call void @llvm.masked.store.v2f16(<2 x half> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask)
; Masked store of an all-zero <4 x half> to %dst, selected per lane by %mask.
; Expected code: the mask in d0 is handled as .h lanes (ptrue p0.h, vl4),
; turned into a predicate with lsl #15 / asr #15 / cmpne against zero, and the
; zero vector is written with a single predicated st1h.
; The CHECK lines are autogenerated (see the note at the top of the file).
161 define void @masked_store_v4f16(ptr %dst, <4 x i1> %mask) {
162 ; CHECK-LABEL: masked_store_v4f16:
164 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
165 ; CHECK-NEXT: ptrue p0.h, vl4
166 ; CHECK-NEXT: lsl z0.h, z0.h, #15
167 ; CHECK-NEXT: asr z0.h, z0.h, #15
168 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
169 ; CHECK-NEXT: mov z0.h, #0 // =0x0
170 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
172 call void @llvm.masked.store.v4f16(<4 x half> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
; Masked store of an all-zero <8 x half> to %dst, selected per lane by %mask.
; Expected code: the mask in d0 is unpacked from .b to .h lanes (uunpklo),
; turned into a predicate (ptrue p0.h, vl8; lsl #15 / asr #15 / cmpne), and
; the zero vector is written with a single predicated st1h.
; The CHECK lines are autogenerated (see the note at the top of the file).
176 define void @masked_store_v8f16(ptr %dst, <8 x i1> %mask) {
177 ; CHECK-LABEL: masked_store_v8f16:
179 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
180 ; CHECK-NEXT: ptrue p0.h, vl8
181 ; CHECK-NEXT: uunpklo z0.h, z0.b
182 ; CHECK-NEXT: lsl z0.h, z0.h, #15
183 ; CHECK-NEXT: asr z0.h, z0.h, #15
184 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
185 ; CHECK-NEXT: mov z0.h, #0 // =0x0
186 ; CHECK-NEXT: st1h { z0.h }, p0, [x0]
188 call void @llvm.masked.store.v8f16(<8 x half> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
; Masked store of an all-zero <16 x half> to %dst, selected per lane by %mask.
; Expected code: the mask in q0 is split in two; the upper 8 bytes are brought
; down with ext #8 into z1. Each half is unpacked from .b to .h (uunpklo) and
; turned into a predicate (ptrue p0.h, vl8; lsl #15 / asr #15 / cmpne). The
; zero vector is written with two predicated st1h stores: one at
; [x0, x8, lsl #1] with x8 = 8 and one at [x0].
; The CHECK lines are autogenerated (see the note at the top of the file).
192 define void @masked_store_v16f16(ptr %dst, <16 x i1> %mask) {
193 ; CHECK-LABEL: masked_store_v16f16:
195 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
196 ; CHECK-NEXT: mov z1.d, z0.d
197 ; CHECK-NEXT: ptrue p0.h, vl8
198 ; CHECK-NEXT: mov x8, #8 // =0x8
199 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8
200 ; CHECK-NEXT: uunpklo z0.h, z0.b
201 ; CHECK-NEXT: uunpklo z1.h, z1.b
202 ; CHECK-NEXT: lsl z0.h, z0.h, #15
203 ; CHECK-NEXT: lsl z1.h, z1.h, #15
204 ; CHECK-NEXT: asr z0.h, z0.h, #15
205 ; CHECK-NEXT: asr z1.h, z1.h, #15
206 ; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0
207 ; CHECK-NEXT: mov z1.h, #0 // =0x0
208 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0
209 ; CHECK-NEXT: st1h { z1.h }, p1, [x0, x8, lsl #1]
210 ; CHECK-NEXT: st1h { z1.h }, p0, [x0]
212 call void @llvm.masked.store.v16f16(<16 x half> zeroinitializer, ptr %dst, i32 8, <16 x i1> %mask)
; Masked store of an all-zero <4 x float> to %dst, selected per lane by %mask.
; Expected code: the mask in d0 is unpacked from .h to .s lanes (uunpklo),
; turned into a predicate (ptrue p0.s, vl4; lsl #31 / asr #31 / cmpne), and
; the zero vector is written with a single predicated st1w.
; The CHECK lines are autogenerated (see the note at the top of the file).
216 define void @masked_store_v4f32(ptr %dst, <4 x i1> %mask) {
217 ; CHECK-LABEL: masked_store_v4f32:
219 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
220 ; CHECK-NEXT: ptrue p0.s, vl4
221 ; CHECK-NEXT: uunpklo z0.s, z0.h
222 ; CHECK-NEXT: lsl z0.s, z0.s, #31
223 ; CHECK-NEXT: asr z0.s, z0.s, #31
224 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
225 ; CHECK-NEXT: mov z0.s, #0 // =0x0
226 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
228 call void @llvm.masked.store.v4f32(<4 x float> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
; Masked store of an all-zero <8 x float> to %dst, selected per lane by %mask.
; Expected code: the eight mask bytes in d0 are extracted one at a time
; (mov zN.b, z0.b[i]; fmov) and written as halfwords into a 16-byte stack
; area: bytes 4-7 go to [sp, #8..#14], bytes 0-3 to [sp..#6]. Each 8-byte
; half is reloaded (ldr d1 / ldr d0), unpacked from .h to .s (uunpklo) and
; turned into a predicate (ptrue p0.s, vl4; lsl #31 / asr #31 / cmpne). The
; zero vector in z2 is written with two predicated st1w stores: one at
; [x0, x8, lsl #2] with x8 = 4 and one at [x0].
; The CHECK lines are autogenerated (see the note at the top of the file).
232 define void @masked_store_v8f32(ptr %dst, <8 x i1> %mask) {
233 ; CHECK-LABEL: masked_store_v8f32:
235 ; CHECK-NEXT: sub sp, sp, #16
236 ; CHECK-NEXT: .cfi_def_cfa_offset 16
237 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
238 ; CHECK-NEXT: mov z1.b, z0.b[7]
239 ; CHECK-NEXT: mov z2.b, z0.b[6]
240 ; CHECK-NEXT: mov z3.b, z0.b[5]
241 ; CHECK-NEXT: mov z4.b, z0.b[4]
242 ; CHECK-NEXT: ptrue p0.s, vl4
243 ; CHECK-NEXT: fmov w8, s1
244 ; CHECK-NEXT: fmov w9, s2
245 ; CHECK-NEXT: mov z2.s, #0 // =0x0
246 ; CHECK-NEXT: strh w8, [sp, #14]
247 ; CHECK-NEXT: fmov w8, s3
248 ; CHECK-NEXT: mov z3.b, z0.b[2]
249 ; CHECK-NEXT: strh w9, [sp, #12]
250 ; CHECK-NEXT: fmov w9, s4
251 ; CHECK-NEXT: mov z4.b, z0.b[1]
252 ; CHECK-NEXT: strh w8, [sp, #10]
253 ; CHECK-NEXT: mov x8, #4 // =0x4
254 ; CHECK-NEXT: strh w9, [sp, #8]
255 ; CHECK-NEXT: fmov w9, s0
256 ; CHECK-NEXT: ldr d1, [sp, #8]
257 ; CHECK-NEXT: uunpklo z1.s, z1.h
258 ; CHECK-NEXT: lsl z1.s, z1.s, #31
259 ; CHECK-NEXT: asr z1.s, z1.s, #31
260 ; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
261 ; CHECK-NEXT: mov z1.b, z0.b[3]
262 ; CHECK-NEXT: st1w { z2.s }, p1, [x0, x8, lsl #2]
263 ; CHECK-NEXT: fmov w8, s1
264 ; CHECK-NEXT: strh w9, [sp]
265 ; CHECK-NEXT: strh w8, [sp, #6]
266 ; CHECK-NEXT: fmov w8, s3
267 ; CHECK-NEXT: strh w8, [sp, #4]
268 ; CHECK-NEXT: fmov w8, s4
269 ; CHECK-NEXT: strh w8, [sp, #2]
270 ; CHECK-NEXT: ldr d0, [sp]
271 ; CHECK-NEXT: uunpklo z0.s, z0.h
272 ; CHECK-NEXT: lsl z0.s, z0.s, #31
273 ; CHECK-NEXT: asr z0.s, z0.s, #31
274 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
275 ; CHECK-NEXT: st1w { z2.s }, p0, [x0]
276 ; CHECK-NEXT: add sp, sp, #16
278 call void @llvm.masked.store.v8f32(<8 x float> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
; Masked store of an all-zero <2 x double> to %dst, selected per lane by %mask.
; Expected code: the mask in d0 is unpacked from .s to .d lanes (uunpklo),
; turned into a predicate (ptrue p0.d, vl2; lsl #63 / asr #63 / cmpne), and
; the zero vector is written with a single predicated st1d.
; The CHECK lines are autogenerated (see the note at the top of the file).
282 define void @masked_store_v2f64(ptr %dst, <2 x i1> %mask) {
283 ; CHECK-LABEL: masked_store_v2f64:
285 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
286 ; CHECK-NEXT: ptrue p0.d, vl2
287 ; CHECK-NEXT: uunpklo z0.d, z0.s
288 ; CHECK-NEXT: lsl z0.d, z0.d, #63
289 ; CHECK-NEXT: asr z0.d, z0.d, #63
290 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
291 ; CHECK-NEXT: mov z0.d, #0 // =0x0
292 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
294 call void @llvm.masked.store.v2f64(<2 x double> zeroinitializer, ptr %dst, i32 8, <2 x i1> %mask)
; Masked store of an all-zero <4 x double> to %dst, selected per lane by %mask.
; Expected code: the mask in d0 is unpacked from .h to .s (uunpklo); the low
; half is then unpacked to .d into z1, and the high half is brought down with
; ext #8 and unpacked to .d in z0. Each half is turned into a predicate
; (ptrue p0.d, vl2; lsl #63 / asr #63 / cmpne). The zero vector is written
; with two predicated st1d stores: one at [x0, x8, lsl #3] with x8 = 2 and one
; at [x0].
; The CHECK lines are autogenerated (see the note at the top of the file).
298 define void @masked_store_v4f64(ptr %dst, <4 x i1> %mask) {
299 ; CHECK-LABEL: masked_store_v4f64:
301 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
302 ; CHECK-NEXT: ptrue p0.d, vl2
303 ; CHECK-NEXT: mov x8, #2 // =0x2
304 ; CHECK-NEXT: uunpklo z0.s, z0.h
305 ; CHECK-NEXT: uunpklo z1.d, z0.s
306 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
307 ; CHECK-NEXT: uunpklo z0.d, z0.s
308 ; CHECK-NEXT: lsl z1.d, z1.d, #63
309 ; CHECK-NEXT: lsl z0.d, z0.d, #63
310 ; CHECK-NEXT: asr z1.d, z1.d, #63
311 ; CHECK-NEXT: asr z0.d, z0.d, #63
312 ; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0
313 ; CHECK-NEXT: mov z0.d, #0 // =0x0
314 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
315 ; CHECK-NEXT: st1d { z0.d }, p1, [x0, x8, lsl #3]
316 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
318 call void @llvm.masked.store.v4f64(<4 x double> zeroinitializer, ptr %dst, i32 8, <4 x i1> %mask)
; Declarations of the llvm.masked.store intrinsic overloads called by the test
; functions in this file, one per stored vector type. Operands, in order: the
; value to store, the destination pointer, an i32 alignment, and the <N x i1>
; lane mask.
322 declare void @llvm.masked.store.v4i8(<4 x i8>, ptr, i32, <4 x i1>)
323 declare void @llvm.masked.store.v8i8(<8 x i8>, ptr, i32, <8 x i1>)
324 declare void @llvm.masked.store.v16i8(<16 x i8>, ptr, i32, <16 x i1>)
325 declare void @llvm.masked.store.v32i8(<32 x i8>, ptr, i32, <32 x i1>)
326 declare void @llvm.masked.store.v2f16(<2 x half>, ptr, i32, <2 x i1>)
327 declare void @llvm.masked.store.v4f16(<4 x half>, ptr, i32, <4 x i1>)
328 declare void @llvm.masked.store.v8f16(<8 x half>, ptr, i32, <8 x i1>)
329 declare void @llvm.masked.store.v16f16(<16 x half>, ptr, i32, <16 x i1>)
330 declare void @llvm.masked.store.v4f32(<4 x float>, ptr, i32, <4 x i1>)
331 declare void @llvm.masked.store.v8f32(<8 x float>, ptr, i32, <8 x i1>)
332 declare void @llvm.masked.store.v2f64(<2 x double>, ptr, i32, <2 x i1>)
333 declare void @llvm.masked.store.v4f64(<4 x double>, ptr, i32, <4 x i1>)