; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfh,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv32 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+zvfhmin,+zvfbfmin -verify-machineinstrs < %s | FileCheck %s
define void @masked_store_v1bf16(<1 x bfloat> %val, ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_store_v1bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v1bf16.p0(<1 x bfloat> %val, ptr %a, i32 8, <1 x i1> %mask)
  ret void
}
define void @masked_store_v1f16(<1 x half> %val, ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_store_v1f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v1f16.p0(<1 x half> %val, ptr %a, i32 8, <1 x i1> %mask)
  ret void
}
define void @masked_store_v1f32(<1 x float> %val, ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_store_v1f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v1f32.p0(<1 x float> %val, ptr %a, i32 8, <1 x i1> %mask)
  ret void
}
define void @masked_store_v1f64(<1 x double> %val, ptr %a, <1 x i1> %mask) {
; CHECK-LABEL: masked_store_v1f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v1f64.p0(<1 x double> %val, ptr %a, i32 8, <1 x i1> %mask)
  ret void
}
define void @masked_store_v2bf16(<2 x bfloat> %val, ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_store_v2bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v2bf16.p0(<2 x bfloat> %val, ptr %a, i32 8, <2 x i1> %mask)
  ret void
}
define void @masked_store_v2f16(<2 x half> %val, ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_store_v2f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v2f16.p0(<2 x half> %val, ptr %a, i32 8, <2 x i1> %mask)
  ret void
}
define void @masked_store_v2f32(<2 x float> %val, ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_store_v2f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %a, i32 8, <2 x i1> %mask)
  ret void
}
define void @masked_store_v2f64(<2 x double> %val, ptr %a, <2 x i1> %mask) {
; CHECK-LABEL: masked_store_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v2f64.p0(<2 x double> %val, ptr %a, i32 8, <2 x i1> %mask)
  ret void
}
define void @masked_store_v4bf16(<4 x bfloat> %val, ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_store_v4bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v4bf16.p0(<4 x bfloat> %val, ptr %a, i32 8, <4 x i1> %mask)
  ret void
}
define void @masked_store_v4f16(<4 x half> %val, ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_store_v4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v4f16.p0(<4 x half> %val, ptr %a, i32 8, <4 x i1> %mask)
  ret void
}
define void @masked_store_v4f32(<4 x float> %val, ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_store_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v4f32.p0(<4 x float> %val, ptr %a, i32 8, <4 x i1> %mask)
  ret void
}
define void @masked_store_v4f64(<4 x double> %val, ptr %a, <4 x i1> %mask) {
; CHECK-LABEL: masked_store_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v4f64.p0(<4 x double> %val, ptr %a, i32 8, <4 x i1> %mask)
  ret void
}
define void @masked_store_v8bf16(<8 x bfloat> %val, ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_store_v8bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v8bf16.p0(<8 x bfloat> %val, ptr %a, i32 8, <8 x i1> %mask)
  ret void
}
define void @masked_store_v8f16(<8 x half> %val, ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_store_v8f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v8f16.p0(<8 x half> %val, ptr %a, i32 8, <8 x i1> %mask)
  ret void
}
define void @masked_store_v8f32(<8 x float> %val, ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_store_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v8f32.p0(<8 x float> %val, ptr %a, i32 8, <8 x i1> %mask)
  ret void
}
define void @masked_store_v8f64(<8 x double> %val, ptr %a, <8 x i1> %mask) {
; CHECK-LABEL: masked_store_v8f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v8f64.p0(<8 x double> %val, ptr %a, i32 8, <8 x i1> %mask)
  ret void
}
define void @masked_store_v16bf16(<16 x bfloat> %val, ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_store_v16bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v16bf16.p0(<16 x bfloat> %val, ptr %a, i32 8, <16 x i1> %mask)
  ret void
}
define void @masked_store_v16f16(<16 x half> %val, ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_store_v16f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v16f16.p0(<16 x half> %val, ptr %a, i32 8, <16 x i1> %mask)
  ret void
}
define void @masked_store_v16f32(<16 x float> %val, ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_store_v16f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v16f32.p0(<16 x float> %val, ptr %a, i32 8, <16 x i1> %mask)
  ret void
}
define void @masked_store_v16f64(<16 x double> %val, ptr %a, <16 x i1> %mask) {
; CHECK-LABEL: masked_store_v16f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v16f64.p0(<16 x double> %val, ptr %a, i32 8, <16 x i1> %mask)
  ret void
}
define void @masked_store_v32bf16(<32 x bfloat> %val, ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_store_v32bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v32bf16.p0(<32 x bfloat> %val, ptr %a, i32 8, <32 x i1> %mask)
  ret void
}
define void @masked_store_v32f16(<32 x half> %val, ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_store_v32f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v32f16.p0(<32 x half> %val, ptr %a, i32 8, <32 x i1> %mask)
  ret void
}
define void @masked_store_v32f32(<32 x float> %val, ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_store_v32f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v32f32.p0(<32 x float> %val, ptr %a, i32 8, <32 x i1> %mask)
  ret void
}
define void @masked_store_v32f64(<32 x double> %val, ptr %a, <32 x i1> %mask) {
; CHECK-LABEL: masked_store_v32f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vse64.v v8, (a0), v0.t
; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
; CHECK-NEXT:    vslidedown.vi v0, v0, 2
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT:    vse64.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v32f64.p0(<32 x double> %val, ptr %a, i32 8, <32 x i1> %mask)
  ret void
}
define void @masked_store_v64bf16(<64 x bfloat> %val, ptr %a, <64 x i1> %mask) {
; CHECK-LABEL: masked_store_v64bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v64bf16.p0(<64 x bfloat> %val, ptr %a, i32 8, <64 x i1> %mask)
  ret void
}
define void @masked_store_v64f16(<64 x half> %val, ptr %a, <64 x i1> %mask) {
; CHECK-LABEL: masked_store_v64f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v64f16.p0(<64 x half> %val, ptr %a, i32 8, <64 x i1> %mask)
  ret void
}
define void @masked_store_v64f32(<64 x float> %val, ptr %a, <64 x i1> %mask) {
; CHECK-LABEL: masked_store_v64f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 32
; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT:    vslidedown.vi v24, v0, 4
; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT:    vse32.v v8, (a0), v0.t
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    vse32.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v64f32.p0(<64 x float> %val, ptr %a, i32 8, <64 x i1> %mask)
  ret void
}
define void @masked_store_v128bf16(<128 x bfloat> %val, ptr %a, <128 x i1> %mask) {
; CHECK-LABEL: masked_store_v128bf16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    li a1, 64
; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
; CHECK-NEXT:    vslidedown.vi v24, v0, 8
; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT:    vse16.v v8, (a0), v0.t
; CHECK-NEXT:    addi a0, a0, 128
; CHECK-NEXT:    vmv1r.v v0, v24
; CHECK-NEXT:    vse16.v v16, (a0), v0.t
; CHECK-NEXT:    ret
  call void @llvm.masked.store.v128bf16.p0(<128 x bfloat> %val, ptr %a, i32 8, <128 x i1> %mask)
  ret void
}
309 define void @masked_store_v128f16(<128 x half> %val, ptr %a, <128 x i1> %mask) {
310 ; CHECK-LABEL: masked_store_v128f16:
312 ; CHECK-NEXT: li a1, 64
313 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
314 ; CHECK-NEXT: vslidedown.vi v24, v0, 8
315 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
316 ; CHECK-NEXT: vse16.v v8, (a0), v0.t
317 ; CHECK-NEXT: addi a0, a0, 128
318 ; CHECK-NEXT: vmv1r.v v0, v24
319 ; CHECK-NEXT: vse16.v v16, (a0), v0.t
321 call void @llvm.masked.store.v128f16.p0(<128 x half> %val, ptr %a, i32 8, <128 x i1> %mask)