1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
3 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s
9 define void @st1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, ptr %a, i64 %index) {
10 ; CHECK-LABEL: st1b_i8:
12 ; CHECK-NEXT: st1b { z0.b }, p0, [x0, x1]
14 %base = getelementptr i8, ptr %a, i64 %index
15 call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> %data,
16 <vscale x 16 x i1> %pred,
23 define void @st1b_h(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, ptr %a, i64 %index) {
24 ; CHECK-LABEL: st1b_h:
26 ; CHECK-NEXT: st1b { z0.h }, p0, [x0, x1]
28 %base = getelementptr i8, ptr %a, i64 %index
29 %trunc = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8>
30 call void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8> %trunc,
31 <vscale x 8 x i1> %pred,
36 define void @st1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, ptr %a, i64 %index) {
37 ; CHECK-LABEL: st1b_s:
39 ; CHECK-NEXT: st1b { z0.s }, p0, [x0, x1]
41 %base = getelementptr i8, ptr %a, i64 %index
42 %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
43 call void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8> %trunc,
44 <vscale x 4 x i1> %pred,
49 define void @st1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %a, i64 %index) {
50 ; CHECK-LABEL: st1b_d:
52 ; CHECK-NEXT: st1b { z0.d }, p0, [x0, x1]
54 %base = getelementptr i8, ptr %a, i64 %index
55 %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
56 call void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8> %trunc,
57 <vscale x 2 x i1> %pred,
66 define void @st1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, ptr %a, i64 %index) {
67 ; CHECK-LABEL: st1h_i16:
69 ; CHECK-NEXT: st1h { z0.h }, p0, [x0, x1, lsl #1]
71 %base = getelementptr i16, ptr %a, i64 %index
72 call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %data,
73 <vscale x 8 x i1> %pred,
78 define void @st1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, ptr %a, i64 %index) {
79 ; CHECK-LABEL: st1h_f16:
81 ; CHECK-NEXT: st1h { z0.h }, p0, [x0, x1, lsl #1]
83 %base = getelementptr half, ptr %a, i64 %index
84 call void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half> %data,
85 <vscale x 8 x i1> %pred,
90 define void @st1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, ptr %a, i64 %index) #0 {
91 ; CHECK-LABEL: st1h_bf16:
93 ; CHECK-NEXT: st1h { z0.h }, p0, [x0, x1, lsl #1]
95 %base = getelementptr bfloat, ptr %a, i64 %index
96 call void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat> %data,
97 <vscale x 8 x i1> %pred,
102 define void @st1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, ptr %addr) {
103 ; CHECK-LABEL: st1h_s:
105 ; CHECK-NEXT: st1h { z0.s }, p0, [x0]
107 %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
108 call void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16> %trunc,
109 <vscale x 4 x i1> %pred,
114 define void @st1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %a, i64 %index) {
115 ; CHECK-LABEL: st1h_d:
117 ; CHECK-NEXT: st1h { z0.d }, p0, [x0, x1, lsl #1]
119 %base = getelementptr i16, ptr %a, i64 %index
120 %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
121 call void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16> %trunc,
122 <vscale x 2 x i1> %pred,
131 define void @st1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, ptr %a, i64 %index) {
132 ; CHECK-LABEL: st1w_i32:
134 ; CHECK-NEXT: st1w { z0.s }, p0, [x0, x1, lsl #2]
136 %base = getelementptr i32, ptr %a, i64 %index
137 call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %data,
138 <vscale x 4 x i1> %pred,
143 define void @st1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, ptr %a, i64 %index) {
144 ; CHECK-LABEL: st1w_f32:
146 ; CHECK-NEXT: st1w { z0.s }, p0, [x0, x1, lsl #2]
148 %base = getelementptr float, ptr %a, i64 %index
149 call void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float> %data,
150 <vscale x 4 x i1> %pred,
155 define void @st1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %a, i64 %index) {
156 ; CHECK-LABEL: st1w_d:
158 ; CHECK-NEXT: st1w { z0.d }, p0, [x0, x1, lsl #2]
160 %base = getelementptr i32, ptr %a, i64 %index
161 %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
162 call void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32> %trunc,
163 <vscale x 2 x i1> %pred,
172 define void @st1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %a, i64 %index) {
173 ; CHECK-LABEL: st1d_i64:
175 ; CHECK-NEXT: st1d { z0.d }, p0, [x0, x1, lsl #3]
177 %base = getelementptr i64, ptr %a, i64 %index
178 call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> %data,
179 <vscale x 2 x i1> %pred,
184 define void @st1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, ptr %a, i64 %index) {
185 ; CHECK-LABEL: st1d_f64:
187 ; CHECK-NEXT: st1d { z0.d }, p0, [x0, x1, lsl #3]
189 %base = getelementptr double, ptr %a, i64 %index
190 call void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double> %data,
191 <vscale x 2 x i1> %pred,
; Declarations of the llvm.aarch64.sve.st1.* intrinsics exercised by the tests
; above, grouped by element count per vector (nxv16 / nxv8 / nxv4 / nxv2).
; The narrower data types within a group (e.g. nxv8i8) correspond to the
; truncating-store tests (st1b_h, st1h_s, st1w_d, ...).
196 declare void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, ptr)
; 8 elements: i8 (truncating), i16, f16, bf16.
198 declare void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, ptr)
199 declare void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
200 declare void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, ptr)
201 declare void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)
; 4 elements: i8/i16 (truncating), i32, f32.
203 declare void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, ptr)
204 declare void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, ptr)
205 declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
206 declare void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, ptr)
; 2 elements: i8/i16/i32 (truncating), i64, f64.
208 declare void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, ptr)
209 declare void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, ptr)
210 declare void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, ptr)
211 declare void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
212 declare void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, ptr)
214 ; +bf16 is required for the bfloat version.
215 attributes #0 = { "target-features"="+bf16" }