; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
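
; Contiguous loads of the form "base + immediate * VL" should use the
; reg+imm addressing mode when the immediate fits the 4-bit signed range
; [-8, 7] of multiples of the vector length; offsets outside that range
; must fall back to the reg+reg form.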

;
; LD1B
;

define <vscale x 16 x i8> @ld1b_upper_bound(<vscale x 16 x i1> %pg, i8* %a) {
; CHECK-LABEL: ld1b_upper_bound:
; CHECK: ld1b { z0.b }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ld1b_inbound(<vscale x 16 x i1> %pg, i8* %a) {
; CHECK-LABEL: ld1b_inbound:
; CHECK: ld1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base_scalar)
  ret <vscale x 16 x i8> %load
}
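
; The unpacked forms below load one narrow element per lane and widen it:
; ld1b zero-extends and ld1sb sign-extends, so the IR-level zext/sext folds
; into the load. The "mul vl" immediate is scaled by the memory footprint
; of one unpacked vector (number of lanes x memory element size).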

define <vscale x 4 x i32> @ld1b_s_inbound(<vscale x 4 x i1> %pg, i8* %a) {
; CHECK-LABEL: ld1b_s_inbound:
; CHECK: ld1b { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i8* %a to <vscale x 4 x i8>*
  %base = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x i8>* %base to i8*
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pg, i8* %base_scalar)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sb_s_inbound(<vscale x 4 x i1> %pg, i8* %a) {
; CHECK-LABEL: ld1sb_s_inbound:
; CHECK: ld1sb { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i8* %a to <vscale x 4 x i8>*
  %base = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x i8>* %base to i8*
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pg, i8* %base_scalar)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 16 x i8> @ld1b_lower_bound(<vscale x 16 x i1> %pg, i8* %a) {
; CHECK-LABEL: ld1b_lower_bound:
; CHECK: ld1b { z0.b }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 -8
  %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base_scalar)
  ret <vscale x 16 x i8> %load
}
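
; Offsets of #8 and #-9 are just outside the [-8, 7] immediate range, so
; the offset must be materialised with rdvl and the reg+reg form used
; instead, as the two tests below check.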

define <vscale x 16 x i8> @ld1b_out_of_upper_bound(<vscale x 16 x i1> %pg, i8* %a) {
; CHECK-LABEL: ld1b_out_of_upper_bound:
; CHECK: rdvl x[[OFFSET:[0-9]+]], #8
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x[[OFFSET]]]
; CHECK-NEXT: ret
  %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 8
  %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ld1b_out_of_lower_bound(<vscale x 16 x i1> %pg, i8* %a) {
; CHECK-LABEL: ld1b_out_of_lower_bound:
; CHECK: rdvl x[[OFFSET:[0-9]+]], #-9
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x[[OFFSET]]]
; CHECK-NEXT: ret
  %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 -9
  %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base_scalar)
  ret <vscale x 16 x i8> %load
}

;
; LD1H
;

define <vscale x 8 x i16> @ld1b_h_inbound(<vscale x 8 x i1> %pg, i8* %a) {
; CHECK-LABEL: ld1b_h_inbound:
; CHECK: ld1b { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i8* %a to <vscale x 8 x i8>*
  %base = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 8 x i8>* %base to i8*
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pg, i8* %base_scalar)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ld1sb_h_inbound(<vscale x 8 x i1> %pg, i8* %a) {
; CHECK-LABEL: ld1sb_h_inbound:
; CHECK: ld1sb { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i8* %a to <vscale x 8 x i8>*
  %base = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 8 x i8>* %base to i8*
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pg, i8* %base_scalar)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ld1h_inbound(<vscale x 8 x i1> %pg, i16* %a) {
; CHECK-LABEL: ld1h_inbound:
; CHECK: ld1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i16* %a to <vscale x 8 x i16>*
  %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 8 x i16>* %base to i16*
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %pg, i16* %base_scalar)
  ret <vscale x 8 x i16> %load
}

define <vscale x 4 x i32> @ld1h_s_inbound(<vscale x 4 x i1> %pg, i16* %a) {
; CHECK-LABEL: ld1h_s_inbound:
; CHECK: ld1h { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i16* %a to <vscale x 4 x i16>*
  %base = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x i16>* %base to i16*
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pg, i16* %base_scalar)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sh_s_inbound(<vscale x 4 x i1> %pg, i16* %a) {
; CHECK-LABEL: ld1sh_s_inbound:
; CHECK: ld1sh { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i16* %a to <vscale x 4 x i16>*
  %base = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x i16>* %base to i16*
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pg, i16* %base_scalar)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ld1b_d_inbound(<vscale x 2 x i1> %pg, i8* %a) {
; CHECK-LABEL: ld1b_d_inbound:
; CHECK: ld1b { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i8* %a to <vscale x 2 x i8>*
  %base = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i8>* %base to i8*
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pg, i8* %base_scalar)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sb_d_inbound(<vscale x 2 x i1> %pg, i8* %a) {
; CHECK-LABEL: ld1sb_d_inbound:
; CHECK: ld1sb { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i8* %a to <vscale x 2 x i8>*
  %base = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i8>* %base to i8*
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pg, i8* %base_scalar)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1h_d_inbound(<vscale x 2 x i1> %pg, i16* %a) {
; CHECK-LABEL: ld1h_d_inbound:
; CHECK: ld1h { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i16* %a to <vscale x 2 x i16>*
  %base = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i16>* %base to i16*
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pg, i16* %base_scalar)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sh_d_inbound(<vscale x 2 x i1> %pg, i16* %a) {
; CHECK-LABEL: ld1sh_d_inbound:
; CHECK: ld1sh { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i16* %a to <vscale x 2 x i16>*
  %base = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i16>* %base to i16*
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pg, i16* %base_scalar)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @ld1h_f16_inbound(<vscale x 8 x i1> %pg, half* %a) {
; CHECK-LABEL: ld1h_f16_inbound:
; CHECK: ld1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast half* %a to <vscale x 8 x half>*
  %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 8 x half>* %base to half*
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> %pg, half* %base_scalar)
  ret <vscale x 8 x half> %load
}
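
; The bfloat variant carries attribute #0, which supplies the +bf16 target
; feature required for the bfloat version (see the attributes line at the
; end of the file).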

define <vscale x 8 x bfloat> @ld1h_bf16_inbound(<vscale x 8 x i1> %pg, bfloat* %a) #0 {
; CHECK-LABEL: ld1h_bf16_inbound:
; CHECK: ld1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast bfloat* %a to <vscale x 8 x bfloat>*
  %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 8 x bfloat>* %base to bfloat*
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base_scalar)
  ret <vscale x 8 x bfloat> %load
}

;
; LD1W
;

define <vscale x 4 x i32> @ld1w_inbound(<vscale x 4 x i1> %pg, i32* %a) {
; CHECK-LABEL: ld1w_inbound:
; CHECK: ld1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i32* %a to <vscale x 4 x i32>*
  %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x i32>* %base to i32*
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %pg, i32* %base_scalar)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @ld1w_f32_inbound(<vscale x 4 x i1> %pg, float* %a) {
; CHECK-LABEL: ld1w_f32_inbound:
; CHECK: ld1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast float* %a to <vscale x 4 x float>*
  %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x float>* %base to float*
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1> %pg, float* %base_scalar)
  ret <vscale x 4 x float> %load
}

;
; LD1D
;

define <vscale x 2 x i64> @ld1d_inbound(<vscale x 2 x i1> %pg, i64* %a) {
; CHECK-LABEL: ld1d_inbound:
; CHECK: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i64* %a to <vscale x 2 x i64>*
  %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 2 x i64>* %base to i64*
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> %pg, i64* %base_scalar)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @ld1w_d_inbound(<vscale x 2 x i1> %pg, i32* %a) {
; CHECK-LABEL: ld1w_d_inbound:
; CHECK: ld1w { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i32* %a to <vscale x 2 x i32>*
  %base = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i32>* %base to i32*
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pg, i32* %base_scalar)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sw_d_inbound(<vscale x 2 x i1> %pg, i32* %a) {
; CHECK-LABEL: ld1sw_d_inbound:
; CHECK: ld1sw { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast i32* %a to <vscale x 2 x i32>*
  %base = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i32>* %base to i32*
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pg, i32* %base_scalar)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x double> @ld1d_f64_inbound(<vscale x 2 x i1> %pg, double* %a) {
; CHECK-LABEL: ld1d_f64_inbound:
; CHECK: ld1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT: ret
  %base_scalable = bitcast double* %a to <vscale x 2 x double>*
  %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 2 x double>* %base to double*
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %pg, double* %base_scalar)
  ret <vscale x 2 x double> %load
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1>, i8*)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1>, i8*)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1>, i16*)
declare <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1>, half*)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1>, bfloat*)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1>, i8*)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1>, i16*)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, i32*)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1>, float*)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1>, i8*)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1>, i16*)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1>, i32*)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1>, i64*)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, double*)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }