; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; Range testing for the immediate in the reg+imm(mulvl) addressing
; mode is done only for one instruction. The rest of the instructions
; test only one in-bound immediate value.
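; The immediate is a signed multiple of the vector length, so offsets in the
; range [-8, 7] fold into the load; anything outside that range is
; materialised with a separate ADDVL, as the ldnf1b_*_bound tests below show.
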
define <vscale x 16 x i8> @ldnf1b(<vscale x 16 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0]
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %a)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_out_of_lower_bound(<vscale x 16 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b_out_of_lower_bound:
; CHECK-NEXT:    addvl x8, x0, #-9
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x8]
  %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 -9
  %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_lower_bound(<vscale x 16 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b_lower_bound:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #-8, mul vl]
  %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 -8
  %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_inbound(<vscale x 16 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b_inbound:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #1, mul vl]
  %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_upper_bound(<vscale x 16 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b_upper_bound:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_out_of_upper_bound(<vscale x 16 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b_out_of_upper_bound:
; CHECK-NEXT:    addvl x8, x0, #8
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x8]
  %base_scalable = bitcast i8* %a to <vscale x 16 x i8>*
  %base = getelementptr <vscale x 16 x i8>, <vscale x 16 x i8>* %base_scalable, i64 8
  %base_scalar = bitcast <vscale x 16 x i8>* %base to i8*
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, i8* %base_scalar)
  ret <vscale x 16 x i8> %load
}

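; Extending loads: the loaded element type is narrower than the result type.
; A zext of the load selects the unsigned form (e.g. ldnf1b { z0.h }) and a
; sext selects the signed form (e.g. ldnf1sb { z0.h }).
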
define <vscale x 8 x i16> @ldnf1b_h(<vscale x 8 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b_h:
; CHECK-NEXT:    ldnf1b { z0.h }, p0/z, [x0]
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, i8* %a)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1b_h_inbound(<vscale x 8 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b_h_inbound:
; CHECK-NEXT:    ldnf1b { z0.h }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i8* %a to <vscale x 8 x i8>*
  %base = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 8 x i8>* %base to i8*
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, i8* %base_scalar)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1sb_h(<vscale x 8 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1sb_h:
; CHECK-NEXT:    ldnf1sb { z0.h }, p0/z, [x0]
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, i8* %a)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1sb_h_inbound(<vscale x 8 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1sb_h_inbound:
; CHECK-NEXT:    ldnf1sb { z0.h }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i8* %a to <vscale x 8 x i8>*
  %base = getelementptr <vscale x 8 x i8>, <vscale x 8 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 8 x i8>* %base to i8*
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, i8* %base_scalar)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1h(<vscale x 8 x i1> %pg, i16* %a) {
; CHECK-LABEL: ldnf1h:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, i16* %a)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x i16> @ldnf1h_inbound(<vscale x 8 x i1> %pg, i16* %a) {
; CHECK-LABEL: ldnf1h_inbound:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
  %base_scalable = bitcast i16* %a to <vscale x 8 x i16>*
  %base = getelementptr <vscale x 8 x i16>, <vscale x 8 x i16>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 8 x i16>* %base to i16*
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, i16* %base_scalar)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x half> @ldnf1h_f16(<vscale x 8 x i1> %pg, half* %a) {
; CHECK-LABEL: ldnf1h_f16:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, half* %a)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldnf1h_bf16(<vscale x 8 x i1> %pg, bfloat* %a) #0 {
; CHECK-LABEL: ldnf1h_bf16:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %a)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 8 x half> @ldnf1h_f16_inbound(<vscale x 8 x i1> %pg, half* %a) {
; CHECK-LABEL: ldnf1h_f16_inbound:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
  %base_scalable = bitcast half* %a to <vscale x 8 x half>*
  %base = getelementptr <vscale x 8 x half>, <vscale x 8 x half>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 8 x half>* %base to half*
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, half* %base_scalar)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldnf1h_bf16_inbound(<vscale x 8 x i1> %pg, bfloat* %a) #0 {
; CHECK-LABEL: ldnf1h_bf16_inbound:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
  %base_scalable = bitcast bfloat* %a to <vscale x 8 x bfloat>*
  %base = getelementptr <vscale x 8 x bfloat>, <vscale x 8 x bfloat>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 8 x bfloat>* %base to bfloat*
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, bfloat* %base_scalar)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 4 x i32> @ldnf1b_s(<vscale x 4 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b_s:
; CHECK-NEXT:    ldnf1b { z0.s }, p0/z, [x0]
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, i8* %a)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1b_s_inbound(<vscale x 4 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b_s_inbound:
; CHECK-NEXT:    ldnf1b { z0.s }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i8* %a to <vscale x 4 x i8>*
  %base = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x i8>* %base to i8*
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, i8* %base_scalar)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sb_s(<vscale x 4 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1sb_s:
; CHECK-NEXT:    ldnf1sb { z0.s }, p0/z, [x0]
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, i8* %a)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sb_s_inbound(<vscale x 4 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1sb_s_inbound:
; CHECK-NEXT:    ldnf1sb { z0.s }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i8* %a to <vscale x 4 x i8>*
  %base = getelementptr <vscale x 4 x i8>, <vscale x 4 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x i8>* %base to i8*
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, i8* %base_scalar)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1h_s(<vscale x 4 x i1> %pg, i16* %a) {
; CHECK-LABEL: ldnf1h_s:
; CHECK-NEXT:    ldnf1h { z0.s }, p0/z, [x0]
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, i16* %a)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1h_s_inbound(<vscale x 4 x i1> %pg, i16* %a) {
; CHECK-LABEL: ldnf1h_s_inbound:
; CHECK-NEXT:    ldnf1h { z0.s }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i16* %a to <vscale x 4 x i16>*
  %base = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x i16>* %base to i16*
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, i16* %base_scalar)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sh_s(<vscale x 4 x i1> %pg, i16* %a) {
; CHECK-LABEL: ldnf1sh_s:
; CHECK-NEXT:    ldnf1sh { z0.s }, p0/z, [x0]
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, i16* %a)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sh_s_inbound(<vscale x 4 x i1> %pg, i16* %a) {
; CHECK-LABEL: ldnf1sh_s_inbound:
; CHECK-NEXT:    ldnf1sh { z0.s }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i16* %a to <vscale x 4 x i16>*
  %base = getelementptr <vscale x 4 x i16>, <vscale x 4 x i16>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x i16>* %base to i16*
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, i16* %base_scalar)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1w(<vscale x 4 x i1> %pg, i32* %a) {
; CHECK-LABEL: ldnf1w:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0]
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, i32* %a)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @ldnf1w_inbound(<vscale x 4 x i1> %pg, i32* %a) {
; CHECK-LABEL: ldnf1w_inbound:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i32* %a to <vscale x 4 x i32>*
  %base = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x i32>* %base to i32*
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, i32* %base_scalar)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @ldnf1w_f32(<vscale x 4 x i1> %pg, float* %a) {
; CHECK-LABEL: ldnf1w_f32:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0]
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, float* %a)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @ldnf1w_f32_inbound(<vscale x 4 x i1> %pg, float* %a) {
; CHECK-LABEL: ldnf1w_f32_inbound:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast float* %a to <vscale x 4 x float>*
  %base = getelementptr <vscale x 4 x float>, <vscale x 4 x float>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 4 x float>* %base to float*
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, float* %base_scalar)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x i64> @ldnf1b_d(<vscale x 2 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b_d:
; CHECK-NEXT:    ldnf1b { z0.d }, p0/z, [x0]
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, i8* %a)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1b_d_inbound(<vscale x 2 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1b_d_inbound:
; CHECK-NEXT:    ldnf1b { z0.d }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i8* %a to <vscale x 2 x i8>*
  %base = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i8>* %base to i8*
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, i8* %base_scalar)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sb_d(<vscale x 2 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1sb_d:
; CHECK-NEXT:    ldnf1sb { z0.d }, p0/z, [x0]
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, i8* %a)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sb_d_inbound(<vscale x 2 x i1> %pg, i8* %a) {
; CHECK-LABEL: ldnf1sb_d_inbound:
; CHECK-NEXT:    ldnf1sb { z0.d }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i8* %a to <vscale x 2 x i8>*
  %base = getelementptr <vscale x 2 x i8>, <vscale x 2 x i8>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i8>* %base to i8*
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, i8* %base_scalar)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1h_d(<vscale x 2 x i1> %pg, i16* %a) {
; CHECK-LABEL: ldnf1h_d:
; CHECK-NEXT:    ldnf1h { z0.d }, p0/z, [x0]
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, i16* %a)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1h_d_inbound(<vscale x 2 x i1> %pg, i16* %a) {
; CHECK-LABEL: ldnf1h_d_inbound:
; CHECK-NEXT:    ldnf1h { z0.d }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i16* %a to <vscale x 2 x i16>*
  %base = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i16>* %base to i16*
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, i16* %base_scalar)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sh_d(<vscale x 2 x i1> %pg, i16* %a) {
; CHECK-LABEL: ldnf1sh_d:
; CHECK-NEXT:    ldnf1sh { z0.d }, p0/z, [x0]
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, i16* %a)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sh_d_inbound(<vscale x 2 x i1> %pg, i16* %a) {
; CHECK-LABEL: ldnf1sh_d_inbound:
; CHECK-NEXT:    ldnf1sh { z0.d }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i16* %a to <vscale x 2 x i16>*
  %base = getelementptr <vscale x 2 x i16>, <vscale x 2 x i16>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i16>* %base to i16*
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, i16* %base_scalar)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1w_d(<vscale x 2 x i1> %pg, i32* %a) {
; CHECK-LABEL: ldnf1w_d:
; CHECK-NEXT:    ldnf1w { z0.d }, p0/z, [x0]
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, i32* %a)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1w_d_inbound(<vscale x 2 x i1> %pg, i32* %a) {
; CHECK-LABEL: ldnf1w_d_inbound:
; CHECK-NEXT:    ldnf1w { z0.d }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i32* %a to <vscale x 2 x i32>*
  %base = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i32>* %base to i32*
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, i32* %base_scalar)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sw_d(<vscale x 2 x i1> %pg, i32* %a) {
; CHECK-LABEL: ldnf1sw_d:
; CHECK-NEXT:    ldnf1sw { z0.d }, p0/z, [x0]
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, i32* %a)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sw_d_inbound(<vscale x 2 x i1> %pg, i32* %a) {
; CHECK-LABEL: ldnf1sw_d_inbound:
; CHECK-NEXT:    ldnf1sw { z0.d }, p0/z, [x0, #7, mul vl]
  %base_scalable = bitcast i32* %a to <vscale x 2 x i32>*
  %base = getelementptr <vscale x 2 x i32>, <vscale x 2 x i32>* %base_scalable, i64 7
  %base_scalar = bitcast <vscale x 2 x i32>* %base to i32*
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, i32* %base_scalar)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1d(<vscale x 2 x i1> %pg, i64* %a) {
; CHECK-LABEL: ldnf1d:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0]
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, i64* %a)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @ldnf1d_inbound(<vscale x 2 x i1> %pg, i64* %a) {
; CHECK-LABEL: ldnf1d_inbound:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0, #1, mul vl]
  %base_scalable = bitcast i64* %a to <vscale x 2 x i64>*
  %base = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 2 x i64>* %base to i64*
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, i64* %base_scalar)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @ldnf1d_f64(<vscale x 2 x i1> %pg, double* %a) {
; CHECK-LABEL: ldnf1d_f64:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0]
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, double* %a)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @ldnf1d_f64_inbound(<vscale x 2 x i1> %pg, double* %a) {
; CHECK-LABEL: ldnf1d_f64_inbound:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0, #1, mul vl]
  %base_scalable = bitcast double* %a to <vscale x 2 x double>*
  %base = getelementptr <vscale x 2 x double>, <vscale x 2 x double>* %base_scalable, i64 1
  %base_scalar = bitcast <vscale x 2 x double>* %base to double*
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, double* %base_scalar)
  ret <vscale x 2 x double> %load
}

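; Intrinsic declarations, one per result element type and element count used above.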
declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1>, i8*)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1>, i8*)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1>, i16*)
declare <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1>, half*)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1>, bfloat*)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1>, i8*)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1>, i16*)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1>, i32*)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1>, float*)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1>, i8*)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1>, i16*)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1>, i32*)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1>, i64*)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1>, double*)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }