; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s

; Range testing for the immediate in the reg+imm(mulvl) addressing
; mode is done only for one instruction. The rest of the instructions
; test only one in-bounds immediate value.
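; The reg+imm form encodes a signed 4-bit immediate, so only offsets of
; -8 to 7 vector lengths can be folded into the addressing mode.
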
define <vscale x 16 x i8> @ldnf1b(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %a)
  ret <vscale x 16 x i8> %load
}

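; An offset outside [-8, 7] (here -9, and 8 further below) cannot be
; encoded, so the address is materialised separately with rdvl + add.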
define <vscale x 16 x i8> @ldnf1b_out_of_lower_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_out_of_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #-9
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 -9
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_lower_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 -8
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_inbound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 1
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_upper_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 7
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_out_of_upper_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_out_of_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #8
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 8
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

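; The tests below load elements narrower than the destination lane type:
; ldnf1b/h/w zero-extend and ldnf1sb/sh/sw sign-extend each loaded
; element to the lane width.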
define <vscale x 8 x i16> @ldnf1b_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1b_h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i8>, ptr %a, i64 7
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1sb_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1sb_h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i8>, ptr %a, i64 7
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x i16> @ldnf1h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i16>, ptr %a, i64 1
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x half> @ldnf1h_f16(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldnf1h_bf16(<vscale x 8 x i1> %pg, ptr %a) #0 {
; CHECK-LABEL: ldnf1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 8 x half> @ldnf1h_f16_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_f16_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x half>, ptr %a, i64 1
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldnf1h_bf16_inbound(<vscale x 8 x i1> %pg, ptr %a) #0 {
; CHECK-LABEL: ldnf1h_bf16_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x bfloat>, ptr %a, i64 1
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 4 x i32> @ldnf1b_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1b_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i8>, ptr %a, i64 7
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sb_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sb_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i8>, ptr %a, i64 7
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1h_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1h_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i16>, ptr %a, i64 7
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sh_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sh_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i16>, ptr %a, i64 7
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1w(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @ldnf1w_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i32>, ptr %a, i64 7
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @ldnf1w_f32(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @ldnf1w_f32_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_f32_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x float>, ptr %a, i64 7
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x i64> @ldnf1b_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1b_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i8>, ptr %a, i64 7
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sb_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sb_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i8>, ptr %a, i64 7
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1h_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1h_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i16>, ptr %a, i64 7
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sh_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sh_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i16>, ptr %a, i64 7
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1w_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1w_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i32>, ptr %a, i64 7
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sw_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sw_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sw_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sw_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sw { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i32>, ptr %a, i64 7
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @ldnf1d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i64>, ptr %a, i64 1
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @ldnf1d_f64(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @ldnf1d_f64_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_f64_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x double>, ptr %a, i64 1
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x double> %load
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1>, ptr)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1>, ptr)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1>, ptr)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }