; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; Range testing for the immediate in the reg+imm(mulvl) addressing
; mode is done only for one instruction. The rest of the instructions
; test only one in-bound immediate value.
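;
; Note on the bounds exercised below: the reg+imm(mulvl) form of these loads
; takes a signed 4-bit immediate, so offsets of -8 to 7 vector-length
; multiples fold into the instruction, while -9 and 8 are expected to be
; materialised with an ADDVL into a scratch register (see the
; out_of_*_bound tests).
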
define <vscale x 16 x i8> @ldnf1b(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %a)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_out_of_lower_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_out_of_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addvl x8, x0, #-9
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 -9
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_lower_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 -8
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_inbound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 1
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_upper_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 7
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_out_of_upper_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_out_of_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    addvl x8, x0, #8
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 8
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

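; The _h/_s/_d variants below test the extending forms: narrower elements are
; loaded with the nxv8i8/nxv4i8/nxv4i16/nxv2i32/... intrinsics and the result
; is zero-extended (ldnf1b/h/w) or sign-extended (ldnf1sb/sh/sw) to the
; destination element width.
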
define <vscale x 8 x i16> @ldnf1b_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1b_h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i8>, ptr %a, i64 7
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1sb_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1sb_h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i8>, ptr %a, i64 7
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x i16> @ldnf1h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i16>, ptr %a, i64 1
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x half> @ldnf1h_f16(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldnf1h_bf16(<vscale x 8 x i1> %pg, ptr %a) #0 {
; CHECK-LABEL: ldnf1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 8 x half> @ldnf1h_f16_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_f16_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x half>, ptr %a, i64 1
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldnf1h_bf16_inbound(<vscale x 8 x i1> %pg, ptr %a) #0 {
; CHECK-LABEL: ldnf1h_bf16_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x bfloat>, ptr %a, i64 1
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 4 x i32> @ldnf1b_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1b_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i8>, ptr %a, i64 7
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sb_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sb_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i8>, ptr %a, i64 7
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1h_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1h_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i16>, ptr %a, i64 7
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sh_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sh_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i16>, ptr %a, i64 7
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1w(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @ldnf1w_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i32>, ptr %a, i64 7
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @ldnf1w_f32(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @ldnf1w_f32_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_f32_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x float>, ptr %a, i64 7
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x i64> @ldnf1b_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1b_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i8>, ptr %a, i64 7
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sb_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sb_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i8>, ptr %a, i64 7
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1h_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1h_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i16>, ptr %a, i64 7
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sh_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sh_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i16>, ptr %a, i64 7
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1w_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1w_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i32>, ptr %a, i64 7
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sw_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sw_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sw_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sw_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sw { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i32>, ptr %a, i64 7
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @ldnf1d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i64>, ptr %a, i64 1
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @ldnf1d_f64(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @ldnf1d_f64_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_f64_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x double>, ptr %a, i64 1
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x double> %load
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1>, ptr)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1>, ptr)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1>, ptr)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }