; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s

;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: vector base + immediate offset (index)
;   e.g. ldff1h { z0.s }, p0/z, [z0.s, #16]
;
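; Note (explanatory, not part of the autogenerated assertions): the vector-base +
; immediate form encodes the offset in bytes as a multiple of the memory access size,
; up to 31 * size (0-31 for LDFF1B, 0-62 for LDFF1H, 0-124 for LDFF1W, 0-248 for
; LDFF1D), so an offset of 16 is expected to fold directly into the instruction for
; every access size tested below.
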
define <vscale x 4 x i32> @gldff1b_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1b_s_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1b_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1b_d_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x i32> @gldff1h_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1h_s_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1h_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1h_d_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x i32> @gldff1w_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1w_s_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gldff1w_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1w_d_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gldff1w_s_imm_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1w_s_imm_offset_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x i64> @gldff1d_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1d_d_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gldff1d_d_imm_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1d_d_imm_offset_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  ret <vscale x 2 x double> %load
}

;
; LDFF1SB, LDFF1SW, LDFF1SH: vector base + immediate offset (index)
;   e.g. ldff1sh { z0.s }, p0/z, [z0.s, #16]
;
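; Note: same addressing form as above; the IR sign-extends the loaded element (sext
; rather than zext), which is expected to select the sign-extending LDFF1S* variants.
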
define <vscale x 4 x i32> @gldff1sb_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1sb_s_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sb_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sb_d_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x i32> @gldff1sh_s_imm_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1sh_s_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [z0.s, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 16)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sh_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sh_d_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1sw_d_imm_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sw_d_imm_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [z0.d, #16]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 16)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: vector base + out of range immediate offset
;   e.g. ldff1b { z0.d }, p0/z, [x0, z0.d]
;
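; Note: the offsets used here (32, 63, 125, 249) fall outside the encodable immediate
; range for the corresponding access size, so each offset is expected to be materialised
; into a scalar register and the scalar-base + vector-index form selected instead,
; e.g. [x8, z0.s, uxtw] or [x8, z0.d].
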
define <vscale x 4 x i32> @gldff1b_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1b_s_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32
; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 32)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1b_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1b_d_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32
; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x i32> @gldff1h_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1h_s_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #63
; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 63)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1h_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1h_d_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #63
; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 63)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x i32> @gldff1w_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1w_s_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #125
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 125)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gldff1w_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1w_d_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #125
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 125)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gldff1w_s_imm_offset_out_of_range_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1w_s_imm_offset_out_of_range_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #125
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 125)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x i64> @gldff1d_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1d_d_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #249
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 249)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gldff1d_d_imm_offset_out_of_range_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1d_d_imm_offset_out_of_range_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #249
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 249)
  ret <vscale x 2 x double> %load
}

;
; LDFF1SB, LDFF1SW, LDFF1SH: vector base + out of range immediate offset
;   e.g. ldff1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
;
define <vscale x 4 x i32> @gldff1sb_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1sb_s_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32
; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 32)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sb_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sb_d_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #32
; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 32)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x i32> @gldff1sh_s_imm_offset_out_of_range(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base) {
; CHECK-LABEL: gldff1sh_s_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #63
; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x8, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 63)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sh_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sh_d_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #63
; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 63)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gldff1sw_d_imm_offset_out_of_range(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base) {
; CHECK-LABEL: gldff1sw_d_imm_offset_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #125
; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x8, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 125)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; 8-bit elements
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; 16-bit elements
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; 32-bit elements
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

; 64-bit elements
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)