1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
5 ; LD1B, LD1W, LD1H, LD1D: base + 64-bit unscaled offset
6 ; e.g. ld1h { z0.d }, p0/z, [x0, z0.d]
9 define <vscale x 2 x i64> @gld1b_d(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
10 ; CHECK-LABEL: gld1b_d:
12 ; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d]
14 %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %pg,
16 <vscale x 2 x i64> %b)
17 %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
18 ret <vscale x 2 x i64> %res
21 define <vscale x 2 x i64> @gld1h_d(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
22 ; CHECK-LABEL: gld1h_d:
24 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
26 %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %pg,
28 <vscale x 2 x i64> %b)
29 %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
30 ret <vscale x 2 x i64> %res
33 define <vscale x 2 x i64> @gld1w_d(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %offsets) {
34 ; CHECK-LABEL: gld1w_d:
36 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d]
38 %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %pg,
40 <vscale x 2 x i64> %offsets)
41 %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
42 ret <vscale x 2 x i64> %res
45 define <vscale x 2 x i64> @gld1d_d(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
46 ; CHECK-LABEL: gld1d_d:
48 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d]
50 %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> %pg,
52 <vscale x 2 x i64> %b)
53 ret <vscale x 2 x i64> %load
56 define <vscale x 2 x double> @gld1d_d_double(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
57 ; CHECK-LABEL: gld1d_d_double:
59 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d]
61 %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> %pg,
63 <vscale x 2 x i64> %b)
64 ret <vscale x 2 x double> %load
68 ; LD1SB, LD1SW, LD1SH: base + 64-bit unscaled offset
69 ; e.g. ld1sh { z0.d }, p0/z, [x0, z0.d]
72 define <vscale x 2 x i64> @gld1sb_d(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
73 ; CHECK-LABEL: gld1sb_d:
75 ; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d]
77 %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %pg,
79 <vscale x 2 x i64> %b)
80 %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
81 ret <vscale x 2 x i64> %res
84 define <vscale x 2 x i64> @gld1sh_d(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
85 ; CHECK-LABEL: gld1sh_d:
87 ; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d]
89 %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %pg,
91 <vscale x 2 x i64> %b)
92 %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
93 ret <vscale x 2 x i64> %res
96 define <vscale x 2 x i64> @gld1sw_d(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %offsets) {
97 ; CHECK-LABEL: gld1sw_d:
99 ; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d]
101 %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %pg,
103 <vscale x 2 x i64> %offsets)
104 %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
105 ret <vscale x 2 x i64> %res
109 ; LD1B, LD1W, LD1H, LD1D: base + 64-bit sxtw'd unscaled offset
110 ; e.g. ld1h { z0.d }, p0/z, [x0, z0.d, sxtw]
113 define <vscale x 2 x i64> @gld1b_d_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
114 ; CHECK-LABEL: gld1b_d_sxtw:
116 ; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d, sxtw]
118 %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
119 <vscale x 2 x i1> %pg,
120 <vscale x 2 x i64> %b)
121 %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %pg,
123 <vscale x 2 x i64> %sxtw)
124 %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
125 ret <vscale x 2 x i64> %res
128 define <vscale x 2 x i64> @gld1h_d_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
129 ; CHECK-LABEL: gld1h_d_sxtw:
131 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw]
133 %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
134 <vscale x 2 x i1> %pg,
135 <vscale x 2 x i64> %b)
136 %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %pg,
138 <vscale x 2 x i64> %sxtw)
139 %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
140 ret <vscale x 2 x i64> %res
143 define <vscale x 2 x i64> @gld1w_d_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %offsets) {
144 ; CHECK-LABEL: gld1w_d_sxtw:
146 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw]
148 %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
149 <vscale x 2 x i1> %pg,
150 <vscale x 2 x i64> %offsets)
151 %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %pg,
153 <vscale x 2 x i64> %sxtw)
154 %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
155 ret <vscale x 2 x i64> %res
158 define <vscale x 2 x i64> @gld1d_d_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
159 ; CHECK-LABEL: gld1d_d_sxtw:
161 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
163 %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
164 <vscale x 2 x i1> %pg,
165 <vscale x 2 x i64> %b)
166 %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> %pg,
168 <vscale x 2 x i64> %sxtw)
169 ret <vscale x 2 x i64> %load
172 define <vscale x 2 x double> @gld1d_d_double_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
173 ; CHECK-LABEL: gld1d_d_double_sxtw:
175 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw]
177 %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
178 <vscale x 2 x i1> %pg,
179 <vscale x 2 x i64> %b)
180 %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> %pg,
182 <vscale x 2 x i64> %sxtw)
183 ret <vscale x 2 x double> %load
187 ; LD1SB, LD1SW, LD1SH: base + 64-bit sxtw'd unscaled offset
188 ; e.g. ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
191 define <vscale x 2 x i64> @gld1sb_d_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
192 ; CHECK-LABEL: gld1sb_d_sxtw:
194 ; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d, sxtw]
196 %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
197 <vscale x 2 x i1> %pg,
198 <vscale x 2 x i64> %b)
199 %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %pg,
201 <vscale x 2 x i64> %sxtw)
202 %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
203 ret <vscale x 2 x i64> %res
206 define <vscale x 2 x i64> @gld1sh_d_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
207 ; CHECK-LABEL: gld1sh_d_sxtw:
209 ; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw]
211 %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
212 <vscale x 2 x i1> %pg,
213 <vscale x 2 x i64> %b)
214 %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %pg,
216 <vscale x 2 x i64> %sxtw)
217 %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
218 ret <vscale x 2 x i64> %res
221 define <vscale x 2 x i64> @gld1sw_d_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %offsets) {
222 ; CHECK-LABEL: gld1sw_d_sxtw:
224 ; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw]
226 %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
227 <vscale x 2 x i1> %pg,
228 <vscale x 2 x i64> %offsets)
229 %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %pg,
231 <vscale x 2 x i64> %sxtw)
232 %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
233 ret <vscale x 2 x i64> %res
237 ; LD1B, LD1W, LD1H, LD1D: base + 64-bit uxtw'd unscaled offset
238 ; e.g. ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
241 define <vscale x 2 x i64> @gld1b_d_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
242 ; CHECK-LABEL: gld1b_d_uxtw:
244 ; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d, uxtw]
246 %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
247 <vscale x 2 x i1> %pg,
248 <vscale x 2 x i64> %b)
249 %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %pg,
251 <vscale x 2 x i64> %uxtw)
252 %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
253 ret <vscale x 2 x i64> %res
256 define <vscale x 2 x i64> @gld1h_d_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
257 ; CHECK-LABEL: gld1h_d_uxtw:
259 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw]
261 %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
262 <vscale x 2 x i1> %pg,
263 <vscale x 2 x i64> %b)
264 %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %pg,
266 <vscale x 2 x i64> %uxtw)
267 %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
268 ret <vscale x 2 x i64> %res
271 define <vscale x 2 x i64> @gld1w_d_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %offsets) {
272 ; CHECK-LABEL: gld1w_d_uxtw:
274 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw]
276 %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
277 <vscale x 2 x i1> %pg,
278 <vscale x 2 x i64> %offsets)
279 %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %pg,
281 <vscale x 2 x i64> %uxtw)
282 %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
283 ret <vscale x 2 x i64> %res
286 define <vscale x 2 x i64> @gld1d_d_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
287 ; CHECK-LABEL: gld1d_d_uxtw:
289 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
291 %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
292 <vscale x 2 x i1> %pg,
293 <vscale x 2 x i64> %b)
294 %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1> %pg,
296 <vscale x 2 x i64> %uxtw)
297 ret <vscale x 2 x i64> %load
300 define <vscale x 2 x double> @gld1d_d_double_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
301 ; CHECK-LABEL: gld1d_d_double_uxtw:
303 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw]
305 %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
306 <vscale x 2 x i1> %pg,
307 <vscale x 2 x i64> %b)
308 %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1> %pg,
310 <vscale x 2 x i64> %uxtw)
311 ret <vscale x 2 x double> %load
315 ; LD1SB, LD1SW, LD1SH: base + 64-bit uxtw'd unscaled offset
316 ; e.g. ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
319 define <vscale x 2 x i64> @gld1sb_d_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
320 ; CHECK-LABEL: gld1sb_d_uxtw:
322 ; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d, uxtw]
324 %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
325 <vscale x 2 x i1> %pg,
326 <vscale x 2 x i64> %b)
327 %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1> %pg,
329 <vscale x 2 x i64> %uxtw)
330 %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
331 ret <vscale x 2 x i64> %res
334 define <vscale x 2 x i64> @gld1sh_d_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
335 ; CHECK-LABEL: gld1sh_d_uxtw:
337 ; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw]
339 %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
340 <vscale x 2 x i1> %pg,
341 <vscale x 2 x i64> %b)
342 %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1> %pg,
344 <vscale x 2 x i64> %uxtw)
345 %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
346 ret <vscale x 2 x i64> %res
349 define <vscale x 2 x i64> @gld1sw_d_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %offsets) {
350 ; CHECK-LABEL: gld1sw_d_uxtw:
352 ; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw]
354 %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
355 <vscale x 2 x i1> %pg,
356 <vscale x 2 x i64> %offsets)
357 %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1> %pg,
359 <vscale x 2 x i64> %uxtw)
360 %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
361 ret <vscale x 2 x i64> %res
364 declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.nxv2i8(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
365 declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.nxv2i16(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
366 declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.nxv2i32(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
367 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.nxv2i64(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
368 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.nxv2f64(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
370 declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
371 declare <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)