; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LD1H, LD1W, LD1D: base + 64-bit scaled offset
; e.g. ld1h z0.d, p0/z, [x0, z0.d, lsl #1]
;
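; For the i16 and i32 cases the zext of the gather result is expected to fold
; into the zero-extending LD1H/LD1W, so the checks expect no separate extend.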

define <vscale x 2 x i64> @gld1h_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1d_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_index_double(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                ptr %base,
                                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW: base + 64-bit scaled offset
; e.g. ld1sh z0.d, p0/z, [x0, z0.d, lsl #1]
;
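; Here the sext of the gather result is expected to select the sign-extending
; LD1SH/LD1SW forms instead of a separate extend.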

define <vscale x 2 x i64> @gld1sh_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1H, LD1W, LD1D: base + 64-bit sxtw'd scaled offset
; e.g. ld1h z0.d, p0/z, [x0, z0.d, sxtw #1]
;
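; The explicit @llvm.aarch64.sve.sxtw call on the offsets is expected to fold
; into the sxtw addressing mode of the gather, so no separate sxtw instruction
; appears in the checks.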

define <vscale x 2 x i64> @gld1h_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1d_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %sxtw)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_index_double_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                ptr %base,
                                                                                <vscale x 2 x i64> %sxtw)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW: base + 64-bit sxtw'd scaled offset
; e.g. ld1sh z0.d, p0/z, [x0, z0.d, sxtw #1]
;
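; As above, but combined with the sign-extending LD1SH/LD1SW forms.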

define <vscale x 2 x i64> @gld1sh_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1H, LD1W, LD1D: base + 64-bit uxtw'd scaled offset
; e.g. ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
;
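; The explicit @llvm.aarch64.sve.uxtw call on the offsets is expected to fold
; into the uxtw addressing mode of the gather.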

define <vscale x 2 x i64> @gld1h_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1d_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %uxtw)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_index_double_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                ptr %base,
                                                                                <vscale x 2 x i64> %uxtw)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW: base + 64-bit uxtw'd scaled offset
; e.g. ld1sh z0.d, p0/z, [x0, z0.d, uxtw #1]
;
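; As above, but combined with the sign-extending LD1SH/LD1SW forms.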

define <vscale x 2 x i64> @gld1sh_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             ptr %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)

declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)