; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LD1H, LD1W, LD1D: base + 64-bit scaled offset
;   e.g. ld1h z0.d, p0/z, [x0, z0.d, lsl #1]
;

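; For each active lane i the per-lane address is %base + (%b[i] << shift):
; %b holds element indices and the shift matches the element size (#1 for
; halfwords, #2 for words, #3 for doublewords). In IR terms each lane
; behaves like:
;   %addr = getelementptr i16, i16* %base, i64 %idx   ; %idx taken from %b
;   %val  = load i16, i16* %addr                      ; only if lane is active
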
define <vscale x 2 x i64> @gld1h_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index
; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index
; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1d_index(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             i64* %base,
                                                                             <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_index_double(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                double* %base,
                                                                                <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW: base + 64-bit scaled offset
;   e.g. ld1sh z0.d, p0/z, [x0, z0.d, lsl #1]
;

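; These use the same gather intrinsics as above; selecting LD1H vs. LD1SH
; (and LD1W vs. LD1SW) is driven purely by whether the IR widens the loaded
; value with zext or sext. There is no scaled-index form for byte elements
; (LD1B/LD1SB gathers take unscaled offsets only), so no i8 variants appear.
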
define <vscale x 2 x i64> @gld1sh_index(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index
; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_index(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index
; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT: ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1H, LD1W, LD1D: base + 64-bit sxtw'd scaled offset
;   e.g. ld1h z0.d, p0/z, [x0, z0.d, sxtw #1]
;

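; These tests check that an explicit @llvm.aarch64.sve.sxtw call feeding the
; gather's offsets is folded into the addressing mode: the generated code
; should contain no separate sxtw instruction, only the sxtw-extended form
; of the load (hence the CHECK-NEXT: ret directly after the load).
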
define <vscale x 2 x i64> @gld1h_index_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index_sxtw
; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT: ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_index_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index_sxtw
; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT: ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1d_index_sxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_sxtw
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT: ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             i64* %base,
                                                                             <vscale x 2 x i64> %sxtw)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_index_double_sxtw(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double_sxtw
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT: ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                double* %base,
                                                                                <vscale x 2 x i64> %sxtw)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW: base + 64-bit sxtw'd scaled offset
;   e.g. ld1sh z0.d, p0/z, [x0, z0.d, sxtw #1]
;

define <vscale x 2 x i64> @gld1sh_index_sxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index_sxtw
; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT: ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_index_sxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index_sxtw
; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT: ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i64> %sxtw)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1H, LD1W, LD1D: base + 64-bit uxtw'd scaled offset
;   e.g. ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
;

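; As with the sxtw tests above, the explicit @llvm.aarch64.sve.uxtw call is
; expected to fold into the gather's uxtw addressing mode rather than be
; emitted as a separate instruction.
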
define <vscale x 2 x i64> @gld1h_index_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index_uxtw
; CHECK: ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT: ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_index_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index_uxtw
; CHECK: ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT: ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1d_index_uxtw(<vscale x 2 x i1> %pg, i64* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_uxtw
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT: ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                             i64* %base,
                                                                             <vscale x 2 x i64> %uxtw)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_index_double_uxtw(<vscale x 2 x i1> %pg, double* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double_uxtw
; CHECK: ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT: ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                double* %base,
                                                                                <vscale x 2 x i64> %uxtw)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW: base + 64-bit uxtw'd scaled offset
;   e.g. ld1sh z0.d, p0/z, [x0, z0.d, uxtw #1]
;

define <vscale x 2 x i64> @gld1sh_index_uxtw(<vscale x 2 x i1> %pg, i16* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index_uxtw
; CHECK: ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT: ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                             i16* %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_index_uxtw(<vscale x 2 x i1> %pg, i32* %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index_uxtw
; CHECK: ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT: ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef,
                                                                 <vscale x 2 x i1> %pg,
                                                                 <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                             i32* %base,
                                                                             <vscale x 2 x i64> %uxtw)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1>, i16*, <vscale x 2 x i64>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1>, i32*, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1>, i64*, <vscale x 2 x i64>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1>, double*, <vscale x 2 x i64>)

declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)