; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LD1H, LD1W, LD1D: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw)
;                   extended to 64 bits
;   e.g. ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
;

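; For reference, a sketch of the per-lane address computation these forms
; perform (standard SVE gather addressing; informational only, not part of
; the checked output):
;   addr[i] = base + (extend(offset[i]) << shift)
; where extend is uxtw (zero-extend) or sxtw (sign-extend) of the 32-bit
; offset, and shift matches the access size: #1 for 16-bit, #2 for 32-bit,
; #3 for 64-bit elements.
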
define <vscale x 4 x i32> @gld1h_s_uxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1h_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gld1h_s_sxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1h_s_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1h_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1h_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1h_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1h_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x i32> @gld1w_s_uxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @gld1w_s_sxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gld1w_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1w_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1w_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gld1w_s_uxtw_index_float(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_uxtw_index_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                    ptr %base,
                                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @gld1w_s_sxtw_index_float(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_sxtw_index_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                    ptr %base,
                                                                                    <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x i64> @gld1d_s_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @gld1d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_uxtw_index_double(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_uxtw_index_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                     ptr %base,
                                                                                     <vscale x 2 x i32> %b)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @gld1d_sxtw_index_double(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_sxtw_index_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1> %pg,
                                                                                     ptr %base,
                                                                                     <vscale x 2 x i32> %b)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw)
;               extended to 64 bits
;   e.g. ld1sh z0.d, p0/z, [x0, z0.d, uxtw #1]
;

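; For reference: these tests use the same gather intrinsics as above; the
; only IR-level difference is that the narrow result is widened with sext
; rather than zext, which is what selects the sign-extending LD1S* forms.
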
define <vscale x 4 x i32> @gld1sh_s_uxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1sh_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gld1sh_s_sxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1sh_s_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1sh_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sh_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sh_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sh_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sw_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sw_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg,
                                                                                  ptr %base,
                                                                                  <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)