1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
5 ; LD1B, LD1W, LD1H, LD1D: vector base + scalar offset (index)
6 ; e.g. ld1b { z0.d }, p0/z, [x0, z0.d]
; Gather load of i8 elements through a <vscale x 4 x i32> vector base; the result is
; zero-extended to i32 lanes. The CHECK line expects one LD1B (.s) using UXTW addressing.
10 define <vscale x 4 x i32> @gld1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
11 ; CHECK-LABEL: gld1b_s_scalar_offset:
13 ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
15 %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
16 <vscale x 4 x i32> %base,
18 %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
19 ret <vscale x 4 x i32> %res
; Gather load of i8 elements through a <vscale x 2 x i64> vector base; the result is
; zero-extended to i64 lanes. The CHECK line expects one LD1B (.d) with no extend modifier.
22 define <vscale x 2 x i64> @gld1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
23 ; CHECK-LABEL: gld1b_d_scalar_offset:
25 ; CHECK-NEXT: ld1b { z0.d }, p0/z, [x0, z0.d]
27 %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
28 <vscale x 2 x i64> %base,
30 %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
31 ret <vscale x 2 x i64> %res
; Gather load of i16 elements through a <vscale x 4 x i32> vector base; the result is
; zero-extended to i32 lanes. The CHECK line expects one LD1H (.s) using UXTW addressing.
35 define <vscale x 4 x i32> @gld1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
36 ; CHECK-LABEL: gld1h_s_scalar_offset:
38 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
40 %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
41 <vscale x 4 x i32> %base,
43 %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
44 ret <vscale x 4 x i32> %res
; Gather load of i16 elements through a <vscale x 2 x i64> vector base; the result is
; zero-extended to i64 lanes. The CHECK line expects one LD1H (.d) with no extend modifier.
47 define <vscale x 2 x i64> @gld1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
48 ; CHECK-LABEL: gld1h_d_scalar_offset:
50 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, z0.d]
52 %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
53 <vscale x 2 x i64> %base,
55 %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
56 ret <vscale x 2 x i64> %res
; Gather load of i32 elements through a <vscale x 4 x i32> vector base; the loaded vector
; is returned as-is (no extension). The CHECK line expects one LD1W (.s) using UXTW addressing.
60 define <vscale x 4 x i32> @gld1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
61 ; CHECK-LABEL: gld1w_s_scalar_offset:
63 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
65 %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
66 <vscale x 4 x i32> %base,
68 ret <vscale x 4 x i32> %load
; Gather load of i32 elements through a <vscale x 2 x i64> vector base; the result is
; zero-extended to i64 lanes. The CHECK line expects one LD1W (.d) with no extend modifier.
71 define <vscale x 2 x i64> @gld1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
72 ; CHECK-LABEL: gld1w_d_scalar_offset:
74 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, z0.d]
76 %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
77 <vscale x 2 x i64> %base,
79 %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
80 ret <vscale x 2 x i64> %res
; Floating-point variant: gather load of float elements through a <vscale x 4 x i32> vector
; base, returned directly. The CHECK line expects one LD1W (.s) using UXTW addressing.
83 define <vscale x 4 x float> @gld1w_s_scalar_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
84 ; CHECK-LABEL: gld1w_s_scalar_offset_float:
86 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
88 %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
89 <vscale x 4 x i32> %base,
91 ret <vscale x 4 x float> %load
; Gather load of i64 elements through a <vscale x 2 x i64> vector base; the loaded vector
; is returned as-is (no extension). The CHECK line expects one LD1D (.d).
95 define <vscale x 2 x i64> @gld1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
96 ; CHECK-LABEL: gld1d_d_scalar_offset:
98 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d]
100 %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
101 <vscale x 2 x i64> %base,
103 ret <vscale x 2 x i64> %load
; Floating-point variant: gather load of double elements through a <vscale x 2 x i64> vector
; base, returned directly. The CHECK line expects one LD1D (.d).
106 define <vscale x 2 x double> @gld1d_d_scalar_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
107 ; CHECK-LABEL: gld1d_d_scalar_offset_double:
109 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, z0.d]
111 %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
112 <vscale x 2 x i64> %base,
114 ret <vscale x 2 x double> %load
117 ; LD1SB, LD1SW, LD1SH: vector base + scalar offset (index)
118 ; e.g. ld1sb { z0.d }, p0/z, [x0, z0.d]
; Gather load of i8 elements through a <vscale x 4 x i32> vector base; the result is
; sign-extended to i32 lanes. The CHECK line expects one LD1SB (.s) using UXTW addressing.
122 define <vscale x 4 x i32> @gld1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
123 ; CHECK-LABEL: gld1sb_s_scalar_offset:
125 ; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
127 %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
128 <vscale x 4 x i32> %base,
130 %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
131 ret <vscale x 4 x i32> %res
; Gather load of i8 elements through a <vscale x 2 x i64> vector base; the result is
; sign-extended to i64 lanes. The CHECK line expects one LD1SB (.d) with no extend modifier.
134 define <vscale x 2 x i64> @gld1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
135 ; CHECK-LABEL: gld1sb_d_scalar_offset:
137 ; CHECK-NEXT: ld1sb { z0.d }, p0/z, [x0, z0.d]
139 %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
140 <vscale x 2 x i64> %base,
142 %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
143 ret <vscale x 2 x i64> %res
; Gather load of i16 elements through a <vscale x 4 x i32> vector base; the result is
; sign-extended to i32 lanes. The CHECK line expects one LD1SH (.s) using UXTW addressing.
147 define <vscale x 4 x i32> @gld1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
148 ; CHECK-LABEL: gld1sh_s_scalar_offset:
150 ; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
152 %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
153 <vscale x 4 x i32> %base,
155 %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
156 ret <vscale x 4 x i32> %res
; Gather load of i16 elements through a <vscale x 2 x i64> vector base; the result is
; sign-extended to i64 lanes. The CHECK line expects one LD1SH (.d) with no extend modifier.
159 define <vscale x 2 x i64> @gld1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
160 ; CHECK-LABEL: gld1sh_d_scalar_offset:
162 ; CHECK-NEXT: ld1sh { z0.d }, p0/z, [x0, z0.d]
164 %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
165 <vscale x 2 x i64> %base,
167 %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
168 ret <vscale x 2 x i64> %res
; Gather load of i32 elements through a <vscale x 2 x i64> vector base; the result is
; sign-extended to i64 lanes. The CHECK line expects one LD1SW (.d) with no extend modifier.
172 define <vscale x 2 x i64> @gld1sw_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
173 ; CHECK-LABEL: gld1sw_d_scalar_offset:
175 ; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x0, z0.d]
177 %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
178 <vscale x 2 x i64> %base,
180 %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
181 ret <vscale x 2 x i64> %res
; Declarations of the llvm.aarch64.sve.ld1.gather.scalar.offset overloads called by the tests
; in this file. Each takes (predicate, vector base, i64 scalar offset); the name suffix encodes
; the result element type followed by the base vector type.
185 declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
186 declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
189 declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
190 declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
193 declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
194 declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
196 declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
199 declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
201 declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)