; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -mattr=+sve %s -o - | FileCheck %s

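; Check that an add of a scalable (vscale-based) offset and a fixed offset to
; the same base pointer is reassociated so that each part can fold into the
; SVE load addressing mode: the fixed offset into a register or immediate
; offset, and the scalable offset into a "mul vl" immediate where possible.
; Function names encode the order of the offsets, e.g. @i8_1v_4s adds one
; vector length and then 4 bytes.
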
define <vscale x 16 x i8> @i8_1v_4s(ptr %b) {
; CHECK-LABEL: i8_1v_4s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 16 x i8> @i8_4s_1v(ptr %b) {
; CHECK-LABEL: i8_4s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 4
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 8 x i16> @i16_1v_8s(ptr %b) {
; CHECK-LABEL: i16_1v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 3
  %add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_1v(ptr %b) {
; CHECK-LABEL: i16_8s_1v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 3
  %add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_2v_8s(ptr %b) {
; CHECK-LABEL: i16_2v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #2
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr = getelementptr inbounds i16, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_2v(ptr %b) {
; CHECK-LABEL: i16_8s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 4
  %add.ptr1 = getelementptr inbounds i16, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 4 x i32> @i32_1v_16s(ptr %b) {
; CHECK-LABEL: i32_1v_16s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 2
  %add.ptr = getelementptr inbounds i32, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @i32_16s_2v(ptr %b) {
; CHECK-LABEL: i32_16s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, x0, #16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 16
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 2
  %add.ptr1 = getelementptr inbounds i32, ptr %add.ptr, i64 %1
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 2 x i64> @i64_1v_32s(ptr %b) {
; CHECK-LABEL: i64_1v_32s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rdvl x8, #1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %add.ptr = getelementptr inbounds i64, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

define <vscale x 2 x i64> @i64_32s_2v(ptr %b) {
; CHECK-LABEL: i64_32s_2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, x0, #32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 32
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = shl nuw nsw i64 %0, 1
  %add.ptr1 = getelementptr inbounds i64, ptr %add.ptr, i64 %1
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

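; As above, but with a negative scalable offset of -2 vector lengths
; (-32 x vscale bytes).
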
define <vscale x 16 x i8> @i8_m2v_4s(ptr %b) {
; CHECK-LABEL: i8_m2v_4s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov w9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, x9]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul nsw i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 4
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 16 x i8> @i8_4s_m2v(ptr %b) {
; CHECK-LABEL: i8_4s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #4
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 4
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul nsw i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 16 x i8>, ptr %add.ptr1, align 16
  ret <vscale x 16 x i8> %2
}

define <vscale x 8 x i16> @i16_m2v_8s(ptr %b) {
; CHECK-LABEL: i16_m2v_8s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, x9, lsl #1]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul nsw i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 8
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 8 x i16> @i16_8s_m2v(ptr %b) {
; CHECK-LABEL: i16_8s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #8
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 8
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul nsw i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 8 x i16>, ptr %add.ptr1, align 16
  ret <vscale x 8 x i16> %2
}

define <vscale x 4 x i32> @i32_m2v_16s(ptr %b) {
; CHECK-LABEL: i32_m2v_16s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, x9, lsl #2]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul nsw i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 16
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 4 x i32> @i32_16s_m2v(ptr %b) {
; CHECK-LABEL: i32_16s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, x0, #16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 16
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul nsw i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 4 x i32>, ptr %add.ptr1, align 16
  ret <vscale x 4 x i32> %2
}

define <vscale x 2 x i64> @i64_m2v_32s(ptr %b) {
; CHECK-LABEL: i64_m2v_32s:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    cnth x8, all, mul #4
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov x9, #4 // =0x4
; CHECK-NEXT:    sub x8, x0, x8
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, x9, lsl #3]
; CHECK-NEXT:    ret
entry:
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul nsw i64 %0, -32
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 %1
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 32
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

define <vscale x 2 x i64> @i64_32s_m2v(ptr %b) {
; CHECK-LABEL: i64_32s_m2v:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, x0, #32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x8, #-2, mul vl]
; CHECK-NEXT:    ret
entry:
  %add.ptr = getelementptr inbounds i8, ptr %b, i64 32
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = mul nsw i64 %0, -32
  %add.ptr1 = getelementptr inbounds i8, ptr %add.ptr, i64 %1
  %2 = load <vscale x 2 x i64>, ptr %add.ptr1, align 16
  ret <vscale x 2 x i64> %2
}

declare i64 @llvm.vscale.i64()