; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048

target triple = "aarch64-unknown-linux-gnu"
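
; Extending loads of fixed-length vectors. Extends whose result fits in a
; 128-bit NEON register are expected to stay on NEON; wider vectors are
; expected to lower to predicated SVE extending loads, sized by the minimum
; vector length implied by vscale_range / -aarch64-sve-vector-bits-min.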
define <4 x i32> @load_zext_v4i16i32(ptr %ap) vscale_range(2,0) #0 {
; CHECK-LABEL: load_zext_v4i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %ap
  %val = zext <4 x i16> %a to <4 x i32>
  ret <4 x i32> %val
}

; Don't try to use SVE for irregular types.
define <2 x i256> @load_zext_v2i64i256(ptr %ap) #0 {
; CHECK-LABEL: load_zext_v2i64i256:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    mov x1, xzr
; CHECK-NEXT:    mov x2, xzr
; CHECK-NEXT:    mov x3, xzr
; CHECK-NEXT:    mov x5, xzr
; CHECK-NEXT:    mov x6, xzr
; CHECK-NEXT:    mov x4, v0.d[1]
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    mov x7, xzr
; CHECK-NEXT:    ret
  %a = load <2 x i64>, ptr %ap
  %val = zext <2 x i64> %a to <2 x i256>
  ret <2 x i256> %val
}
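
; i16 -> i32 zero-extending loads wider than 128 bits are expected to use a
; predicated SVE ld1h into .s elements, with the predicate sized to the fixed
; vector length (vl8/vl16/vl32).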
define void @load_zext_v8i16i32(ptr %ap, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: load_zext_v8i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %ap
  %val = zext <8 x i16> %a to <8 x i32>
  store <8 x i32> %val, ptr %b
  ret void
}

define void @load_zext_v16i16i32(ptr %ap, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: load_zext_v16i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %ap
  %val = zext <16 x i16> %a to <16 x i32>
  store <16 x i32> %val, ptr %b
  ret void
}

define void @load_zext_v32i16i32(ptr %ap, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: load_zext_v32i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = zext <32 x i16> %a to <32 x i32>
  store <32 x i32> %val, ptr %b
  ret void
}
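
; A <64 x i32> result is 2048 bits, so with 1024-bit vectors the extending load
; is expected to split into two vl32 halves; with 2048-bit vectors it fits in a
; single vl64 operation.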
define void @load_zext_v64i16i32(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v64i16i32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_1024-NEXT:    ld1h { z0.s }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1h { z1.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_1024-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v64i16i32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl64
; VBITS_GE_2048-NEXT:    ld1h { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <64 x i16>, ptr %ap
  %val = zext <64 x i16> %a to <64 x i32>
  store <64 x i32> %val, ptr %b
  ret void
}
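
; Sign-extending versions of the i16 -> i32 loads above; the wider-than-NEON
; cases are expected to select the signed SVE form (ld1sh).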
define <4 x i32> @load_sext_v4i16i32(ptr %ap) vscale_range(2,0) #0 {
; CHECK-LABEL: load_sext_v4i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-NEXT:    ret
  %a = load <4 x i16>, ptr %ap
  %val = sext <4 x i16> %a to <4 x i32>
  ret <4 x i32> %val
}

define void @load_sext_v8i16i32(ptr %ap, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: load_sext_v8i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <8 x i16>, ptr %ap
  %val = sext <8 x i16> %a to <8 x i32>
  store <8 x i32> %val, ptr %b
  ret void
}

define void @load_sext_v16i16i32(ptr %ap, ptr %b) vscale_range(4,0) #0 {
; CHECK-LABEL: load_sext_v16i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <16 x i16>, ptr %ap
  %val = sext <16 x i16> %a to <16 x i32>
  store <16 x i32> %val, ptr %b
  ret void
}

define void @load_sext_v32i16i32(ptr %ap, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: load_sext_v32i16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = sext <32 x i16> %a to <32 x i32>
  store <32 x i32> %val, ptr %b
  ret void
}

define void @load_sext_v64i16i32(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v64i16i32:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.s, vl32
; VBITS_GE_1024-NEXT:    mov x8, #32 // =0x20
; VBITS_GE_1024-NEXT:    ld1sh { z0.s }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1sh { z1.s }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_1024-NEXT:    st1w { z1.s }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v64i16i32:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.s, vl64
; VBITS_GE_2048-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <64 x i16>, ptr %ap
  %val = sext <64 x i16> %a to <64 x i32>
  store <64 x i32> %val, ptr %b
  ret void
}
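
; i8/i16/i32 -> i64 extending loads. A <32 x i64> result is 2048 bits, so the
; VBITS_GE_1024 checks expect the operation split into two vl16 halves, while
; VBITS_GE_2048 expects a single vl32 extending load.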
define void @load_zext_v32i8i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i8i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov w8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1b { z0.d }, p0/z, [x0, x8]
; VBITS_GE_1024-NEXT:    ld1b { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v32i8i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1b { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i8>, ptr %ap
  %val = zext <32 x i8> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

define void @load_sext_v32i8i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i8i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov w8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1sb { z0.d }, p0/z, [x0, x8]
; VBITS_GE_1024-NEXT:    ld1sb { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v32i8i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i8>, ptr %ap
  %val = sext <32 x i8> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

define void @load_zext_v32i16i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i16i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1h { z0.d }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1h { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v32i16i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1h { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = zext <32 x i16> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

define void @load_sext_v32i16i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i16i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1sh { z0.d }, p0/z, [x0, x8, lsl #1]
; VBITS_GE_1024-NEXT:    ld1sh { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v32i16i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1sh { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i16>, ptr %ap
  %val = sext <32 x i16> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

define void @load_zext_v32i32i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_zext_v32i32i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1w { z0.d }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_1024-NEXT:    ld1w { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_zext_v32i32i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1w { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i32>, ptr %ap
  %val = zext <32 x i32> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

define void @load_sext_v32i32i64(ptr %ap, ptr %b) #0 {
; VBITS_GE_1024-LABEL: load_sext_v32i32i64:
; VBITS_GE_1024:       // %bb.0:
; VBITS_GE_1024-NEXT:    ptrue p0.d, vl16
; VBITS_GE_1024-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_1024-NEXT:    ld1sw { z0.d }, p0/z, [x0, x8, lsl #2]
; VBITS_GE_1024-NEXT:    ld1sw { z1.d }, p0/z, [x0]
; VBITS_GE_1024-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_1024-NEXT:    st1d { z1.d }, p0, [x1]
; VBITS_GE_1024-NEXT:    ret
;
; VBITS_GE_2048-LABEL: load_sext_v32i32i64:
; VBITS_GE_2048:       // %bb.0:
; VBITS_GE_2048-NEXT:    ptrue p0.d, vl32
; VBITS_GE_2048-NEXT:    ld1sw { z0.d }, p0/z, [x0]
; VBITS_GE_2048-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_2048-NEXT:    ret
  %a = load <32 x i32>, ptr %ap
  %val = sext <32 x i32> %a to <32 x i64>
  store <32 x i64> %val, ptr %b
  ret void
}

attributes #0 = { "target-features"="+sve" }