1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -aarch64-sve-vector-bits-min=1024 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_1024
3 ; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_2048
5 target triple = "aarch64-unknown-linux-gnu"
; Zero-extending load <4 x i16> -> <4 x i32>: the result fits in one NEON
; register, so the expected lowering stays NEON (ldr d0 + ushll) with no SVE.
; NOTE(review): extraction dropped the trailing ret/'}' lines of this function.
7 define <4 x i32> @load_zext_v4i16i32(ptr %ap) vscale_range(2,0) #0 {
8 ; CHECK-LABEL: load_zext_v4i16i32:
10 ; CHECK-NEXT: ldr d0, [x0]
11 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
13 %a = load <4 x i16>, ptr %ap
14 %val = zext <4 x i16> %a to <4 x i32>
18 ; Don't try to use SVE for irregular types.
; i256 is not a legal SVE element type, so <2 x i256> is returned expanded in
; GPRs: low limbs come from the loaded q0 (fmov x0,d0 / mov x4,v0.d[1]) and
; all high limbs are zeroed via xzr moves, matching the zext semantics.
19 define <2 x i256> @load_zext_v2i64i256(ptr %ap) #0 {
20 ; CHECK-LABEL: load_zext_v2i64i256:
22 ; CHECK-NEXT: ldr q0, [x0]
23 ; CHECK-NEXT: mov x1, xzr
24 ; CHECK-NEXT: mov x2, xzr
25 ; CHECK-NEXT: mov x3, xzr
26 ; CHECK-NEXT: mov x5, xzr
27 ; CHECK-NEXT: mov x6, xzr
28 ; CHECK-NEXT: mov x4, v0.d[1]
29 ; CHECK-NEXT: fmov x0, d0
30 ; CHECK-NEXT: mov x7, xzr
32 %a = load <2 x i64>, ptr %ap
33 %val = zext <2 x i64> %a to <2 x i256>
; <8 x i32> exceeds a 128-bit NEON register, so this lowers to SVE: a
; predicated extending load (ld1h into .s elements performs the zext) and a
; st1w through x8 (AAPCS64 indirect-result pointer for the large return).
37 define <8 x i32> @load_zext_v8i16i32(ptr %ap) vscale_range(2,0) #0 {
38 ; CHECK-LABEL: load_zext_v8i16i32:
40 ; CHECK-NEXT: ptrue p0.s, vl8
41 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
42 ; CHECK-NEXT: st1w { z0.s }, p0, [x8]
44 %a = load <8 x i16>, ptr %ap
45 %val = zext <8 x i16> %a to <8 x i32>
; Same zext pattern at 16 elements; vscale_range(4,0) guarantees >=512-bit
; vectors, so one predicated ld1h/st1w pair (vl16) covers the whole vector.
49 define <16 x i32> @load_zext_v16i16i32(ptr %ap) vscale_range(4,0) #0 {
50 ; CHECK-LABEL: load_zext_v16i16i32:
52 ; CHECK-NEXT: ptrue p0.s, vl16
53 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
54 ; CHECK-NEXT: st1w { z0.s }, p0, [x8]
56 %a = load <16 x i16>, ptr %ap
57 %val = zext <16 x i16> %a to <16 x i32>
; 32-element zext; vscale_range(8,0) guarantees >=1024-bit vectors, so a
; single vl32-predicated extending load/store still suffices.
61 define <32 x i32> @load_zext_v32i16i32(ptr %ap) vscale_range(8,0) #0 {
62 ; CHECK-LABEL: load_zext_v32i16i32:
64 ; CHECK-NEXT: ptrue p0.s, vl32
65 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
66 ; CHECK-NEXT: st1w { z0.s }, p0, [x8]
68 %a = load <32 x i16>, ptr %ap
69 %val = zext <32 x i16> %a to <32 x i32>
; 64-element zext with no vscale_range: checks diverge per RUN line. At
; 1024-bit vectors the op is split into two vl32 halves addressed via the
; x9=#32 element offset (lsl #1 for i16 loads, lsl #2 for i32 stores); at
; 2048-bit vectors a single vl64 load/store pair handles everything.
73 define <64 x i32> @load_zext_v64i16i32(ptr %ap) #0 {
74 ; VBITS_GE_1024-LABEL: load_zext_v64i16i32:
75 ; VBITS_GE_1024: // %bb.0:
76 ; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
77 ; VBITS_GE_1024-NEXT: mov x9, #32 // =0x20
78 ; VBITS_GE_1024-NEXT: ld1h { z0.s }, p0/z, [x0, x9, lsl #1]
79 ; VBITS_GE_1024-NEXT: ld1h { z1.s }, p0/z, [x0]
80 ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
81 ; VBITS_GE_1024-NEXT: st1w { z1.s }, p0, [x8]
82 ; VBITS_GE_1024-NEXT: ret
84 ; VBITS_GE_2048-LABEL: load_zext_v64i16i32:
85 ; VBITS_GE_2048: // %bb.0:
86 ; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
87 ; VBITS_GE_2048-NEXT: ld1h { z0.s }, p0/z, [x0]
88 ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
89 ; VBITS_GE_2048-NEXT: ret
90 %a = load <64 x i16>, ptr %ap
91 %val = zext <64 x i16> %a to <64 x i32>
; Sign-extend counterpart of load_zext_v4i16i32: NEON-sized, so it lowers to
; ldr d0 + sshll (signed shift-left-long) instead of any SVE sequence.
95 define <4 x i32> @load_sext_v4i16i32(ptr %ap) vscale_range(2,0) #0 {
96 ; CHECK-LABEL: load_sext_v4i16i32:
98 ; CHECK-NEXT: ldr d0, [x0]
99 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
101 %a = load <4 x i16>, ptr %ap
102 %val = sext <4 x i16> %a to <4 x i32>
; 8-element sext via SVE: ld1sh is the sign-extending form of ld1h; result
; is written back through the indirect-result pointer in x8.
106 define <8 x i32> @load_sext_v8i16i32(ptr %ap) vscale_range(2,0) #0 {
107 ; CHECK-LABEL: load_sext_v8i16i32:
109 ; CHECK-NEXT: ptrue p0.s, vl8
110 ; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
111 ; CHECK-NEXT: st1w { z0.s }, p0, [x8]
113 %a = load <8 x i16>, ptr %ap
114 %val = sext <8 x i16> %a to <8 x i32>
; 16-element sext, single vl16 ld1sh/st1w pair under vscale_range(4,0).
118 define <16 x i32> @load_sext_v16i16i32(ptr %ap) vscale_range(4,0) #0 {
119 ; CHECK-LABEL: load_sext_v16i16i32:
121 ; CHECK-NEXT: ptrue p0.s, vl16
122 ; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
123 ; CHECK-NEXT: st1w { z0.s }, p0, [x8]
125 %a = load <16 x i16>, ptr %ap
126 %val = sext <16 x i16> %a to <16 x i32>
; 32-element sext, single vl32 ld1sh/st1w pair under vscale_range(8,0).
130 define <32 x i32> @load_sext_v32i16i32(ptr %ap) vscale_range(8,0) #0 {
131 ; CHECK-LABEL: load_sext_v32i16i32:
133 ; CHECK-NEXT: ptrue p0.s, vl32
134 ; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
135 ; CHECK-NEXT: st1w { z0.s }, p0, [x8]
137 %a = load <32 x i16>, ptr %ap
138 %val = sext <32 x i16> %a to <32 x i32>
; 64-element sext, mirroring load_zext_v64i16i32: two vl32 ld1sh/st1w halves
; (x9=#32 element offset) at 1024-bit vectors, one vl64 pair at 2048-bit.
142 define <64 x i32> @load_sext_v64i16i32(ptr %ap) #0 {
143 ; VBITS_GE_1024-LABEL: load_sext_v64i16i32:
144 ; VBITS_GE_1024: // %bb.0:
145 ; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
146 ; VBITS_GE_1024-NEXT: mov x9, #32 // =0x20
147 ; VBITS_GE_1024-NEXT: ld1sh { z0.s }, p0/z, [x0, x9, lsl #1]
148 ; VBITS_GE_1024-NEXT: ld1sh { z1.s }, p0/z, [x0]
149 ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
150 ; VBITS_GE_1024-NEXT: st1w { z1.s }, p0, [x8]
151 ; VBITS_GE_1024-NEXT: ret
153 ; VBITS_GE_2048-LABEL: load_sext_v64i16i32:
154 ; VBITS_GE_2048: // %bb.0:
155 ; VBITS_GE_2048-NEXT: ptrue p0.s, vl64
156 ; VBITS_GE_2048-NEXT: ld1sh { z0.s }, p0/z, [x0]
157 ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x8]
158 ; VBITS_GE_2048-NEXT: ret
159 %a = load <64 x i16>, ptr %ap
160 %val = sext <64 x i16> %a to <64 x i32>
; i8 -> i64 zext (8x widening) via .d-element ld1b. At 1024 bits the vector
; splits into two vl16 halves; note the byte-addressed loads use an unscaled
; x9 offset while the stores rescale it with lsl #3, and the generated code
; rematerializes #16 twice (mov w9 then mov x9).
164 define <32 x i64> @load_zext_v32i8i64(ptr %ap) #0 {
165 ; VBITS_GE_1024-LABEL: load_zext_v32i8i64:
166 ; VBITS_GE_1024: // %bb.0:
167 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
168 ; VBITS_GE_1024-NEXT: mov w9, #16 // =0x10
169 ; VBITS_GE_1024-NEXT: ld1b { z0.d }, p0/z, [x0, x9]
170 ; VBITS_GE_1024-NEXT: ld1b { z1.d }, p0/z, [x0]
171 ; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
172 ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
173 ; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
174 ; VBITS_GE_1024-NEXT: ret
176 ; VBITS_GE_2048-LABEL: load_zext_v32i8i64:
177 ; VBITS_GE_2048: // %bb.0:
178 ; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
179 ; VBITS_GE_2048-NEXT: ld1b { z0.d }, p0/z, [x0]
180 ; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
181 ; VBITS_GE_2048-NEXT: ret
182 %a = load <32 x i8>, ptr %ap
183 %val = zext <32 x i8> %a to <32 x i64>
; i8 -> i64 sext, identical shape to load_zext_v32i8i64 but using the
; sign-extending ld1sb loads.
187 define <32 x i64> @load_sext_v32i8i64(ptr %ap) #0 {
188 ; VBITS_GE_1024-LABEL: load_sext_v32i8i64:
189 ; VBITS_GE_1024: // %bb.0:
190 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
191 ; VBITS_GE_1024-NEXT: mov w9, #16 // =0x10
192 ; VBITS_GE_1024-NEXT: ld1sb { z0.d }, p0/z, [x0, x9]
193 ; VBITS_GE_1024-NEXT: ld1sb { z1.d }, p0/z, [x0]
194 ; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
195 ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
196 ; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
197 ; VBITS_GE_1024-NEXT: ret
199 ; VBITS_GE_2048-LABEL: load_sext_v32i8i64:
200 ; VBITS_GE_2048: // %bb.0:
201 ; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
202 ; VBITS_GE_2048-NEXT: ld1sb { z0.d }, p0/z, [x0]
203 ; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
204 ; VBITS_GE_2048-NEXT: ret
205 %a = load <32 x i8>, ptr %ap
206 %val = sext <32 x i8> %a to <32 x i64>
; i16 -> i64 zext (4x widening) via .d-element ld1h; two vl16 halves at
; 1024 bits (x9 offset scaled lsl #1 for loads, lsl #3 for stores), one
; vl32 pair at 2048 bits.
210 define <32 x i64> @load_zext_v32i16i64(ptr %ap) #0 {
211 ; VBITS_GE_1024-LABEL: load_zext_v32i16i64:
212 ; VBITS_GE_1024: // %bb.0:
213 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
214 ; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
215 ; VBITS_GE_1024-NEXT: ld1h { z0.d }, p0/z, [x0, x9, lsl #1]
216 ; VBITS_GE_1024-NEXT: ld1h { z1.d }, p0/z, [x0]
217 ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
218 ; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
219 ; VBITS_GE_1024-NEXT: ret
221 ; VBITS_GE_2048-LABEL: load_zext_v32i16i64:
222 ; VBITS_GE_2048: // %bb.0:
223 ; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
224 ; VBITS_GE_2048-NEXT: ld1h { z0.d }, p0/z, [x0]
225 ; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
226 ; VBITS_GE_2048-NEXT: ret
227 %a = load <32 x i16>, ptr %ap
228 %val = zext <32 x i16> %a to <32 x i64>
; i16 -> i64 sext, mirroring load_zext_v32i16i64 with ld1sh loads.
232 define <32 x i64> @load_sext_v32i16i64(ptr %ap) #0 {
233 ; VBITS_GE_1024-LABEL: load_sext_v32i16i64:
234 ; VBITS_GE_1024: // %bb.0:
235 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
236 ; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
237 ; VBITS_GE_1024-NEXT: ld1sh { z0.d }, p0/z, [x0, x9, lsl #1]
238 ; VBITS_GE_1024-NEXT: ld1sh { z1.d }, p0/z, [x0]
239 ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
240 ; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
241 ; VBITS_GE_1024-NEXT: ret
243 ; VBITS_GE_2048-LABEL: load_sext_v32i16i64:
244 ; VBITS_GE_2048: // %bb.0:
245 ; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
246 ; VBITS_GE_2048-NEXT: ld1sh { z0.d }, p0/z, [x0]
247 ; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
248 ; VBITS_GE_2048-NEXT: ret
249 %a = load <32 x i16>, ptr %ap
250 %val = sext <32 x i16> %a to <32 x i64>
; i32 -> i64 zext (2x widening) via .d-element ld1w; two vl16 halves at
; 1024 bits (loads scale x9 by lsl #2, stores by lsl #3), one vl32 pair at
; 2048 bits.
254 define <32 x i64> @load_zext_v32i32i64(ptr %ap) #0 {
255 ; VBITS_GE_1024-LABEL: load_zext_v32i32i64:
256 ; VBITS_GE_1024: // %bb.0:
257 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
258 ; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
259 ; VBITS_GE_1024-NEXT: ld1w { z0.d }, p0/z, [x0, x9, lsl #2]
260 ; VBITS_GE_1024-NEXT: ld1w { z1.d }, p0/z, [x0]
261 ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
262 ; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
263 ; VBITS_GE_1024-NEXT: ret
265 ; VBITS_GE_2048-LABEL: load_zext_v32i32i64:
266 ; VBITS_GE_2048: // %bb.0:
267 ; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
268 ; VBITS_GE_2048-NEXT: ld1w { z0.d }, p0/z, [x0]
269 ; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
270 ; VBITS_GE_2048-NEXT: ret
271 %a = load <32 x i32>, ptr %ap
272 %val = zext <32 x i32> %a to <32 x i64>
; i32 -> i64 sext, mirroring load_zext_v32i32i64 with ld1sw loads.
276 define <32 x i64> @load_sext_v32i32i64(ptr %ap) #0 {
277 ; VBITS_GE_1024-LABEL: load_sext_v32i32i64:
278 ; VBITS_GE_1024: // %bb.0:
279 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
280 ; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
281 ; VBITS_GE_1024-NEXT: ld1sw { z0.d }, p0/z, [x0, x9, lsl #2]
282 ; VBITS_GE_1024-NEXT: ld1sw { z1.d }, p0/z, [x0]
283 ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
284 ; VBITS_GE_1024-NEXT: st1d { z1.d }, p0, [x8]
285 ; VBITS_GE_1024-NEXT: ret
287 ; VBITS_GE_2048-LABEL: load_sext_v32i32i64:
288 ; VBITS_GE_2048: // %bb.0:
289 ; VBITS_GE_2048-NEXT: ptrue p0.d, vl32
290 ; VBITS_GE_2048-NEXT: ld1sw { z0.d }, p0/z, [x0]
291 ; VBITS_GE_2048-NEXT: st1d { z0.d }, p0, [x8]
292 ; VBITS_GE_2048-NEXT: ret
293 %a = load <32 x i32>, ptr %ap
294 %val = sext <32 x i32> %a to <32 x i64>
; Attribute group #0: every function above is compiled with SVE enabled.
298 attributes #0 = { "target-features"="+sve" }