1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s -check-prefix=NO_SCALAR_INC
3 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+use-scalar-inc-vl -verify-machineinstrs < %s | FileCheck %s
4 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -verify-machineinstrs < %s | FileCheck %s
; Vector add of a splat of vscale * 8 to a <vscale x 8 x i16> value (the
; multiplier matches the element count of the vector type). Both the
; NO_SCALAR_INC run and the CHECK runs expect a single `inch z0.h`.
6 define <vscale x 8 x i16> @inch_vec(<vscale x 8 x i16> %a) {
7 ; NO_SCALAR_INC-LABEL: inch_vec:
8 ; NO_SCALAR_INC: // %bb.0:
9 ; NO_SCALAR_INC-NEXT: inch z0.h
10 ; NO_SCALAR_INC-NEXT: ret
12 ; CHECK-LABEL: inch_vec:
14 ; CHECK-NEXT: inch z0.h
; Splat idiom: insert the scaled vscale into lane 0, then shuffle with an
; all-zero mask so every lane reads lane 0.
16 %vscale = call i16 @llvm.vscale.i16()
17 %mul = mul i16 %vscale, 8
18 %vl = insertelement <vscale x 8 x i16> undef, i16 %mul, i32 0
19 %vl.splat = shufflevector <vscale x 8 x i16> %vl, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
20 %res = add <vscale x 8 x i16> %a, %vl.splat
21 ret <vscale x 8 x i16> %res
; Vector add of a splat of vscale * 4 to a <vscale x 4 x i32> value (the
; multiplier matches the element count of the vector type). Both the
; NO_SCALAR_INC run and the CHECK runs expect a single `incw z0.s`.
24 define <vscale x 4 x i32> @incw_vec(<vscale x 4 x i32> %a) {
25 ; NO_SCALAR_INC-LABEL: incw_vec:
26 ; NO_SCALAR_INC: // %bb.0:
27 ; NO_SCALAR_INC-NEXT: incw z0.s
28 ; NO_SCALAR_INC-NEXT: ret
30 ; CHECK-LABEL: incw_vec:
32 ; CHECK-NEXT: incw z0.s
; Splat idiom: insert the scaled vscale into lane 0, then shuffle with an
; all-zero mask so every lane reads lane 0.
34 %vscale = call i32 @llvm.vscale.i32()
35 %mul = mul i32 %vscale, 4
36 %vl = insertelement <vscale x 4 x i32> undef, i32 %mul, i32 0
37 %vl.splat = shufflevector <vscale x 4 x i32> %vl, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
38 %res = add <vscale x 4 x i32> %a, %vl.splat
39 ret <vscale x 4 x i32> %res
; Vector add of a splat of vscale * 2 to a <vscale x 2 x i64> value (the
; multiplier matches the element count of the vector type). Both the
; NO_SCALAR_INC run and the CHECK runs expect a single `incd z0.d`.
42 define <vscale x 2 x i64> @incd_vec(<vscale x 2 x i64> %a) {
43 ; NO_SCALAR_INC-LABEL: incd_vec:
44 ; NO_SCALAR_INC: // %bb.0:
45 ; NO_SCALAR_INC-NEXT: incd z0.d
46 ; NO_SCALAR_INC-NEXT: ret
48 ; CHECK-LABEL: incd_vec:
50 ; CHECK-NEXT: incd z0.d
; Splat idiom: insert the scaled vscale into lane 0, then shuffle with an
; all-zero mask so every lane reads lane 0.
52 %vscale = call i64 @llvm.vscale.i64()
53 %mul = mul i64 %vscale, 2
54 %vl = insertelement <vscale x 2 x i64> undef, i64 %mul, i32 0
55 %vl.splat = shufflevector <vscale x 2 x i64> %vl, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
56 %res = add <vscale x 2 x i64> %a, %vl.splat
57 ret <vscale x 2 x i64> %res
; Vector subtract of a splat of vscale * 16 from a <vscale x 8 x i16> value,
; i.e. twice the vector's element-count multiplier of 8. Both the
; NO_SCALAR_INC run and the CHECK runs expect `dech z0.h, all, mul #2`.
60 define <vscale x 8 x i16> @dech_vec(<vscale x 8 x i16> %a) {
61 ; NO_SCALAR_INC-LABEL: dech_vec:
62 ; NO_SCALAR_INC: // %bb.0:
63 ; NO_SCALAR_INC-NEXT: dech z0.h, all, mul #2
64 ; NO_SCALAR_INC-NEXT: ret
66 ; CHECK-LABEL: dech_vec:
68 ; CHECK-NEXT: dech z0.h, all, mul #2
; Splat idiom: insert the scaled vscale into lane 0, then shuffle with an
; all-zero mask so every lane reads lane 0.
70 %vscale = call i16 @llvm.vscale.i16()
71 %mul = mul i16 %vscale, 16
72 %vl = insertelement <vscale x 8 x i16> undef, i16 %mul, i32 0
73 %vl.splat = shufflevector <vscale x 8 x i16> %vl, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
74 %res = sub <vscale x 8 x i16> %a, %vl.splat
75 ret <vscale x 8 x i16> %res
; Vector subtract of a splat of vscale * 16 from a <vscale x 4 x i32> value,
; i.e. four times the vector's element-count multiplier of 4. Both the
; NO_SCALAR_INC run and the CHECK runs expect `decw z0.s, all, mul #4`.
78 define <vscale x 4 x i32> @decw_vec(<vscale x 4 x i32> %a) {
79 ; NO_SCALAR_INC-LABEL: decw_vec:
80 ; NO_SCALAR_INC: // %bb.0:
81 ; NO_SCALAR_INC-NEXT: decw z0.s, all, mul #4
82 ; NO_SCALAR_INC-NEXT: ret
84 ; CHECK-LABEL: decw_vec:
86 ; CHECK-NEXT: decw z0.s, all, mul #4
; Splat idiom: insert the scaled vscale into lane 0, then shuffle with an
; all-zero mask so every lane reads lane 0.
88 %vscale = call i32 @llvm.vscale.i32()
89 %mul = mul i32 %vscale, 16
90 %vl = insertelement <vscale x 4 x i32> undef, i32 %mul, i32 0
91 %vl.splat = shufflevector <vscale x 4 x i32> %vl, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
92 %res = sub <vscale x 4 x i32> %a, %vl.splat
93 ret <vscale x 4 x i32> %res
; Vector subtract of a splat of vscale * 16 from a <vscale x 2 x i64> value,
; i.e. eight times the vector's element-count multiplier of 2. Both the
; NO_SCALAR_INC run and the CHECK runs expect `decd z0.d, all, mul #8`.
96 define <vscale x 2 x i64> @decd_vec(<vscale x 2 x i64> %a) {
97 ; NO_SCALAR_INC-LABEL: decd_vec:
98 ; NO_SCALAR_INC: // %bb.0:
99 ; NO_SCALAR_INC-NEXT: decd z0.d, all, mul #8
100 ; NO_SCALAR_INC-NEXT: ret
102 ; CHECK-LABEL: decd_vec:
104 ; CHECK-NEXT: decd z0.d, all, mul #8
; Splat idiom: insert the scaled vscale into lane 0, then shuffle with an
; all-zero mask so every lane reads lane 0.
106 %vscale = call i64 @llvm.vscale.i64()
107 %mul = mul i64 %vscale, 16
108 %vl = insertelement <vscale x 2 x i64> undef, i64 %mul, i32 0
109 %vl.splat = shufflevector <vscale x 2 x i64> %vl, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
110 %res = sub <vscale x 2 x i64> %a, %vl.splat
111 ret <vscale x 2 x i64> %res
114 ; NOTE: As there is no need for the predicate pattern we
115 ; fall back to using ADDVL with its larger immediate range.
; Scalar i64 add of vscale * 16. Both the NO_SCALAR_INC run and the CHECK runs
; expect the same single instruction, `addvl x0, x0, #1`.
116 define i64 @incb_scalar_i64(i64 %a) {
117 ; NO_SCALAR_INC-LABEL: incb_scalar_i64:
118 ; NO_SCALAR_INC: // %bb.0:
119 ; NO_SCALAR_INC-NEXT: addvl x0, x0, #1
120 ; NO_SCALAR_INC-NEXT: ret
122 ; CHECK-LABEL: incb_scalar_i64:
124 ; CHECK-NEXT: addvl x0, x0, #1
126 %vscale = call i64 @llvm.vscale.i64()
127 %mul = mul i64 %vscale, 16
128 %add = add i64 %a, %mul
; Scalar i64 add of vscale * 8. The NO_SCALAR_INC run expects `cnth x8`
; followed by a separate add; the CHECK runs expect the single instruction
; `inch x0`.
132 define i64 @inch_scalar_i64(i64 %a) {
133 ; NO_SCALAR_INC-LABEL: inch_scalar_i64:
134 ; NO_SCALAR_INC: // %bb.0:
135 ; NO_SCALAR_INC-NEXT: cnth x8
136 ; NO_SCALAR_INC-NEXT: add x0, x0, x8
137 ; NO_SCALAR_INC-NEXT: ret
139 ; CHECK-LABEL: inch_scalar_i64:
141 ; CHECK-NEXT: inch x0
143 %vscale = call i64 @llvm.vscale.i64()
144 %mul = mul i64 %vscale, 8
145 %add = add i64 %a, %mul
; Scalar i64 add of vscale * 4. The NO_SCALAR_INC run expects `cntw x8`
; followed by a separate add; the CHECK runs expect the single instruction
; `incw x0`.
149 define i64 @incw_scalar_i64(i64 %a) {
150 ; NO_SCALAR_INC-LABEL: incw_scalar_i64:
151 ; NO_SCALAR_INC: // %bb.0:
152 ; NO_SCALAR_INC-NEXT: cntw x8
153 ; NO_SCALAR_INC-NEXT: add x0, x0, x8
154 ; NO_SCALAR_INC-NEXT: ret
156 ; CHECK-LABEL: incw_scalar_i64:
158 ; CHECK-NEXT: incw x0
160 %vscale = call i64 @llvm.vscale.i64()
161 %mul = mul i64 %vscale, 4
162 %add = add i64 %a, %mul
; Scalar i64 add of vscale * 2. The NO_SCALAR_INC run expects `cntd x8`
; followed by a separate add; the CHECK runs expect the single instruction
; `incd x0`.
166 define i64 @incd_scalar_i64(i64 %a) {
167 ; NO_SCALAR_INC-LABEL: incd_scalar_i64:
168 ; NO_SCALAR_INC: // %bb.0:
169 ; NO_SCALAR_INC-NEXT: cntd x8
170 ; NO_SCALAR_INC-NEXT: add x0, x0, x8
171 ; NO_SCALAR_INC-NEXT: ret
173 ; CHECK-LABEL: incd_scalar_i64:
175 ; CHECK-NEXT: incd x0
177 %vscale = call i64 @llvm.vscale.i64()
178 %mul = mul i64 %vscale, 2
179 %add = add i64 %a, %mul
183 ; NOTE: As there is no need for the predicate pattern we
184 ; fall back to using ADDVL with its larger immediate range.
; Scalar i64 subtract of vscale * 32. Both the NO_SCALAR_INC run and the CHECK
; runs expect the same single instruction, `addvl x0, x0, #-2`.
185 define i64 @decb_scalar_i64(i64 %a) {
186 ; NO_SCALAR_INC-LABEL: decb_scalar_i64:
187 ; NO_SCALAR_INC: // %bb.0:
188 ; NO_SCALAR_INC-NEXT: addvl x0, x0, #-2
189 ; NO_SCALAR_INC-NEXT: ret
191 ; CHECK-LABEL: decb_scalar_i64:
193 ; CHECK-NEXT: addvl x0, x0, #-2
195 %vscale = call i64 @llvm.vscale.i64()
196 %mul = mul i64 %vscale, 32
197 %sub = sub i64 %a, %mul
; Scalar i64 subtract of vscale * 24 (3 * 8). The NO_SCALAR_INC run expects a
; count (`cnth ... mul #3`), a negate and an add; the CHECK runs expect the
; single instruction `dech x0, all, mul #3`.
201 define i64 @dech_scalar_i64(i64 %a) {
202 ; NO_SCALAR_INC-LABEL: dech_scalar_i64:
203 ; NO_SCALAR_INC: // %bb.0:
204 ; NO_SCALAR_INC-NEXT: cnth x8, all, mul #3
205 ; NO_SCALAR_INC-NEXT: neg x8, x8
206 ; NO_SCALAR_INC-NEXT: add x0, x0, x8
207 ; NO_SCALAR_INC-NEXT: ret
209 ; CHECK-LABEL: dech_scalar_i64:
211 ; CHECK-NEXT: dech x0, all, mul #3
213 %vscale = call i64 @llvm.vscale.i64()
214 %mul = mul i64 %vscale, 24
215 %sub = sub i64 %a, %mul
; Scalar i64 subtract of vscale * 12 (3 * 4). The NO_SCALAR_INC run expects a
; count (`cntw ... mul #3`), a negate and an add; the CHECK runs expect the
; single instruction `decw x0, all, mul #3`.
219 define i64 @decw_scalar_i64(i64 %a) {
220 ; NO_SCALAR_INC-LABEL: decw_scalar_i64:
221 ; NO_SCALAR_INC: // %bb.0:
222 ; NO_SCALAR_INC-NEXT: cntw x8, all, mul #3
223 ; NO_SCALAR_INC-NEXT: neg x8, x8
224 ; NO_SCALAR_INC-NEXT: add x0, x0, x8
225 ; NO_SCALAR_INC-NEXT: ret
227 ; CHECK-LABEL: decw_scalar_i64:
229 ; CHECK-NEXT: decw x0, all, mul #3
231 %vscale = call i64 @llvm.vscale.i64()
232 %mul = mul i64 %vscale, 12
233 %sub = sub i64 %a, %mul
; Scalar i64 subtract of vscale * 6 (3 * 2). The NO_SCALAR_INC run expects a
; count (`cntd ... mul #3`), a negate and an add; the CHECK runs expect the
; single instruction `decd x0, all, mul #3`.
237 define i64 @decd_scalar_i64(i64 %a) {
238 ; NO_SCALAR_INC-LABEL: decd_scalar_i64:
239 ; NO_SCALAR_INC: // %bb.0:
240 ; NO_SCALAR_INC-NEXT: cntd x8, all, mul #3
241 ; NO_SCALAR_INC-NEXT: neg x8, x8
242 ; NO_SCALAR_INC-NEXT: add x0, x0, x8
243 ; NO_SCALAR_INC-NEXT: ret
245 ; CHECK-LABEL: decd_scalar_i64:
247 ; CHECK-NEXT: decd x0, all, mul #3
249 %vscale = call i64 @llvm.vscale.i64()
250 %mul = mul i64 %vscale, 6
251 %sub = sub i64 %a, %mul
255 ; NOTE: As there is no need for the predicate pattern we
256 ; fall back to using ADDVL with its larger immediate range.
; Scalar i32 add of vscale * 48, computed in i64 and truncated. Both the
; NO_SCALAR_INC run and the CHECK runs expect `addvl x0, x0, #3` on the 64-bit
; register, bracketed by the w0/x0 sub-register kill annotations.
257 define i32 @incb_scalar_i32(i32 %a) {
258 ; NO_SCALAR_INC-LABEL: incb_scalar_i32:
259 ; NO_SCALAR_INC: // %bb.0:
260 ; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 def $x0
261 ; NO_SCALAR_INC-NEXT: addvl x0, x0, #3
262 ; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 killed $x0
263 ; NO_SCALAR_INC-NEXT: ret
265 ; CHECK-LABEL: incb_scalar_i32:
267 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
268 ; CHECK-NEXT: addvl x0, x0, #3
269 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; The scaled vscale is produced as i64 and narrowed to i32 before the add.
272 %vscale = call i64 @llvm.vscale.i64()
273 %mul = mul i64 %vscale, 48
274 %vl = trunc i64 %mul to i32
275 %add = add i32 %a, %vl
; Scalar i32 add of vscale * 56 (7 * 8), computed in i64 and truncated. The
; NO_SCALAR_INC run expects `cnth x8, all, mul #7` plus a 32-bit add; the CHECK
; runs expect `inch x0, all, mul #7` on the 64-bit register, bracketed by the
; w0/x0 sub-register kill annotations.
279 define i32 @inch_scalar_i32(i32 %a) {
280 ; NO_SCALAR_INC-LABEL: inch_scalar_i32:
281 ; NO_SCALAR_INC: // %bb.0:
282 ; NO_SCALAR_INC-NEXT: cnth x8, all, mul #7
283 ; NO_SCALAR_INC-NEXT: add w0, w0, w8
284 ; NO_SCALAR_INC-NEXT: ret
286 ; CHECK-LABEL: inch_scalar_i32:
288 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
289 ; CHECK-NEXT: inch x0, all, mul #7
290 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; The scaled vscale is produced as i64 and narrowed to i32 before the add.
293 %vscale = call i64 @llvm.vscale.i64()
294 %mul = mul i64 %vscale, 56
295 %vl = trunc i64 %mul to i32
296 %add = add i32 %a, %vl
; Scalar i32 add of vscale * 28 (7 * 4), computed in i64 and truncated. The
; NO_SCALAR_INC run expects `cntw x8, all, mul #7` plus a 32-bit add; the CHECK
; runs expect `incw x0, all, mul #7` on the 64-bit register, bracketed by the
; w0/x0 sub-register kill annotations.
300 define i32 @incw_scalar_i32(i32 %a) {
301 ; NO_SCALAR_INC-LABEL: incw_scalar_i32:
302 ; NO_SCALAR_INC: // %bb.0:
303 ; NO_SCALAR_INC-NEXT: cntw x8, all, mul #7
304 ; NO_SCALAR_INC-NEXT: add w0, w0, w8
305 ; NO_SCALAR_INC-NEXT: ret
307 ; CHECK-LABEL: incw_scalar_i32:
309 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
310 ; CHECK-NEXT: incw x0, all, mul #7
311 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; The scaled vscale is produced as i64 and narrowed to i32 before the add.
314 %vscale = call i64 @llvm.vscale.i64()
315 %mul = mul i64 %vscale, 28
316 %vl = trunc i64 %mul to i32
317 %add = add i32 %a, %vl
; Scalar i32 add of vscale * 14 (7 * 2), computed in i64 and truncated. The
; NO_SCALAR_INC run expects `cntd x8, all, mul #7` plus a 32-bit add; the CHECK
; runs expect `incd x0, all, mul #7` on the 64-bit register, bracketed by the
; w0/x0 sub-register kill annotations.
321 define i32 @incd_scalar_i32(i32 %a) {
322 ; NO_SCALAR_INC-LABEL: incd_scalar_i32:
323 ; NO_SCALAR_INC: // %bb.0:
324 ; NO_SCALAR_INC-NEXT: cntd x8, all, mul #7
325 ; NO_SCALAR_INC-NEXT: add w0, w0, w8
326 ; NO_SCALAR_INC-NEXT: ret
328 ; CHECK-LABEL: incd_scalar_i32:
330 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
331 ; CHECK-NEXT: incd x0, all, mul #7
332 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; The scaled vscale is produced as i64 and narrowed to i32 before the add.
335 %vscale = call i64 @llvm.vscale.i64()
336 %mul = mul i64 %vscale, 14
337 %vl = trunc i64 %mul to i32
338 %add = add i32 %a, %vl
342 ; NOTE: As there is no need for the predicate pattern we
343 ; fall back to using ADDVL with its larger immediate range.
; Scalar i32 subtract of vscale * 64, computed in i64 and truncated. Both the
; NO_SCALAR_INC run and the CHECK runs expect `addvl x0, x0, #-4` on the 64-bit
; register, bracketed by the w0/x0 sub-register kill annotations.
344 define i32 @decb_scalar_i32(i32 %a) {
345 ; NO_SCALAR_INC-LABEL: decb_scalar_i32:
346 ; NO_SCALAR_INC: // %bb.0:
347 ; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 def $x0
348 ; NO_SCALAR_INC-NEXT: addvl x0, x0, #-4
349 ; NO_SCALAR_INC-NEXT: // kill: def $w0 killed $w0 killed $x0
350 ; NO_SCALAR_INC-NEXT: ret
352 ; CHECK-LABEL: decb_scalar_i32:
354 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
355 ; CHECK-NEXT: addvl x0, x0, #-4
356 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; The scaled vscale is produced as i64 and narrowed to i32 before the subtract.
359 %vscale = call i64 @llvm.vscale.i64()
360 %mul = mul i64 %vscale, 64
361 %vl = trunc i64 %mul to i32
362 %sub = sub i32 %a, %vl
; Scalar i32 subtract of vscale * 8, computed in i64 and truncated. The
; NO_SCALAR_INC run expects `cnth x8`, a negate and a 32-bit add; the CHECK
; runs expect `dech x0` on the 64-bit register, bracketed by the w0/x0
; sub-register kill annotations.
366 define i32 @dech_scalar_i32(i32 %a) {
367 ; NO_SCALAR_INC-LABEL: dech_scalar_i32:
368 ; NO_SCALAR_INC: // %bb.0:
369 ; NO_SCALAR_INC-NEXT: cnth x8
370 ; NO_SCALAR_INC-NEXT: neg x8, x8
371 ; NO_SCALAR_INC-NEXT: add w0, w0, w8
372 ; NO_SCALAR_INC-NEXT: ret
374 ; CHECK-LABEL: dech_scalar_i32:
376 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
377 ; CHECK-NEXT: dech x0
378 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; The scaled vscale is produced as i64 and narrowed to i32 before the subtract.
381 %vscale = call i64 @llvm.vscale.i64()
382 %mul = mul i64 %vscale, 8
383 %vl = trunc i64 %mul to i32
384 %sub = sub i32 %a, %vl
; Scalar i32 subtract of vscale * 4, computed in i64 and truncated. The
; NO_SCALAR_INC run expects `cntw x8`, a negate and a 32-bit add; the CHECK
; runs expect `decw x0` on the 64-bit register, bracketed by the w0/x0
; sub-register kill annotations.
388 define i32 @decw_scalar_i32(i32 %a) {
389 ; NO_SCALAR_INC-LABEL: decw_scalar_i32:
390 ; NO_SCALAR_INC: // %bb.0:
391 ; NO_SCALAR_INC-NEXT: cntw x8
392 ; NO_SCALAR_INC-NEXT: neg x8, x8
393 ; NO_SCALAR_INC-NEXT: add w0, w0, w8
394 ; NO_SCALAR_INC-NEXT: ret
396 ; CHECK-LABEL: decw_scalar_i32:
398 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
399 ; CHECK-NEXT: decw x0
400 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; The scaled vscale is produced as i64 and narrowed to i32 before the subtract.
403 %vscale = call i64 @llvm.vscale.i64()
404 %mul = mul i64 %vscale, 4
405 %vl = trunc i64 %mul to i32
406 %sub = sub i32 %a, %vl
; Scalar i32 subtract of vscale * 2, computed in i64 and truncated. The
; NO_SCALAR_INC run expects `cntd x8`, a negate and a 32-bit add; the CHECK
; runs expect `decd x0` on the 64-bit register, bracketed by the w0/x0
; sub-register kill annotations.
410 define i32 @decd_scalar_i32(i32 %a) {
411 ; NO_SCALAR_INC-LABEL: decd_scalar_i32:
412 ; NO_SCALAR_INC: // %bb.0:
413 ; NO_SCALAR_INC-NEXT: cntd x8
414 ; NO_SCALAR_INC-NEXT: neg x8, x8
415 ; NO_SCALAR_INC-NEXT: add w0, w0, w8
416 ; NO_SCALAR_INC-NEXT: ret
418 ; CHECK-LABEL: decd_scalar_i32:
420 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
421 ; CHECK-NEXT: decd x0
422 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
; The scaled vscale is produced as i64 and narrowed to i32 before the subtract.
424 %vscale = call i64 @llvm.vscale.i64()
425 %mul = mul i64 %vscale, 2
426 %vl = trunc i64 %mul to i32
427 %sub = sub i32 %a, %vl
; Declarations of the llvm.vscale intrinsic at each result width (i16, i32 and
; i64) called by the test functions in this file.
431 declare i16 @llvm.vscale.i16()
432 declare i32 @llvm.vscale.i32()
433 declare i64 @llvm.vscale.i64()