1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s
4 ; This tests that various ands, sexts, and zexts (and other operations)
5 ; operating on vscale or the SVE count instructions can be eliminated
6 ; (via demanded bits) due to their known limited range.
8 ; On AArch64 vscale can be at most 16 (for a 2048-bit vector).
9 ; The counting instructions (sans multiplier) have a value of at most 256
10 ; (for a 2048-bit vector of i8s).
; Masks the result of @llvm.vscale.i32 twice and adds the two masked values.
; The function is marked vscale_range(1,16), so the mask with 31 cannot clear
; any set bit; the CHECK lines shown contain a single `and` (#0x1c) and the
; final `add` consumes the unmasked register directly.
; NOTE(review): 17179869180 is 0x3FFFFFFFC, which does not fit in i32. The
; #0x1c immediate is consistent with it being treated as 0xFFFFFFFC - confirm
; how the IR parser handles this out-of-range literal.
12 define i32 @vscale_and_elimination() vscale_range(1,16) {
13 ; CHECK-LABEL: vscale_and_elimination:
15 ; CHECK-NEXT: rdvl x8, #1
16 ; CHECK-NEXT: lsr x8, x8, #4
17 ; CHECK-NEXT: and w9, w8, #0x1c
18 ; CHECK-NEXT: add w0, w8, w9
20 %vscale = call i32 @llvm.vscale.i32()
21 %and_redundant = and i32 %vscale, 31
22 %and_required = and i32 %vscale, 17179869180
23 %result = add i32 %and_redundant, %and_required
; Masks the i64 result of @llvm.aarch64.sve.cntb twice and adds the results.
; No `and` for the 511 mask appears in the CHECK lines shown; the mask with
; 17179869180 (0x3FFFFFFFC) shows up narrowed to #0x1fc, and the `add` uses
; the unmasked x8 as its other operand.
27 define i64 @cntb_and_elimination() {
28 ; CHECK-LABEL: cntb_and_elimination:
31 ; CHECK-NEXT: and x9, x8, #0x1fc
32 ; CHECK-NEXT: add x0, x8, x9
34 %cntb = call i64 @llvm.aarch64.sve.cntb(i32 31)
35 %and_redundant = and i64 %cntb, 511
36 %and_required = and i64 %cntb, 17179869180
37 %result = add i64 %and_redundant, %and_required
; Masks the i64 result of @llvm.aarch64.sve.cnth twice and adds the results.
; No `and` for the 1023 mask appears in the CHECK lines shown; the mask with
; 17179869180 (0x3FFFFFFFC) shows up narrowed to #0xfc, and the `add` uses
; the unmasked x8 as its other operand.
41 define i64 @cnth_and_elimination() {
42 ; CHECK-LABEL: cnth_and_elimination:
45 ; CHECK-NEXT: and x9, x8, #0xfc
46 ; CHECK-NEXT: add x0, x8, x9
48 %cnth = call i64 @llvm.aarch64.sve.cnth(i32 31)
49 %and_redundant = and i64 %cnth, 1023
50 %and_required = and i64 %cnth, 17179869180
51 %result = add i64 %and_redundant, %and_required
; Masks the i64 result of @llvm.aarch64.sve.cntw twice and adds the results.
; No `and` for the 127 mask appears in the CHECK lines shown; the mask with
; 17179869180 (0x3FFFFFFFC) shows up narrowed to #0x7c, and the `add` uses
; the unmasked x8 as its other operand.
55 define i64 @cntw_and_elimination() {
56 ; CHECK-LABEL: cntw_and_elimination:
59 ; CHECK-NEXT: and x9, x8, #0x7c
60 ; CHECK-NEXT: add x0, x8, x9
62 %cntw = call i64 @llvm.aarch64.sve.cntw(i32 31)
63 %and_redundant = and i64 %cntw, 127
64 %and_required = and i64 %cntw, 17179869180
65 %result = add i64 %and_redundant, %and_required
; Masks the i64 result of @llvm.aarch64.sve.cntd twice and adds the results.
; No `and` for the 63 mask appears in the CHECK lines shown; the mask with
; 17179869180 (0x3FFFFFFFC) shows up narrowed to #0x3c, and the `add` uses
; the unmasked x8 as its other operand.
69 define i64 @cntd_and_elimination() {
70 ; CHECK-LABEL: cntd_and_elimination:
73 ; CHECK-NEXT: and x9, x8, #0x3c
74 ; CHECK-NEXT: add x0, x8, x9
76 %cntd = call i64 @llvm.aarch64.sve.cntd(i32 31)
77 %and_redundant = and i64 %cntd, 63
78 %and_required = and i64 %cntd, 17179869180
79 %result = add i64 %and_redundant, %and_required
; Zero-extends the i32 result of @llvm.vscale.i32 to i64 under
; vscale_range(1,16). The CHECK lines shown contain only rdvl + lsr writing
; x0, with no separate extension instruction.
; NOTE(review): despite the name, no `trunc` appears in the IR lines visible
; here.
83 define i64 @vscale_trunc_zext() vscale_range(1,16) {
84 ; CHECK-LABEL: vscale_trunc_zext:
86 ; CHECK-NEXT: rdvl x8, #1
87 ; CHECK-NEXT: lsr x0, x8, #4
89 %vscale = call i32 @llvm.vscale.i32()
90 %zext = zext i32 %vscale to i64
; Sign-extends the i32 result of @llvm.vscale.i32 to i64 under
; vscale_range(1,16). The CHECK lines shown contain only rdvl + lsr writing
; x0, with no separate sign-extension instruction.
; NOTE(review): despite the name, no `trunc` appears in the IR lines visible
; here.
94 define i64 @vscale_trunc_sext() vscale_range(1,16) {
95 ; CHECK-LABEL: vscale_trunc_sext:
97 ; CHECK-NEXT: rdvl x8, #1
98 ; CHECK-NEXT: lsr x0, x8, #4
100 %vscale = call i32 @llvm.vscale.i32()
101 %sext = sext i32 %vscale to i64
; Truncates the i64 result of @llvm.aarch64.sve.cntb to i32 and zero-extends
; it back to i64. The only instruction in the CHECK lines shown is `cntb x0`,
; i.e. no code is expected for the trunc/zext pair.
105 define i64 @count_bytes_trunc_zext() {
106 ; CHECK-LABEL: count_bytes_trunc_zext:
108 ; CHECK-NEXT: cntb x0
110 %cnt = call i64 @llvm.aarch64.sve.cntb(i32 31)
111 %trunc = trunc i64 %cnt to i32
112 %zext = zext i32 %trunc to i64
; Truncates the i64 result of @llvm.aarch64.sve.cnth to i32 and zero-extends
; it back to i64. The only instruction in the CHECK lines shown is `cnth x0`,
; i.e. no code is expected for the trunc/zext pair.
116 define i64 @count_halfs_trunc_zext() {
117 ; CHECK-LABEL: count_halfs_trunc_zext:
119 ; CHECK-NEXT: cnth x0
121 %cnt = call i64 @llvm.aarch64.sve.cnth(i32 31)
122 %trunc = trunc i64 %cnt to i32
123 %zext = zext i32 %trunc to i64
; Truncates the i64 result of @llvm.aarch64.sve.cntw to i32 and zero-extends
; it back to i64. The only instruction in the CHECK lines shown is `cntw x0`,
; i.e. no code is expected for the trunc/zext pair.
127 define i64 @count_words_trunc_zext() {
128 ; CHECK-LABEL: count_words_trunc_zext:
130 ; CHECK-NEXT: cntw x0
132 %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31)
133 %trunc = trunc i64 %cnt to i32
134 %zext = zext i32 %trunc to i64
; Truncates the i64 result of @llvm.aarch64.sve.cntd to i32 and zero-extends
; it back to i64. The only instruction in the CHECK lines shown is `cntd x0`,
; i.e. no code is expected for the trunc/zext pair.
138 define i64 @count_doubles_trunc_zext() {
139 ; CHECK-LABEL: count_doubles_trunc_zext:
141 ; CHECK-NEXT: cntd x0
143 %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31)
144 %trunc = trunc i64 %cnt to i32
145 %zext = zext i32 %trunc to i64
; Truncates the i64 result of @llvm.aarch64.sve.cntb to i32 and sign-extends
; it back to i64. The only instruction in the CHECK lines shown is `cntb x0`,
; i.e. no code is expected for the trunc/sext pair.
149 define i64 @count_bytes_trunc_sext() {
150 ; CHECK-LABEL: count_bytes_trunc_sext:
152 ; CHECK-NEXT: cntb x0
154 %cnt = call i64 @llvm.aarch64.sve.cntb(i32 31)
155 %trunc = trunc i64 %cnt to i32
156 %sext = sext i32 %trunc to i64
; Truncates the i64 result of @llvm.aarch64.sve.cnth to i32 and sign-extends
; it back to i64. The only instruction in the CHECK lines shown is `cnth x0`,
; i.e. no code is expected for the trunc/sext pair.
160 define i64 @count_halfs_trunc_sext() {
161 ; CHECK-LABEL: count_halfs_trunc_sext:
163 ; CHECK-NEXT: cnth x0
165 %cnt = call i64 @llvm.aarch64.sve.cnth(i32 31)
166 %trunc = trunc i64 %cnt to i32
167 %sext = sext i32 %trunc to i64
; Truncates the i64 result of @llvm.aarch64.sve.cntw to i32 and sign-extends
; it back to i64. The only instruction in the CHECK lines shown is `cntw x0`,
; i.e. no code is expected for the trunc/sext pair.
171 define i64 @count_words_trunc_sext() {
172 ; CHECK-LABEL: count_words_trunc_sext:
174 ; CHECK-NEXT: cntw x0
176 %cnt = call i64 @llvm.aarch64.sve.cntw(i32 31)
177 %trunc = trunc i64 %cnt to i32
178 %sext = sext i32 %trunc to i64
; Truncates the i64 result of @llvm.aarch64.sve.cntd to i32 and sign-extends
; it back to i64. The only instruction in the CHECK lines shown is `cntd x0`,
; i.e. no code is expected for the trunc/sext pair.
182 define i64 @count_doubles_trunc_sext() {
183 ; CHECK-LABEL: count_doubles_trunc_sext:
185 ; CHECK-NEXT: cntd x0
187 %cnt = call i64 @llvm.aarch64.sve.cntd(i32 31)
188 %trunc = trunc i64 %cnt to i32
189 %sext = sext i32 %trunc to i64
; Multiplies vscale by 5, masks the product with 127 and with 63, and adds
; the two masked values. With vscale_range(1,16) the product is at most 80,
; so the 127 mask cannot clear any set bit while the 63 mask can. The CHECK
; lines shown contain a single `and` (#0x3f), and the `add` uses the unmasked
; product as its other operand.
193 define i32 @vscale_with_multiplier() vscale_range(1,16) {
194 ; CHECK-LABEL: vscale_with_multiplier:
196 ; CHECK-NEXT: rdvl x8, #1
197 ; CHECK-NEXT: mov w9, #5 // =0x5
198 ; CHECK-NEXT: lsr x8, x8, #4
199 ; CHECK-NEXT: mul x8, x8, x9
200 ; CHECK-NEXT: and w9, w8, #0x3f
201 ; CHECK-NEXT: add w0, w8, w9
203 %vscale = call i32 @llvm.vscale.i32()
204 %mul = mul i32 %vscale, 5
205 %and_redundant = and i32 %mul, 127
206 %and_required = and i32 %mul, 63
207 %result = add i32 %and_redundant, %and_required
; Multiplies vscale by -5, then combines the product with two constants:
; an `or` with 4294967168 (0xFFFFFF80) and an `and` with 4294967232
; (0xFFFFFFC0), and adds the two results. With vscale_range(1,16) the product
; lies in [-80, -5], so every bit covered by 0xFFFFFF80 is already set and the
; `or` cannot change the value. The CHECK lines shown contain only the `and`
; (#0xffffffc0), and the `add` uses the unmodified product as its other operand.
; NOTE(review): %or_required is computed with `and`, not `or`, despite its
; name - confirm this is intentional.
211 define i32 @vscale_with_negative_multiplier() vscale_range(1,16) {
212 ; CHECK-LABEL: vscale_with_negative_multiplier:
214 ; CHECK-NEXT: rdvl x8, #1
215 ; CHECK-NEXT: mov x9, #-5 // =0xfffffffffffffffb
216 ; CHECK-NEXT: lsr x8, x8, #4
217 ; CHECK-NEXT: mul x8, x8, x9
218 ; CHECK-NEXT: and w9, w8, #0xffffffc0
219 ; CHECK-NEXT: add w0, w8, w9
221 %vscale = call i32 @llvm.vscale.i32()
222 %mul = mul i32 %vscale, -5
223 %or_redundant = or i32 %mul, 4294967168
224 %or_required = and i32 %mul, 4294967232
225 %result = add i32 %or_redundant, %or_required
; Multiplies vscale by -2, ORs the product with 4294967264 (0xFFFFFFE0) and
; with 4294967280 (0xFFFFFFF0), and adds the two results. With
; vscale_range(1,16) the product lies in [-32, -2], so every bit covered by
; 0xFFFFFFE0 is already set and that `or` cannot change the value, whereas
; 0xFFFFFFF0 can. The CHECK lines shown contain a single `orr` (#0xfffffff0);
; the product itself is formed there with cntd + neg, and the final `sub`
; subtracts the cntd value from the `orr` result.
229 define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) {
230 ; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
232 ; CHECK-NEXT: cntd x8
233 ; CHECK-NEXT: neg x9, x8
234 ; CHECK-NEXT: orr w9, w9, #0xfffffff0
235 ; CHECK-NEXT: sub w0, w9, w8
237 %vscale = call i32 @llvm.vscale.i32()
238 %mul = mul i32 %vscale, -2
239 %or_redundant = or i32 %mul, 4294967264
240 %or_required = or i32 %mul, 4294967280
241 %result = add i32 %or_redundant, %or_required
; Declarations of the intrinsics called by the test functions in this file.
; @llvm.vscale.i32 takes no operands and returns i32; each SVE count
; intrinsic takes one i32 %pattern operand and returns i64. Every call
; visible in this file passes 31 as the pattern (presumably the SVE "all"
; pattern - TODO confirm against the intrinsic documentation).
245 declare i32 @llvm.vscale.i32()
246 declare i64 @llvm.aarch64.sve.cntb(i32 %pattern)
247 declare i64 @llvm.aarch64.sve.cnth(i32 %pattern)
248 declare i64 @llvm.aarch64.sve.cntw(i32 %pattern)
249 declare i64 @llvm.aarch64.sve.cntd(i32 %pattern)