llvm/test/CodeGen/AArch64/vscale-and-sve-cnt-demandedbits.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s
   3
   4 ; This tests that various ands, sexts, and zexts (and other operations)
   5 ; operating on vscale or the SVE count instructions can be eliminated
   6 ; (via demanded bits) due to their known limited range.
   7
   8 ; On AArch64 vscale can be at most 16 (for a 2048-bit vector).
   9 ; The counting instructions (sans multiplier) have a value of at most 256
  10 ; (for a 2048-bit vector of i8s).
  11
  12 define i32 @vscale_and_elimination() vscale_range(1,16) { ; and-mask elimination on an i32 vscale limited to [1,16]
  13 ; CHECK-LABEL: vscale_and_elimination:
  14 ; CHECK:       // %bb.0:
  15 ; CHECK-NEXT:    rdvl x8, #1
  16 ; CHECK-NEXT:    lsr x8, x8, #4
  17 ; CHECK-NEXT:    and w9, w8, #0x1c
  18 ; CHECK-NEXT:    add w0, w8, w9
  19 ; CHECK-NEXT:    ret
  20   %vscale = call i32 @llvm.vscale.i32()
  21   %and_redundant = and i32 %vscale, 31 ; no instruction expected: the add reads the vscale register (w8) directly
  22   %and_required = and i32 %vscale, 4294967292 ; 0xfffffffc, fits in 32 bits; expected to survive, narrowed to #0x1c
  23   %result = add i32 %and_redundant, %and_required
  24   ret i32 %result
  25 }
  26
  27 define i64 @cntb_and_elimination() { ; and-mask elimination on the byte element count
  28 ; CHECK-LABEL: cntb_and_elimination:
  29 ; CHECK:       // %bb.0:
  30 ; CHECK-NEXT:    cntb x8
  31 ; CHECK-NEXT:    and x9, x8, #0x1fc
  32 ; CHECK-NEXT:    add x0, x8, x9
  33 ; CHECK-NEXT:    ret
  34   %bytes = call i64 @llvm.aarch64.sve.cntb(i32 31)
  35   %low9 = and i64 %bytes, 511 ; no instruction expected: the add reads the cntb register (x8) directly
  36   %masked = and i64 %bytes, 17179869180 ; 0x3fffffffc; expected to survive, narrowed to #0x1fc
  37   %sum = add i64 %low9, %masked
  38   ret i64 %sum
  39 }
  40
  41 define i64 @cnth_and_elimination() { ; and-mask elimination on the halfword element count
  42 ; CHECK-LABEL: cnth_and_elimination:
  43 ; CHECK:       // %bb.0:
  44 ; CHECK-NEXT:    cnth x8
  45 ; CHECK-NEXT:    and x9, x8, #0xfc
  46 ; CHECK-NEXT:    add x0, x8, x9
  47 ; CHECK-NEXT:    ret
  48   %cnth = call i64 @llvm.aarch64.sve.cnth(i32 31)
  49   %and_redundant = and i64 %cnth, 255 ; tightest all-ones mask, matching the sibling tests; no instruction expected
  50   %and_required = and i64 %cnth, 17179869180 ; 0x3fffffffc; expected to survive, narrowed to #0xfc
  51   %result = add i64 %and_redundant, %and_required
  52   ret i64 %result
  53 }
  54
  55 define i64 @cntw_and_elimination() { ; and-mask elimination on the word element count
  56 ; CHECK-LABEL: cntw_and_elimination:
  57 ; CHECK:       // %bb.0:
  58 ; CHECK-NEXT:    cntw x8
  59 ; CHECK-NEXT:    and x9, x8, #0x7c
  60 ; CHECK-NEXT:    add x0, x8, x9
  61 ; CHECK-NEXT:    ret
  62   %words = call i64 @llvm.aarch64.sve.cntw(i32 31)
  63   %low7 = and i64 %words, 127 ; no instruction expected: the add reads the cntw register (x8) directly
  64   %masked = and i64 %words, 17179869180 ; 0x3fffffffc; expected to survive, narrowed to #0x7c
  65   %sum = add i64 %low7, %masked
  66   ret i64 %sum
  67 }
  68
  69 define i64 @cntd_and_elimination() { ; and-mask elimination on the doubleword element count
  70 ; CHECK-LABEL: cntd_and_elimination:
  71 ; CHECK:       // %bb.0:
  72 ; CHECK-NEXT:    cntd x8
  73 ; CHECK-NEXT:    and x9, x8, #0x3c
  74 ; CHECK-NEXT:    add x0, x8, x9
  75 ; CHECK-NEXT:    ret
  76   %dwords = call i64 @llvm.aarch64.sve.cntd(i32 31)
  77   %low6 = and i64 %dwords, 63 ; no instruction expected: the add reads the cntd register (x8) directly
  78   %masked = and i64 %dwords, 17179869180 ; 0x3fffffffc; expected to survive, narrowed to #0x3c
  79   %sum = add i64 %low6, %masked
  80   ret i64 %sum
  81 }
  82
  83 define i64 @vscale_trunc_zext() vscale_range(1,16) { ; zero-extends an i32 vscale to i64
  84 ; CHECK-LABEL: vscale_trunc_zext:
  85 ; CHECK:       // %bb.0:
  86 ; CHECK-NEXT:    rdvl x8, #1
  87 ; CHECK-NEXT:    lsr x0, x8, #4
  88 ; CHECK-NEXT:    ret
  89   %vs32 = call i32 @llvm.vscale.i32() ; no explicit trunc in the IR: vscale is requested as i32
  90   %vs64 = zext i32 %vs32 to i64 ; no separate extend expected: the lsr writes x0 directly
  91   ret i64 %vs64
  92 }
  93
  94 define i64 @vscale_trunc_sext() vscale_range(1,16) { ; sign-extends an i32 vscale to i64
  95 ; CHECK-LABEL: vscale_trunc_sext:
  96 ; CHECK:       // %bb.0:
  97 ; CHECK-NEXT:    rdvl x8, #1
  98 ; CHECK-NEXT:    lsr x0, x8, #4
  99 ; CHECK-NEXT:    ret
 100   %vs32 = call i32 @llvm.vscale.i32() ; no explicit trunc in the IR: vscale is requested as i32
 101   %vs64 = sext i32 %vs32 to i64 ; no separate sign-extend expected: same output as the zext variant
 102   ret i64 %vs64
 103 }
 104
 105 define i64 @count_bytes_trunc_zext() { ; trunc to i32 then zext back to i64 on the byte count
 106 ; CHECK-LABEL: count_bytes_trunc_zext:
 107 ; CHECK:       // %bb.0:
 108 ; CHECK-NEXT:    cntb x0
 109 ; CHECK-NEXT:    ret
 110   %bytes = call i64 @llvm.aarch64.sve.cntb(i32 31)
 111   %narrow = trunc i64 %bytes to i32
 112   %wide = zext i32 %narrow to i64 ; round trip expected to fold away: output is only cntb into x0
 113   ret i64 %wide
 114 }
 115
 116 define i64 @count_halfs_trunc_zext() { ; trunc to i32 then zext back to i64 on the halfword count
 117 ; CHECK-LABEL: count_halfs_trunc_zext:
 118 ; CHECK:       // %bb.0:
 119 ; CHECK-NEXT:    cnth x0
 120 ; CHECK-NEXT:    ret
 121   %halfs = call i64 @llvm.aarch64.sve.cnth(i32 31)
 122   %narrow = trunc i64 %halfs to i32
 123   %wide = zext i32 %narrow to i64 ; round trip expected to fold away: output is only cnth into x0
 124   ret i64 %wide
 125 }
 126
 127 define i64 @count_words_trunc_zext() { ; trunc to i32 then zext back to i64 on the word count
 128 ; CHECK-LABEL: count_words_trunc_zext:
 129 ; CHECK:       // %bb.0:
 130 ; CHECK-NEXT:    cntw x0
 131 ; CHECK-NEXT:    ret
 132   %words = call i64 @llvm.aarch64.sve.cntw(i32 31)
 133   %narrow = trunc i64 %words to i32
 134   %wide = zext i32 %narrow to i64 ; round trip expected to fold away: output is only cntw into x0
 135   ret i64 %wide
 136 }
 137
 138 define i64 @count_doubles_trunc_zext() { ; trunc to i32 then zext back to i64 on the doubleword count
 139 ; CHECK-LABEL: count_doubles_trunc_zext:
 140 ; CHECK:       // %bb.0:
 141 ; CHECK-NEXT:    cntd x0
 142 ; CHECK-NEXT:    ret
 143   %dwords = call i64 @llvm.aarch64.sve.cntd(i32 31)
 144   %narrow = trunc i64 %dwords to i32
 145   %wide = zext i32 %narrow to i64 ; round trip expected to fold away: output is only cntd into x0
 146   ret i64 %wide
 147 }
 148
 149 define i64 @count_bytes_trunc_sext() { ; trunc to i32 then sext back to i64 on the byte count
 150 ; CHECK-LABEL: count_bytes_trunc_sext:
 151 ; CHECK:       // %bb.0:
 152 ; CHECK-NEXT:    cntb x0
 153 ; CHECK-NEXT:    ret
 154   %bytes = call i64 @llvm.aarch64.sve.cntb(i32 31)
 155   %narrow = trunc i64 %bytes to i32
 156   %wide = sext i32 %narrow to i64 ; no sign-extend instruction expected: output is only cntb into x0
 157   ret i64 %wide
 158 }
 159
 160 define i64 @count_halfs_trunc_sext() { ; trunc to i32 then sext back to i64 on the halfword count
 161 ; CHECK-LABEL: count_halfs_trunc_sext:
 162 ; CHECK:       // %bb.0:
 163 ; CHECK-NEXT:    cnth x0
 164 ; CHECK-NEXT:    ret
 165   %halfs = call i64 @llvm.aarch64.sve.cnth(i32 31)
 166   %narrow = trunc i64 %halfs to i32
 167   %wide = sext i32 %narrow to i64 ; no sign-extend instruction expected: output is only cnth into x0
 168   ret i64 %wide
 169 }
 170
 171 define i64 @count_words_trunc_sext() { ; trunc to i32 then sext back to i64 on the word count
 172 ; CHECK-LABEL: count_words_trunc_sext:
 173 ; CHECK:       // %bb.0:
 174 ; CHECK-NEXT:    cntw x0
 175 ; CHECK-NEXT:    ret
 176   %words = call i64 @llvm.aarch64.sve.cntw(i32 31)
 177   %narrow = trunc i64 %words to i32
 178   %wide = sext i32 %narrow to i64 ; no sign-extend instruction expected: output is only cntw into x0
 179   ret i64 %wide
 180 }
 181
 182 define i64 @count_doubles_trunc_sext() { ; trunc to i32 then sext back to i64 on the doubleword count
 183 ; CHECK-LABEL: count_doubles_trunc_sext:
 184 ; CHECK:       // %bb.0:
 185 ; CHECK-NEXT:    cntd x0
 186 ; CHECK-NEXT:    ret
 187   %dwords = call i64 @llvm.aarch64.sve.cntd(i32 31)
 188   %narrow = trunc i64 %dwords to i32
 189   %wide = sext i32 %narrow to i64 ; no sign-extend instruction expected: output is only cntd into x0
 190   ret i64 %wide
 191 }
 192
 193 define i32 @vscale_with_multiplier() vscale_range(1,16) { ; and-mask elimination on vscale times a positive constant
 194 ; CHECK-LABEL: vscale_with_multiplier:
 195 ; CHECK:       // %bb.0:
 196 ; CHECK-NEXT:    rdvl x8, #1
 197 ; CHECK-NEXT:    mov w9, #5 // =0x5
 198 ; CHECK-NEXT:    lsr x8, x8, #4
 199 ; CHECK-NEXT:    mul x8, x8, x9
 200 ; CHECK-NEXT:    and w9, w8, #0x3f
 201 ; CHECK-NEXT:    add w0, w8, w9
 202 ; CHECK-NEXT:    ret
 203   %vs = call i32 @llvm.vscale.i32()
 204   %scaled = mul i32 %vs, 5 ; at most 5 * 16 = 80 under vscale_range(1,16)
 205   %low7 = and i32 %scaled, 127 ; no instruction expected: the add reads the product register (w8) directly
 206   %low6 = and i32 %scaled, 63 ; expected to survive as #0x3f
 207   %sum = add i32 %low7, %low6
 208   ret i32 %sum
 209 }
 210
 211 define i32 @vscale_with_negative_multiplier() vscale_range(1,16) { ; mask elimination on vscale times a negative constant
 212 ; CHECK-LABEL: vscale_with_negative_multiplier:
 213 ; CHECK:       // %bb.0:
 214 ; CHECK-NEXT:    rdvl x8, #1
 215 ; CHECK-NEXT:    mov x9, #-5 // =0xfffffffffffffffb
 216 ; CHECK-NEXT:    lsr x8, x8, #4
 217 ; CHECK-NEXT:    mul x8, x8, x9
 218 ; CHECK-NEXT:    and w9, w8, #0xffffffc0
 219 ; CHECK-NEXT:    add w0, w8, w9
 220 ; CHECK-NEXT:    ret
 221   %vscale = call i32 @llvm.vscale.i32()
 222   %mul = mul i32 %vscale, -5 ; in [-80, -5] under vscale_range(1,16)
 223   %or_redundant = or i32 %mul, 4294967168 ; 0xffffff80; no instruction expected: the add reads the product (w8) directly
 224   %and_required = and i32 %mul, 4294967232 ; 0xffffffc0; named for the and actually used. NOTE(review): confirm an or was not intended
 225   %result = add i32 %or_redundant, %and_required
 226   ret i32 %result
 227 }
 228
 229 define i32 @pow2_vscale_with_negative_multiplier() vscale_range(1,16) { ; or-mask elimination on vscale times -2
 230 ; CHECK-LABEL: pow2_vscale_with_negative_multiplier:
 231 ; CHECK:       // %bb.0:
 232 ; CHECK-NEXT:    cntd x8
 233 ; CHECK-NEXT:    neg x9, x8
 234 ; CHECK-NEXT:    orr w9, w9, #0xfffffff0
 235 ; CHECK-NEXT:    sub w0, w9, w8
 236 ; CHECK-NEXT:    ret
 237   %vs = call i32 @llvm.vscale.i32()
 238   %neg2x = mul i32 %vs, -2 ; in [-32, -2] under vscale_range(1,16)
 239   %or_wide = or i32 %neg2x, 4294967264 ; 0xffffffe0; no instruction expected for this or
 240   %or_kept = or i32 %neg2x, 4294967280 ; 0xfffffff0; expected to survive as the orr
 241   %sum = add i32 %or_wide, %or_kept
 242   ret i32 %sum
 243 }
 244
 245 declare i32 @llvm.vscale.i32() ; i32 vscale query used by the vscale_* and pow2_vscale_* tests in this file
 246 declare i64 @llvm.aarch64.sve.cntb(i32 %pattern) ; every call in this file passes pattern 31; expected output is a bare cntb
 247 declare i64 @llvm.aarch64.sve.cnth(i32 %pattern) ; every call in this file passes pattern 31; expected output is a bare cnth
 248 declare i64 @llvm.aarch64.sve.cntw(i32 %pattern) ; every call in this file passes pattern 31; expected output is a bare cntw
 249 declare i64 @llvm.aarch64.sve.cntd(i32 %pattern) ; every call in this file passes pattern 31; expected output is a bare cntd