llvm/test/Transforms/InstCombine/bit_ceil.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
   3
   4 ; std::bit_ceil<uint32_t>(x), spelled as x > 1 ? 1 << (32 - ctlz(x - 1)) : 1. The expected output has no select; the shift amount becomes (0 - ctlz) & 31.
   5 define i32 @bit_ceil_32(i32 %x) {
   6 ; CHECK-LABEL: @bit_ceil_32(
   7 ; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[X:%.*]], -1
   8 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0:![0-9]+]]
   9 ; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw i32 0, [[CTLZ]]
  10 ; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 31
  11 ; CHECK-NEXT:    [[SEL:%.*]] = shl nuw i32 1, [[TMP2]]
  12 ; CHECK-NEXT:    ret i32 [[SEL]]
  13 ;
  14   %dec = add i32 %x, -1
  15   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
  16   %sub = sub i32 32, %ctlz  ; shift amount 32 - ctlz(x - 1); equals 32 when ctlz is 0
  17   %shl = shl i32 1, %sub
  18   %ugt = icmp ugt i32 %x, 1  ; guard: only x > 1 takes the shifted value
  19   %sel = select i1 %ugt, i32 %shl, i32 1  ; x <= 1 yields 1
  20   ret i32 %sel
  21 }
  22
  23 ; std::bit_ceil<uint64_t>(x): the i64 counterpart of the i32 pattern, using 64 - ctlz(x - 1). The expected output has no select; the shift amount becomes (0 - ctlz) & 63.
  24 define i64 @bit_ceil_64(i64 %x) {
  25 ; CHECK-LABEL: @bit_ceil_64(
  26 ; CHECK-NEXT:    [[DEC:%.*]] = add i64 [[X:%.*]], -1
  27 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i64 @llvm.ctlz.i64(i64 [[DEC]], i1 false), !range [[RNG1:![0-9]+]]
  28 ; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw i64 0, [[CTLZ]]
  29 ; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], 63
  30 ; CHECK-NEXT:    [[SEL:%.*]] = shl nuw i64 1, [[TMP2]]
  31 ; CHECK-NEXT:    ret i64 [[SEL]]
  32 ;
  33   %dec = add i64 %x, -1
  34   %ctlz = tail call i64 @llvm.ctlz.i64(i64 %dec, i1 false)
  35   %sub = sub i64 64, %ctlz  ; shift amount 64 - ctlz(x - 1); equals 64 when ctlz is 0
  36   %shl = shl i64 1, %sub
  37   %ugt = icmp ugt i64 %x, 1  ; guard: only x > 1 takes the shifted value
  38   %sel = select i1 %ugt, i64 %shl, i64 1  ; x <= 1 yields 1
  39   ret i64 %sel
  40 }
  41
  42 ; std::bit_ceil<uint32_t>(x - 1): the decrement of the argument and the guard are both expressed directly in terms of x. The expected output has no select.
  43 define i32 @bit_ceil_32_minus_1(i32 %x) {
  44 ; CHECK-LABEL: @bit_ceil_32_minus_1(
  45 ; CHECK-NEXT:  entry:
  46 ; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[X:%.*]], -2
  47 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]]
  48 ; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
  49 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 31
  50 ; CHECK-NEXT:    [[SEL:%.*]] = shl nuw i32 1, [[TMP1]]
  51 ; CHECK-NEXT:    ret i32 [[SEL]]
  52 ;
  53 entry:
  54   %sub = add i32 %x, -2  ; (x - 1) - 1
  55   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
  56   %sub2 = sub nuw nsw i32 32, %ctlz
  57   %shl = shl nuw i32 1, %sub2
  58   %add = add i32 %x, -3
  59   %ult = icmp ult i32 %add, -2  ; x - 3 <u 0xFFFFFFFE, i.e. (x - 1) >u 1
  60   %sel = select i1 %ult, i32 %shl, i32 1
  61   ret i32 %sel
  62 }
  63
  64 ; std::bit_ceil<uint32_t>(x + 1): ctlz is taken of x itself because (x + 1) - 1 == x. The expected output has no select.
  65 define i32 @bit_ceil_32_plus_1(i32 %x) {
  66 ; CHECK-LABEL: @bit_ceil_32_plus_1(
  67 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 false), !range [[RNG0]]
  68 ; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw i32 0, [[CTLZ]]
  69 ; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], 31
  70 ; CHECK-NEXT:    [[SEL:%.*]] = shl nuw i32 1, [[TMP2]]
  71 ; CHECK-NEXT:    ret i32 [[SEL]]
  72 ;
  73   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  74   %sub = sub i32 32, %ctlz
  75   %shl = shl i32 1, %sub
  76   %dec = add i32 %x, -1
  77   %ult = icmp ult i32 %dec, -2  ; x - 1 <u 0xFFFFFFFE, i.e. (x + 1) >u 1
  78   %sel = select i1 %ult, i32 %shl, i32 1
  79   ret i32 %sel
  80 }
  81
  82 ; std::bit_ceil<uint32_t>(x + 2): ctlz is taken of x + 1 and the guard compares x directly. The expected output has no select.
  83 define i32 @bit_ceil_plus_2(i32 %x) {
  84 ; CHECK-LABEL: @bit_ceil_plus_2(
  85 ; CHECK-NEXT:  entry:
  86 ; CHECK-NEXT:    [[SUB:%.*]] = add i32 [[X:%.*]], 1
  87 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]]
  88 ; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
  89 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 31
  90 ; CHECK-NEXT:    [[SEL:%.*]] = shl nuw i32 1, [[TMP1]]
  91 ; CHECK-NEXT:    ret i32 [[SEL]]
  92 ;
  93 entry:
  94   %sub = add i32 %x, 1  ; (x + 2) - 1
  95   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
  96   %sub2 = sub nuw nsw i32 32, %ctlz
  97   %shl = shl nuw i32 1, %sub2
  98   %ult = icmp ult i32 %x, -2  ; x <u 0xFFFFFFFE, i.e. (x + 2) >u 1
  99   %sel = select i1 %ult, i32 %shl, i32 1
 100   ret i32 %sel
 101 }
 102
 103 ; std::bit_ceil<uint32_t>(-x): the decremented argument (-x) - 1 appears as ~x (xor with -1). The expected output has no select.
 104 define i32 @bit_ceil_32_neg(i32 %x) {
 105 ; CHECK-LABEL: @bit_ceil_32_neg(
 106 ; CHECK-NEXT:  entry:
 107 ; CHECK-NEXT:    [[SUB:%.*]] = xor i32 [[X:%.*]], -1
 108 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]]
 109 ; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
 110 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 31
 111 ; CHECK-NEXT:    [[SEL:%.*]] = shl nuw i32 1, [[TMP1]]
 112 ; CHECK-NEXT:    ret i32 [[SEL]]
 113 ;
 114 entry:
 115   %sub = xor i32 %x, -1  ; ~x == (-x) - 1
 116   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
 117   %sub2 = sub nuw nsw i32 32, %ctlz
 118   %shl = shl nuw i32 1, %sub2
 119   %notsub = add i32 %x, -1
 120   %ult = icmp ult i32 %notsub, -2  ; x - 1 <u 0xFFFFFFFE, i.e. (-x) >u 1
 121   %sel = select i1 %ult, i32 %shl, i32 1
 122   ret i32 %sel
 123 }
 124
 125 ; std::bit_ceil<uint32_t>(~x): the decremented argument ~x - 1 appears as -2 - x. The expected output has no select.
 126 define i32 @bit_ceil_not(i32 %x) {
 127 ; CHECK-LABEL: @bit_ceil_not(
 128 ; CHECK-NEXT:  entry:
 129 ; CHECK-NEXT:    [[SUB:%.*]] = sub i32 -2, [[X:%.*]]
 130 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[SUB]], i1 false), !range [[RNG0]]
 131 ; CHECK-NEXT:    [[TMP0:%.*]] = sub nsw i32 0, [[CTLZ]]
 132 ; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 31
 133 ; CHECK-NEXT:    [[SEL:%.*]] = shl nuw i32 1, [[TMP1]]
 134 ; CHECK-NEXT:    ret i32 [[SEL]]
 135 ;
 136 entry:
 137   %sub = sub i32 -2, %x  ; -2 - x == ~x - 1
 138   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %sub, i1 false)
 139   %sub2 = sub nuw nsw i32 32, %ctlz
 140   %shl = shl nuw i32 1, %sub2
 141   %ult = icmp ult i32 %x, -2  ; x <u 0xFFFFFFFE, i.e. (~x) >u 1
 142   %sel = select i1 %ult, i32 %shl, i32 1
 143   ret i32 %sel
 144 }
 145
 146 ; Commuted select operands should still be recognized: the constant 1 is the true arm, chosen when x - 1 == 0. NOTE(review): the expected output below drops the select but keeps the 32 - ctlz shift amount rather than the masked form seen in @bit_ceil_32 - confirm this is intended.
 147 define i32 @bit_ceil_commuted_operands(i32 %x) {
 148 ; CHECK-LABEL: @bit_ceil_commuted_operands(
 149 ; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[X:%.*]], -1
 150 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]]
 151 ; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]]
 152 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[SUB]]
 153 ; CHECK-NEXT:    ret i32 [[SHL]]
 154 ;
 155   %dec = add i32 %x, -1
 156   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
 157   %sub = sub i32 32, %ctlz
 158   %shl = shl i32 1, %sub
 159   %eq = icmp eq i32 %dec, 0  ; true only for x == 1 (x == 0 is not covered by this guard)
 160   %sel = select i1 %eq, i32 1, i32 %shl  ; arms swapped relative to @bit_ceil_32: constant first, shift second
 161   ret i32 %sel
 162 }
 163
 164 ; Negative test: wrong select constant (the constant arm is 2 instead of 1); the expected output keeps the select, with the compare inverted and the arms swapped.
 165 define i32 @bit_ceil_wrong_select_constant(i32 %x) {
 166 ; CHECK-LABEL: @bit_ceil_wrong_select_constant(
 167 ; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[X:%.*]], -1
 168 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]]
 169 ; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]]
 170 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[SUB]]
 171 ; CHECK-NEXT:    [[UGT_INV:%.*]] = icmp ult i32 [[X]], 2
 172 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[UGT_INV]], i32 2, i32 [[SHL]]
 173 ; CHECK-NEXT:    ret i32 [[SEL]]
 174 ;
 175   %dec = add i32 %x, -1
 176   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
 177   %sub = sub i32 32, %ctlz
 178   %shl = shl i32 1, %sub
 179   %ugt = icmp ugt i32 %x, 1
 180   %sel = select i1 %ugt, i32 %shl, i32 2  ; the only difference from @bit_ceil_32: 2 instead of 1
 181   ret i32 %sel
 182 }
 183
 184 ; Negative test: when the select condition is false, ctlz is not guaranteed to be either 0 or 32 (x == 2 gives ctlz(1) == 31), so the expected output keeps the select.
 185 define i32 @bit_ceil_32_wrong_cond(i32 %x) {
 186 ; CHECK-LABEL: @bit_ceil_32_wrong_cond(
 187 ; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[X:%.*]], -1
 188 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]]
 189 ; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]]
 190 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[SUB]]
 191 ; CHECK-NEXT:    [[UGT:%.*]] = icmp ugt i32 [[X]], 2
 192 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1
 193 ; CHECK-NEXT:    ret i32 [[SEL]]
 194 ;
 195   %dec = add i32 %x, -1
 196   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
 197   %sub = sub i32 32, %ctlz
 198   %shl = shl i32 1, %sub
 199   %ugt = icmp ugt i32 %x, 2  ; the only difference from @bit_ceil_32: bound 2 instead of 1
 200   %sel = select i1 %ugt, i32 %shl, i32 1
 201   ret i32 %sel
 202 }
 203
 204 ; Negative test: wrong sub constant (33 - ctlz instead of 32 - ctlz); the expected output keeps the select.
 205 define i32 @bit_ceil_wrong_sub_constant(i32 %x) {
 206 ; CHECK-LABEL: @bit_ceil_wrong_sub_constant(
 207 ; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[X:%.*]], -1
 208 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]]
 209 ; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 33, [[CTLZ]]
 210 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[SUB]]
 211 ; CHECK-NEXT:    [[UGT:%.*]] = icmp ugt i32 [[X]], 1
 212 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1
 213 ; CHECK-NEXT:    ret i32 [[SEL]]
 214 ;
 215   %dec = add i32 %x, -1
 216   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
 217   %sub = sub i32 33, %ctlz  ; the only difference from @bit_ceil_32: 33 instead of the bit width 32
 218   %shl = shl i32 1, %sub
 219   %ugt = icmp ugt i32 %x, 1
 220   %sel = select i1 %ugt, i32 %shl, i32 1
 221   ret i32 %sel
 222 }
 223
 224 ; Negative test: the shl result used twice (by the select and by the store to %p); the expected output keeps the select.
 225 define i32 @bit_ceil_32_shl_used_twice(i32 %x, ptr %p) {
 226 ; CHECK-LABEL: @bit_ceil_32_shl_used_twice(
 227 ; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[X:%.*]], -1
 228 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]]
 229 ; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]]
 230 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[SUB]]
 231 ; CHECK-NEXT:    [[UGT:%.*]] = icmp ugt i32 [[X]], 1
 232 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1
 233 ; CHECK-NEXT:    store i32 [[SHL]], ptr [[P:%.*]], align 4
 234 ; CHECK-NEXT:    ret i32 [[SEL]]
 235 ;
 236   %dec = add i32 %x, -1
 237   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
 238   %sub = sub i32 32, %ctlz
 239   %shl = shl i32 1, %sub
 240   %ugt = icmp ugt i32 %x, 1
 241   %sel = select i1 %ugt, i32 %shl, i32 1
 242   store i32 %shl, ptr %p, align 4  ; second use of %shl
 243   ret i32 %sel
 244 }
 245
 246 ; Negative test: the sub result used twice (by the shl and by the store to %p); the expected output keeps the select.
 247 define i32 @bit_ceil_32_sub_used_twice(i32 %x, ptr %p) {
 248 ; CHECK-LABEL: @bit_ceil_32_sub_used_twice(
 249 ; CHECK-NEXT:    [[DEC:%.*]] = add i32 [[X:%.*]], -1
 250 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call i32 @llvm.ctlz.i32(i32 [[DEC]], i1 false), !range [[RNG0]]
 251 ; CHECK-NEXT:    [[SUB:%.*]] = sub nuw nsw i32 32, [[CTLZ]]
 252 ; CHECK-NEXT:    [[SHL:%.*]] = shl nuw i32 1, [[SUB]]
 253 ; CHECK-NEXT:    [[UGT:%.*]] = icmp ugt i32 [[X]], 1
 254 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[UGT]], i32 [[SHL]], i32 1
 255 ; CHECK-NEXT:    store i32 [[SUB]], ptr [[P:%.*]], align 4
 256 ; CHECK-NEXT:    ret i32 [[SEL]]
 257 ;
 258   %dec = add i32 %x, -1
 259   %ctlz = tail call i32 @llvm.ctlz.i32(i32 %dec, i1 false)
 260   %sub = sub i32 32, %ctlz
 261   %shl = shl i32 1, %sub
 262   %ugt = icmp ugt i32 %x, 1
 263   %sel = select i1 %ugt, i32 %shl, i32 1
 264   store i32 %sub, ptr %p, align 4  ; second use of %sub
 265   ret i32 %sel
 266 }
 267
 268 ; A vector version of @bit_ceil_32 above: every constant is splatted across <4 x i32>, and the expected output has the same select-free shape with a splat mask of 31.
 269 define <4 x i32> @bit_ceil_v4i32(<4 x i32> %x) {
 270 ; CHECK-LABEL: @bit_ceil_v4i32(
 271 ; CHECK-NEXT:    [[DEC:%.*]] = add <4 x i32> [[X:%.*]], <i32 -1, i32 -1, i32 -1, i32 -1>
 272 ; CHECK-NEXT:    [[CTLZ:%.*]] = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> [[DEC]], i1 false), !range [[RNG0]]
 273 ; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <4 x i32> zeroinitializer, [[CTLZ]]
 274 ; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 31, i32 31, i32 31, i32 31>
 275 ; CHECK-NEXT:    [[SEL:%.*]] = shl nuw <4 x i32> <i32 1, i32 1, i32 1, i32 1>, [[TMP2]]
 276 ; CHECK-NEXT:    ret <4 x i32> [[SEL]]
 277 ;
 278   %dec = add <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
 279   %ctlz = tail call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %dec, i1 false)
 280   %sub = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %ctlz  ; per-lane 32 - ctlz(x - 1)
 281   %shl = shl <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %sub
 282   %ugt = icmp ugt <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>  ; per-lane guard x > 1
 283   %sel = select <4 x i1> %ugt, <4 x i32> %shl, <4 x i32> <i32 1, i32 1, i32 1, i32 1>  ; lanes with x <= 1 yield 1
 284   ret <4 x i32> %sel
 285 }
 286
 287 declare i32 @llvm.ctlz.i32(i32, i1 immarg)  ; every call site visible here passes false for the i1 operand
 288 declare i64 @llvm.ctlz.i64(i64, i1 immarg)
 289 declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1)  ; no immarg here, unlike the two scalar declarations