llvm/test/Transforms/InstSimplify/shift-knownbits.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -passes=instsimplify -S -data-layout="E" | FileCheck %s --check-prefixes=CHECK,BIGENDIAN
   3 ; RUN: opt < %s -passes=instsimplify -S -data-layout="e" | FileCheck %s --check-prefixes=CHECK,LITTLEENDIAN
   4
   5 ; If any bits of the shift amount are known to make it exceed or equal
   6 ; the number of bits in the type, the shift result is poison.
   7
   8 define i32 @shl_amount_is_known_bogus(i32 %a, i32 %b) {
   9 ; CHECK-LABEL: @shl_amount_is_known_bogus(
  10 ; CHECK-NEXT:    ret i32 poison
  11 ;
  12   %or = or i32 %b, 32 ; bit 5 is always set, so the amount is at least 32 (the width of i32)
  13   %shl = shl i32 %a, %or ; expected to fold to poison
  14   ret i32 %shl
  15 }
  16
  17 ; Check some weird types and the other shift ops.
  18
  19 define i31 @lshr_amount_is_known_bogus(i31 %a, i31 %b) {
  20 ; CHECK-LABEL: @lshr_amount_is_known_bogus(
  21 ; CHECK-NEXT:    ret i31 poison
  22 ;
  23   %or = or i31 %b, 31 ; the low five bits are always set, so the amount is at least 31 (the width of i31)
  24   %shr = lshr i31 %a, %or ; expected to fold to poison
  25   ret i31 %shr
  26 }
  27
  28 define i33 @ashr_amount_is_known_bogus(i33 %a, i33 %b) {
  29 ; CHECK-LABEL: @ashr_amount_is_known_bogus(
  30 ; CHECK-NEXT:    ret i33 poison
  31 ;
  32   %or = or i33 %b, 33 ; bits 5 and 0 are always set, so the amount is at least 33 (the width of i33)
  33   %shr = ashr i33 %a, %or ; expected to fold to poison
  34   ret i33 %shr
  35 }
  36
  37
  38 ; If all valid bits of the shift amount are known 0, there's no shift.
  39 ; It doesn't matter if high bits are set because that would make the result poison.
  40 ; Therefore, the only possible valid result of these shifts is %a.
  41
  42 define i16 @ashr_amount_is_zero(i16 %a, i16 %b) {
  43 ; CHECK-LABEL: @ashr_amount_is_zero(
  44 ; CHECK-NEXT:    ret i16 [[A:%.*]]
  45 ;
  46   %and = and i16 %b, 65520 ; 0xfff0: the low 4 bits are cleared, so the amount is 0 or at least 16
  47   %shr = ashr i16 %a, %and ; expected to fold to %a
  48   ret i16 %shr
  49 }
  50
  51 define i300 @lshr_amount_is_zero(i300 %a, i300 %b) {
  52 ; CHECK-LABEL: @lshr_amount_is_zero(
  53 ; CHECK-NEXT:    ret i300 [[A:%.*]]
  54 ;
  55   %and = and i300 %b, 2048 ; only bit 11 can survive: the amount is 0 or 2048, and 2048 > 300
  56   %shr = lshr i300 %a, %and ; expected to fold to %a
  57   ret i300 %shr
  58 }
  59
  60 define i9 @shl_amount_is_zero(i9 %a, i9 %b) {
  61 ; CHECK-LABEL: @shl_amount_is_zero(
  62 ; CHECK-NEXT:    ret i9 [[A:%.*]]
  63 ;
  64   %and = and i9 %b, 496 ; 0x1f0: the low 4 bits are cleared, so the amount is 0 or at least 16 (> 9)
  65   %shl = shl i9 %a, %and ; expected to fold to %a
  66   ret i9 %shl
  67 }
  68
  69
  70 ; Verify that we've calculated the log2 boundary of valid bits correctly for a weird type.
  71
  72 define i9 @shl_amount_is_not_known_zero(i9 %a, i9 %b) {
  73 ; CHECK-LABEL: @shl_amount_is_not_known_zero(
  74 ; CHECK-NEXT:    [[AND:%.*]] = and i9 [[B:%.*]], -8
  75 ; CHECK-NEXT:    [[SHL:%.*]] = shl i9 [[A:%.*]], [[AND]]
  76 ; CHECK-NEXT:    ret i9 [[SHL]]
  77 ;
  78   %and = and i9 %b, 504 ; 0x1f8 (shown as -8 in the expected output): bit 3 can survive, so 8 is a possible amount
  79   %shl = shl i9 %a, %and ; 8 is an in-range amount for i9, so the shift is expected to remain
  80   ret i9 %shl
  81 }
  82
  83
  84 ; For vectors, we need all scalar elements to meet the requirements to optimize.
  85
  86 define <2 x i32> @ashr_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
  87 ; CHECK-LABEL: @ashr_vector_bogus(
  88 ; CHECK-NEXT:    ret <2 x i32> poison
  89 ;
  90   %or = or <2 x i32> %b, <i32 32, i32 32> ; bit 5 is set in both lanes, so each amount is at least 32
  91   %shr = ashr <2 x i32> %a, %or ; expected to fold to poison
  92   ret <2 x i32> %shr
  93 }
  94
  95 ; FIXME: This should fold to poison, but computeKnownBits doesn't handle the union.
  96 define <2 x i32> @shl_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
  97 ; CHECK-LABEL: @shl_vector_bogus(
  98 ; CHECK-NEXT:    [[OR:%.*]] = or <2 x i32> [[B:%.*]], <i32 32, i32 64>
  99 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], [[OR]]
 100 ; CHECK-NEXT:    ret <2 x i32> [[SHL]]
 101 ;
 102   %or = or <2 x i32> %b, <i32 32, i32 64> ; lane 0 is at least 32 and lane 1 is at least 64, forced via different bits
 103   %shl = shl <2 x i32> %a, %or ; the expected output currently keeps this shift
 104   ret <2 x i32> %shl
 105 }
 106
 107 define <2 x i32> @lshr_vector_zero(<2 x i32> %a, <2 x i32> %b) {
 108 ; CHECK-LABEL: @lshr_vector_zero(
 109 ; CHECK-NEXT:    ret <2 x i32> [[A:%.*]]
 110 ;
 111   %and = and <2 x i32> %b, <i32 64, i32 256> ; lane 0 is 0 or 64, lane 1 is 0 or 256; the low 5 bits are clear in both
 112   %shr = lshr <2 x i32> %a, %and ; expected to fold to %a
 113   ret <2 x i32> %shr
 114 }
 115
 116 ; Make sure that weird vector types work too.
 117 define <2 x i15> @shl_vector_zero(<2 x i15> %a, <2 x i15> %b) {
 118 ; CHECK-LABEL: @shl_vector_zero(
 119 ; CHECK-NEXT:    ret <2 x i15> [[A:%.*]]
 120 ;
 121   %and = and <2 x i15> %b, <i15 1024, i15 1024> ; each lane is 0 or 1024, and 1024 > 15
 122   %shl = shl <2 x i15> %a, %and ; expected to fold to %a
 123   ret <2 x i15> %shl
 124 }
 125
 126 define <2 x i32> @shl_vector_for_real(<2 x i32> %a, <2 x i32> %b) {
 127 ; CHECK-LABEL: @shl_vector_for_real(
 128 ; CHECK-NEXT:    [[AND:%.*]] = and <2 x i32> [[B:%.*]], <i32 3, i32 3>
 129 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i32> [[A:%.*]], [[AND]]
 130 ; CHECK-NEXT:    ret <2 x i32> [[SHL]]
 131 ;
 132   %and = and <2 x i32> %b, <i32 3, i32 3> ; a necessary mask op: each amount is in 0..3, all in range for i32
 133   %shl = shl <2 x i32> %a, %and ; the expected output keeps this shift
 134   ret <2 x i32> %shl
 135 }
 136
 137
 138 ; We calculate the valid bits of the shift using log2, and log2 of 1 (the type width) is 0.
 139 ; That should be ok. Either the shift amount is 0 or invalid (1), so we can always return %a.
 140
 141 define i1 @shl_i1(i1 %a, i1 %b) {
 142 ; CHECK-LABEL: @shl_i1(
 143 ; CHECK-NEXT:    ret i1 [[A:%.*]]
 144 ;
 145   %shl = shl i1 %a, %b ; %b is either 0 (no shift) or 1 (equal to the 1-bit width); expected to fold to %a
 146   ret i1 %shl
 147 }
 148
 149 ; The following cases only get folded by InstCombine,
 150 ; see InstCombine/lshr.ll.
 151
 152 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone ; called by @lshr_cttz_zero_is_undef
 153 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone ; called by @lshr_ctlz_zero_is_undef
 154 declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) nounwind readnone ; called by the two @lshr_cttz_zero_is_undef_*vec tests
 155 declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone ; called by the two @lshr_ctlz_zero_is_undef_*vec tests
 156
 157 define i32 @lshr_ctlz_zero_is_undef(i32 %x) {
 158 ; CHECK-LABEL: @lshr_ctlz_zero_is_undef(
 159 ; CHECK-NEXT:    [[CT:%.*]] = call i32 @llvm.ctlz.i32(i32 [[X:%.*]], i1 true)
 160 ; CHECK-NEXT:    [[SH:%.*]] = lshr i32 [[CT]], 5
 161 ; CHECK-NEXT:    ret i32 [[SH]]
 162 ;
 163   %ct = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
 164   %sh = lshr i32 %ct, 5 ; shift by 5 == log2(32); the expected output keeps both instructions
 165   ret i32 %sh
 166 }
 167
 168 define i32 @lshr_cttz_zero_is_undef(i32 %x) {
 169 ; CHECK-LABEL: @lshr_cttz_zero_is_undef(
 170 ; CHECK-NEXT:    [[CT:%.*]] = call i32 @llvm.cttz.i32(i32 [[X:%.*]], i1 true)
 171 ; CHECK-NEXT:    [[SH:%.*]] = lshr i32 [[CT]], 5
 172 ; CHECK-NEXT:    ret i32 [[SH]]
 173 ;
 174   %ct = call i32 @llvm.cttz.i32(i32 %x, i1 true)
 175   %sh = lshr i32 %ct, 5 ; shift by 5 == log2(32); the expected output keeps both instructions
 176   ret i32 %sh
 177 }
 178
 179 define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) {
 180 ; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec(
 181 ; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
 182 ; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
 183 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
 184 ;
 185   %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
 186   %sh = lshr <2 x i8> %ct, <i8 3, i8 3> ; splat shift by 3 == log2(8); the expected output keeps both instructions
 187   ret <2 x i8> %sh
 188 }
 189
 190 define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) {
 191 ; CHECK-LABEL: @lshr_ctlz_zero_is_undef_vec(
 192 ; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> [[X:%.*]], i1 true)
 193 ; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 0>
 194 ; CHECK-NEXT:    [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
 195 ; CHECK-NEXT:    ret i8 [[EX]]
 196 ;
 197   %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
 198   %sh = lshr <2 x i8> %ct, <i8 3, i8 0> ; non-splat amounts: lane 0 shifts by 3, lane 1 by 0
 199   %ex = extractelement <2 x i8> %sh, i32 0 ; only lane 0 (the lane shifted by 3) is returned
 200   ret i8 %ex
 201 }
 202
 203 define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) {
 204 ; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec(
 205 ; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
 206 ; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 3>
 207 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
 208 ;
 209   %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
 210   %sh = lshr <2 x i8> %ct, <i8 3, i8 3> ; splat shift by 3 == log2(8); the expected output keeps both instructions
 211   ret <2 x i8> %sh
 212 }
 213
 214 define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) {
 215 ; CHECK-LABEL: @lshr_cttz_zero_is_undef_vec(
 216 ; CHECK-NEXT:    [[CT:%.*]] = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> [[X:%.*]], i1 true)
 217 ; CHECK-NEXT:    [[SH:%.*]] = lshr <2 x i8> [[CT]], <i8 3, i8 0>
 218 ; CHECK-NEXT:    [[EX:%.*]] = extractelement <2 x i8> [[SH]], i32 0
 219 ; CHECK-NEXT:    ret i8 [[EX]]
 220 ;
 221   %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
 222   %sh = lshr <2 x i8> %ct, <i8 3, i8 0> ; non-splat amounts: lane 0 shifts by 3, lane 1 by 0
 223   %ex = extractelement <2 x i8> %sh, i32 0 ; only lane 0 (the lane shifted by 3) is returned
 224   ret i8 %ex
 225 }
 226
 227 ; The shift amount is 0 in both the high and low bytes, so its low byte is 0 for either endianness. The middle byte doesn't matter.
 228
 229 define i24 @bitcast_noshift_scalar(<3 x i8> %v1, i24 %v2) {
 230 ; CHECK-LABEL: @bitcast_noshift_scalar(
 231 ; CHECK-NEXT:    ret i24 [[V2:%.*]]
 232 ;
 233   %c = insertelement <3 x i8> poison, i8 0, i64 0 ; %c = <0, poison, poison>
 234   %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 3, i32 1, i32 3> ; mask index 3 is lane 0 of %c, so %s = <0, %v1[1], 0>
 235   %b = bitcast <3 x i8> %s to i24
 236   %r = shl i24 %v2, %b ; expected to fold to %v2 for both data layouts
 237   ret i24 %r
 238 }
 239
 240 ; The shift amount is 0 on low byte of big-endian and unknown on little-endian.
 241
 242 define i24 @bitcast_noshift_scalar_bigend(<3 x i8> %v1, i24 %v2) {
 243 ; BIGENDIAN-LABEL: @bitcast_noshift_scalar_bigend(
 244 ; BIGENDIAN-NEXT:    ret i24 [[V2:%.*]]
 245 ;
 246 ; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_bigend(
 247 ; LITTLEENDIAN-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 0, i32 1, i32 3>
 248 ; LITTLEENDIAN-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
 249 ; LITTLEENDIAN-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
 250 ; LITTLEENDIAN-NEXT:    ret i24 [[R]]
 251 ;
 252   %c = insertelement <3 x i8> poison, i8 0, i64 0 ; %c = <0, poison, poison>
 253   %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 0, i32 1, i32 3> ; mask index 3 is lane 0 of %c, so %s = <%v1[0], %v1[1], 0>
 254   %b = bitcast <3 x i8> %s to i24
 255   %r = shl i24 %v2, %b ; expected to fold to %v2 only with the big-endian data layout
 256   ret i24 %r
 257 }
 258
 259 ; The shift amount is 0 on low byte of little-endian and unknown on big-endian.
 260
 261 define i24 @bitcast_noshift_scalar_littleend(<3 x i8> %v1, i24 %v2) {
 262 ; BIGENDIAN-LABEL: @bitcast_noshift_scalar_littleend(
 263 ; BIGENDIAN-NEXT:    [[S:%.*]] = shufflevector <3 x i8> [[V1:%.*]], <3 x i8> <i8 0, i8 poison, i8 poison>, <3 x i32> <i32 3, i32 1, i32 2>
 264 ; BIGENDIAN-NEXT:    [[B:%.*]] = bitcast <3 x i8> [[S]] to i24
 265 ; BIGENDIAN-NEXT:    [[R:%.*]] = shl i24 [[V2:%.*]], [[B]]
 266 ; BIGENDIAN-NEXT:    ret i24 [[R]]
 267 ;
 268 ; LITTLEENDIAN-LABEL: @bitcast_noshift_scalar_littleend(
 269 ; LITTLEENDIAN-NEXT:    ret i24 [[V2:%.*]]
 270 ;
 271   %c = insertelement <3 x i8> poison, i8 0, i64 0 ; %c = <0, poison, poison>
 272   %s = shufflevector <3 x i8> %v1, <3 x i8> %c, <3 x i32> <i32 3, i32 1, i32 2> ; mask index 3 is lane 0 of %c, so %s = <0, %v1[1], %v1[2]>
 273   %b = bitcast <3 x i8> %s to i24
 274   %r = shl i24 %v2, %b ; expected to fold to %v2 only with the little-endian data layout
 275   ret i24 %r
 276 }
 277
 278 ; The shift amount is known 24 on little-endian and known 24<<16 on big-endian
 279 ; across all vector elements, so it's an overshift either way.
 280
 281 define <3 x i24> @bitcast_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) {
 282 ; CHECK-LABEL: @bitcast_overshift_vector(
 283 ; CHECK-NEXT:    ret <3 x i24> poison
 284 ;
 285   %c = insertelement <9 x i8> poison, i8 24, i64 0 ; only lane 0 of %c is defined (24)
 286   %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8> ; mask index 9 is lane 0 of %c: lanes 0, 3 and 6 of %s are 24
 287   %b = bitcast <9 x i8> %s to <3 x i24> ; the 24s land in the first i8 of each group of three
 288   %r = shl <3 x i24> %v2, %b ; expected to fold to poison for both data layouts
 289   ret <3 x i24> %r
 290 }
 291
 292 ; The shift amount is known 23 on little-endian and known 23<<16 on big-endian
 293 ; across all vector elements, so it's an overshift for big-endian.
 294
 295 define <3 x i24> @bitcast_overshift_vector_bigend(<9 x i8> %v1, <3 x i24> %v2) {
 296 ; BIGENDIAN-LABEL: @bitcast_overshift_vector_bigend(
 297 ; BIGENDIAN-NEXT:    ret <3 x i24> poison
 298 ;
 299 ; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_bigend(
 300 ; LITTLEENDIAN-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8>
 301 ; LITTLEENDIAN-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
 302 ; LITTLEENDIAN-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
 303 ; LITTLEENDIAN-NEXT:    ret <3 x i24> [[R]]
 304 ;
 305   %c = insertelement <9 x i8> poison, i8 23, i64 0 ; only lane 0 of %c is defined (23)
 306   %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 9, i32 7, i32 8> ; mask index 9 is lane 0 of %c: lanes 0, 3 and 6 of %s are 23
 307   %b = bitcast <9 x i8> %s to <3 x i24> ; the 23s land in the first i8 of each group of three
 308   %r = shl <3 x i24> %v2, %b ; expected to fold to poison only with the big-endian data layout
 309   ret <3 x i24> %r
 310 }
 311
 312 ; The shift amount is known 23 on big-endian and known 23<<16 on little-endian
 313 ; across all vector elements, so it's an overshift for little-endian.
 314
 315 define <3 x i24> @bitcast_overshift_vector_littleend(<9 x i8> %v1, <3 x i24> %v2) {
 316 ; BIGENDIAN-LABEL: @bitcast_overshift_vector_littleend(
 317 ; BIGENDIAN-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 23, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9>
 318 ; BIGENDIAN-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
 319 ; BIGENDIAN-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
 320 ; BIGENDIAN-NEXT:    ret <3 x i24> [[R]]
 321 ;
 322 ; LITTLEENDIAN-LABEL: @bitcast_overshift_vector_littleend(
 323 ; LITTLEENDIAN-NEXT:    ret <3 x i24> poison
 324 ;
 325   %c = insertelement <9 x i8> poison, i8 23, i64 0 ; only lane 0 of %c is defined (23)
 326   %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 0, i32 1, i32 9, i32 3, i32 4, i32 9, i32 6, i32 7, i32 9> ; mask index 9 is lane 0 of %c: lanes 2, 5 and 8 of %s are 23
 327   %b = bitcast <9 x i8> %s to <3 x i24> ; the 23s land in the last i8 of each group of three
 328   %r = shl <3 x i24> %v2, %b ; expected to fold to poison only with the little-endian data layout
 329   ret <3 x i24> %r
 330 }
 331
 332 ; Negative test - the shift amount is known 24 or 24<<16 on only 2 out of 3 elements.
 333
 334 define <3 x i24> @bitcast_partial_overshift_vector(<9 x i8> %v1, <3 x i24> %v2) {
 335 ; CHECK-LABEL: @bitcast_partial_overshift_vector(
 336 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <9 x i8> [[V1:%.*]], <9 x i8> <i8 24, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7, i32 8>
 337 ; CHECK-NEXT:    [[B:%.*]] = bitcast <9 x i8> [[S]] to <3 x i24>
 338 ; CHECK-NEXT:    [[R:%.*]] = shl <3 x i24> [[V2:%.*]], [[B]]
 339 ; CHECK-NEXT:    ret <3 x i24> [[R]]
 340 ;
 341   %c = insertelement <9 x i8> poison, i8 24, i64 0 ; only lane 0 of %c is defined (24)
 342   %s = shufflevector <9 x i8> %v1, <9 x i8> %c, <9 x i32> <i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7, i32 8> ; only lanes 0 and 3 of %s are 24; lanes 6..8 all come from %v1
 343   %b = bitcast <9 x i8> %s to <3 x i24> ; the last group of three i8 lanes has no known constant
 344   %r = shl <3 x i24> %v2, %b ; the expected output keeps this shift
 345   ret <3 x i24> %r
 346 }
 347
 348 ; Negative test - don't know how to look through a cast with non-integer type (but we could handle this...).
 349
 350 define <1 x i64> @bitcast_noshift_vector_wrong_type(<2 x float> %v1, <1 x i64> %v2) {
 351 ; CHECK-LABEL: @bitcast_noshift_vector_wrong_type(
 352 ; CHECK-NEXT:    [[S:%.*]] = shufflevector <2 x float> [[V1:%.*]], <2 x float> <float 0.000000e+00, float poison>, <2 x i32> <i32 2, i32 1>
 353 ; CHECK-NEXT:    [[B:%.*]] = bitcast <2 x float> [[S]] to <1 x i64>
 354 ; CHECK-NEXT:    [[R:%.*]] = shl <1 x i64> [[V2:%.*]], [[B]]
 355 ; CHECK-NEXT:    ret <1 x i64> [[R]]
 356 ;
 357   %c = insertelement <2 x float> poison, float 0.0, i64 0 ; %c = <0.0, poison>
 358   %s = shufflevector <2 x float> %v1, <2 x float> %c, <2 x i32> <i32 2, i32 1> ; mask index 2 is lane 0 of %c, so %s = <0.0, %v1[1]>
 359   %b = bitcast <2 x float> %s to <1 x i64> ; the cast source is a floating-point vector, not an integer one
 360   %r = shl <1 x i64> %v2, %b ; the expected output keeps this shift
 361   ret <1 x i64> %r
 362 }