llvm/test/Transforms/InstCombine/udiv-simplify.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt < %s -passes=instcombine -S | FileCheck %s
   3
   4 define i64 @test1(i32 %x) nounwind { ; udiv of a value below 2^31 by UINT32_MAX; expected to fold to constant 0
   5 ; CHECK-LABEL: @test1(
   6 ; CHECK-NEXT:    ret i64 0
   7 ;
   8   %y = lshr i32 %x, 1 ; top bit of %y is always clear
   9   %r = udiv i32 %y, -1 ; divisor is 0xFFFFFFFF, strictly greater than any possible %y, so the quotient is 0
  10   %z = sext i32 %r to i64
  11   ret i64 %z
  12 }
  13 define i64 @test2(i32 %x) nounwind { ; udiv of a one-bit value by 3; expected to fold to constant 0
  14 ; CHECK-LABEL: @test2(
  15 ; CHECK-NEXT:    ret i64 0
  16 ;
  17   %y = lshr i32 %x, 31 ; only the former sign bit survives: %y is 0 or 1
  18   %r = udiv i32 %y, 3 ; both 0/3 and 1/3 are 0
  19   %z = sext i32 %r to i64
  20   ret i64 %z
  21 }
  22
  23 ; The udiv instructions shouldn't be optimized away, and the
  24 ; sext instructions should be optimized to zext.
  25
  26 define i64 @test1_PR2274(i32 %x, i32 %g) nounwind { ; udiv by an unknown divisor must stay; only the sext is rewritten
  27 ; CHECK-LABEL: @test1_PR2274(
  28 ; CHECK-NEXT:    [[Y:%.*]] = lshr i32 [[X:%.*]], 30
  29 ; CHECK-NEXT:    [[R:%.*]] = udiv i32 [[Y]], [[G:%.*]]
  30 ; CHECK-NEXT:    [[Z:%.*]] = zext nneg i32 [[R]] to i64
  31 ; CHECK-NEXT:    ret i64 [[Z]]
  32 ;
  33   %y = lshr i32 %x, 30 ; %y is in the range 0..3
  34   %r = udiv i32 %y, %g ; %g is unknown, so the quotient is not a constant (it is at most %y)
  35   %z = sext i32 %r to i64 ; sign bit of %r is known clear, so the checks expect 'zext nneg'
  36   ret i64 %z
  37 }
  38 define i64 @test2_PR2274(i32 %x, i32 %v) nounwind { ; same as @test1_PR2274 but with a one-bit dividend
  39 ; CHECK-LABEL: @test2_PR2274(
  40 ; CHECK-NEXT:    [[Y:%.*]] = lshr i32 [[X:%.*]], 31
  41 ; CHECK-NEXT:    [[R:%.*]] = udiv i32 [[Y]], [[V:%.*]]
  42 ; CHECK-NEXT:    [[Z:%.*]] = zext nneg i32 [[R]] to i64
  43 ; CHECK-NEXT:    ret i64 [[Z]]
  44 ;
  45   %y = lshr i32 %x, 31 ; %y is 0 or 1
  46   %r = udiv i32 %y, %v ; %v may be 1, so the quotient is not known to be 0 and the udiv must remain
  47   %z = sext i32 %r to i64 ; sign bit of %r is known clear, so the checks expect 'zext nneg'
  48   ret i64 %z
  49 }
  50
  51 ; The udiv should be simplified according to the rule:
  52 ; X udiv (C1 << N), where C1 is `1<<C2` --> X >> (N+C2)
  53 @b = external global [1 x i16] ; its address is used below only as a non-foldable shift amount
  54
  55 define i32 @PR30366(i1 %a) { ; udiv by a zero-extended power of two; expected to become a logical shift right
  56 ; CHECK-LABEL: @PR30366(
  57 ; CHECK-NEXT:    [[Z:%.*]] = zext i1 [[A:%.*]] to i32
  58 ; CHECK-NEXT:    [[TMP1:%.*]] = zext nneg i16 ptrtoint (ptr @b to i16) to i32
  59 ; CHECK-NEXT:    [[D1:%.*]] = lshr i32 [[Z]], [[TMP1]]
  60 ; CHECK-NEXT:    ret i32 [[D1]]
  61 ;
  62   %z = zext i1 %a to i32
  63   %shl = shl i16 1, ptrtoint (ptr @b to i16) ; 1 << N where N is a constant expression, i.e. a power of two
  64   %z2 = zext i16 %shl to i32 ; the power of two is widened before being used as the divisor
  65   %d = udiv i32 %z, %z2 ; checks expect 'lshr %z, zext(N)' in place of the division
  66   ret i32 %d
  67 }
  68
  69 ; OSS-Fuzz #4857
  70 ; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=4857
  71 define i177 @ossfuzz_4857(i177 %X, i177 %Y) { ; fuzzer-derived reproducer on an odd bit width; %X is unused
  72 ; CHECK-LABEL: @ossfuzz_4857(
  73 ; CHECK-NEXT:    store i1 poison, ptr undef, align 1
  74 ; CHECK-NEXT:    ret i177 0
  75 ;
  76   %B5 = udiv i177 %Y, -1 ; 1 only when %Y is all-ones, otherwise 0
  77   %B4 = add i177 %B5, -1 ; %B5 - 1
  78   %B2 = add i177 %B4, -1 ; %B5 - 2, i.e. -2 or -1
  79   %B6 = mul i177 %B5, %B2 ; 0 when %B5 is 0, -1 when %B5 is 1
  80   %B3 = add i177 %B2, %B2
  81   %B9 = xor i177 %B4, %B3
  82   %B13 = ashr i177 %Y, %B2 ; shift amount (-2 or -1 as unsigned) is never below the bit width, so this is poison
  83   %B22 = add i177 %B9, %B13
  84   %B1 = udiv i177 %B5, %B6 ; either 0/0 (undefined behaviour) or 1/-1 == 0, hence the expected 'ret i177 0'
  85   %C9 = icmp ult i177 %Y, %B22 ; depends on the poison shift, hence the expected 'store i1 poison'
  86   store i1 %C9, ptr undef ; NOTE(review): presumably only here to keep %C9 alive -- confirm
  87   ret i177 %B1
  88 }
  89
  90 ; 2 low bits are not needed because 12 has 2 trailing zeros
  91
  92 define i8 @udiv_demanded_low_bits_set(i8 %a) { ; setting the 2 low dividend bits cannot change the quotient by 12
  93 ; CHECK-LABEL: @udiv_demanded_low_bits_set(
  94 ; CHECK-NEXT:    [[U:%.*]] = udiv i8 [[A:%.*]], 12
  95 ; CHECK-NEXT:    ret i8 [[U]]
  96 ;
  97   %o = or i8 %a, 3 ; touches only bits 0-1; the checks expect this 'or' to be removed
  98   %u = udiv i8 %o, 12 ; expected to divide %a directly
  99   ret i8 %u
 100 }
 101
 102 ; This can't divide evenly, so it is poison.
 103
 104 define i8 @udiv_exact_demanded_low_bits_set(i8 %a) { ; 'exact' division that can never be exact; expected to fold to poison
 105 ; CHECK-LABEL: @udiv_exact_demanded_low_bits_set(
 106 ; CHECK-NEXT:    ret i8 poison
 107 ;
 108   %o = or i8 %a, 3 ; the two low bits are always set, so %o is never a multiple of 4
 109   %u = udiv exact i8 %o, 12 ; 12 is a multiple of 4, so the remainder is never zero and 'exact' is always violated
 110   ret i8 %u
 111 }
 112
 113 ; All high bits are set, so this simplifies.
 114
 115 define i8 @udiv_demanded_high_bits_set(i8 %x, i8 %y) { ; dividend confined to 252..255; %y is unused
 116 ; CHECK-LABEL: @udiv_demanded_high_bits_set(
 117 ; CHECK-NEXT:    ret i8 21
 118 ;
 119   %o = or i8 %x, -4 ; -4 is 0xFC: the six high bits are forced on, so %o is in 252..255
 120   %r = udiv i8 %o, 12 ; every value in 252..255 divided by 12 gives 21
 121   ret i8 %r
 122 }
 123
 124 ; This should fold the same as above.
 125
 126 define i8 @udiv_exact_demanded_high_bits_set(i8 %x, i8 %y) { ; 'exact' variant of the 252..255 dividend case; %y is unused
 127 ; CHECK-LABEL: @udiv_exact_demanded_high_bits_set(
 128 ; CHECK-NEXT:    ret i8 21
 129 ;
 130   %o = or i8 %x, -4 ; -4 is 0xFC: the six high bits are forced on, so %o is in 252..255
 131   %r = udiv exact i8 %o, 12 ; only 252 divides evenly (252 == 12 * 21); the checks still expect the constant 21
 132   ret i8 %r
 133 }
 134
 135 ; 2 low bits are not needed because 12 has 2 trailing zeros
 136
 137 define i8 @udiv_demanded_low_bits_clear(i8 %a) { ; clearing the 2 low dividend bits cannot change the quotient by 12
 138 ; CHECK-LABEL: @udiv_demanded_low_bits_clear(
 139 ; CHECK-NEXT:    [[U:%.*]] = udiv i8 [[A:%.*]], 12
 140 ; CHECK-NEXT:    ret i8 [[U]]
 141 ;
 142   %o = and i8 %a, -4 ; -4 is 0xFC: clears only bits 0-1; the checks expect this 'and' to be removed
 143   %u = udiv i8 %o, 12 ; expected to divide %a directly
 144   ret i8 %u
 145 }
 146
 147 ; This should fold the same as above: the 'and' is removed and the 'exact' flag is dropped.
 148
 149 define i8 @udiv_exact_demanded_low_bits_clear(i8 %a) { ; 'exact' variant of the cleared-low-bits case
 150 ; CHECK-LABEL: @udiv_exact_demanded_low_bits_clear(
 151 ; CHECK-NEXT:    [[U:%.*]] = udiv i8 [[A:%.*]], 12
 152 ; CHECK-NEXT:    ret i8 [[U]]
 153 ;
 154   %o = and i8 %a, -4 ; -4 is 0xFC: clears only bits 0-1; the checks expect this 'and' to be removed
 155   %u = udiv exact i8 %o, 12 ; the expected output divides %a directly and no longer carries 'exact'
 156   ret i8 %u
 157 }
 158
 159 define <vscale x 1 x i32> @udiv_demanded3(<vscale x 1 x i32> %a) { ; scalable-vector version of the low-bits-set case
 160 ; CHECK-LABEL: @udiv_demanded3(
 161 ; CHECK-NEXT:    [[U:%.*]] = udiv <vscale x 1 x i32> [[A:%.*]], splat (i32 12)
 162 ; CHECK-NEXT:    ret <vscale x 1 x i32> [[U]]
 163 ;
 164   %o = or <vscale x 1 x i32> %a, splat (i32 3) ; sets bits 0-1 in every lane; the checks expect this 'or' to be removed
 165   %u = udiv <vscale x 1 x i32> %o, splat (i32 12) ; expected to divide %a directly
 166   ret <vscale x 1 x i32> %u
 167 }
 168
 169 ; PR74242
 170 define i32 @div_by_zero_or_one_from_dom_cond(i32 %a, i32 %b) { ; divisor is limited to 0 or 1 by the dominating branch
 171 ; CHECK-LABEL: @div_by_zero_or_one_from_dom_cond(
 172 ; CHECK-NEXT:  entry:
 173 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[A:%.*]], 1
 174 ; CHECK-NEXT:    br i1 [[CMP]], label [[JOIN:%.*]], label [[ZERO_OR_ONE:%.*]]
 175 ; CHECK:       zero_or_one:
 176 ; CHECK-NEXT:    br label [[JOIN]]
 177 ; CHECK:       join:
 178 ; CHECK-NEXT:    ret i32 [[B:%.*]]
 179 ;
 180 entry:
 181   %cmp = icmp ugt i32 %a, 1
 182   br i1 %cmp, label %join, label %zero_or_one ; the false edge implies %a is 0 or 1
  183
 184 zero_or_one:
 185   %div = udiv i32 %b, %a ; division by 0 is undefined behaviour, so only %a == 1 matters and the result is %b
 186   br label %join
  187
 188 join:
 189   %res = phi i32 [ %div, %zero_or_one ], [ %b, %entry ] ; both incoming values become %b, so the checks expect 'ret i32 %b'
 190   ret i32 %res
 191 }