llvm/test/Transforms/InstCombine/lshr.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
   2 ; RUN: opt -instcombine -S < %s | FileCheck %s
   3
   4 target datalayout = "e-m:e-i64:64-n8:16:32:64"
     ; The n8:16:32:64 component lists i8/i16/i32/i64 as the native integer widths.
     ; NOTE(review): the *_should_not_change_type tests below appear to rely on this list
     ; to decide which narrowed shift types are acceptable - confirm against InstCombine.
   5
     ; Bit-counting intrinsics exercised by the tests below. For cttz/ctlz the trailing i1
     ; operand is the flag the test names call "zero_is_undef"/"zero_is_not_undef".
   6 declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
   7 declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
   8 declare i32 @llvm.ctpop.i32(i32) nounwind readnone
   9 declare <2 x i8> @llvm.cttz.v2i8(<2 x i8>, i1) nounwind readnone
  10 declare <2 x i8> @llvm.ctlz.v2i8(<2 x i8>, i1) nounwind readnone
  11 declare <2 x i8> @llvm.ctpop.v2i8(<2 x i8>) nounwind readnone
  12
  13 define i32 @lshr_ctlz_zero_is_not_undef(i32 %x) {
     ; ctlz with the i1 flag false is defined for a zero input (result 32), so bit 5 of the
     ; count is set only when %x == 0. Expected output: zext(icmp eq %x, 0).
  14 ; CHECK-LABEL: @lshr_ctlz_zero_is_not_undef(
  15 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 0
  16 ; CHECK-NEXT:    [[SH:%.*]] = zext i1 [[TMP1]] to i32
  17 ; CHECK-NEXT:    ret i32 [[SH]]
  18 ;
  19   %ct = call i32 @llvm.ctlz.i32(i32 %x, i1 false)
  20   %sh = lshr i32 %ct, 5
  21   ret i32 %sh
  22 }
  23
  24 define i32 @lshr_cttz_zero_is_not_undef(i32 %x) {
     ; cttz with the i1 flag false is defined for a zero input (result 32), so shifting the
     ; count right by 5 yields 1 only when %x == 0. Expected output: zext(icmp eq %x, 0).
  25 ; CHECK-LABEL: @lshr_cttz_zero_is_not_undef(
  26 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 0
  27 ; CHECK-NEXT:    [[SH:%.*]] = zext i1 [[TMP1]] to i32
  28 ; CHECK-NEXT:    ret i32 [[SH]]
  29 ;
  30   %ct = call i32 @llvm.cttz.i32(i32 %x, i1 false)
  31   %sh = lshr i32 %ct, 5
  32   ret i32 %sh
  33 }
  34
  35 define i32 @lshr_ctpop(i32 %x) {
     ; The population count of an i32 reaches 32 only when every bit is set, so bit 5 of
     ; the count is set only for %x == -1. Expected output: zext(icmp eq %x, -1).
  36 ; CHECK-LABEL: @lshr_ctpop(
  37 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], -1
  38 ; CHECK-NEXT:    [[SH:%.*]] = zext i1 [[TMP1]] to i32
  39 ; CHECK-NEXT:    ret i32 [[SH]]
  40 ;
  41   %ct = call i32 @llvm.ctpop.i32(i32 %x)
  42   %sh = lshr i32 %ct, 5
  43   ret i32 %sh
  44 }
  45
  46 define <2 x i8> @lshr_ctlz_zero_is_not_undef_splat_vec(<2 x i8> %x) {
     ; Vector form of the ctlz fold: i8 lanes, splat shift by 3 (log2 of the lane width).
     ; Expected output: per-lane zext(icmp eq %x, 0).
  47 ; CHECK-LABEL: @lshr_ctlz_zero_is_not_undef_splat_vec(
  48 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer
  49 ; CHECK-NEXT:    [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
  50 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
  51 ;
  52   %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 false)
  53   %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
  54   ret <2 x i8> %sh
  55 }
  56
  57 define <2 x i8> @lshr_cttz_zero_is_not_undef_splat_vec(<2 x i8> %x) {
     ; Vector form of the cttz fold: i8 lanes, splat shift by 3 (log2 of the lane width).
     ; Expected output: per-lane zext(icmp eq %x, 0).
  58 ; CHECK-LABEL: @lshr_cttz_zero_is_not_undef_splat_vec(
  59 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], zeroinitializer
  60 ; CHECK-NEXT:    [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
  61 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
  62 ;
  63   %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 false)
  64   %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
  65   ret <2 x i8> %sh
  66 }
  67
  68 define <2 x i8> @lshr_ctpop_splat_vec(<2 x i8> %x) {
     ; Vector form of the ctpop fold: an i8 lane has population count 8 only when it is
     ; all-ones, so the splat shift by 3 becomes a per-lane compare against -1.
  69 ; CHECK-LABEL: @lshr_ctpop_splat_vec(
  70 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <2 x i8> [[X:%.*]], <i8 -1, i8 -1>
  71 ; CHECK-NEXT:    [[SH:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i8>
  72 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
  73 ;
  74   %ct = call <2 x i8> @llvm.ctpop.v2i8(<2 x i8> %x)
  75   %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
  76   ret <2 x i8> %sh
  77 }
  78
  79 define i32 @lshr_ctlz_zero_is_undef(i32 %x) {
     ; With the i1 flag true the zero-input result is not defined, so the count can be
     ; assumed to be at most 31 and bit 5 is never set. Expected output: constant 0.
  80 ; CHECK-LABEL: @lshr_ctlz_zero_is_undef(
  81 ; CHECK-NEXT:    ret i32 0
  82 ;
  83   %ct = call i32 @llvm.ctlz.i32(i32 %x, i1 true)
  84   %sh = lshr i32 %ct, 5
  85   ret i32 %sh
  86 }
  87
  88 define i32 @lshr_cttz_zero_is_undef(i32 %x) {
     ; With the i1 flag true the zero-input result is not defined, so the count can be
     ; assumed to be at most 31 and bit 5 is never set. Expected output: constant 0.
  89 ; CHECK-LABEL: @lshr_cttz_zero_is_undef(
  90 ; CHECK-NEXT:    ret i32 0
  91 ;
  92   %ct = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  93   %sh = lshr i32 %ct, 5
  94   ret i32 %sh
  95 }
  96
  97 define <2 x i8> @lshr_ctlz_zero_is_undef_splat_vec(<2 x i8> %x) {
     ; Vector form of the zero-is-undef ctlz case: i8 lanes, splat shift by 3.
     ; Expected output: the whole vector folds to zeroinitializer.
  98 ; CHECK-LABEL: @lshr_ctlz_zero_is_undef_splat_vec(
  99 ; CHECK-NEXT:    ret <2 x i8> zeroinitializer
 100 ;
 101   %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
 102   %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
 103   ret <2 x i8> %sh
 104 }
 105
 106 define i8 @lshr_ctlz_zero_is_undef_vec(<2 x i8> %x) {
     ; Non-splat shift amounts <3, 0>: only lane 0, the one shifted by 3, is extracted and
     ; returned. Expected output: constant 0.
 107 ; CHECK-LABEL: @lshr_ctlz_zero_is_undef_vec(
 108 ; CHECK-NEXT:    ret i8 0
 109 ;
 110   %ct = call <2 x i8> @llvm.ctlz.v2i8(<2 x i8> %x, i1 true)
 111   %sh = lshr <2 x i8> %ct, <i8 3, i8 0>
 112   %ex = extractelement <2 x i8> %sh, i32 0
 113   ret i8 %ex
 114 }
 115
 116 define <2 x i8> @lshr_cttz_zero_is_undef_splat_vec(<2 x i8> %x) {
     ; Vector form of the zero-is-undef cttz case: i8 lanes, splat shift by 3.
     ; Expected output: the whole vector folds to zeroinitializer.
 117 ; CHECK-LABEL: @lshr_cttz_zero_is_undef_splat_vec(
 118 ; CHECK-NEXT:    ret <2 x i8> zeroinitializer
 119 ;
 120   %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
 121   %sh = lshr <2 x i8> %ct, <i8 3, i8 3>
 122   ret <2 x i8> %sh
 123 }
 124
 125 define i8 @lshr_cttz_zero_is_undef_vec(<2 x i8> %x) {
     ; Non-splat shift amounts <3, 0>: only lane 0, the one shifted by 3, is extracted and
     ; returned. Expected output: constant 0.
 126 ; CHECK-LABEL: @lshr_cttz_zero_is_undef_vec(
 127 ; CHECK-NEXT:    ret i8 0
 128 ;
 129   %ct = call <2 x i8> @llvm.cttz.v2i8(<2 x i8> %x, i1 true)
 130   %sh = lshr <2 x i8> %ct, <i8 3, i8 0>
 131   %ex = extractelement <2 x i8> %sh, i32 0
 132   ret i8 %ex
 133 }
 134
 135
 136 define i8 @lshr_exact(i8 %x) {
     ; (%x << 2) + 4 has its two low bits clear, so the shift by 2 drops only zero bits.
     ; Expected output: same instructions, with the 'exact' flag added to the lshr.
 137 ; CHECK-LABEL: @lshr_exact(
 138 ; CHECK-NEXT:    [[SHL:%.*]] = shl i8 [[X:%.*]], 2
 139 ; CHECK-NEXT:    [[ADD:%.*]] = add i8 [[SHL]], 4
 140 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact i8 [[ADD]], 2
 141 ; CHECK-NEXT:    ret i8 [[LSHR]]
 142 ;
 143   %shl = shl i8 %x, 2
 144   %add = add i8 %shl, 4
 145   %lshr = lshr i8 %add, 2
 146   ret i8 %lshr
 147 }
 148
 149 define <2 x i8> @lshr_exact_splat_vec(<2 x i8> %x) {
     ; Splat-vector form of the exact-flag inference: each lane is (%x << 2) + 4, whose two
     ; low bits are clear. Expected output: the lshr gains the 'exact' flag.
 150 ; CHECK-LABEL: @lshr_exact_splat_vec(
 151 ; CHECK-NEXT:    [[SHL:%.*]] = shl <2 x i8> [[X:%.*]], <i8 2, i8 2>
 152 ; CHECK-NEXT:    [[ADD:%.*]] = add <2 x i8> [[SHL]], <i8 4, i8 4>
 153 ; CHECK-NEXT:    [[LSHR:%.*]] = lshr exact <2 x i8> [[ADD]], <i8 2, i8 2>
 154 ; CHECK-NEXT:    ret <2 x i8> [[LSHR]]
 155 ;
 156   %shl = shl <2 x i8> %x, <i8 2, i8 2>
 157   %add = add <2 x i8> %shl, <i8 4, i8 4>
 158   %lshr = lshr <2 x i8> %add, <i8 2, i8 2>
 159   ret <2 x i8> %lshr
 160 }
 161
 162 define i16 @bool_zext(i1 %x) {
     ; sext of an i1 is all-zeros or all-ones; shifting right by 15 (bit width - 1) leaves
     ; 0 or 1. Expected output: a single zext of %x.
 163 ; CHECK-LABEL: @bool_zext(
 164 ; CHECK-NEXT:    [[HIBIT:%.*]] = zext i1 [[X:%.*]] to i16
 165 ; CHECK-NEXT:    ret i16 [[HIBIT]]
 166 ;
 167   %sext = sext i1 %x to i16
 168   %hibit = lshr i16 %sext, 15
 169   ret i16 %hibit
 170 }
 171
 172 define <2 x i8> @bool_zext_splat(<2 x i1> %x) {
     ; Splat-vector form of bool_zext: sext <2 x i1> then shift by 7 (lane width - 1).
     ; Expected output: a single zext of %x.
 173 ; CHECK-LABEL: @bool_zext_splat(
 174 ; CHECK-NEXT:    [[HIBIT:%.*]] = zext <2 x i1> [[X:%.*]] to <2 x i8>
 175 ; CHECK-NEXT:    ret <2 x i8> [[HIBIT]]
 176 ;
 177   %sext = sext <2 x i1> %x to <2 x i8>
 178   %hibit = lshr <2 x i8> %sext, <i8 7, i8 7>
 179   ret <2 x i8> %hibit
 180 }
 181
 182 define i32 @smear_sign_and_widen(i8 %x) {
     ; Shifting the i32 sext right by 24 (32 - 8) leaves eight copies of %x's sign bit in
     ; the low byte. Expected output: ashr i8 %x, 7 followed by zext to i32.
 183 ; CHECK-LABEL: @smear_sign_and_widen(
 184 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr i8 [[X:%.*]], 7
 185 ; CHECK-NEXT:    [[HIBIT:%.*]] = zext i8 [[TMP1]] to i32
 186 ; CHECK-NEXT:    ret i32 [[HIBIT]]
 187 ;
 188   %sext = sext i8 %x to i32
 189   %hibit = lshr i32 %sext, 24
 190   ret i32 %hibit
 191 }
 192
 193 define i16 @smear_sign_and_widen_should_not_change_type(i4 %x) {
     ; Same sext + lshr-by-(dest width - src width) shape, but from an i4 source.
     ; Expected output: the sext and lshr are left unchanged.
     ; NOTE(review): presumably blocked because the narrow shift would be on i4, which is
     ; not among the datalayout's native widths - confirm.
 194 ; CHECK-LABEL: @smear_sign_and_widen_should_not_change_type(
 195 ; CHECK-NEXT:    [[SEXT:%.*]] = sext i4 [[X:%.*]] to i16
 196 ; CHECK-NEXT:    [[HIBIT:%.*]] = lshr i16 [[SEXT]], 12
 197 ; CHECK-NEXT:    ret i16 [[HIBIT]]
 198 ;
 199   %sext = sext i4 %x to i16
 200   %hibit = lshr i16 %sext, 12
 201   ret i16 %hibit
 202 }
 203
 204 define <2 x i8> @smear_sign_and_widen_splat(<2 x i6> %x) {
     ; Splat-vector form: sext <2 x i6> to <2 x i8>, then shift by 2 (8 - 6).
     ; Expected output: ashr <2 x i6> %x by 2 followed by zext to <2 x i8>.
 205 ; CHECK-LABEL: @smear_sign_and_widen_splat(
 206 ; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i6> [[X:%.*]], <i6 2, i6 2>
 207 ; CHECK-NEXT:    [[HIBIT:%.*]] = zext <2 x i6> [[TMP1]] to <2 x i8>
 208 ; CHECK-NEXT:    ret <2 x i8> [[HIBIT]]
 209 ;
 210   %sext = sext <2 x i6> %x to <2 x i8>
 211   %hibit = lshr <2 x i8> %sext, <i8 2, i8 2>
 212   ret <2 x i8> %hibit
 213 }
 214
 215 define i18 @fake_sext(i3 %x) {
     ; Shifting by 17 (dest width - 1) keeps only the sign bit, so the sign extension is
     ; not really needed. Expected output: lshr i3 %x, 2 followed by zext to i18.
 216 ; CHECK-LABEL: @fake_sext(
 217 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i3 [[X:%.*]], 2
 218 ; CHECK-NEXT:    [[SH:%.*]] = zext i3 [[TMP1]] to i18
 219 ; CHECK-NEXT:    ret i18 [[SH]]
 220 ;
 221   %sext = sext i3 %x to i18
 222   %sh = lshr i18 %sext, 17
 223   ret i18 %sh
 224 }
 225
 226 ; Avoid the transform if it would change the shift from a legal to illegal type.
 227
 228 define i32 @fake_sext_but_should_not_change_type(i3 %x) {
     ; Sign-bit extraction from sext i3 -> i32 with a shift by 31 (dest width - 1).
     ; Expected output: the sext and the i32 lshr are left unchanged (no i3 shift is formed).
 229 ; CHECK-LABEL: @fake_sext_but_should_not_change_type(
 230 ; CHECK-NEXT:    [[SEXT:%.*]] = sext i3 [[X:%.*]] to i32
 231 ; CHECK-NEXT:    [[SH:%.*]] = lshr i32 [[SEXT]], 31
 232 ; CHECK-NEXT:    ret i32 [[SH]]
 233 ;
 234   %sext = sext i3 %x to i32
 235   %sh = lshr i32 %sext, 31
 236   ret i32 %sh
 237 }
 238
 239 define <2 x i8> @fake_sext_splat(<2 x i3> %x) {
     ; Splat-vector form of fake_sext: shift by 7 (lane width - 1) keeps only the sign bit.
     ; Expected output: lshr <2 x i3> %x by 2 followed by zext to <2 x i8>.
 240 ; CHECK-LABEL: @fake_sext_splat(
 241 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i3> [[X:%.*]], <i3 2, i3 2>
 242 ; CHECK-NEXT:    [[SH:%.*]] = zext <2 x i3> [[TMP1]] to <2 x i8>
 243 ; CHECK-NEXT:    ret <2 x i8> [[SH]]
 244 ;
 245   %sext = sext <2 x i3> %x to <2 x i8>
 246   %sh = lshr <2 x i8> %sext, <i8 7, i8 7>
 247   ret <2 x i8> %sh
 248 }
 249
 250 ; Use a narrow shift: lshr (zext iM X to iN), C --> zext (lshr X, C) to iN
 251
 252 define <2 x i32> @narrow_lshr_constant(<2 x i8> %x, <2 x i8> %y) {
     ; The shift of the zero-extended value is expected to be performed in the narrow
     ; <2 x i8> type first and then zero-extended. %y is not used by the body.
 253 ; CHECK-LABEL: @narrow_lshr_constant(
 254 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i8> [[X:%.*]], <i8 3, i8 3>
 255 ; CHECK-NEXT:    [[SH:%.*]] = zext <2 x i8> [[TMP1]] to <2 x i32>
 256 ; CHECK-NEXT:    ret <2 x i32> [[SH]]
 257 ;
 258   %zx = zext <2 x i8> %x to <2 x i32>
 259   %sh = lshr <2 x i32> %zx, <i32 3, i32 3>
 260   ret <2 x i32> %sh
 261 }
 262
 263 define i32 @mul_splat_fold(i32 %x) {
     ; 65537 is 2^16 + 1, so the product is %x + (%x << 16); with nuw ruling out unsigned
     ; overflow, shifting right by 16 recovers the low half. Expected: and %x, 65535.
 264 ; CHECK-LABEL: @mul_splat_fold(
 265 ; CHECK-NEXT:    [[T:%.*]] = and i32 [[X:%.*]], 65535
 266 ; CHECK-NEXT:    ret i32 [[T]]
 267 ;
 268   %m = mul nuw i32 %x, 65537
 269   %t = lshr i32 %m, 16
 270   ret i32 %t
 271 }
 272
 273 ; Vector type, extra use, weird types are all ok.
 274
 275 declare void @usevec(<3 x i14>)
     ; External sink used to give %m a second use in the test below.
 276
 277 define <3 x i14> @mul_splat_fold_vec(<3 x i14> %x) {
     ; i14 lanes with multiplier 129 (2^7 + 1) and shift 7. The mul stays because
     ; @usevec consumes it, but the lshr is still expected to become and %x, 127.
 278 ; CHECK-LABEL: @mul_splat_fold_vec(
 279 ; CHECK-NEXT:    [[M:%.*]] = mul nuw <3 x i14> [[X:%.*]], <i14 129, i14 129, i14 129>
 280 ; CHECK-NEXT:    call void @usevec(<3 x i14> [[M]])
 281 ; CHECK-NEXT:    [[T:%.*]] = and <3 x i14> [[X]], <i14 127, i14 127, i14 127>
 282 ; CHECK-NEXT:    ret <3 x i14> [[T]]
 283 ;
 284   %m = mul nuw <3 x i14> %x, <i14 129, i14 129, i14 129>
 285   call void @usevec(<3 x i14> %m)
 286   %t = lshr <3 x i14> %m, <i14 7, i14 7, i14 7>
 287   ret <3 x i14> %t
 288 }
 289
 290 ; Negative test - wrong multiplier constant (65538 is not 2^16 + 1)
 291
 292 define i32 @mul_splat_fold_wrong_mul_const(i32 %x) {
     ; The multiplier 65538 is not 2^16 + 1, so it does not pair with the shift by 16.
     ; Expected output: mul and lshr are left unchanged.
 293 ; CHECK-LABEL: @mul_splat_fold_wrong_mul_const(
 294 ; CHECK-NEXT:    [[M:%.*]] = mul nuw i32 [[X:%.*]], 65538
 295 ; CHECK-NEXT:    [[T:%.*]] = lshr i32 [[M]], 16
 296 ; CHECK-NEXT:    ret i32 [[T]]
 297 ;
 298   %m = mul nuw i32 %x, 65538
 299   %t = lshr i32 %m, 16
 300   ret i32 %t
 301 }
 302
 303 ; Negative test - wrong shift amount (15 instead of 16)
 304
 305 define i32 @mul_splat_fold_wrong_lshr_const(i32 %x) {
     ; The multiplier is 2^16 + 1 but the shift amount is 15 rather than 16.
     ; Expected output: mul and lshr are left unchanged.
 306 ; CHECK-LABEL: @mul_splat_fold_wrong_lshr_const(
 307 ; CHECK-NEXT:    [[M:%.*]] = mul nuw i32 [[X:%.*]], 65537
 308 ; CHECK-NEXT:    [[T:%.*]] = lshr i32 [[M]], 15
 309 ; CHECK-NEXT:    ret i32 [[T]]
 310 ;
 311   %m = mul nuw i32 %x, 65537
 312   %t = lshr i32 %m, 15
 313   ret i32 %t
 314 }
 315
 316 ; Negative test - the mul has nsw but not nuw
 317
 318 define i32 @mul_splat_fold_no_nuw(i32 %x) {
     ; Constants match the foldable pattern, but the mul carries nsw instead of nuw, so
     ; unsigned overflow is not excluded. Expected output: mul and lshr are left unchanged.
 319 ; CHECK-LABEL: @mul_splat_fold_no_nuw(
 320 ; CHECK-NEXT:    [[M:%.*]] = mul nsw i32 [[X:%.*]], 65537
 321 ; CHECK-NEXT:    [[T:%.*]] = lshr i32 [[M]], 16
 322 ; CHECK-NEXT:    ret i32 [[T]]
 323 ;
 324   %m = mul nsw i32 %x, 65537
 325   %t = lshr i32 %m, 16
 326   ret i32 %t
 327 }
 328
 329 define i32 @negative_and_odd(i32 %x) {
     ; srem %x, 2 is negative (-1) only when %x is both negative and odd, so its sign bit
     ; equals sign(%x) & lowbit(%x). Expected output: (lshr %x, 31) and-ed with %x.
 330 ; CHECK-LABEL: @negative_and_odd(
 331 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[X:%.*]], 31
 332 ; CHECK-NEXT:    [[R:%.*]] = and i32 [[TMP1]], [[X]]
 333 ; CHECK-NEXT:    ret i32 [[R]]
 334 ;
 335   %s = srem i32 %x, 2
 336   %r = lshr i32 %s, 31
 337   ret i32 %r
 338 }
 339
 340 define <2 x i7> @negative_and_odd_vec(<2 x i7> %x) {
     ; Splat-vector form with i7 lanes: the sign bit of srem %x, 2 is taken with a shift
     ; by 6 (lane width - 1). Expected output: (lshr %x, 6) and-ed with %x.
 341 ; CHECK-LABEL: @negative_and_odd_vec(
 342 ; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i7> [[X:%.*]], <i7 6, i7 6>
 343 ; CHECK-NEXT:    [[R:%.*]] = and <2 x i7> [[TMP1]], [[X]]
 344 ; CHECK-NEXT:    ret <2 x i7> [[R]]
 345 ;
 346   %s = srem <2 x i7> %x, <i7 2, i7 2>
 347   %r = lshr <2 x i7> %s, <i7 6, i7 6>
 348   ret <2 x i7> %r
 349 }
 350
 351 ; Negative test - the srem has an extra use (might the fold still be worthwhile to eliminate the srem?)
 352
 353 define i32 @negative_and_odd_uses(i32 %x, i32* %p) {
     ; The srem result is also stored through %p, giving it a second use.
     ; Expected output: srem and lshr are left unchanged (the store gains align 4).
 354 ; CHECK-LABEL: @negative_and_odd_uses(
 355 ; CHECK-NEXT:    [[S:%.*]] = srem i32 [[X:%.*]], 2
 356 ; CHECK-NEXT:    store i32 [[S]], i32* [[P:%.*]], align 4
 357 ; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[S]], 31
 358 ; CHECK-NEXT:    ret i32 [[R]]
 359 ;
 360   %s = srem i32 %x, 2
 361   store i32 %s, i32* %p
 362   %r = lshr i32 %s, 31
 363   ret i32 %r
 364 }
 365
 366 ; Negative test - wrong divisor
 367
 368 define i32 @srem3(i32 %x) {
     ; Divisor is 3 rather than 2, so the sign-bit-of-srem pattern does not apply.
     ; Expected output: srem and lshr are left unchanged.
 369 ; CHECK-LABEL: @srem3(
 370 ; CHECK-NEXT:    [[S:%.*]] = srem i32 [[X:%.*]], 3
 371 ; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[S]], 31
 372 ; CHECK-NEXT:    ret i32 [[R]]
 373 ;
 374   %s = srem i32 %x, 3
 375   %r = lshr i32 %s, 31
 376   ret i32 %r
 377 }
 378
 379 ; Negative test - wrong shift amount
 380
 381 define i32 @srem2_lshr30(i32 %x) {
     ; Shift amount is 30 rather than 31 (bit width - 1), so more than the sign bit survives.
     ; Expected output: srem and lshr are left unchanged.
 382 ; CHECK-LABEL: @srem2_lshr30(
 383 ; CHECK-NEXT:    [[S:%.*]] = srem i32 [[X:%.*]], 2
 384 ; CHECK-NEXT:    [[R:%.*]] = lshr i32 [[S]], 30
 385 ; CHECK-NEXT:    ret i32 [[R]]
 386 ;
 387   %s = srem i32 %x, 2
 388   %r = lshr i32 %s, 30
 389   ret i32 %r
 390 }