test/CodeGen/X86/dagcombine-shifts.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s
   3
   4 ; fold (shl (zext (lshr (A, X))), X) -> (zext (shl (lshr (A, X)), X))
   5
   6 ; Canonicalize the sequence shl/zext/lshr so that the zero-extend is
   7 ; performed as the last instruction of the sequence.
   8 ; This will help DAGCombiner to identify and then fold the sequence
   9 ; of shifts into a single AND.
  10 ; This transformation is profitable if the shift amounts are the same
  11 ; and if there is only one use of the zext.
  12
  13 define i16 @fun1(i8 zeroext %v) { ; i8 -> i16 case of the lshr/zext/shl pattern; expected asm masks with a single andl $-16
  14 ; CHECK-LABEL: fun1:
  15 ; CHECK:       # %bb.0: # %entry
  16 ; CHECK-NEXT:    movl %edi, %eax
  17 ; CHECK-NEXT:    andl $-16, %eax
  18 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
  19 ; CHECK-NEXT:    retq
  20 entry:
  21   %shr = lshr i8 %v, 4 ; logical shift right by 4 in the narrow (i8) type
  22   %ext = zext i8 %shr to i16 ; only use of %ext is the shl below
  23   %shl = shl i16 %ext, 4 ; shift left by the same amount (4) as the lshr
  24   ret i16 %shl
  25 }
  26
  27 define i32 @fun2(i8 zeroext %v) { ; i8 -> i32 case of the lshr/zext/shl pattern; expected asm is movl + andl $-16
  28 ; CHECK-LABEL: fun2:
  29 ; CHECK:       # %bb.0: # %entry
  30 ; CHECK-NEXT:    movl %edi, %eax
  31 ; CHECK-NEXT:    andl $-16, %eax
  32 ; CHECK-NEXT:    retq
  33 entry:
  34   %shr = lshr i8 %v, 4 ; logical shift right by 4 in i8
  35   %ext = zext i8 %shr to i32 ; single use: feeds only the shl
  36   %shl = shl i32 %ext, 4 ; same shift amount as the lshr
  37   ret i32 %shl
  38 }
  39
  40 define i32 @fun3(i16 zeroext %v) { ; i16 -> i32 case of the lshr/zext/shl pattern; expected asm is movl + andl $-16
  41 ; CHECK-LABEL: fun3:
  42 ; CHECK:       # %bb.0: # %entry
  43 ; CHECK-NEXT:    movl %edi, %eax
  44 ; CHECK-NEXT:    andl $-16, %eax
  45 ; CHECK-NEXT:    retq
  46 entry:
  47   %shr = lshr i16 %v, 4 ; logical shift right by 4 in i16
  48   %ext = zext i16 %shr to i32 ; single use: feeds only the shl
  49   %shl = shl i32 %ext, 4 ; same shift amount as the lshr
  50   ret i32 %shl
  51 }
  52
  53 define i64 @fun4(i8 zeroext %v) { ; i8 -> i64 case of the lshr/zext/shl pattern; expected asm is movl + andl $-16
  54 ; CHECK-LABEL: fun4:
  55 ; CHECK:       # %bb.0: # %entry
  56 ; CHECK-NEXT:    movl %edi, %eax
  57 ; CHECK-NEXT:    andl $-16, %eax
  58 ; CHECK-NEXT:    retq
  59 entry:
  60   %shr = lshr i8 %v, 4 ; logical shift right by 4 in i8
  61   %ext = zext i8 %shr to i64 ; single use: feeds only the shl
  62   %shl = shl i64 %ext, 4 ; same shift amount as the lshr
  63   ret i64 %shl
  64 }
  65
  66 define i64 @fun5(i16 zeroext %v) { ; i16 -> i64 case of the lshr/zext/shl pattern; expected asm is movl + andl $-16
  67 ; CHECK-LABEL: fun5:
  68 ; CHECK:       # %bb.0: # %entry
  69 ; CHECK-NEXT:    movl %edi, %eax
  70 ; CHECK-NEXT:    andl $-16, %eax
  71 ; CHECK-NEXT:    retq
  72 entry:
  73   %shr = lshr i16 %v, 4 ; logical shift right by 4 in i16
  74   %ext = zext i16 %shr to i64 ; single use: feeds only the shl
  75   %shl = shl i64 %ext, 4 ; same shift amount as the lshr
  76   ret i64 %shl
  77 }
  78
  79 define i64 @fun6(i32 zeroext %v) { ; i32 -> i64 case of the lshr/zext/shl pattern; expected asm is movl + andl $-16
  80 ; CHECK-LABEL: fun6:
  81 ; CHECK:       # %bb.0: # %entry
  82 ; CHECK-NEXT:    movl %edi, %eax
  83 ; CHECK-NEXT:    andl $-16, %eax
  84 ; CHECK-NEXT:    retq
  85 entry:
  86   %shr = lshr i32 %v, 4 ; logical shift right by 4 in i32
  87   %ext = zext i32 %shr to i64 ; single use: feeds only the shl
  88   %shl = shl i64 %ext, 4 ; same shift amount as the lshr
  89   ret i64 %shl
  90 }
  91
  92 ; Don't fold the pattern if we use arithmetic shifts.
  93
  94 define i64 @fun7(i8 zeroext %v) { ; negative test (i8 -> i64): ashr instead of lshr; expected asm keeps sarb / movzbl / shlq, no AND mask
  95 ; CHECK-LABEL: fun7:
  96 ; CHECK:       # %bb.0: # %entry
  97 ; CHECK-NEXT:    sarb $4, %dil
  98 ; CHECK-NEXT:    movzbl %dil, %eax
  99 ; CHECK-NEXT:    shlq $4, %rax
 100 ; CHECK-NEXT:    retq
 101 entry:
 102   %shr = ashr i8 %v, 4 ; arithmetic shift: copies the i8 sign bit into the top 4 bits
 103   %ext = zext i8 %shr to i64 ; single use: feeds only the shl
 104   %shl = shl i64 %ext, 4 ; same shift amount as the ashr
 105   ret i64 %shl
 106 }
 107
 108 define i64 @fun8(i16 zeroext %v) { ; negative test (i16 -> i64): ashr instead of lshr; expected asm keeps the separate shift and extend steps
 109 ; CHECK-LABEL: fun8:
 110 ; CHECK:       # %bb.0: # %entry
 111 ; CHECK-NEXT:    movswl %di, %eax
 112 ; CHECK-NEXT:    shrl $4, %eax
 113 ; CHECK-NEXT:    movzwl %ax, %eax
 114 ; CHECK-NEXT:    shlq $4, %rax
 115 ; CHECK-NEXT:    retq
 116 entry:
 117   %shr = ashr i16 %v, 4 ; arithmetic shift: copies the i16 sign bit into the top 4 bits
 118   %ext = zext i16 %shr to i64 ; single use: feeds only the shl
 119   %shl = shl i64 %ext, 4 ; same shift amount as the ashr
 120   ret i64 %shl
 121 }
 122
 123 define i64 @fun9(i32 zeroext %v) { ; negative test (i32 -> i64): ashr instead of lshr; expected asm keeps sarl followed by shlq
 124 ; CHECK-LABEL: fun9:
 125 ; CHECK:       # %bb.0: # %entry
 126 ; CHECK-NEXT:    movl %edi, %eax
 127 ; CHECK-NEXT:    sarl $4, %eax
 128 ; CHECK-NEXT:    shlq $4, %rax
 129 ; CHECK-NEXT:    retq
 130 entry:
 131   %shr = ashr i32 %v, 4 ; arithmetic shift: copies the i32 sign bit into the top 4 bits
 132   %ext = zext i32 %shr to i64 ; single use: feeds only the shl
 133   %shl = shl i64 %ext, 4 ; same shift amount as the ashr
 134   ret i64 %shl
 135 }
 136
 137 ; Don't fold the pattern if the operand of the shift left (the zext)
 138 ; has more than one use.
 139
 140 define i64 @fun10(i8 zeroext %v) { ; negative test (i8 -> i64): the zext has two uses; expected asm keeps shrb and shlq as separate shifts
 141 ; CHECK-LABEL: fun10:
 142 ; CHECK:       # %bb.0: # %entry
 143 ; CHECK-NEXT:    shrb $4, %dil
 144 ; CHECK-NEXT:    movzbl %dil, %ecx
 145 ; CHECK-NEXT:    movq %rcx, %rax
 146 ; CHECK-NEXT:    shlq $4, %rax
 147 ; CHECK-NEXT:    orq %rcx, %rax
 148 ; CHECK-NEXT:    retq
 149 entry:
 150   %shr = lshr i8 %v, 4 ; logical shift right by 4 in i8, so the result is < 16
 151   %ext = zext i8 %shr to i64 ; two uses: the shl and the add below
 152   %shl = shl i64 %ext, 4 ; first use of %ext; result is a multiple of 16
 153   %add = add i64 %shl, %ext ; second use of %ext; operands share no set bits, presumably why the expected asm uses orq
 154   ret i64 %add
 155 }
 156
 157 define i64 @fun11(i16 zeroext %v) { ; negative test (i16 -> i64): the zext has two uses; expected asm keeps shrl and shlq as separate shifts
 158 ; CHECK-LABEL: fun11:
 159 ; CHECK:       # %bb.0: # %entry
 160 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 161 ; CHECK-NEXT:    shrl $4, %edi
 162 ; CHECK-NEXT:    movq %rdi, %rax
 163 ; CHECK-NEXT:    shlq $4, %rax
 164 ; CHECK-NEXT:    addq %rdi, %rax
 165 ; CHECK-NEXT:    retq
 166 entry:
 167   %shr = lshr i16 %v, 4 ; logical shift right by 4 in i16
 168   %ext = zext i16 %shr to i64 ; two uses: the shl and the add below
 169   %shl = shl i64 %ext, 4 ; first use of %ext
 170   %add = add i64 %shl, %ext ; second use of %ext
 171   ret i64 %add
 172 }
 173
 174 define i64 @fun12(i32 zeroext %v) { ; negative test (i32 -> i64): the zext has two uses; expected asm keeps shrl and shlq as separate shifts
 175 ; CHECK-LABEL: fun12:
 176 ; CHECK:       # %bb.0: # %entry
 177 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 178 ; CHECK-NEXT:    shrl $4, %edi
 179 ; CHECK-NEXT:    movq %rdi, %rax
 180 ; CHECK-NEXT:    shlq $4, %rax
 181 ; CHECK-NEXT:    addq %rdi, %rax
 182 ; CHECK-NEXT:    retq
 183 entry:
 184   %shr = lshr i32 %v, 4 ; logical shift right by 4 in i32
 185   %ext = zext i32 %shr to i64 ; two uses: the shl and the add below
 186   %shl = shl i64 %ext, 4 ; first use of %ext
 187   %add = add i64 %shl, %ext ; second use of %ext
 188   ret i64 %add
 189 }
 190
 191 ; PR17380
 192 ; Make sure that the combined dags are legal if we run the DAGCombiner after
 193 ; Legalization took place. The add instruction is redundant and increases the
 194 ; number of uses of the zext by one. This prevents the transformation from
 195 ; firing before dags are legalized and optimized.
 196 ; Once the add is removed, the number of uses becomes one and therefore the
 197 ; dags are canonicalized. After Legalization, we need to make sure that the
 198 ; valuetype for the shift count is legal.
 199 ; Verify also that we correctly fold the shl-shr sequence into an
 200 ; AND with bitmask.
 201
 202 define void @g(i32 %a) { ; PR17380 regression test: expected asm is andl $-4 followed by a tail-call jmp to f
 203 ; CHECK-LABEL: g:
 204 ; CHECK:       # %bb.0:
 205 ; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
 206 ; CHECK-NEXT:    andl $-4, %edi
 207 ; CHECK-NEXT:    jmp f # TAILCALL
 208   %b = lshr i32 %a, 2 ; logical shift right by 2 in i32
 209   %c = zext i32 %b to i64 ; used by both the add and the shl below
 210   %d = add i64 %c, 1 ; result is never used; its only effect is an extra use of %c
 211   %e = shl i64 %c, 2 ; same shift amount (2) as the lshr
 212   tail call void @f(i64 %e) ; pass the shifted value to the external @f
 213   ret void
 214 }
 215
 216 declare void @f(i64) ; external function; target of the tail call in @g
 217