llvm/test/CodeGen/X86/dagcombine-shifts.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i686-linux-gnu | FileCheck %s --check-prefixes=X86
   3 ; RUN: llc < %s -mtriple=x86_64-linux-gnu | FileCheck %s --check-prefixes=X64
   4
   5 ; fold (shl (zext (lshr (A, X))), X) -> (zext (shl (lshr (A, X)), X))
   6
   7 ; Canonicalize the sequence shl/zext/lshr performing the zero-extend
   8 ; as the last instruction of the sequence.
   9 ; This will help DAGCombiner to identify and then fold the sequence
  10 ; of shifts into a single AND.
  11 ; This transformation is profitable if the shift amounts are the same
  12 ; and if there is only one use of the zext.
  13
  14 define i16 @fun1(i8 zeroext %v) {
  15 ; X86-LABEL: fun1:
  16 ; X86:       # %bb.0: # %entry
  17 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
  18 ; X86-NEXT:    andl $-16, %eax
  19 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
  20 ; X86-NEXT:    retl
  21 ;
  22 ; X64-LABEL: fun1:
  23 ; X64:       # %bb.0: # %entry
  24 ; X64-NEXT:    movl %edi, %eax
  25 ; X64-NEXT:    andl $-16, %eax
  26 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
  27 ; X64-NEXT:    retq
  28 entry: ; i8 -> i16 case: lshr, zext, then shl by the same amount
  29   %shr = lshr i8 %v, 4 ; logical right shift by 4 in the narrow i8 type
  30   %ext = zext i8 %shr to i16 ; widen to i16; %ext has exactly one use (the shl)
  31   %shl = shl i16 %ext, 4 ; left shift by the same amount; the checks expect one andl $-16
  32   ret i16 %shl
  33 }
  34
  35 define i32 @fun2(i8 zeroext %v) {
  36 ; X86-LABEL: fun2:
  37 ; X86:       # %bb.0: # %entry
  38 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
  39 ; X86-NEXT:    andl $-16, %eax
  40 ; X86-NEXT:    retl
  41 ;
  42 ; X64-LABEL: fun2:
  43 ; X64:       # %bb.0: # %entry
  44 ; X64-NEXT:    movl %edi, %eax
  45 ; X64-NEXT:    andl $-16, %eax
  46 ; X64-NEXT:    retq
  47 entry: ; i8 -> i32 case: lshr, zext, then shl by the same amount
  48   %shr = lshr i8 %v, 4 ; logical right shift by 4 in the narrow i8 type
  49   %ext = zext i8 %shr to i32 ; widen to i32; %ext has exactly one use (the shl)
  50   %shl = shl i32 %ext, 4 ; left shift by the same amount; the checks expect one andl $-16
  51   ret i32 %shl
  52 }
  53
  54 define i32 @fun3(i16 zeroext %v) {
  55 ; X86-LABEL: fun3:
  56 ; X86:       # %bb.0: # %entry
  57 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
  58 ; X86-NEXT:    andl $-16, %eax
  59 ; X86-NEXT:    retl
  60 ;
  61 ; X64-LABEL: fun3:
  62 ; X64:       # %bb.0: # %entry
  63 ; X64-NEXT:    movl %edi, %eax
  64 ; X64-NEXT:    andl $-16, %eax
  65 ; X64-NEXT:    retq
  66 entry: ; i16 -> i32 case: lshr, zext, then shl by the same amount
  67   %shr = lshr i16 %v, 4 ; logical right shift by 4 in the narrow i16 type
  68   %ext = zext i16 %shr to i32 ; widen to i32; %ext has exactly one use (the shl)
  69   %shl = shl i32 %ext, 4 ; left shift by the same amount; the checks expect one andl $-16
  70   ret i32 %shl
  71 }
  72
  73 define i64 @fun4(i8 zeroext %v) {
  74 ; X86-LABEL: fun4:
  75 ; X86:       # %bb.0: # %entry
  76 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
  77 ; X86-NEXT:    andl $-16, %eax
  78 ; X86-NEXT:    xorl %edx, %edx
  79 ; X86-NEXT:    retl
  80 ;
  81 ; X64-LABEL: fun4:
  82 ; X64:       # %bb.0: # %entry
  83 ; X64-NEXT:    movl %edi, %eax
  84 ; X64-NEXT:    andl $-16, %eax
  85 ; X64-NEXT:    retq
  86 entry: ; i8 -> i64 case: lshr, zext, then shl by the same amount
  87   %shr = lshr i8 %v, 4 ; logical right shift by 4 in the narrow i8 type
  88   %ext = zext i8 %shr to i64 ; widen to i64; %ext has exactly one use (the shl)
  89   %shl = shl i64 %ext, 4 ; left shift by the same amount; the checks expect one andl $-16
  90   ret i64 %shl
  91 }
  92
  93 define i64 @fun5(i16 zeroext %v) {
  94 ; X86-LABEL: fun5:
  95 ; X86:       # %bb.0: # %entry
  96 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
  97 ; X86-NEXT:    andl $-16, %eax
  98 ; X86-NEXT:    xorl %edx, %edx
  99 ; X86-NEXT:    retl
 100 ;
 101 ; X64-LABEL: fun5:
 102 ; X64:       # %bb.0: # %entry
 103 ; X64-NEXT:    movl %edi, %eax
 104 ; X64-NEXT:    andl $-16, %eax
 105 ; X64-NEXT:    retq
 106 entry: ; i16 -> i64 case: lshr, zext, then shl by the same amount
 107   %shr = lshr i16 %v, 4 ; logical right shift by 4 in the narrow i16 type
 108   %ext = zext i16 %shr to i64 ; widen to i64; %ext has exactly one use (the shl)
 109   %shl = shl i64 %ext, 4 ; left shift by the same amount; the checks expect one andl $-16
 110   ret i64 %shl
 111 }
 112
 113 define i64 @fun6(i32 zeroext %v) {
 114 ; X86-LABEL: fun6:
 115 ; X86:       # %bb.0: # %entry
 116 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 117 ; X86-NEXT:    andl $-16, %eax
 118 ; X86-NEXT:    xorl %edx, %edx
 119 ; X86-NEXT:    retl
 120 ;
 121 ; X64-LABEL: fun6:
 122 ; X64:       # %bb.0: # %entry
 123 ; X64-NEXT:    movl %edi, %eax
 124 ; X64-NEXT:    andl $-16, %eax
 125 ; X64-NEXT:    retq
 126 entry: ; i32 -> i64 case: lshr, zext, then shl by the same amount
 127   %shr = lshr i32 %v, 4 ; logical right shift by 4 in the narrow i32 type
 128   %ext = zext i32 %shr to i64 ; widen to i64; %ext has exactly one use (the shl)
 129   %shl = shl i64 %ext, 4 ; left shift by the same amount; the checks expect one andl $-16
 130   ret i64 %shl
 131 }
 132
 133 ; Don't fold the pattern if we use arithmetic shifts.
 134
 135 define i64 @fun7(i8 zeroext %v) {
 136 ; X86-LABEL: fun7:
 137 ; X86:       # %bb.0: # %entry
 138 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 139 ; X86-NEXT:    sarb $4, %al
 140 ; X86-NEXT:    movzbl %al, %eax
 141 ; X86-NEXT:    shll $4, %eax
 142 ; X86-NEXT:    xorl %edx, %edx
 143 ; X86-NEXT:    retl
 144 ;
 145 ; X64-LABEL: fun7:
 146 ; X64:       # %bb.0: # %entry
 147 ; X64-NEXT:    sarb $4, %dil
 148 ; X64-NEXT:    movzbl %dil, %eax
 149 ; X64-NEXT:    shll $4, %eax
 150 ; X64-NEXT:    retq
 151 entry: ; i8 -> i64 case using an arithmetic right shift instead of lshr
 152   %shr = ashr i8 %v, 4 ; arithmetic (sign-propagating) right shift by 4 in i8
 153   %ext = zext i8 %shr to i64 ; widen to i64; single use
 154   %shl = shl i64 %ext, 4 ; left shift by 4; the checks keep the sar and shl as separate instructions
 155   ret i64 %shl
 156 }
 157
 158 define i64 @fun8(i16 zeroext %v) {
 159 ; X86-LABEL: fun8:
 160 ; X86:       # %bb.0: # %entry
 161 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
 162 ; X86-NEXT:    andl $1048560, %eax # imm = 0xFFFF0
 163 ; X86-NEXT:    xorl %edx, %edx
 164 ; X86-NEXT:    retl
 165 ;
 166 ; X64-LABEL: fun8:
 167 ; X64:       # %bb.0: # %entry
 168 ; X64-NEXT:    movswl %di, %eax
 169 ; X64-NEXT:    andl $1048560, %eax # imm = 0xFFFF0
 170 ; X64-NEXT:    retq
 171 entry: ; i16 -> i64 case using an arithmetic right shift instead of lshr
 172   %shr = ashr i16 %v, 4 ; arithmetic (sign-propagating) right shift by 4 in i16
 173   %ext = zext i16 %shr to i64 ; widen to i64; single use
 174   %shl = shl i64 %ext, 4 ; left shift by 4; the checks expect a sign-extending load plus a 0xFFFF0 mask
 175   ret i64 %shl
 176 }
 177
 178 define i64 @fun9(i32 zeroext %v) {
 179 ; X86-LABEL: fun9:
 180 ; X86:       # %bb.0: # %entry
 181 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 182 ; X86-NEXT:    movl %eax, %edx
 183 ; X86-NEXT:    sarl $4, %edx
 184 ; X86-NEXT:    andl $-16, %eax
 185 ; X86-NEXT:    shrl $28, %edx
 186 ; X86-NEXT:    retl
 187 ;
 188 ; X64-LABEL: fun9:
 189 ; X64:       # %bb.0: # %entry
 190 ; X64-NEXT:    movl %edi, %eax
 191 ; X64-NEXT:    sarl $4, %eax
 192 ; X64-NEXT:    shlq $4, %rax
 193 ; X64-NEXT:    retq
 194 entry: ; i32 -> i64 case using an arithmetic right shift instead of lshr
 195   %shr = ashr i32 %v, 4 ; arithmetic (sign-propagating) right shift by 4 in i32
 196   %ext = zext i32 %shr to i64 ; widen to i64; single use
 197   %shl = shl i64 %ext, 4 ; left shift by 4 in the wide i64 type
 198   ret i64 %shl
 199 }
 200
 201 ; Don't fold the pattern if the operand of the shift left (the zext)
 202 ; has more than one use.
 203
 204 define i64 @fun10(i8 zeroext %v) {
 205 ; X86-LABEL: fun10:
 206 ; X86:       # %bb.0: # %entry
 207 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 208 ; X86-NEXT:    shrb $4, %al
 209 ; X86-NEXT:    movzbl %al, %ecx
 210 ; X86-NEXT:    movl %ecx, %eax
 211 ; X86-NEXT:    shll $4, %eax
 212 ; X86-NEXT:    orl %ecx, %eax
 213 ; X86-NEXT:    xorl %edx, %edx
 214 ; X86-NEXT:    retl
 215 ;
 216 ; X64-LABEL: fun10:
 217 ; X64:       # %bb.0: # %entry
 218 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 219 ; X64-NEXT:    movl %edi, %eax
 220 ; X64-NEXT:    shrb $4, %al
 221 ; X64-NEXT:    movzbl %al, %eax
 222 ; X64-NEXT:    andl $-16, %edi
 223 ; X64-NEXT:    orq %rdi, %rax
 224 ; X64-NEXT:    retq
 225 entry: ; i8 -> i64 case where the zext result is used twice
 226   %shr = lshr i8 %v, 4 ; logical right shift by 4 in the narrow i8 type
 227   %ext = zext i8 %shr to i64 ; widen to i64; %ext feeds both the shl and the add below
 228   %shl = shl i64 %ext, 4 ; first use of %ext
 229   %add = add i64 %shl, %ext ; second use of %ext, so the zext is not single-use
 230   ret i64 %add
 231 }
 232
 233 define i64 @fun11(i16 zeroext %v) {
 234 ; X86-LABEL: fun11:
 235 ; X86:       # %bb.0: # %entry
 236 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 237 ; X86-NEXT:    movl %eax, %ecx
 238 ; X86-NEXT:    shrl $4, %ecx
 239 ; X86-NEXT:    andl $-16, %eax
 240 ; X86-NEXT:    addl %ecx, %eax
 241 ; X86-NEXT:    xorl %edx, %edx
 242 ; X86-NEXT:    retl
 243 ;
 244 ; X64-LABEL: fun11:
 245 ; X64:       # %bb.0: # %entry
 246 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 247 ; X64-NEXT:    movl %edi, %eax
 248 ; X64-NEXT:    shrl $4, %eax
 249 ; X64-NEXT:    andl $-16, %edi
 250 ; X64-NEXT:    addq %rdi, %rax
 251 ; X64-NEXT:    retq
 252 entry: ; i16 -> i64 case where the zext result is used twice
 253   %shr = lshr i16 %v, 4 ; logical right shift by 4 in the narrow i16 type
 254   %ext = zext i16 %shr to i64 ; widen to i64; %ext feeds both the shl and the add below
 255   %shl = shl i64 %ext, 4 ; first use of %ext
 256   %add = add i64 %shl, %ext ; second use of %ext, so the zext is not single-use
 257   ret i64 %add
 258 }
 259
 260 define i64 @fun12(i32 zeroext %v) {
 261 ; X86-LABEL: fun12:
 262 ; X86:       # %bb.0: # %entry
 263 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 264 ; X86-NEXT:    movl %eax, %ecx
 265 ; X86-NEXT:    shrl $4, %ecx
 266 ; X86-NEXT:    andl $-16, %eax
 267 ; X86-NEXT:    xorl %edx, %edx
 268 ; X86-NEXT:    addl %ecx, %eax
 269 ; X86-NEXT:    setb %dl
 270 ; X86-NEXT:    retl
 271 ;
 272 ; X64-LABEL: fun12:
 273 ; X64:       # %bb.0: # %entry
 274 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 275 ; X64-NEXT:    movl %edi, %eax
 276 ; X64-NEXT:    shrl $4, %eax
 277 ; X64-NEXT:    andl $-16, %edi
 278 ; X64-NEXT:    addq %rdi, %rax
 279 ; X64-NEXT:    retq
 280 entry: ; i32 -> i64 case where the zext result is used twice
 281   %shr = lshr i32 %v, 4 ; logical right shift by 4 in the narrow i32 type
 282   %ext = zext i32 %shr to i64 ; widen to i64; %ext feeds both the shl and the add below
 283   %shl = shl i64 %ext, 4 ; first use of %ext
 284   %add = add i64 %shl, %ext ; second use of %ext, so the zext is not single-use
 285   ret i64 %add
 286 }
 287
 288 ; PR17380
 289 ; Make sure that the combined dags are legal if we run the DAGCombiner after
 290 ; Legalization took place. The add instruction is redundant, but it adds
 291 ; one more use of the zext. This prevents the transformation from
 292 ; firing before dags are legalized and optimized.
 293 ; Once the add is removed, the number of uses becomes one and therefore the
 294 ; dags are canonicalized. After Legalization, we need to make sure that the
 295 ; valuetype for the shift count is legal.
 296 ; Verify also that we correctly fold the shl-shr sequence into an
 297 ; AND with bitmask.
 298
 299 define void @g(i32 %a) nounwind {
 300 ; X86-LABEL: g:
 301 ; X86:       # %bb.0:
 302 ; X86-NEXT:    subl $12, %esp
 303 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 304 ; X86-NEXT:    andl $-4, %eax
 305 ; X86-NEXT:    subl $8, %esp
 306 ; X86-NEXT:    pushl $0
 307 ; X86-NEXT:    pushl %eax
 308 ; X86-NEXT:    calll f
 309 ; X86-NEXT:    addl $28, %esp
 310 ; X86-NEXT:    retl
 311 ;
 312 ; X64-LABEL: g:
 313 ; X64:       # %bb.0:
 314 ; X64-NEXT:    # kill: def $edi killed $edi def $rdi
 315 ; X64-NEXT:    andl $-4, %edi
 316 ; X64-NEXT:    jmp f # TAILCALL
 317   %b = lshr i32 %a, 2 ; logical right shift by 2 in i32
 318   %c = zext i32 %b to i64 ; widen to i64; used by both %d and %e
 319   %d = add i64 %c, 1 ; result is never used; it only adds a second use of %c
 320   %e = shl i64 %c, 2 ; left shift by the same amount (2); the checks expect one andl $-4
 321   tail call void @f(i64 %e) ; the shifted value is the only thing passed on
 322   ret void
 323 }
 324
 325 declare dso_local void @f(i64) ; external callee for the tail call in @g; no body in this file
 326