llvm/test/CodeGen/X86/fold-and-shift.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefixes=X86
   3 ; RUN: llc < %s -mtriple=x86_64-- | FileCheck %s --check-prefixes=X64
   4
   5 define i32 @t1(ptr %X, i32 %i) {
   6 ; X86-LABEL: t1:
   7 ; X86:       # %bb.0: # %entry
   8 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   9 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  10 ; X86-NEXT:    movzbl %cl, %ecx
  11 ; X86-NEXT:    movl (%eax,%ecx,4), %eax
  12 ; X86-NEXT:    retl
  13 ;
  14 ; X64-LABEL: t1:
  15 ; X64:       # %bb.0: # %entry
  16 ; X64-NEXT:    movzbl %sil, %eax
  17 ; X64-NEXT:    movl (%rdi,%rax,4), %eax
  18 ; X64-NEXT:    retq
  19 entry:
       ; Loads an i32 from %X at byte offset (%i << 2) & 1020. Because 1020 is
       ; 255 * 4, that offset equals (%i & 255) * 4; the checks above expect the
       ; mask to appear as a movzbl of the low byte and the shift to be folded
       ; into the x4 scale of the load's addressing mode.
  20   %tmp2 = shl i32 %i, 2
  21   %tmp4 = and i32 %tmp2, 1020
       ; i8 GEP: %tmp4 is used directly as a byte offset from %X.
  22   %tmp7 = getelementptr i8, ptr %X, i32 %tmp4
  23   %tmp9 = load i32, ptr %tmp7
  24   ret i32 %tmp9
  25 }
  26
  27 define i32 @t2(ptr %X, i32 %i) {
  28 ; X86-LABEL: t2:
  29 ; X86:       # %bb.0: # %entry
  30 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
  31 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  32 ; X86-NEXT:    movzwl %cx, %ecx
  33 ; X86-NEXT:    movl (%eax,%ecx,4), %eax
  34 ; X86-NEXT:    retl
  35 ;
  36 ; X64-LABEL: t2:
  37 ; X64:       # %bb.0: # %entry
  38 ; X64-NEXT:    movzwl %si, %eax
  39 ; X64-NEXT:    movl (%rdi,%rax,4), %eax
  40 ; X64-NEXT:    retq
  41 entry:
       ; 16-bit variant of the shift-and-mask index pattern: the index is
       ; (%i << 1) & 131070, i.e. (%i & 65535) * 2. The checks above expect
       ; the mask to appear as a movzwl of the low 16 bits and the address to
       ; use a x4 scale.
  42   %tmp2 = shl i32 %i, 1
  43   %tmp4 = and i32 %tmp2, 131070
       ; i16 GEP: the index is scaled by 2 bytes per element, so the final byte
       ; offset is (%i & 65535) * 4.
  44   %tmp7 = getelementptr i16, ptr %X, i32 %tmp4
       ; Note that an i32 is loaded through the i16-indexed address.
  45   %tmp9 = load i32, ptr %tmp7
  46   ret i32 %tmp9
  47 }
  48
  49 ; This case is tricky. The lshr followed by a gep will produce a lshr followed
  50 ; by an and to remove the low bits. This can be simplified by doing the lshr by
  51 ; a greater constant and using the addressing mode to scale the result back up.
  52 ; To make matters worse, because of the two-phase zext of %i and their reuse in
  53 ; the function, the DAG can get confusing trying to re-use both of them and
  54 ; prevent easy analysis of the mask in order to match this.
  55 define i32 @t3(ptr %i.ptr, ptr %arr) {
  56 ; X86-LABEL: t3:
  57 ; X86:       # %bb.0: # %entry
  58 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  59 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
  60 ; X86-NEXT:    movzwl (%eax), %eax
  61 ; X86-NEXT:    movl %eax, %edx
  62 ; X86-NEXT:    shrl $11, %edx
  63 ; X86-NEXT:    addl (%ecx,%edx,4), %eax
  64 ; X86-NEXT:    retl
  65 ;
  66 ; X64-LABEL: t3:
  67 ; X64:       # %bb.0: # %entry
  68 ; X64-NEXT:    movzwl (%rdi), %eax
  69 ; X64-NEXT:    movl %eax, %ecx
  70 ; X64-NEXT:    shrl $11, %ecx
  71 ; X64-NEXT:    addl (%rsi,%rcx,4), %eax
  72 ; X64-NEXT:    retq
  73 entry:
       ; Returns %arr[%i >> 11] + %i, where %i is a zero-extended i16 loaded
       ; from %i.ptr. The checks above expect one zero-extending load (movzwl),
       ; a register copy, a shrl by 11, and an add whose memory operand uses
       ; the shifted value with a x4 scale.
  74   %i = load i16, ptr %i.ptr
       ; %i.zext has two uses: the shifted array index and the final add.
  75   %i.zext = zext i16 %i to i32
  76   %index = lshr i32 %i.zext, 11
       ; i32 GEP: the index is scaled by 4 bytes per element.
  77   %val.ptr = getelementptr inbounds i32, ptr %arr, i32 %index
  78   %val = load i32, ptr %val.ptr
  79   %sum = add i32 %val, %i.zext
  80   ret i32 %sum
  81 }
  82
  83 ; A version of @t3 that has more zero extends and more re-use of intermediate
  84 ; values. This exercises slightly different bits of canonicalization.
  85 define i32 @t4(ptr %i.ptr, ptr %arr) {
  86 ; X86-LABEL: t4:
  87 ; X86:       # %bb.0: # %entry
  88 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  89 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
  90 ; X86-NEXT:    movzwl (%eax), %eax
  91 ; X86-NEXT:    movl %eax, %edx
  92 ; X86-NEXT:    shrl $11, %edx
  93 ; X86-NEXT:    addl (%ecx,%edx,4), %eax
  94 ; X86-NEXT:    addl %edx, %eax
  95 ; X86-NEXT:    retl
  96 ;
  97 ; X64-LABEL: t4:
  98 ; X64:       # %bb.0: # %entry
  99 ; X64-NEXT:    movzwl (%rdi), %eax
 100 ; X64-NEXT:    movl %eax, %ecx
 101 ; X64-NEXT:    shrl $11, %ecx
 102 ; X64-NEXT:    addl (%rsi,%rcx,4), %eax
 103 ; X64-NEXT:    addl %ecx, %eax
 104 ; X64-NEXT:    retq
 105 entry:
       ; Returns %arr[%i >> 11] + %i + (%i >> 11), where %i is a zero-extended
       ; i16 loaded from %i.ptr. The checks above expect a movzwl load, a
       ; register copy, a shrl by 11, an add with a x4-scaled memory operand,
       ; and a register add that reuses the shifted value.
 106   %i = load i16, ptr %i.ptr
 107   %i.zext = zext i16 %i to i32
       ; %index has two uses: the array index (via %index.zext) and %sum.2.
 108   %index = lshr i32 %i.zext, 11
       ; The index is explicitly zero-extended to i64 before the GEP.
 109   %index.zext = zext i32 %index to i64
 110   %val.ptr = getelementptr inbounds i32, ptr %arr, i64 %index.zext
 111   %val = load i32, ptr %val.ptr
 112   %sum.1 = add i32 %val, %i.zext
 113   %sum.2 = add i32 %sum.1, %index
 114   ret i32 %sum.2
 115 }
 116
 117 define i8 @t5(ptr %X, i32 %i) {
 118 ; X86-LABEL: t5:
 119 ; X86:       # %bb.0: # %entry
 120 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 121 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 122 ; X86-NEXT:    andl $-14, %ecx
 123 ; X86-NEXT:    movzbl (%eax,%ecx,4), %eax
 124 ; X86-NEXT:    retl
 125 ;
 126 ; X64-LABEL: t5:
 127 ; X64:       # %bb.0: # %entry
 128 ; X64-NEXT:    shll $2, %esi
 129 ; X64-NEXT:    andl $-56, %esi
 130 ; X64-NEXT:    movslq %esi, %rax
 131 ; X64-NEXT:    movzbl (%rdi,%rax), %eax
 132 ; X64-NEXT:    retq
 133 entry:
       ; Loads an i8 from %X at byte offset (%i << 2) & -56, i.e. with a
       ; negative mask. The X86 checks above expect the shift folded into the
       ; address: the mask becomes -14 (-56 / 4) and the load uses a x4 scale.
       ; The X64 checks expect the shift and mask to stay explicit, followed by
       ; a movslq and an unscaled address.
       ; NOTE(review): presumably the fold is skipped on x86-64 because the i32
       ; offset has to be sign-extended to 64 bits first - confirm.
 134   %tmp2 = shl i32 %i, 2
 135   %tmp4 = and i32 %tmp2, -56
       ; i8 GEP: %tmp4 is used directly as a byte offset from %X.
 136   %tmp7 = getelementptr i8, ptr %X, i32 %tmp4
 137   %tmp9 = load i8, ptr %tmp7
 138   ret i8 %tmp9
 139 }
 140
 141 define i8 @t6(ptr %X, i32 %i) {
 142 ; X86-LABEL: t6:
 143 ; X86:       # %bb.0: # %entry
 144 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 145 ; X86-NEXT:    movl $-255, %ecx
 146 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
 147 ; X86-NEXT:    movzbl (%eax,%ecx,4), %eax
 148 ; X86-NEXT:    retl
 149 ;
 150 ; X64-LABEL: t6:
 151 ; X64:       # %bb.0: # %entry
 152 ; X64-NEXT:    shll $2, %esi
 153 ; X64-NEXT:    andl $-1020, %esi # imm = 0xFC04
 154 ; X64-NEXT:    movslq %esi, %rax
 155 ; X64-NEXT:    movzbl (%rdi,%rax), %eax
 156 ; X64-NEXT:    retq
 157 entry:
       ; Same shape as the -56 mask case but with mask -1020. The X86 checks
       ; above expect the shift folded into the address: the mask constant
       ; becomes -255 (-1020 / 4), it is and-ed with the stack argument, and the
       ; load uses a x4 scale. The X64 checks expect the shift and mask to stay
       ; explicit, followed by a movslq and an unscaled address.
       ; NOTE(review): presumably the fold is skipped on x86-64 because the i32
       ; offset has to be sign-extended to 64 bits first - confirm.
 158   %tmp2 = shl i32 %i, 2
 159   %tmp4 = and i32 %tmp2, -1020
       ; i8 GEP: %tmp4 is used directly as a byte offset from %X.
 160   %tmp7 = getelementptr i8, ptr %X, i32 %tmp4
 161   %tmp9 = load i8, ptr %tmp7
 162   ret i8 %tmp9
 163 }