llvm/test/CodeGen/X86/icmp-shift-opt.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=i686-- < %s | FileCheck %s --check-prefix=X86
   3 ; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s --check-prefix=X64
   4
   5 ; Check that an expanded SRL/SHL whose result is only used by a SETCC
   6 ; comparing it with zero is optimized by removing the rotation.
   7 ;
   8 ; See https://bugs.llvm.org/show_bug.cgi?id=50197
   9 define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
  10 ; X86-LABEL: opt_setcc_lt_power_of_2:
  11 ; X86:       # %bb.0:
  12 ; X86-NEXT:    pushl %ebp
  13 ; X86-NEXT:    pushl %ebx
  14 ; X86-NEXT:    pushl %edi
  15 ; X86-NEXT:    pushl %esi
  16 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
  17 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  18 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
  19 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
  20 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
  21 ; X86-NEXT:    .p2align 4, 0x90
  22 ; X86-NEXT:  .LBB0_1: # %loop
  23 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
  24 ; X86-NEXT:    addl $1, %edi
  25 ; X86-NEXT:    adcl $0, %esi
  26 ; X86-NEXT:    adcl $0, %edx
  27 ; X86-NEXT:    adcl $0, %ecx
  28 ; X86-NEXT:    movl %edx, %ebx
  29 ; X86-NEXT:    orl %ecx, %ebx
  30 ; X86-NEXT:    movl %esi, %ebp
  31 ; X86-NEXT:    orl %edx, %ebp
  32 ; X86-NEXT:    orl %ecx, %ebp
  33 ; X86-NEXT:    shrdl $28, %ebx, %ebp
  34 ; X86-NEXT:    jne .LBB0_1
  35 ; X86-NEXT:  # %bb.2: # %exit
  36 ; X86-NEXT:    movl %edi, (%eax)
  37 ; X86-NEXT:    movl %esi, 4(%eax)
  38 ; X86-NEXT:    movl %edx, 8(%eax)
  39 ; X86-NEXT:    movl %ecx, 12(%eax)
  40 ; X86-NEXT:    popl %esi
  41 ; X86-NEXT:    popl %edi
  42 ; X86-NEXT:    popl %ebx
  43 ; X86-NEXT:    popl %ebp
  44 ; X86-NEXT:    retl $4
  45 ;
  46 ; X64-LABEL: opt_setcc_lt_power_of_2:
  47 ; X64:       # %bb.0:
  48 ; X64-NEXT:    movq %rsi, %rdx
  49 ; X64-NEXT:    movq %rdi, %rax
  50 ; X64-NEXT:    .p2align 4, 0x90
  51 ; X64-NEXT:  .LBB0_1: # %loop
  52 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
  53 ; X64-NEXT:    addq $1, %rax
  54 ; X64-NEXT:    adcq $0, %rdx
  55 ; X64-NEXT:    movq %rax, %rcx
  56 ; X64-NEXT:    shrq $60, %rcx
  57 ; X64-NEXT:    orq %rdx, %rcx
  58 ; X64-NEXT:    jne .LBB0_1
  59 ; X64-NEXT:  # %bb.2: # %exit
  60 ; X64-NEXT:    retq
       ; Repeatedly increments the i128 value and returns the first incremented
       ; value that is unsigned-less-than 2^60.
  61   br label %loop
  62
  63 loop:
  64   %phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]
  65   %inc = add i128 %phi.a, 1
       ; 1152921504606846976 == 1 << 60. The expected x86-64 loop condition is
       ; (low half >> 60) OR-ed with the unshifted high half, i.e. the compare
       ; is tested as "no bit at position 60 or above is set".
  66   %cmp = icmp ult i128 %inc, 1152921504606846976
  67   br i1 %cmp, label %exit, label %loop
  68
  69 exit:
  70   ret i128 %inc
  71 }
  72
  73 define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
  74 ; X86-LABEL: opt_setcc_srl_eq_zero:
  75 ; X86:       # %bb.0:
  76 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
  77 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  78 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
  79 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
  80 ; X86-NEXT:    orl %eax, %edx
  81 ; X86-NEXT:    orl %ecx, %edx
  82 ; X86-NEXT:    orl %eax, %ecx
  83 ; X86-NEXT:    shldl $15, %edx, %ecx
  84 ; X86-NEXT:    sete %al
  85 ; X86-NEXT:    retl
  86 ;
  87 ; X64-LABEL: opt_setcc_srl_eq_zero:
  88 ; X64:       # %bb.0:
  89 ; X64-NEXT:    shrq $17, %rdi
  90 ; X64-NEXT:    orq %rsi, %rdi
  91 ; X64-NEXT:    sete %al
  92 ; X64-NEXT:    retq
        ; Returns true iff (%a >> 17) == 0 for an i128 logical shift right.
        ; The expected x86-64 code shifts only the low half and ORs in the
        ; high half unshifted, with no double-register shift.
  93    %srl = lshr i128 %a, 17
  94    %cmp = icmp eq i128 %srl, 0
  95    ret i1 %cmp
  96 }
  97
  98 define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
  99 ; X86-LABEL: opt_setcc_srl_ne_zero:
 100 ; X86:       # %bb.0:
 101 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 102 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 103 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
 104 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 105 ; X86-NEXT:    orl %eax, %edx
 106 ; X86-NEXT:    orl %ecx, %edx
 107 ; X86-NEXT:    orl %eax, %ecx
 108 ; X86-NEXT:    shldl $15, %edx, %ecx
 109 ; X86-NEXT:    setne %al
 110 ; X86-NEXT:    retl
 111 ;
 112 ; X64-LABEL: opt_setcc_srl_ne_zero:
 113 ; X64:       # %bb.0:
 114 ; X64-NEXT:    shrq $17, %rdi
 115 ; X64-NEXT:    orq %rsi, %rdi
 116 ; X64-NEXT:    setne %al
 117 ; X64-NEXT:    retq
        ; Returns true iff (%a >> 17) != 0 for an i128 logical shift right.
        ; Same shape as the "eq" variant of this test, with the inverted
        ; predicate (setne instead of sete in the expected output).
 118    %srl = lshr i128 %a, 17
 119    %cmp = icmp ne i128 %srl, 0
 120    ret i1 %cmp
 121 }
 122
 123 define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
 124 ; X86-LABEL: opt_setcc_shl_eq_zero:
 125 ; X86:       # %bb.0:
 126 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 127 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 128 ; X86-NEXT:    shll $17, %ecx
 129 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 130 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 131 ; X86-NEXT:    orl %ecx, %eax
 132 ; X86-NEXT:    sete %al
 133 ; X86-NEXT:    retl
 134 ;
 135 ; X64-LABEL: opt_setcc_shl_eq_zero:
 136 ; X64:       # %bb.0:
 137 ; X64-NEXT:    shlq $17, %rsi
 138 ; X64-NEXT:    orq %rdi, %rsi
 139 ; X64-NEXT:    sete %al
 140 ; X64-NEXT:    retq
        ; Returns true iff (%a << 17) == 0 for an i128 shift left.
        ; The expected x86-64 code shifts only the high half and ORs in the
        ; low half unshifted, with no double-register shift.
 141    %shl = shl i128 %a, 17
 142    %cmp = icmp eq i128 %shl, 0
 143    ret i1 %cmp
 144 }
 145
 146 define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
 147 ; X86-LABEL: opt_setcc_shl_ne_zero:
 148 ; X86:       # %bb.0:
 149 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 150 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 151 ; X86-NEXT:    shll $17, %ecx
 152 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 153 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 154 ; X86-NEXT:    orl %ecx, %eax
 155 ; X86-NEXT:    setne %al
 156 ; X86-NEXT:    retl
 157 ;
 158 ; X64-LABEL: opt_setcc_shl_ne_zero:
 159 ; X64:       # %bb.0:
 160 ; X64-NEXT:    shlq $17, %rsi
 161 ; X64-NEXT:    orq %rdi, %rsi
 162 ; X64-NEXT:    setne %al
 163 ; X64-NEXT:    retq
        ; Returns true iff (%a << 17) != 0 for an i128 shift left.
        ; Same shape as the "eq" variant of this test, with the inverted
        ; predicate (setne instead of sete in the expected output).
 164    %shl = shl i128 %a, 17
 165    %cmp = icmp ne i128 %shl, 0
 166    ret i1 %cmp
 167 }
 168
 169 ; Negative test: the optimization must not be applied if the shift has multiple users.
 170 define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
 171 ; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
 172 ; X86:       # %bb.0:
 173 ; X86-NEXT:    pushl %ebx
 174 ; X86-NEXT:    pushl %edi
 175 ; X86-NEXT:    pushl %esi
 176 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 177 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 178 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 179 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 180 ; X86-NEXT:    shldl $17, %edx, %esi
 181 ; X86-NEXT:    shldl $17, %ecx, %edx
 182 ; X86-NEXT:    shldl $17, %eax, %ecx
 183 ; X86-NEXT:    shll $17, %eax
 184 ; X86-NEXT:    movl %ecx, %edi
 185 ; X86-NEXT:    orl %esi, %edi
 186 ; X86-NEXT:    movl %eax, %ebx
 187 ; X86-NEXT:    orl %edx, %ebx
 188 ; X86-NEXT:    orl %edi, %ebx
 189 ; X86-NEXT:    sete %bl
 190 ; X86-NEXT:    pushl %esi
 191 ; X86-NEXT:    pushl %edx
 192 ; X86-NEXT:    pushl %ecx
 193 ; X86-NEXT:    pushl %eax
 194 ; X86-NEXT:    calll use@PLT
 195 ; X86-NEXT:    addl $16, %esp
 196 ; X86-NEXT:    movl %ebx, %eax
 197 ; X86-NEXT:    popl %esi
 198 ; X86-NEXT:    popl %edi
 199 ; X86-NEXT:    popl %ebx
 200 ; X86-NEXT:    retl
 201 ;
 202 ; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
 203 ; X64:       # %bb.0:
 204 ; X64-NEXT:    pushq %rbx
 205 ; X64-NEXT:    shldq $17, %rdi, %rsi
 206 ; X64-NEXT:    shlq $17, %rdi
 207 ; X64-NEXT:    movq %rdi, %rax
 208 ; X64-NEXT:    orq %rsi, %rax
 209 ; X64-NEXT:    sete %bl
 210 ; X64-NEXT:    callq use@PLT
 211 ; X64-NEXT:    movl %ebx, %eax
 212 ; X64-NEXT:    popq %rbx
 213 ; X64-NEXT:    retq
        ; %shl feeds both the compare and the call to @use, so the complete
        ; shifted i128 value is needed; the expected output keeps the full
        ; shld/shl sequence and compares the already-shifted halves.
 214    %shl = shl i128 %a, 17
 215    %cmp = icmp eq i128 %shl, 0
 216    call void @use(i128 %shl)
 217    ret i1 %cmp
 218 }
 219
 220 ; Check that the optimization is applied to a DAG of the appropriate shape
 221 ; even if it did not result from an actual shift expansion.
 222 define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
 223 ; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
 224 ; X86:       # %bb.0:
 225 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 226 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 227 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 228 ; X86-NEXT:    shll $17, %ecx
 229 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 230 ; X86-NEXT:    orl %ecx, %eax
 231 ; X86-NEXT:    sete %al
 232 ; X86-NEXT:    retl
 233 ;
 234 ; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
 235 ; X64:       # %bb.0:
 236 ; X64-NEXT:    shlq $17, %rdi
 237 ; X64-NEXT:    orq %rsi, %rdi
 238 ; X64-NEXT:    sete %al
 239 ; X64-NEXT:    retq
       ; Hand-written i64 pattern: (%a << 17) | (%b >> 47) | (%b << 17),
       ; compared with zero. The shift amounts are consistent (17 + 47 == 64),
       ; and the expected x86-64 output reduces to ((%a << 17) | %b) == 0.
 240   %shl.a = shl i64 %a, 17
 241   %srl.b = lshr i64 %b, 47
 242   %or.0 = or i64 %shl.a, %srl.b
 243   %shl.b = shl i64 %b, 17
 244   %or.1 = or i64 %or.0, %shl.b
 245   %cmp = icmp eq i64 %or.1, 0
 246   ret i1 %cmp
 247 }
 248
 249 ; Negative test: the optimization must not be applied because the
 250 ; constants used in the shifts do not match.
 251 define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind {
 252 ; X86-LABEL: opt_setcc_expanded_shl_wrong_shifts:
 253 ; X86:       # %bb.0:
 254 ; X86-NEXT:    pushl %esi
 255 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 256 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 257 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 258 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 259 ; X86-NEXT:    shldl $17, %edx, %esi
 260 ; X86-NEXT:    shldl $17, %ecx, %edx
 261 ; X86-NEXT:    shldl $18, %eax, %ecx
 262 ; X86-NEXT:    shll $18, %eax
 263 ; X86-NEXT:    orl %edx, %eax
 264 ; X86-NEXT:    orl %esi, %ecx
 265 ; X86-NEXT:    orl %eax, %ecx
 266 ; X86-NEXT:    sete %al
 267 ; X86-NEXT:    popl %esi
 268 ; X86-NEXT:    retl
 269 ;
 270 ; X64-LABEL: opt_setcc_expanded_shl_wrong_shifts:
 271 ; X64:       # %bb.0:
 272 ; X64-NEXT:    shldq $17, %rsi, %rdi
 273 ; X64-NEXT:    shlq $18, %rsi
 274 ; X64-NEXT:    orq %rdi, %rsi
 275 ; X64-NEXT:    sete %al
 276 ; X64-NEXT:    retq
       ; Same shape as the "correct_shifts" test, except %b is shifted left by
       ; 18 instead of 17, so it no longer pairs with the 17/47 shifts. The
       ; expected x86-64 output keeps both the shldq and the shlq.
 277   %shl.a = shl i64 %a, 17
 278   %srl.b = lshr i64 %b, 47
 279   %or.0 = or i64 %shl.a, %srl.b
 280   %shl.b = shl i64 %b, 18
 281   %or.1 = or i64 %or.0, %shl.b
 282   %cmp = icmp eq i64 %or.1, 0
 283   ret i1 %cmp
 284 }
 285
 286 declare void @use(i128 %a) ; external function; gives the shift a second user in the multiple-users test