llvm/test/CodeGen/X86/icmp-shift-opt.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -mtriple=i686-- < %s | FileCheck %s --check-prefix=X86
   3 ; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s --check-prefix=X64
   4
   5 ; Optimize expanded SRL/SHL used as an input of
   6 ; SETCC comparing it with zero by removing rotation.
   7 ;
   8 ; See https://bugs.llvm.org/show_bug.cgi?id=50197
   9 define i128 @opt_setcc_lt_power_of_2(i128 %a) nounwind {
     ; Starting from %a, keep adding 1 until the running value is unsigned-less-than
     ; 1152921504606846976 (2^60), then return that value.
     ; Comparing `ult` against a power of two is the same as asking whether every
     ; bit from bit 60 upwards is zero; the x86-64 assertions below show this as
     ; `shrq $60` on the low half OR-ed with the high half.
  10 ; X86-LABEL: opt_setcc_lt_power_of_2:
  11 ; X86:       # %bb.0:
  12 ; X86-NEXT:    pushl %ebp
  13 ; X86-NEXT:    pushl %ebx
  14 ; X86-NEXT:    pushl %edi
  15 ; X86-NEXT:    pushl %esi
  16 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
  17 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  18 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
  19 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
  20 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
  21 ; X86-NEXT:    .p2align 4, 0x90
  22 ; X86-NEXT:  .LBB0_1: # %loop
  23 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
  24 ; X86-NEXT:    addl $1, %edi
  25 ; X86-NEXT:    adcl $0, %esi
  26 ; X86-NEXT:    adcl $0, %edx
  27 ; X86-NEXT:    adcl $0, %ecx
  28 ; X86-NEXT:    movl %edx, %ebx
  29 ; X86-NEXT:    orl %ecx, %ebx
  30 ; X86-NEXT:    movl %esi, %ebp
  31 ; X86-NEXT:    orl %ebx, %ebp
  32 ; X86-NEXT:    shrdl $28, %ebx, %ebp
  33 ; X86-NEXT:    jne .LBB0_1
  34 ; X86-NEXT:  # %bb.2: # %exit
  35 ; X86-NEXT:    movl %edi, (%eax)
  36 ; X86-NEXT:    movl %esi, 4(%eax)
  37 ; X86-NEXT:    movl %edx, 8(%eax)
  38 ; X86-NEXT:    movl %ecx, 12(%eax)
  39 ; X86-NEXT:    popl %esi
  40 ; X86-NEXT:    popl %edi
  41 ; X86-NEXT:    popl %ebx
  42 ; X86-NEXT:    popl %ebp
  43 ; X86-NEXT:    retl $4
  44 ;
  45 ; X64-LABEL: opt_setcc_lt_power_of_2:
  46 ; X64:       # %bb.0:
  47 ; X64-NEXT:    movq %rsi, %rdx
  48 ; X64-NEXT:    movq %rdi, %rax
  49 ; X64-NEXT:    .p2align 4, 0x90
  50 ; X64-NEXT:  .LBB0_1: # %loop
  51 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
  52 ; X64-NEXT:    addq $1, %rax
  53 ; X64-NEXT:    adcq $0, %rdx
  54 ; X64-NEXT:    movq %rax, %rcx
  55 ; X64-NEXT:    shrq $60, %rcx
  56 ; X64-NEXT:    orq %rdx, %rcx
  57 ; X64-NEXT:    jne .LBB0_1
  58 ; X64-NEXT:  # %bb.2: # %exit
  59 ; X64-NEXT:    retq
  60   br label %loop
  61
  62 loop:
     ; %phi.a is %a on entry from the first block and %inc on every back edge.
  63   %phi.a = phi i128 [ %a, %0 ], [ %inc, %loop ]
  64   %inc = add i128 %phi.a, 1
     ; 1152921504606846976 == 2^60; leave the loop once %inc drops below it.
  65   %cmp = icmp ult i128 %inc, 1152921504606846976
  66   br i1 %cmp, label %exit, label %loop
  67
  68 exit:
  69   ret i128 %inc
  70 }
  71
  72 define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
     ; Returns true iff %a logically shifted right by 17 equals zero, i.e. iff
     ; bits 17..127 of %a are all clear. The shift has the compare as its only user.
  73 ; X86-LABEL: opt_setcc_srl_eq_zero:
  74 ; X86:       # %bb.0:
  75 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
  76 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  77 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
  78 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
  79 ; X86-NEXT:    orl %ecx, %edx
  80 ; X86-NEXT:    orl %eax, %edx
  81 ; X86-NEXT:    orl %ecx, %eax
  82 ; X86-NEXT:    shldl $15, %edx, %eax
  83 ; X86-NEXT:    sete %al
  84 ; X86-NEXT:    retl
  85 ;
  86 ; X64-LABEL: opt_setcc_srl_eq_zero:
  87 ; X64:       # %bb.0:
  88 ; X64-NEXT:    shrq $17, %rdi
  89 ; X64-NEXT:    orq %rsi, %rdi
  90 ; X64-NEXT:    sete %al
  91 ; X64-NEXT:    retq
  92    %srl = lshr i128 %a, 17
  93    %cmp = icmp eq i128 %srl, 0
  94    ret i1 %cmp
  95 }
  96
  97 define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
     ; Returns true iff %a logically shifted right by 17 is non-zero, i.e. iff
     ; at least one of bits 17..127 of %a is set. Same IR as
     ; @opt_setcc_srl_eq_zero except for the `ne` predicate.
  98 ; X86-LABEL: opt_setcc_srl_ne_zero:
  99 ; X86:       # %bb.0:
 100 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 101 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 102 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 103 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 104 ; X86-NEXT:    orl %ecx, %edx
 105 ; X86-NEXT:    orl %eax, %edx
 106 ; X86-NEXT:    orl %ecx, %eax
 107 ; X86-NEXT:    shldl $15, %edx, %eax
 108 ; X86-NEXT:    setne %al
 109 ; X86-NEXT:    retl
 110 ;
 111 ; X64-LABEL: opt_setcc_srl_ne_zero:
 112 ; X64:       # %bb.0:
 113 ; X64-NEXT:    shrq $17, %rdi
 114 ; X64-NEXT:    orq %rsi, %rdi
 115 ; X64-NEXT:    setne %al
 116 ; X64-NEXT:    retq
 117    %srl = lshr i128 %a, 17
 118    %cmp = icmp ne i128 %srl, 0
 119    ret i1 %cmp
 120 }
 121
 122 define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
     ; Returns true iff %a shifted left by 17 equals zero. The top 17 bits are
     ; shifted out, so this holds iff bits 0..110 of %a are all clear.
     ; The shift has the compare as its only user.
 123 ; X86-LABEL: opt_setcc_shl_eq_zero:
 124 ; X86:       # %bb.0:
 125 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 126 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 127 ; X86-NEXT:    shll $17, %ecx
 128 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 129 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 130 ; X86-NEXT:    orl %ecx, %eax
 131 ; X86-NEXT:    sete %al
 132 ; X86-NEXT:    retl
 133 ;
 134 ; X64-LABEL: opt_setcc_shl_eq_zero:
 135 ; X64:       # %bb.0:
 136 ; X64-NEXT:    shlq $17, %rsi
 137 ; X64-NEXT:    orq %rdi, %rsi
 138 ; X64-NEXT:    sete %al
 139 ; X64-NEXT:    retq
 140    %shl = shl i128 %a, 17
 141    %cmp = icmp eq i128 %shl, 0
 142    ret i1 %cmp
 143 }
 144
 145 define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
     ; Returns true iff %a shifted left by 17 is non-zero, i.e. iff at least one
     ; of bits 0..110 of %a is set. Same IR as @opt_setcc_shl_eq_zero except for
     ; the `ne` predicate.
 146 ; X86-LABEL: opt_setcc_shl_ne_zero:
 147 ; X86:       # %bb.0:
 148 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 149 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 150 ; X86-NEXT:    shll $17, %ecx
 151 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 152 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 153 ; X86-NEXT:    orl %ecx, %eax
 154 ; X86-NEXT:    setne %al
 155 ; X86-NEXT:    retl
 156 ;
 157 ; X64-LABEL: opt_setcc_shl_ne_zero:
 158 ; X64:       # %bb.0:
 159 ; X64-NEXT:    shlq $17, %rsi
 160 ; X64-NEXT:    orq %rdi, %rsi
 161 ; X64-NEXT:    setne %al
 162 ; X64-NEXT:    retq
 163    %shl = shl i128 %a, 17
 164    %cmp = icmp ne i128 %shl, 0
 165    ret i1 %cmp
 166 }
 167
 168 ; Negative test: the optimization should not be applied if the shift has multiple users.
     ; Here %shl feeds both the compare and the call to @use, so the full shifted
     ; 128-bit value is still materialized (see the shld/shl sequences in the
     ; assertions below) and is then passed to @use.
 169 define i1 @opt_setcc_shl_eq_zero_multiple_shl_users(i128 %a) nounwind {
 170 ; X86-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
 171 ; X86:       # %bb.0:
 172 ; X86-NEXT:    pushl %ebx
 173 ; X86-NEXT:    pushl %edi
 174 ; X86-NEXT:    pushl %esi
 175 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 176 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 177 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 178 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 179 ; X86-NEXT:    shldl $17, %esi, %edx
 180 ; X86-NEXT:    shldl $17, %ecx, %esi
 181 ; X86-NEXT:    shldl $17, %eax, %ecx
 182 ; X86-NEXT:    shll $17, %eax
 183 ; X86-NEXT:    movl %ecx, %edi
 184 ; X86-NEXT:    orl %edx, %edi
 185 ; X86-NEXT:    movl %eax, %ebx
 186 ; X86-NEXT:    orl %esi, %ebx
 187 ; X86-NEXT:    orl %edi, %ebx
 188 ; X86-NEXT:    sete %bl
 189 ; X86-NEXT:    pushl %edx
 190 ; X86-NEXT:    pushl %esi
 191 ; X86-NEXT:    pushl %ecx
 192 ; X86-NEXT:    pushl %eax
 193 ; X86-NEXT:    calll use@PLT
 194 ; X86-NEXT:    addl $16, %esp
 195 ; X86-NEXT:    movl %ebx, %eax
 196 ; X86-NEXT:    popl %esi
 197 ; X86-NEXT:    popl %edi
 198 ; X86-NEXT:    popl %ebx
 199 ; X86-NEXT:    retl
 200 ;
 201 ; X64-LABEL: opt_setcc_shl_eq_zero_multiple_shl_users:
 202 ; X64:       # %bb.0:
 203 ; X64-NEXT:    pushq %rbx
 204 ; X64-NEXT:    shldq $17, %rdi, %rsi
 205 ; X64-NEXT:    shlq $17, %rdi
 206 ; X64-NEXT:    movq %rdi, %rax
 207 ; X64-NEXT:    orq %rsi, %rax
 208 ; X64-NEXT:    sete %bl
 209 ; X64-NEXT:    callq use@PLT
 210 ; X64-NEXT:    movl %ebx, %eax
 211 ; X64-NEXT:    popq %rbx
 212 ; X64-NEXT:    retq
 213    %shl = shl i128 %a, 17
 214    %cmp = icmp eq i128 %shl, 0
     ; Second user of %shl: this is what makes the test a negative one.
 215    call void @use(i128 %shl)
 216    ret i1 %cmp
 217 }
 218
 219 ; Check that the optimization is applied to a DAG of the appropriate shape
 220 ; even when that DAG did not come from an actual shift expansion.
     ; The IR spells the pattern out by hand on two i64 values:
     ;   (%a << 17) | (%b >> 47) | (%b << 17)
     ; The two shift amounts applied to %b, 17 and 47, add up to the 64-bit width,
     ; which mirrors the pieces of a wider left shift by 17 of the pair (%a, %b).
 221 define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
 222 ; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
 223 ; X86:       # %bb.0:
 224 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 225 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 226 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
 227 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 228 ; X86-NEXT:    orl %eax, %ecx
 229 ; X86-NEXT:    shldl $17, %eax, %ecx
 230 ; X86-NEXT:    sete %al
 231 ; X86-NEXT:    retl
 232 ;
 233 ; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
 234 ; X64:       # %bb.0:
 235 ; X64-NEXT:    shlq $17, %rdi
 236 ; X64-NEXT:    orq %rsi, %rdi
 237 ; X64-NEXT:    sete %al
 238 ; X64-NEXT:    retq
 239   %shl.a = shl i64 %a, 17
 240   %srl.b = lshr i64 %b, 47
 241   %or.0 = or i64 %shl.a, %srl.b
 242   %shl.b = shl i64 %b, 17
 243   %or.1 = or i64 %or.0, %shl.b
 244   %cmp = icmp eq i64 %or.1, 0
 245   ret i1 %cmp
 246 }
 247
 248 ; Negative test: the optimization should not be applied because the
 249 ; constants used in the shifts do not match.
     ; Same shape as @opt_setcc_expanded_shl_correct_shifts, except that the final
     ; left shift of %b uses 18 while the other two shifts use 17 and 47, so the
     ; three shifts are not consistent pieces of one wider shift.
 250 define i1 @opt_setcc_expanded_shl_wrong_shifts(i64 %a, i64 %b) nounwind {
 251 ; X86-LABEL: opt_setcc_expanded_shl_wrong_shifts:
 252 ; X86:       # %bb.0:
 253 ; X86-NEXT:    pushl %esi
 254 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 255 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 256 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 257 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 258 ; X86-NEXT:    shldl $17, %edx, %esi
 259 ; X86-NEXT:    shldl $17, %ecx, %edx
 260 ; X86-NEXT:    shldl $18, %eax, %ecx
 261 ; X86-NEXT:    shll $18, %eax
 262 ; X86-NEXT:    orl %edx, %eax
 263 ; X86-NEXT:    orl %esi, %ecx
 264 ; X86-NEXT:    orl %eax, %ecx
 265 ; X86-NEXT:    sete %al
 266 ; X86-NEXT:    popl %esi
 267 ; X86-NEXT:    retl
 268 ;
 269 ; X64-LABEL: opt_setcc_expanded_shl_wrong_shifts:
 270 ; X64:       # %bb.0:
 271 ; X64-NEXT:    shldq $17, %rsi, %rdi
 272 ; X64-NEXT:    shlq $18, %rsi
 273 ; X64-NEXT:    orq %rdi, %rsi
 274 ; X64-NEXT:    sete %al
 275 ; X64-NEXT:    retq
 276   %shl.a = shl i64 %a, 17
 277   %srl.b = lshr i64 %b, 47
 278   %or.0 = or i64 %shl.a, %srl.b
     ; Deliberate mismatch: 18 here versus 17 on %shl.a.
 279   %shl.b = shl i64 %b, 18
 280   %or.1 = or i64 %or.0, %shl.b
 281   %cmp = icmp eq i64 %or.1, 0
 282   ret i1 %cmp
 283 }
 284
 285 declare void @use(i128 %a) ; external function; called by @opt_setcc_shl_eq_zero_multiple_shl_users to give %shl a second user