llvm/test/CodeGen/X86/dagcombine-cse.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86
   3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64
   4
   5 define i32 @t(ptr %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind  {
     ; Loads an i32 and an i16 from %ref_frame_ptr at byte offsets that share the
     ; subexpression %idxY * %ref_frame_stride, packs both into one i64, passes that
     ; value through <2 x double> / <8 x i16> / <4 x i32> bitcasts and a shuffle, and
     ; returns i32 lane 0. The expected assembly below is one multiply, one add and a
     ; single 32-bit load for both targets, with no vector instructions.
   6 ; X86-LABEL: t:
   7 ; X86:       ## %bb.0: ## %entry
   8 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
   9 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  10 ; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
  11 ; X86-NEXT:    addl {{[0-9]+}}(%esp), %ecx
  12 ; X86-NEXT:    movl (%eax,%ecx), %eax
  13 ; X86-NEXT:    retl
  14 ;
  15 ; X64-LABEL: t:
  16 ; X64:       ## %bb.0: ## %entry
  17 ; X64-NEXT:    imull %ecx, %esi
  18 ; X64-NEXT:    addl %edx, %esi
  19 ; X64-NEXT:    movslq %esi, %rax
  20 ; X64-NEXT:    movl (%rdi,%rax), %eax
  21 ; X64-NEXT:    retq
  22 entry:
  23         %tmp7 = mul i32 %idxY, %ref_frame_stride                ; idxY * stride; feeds both address computations below
  24         %tmp9 = add i32 %tmp7, %idxX            ; byte offset of the i32 load
  25         %tmp11 = getelementptr i8, ptr %ref_frame_ptr, i32 %tmp9                ; address of the i32 load
  26         %tmp13 = load i32, ptr %tmp11, align 4          ; 32-bit value; the one load present in the expected assembly
  27         %tmp18 = add i32 %idxX, 4               ; idxX advanced by 4 bytes
  28         %tmp20.sum = add i32 %tmp18, %tmp7              ; byte offset of the i16 load (equals %tmp9 + 4)
  29         %tmp21 = getelementptr i8, ptr %ref_frame_ptr, i32 %tmp20.sum           ; address of the i16 load
  30         %tmp23 = load i16, ptr %tmp21, align 2          ; 16-bit value; no matching load appears in the expected assembly
  31         %tmp2425 = zext i16 %tmp23 to i64               ; widen the i16 to 64 bits
  32         %tmp26 = shl i64 %tmp2425, 32           ; place it in bits 32..47
  33         %tmp2728 = zext i32 %tmp13 to i64               ; widen the i32 to 64 bits (bits 0..31)
  34         %tmp29 = or i64 %tmp26, %tmp2728                ; i64 = (i16 << 32) | i32
  35         %tmp3454 = bitcast i64 %tmp29 to double         ; reinterpret the same 64 bits as a double
  36         %tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0               ; lane 0 = packed value
  37         %tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1          ; lane 1 = 0.0
  38         %tmp42 = bitcast <2 x double> %tmp36 to <8 x i16>               ; view the same 128 bits as eight i16 lanes
  39         %tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 >          ; lanes 0 and 1 keep their positions
  40         %tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32>          ; view as four i32 lanes
  41         %tmp48 = extractelement <4 x i32> %tmp47, i32 0         ; i32 lane 0 = i16 lanes 0..1, i.e. %tmp13 on little-endian x86
  42         ret i32 %tmp48
  43 }
  44
  45 ; Test CSE for SDAG nodes with multiple results (UMUL_LOHI).
     ; The function returns the upper 96 bits of the 192-bit product %x * %x. In a
     ; square, every product of two different limbs of %x occurs twice. The expected
     ; assembly below contains six mull on i386 (three 32-bit limbs) and two mulq plus
     ; one imulq on x86_64 (a 64-bit and a 32-bit limb), i.e. one multiply per
     ; distinct limb pair.
  46 define i96 @square_high(i96 %x) nounwind {
  47 ; X86-LABEL: square_high:
  48 ; X86:       ## %bb.0: ## %entry
  49 ; X86-NEXT:    pushl %ebp
  50 ; X86-NEXT:    pushl %ebx
  51 ; X86-NEXT:    pushl %edi
  52 ; X86-NEXT:    pushl %esi
  53 ; X86-NEXT:    pushl %eax
  54 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
  55 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
  56 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
  57 ; X86-NEXT:    movl %edi, %eax
  58 ; X86-NEXT:    mull %edi
  59 ; X86-NEXT:    movl %edx, %ecx
  60 ; X86-NEXT:    movl %ebx, %eax
  61 ; X86-NEXT:    mull %edi
  62 ; X86-NEXT:    addl %eax, %ecx
  63 ; X86-NEXT:    movl %edx, %ebp
  64 ; X86-NEXT:    adcl $0, %ebp
  65 ; X86-NEXT:    addl %eax, %ecx
  66 ; X86-NEXT:    adcl %edx, %ebp
  67 ; X86-NEXT:    setb %al
  68 ; X86-NEXT:    movzbl %al, %ecx
  69 ; X86-NEXT:    movl %ebx, %eax
  70 ; X86-NEXT:    mull %ebx
  71 ; X86-NEXT:    movl %eax, %ebx
  72 ; X86-NEXT:    addl %ebp, %ebx
  73 ; X86-NEXT:    adcl %edx, %ecx
  74 ; X86-NEXT:    movl %esi, %eax
  75 ; X86-NEXT:    mull %edi
  76 ; X86-NEXT:    movl %edx, (%esp) ## 4-byte Spill
  77 ; X86-NEXT:    movl %eax, %ebp
  78 ; X86-NEXT:    movl %esi, %eax
  79 ; X86-NEXT:    mull {{[0-9]+}}(%esp)
  80 ; X86-NEXT:    movl %edx, %esi
  81 ; X86-NEXT:    movl %eax, %edi
  82 ; X86-NEXT:    addl (%esp), %edi ## 4-byte Folded Reload
  83 ; X86-NEXT:    adcl $0, %esi
  84 ; X86-NEXT:    addl %ebp, %ebx
  85 ; X86-NEXT:    adcl %edi, %ecx
  86 ; X86-NEXT:    movl %esi, %eax
  87 ; X86-NEXT:    adcl $0, %eax
  88 ; X86-NEXT:    setb %dl
  89 ; X86-NEXT:    addl %ebp, %ebx
  90 ; X86-NEXT:    adcl %ecx, %edi
  91 ; X86-NEXT:    movzbl %dl, %ecx
  92 ; X86-NEXT:    adcl %eax, %esi
  93 ; X86-NEXT:    adcl $0, %ecx
  94 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
  95 ; X86-NEXT:    mull %eax
  96 ; X86-NEXT:    addl %eax, %esi
  97 ; X86-NEXT:    adcl %edx, %ecx
  98 ; X86-NEXT:    movl %edi, %eax
  99 ; X86-NEXT:    movl %esi, %edx
 100 ; X86-NEXT:    addl $4, %esp
 101 ; X86-NEXT:    popl %esi
 102 ; X86-NEXT:    popl %edi
 103 ; X86-NEXT:    popl %ebx
 104 ; X86-NEXT:    popl %ebp
 105 ; X86-NEXT:    retl
 106 ;
 107 ; X64-LABEL: square_high:
 108 ; X64:       ## %bb.0: ## %entry
 109 ; X64-NEXT:    movl %esi, %ecx
 110 ; X64-NEXT:    movq %rcx, %rax
 111 ; X64-NEXT:    mulq %rdi
 112 ; X64-NEXT:    movq %rdx, %rsi
 113 ; X64-NEXT:    movq %rax, %r8
 114 ; X64-NEXT:    movq %rdi, %rax
 115 ; X64-NEXT:    mulq %rdi
 116 ; X64-NEXT:    addq %r8, %rdx
 117 ; X64-NEXT:    movq %rsi, %rax
 118 ; X64-NEXT:    adcq $0, %rax
 119 ; X64-NEXT:    addq %rdx, %r8
 120 ; X64-NEXT:    adcq %rsi, %rax
 121 ; X64-NEXT:    imulq %rcx, %rcx
 122 ; X64-NEXT:    addq %rax, %rcx
 123 ; X64-NEXT:    shrdq $32, %rcx, %r8
 124 ; X64-NEXT:    shrq $32, %rcx
 125 ; X64-NEXT:    movq %r8, %rax
 126 ; X64-NEXT:    movq %rcx, %rdx
 127 ; X64-NEXT:    retq
 128 entry:
 129   %conv = zext i96 %x to i192  ; widen so the full square fits in the result type
 130   %mul = mul nuw i192 %conv, %conv  ; nuw holds: the square of a 96-bit value fits in 192 bits
 131   %shr = lshr i192 %mul, 96  ; shift the upper 96 bits of the product down
 132   %conv2 = trunc i192 %shr to i96  ; keep them as the i96 result
 133   ret i96 %conv2
 134 }