llvm/test/CodeGen/X86/2008-02-22-LocalRegAllocBug.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -regalloc=fast -optimize-regalloc=0 -mtriple=i686-- -mattr=+mmx | FileCheck %s
   3 ; PR2082
   4 ; Local register allocator was refusing to use ESI, EDI, and EBP so it ran out of
   5 ; registers.
   6 define void @transpose4x4(ptr %dst, ptr %src, i32 %dst_stride, i32 %src_stride) {
     ; Regression body for PR2082. Each of the four arguments is stored to its
     ; own stack slot and reloaded before every use; the reloaded values form
     ; eight addresses that are passed as indirect memory operands to a single
     ; inline-asm call. The expected output below addresses those operands
     ; through %eax, %ecx, %edx (destination side) and %esi, %edi, %ebx (source
     ; side), so the fast register allocator has to hand out %esi and %edi.
   7 ; CHECK-LABEL: transpose4x4:
   8 ; CHECK:       # %bb.0: # %entry
   9 ; CHECK-NEXT:    pushl %ebx
  10 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
  11 ; CHECK-NEXT:    pushl %edi
  12 ; CHECK-NEXT:    .cfi_def_cfa_offset 12
  13 ; CHECK-NEXT:    pushl %esi
  14 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
  15 ; CHECK-NEXT:    .cfi_offset %esi, -16
  16 ; CHECK-NEXT:    .cfi_offset %edi, -12
  17 ; CHECK-NEXT:    .cfi_offset %ebx, -8
  18 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
  19 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
  20 ; CHECK-NEXT:    leal (%ecx,%ecx,2), %edx
  21 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
  22 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
  23 ; CHECK-NEXT:    leal (%edi,%edi,2), %ebx
  24 ; CHECK-NEXT:    #APP
  25 ; CHECK-NEXT:    movd (%esi), %mm0
  26 ; CHECK-NEXT:    movd (%esi,%edi), %mm1
  27 ; CHECK-NEXT:    movd (%esi,%edi,2), %mm2
  28 ; CHECK-NEXT:    movd (%esi,%ebx), %mm3
  29 ; CHECK-NEXT:    punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3]
  30 ; CHECK-NEXT:    punpcklbw %mm3, %mm2 # mm2 = mm2[0],mm3[0],mm2[1],mm3[1],mm2[2],mm3[2],mm2[3],mm3[3]
  31 ; CHECK-NEXT:    movq %mm0, %mm1
  32 ; CHECK-NEXT:    punpcklwd %mm2, %mm0 # mm0 = mm0[0],mm2[0],mm0[1],mm2[1]
  33 ; CHECK-NEXT:    punpckhwd %mm2, %mm1 # mm1 = mm1[2],mm2[2],mm1[3],mm2[3]
  34 ; CHECK-NEXT:    movd %mm0, (%eax)
  35 ; CHECK-NEXT:    punpckhdq %mm0, %mm0 # mm0 = mm0[1,1]
  36 ; CHECK-NEXT:    movd %mm0, (%eax,%ecx)
  37 ; CHECK-NEXT:    movd %mm1, (%eax,%ecx,2)
  38 ; CHECK-NEXT:    punpckhdq %mm1, %mm1 # mm1 = mm1[1,1]
  39 ; CHECK-NEXT:    movd %mm1, (%eax,%edx)
  40 ; CHECK-EMPTY:
  41 ; CHECK-NEXT:    #NO_APP
  42 ; CHECK-NEXT:    popl %esi
  43 ; CHECK-NEXT:    .cfi_def_cfa_offset 12
  44 ; CHECK-NEXT:    popl %edi
  45 ; CHECK-NEXT:    .cfi_def_cfa_offset 8
  46 ; CHECK-NEXT:    popl %ebx
  47 ; CHECK-NEXT:    .cfi_def_cfa_offset 4
  48 ; CHECK-NEXT:    retl
  49 entry:
             ; One stack slot per incoming argument.
  50         %dst_addr = alloca ptr          ; <ptr> [#uses=5]
  51         %src_addr = alloca ptr          ; <ptr> [#uses=5]
  52         %dst_stride_addr = alloca i32           ; <ptr> [#uses=4]
  53         %src_stride_addr = alloca i32           ; <ptr> [#uses=4]
             ; No-op bitcast whose result is never referenced in this function
             ; (annotated #uses=0); kept as part of the original reproducer.
  54         %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
             ; Spill all four arguments to their slots.
  55         store ptr %dst, ptr %dst_addr
  56         store ptr %src, ptr %src_addr
  57         store i32 %dst_stride, ptr %dst_stride_addr
  58         store i32 %src_stride, ptr %src_stride_addr
             ; Destination addresses: dst + 0, dst + dst_stride,
             ; dst + 2*dst_stride and dst + 3*dst_stride (byte offsets, since
             ; the GEPs index i8). Each one reloads dst / dst_stride afresh.
  59         %tmp = load ptr, ptr %dst_addr, align 4         ; <ptr> [#uses=1]
  60         %tmp1 = getelementptr i8, ptr %tmp, i32 0               ; <ptr> [#uses=1]
  61         %tmp3 = load ptr, ptr %dst_addr, align 4                ; <ptr> [#uses=1]
  62         %tmp4 = load i32, ptr %dst_stride_addr, align 4         ; <i32> [#uses=1]
  63         %tmp5 = getelementptr i8, ptr %tmp3, i32 %tmp4          ; <ptr> [#uses=1]
  64         %tmp7 = load i32, ptr %dst_stride_addr, align 4         ; <i32> [#uses=1]
  65         %tmp8 = mul i32 %tmp7, 2                ; <i32> [#uses=1]
  66         %tmp9 = load ptr, ptr %dst_addr, align 4                ; <ptr> [#uses=1]
  67         %tmp10 = getelementptr i8, ptr %tmp9, i32 %tmp8         ; <ptr> [#uses=1]
  68         %tmp13 = load i32, ptr %dst_stride_addr, align 4                ; <i32> [#uses=1]
  69         %tmp14 = mul i32 %tmp13, 3              ; <i32> [#uses=1]
  70         %tmp15 = load ptr, ptr %dst_addr, align 4               ; <ptr> [#uses=1]
  71         %tmp16 = getelementptr i8, ptr %tmp15, i32 %tmp14               ; <ptr> [#uses=1]
             ; Source addresses, built the same way from src and src_stride:
             ; src + 0, src + src_stride, src + 2*src_stride, src + 3*src_stride.
  72         %tmp18 = load ptr, ptr %src_addr, align 4               ; <ptr> [#uses=1]
  73         %tmp19 = getelementptr i8, ptr %tmp18, i32 0            ; <ptr> [#uses=1]
  74         %tmp21 = load ptr, ptr %src_addr, align 4               ; <ptr> [#uses=1]
  75         %tmp22 = load i32, ptr %src_stride_addr, align 4                ; <i32> [#uses=1]
  76         %tmp23 = getelementptr i8, ptr %tmp21, i32 %tmp22               ; <ptr> [#uses=1]
  77         %tmp25 = load i32, ptr %src_stride_addr, align 4                ; <i32> [#uses=1]
  78         %tmp26 = mul i32 %tmp25, 2              ; <i32> [#uses=1]
  79         %tmp27 = load ptr, ptr %src_addr, align 4               ; <ptr> [#uses=1]
  80         %tmp28 = getelementptr i8, ptr %tmp27, i32 %tmp26               ; <ptr> [#uses=1]
  81         %tmp30 = load i32, ptr %src_stride_addr, align 4                ; <i32> [#uses=1]
  82         %tmp31 = mul i32 %tmp30, 3              ; <i32> [#uses=1]
  83         %tmp32 = load ptr, ptr %src_addr, align 4               ; <ptr> [#uses=1]
  84         %tmp33 = getelementptr i8, ptr %tmp32, i32 %tmp31               ; <ptr> [#uses=1]
             ; Inline asm with eight indirect memory operands: $0-$3 are the
             ; outputs (the four dst addresses, "=*m") and $4-$7 are the inputs
             ; (the four src addresses, "*m"). The template loads the inputs
             ; into %mm0-%mm3 with movd, interleaves them with punpck*
             ; instructions, and writes the results back with movd. It names
             ; the MMX registers directly and lists only dirflag/fpsr/flags as
             ; clobbers. The template ends with a newline+tab, which presumably
             ; accounts for the empty-line assertion before #NO_APP above.
  85         call void asm sideeffect "movd  $4, %mm0                \0A\09movd  $5, %mm1                \0A\09movd  $6, %mm2                \0A\09movd  $7, %mm3                \0A\09punpcklbw %mm1, %mm0         \0A\09punpcklbw %mm3, %mm2         \0A\09movq %mm0, %mm1              \0A\09punpcklwd %mm2, %mm0         \0A\09punpckhwd %mm2, %mm1         \0A\09movd  %mm0, $0                \0A\09punpckhdq %mm0, %mm0         \0A\09movd  %mm0, $1                \0A\09movd  %mm1, $2                \0A\09punpckhdq %mm1, %mm1         \0A\09movd  %mm1, $3                \0A\09", "=*m,=*m,=*m,=*m,*m,*m,*m,*m,~{dirflag},~{fpsr},~{flags}"( ptr elementtype( i32) %tmp1, ptr elementtype(i32) %tmp5, ptr elementtype(i32) %tmp10, ptr elementtype(i32) %tmp16, ptr elementtype(i32) %tmp19, ptr elementtype(i32) %tmp23, ptr elementtype(i32) %tmp28, ptr elementtype(i32) %tmp33 ) nounwind
  86         br label %return
  87
  88 return:         ; preds = %entry
  89         ret void
  90 }