test/CodeGen/X86/memset-sse-stack-realignment.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; Make sure that we realign the stack. Mingw32 uses 4-byte stack alignment, but
   3 ; we need 16 bytes for SSE and 32 bytes for AVX.
   4
   5 ; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium2 | FileCheck %s --check-prefix=NOSSE
   6 ; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=pentium3 | FileCheck %s --check-prefixes=SSE,SSE1
   7 ; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=yonah | FileCheck %s --check-prefixes=SSE,SSE2
   8 ; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=corei7-avx | FileCheck %s --check-prefixes=AVX,AVX1
   9 ; RUN: llc < %s -mtriple=i386-pc-mingw32 -mcpu=core-avx2 | FileCheck %s --check-prefixes=AVX,AVX2
  10
  11 define void @test1(i32 %t) nounwind {
  12 ; NOSSE-LABEL: test1:
  13 ; NOSSE:       # %bb.0:
  14 ; NOSSE-NEXT:    pushl %ebp
  15 ; NOSSE-NEXT:    movl %esp, %ebp
  16 ; NOSSE-NEXT:    subl $32, %esp
  17 ; NOSSE-NEXT:    movl 8(%ebp), %eax
  18 ; NOSSE-NEXT:    movl $0, -4(%ebp)
  19 ; NOSSE-NEXT:    movl $0, -8(%ebp)
  20 ; NOSSE-NEXT:    movl $0, -12(%ebp)
  21 ; NOSSE-NEXT:    movl $0, -16(%ebp)
  22 ; NOSSE-NEXT:    movl $0, -20(%ebp)
  23 ; NOSSE-NEXT:    movl $0, -24(%ebp)
  24 ; NOSSE-NEXT:    movl $0, -28(%ebp)
  25 ; NOSSE-NEXT:    movl $0, -32(%ebp)
  26 ; NOSSE-NEXT:    addl $3, %eax
  27 ; NOSSE-NEXT:    andl $-4, %eax
  28 ; NOSSE-NEXT:    calll __alloca
  29 ; NOSSE-NEXT:    movl %esp, %eax
  30 ; NOSSE-NEXT:    pushl %eax
  31 ; NOSSE-NEXT:    calll _dummy
  32 ; NOSSE-NEXT:    movl %ebp, %esp
  33 ; NOSSE-NEXT:    popl %ebp
  34 ; NOSSE-NEXT:    retl
  35 ;
  36 ; SSE-LABEL: test1:
  37 ; SSE:       # %bb.0:
  38 ; SSE-NEXT:    pushl %ebp
  39 ; SSE-NEXT:    movl %esp, %ebp
  40 ; SSE-NEXT:    pushl %esi
  41 ; SSE-NEXT:    andl $-16, %esp
  42 ; SSE-NEXT:    subl $48, %esp
  43 ; SSE-NEXT:    movl %esp, %esi
  44 ; SSE-NEXT:    movl 8(%ebp), %eax
  45 ; SSE-NEXT:    xorps %xmm0, %xmm0
  46 ; SSE-NEXT:    movaps %xmm0, 16(%esi)
  47 ; SSE-NEXT:    movaps %xmm0, (%esi)
  48 ; SSE-NEXT:    addl $3, %eax
  49 ; SSE-NEXT:    andl $-4, %eax
  50 ; SSE-NEXT:    calll __alloca
  51 ; SSE-NEXT:    movl %esp, %eax
  52 ; SSE-NEXT:    pushl %eax
  53 ; SSE-NEXT:    calll _dummy
  54 ; SSE-NEXT:    leal -4(%ebp), %esp
  55 ; SSE-NEXT:    popl %esi
  56 ; SSE-NEXT:    popl %ebp
  57 ; SSE-NEXT:    retl
  58 ;
  59 ; AVX-LABEL: test1:
  60 ; AVX:       # %bb.0:
  61 ; AVX-NEXT:    pushl %ebp
  62 ; AVX-NEXT:    movl %esp, %ebp
  63 ; AVX-NEXT:    pushl %esi
  64 ; AVX-NEXT:    andl $-32, %esp
  65 ; AVX-NEXT:    subl $64, %esp
  66 ; AVX-NEXT:    movl %esp, %esi
  67 ; AVX-NEXT:    movl 8(%ebp), %eax
  68 ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
  69 ; AVX-NEXT:    vmovaps %ymm0, (%esi)
  70 ; AVX-NEXT:    addl $3, %eax
  71 ; AVX-NEXT:    andl $-4, %eax
  72 ; AVX-NEXT:    calll __alloca
  73 ; AVX-NEXT:    movl %esp, %eax
  74 ; AVX-NEXT:    pushl %eax
  75 ; AVX-NEXT:    vzeroupper
  76 ; AVX-NEXT:    calll _dummy
  77 ; AVX-NEXT:    leal -4(%ebp), %esp
  78 ; AVX-NEXT:    popl %esi
  79 ; AVX-NEXT:    popl %ebp
  80 ; AVX-NEXT:    retl
     ;
     ; @test1 zeroes a 32-byte stack buffer that is only 4-byte aligned and then
     ; hands a variable-sized stack allocation to @dummy.
     ;
     ; What the assertions above pin down for each configuration
     ;   - without SSE, eight 4-byte stores relative to %ebp and no masking of %esp
     ;   - with SSE, %esp is masked to a 16-byte boundary and two movaps stores
     ;     zero the buffer
     ;   - with AVX, %esp is masked to a 32-byte boundary and one ymm vmovaps
     ;     store zeroes the buffer, with vzeroupper emitted before the call
     ; In both realigned cases the buffer is addressed through %esi, which is
     ; copied from %esp right after the realignment (presumably because the
     ; later dynamic allocation moves %esp).
     ;
  81   %tmp1210 = alloca i8, i32 32, align 4 ; 32-byte buffer, only 4-byte aligned
  82   call void @llvm.memset.p0i8.i64(i8* align 4 %tmp1210, i8 0, i64 32, i1 false) ; zero all 32 bytes
  83   %x = alloca i8, i32 %t ; variable-sized stack allocation of %t bytes
  84   call void @dummy(i8* %x) ; pass the dynamic buffer to the external callee
  85   ret void
  86 }
  87
  88 define void @test2(i32 %t) nounwind {
  89 ; NOSSE-LABEL: test2:
  90 ; NOSSE:       # %bb.0:
  91 ; NOSSE-NEXT:    pushl %ebp
  92 ; NOSSE-NEXT:    movl %esp, %ebp
  93 ; NOSSE-NEXT:    subl $16, %esp
  94 ; NOSSE-NEXT:    movl 8(%ebp), %eax
  95 ; NOSSE-NEXT:    movl $0, -4(%ebp)
  96 ; NOSSE-NEXT:    movl $0, -8(%ebp)
  97 ; NOSSE-NEXT:    movl $0, -12(%ebp)
  98 ; NOSSE-NEXT:    movl $0, -16(%ebp)
  99 ; NOSSE-NEXT:    addl $3, %eax
 100 ; NOSSE-NEXT:    andl $-4, %eax
 101 ; NOSSE-NEXT:    calll __alloca
 102 ; NOSSE-NEXT:    movl %esp, %eax
 103 ; NOSSE-NEXT:    pushl %eax
 104 ; NOSSE-NEXT:    calll _dummy
 105 ; NOSSE-NEXT:    movl %ebp, %esp
 106 ; NOSSE-NEXT:    popl %ebp
 107 ; NOSSE-NEXT:    retl
 108 ;
 109 ; SSE-LABEL: test2:
 110 ; SSE:       # %bb.0:
 111 ; SSE-NEXT:    pushl %ebp
 112 ; SSE-NEXT:    movl %esp, %ebp
 113 ; SSE-NEXT:    pushl %esi
 114 ; SSE-NEXT:    andl $-16, %esp
 115 ; SSE-NEXT:    subl $32, %esp
 116 ; SSE-NEXT:    movl %esp, %esi
 117 ; SSE-NEXT:    movl 8(%ebp), %eax
 118 ; SSE-NEXT:    xorps %xmm0, %xmm0
 119 ; SSE-NEXT:    movaps %xmm0, (%esi)
 120 ; SSE-NEXT:    addl $3, %eax
 121 ; SSE-NEXT:    andl $-4, %eax
 122 ; SSE-NEXT:    calll __alloca
 123 ; SSE-NEXT:    movl %esp, %eax
 124 ; SSE-NEXT:    pushl %eax
 125 ; SSE-NEXT:    calll _dummy
 126 ; SSE-NEXT:    leal -4(%ebp), %esp
 127 ; SSE-NEXT:    popl %esi
 128 ; SSE-NEXT:    popl %ebp
 129 ; SSE-NEXT:    retl
 130 ;
 131 ; AVX-LABEL: test2:
 132 ; AVX:       # %bb.0:
 133 ; AVX-NEXT:    pushl %ebp
 134 ; AVX-NEXT:    movl %esp, %ebp
 135 ; AVX-NEXT:    pushl %esi
 136 ; AVX-NEXT:    andl $-16, %esp
 137 ; AVX-NEXT:    subl $32, %esp
 138 ; AVX-NEXT:    movl %esp, %esi
 139 ; AVX-NEXT:    movl 8(%ebp), %eax
 140 ; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 141 ; AVX-NEXT:    vmovaps %xmm0, (%esi)
 142 ; AVX-NEXT:    addl $3, %eax
 143 ; AVX-NEXT:    andl $-4, %eax
 144 ; AVX-NEXT:    calll __alloca
 145 ; AVX-NEXT:    movl %esp, %eax
 146 ; AVX-NEXT:    pushl %eax
 147 ; AVX-NEXT:    calll _dummy
 148 ; AVX-NEXT:    leal -4(%ebp), %esp
 149 ; AVX-NEXT:    popl %esi
 150 ; AVX-NEXT:    popl %ebp
 151 ; AVX-NEXT:    retl
     ;
     ; @test2 is the 16-byte variant. It zeroes a 16-byte stack buffer that is
     ; only 4-byte aligned and then hands a variable-sized stack allocation to
     ; @dummy.
     ;
     ; What the assertions above pin down for each configuration
     ;   - without SSE, four 4-byte stores relative to %ebp and no masking of %esp
     ;   - with SSE or AVX, a single xmm store zeroes the buffer, so %esp is
     ;     masked to a 16-byte boundary in both cases (the AVX run does not use
     ;     a ymm store here and does not realign to 32 bytes)
     ; In the realigned cases the buffer is addressed through %esi, which is
     ; copied from %esp right after the realignment.
     ;
 152   %tmp1210 = alloca i8, i32 16, align 4 ; 16-byte buffer, only 4-byte aligned
 153   call void @llvm.memset.p0i8.i64(i8* align 4 %tmp1210, i8 0, i64 16, i1 false) ; zero all 16 bytes
 154   %x = alloca i8, i32 %t ; variable-sized stack allocation of %t bytes
 155   call void @dummy(i8* %x) ; pass the dynamic buffer to the external callee
 156   ret void
 157 }
 158
 159 declare void @dummy(i8*) ; declared only, no body in this file; receives the dynamic alloca in both tests
 160
 161 declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind ; memset intrinsic; both tests call it with fill byte 0 and i1 false