llvm/test/CodeGen/X86/musttail-fastcall.ll

   1 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
   2 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
   3 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
   4 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
   5
   6 ; While we don't support varargs with fastcall, we do support forwarding.
   7
   8 @asdf = internal constant [4 x i8] c"asdf" ; 4-byte string constant (no NUL terminator) handed to puts by the thunks
   9
  10 declare void @puts(ptr) ; external function that each thunk calls before its musttail call
  11
  12 define i32 @call_fast_thunk() { ; ordinary caller that enters the fastcall thunk through a variadic signature
  13   %ret = call x86_fastcallcc i32 (...) @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3) ; two inreg ints followed by one non-inreg int
  14   ret i32 %ret
  15 }
  16
  17 define x86_fastcallcc i32 @fast_thunk(...) { ; variadic fastcall thunk that forwards its arguments to @fast_target
  18   call void @puts(ptr @asdf) ; intervening call; the checks below expect ECX/EDX to be saved before it and reloaded after it
  19   %r = musttail call x86_fastcallcc i32 (...) @fast_target (...) ; forward the incoming variadic arguments to the target
  20   ret i32 %r
  21 }
  22
  23 ; Check that we spill and fill around the call to puts.
  24
  25 ; CHECK-LABEL: @fast_thunk@0:
  26 ; CHECK-DAG: movl %ecx, {{.*}}
  27 ; CHECK-DAG: movl %edx, {{.*}}
  28 ; CHECK: calll _puts
  29 ; CHECK-DAG: movl {{.*}}, %ecx
  30 ; CHECK-DAG: movl {{.*}}, %edx
  31 ; CHECK: jmp @fast_target@12
  32
  33 define x86_fastcallcc i32 @fast_target(i32 inreg %a, i32 inreg %b, i32 %c) { ; destination of the fastcall thunk; returns %a + %b + %c
  34   %sum_ab = add i32 %a, %b
  35   %sum_abc = add i32 %sum_ab, %c
  36   ret i32 %sum_abc
  37 }
  38
  39 ; Repeat the test for vectorcall, which has XMM registers.
  40
  41 define i32 @call_vector_thunk() { ; ordinary caller that enters the vectorcall thunk through a variadic signature
  42   %ret = call x86_vectorcallcc i32 (...) @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3) ; two inreg ints followed by one non-inreg int
  43   ret i32 %ret
  44 }
  45
  46 define x86_vectorcallcc i32 @vector_thunk(...) { ; variadic vectorcall thunk that forwards its arguments to @vector_target
  47   call void @puts(ptr @asdf) ; intervening call; the checks below expect ECX/EDX and vector registers 0-5 to be saved before it and reloaded after it
  48   %r = musttail call x86_vectorcallcc i32 (...) @vector_target (...) ; forward the incoming variadic arguments to the target
  49   ret i32 %r
  50 }
  51
  52 ; Check that we spill and fill the vector registers (XMM, YMM or ZMM, depending on the enabled features) around the call to puts.
  53
  54 ; CHECK-LABEL: vector_thunk@@0:
  55 ; CHECK-DAG: movl %ecx, {{.*}}
  56 ; CHECK-DAG: movl %edx, {{.*}}
  57
  58 ; SSE2-DAG: movups %xmm0, {{.*}}
  59 ; SSE2-DAG: movups %xmm1, {{.*}}
  60 ; SSE2-DAG: movups %xmm2, {{.*}}
  61 ; SSE2-DAG: movups %xmm3, {{.*}}
  62 ; SSE2-DAG: movups %xmm4, {{.*}}
  63 ; SSE2-DAG: movups %xmm5, {{.*}}
  64
  65 ; AVX-DAG: vmovups %ymm0, {{.*}}
  66 ; AVX-DAG: vmovups %ymm1, {{.*}}
  67 ; AVX-DAG: vmovups %ymm2, {{.*}}
  68 ; AVX-DAG: vmovups %ymm3, {{.*}}
  69 ; AVX-DAG: vmovups %ymm4, {{.*}}
  70 ; AVX-DAG: vmovups %ymm5, {{.*}}
  71
  72 ; AVX512-DAG: vmovups %zmm0, {{.*}}
  73 ; AVX512-DAG: vmovups %zmm1, {{.*}}
  74 ; AVX512-DAG: vmovups %zmm2, {{.*}}
  75 ; AVX512-DAG: vmovups %zmm3, {{.*}}
  76 ; AVX512-DAG: vmovups %zmm4, {{.*}}
  77 ; AVX512-DAG: vmovups %zmm5, {{.*}}
  78
  79 ; CHECK: calll _puts
  80
  81 ; SSE2-DAG: movups {{.*}}, %xmm0
  82 ; SSE2-DAG: movups {{.*}}, %xmm1
  83 ; SSE2-DAG: movups {{.*}}, %xmm2
  84 ; SSE2-DAG: movups {{.*}}, %xmm3
  85 ; SSE2-DAG: movups {{.*}}, %xmm4
  86 ; SSE2-DAG: movups {{.*}}, %xmm5
  87
  88 ; AVX-DAG: vmovups {{.*}}, %ymm0
  89 ; AVX-DAG: vmovups {{.*}}, %ymm1
  90 ; AVX-DAG: vmovups {{.*}}, %ymm2
  91 ; AVX-DAG: vmovups {{.*}}, %ymm3
  92 ; AVX-DAG: vmovups {{.*}}, %ymm4
  93 ; AVX-DAG: vmovups {{.*}}, %ymm5
  94
  95 ; AVX512-DAG: vmovups {{.*}}, %zmm0
  96 ; AVX512-DAG: vmovups {{.*}}, %zmm1
  97 ; AVX512-DAG: vmovups {{.*}}, %zmm2
  98 ; AVX512-DAG: vmovups {{.*}}, %zmm3
  99 ; AVX512-DAG: vmovups {{.*}}, %zmm4
 100 ; AVX512-DAG: vmovups {{.*}}, %zmm5
 101
 102 ; CHECK-DAG: movl {{.*}}, %ecx
 103 ; CHECK-DAG: movl {{.*}}, %edx
 104 ; CHECK: jmp vector_target@@12
 105
 106 define x86_vectorcallcc i32 @vector_target(i32 inreg %a, i32 inreg %b, i32 %c) { ; destination of the vectorcall thunk; returns %a + %b + %c
 107   %sum_ab = add i32 %a, %b
 108   %sum_abc = add i32 %sum_ab, %c
 109   ret i32 %sum_abc
 110 }
 111
 112 ; Repeat the vectorcall test with "min-legal-vector-width"="256" and "prefer-vector-width"="256" on every function.
 113
 114 define i32 @call_vector_thunk_prefer256() "min-legal-vector-width"="256" "prefer-vector-width"="256" { ; ordinary caller of the 256-bit-preferring vectorcall thunk
 115   %ret = call x86_vectorcallcc i32 (...) @vector_thunk_prefer256(i32 inreg 1, i32 inreg 2, i32 3) ; two inreg ints followed by one non-inreg int
 116   ret i32 %ret
 117 }
 118
 119 define x86_vectorcallcc i32 @vector_thunk_prefer256(...) "min-legal-vector-width"="256" "prefer-vector-width"="256" { ; variadic vectorcall thunk carrying the 256-bit width attributes
 120   call void @puts(ptr @asdf) ; intervening call; the checks below expect ECX/EDX and vector registers 0-5 to be saved before it and reloaded after it
 121   %r = musttail call x86_vectorcallcc i32 (...) @vector_target_prefer256 (...) ; forward the incoming variadic arguments to the target
 122   ret i32 %r
 123 }
 124
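 125 ; Check that we spill and fill the vector registers around the call to puts. With the 256-bit attributes, the avx512vl run is expected to use YMM registers while the avx512f run still uses ZMM registers.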
 126
 127 ; CHECK-LABEL: vector_thunk_prefer256@@0:
 128 ; CHECK-DAG: movl %ecx, {{.*}}
 129 ; CHECK-DAG: movl %edx, {{.*}}
 130
 131 ; SSE2-DAG: movups %xmm0, {{.*}}
 132 ; SSE2-DAG: movups %xmm1, {{.*}}
 133 ; SSE2-DAG: movups %xmm2, {{.*}}
 134 ; SSE2-DAG: movups %xmm3, {{.*}}
 135 ; SSE2-DAG: movups %xmm4, {{.*}}
 136 ; SSE2-DAG: movups %xmm5, {{.*}}
 137
 138 ; AVX-DAG: vmovups %ymm0, {{.*}}
 139 ; AVX-DAG: vmovups %ymm1, {{.*}}
 140 ; AVX-DAG: vmovups %ymm2, {{.*}}
 141 ; AVX-DAG: vmovups %ymm3, {{.*}}
 142 ; AVX-DAG: vmovups %ymm4, {{.*}}
 143 ; AVX-DAG: vmovups %ymm5, {{.*}}
 144
 145 ; AVX512F-DAG: vmovups %zmm0, {{.*}}
 146 ; AVX512F-DAG: vmovups %zmm1, {{.*}}
 147 ; AVX512F-DAG: vmovups %zmm2, {{.*}}
 148 ; AVX512F-DAG: vmovups %zmm3, {{.*}}
 149 ; AVX512F-DAG: vmovups %zmm4, {{.*}}
 150 ; AVX512F-DAG: vmovups %zmm5, {{.*}}
 151
 152 ; AVX512VL-DAG: vmovups %ymm0, {{.*}}
 153 ; AVX512VL-DAG: vmovups %ymm1, {{.*}}
 154 ; AVX512VL-DAG: vmovups %ymm2, {{.*}}
 155 ; AVX512VL-DAG: vmovups %ymm3, {{.*}}
 156 ; AVX512VL-DAG: vmovups %ymm4, {{.*}}
 157 ; AVX512VL-DAG: vmovups %ymm5, {{.*}}
 158
 159 ; CHECK: calll _puts
 160
 161 ; SSE2-DAG: movups {{.*}}, %xmm0
 162 ; SSE2-DAG: movups {{.*}}, %xmm1
 163 ; SSE2-DAG: movups {{.*}}, %xmm2
 164 ; SSE2-DAG: movups {{.*}}, %xmm3
 165 ; SSE2-DAG: movups {{.*}}, %xmm4
 166 ; SSE2-DAG: movups {{.*}}, %xmm5
 167
 168 ; AVX-DAG: vmovups {{.*}}, %ymm0
 169 ; AVX-DAG: vmovups {{.*}}, %ymm1
 170 ; AVX-DAG: vmovups {{.*}}, %ymm2
 171 ; AVX-DAG: vmovups {{.*}}, %ymm3
 172 ; AVX-DAG: vmovups {{.*}}, %ymm4
 173 ; AVX-DAG: vmovups {{.*}}, %ymm5
 174
 175 ; AVX512F-DAG: vmovups {{.*}}, %zmm0
 176 ; AVX512F-DAG: vmovups {{.*}}, %zmm1
 177 ; AVX512F-DAG: vmovups {{.*}}, %zmm2
 178 ; AVX512F-DAG: vmovups {{.*}}, %zmm3
 179 ; AVX512F-DAG: vmovups {{.*}}, %zmm4
 180 ; AVX512F-DAG: vmovups {{.*}}, %zmm5
 181
 182 ; AVX512VL-DAG: vmovups {{.*}}, %ymm0
 183 ; AVX512VL-DAG: vmovups {{.*}}, %ymm1
 184 ; AVX512VL-DAG: vmovups {{.*}}, %ymm2
 185 ; AVX512VL-DAG: vmovups {{.*}}, %ymm3
 186 ; AVX512VL-DAG: vmovups {{.*}}, %ymm4
 187 ; AVX512VL-DAG: vmovups {{.*}}, %ymm5
 188
 189 ; CHECK-DAG: movl {{.*}}, %ecx
 190 ; CHECK-DAG: movl {{.*}}, %edx
 191 ; CHECK: jmp vector_target_prefer256@@12
 192
 193 define x86_vectorcallcc i32 @vector_target_prefer256(i32 inreg %a, i32 inreg %b, i32 %c) "min-legal-vector-width"="256" "prefer-vector-width"="256" { ; destination of the prefer256 thunk; returns %a + %b + %c
 194   %sum_ab = add i32 %a, %b
 195   %sum_abc = add i32 %sum_ab, %c
 196   ret i32 %sum_abc
 197 }