1 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
2 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
3 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
4 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
6 ; While we don't support varargs with fastcall, we do support forwarding.
8 @asdf = internal constant [4 x i8] c"asdf"
10 declare void @puts(ptr)
12 define i32 @call_fast_thunk() {
13 %r = call x86_fastcallcc i32 (...) @fast_thunk(i32 inreg 1, i32 inreg 2, i32 3)
17 define x86_fastcallcc i32 @fast_thunk(...) {
18 call void @puts(ptr @asdf)
19 %r = musttail call x86_fastcallcc i32 (...) @fast_target (...)
23 ; Check that we spill ECX and EDX, and fill them again, around the call to puts.
25 ; CHECK-LABEL: @fast_thunk@0:
26 ; CHECK-DAG: movl %ecx, {{.*}}
27 ; CHECK-DAG: movl %edx, {{.*}}
29 ; CHECK-DAG: movl {{.*}}, %ecx
30 ; CHECK-DAG: movl {{.*}}, %edx
31 ; CHECK: jmp @fast_target@12
33 define x86_fastcallcc i32 @fast_target(i32 inreg %a, i32 inreg %b, i32 %c) {
39 ; Repeat the test for vectorcall, which has XMM registers.
41 define i32 @call_vector_thunk() {
42 %r = call x86_vectorcallcc i32 (...) @vector_thunk(i32 inreg 1, i32 inreg 2, i32 3)
46 define x86_vectorcallcc i32 @vector_thunk(...) {
47 call void @puts(ptr @asdf)
48 %r = musttail call x86_vectorcallcc i32 (...) @vector_target (...)
52 ; Check that we spill and fill the vector argument registers (XMM, YMM, or ZMM, depending on the enabled features) around the call to puts.
54 ; CHECK-LABEL: vector_thunk@@0:
55 ; CHECK-DAG: movl %ecx, {{.*}}
56 ; CHECK-DAG: movl %edx, {{.*}}
58 ; SSE2-DAG: movups %xmm0, {{.*}}
59 ; SSE2-DAG: movups %xmm1, {{.*}}
60 ; SSE2-DAG: movups %xmm2, {{.*}}
61 ; SSE2-DAG: movups %xmm3, {{.*}}
62 ; SSE2-DAG: movups %xmm4, {{.*}}
63 ; SSE2-DAG: movups %xmm5, {{.*}}
65 ; AVX-DAG: vmovups %ymm0, {{.*}}
66 ; AVX-DAG: vmovups %ymm1, {{.*}}
67 ; AVX-DAG: vmovups %ymm2, {{.*}}
68 ; AVX-DAG: vmovups %ymm3, {{.*}}
69 ; AVX-DAG: vmovups %ymm4, {{.*}}
70 ; AVX-DAG: vmovups %ymm5, {{.*}}
72 ; AVX512-DAG: vmovups %zmm0, {{.*}}
73 ; AVX512-DAG: vmovups %zmm1, {{.*}}
74 ; AVX512-DAG: vmovups %zmm2, {{.*}}
75 ; AVX512-DAG: vmovups %zmm3, {{.*}}
76 ; AVX512-DAG: vmovups %zmm4, {{.*}}
77 ; AVX512-DAG: vmovups %zmm5, {{.*}}
81 ; SSE2-DAG: movups {{.*}}, %xmm0
82 ; SSE2-DAG: movups {{.*}}, %xmm1
83 ; SSE2-DAG: movups {{.*}}, %xmm2
84 ; SSE2-DAG: movups {{.*}}, %xmm3
85 ; SSE2-DAG: movups {{.*}}, %xmm4
86 ; SSE2-DAG: movups {{.*}}, %xmm5
88 ; AVX-DAG: vmovups {{.*}}, %ymm0
89 ; AVX-DAG: vmovups {{.*}}, %ymm1
90 ; AVX-DAG: vmovups {{.*}}, %ymm2
91 ; AVX-DAG: vmovups {{.*}}, %ymm3
92 ; AVX-DAG: vmovups {{.*}}, %ymm4
93 ; AVX-DAG: vmovups {{.*}}, %ymm5
95 ; AVX512-DAG: vmovups {{.*}}, %zmm0
96 ; AVX512-DAG: vmovups {{.*}}, %zmm1
97 ; AVX512-DAG: vmovups {{.*}}, %zmm2
98 ; AVX512-DAG: vmovups {{.*}}, %zmm3
99 ; AVX512-DAG: vmovups {{.*}}, %zmm4
100 ; AVX512-DAG: vmovups {{.*}}, %zmm5
102 ; CHECK-DAG: movl {{.*}}, %ecx
103 ; CHECK-DAG: movl {{.*}}, %edx
104 ; CHECK: jmp vector_target@@12
106 define x86_vectorcallcc i32 @vector_target(i32 inreg %a, i32 inreg %b, i32 %c) {
108 %a1 = add i32 %a0, %c
112 ; Repeat the vectorcall test with "min-legal-vector-width" and "prefer-vector-width" set to 256.
114 define i32 @call_vector_thunk_prefer256() "min-legal-vector-width"="256" "prefer-vector-width"="256" {
115 %r = call x86_vectorcallcc i32 (...) @vector_thunk_prefer256(i32 inreg 1, i32 inreg 2, i32 3)
119 define x86_vectorcallcc i32 @vector_thunk_prefer256(...) "min-legal-vector-width"="256" "prefer-vector-width"="256" {
120 call void @puts(ptr @asdf)
121 %r = musttail call x86_vectorcallcc i32 (...) @vector_target_prefer256 (...)
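125 ; Check that we spill and fill the vector argument registers around the call to puts. With avx512vl the YMM registers are used; with only avx512f the ZMM registers are still used.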
127 ; CHECK-LABEL: vector_thunk_prefer256@@0:
128 ; CHECK-DAG: movl %ecx, {{.*}}
129 ; CHECK-DAG: movl %edx, {{.*}}
131 ; SSE2-DAG: movups %xmm0, {{.*}}
132 ; SSE2-DAG: movups %xmm1, {{.*}}
133 ; SSE2-DAG: movups %xmm2, {{.*}}
134 ; SSE2-DAG: movups %xmm3, {{.*}}
135 ; SSE2-DAG: movups %xmm4, {{.*}}
136 ; SSE2-DAG: movups %xmm5, {{.*}}
138 ; AVX-DAG: vmovups %ymm0, {{.*}}
139 ; AVX-DAG: vmovups %ymm1, {{.*}}
140 ; AVX-DAG: vmovups %ymm2, {{.*}}
141 ; AVX-DAG: vmovups %ymm3, {{.*}}
142 ; AVX-DAG: vmovups %ymm4, {{.*}}
143 ; AVX-DAG: vmovups %ymm5, {{.*}}
145 ; AVX512F-DAG: vmovups %zmm0, {{.*}}
146 ; AVX512F-DAG: vmovups %zmm1, {{.*}}
147 ; AVX512F-DAG: vmovups %zmm2, {{.*}}
148 ; AVX512F-DAG: vmovups %zmm3, {{.*}}
149 ; AVX512F-DAG: vmovups %zmm4, {{.*}}
150 ; AVX512F-DAG: vmovups %zmm5, {{.*}}
152 ; AVX512VL-DAG: vmovups %ymm0, {{.*}}
153 ; AVX512VL-DAG: vmovups %ymm1, {{.*}}
154 ; AVX512VL-DAG: vmovups %ymm2, {{.*}}
155 ; AVX512VL-DAG: vmovups %ymm3, {{.*}}
156 ; AVX512VL-DAG: vmovups %ymm4, {{.*}}
157 ; AVX512VL-DAG: vmovups %ymm5, {{.*}}
161 ; SSE2-DAG: movups {{.*}}, %xmm0
162 ; SSE2-DAG: movups {{.*}}, %xmm1
163 ; SSE2-DAG: movups {{.*}}, %xmm2
164 ; SSE2-DAG: movups {{.*}}, %xmm3
165 ; SSE2-DAG: movups {{.*}}, %xmm4
166 ; SSE2-DAG: movups {{.*}}, %xmm5
168 ; AVX-DAG: vmovups {{.*}}, %ymm0
169 ; AVX-DAG: vmovups {{.*}}, %ymm1
170 ; AVX-DAG: vmovups {{.*}}, %ymm2
171 ; AVX-DAG: vmovups {{.*}}, %ymm3
172 ; AVX-DAG: vmovups {{.*}}, %ymm4
173 ; AVX-DAG: vmovups {{.*}}, %ymm5
175 ; AVX512F-DAG: vmovups {{.*}}, %zmm0
176 ; AVX512F-DAG: vmovups {{.*}}, %zmm1
177 ; AVX512F-DAG: vmovups {{.*}}, %zmm2
178 ; AVX512F-DAG: vmovups {{.*}}, %zmm3
179 ; AVX512F-DAG: vmovups {{.*}}, %zmm4
180 ; AVX512F-DAG: vmovups {{.*}}, %zmm5
182 ; AVX512VL-DAG: vmovups {{.*}}, %ymm0
183 ; AVX512VL-DAG: vmovups {{.*}}, %ymm1
184 ; AVX512VL-DAG: vmovups {{.*}}, %ymm2
185 ; AVX512VL-DAG: vmovups {{.*}}, %ymm3
186 ; AVX512VL-DAG: vmovups {{.*}}, %ymm4
187 ; AVX512VL-DAG: vmovups {{.*}}, %ymm5
189 ; CHECK-DAG: movl {{.*}}, %ecx
190 ; CHECK-DAG: movl {{.*}}, %edx
191 ; CHECK: jmp vector_target_prefer256@@12
193 define x86_vectorcallcc i32 @vector_target_prefer256(i32 inreg %a, i32 inreg %b, i32 %c) "min-legal-vector-width"="256" "prefer-vector-width"="256" {
195 %a1 = add i32 %a0, %c