1 ; We specify -mcpu explicitly so that the instruction reordering that happens on
2 ; some setups (e.g., Atom) does not affect the output.
3 ; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
4 ; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
5 ; RUN: llc < %s -mcpu=core2 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
6 ; RUN: llc < %s -mcpu=core2 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
7 ; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-win32 | FileCheck %s -check-prefix=WIN32
8 ; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-mingw32 | FileCheck %s -check-prefix=MINGW_X86
9 ; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i686-pc-cygwin | FileCheck %s -check-prefix=CYGWIN
10 ; RUN: llc < %s -mcpu=core2 -O0 -mtriple=i386-pc-linux | FileCheck %s -check-prefix=LINUX
12 ; The SysV ABI used by most Unixes on x86 specifies that an sret pointer is
13 ; callee-cleanup. In MSVC's cdecl calling convention, sret pointer arguments are
14 ; caller-cleanup like normal arguments; MINGW_X86 is checked for a plain retl too.
16 define void @sret1(i8* sret %x) nounwind {
18 ; WIN32-LABEL: _sret1:
19 ; WIN32: movb $42, ({{%e[abcd]x}})
20 ; WIN32-NOT: popl %eax
23 ; MINGW_X86-LABEL: _sret1:
24 ; MINGW_X86: {{retl$}}
26 ; CYGWIN-LABEL: _sret1:
32 store i8 42, i8* %x, align 4
36 define void @sret2(i8* sret %x, i8 %y) nounwind {
38 ; WIN32-LABEL: _sret2:
39 ; WIN32: movb {{.*}}, ({{%e[abcd]x}})
40 ; WIN32-NOT: popl %eax
43 ; MINGW_X86-LABEL: _sret2:
44 ; MINGW_X86: {{retl$}}
46 ; CYGWIN-LABEL: _sret2:
56 define void @sret3(i8* sret %x, i8* %y) nounwind {
58 ; WIN32-LABEL: _sret3:
59 ; WIN32: movb $42, ([[REG1:%e[abcd]x]])
60 ; WIN32-NOT: movb $13, ([[REG1]])
61 ; WIN32-NOT: popl %eax
64 ; MINGW_X86-LABEL: _sret3:
65 ; MINGW_X86: {{retl$}}
67 ; CYGWIN-LABEL: _sret3:
79 %struct.S4 = type { i32, i32, i32 }
81 define void @sret4(%struct.S4* noalias sret %agg.result) {
83 ; WIN32-LABEL: _sret4:
84 ; WIN32: movl $42, ({{%e[abcd]x}})
85 ; WIN32-NOT: popl %eax
88 ; MINGW_X86-LABEL: _sret4:
89 ; MINGW_X86: {{retl$}}
91 ; CYGWIN-LABEL: _sret4:
97 %x = getelementptr inbounds %struct.S4, %struct.S4* %agg.result, i32 0, i32 0
98 store i32 42, i32* %x, align 4
102 %struct.S5 = type { i32 }
103 %class.C5 = type { i8 }
105 define x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* noalias sret %agg.result, %class.C5* %this) {
107 %this.addr = alloca %class.C5*, align 4
108 store %class.C5* %this, %class.C5** %this.addr, align 4
109 %this1 = load %class.C5*, %class.C5** %this.addr
110 %x = getelementptr inbounds %struct.S5, %struct.S5* %agg.result, i32 0, i32 0
111 store i32 42, i32* %x, align 4
113 ; WIN32-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
114 ; MINGW_X86-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
115 ; CYGWIN-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
116 ; LINUX-LABEL: {{^}}"?foo@C5@@QAE?AUS5@@XZ":
118 ; The address of the return structure is passed as an implicit parameter.
119 ; In the -O0 build, %eax is spilled at the beginning of the function, hence we
120 ; should match both 4(%esp) and 8(%esp).
121 ; WIN32: {{[48]}}(%esp), [[REG:%e[abcd]x]]
122 ; WIN32: movl $42, ([[REG]])
126 define void @call_foo5() {
128 %c = alloca %class.C5, align 1
129 %s = alloca %struct.S5, align 4
130 call x86_thiscallcc void @"\01?foo@C5@@QAE?AUS5@@XZ"(%struct.S5* sret %s, %class.C5* %c)
131 ; WIN32-LABEL: {{^}}_call_foo5:
132 ; MINGW_X86-LABEL: {{^}}_call_foo5:
133 ; CYGWIN-LABEL: {{^}}_call_foo5:
134 ; LINUX-LABEL: {{^}}call_foo5:
137 ; Load the address of the result and put it onto the stack
138 ; (through %ecx in the -O0 build).
139 ; The this pointer goes to ECX.
140 ; WIN32-DAG: leal {{[0-9]*}}(%esp), %e{{[a-d]}}x
141 ; WIN32-DAG: {{leal [1-9]+\(%esp\)|movl %esp}}, %ecx
142 ; WIN32-DAG: {{pushl %e[a-d]x|movl %e[a-d]x, \(%esp\)}}
143 ; WIN32-NEXT: calll "?foo@C5@@QAE?AUS5@@XZ"
149 %struct.test6 = type { i32, i32, i32 }
150 define void @test6_f(%struct.test6* %x) nounwind {
151 ; WIN32-LABEL: _test6_f:
152 ; MINGW_X86-LABEL: _test6_f:
153 ; CYGWIN-LABEL: _test6_f:
154 ; LINUX-LABEL: test6_f:
156 ; The %x argument is moved to %ecx. It will be the this pointer.
157 ; WIN32-DAG: movl {{16|20}}(%esp), %ecx
160 ; The sret pointer is (%esp)
161 ; WIN32-DAG: {{leal 4\(%esp\)|movl %esp}}, %eax
162 ; WIN32-DAG: {{pushl %eax|movl %eax, \(%esp\)}}
164 ; The sret pointer is %ecx
165 ; The %x argument is moved to (%esp). It will be the this pointer.
166 ; MINGW_X86-DAG: {{leal 4\(%esp\)|movl %esp}}, %ecx
167 ; MINGW_X86-DAG: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
168 ; MINGW_X86-NEXT: calll _test6_g
170 ; CYGWIN-DAG: {{leal 4\(%esp\)|movl %esp}}, %ecx
171 ; CYGWIN-DAG: {{pushl 16\(%esp\)|movl %eax, \(%esp\)}}
172 ; CYGWIN-NEXT: calll _test6_g
174 %tmp = alloca %struct.test6, align 4
175 call x86_thiscallcc void @test6_g(%struct.test6* sret %tmp, %struct.test6* %x)
178 declare x86_thiscallcc void @test6_g(%struct.test6* sret, %struct.test6*)
180 ; Flipping the parameters at the IR level generates the same code.
181 %struct.test7 = type { i32, i32, i32 }
182 define void @test7_f(%struct.test7* %x) nounwind {
183 ; WIN32-LABEL: _test7_f:
184 ; MINGW_X86-LABEL: _test7_f:
185 ; CYGWIN-LABEL: _test7_f:
186 ; LINUX-LABEL: test7_f:
188 ; The %x argument is moved to %ecx on all OSs. It will be the this pointer.
189 ; WIN32: movl {{16|20}}(%esp), %ecx
190 ; MINGW_X86: movl {{16|20}}(%esp), %ecx
191 ; CYGWIN: movl {{16|20}}(%esp), %ecx
193 ; The sret pointer is (%esp)
194 ; WIN32: {{leal 4\(%esp\)|movl %esp}}, %eax
195 ; WIN32-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
196 ; MINGW_X86: {{leal 4\(%esp\)|movl %esp}}, %eax
197 ; MINGW_X86-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
198 ; CYGWIN: {{leal 4\(%esp\)|movl %esp}}, %eax
199 ; CYGWIN-NEXT: {{pushl %eax|movl %eax, \(%esp\)}}
201 %tmp = alloca %struct.test7, align 4
202 call x86_thiscallcc void @test7_g(%struct.test7* %x, %struct.test7* sret %tmp)
206 define x86_thiscallcc void @test7_g(%struct.test7* %in, %struct.test7* sret %out) {
207 %s = getelementptr %struct.test7, %struct.test7* %in, i32 0, i32 0
208 %d = getelementptr %struct.test7, %struct.test7* %out, i32 0, i32 0
209 %v = load i32, i32* %s
210 store i32 %v, i32* %d
211 call void @clobber_eax()
214 ; Make sure we return the second parameter in %eax.
215 ; WIN32-LABEL: _test7_g:
216 ; WIN32: calll _clobber_eax
217 ; WIN32: movl {{.*}}, %eax
221 declare void @clobber_eax()
223 ; Test what happens if the first parameter has to be split by codegen. Realistically,
224 ; no frontend will generate code like this, but here it is for completeness.
226 define void @test8_f(i64 inreg %a, i64* sret %out) {
227 store i64 %a, i64* %out
228 call void @clobber_eax()
231 ; WIN32-LABEL: _test8_f:
232 ; WIN32: movl {{[0-9]+}}(%esp), %[[out:[a-z]+]]
233 ; WIN32-DAG: movl {{%e[abcd]x}}, 4(%[[out]])
234 ; WIN32-DAG: movl {{%e[abcd]x}}, (%[[out]])
235 ; WIN32: calll _clobber_eax
236 ; WIN32: movl {{.*}}, %eax