1 ; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN32 %s
2 ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=nehalem | FileCheck -check-prefix=WIN64 %s
3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=nehalem | FileCheck -check-prefix=NOT_WIN %s
5 declare <16 x float> @func_float16_ptr(<16 x float>, <16 x float> *)
6 declare <16 x float> @func_float16(<16 x float>, <16 x float>)
8 ; WIN64: addps {{.*}}, {{%xmm[0-3]}}
9 ; WIN64: addps {{.*}}, {{%xmm[0-3]}}
10 ; WIN64: addps {{.*}}, {{%xmm[0-3]}}
11 ; WIN64: addps {{.*}}, {{%xmm[0-3]}}
12 ; WIN64: leaq {{.*}}(%rsp), %rcx
18 ; WIN32: addps {{.*}}, {{%xmm[0-3]}}
19 ; WIN32: addps {{.*}}, {{%xmm[0-3]}}
20 ; WIN32: addps {{.*}}, {{%xmm[0-3]}}
21 ; WIN32: addps {{.*}}, {{%xmm[0-3]}}
25 ; NOT_WIN: testf16_inp
26 ; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
27 ; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
28 ; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
29 ; NOT_WIN: addps {{.*}}, {{%xmm[0-3]}}
30 ; NOT_WIN: movq %rsp, %rdi
34 ; Test calling conventions - input parameters: a <16 x float> value and a pointer to a stack slot are passed to a call that uses intel_ocl_bicc.
35 define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
36 %y = alloca <16 x float>, align 16 ; 16-byte-aligned stack slot; its address is the second call argument
37 %x = fadd <16 x float> %a, %b ; element-wise sum, passed by value as the first call argument
38 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) ; the call site selects the intel_ocl_bicc convention; this function itself uses the default one
39 %2 = load <16 x float>, <16 x float>* %y, align 16 ; read the slot back after the call (presumably filled in by the callee - only its declaration is visible here)
40 %3 = fadd <16 x float> %2, %1 ; combine the loaded value with the value returned by the call
44 ; test calling conventions - registers preserved across the call
46 ; preserved xmm6-xmm15
49 ; WIN64: addps {{%xmm[6-9]}}, {{.*}}
50 ; WIN64: addps {{%xmm[6-9]}}, {{.*}}
53 ; preserved xmm8-xmm15
54 ; NOT_WIN: testf16_regs
56 ; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
57 ; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
58 ; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
59 ; NOT_WIN: addps {{%xmm([8-9]|1[0-1])}}, {{.*}}
62 define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { ; keeps %b in use after an intel_ocl_bicc call
63 %y = alloca <16 x float>, align 16 ; 16-byte-aligned stack slot; its address is the second call argument
64 %x = fadd <16 x float> %a, %b ; element-wise sum, passed by value as the first call argument
65 %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) ; the call site selects the intel_ocl_bicc convention
66 %2 = load <16 x float>, <16 x float>* %y, align 16 ; read the slot back after the call
67 %3 = fadd <16 x float> %1, %b ; %b is used again after the call, so its value has to survive it; the addps checks preceding this function expect it as a register operand
68 %4 = fadd <16 x float> %2, %3 ; fold in the value loaded from the stack slot
72 ; test calling conventions - prolog and epilog
73 ; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
74 ; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
75 ; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
76 ; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
77 ; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
78 ; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
79 ; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
80 ; NOT_WIN: movaps {{%xmm([8-9]|1[0-5])}}, {{.*(%rsp).*}} ## 16-byte Spill
82 ; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
83 ; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
84 ; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
85 ; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
86 ; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
87 ; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
88 ; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
89 ; NOT_WIN: movaps {{.*(%rsp).*}}, {{%xmm([8-9]|1[0-5])}} ## 16-byte Reload
90 define intel_ocl_bicc <16 x float> @test_prolog_epilog(<16 x float> %a, <16 x float> %b) nounwind {
91 %c = call <16 x float> @func_float16(<16 x float> %a, <16 x float> %b)