clarify the purpose of this project
[nyanglibc.git] / mathvec / svml_d_cos4_core_avx.shared.s
blob e184b7f66b4712c22a7d95c91cab256cf5ba882b
1 .macro WRAPPER_IMPL_SSE2 callee
2 subq $40, %rsp
3 movaps %xmm0, (%rsp)
4 call \ callee@PLT
5 movsd %xmm0, 16(%rsp)
6 movsd 8(%rsp), %xmm0
7 call \ callee@PLT
8 movsd 16(%rsp), %xmm1
9 movsd %xmm0, 24(%rsp)
10 unpcklpd %xmm0, %xmm1
11 movaps %xmm1, %xmm0
12 addq $40, %rsp
13 ret
14 .endm
15 .macro WRAPPER_IMPL_SSE2_ff callee
16 subq $56, %rsp
17 movaps %xmm0, (%rsp)
18 movaps %xmm1, 16(%rsp)
19 call \ callee@PLT
20 movsd %xmm0, 32(%rsp)
21 movsd 8(%rsp), %xmm0
22 movsd 24(%rsp), %xmm1
23 call \ callee@PLT
24 movsd 32(%rsp), %xmm1
25 movsd %xmm0, 40(%rsp)
26 unpcklpd %xmm0, %xmm1
27 movaps %xmm1, %xmm0
28 addq $56, %rsp
29 ret
30 .endm
31 .macro WRAPPER_IMPL_SSE2_fFF callee
32 pushq %rbp
33 pushq %rbx
34 movq %rdi, %rbp
35 movq %rsi, %rbx
36 subq $40, %rsp
37 leaq 16(%rsp), %rsi
38 leaq 24(%rsp), %rdi
39 movaps %xmm0, (%rsp)
40 call \ callee@PLT
41 leaq 16(%rsp), %rsi
42 leaq 24(%rsp), %rdi
43 movsd 24(%rsp), %xmm0
44 movapd (%rsp), %xmm1
45 movsd %xmm0, 0(%rbp)
46 unpckhpd %xmm1, %xmm1
47 movsd 16(%rsp), %xmm0
48 movsd %xmm0, (%rbx)
49 movapd %xmm1, %xmm0
50 call \ callee@PLT
51 movsd 24(%rsp), %xmm0
52 movsd %xmm0, 8(%rbp)
53 movsd 16(%rsp), %xmm0
54 movsd %xmm0, 8(%rbx)
55 addq $40, %rsp
56 popq %rbx
57 popq %rbp
58 ret
59 .endm
60 .macro WRAPPER_IMPL_AVX callee
61 pushq %rbp
62 movq %rsp, %rbp
63 andq $-32, %rsp
64 subq $32, %rsp
65 vextractf128 $1, %ymm0, (%rsp)
66 vzeroupper
67 call __GI_\callee
68 vmovapd %xmm0, 16(%rsp)
69 vmovaps (%rsp), %xmm0
70 call __GI_\callee
71 vmovapd %xmm0, %xmm1
72 vmovapd 16(%rsp), %xmm0
73 vinsertf128 $1, %xmm1, %ymm0, %ymm0
74 movq %rbp, %rsp
75 popq %rbp
76 ret
77 .endm
78 .macro WRAPPER_IMPL_AVX_ff callee
79 pushq %rbp
80 movq %rsp, %rbp
81 andq $-32, %rsp
82 subq $64, %rsp
83 vextractf128 $1, %ymm0, 16(%rsp)
84 vextractf128 $1, %ymm1, (%rsp)
85 vzeroupper
86 call __GI_\callee
87 vmovaps %xmm0, 32(%rsp)
88 vmovaps 16(%rsp), %xmm0
89 vmovaps (%rsp), %xmm1
90 call __GI_\callee
91 vmovaps %xmm0, %xmm1
92 vmovaps 32(%rsp), %xmm0
93 vinsertf128 $1, %xmm1, %ymm0, %ymm0
94 movq %rbp, %rsp
95 popq %rbp
96 ret
97 .endm
98 .macro WRAPPER_IMPL_AVX_fFF callee
99 pushq %rbp
100 movq %rsp, %rbp
101 andq $-32, %rsp
102 pushq %r13
103 pushq %r14
104 subq $48, %rsp
105 movq %rsi, %r14
106 movq %rdi, %r13
107 vextractf128 $1, %ymm0, 32(%rsp)
108 vzeroupper
109 call __GI_\callee
110 vmovaps 32(%rsp), %xmm0
111 lea (%rsp), %rdi
112 lea 16(%rsp), %rsi
113 call __GI_\callee
114 vmovapd (%rsp), %xmm0
115 vmovapd 16(%rsp), %xmm1
116 vmovapd %xmm0, 16(%r13)
117 vmovapd %xmm1, 16(%r14)
118 addq $48, %rsp
119 popq %r14
120 popq %r13
121 movq %rbp, %rsp
122 popq %rbp
124 .endm
125 .macro WRAPPER_IMPL_AVX512 callee
126 pushq %rbp
127 movq %rsp, %rbp
128 andq $-64, %rsp
129 subq $128, %rsp
130 vmovups %zmm0, (%rsp)
131 vmovupd (%rsp), %ymm0
132 call __GI_\callee
133 vmovupd %ymm0, 64(%rsp)
134 vmovupd 32(%rsp), %ymm0
135 call __GI_\callee
136 vmovupd %ymm0, 96(%rsp)
137 vmovups 64(%rsp), %zmm0
138 movq %rbp, %rsp
139 popq %rbp
141 .endm
142 .macro WRAPPER_IMPL_AVX512_ff callee
143 pushq %rbp
144 movq %rsp, %rbp
145 andq $-64, %rsp
146 subq $192, %rsp
147 vmovups %zmm0, (%rsp)
148 vmovups %zmm1, 64(%rsp)
149 vmovupd (%rsp), %ymm0
150 vmovupd 64(%rsp), %ymm1
151 call __GI_\callee
152 vmovupd %ymm0, 128(%rsp)
153 vmovupd 32(%rsp), %ymm0
154 vmovupd 96(%rsp), %ymm1
155 call __GI_\callee
156 vmovupd %ymm0, 160(%rsp)
157 vmovups 128(%rsp), %zmm0
158 movq %rbp, %rsp
159 popq %rbp
161 .endm
162 .macro WRAPPER_IMPL_AVX512_fFF callee
163 pushq %rbp
164 movq %rsp, %rbp
165 andq $-64, %rsp
166 pushq %r12
167 pushq %r13
168 subq $176, %rsp
169 movq %rsi, %r13
170 vmovups %zmm0, (%rsp)
171 movq %rdi, %r12
172 vmovupd (%rsp), %ymm0
173 call __GI_\callee
174 vmovupd 32(%rsp), %ymm0
175 lea 64(%rsp), %rdi
176 lea 96(%rsp), %rsi
177 call __GI_\callee
178 vmovupd 64(%rsp), %ymm0
179 vmovupd 96(%rsp), %ymm1
180 vmovupd %ymm0, 32(%r12)
181 vmovupd %ymm1, 32(%r13)
182 vzeroupper
183 addq $176, %rsp
184 popq %r13
185 popq %r12
186 movq %rbp, %rsp
187 popq %rbp
189 .endm
190 .text
191 .globl _ZGVcN4v_cos
192 .type _ZGVcN4v_cos,@function
193 .align 1<<4
194 _ZGVcN4v_cos:
197 WRAPPER_IMPL_AVX _ZGVbN2v_cos
198 .size _ZGVcN4v_cos,.-_ZGVcN4v_cos