/* mathvec/svml_d_sincos8_core.shared.s (nyanglibc.git)
   Purpose: libmvec wrapper implementations for the 8-lane (AVX-512)
   vectorized sincos entry points.  The file defines the generic
   SSE2/AVX/AVX-512 wrapper macros and uses the AVX-512 ones to build
   _ZGVeN8vl8l8_sincos and _ZGVeN8vvv_sincos on top of the 4-lane
   kernel _ZGVdN4vl8l8_sincos.  */
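/* Wrap a scalar callee into a 2-lane double (SSE2) vector function:
   the 128-bit argument in %xmm0 is spilled to the stack, the scalar
   callee is invoked once per lane, and the two results are packed
   back into %xmm0 with unpcklpd.  */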
.macro WRAPPER_IMPL_SSE2 callee
subq $40, %rsp
movaps %xmm0, (%rsp)
call \callee@PLT
movsd %xmm0, 16(%rsp)
movsd 8(%rsp), %xmm0
call \callee@PLT
movsd 16(%rsp), %xmm1
movsd %xmm0, 24(%rsp)
unpcklpd %xmm0, %xmm1
movaps %xmm1, %xmm0
addq $40, %rsp
ret
.endm
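/* Same idea for a two-argument scalar callee (arguments in
   %xmm0/%xmm1): both input vectors are spilled, the callee runs once
   per lane, and the two results are repacked into %xmm0.  */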
.macro WRAPPER_IMPL_SSE2_ff callee
subq $56, %rsp
movaps %xmm0, (%rsp)
movaps %xmm1, 16(%rsp)
call \callee@PLT
movsd %xmm0, 32(%rsp)
movsd 8(%rsp), %xmm0
movsd 24(%rsp), %xmm1
call \callee@PLT
movsd 32(%rsp), %xmm1
movsd %xmm0, 40(%rsp)
unpcklpd %xmm0, %xmm1
movaps %xmm1, %xmm0
addq $56, %rsp
ret
.endm
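/* Wrap a scalar sincos-style callee, callee(x, out1, out2), into a
   2-lane vector function: the caller's first output pointer (%rdi)
   is saved in %rbp and the second (%rsi) in %rbx; each scalar call
   writes its result pair into stack temporaries at 24(%rsp) and
   16(%rsp), which are then copied out lane by lane.  */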
.macro WRAPPER_IMPL_SSE2_fFF callee
pushq %rbp
pushq %rbx
movq %rdi, %rbp
movq %rsi, %rbx
subq $40, %rsp
leaq 16(%rsp), %rsi
leaq 24(%rsp), %rdi
movaps %xmm0, (%rsp)
call \callee@PLT
leaq 16(%rsp), %rsi
leaq 24(%rsp), %rdi
movsd 24(%rsp), %xmm0
movapd (%rsp), %xmm1
movsd %xmm0, 0(%rbp)
unpckhpd %xmm1, %xmm1
movsd 16(%rsp), %xmm0
movsd %xmm0, (%rbx)
movapd %xmm1, %xmm0
call \callee@PLT
movsd 24(%rsp), %xmm0
movsd %xmm0, 8(%rbp)
movsd 16(%rsp), %xmm0
movsd %xmm0, 8(%rbx)
addq $40, %rsp
popq %rbx
popq %rbp
ret
.endm
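/* Build a 4-lane (AVX) function from two calls to the 2-lane
   variant: the high 128 bits of %ymm0 are parked on the stack,
   vzeroupper clears the upper YMM state before entering SSE code,
   and vinsertf128 reassembles the 256-bit result.  The __GI_ prefix
   names glibc's internal hidden alias of the callee, avoiding a PLT
   indirection within the shared object.  */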
.macro WRAPPER_IMPL_AVX callee
pushq %rbp
movq %rsp, %rbp
andq $-32, %rsp
subq $32, %rsp
vextractf128 $1, %ymm0, (%rsp)
vzeroupper
call __GI_\callee
vmovapd %xmm0, 16(%rsp)
vmovaps (%rsp), %xmm0
call __GI_\callee
vmovapd %xmm0, %xmm1
vmovapd 16(%rsp), %xmm0
vinsertf128 $1, %xmm1, %ymm0, %ymm0
movq %rbp, %rsp
popq %rbp
ret
.endm
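/* 4-lane AVX wrapper for a two-argument callee: both %ymm0 and %ymm1
   are split into 128-bit halves across the two calls.  */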
.macro WRAPPER_IMPL_AVX_ff callee
pushq %rbp
movq %rsp, %rbp
andq $-32, %rsp
subq $64, %rsp
vextractf128 $1, %ymm0, 16(%rsp)
vextractf128 $1, %ymm1, (%rsp)
vzeroupper
call __GI_\callee
vmovaps %xmm0, 32(%rsp)
vmovaps 16(%rsp), %xmm0
vmovaps (%rsp), %xmm1
call __GI_\callee
vmovaps %xmm0, %xmm1
vmovaps 32(%rsp), %xmm0
vinsertf128 $1, %xmm1, %ymm0, %ymm0
movq %rbp, %rsp
popq %rbp
ret
.endm
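/* 4-lane AVX sincos wrapper: the first call writes lanes 0-1
   straight through the caller's pointers (still live in %rdi/%rsi),
   the second call writes lanes 2-3 to stack temporaries that are
   then copied to offset 16 of each output array.  */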
.macro WRAPPER_IMPL_AVX_fFF callee
pushq %rbp
movq %rsp, %rbp
andq $-32, %rsp
pushq %r13
pushq %r14
subq $48, %rsp
movq %rsi, %r14
movq %rdi, %r13
vextractf128 $1, %ymm0, 32(%rsp)
vzeroupper
call __GI_\callee
vmovaps 32(%rsp), %xmm0
lea (%rsp), %rdi
lea 16(%rsp), %rsi
call __GI_\callee
vmovapd (%rsp), %xmm0
vmovapd 16(%rsp), %xmm1
vmovapd %xmm0, 16(%r13)
vmovapd %xmm1, 16(%r14)
addq $48, %rsp
popq %r14
popq %r13
movq %rbp, %rsp
popq %rbp
ret
.endm
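/* 8-lane (AVX-512) wrapper built from two calls to a 4-lane callee:
   %zmm0 is spilled to a 64-byte-aligned slot, each 256-bit half is
   processed by the callee, and the two halves are reloaded as one
   %zmm0.  */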
.macro WRAPPER_IMPL_AVX512 callee
pushq %rbp
movq %rsp, %rbp
andq $-64, %rsp
subq $128, %rsp
vmovups %zmm0, (%rsp)
vmovupd (%rsp), %ymm0
call __GI_\callee
vmovupd %ymm0, 64(%rsp)
vmovupd 32(%rsp), %ymm0
call __GI_\callee
vmovupd %ymm0, 96(%rsp)
vmovups 64(%rsp), %zmm0
movq %rbp, %rsp
popq %rbp
ret
.endm
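/* As above for a two-argument callee: both %zmm operands are split
   into 256-bit halves across the two calls.  */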
.macro WRAPPER_IMPL_AVX512_ff callee
pushq %rbp
movq %rsp, %rbp
andq $-64, %rsp
subq $192, %rsp
vmovups %zmm0, (%rsp)
vmovups %zmm1, 64(%rsp)
vmovupd (%rsp), %ymm0
vmovupd 64(%rsp), %ymm1
call __GI_\callee
vmovupd %ymm0, 128(%rsp)
vmovupd 32(%rsp), %ymm0
vmovupd 96(%rsp), %ymm1
call __GI_\callee
vmovupd %ymm0, 160(%rsp)
vmovups 128(%rsp), %zmm0
movq %rbp, %rsp
popq %rbp
ret
.endm
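/* 8-lane AVX-512 sincos wrapper: the first call stores lanes 0-3
   through the caller's own pointers (untouched %rdi/%rsi), the
   second call uses stack buffers whose contents are copied to
   offset 32 of each output array.  */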
.macro WRAPPER_IMPL_AVX512_fFF callee
pushq %rbp
movq %rsp, %rbp
andq $-64, %rsp
pushq %r12
pushq %r13
subq $176, %rsp
movq %rsi, %r13
vmovups %zmm0, (%rsp)
movq %rdi, %r12
vmovupd (%rsp), %ymm0
call __GI_\callee
vmovupd 32(%rsp), %ymm0
lea 64(%rsp), %rdi
lea 96(%rsp), %rsi
call __GI_\callee
vmovupd 64(%rsp), %ymm0
vmovupd 96(%rsp), %ymm1
vmovupd %ymm0, 32(%r12)
vmovupd %ymm1, 32(%r13)
vzeroupper
addq $176, %rsp
popq %r13
popq %r12
movq %rbp, %rsp
popq %rbp
ret
.endm
.text
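/* _ZGVeN8vl8l8_sincos: per the x86_64 vector-function ABI name
   mangling, 'e' = AVX-512 ISA, 'N' = unmasked, '8' = eight lanes,
   'v' = vector argument, 'l8' = linear pointer advancing 8 bytes per
   lane.  Implemented by splitting the work across the 4-lane AVX2
   kernel.  */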
.globl _ZGVeN8vl8l8_sincos
.type _ZGVeN8vl8l8_sincos,@function
.align 1<<4
_ZGVeN8vl8l8_sincos:
WRAPPER_IMPL_AVX512_fFF _ZGVdN4vl8l8_sincos
.size _ZGVeN8vl8l8_sincos,.-_ZGVeN8vl8l8_sincos
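/* Variant for the 'vvv' signature, where the sine and cosine
   destinations arrive as vectors of eight pointers in %zmm1/%zmm2
   rather than as two base pointers: the two 4-lane calls write all
   sixteen results to stack buffers, and the results are then
   scattered through the saved pointers with scalar moves.  */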
.macro WRAPPER_IMPL_AVX512_fFF_vvv callee
pushq %rbp
movq %rsp, %rbp
andq $-64, %rsp
subq $320, %rsp
vmovups %zmm0, 256(%rsp)
lea (%rsp), %rdi
vmovups %zmm1, 128(%rdi)
vmovups %zmm2, 192(%rdi)
lea 64(%rsp), %rsi
call __GI_\callee
vmovdqu 288(%rsp), %ymm0
lea 32(%rsp), %rdi
lea 96(%rsp), %rsi
call __GI_\callee
movq 128(%rsp), %rdx
movq 192(%rsp), %rsi
movq 136(%rsp), %r8
movq 200(%rsp), %r10
movq (%rsp), %rax
movq 64(%rsp), %rcx
movq 8(%rsp), %rdi
movq 72(%rsp), %r9
movq %rax, (%rdx)
movq %rcx, (%rsi)
movq 144(%rsp), %rax
movq 208(%rsp), %rcx
movq %rdi, (%r8)
movq %r9, (%r10)
movq 152(%rsp), %rdi
movq 216(%rsp), %r9
movq 16(%rsp), %r11
movq 80(%rsp), %rdx
movq 24(%rsp), %rsi
movq 88(%rsp), %r8
movq %r11, (%rax)
movq %rdx, (%rcx)
movq 160(%rsp), %r11
movq 224(%rsp), %rdx
movq %rsi, (%rdi)
movq %r8, (%r9)
movq 168(%rsp), %rsi
movq 232(%rsp), %r8
movq 32(%rsp), %r10
movq 96(%rsp), %rax
movq 40(%rsp), %rcx
movq 104(%rsp), %rdi
movq %r10, (%r11)
movq %rax, (%rdx)
movq 176(%rsp), %r10
movq 240(%rsp), %rax
movq %rcx, (%rsi)
movq %rdi, (%r8)
movq 184(%rsp), %rcx
movq 248(%rsp), %rdi
movq 48(%rsp), %r9
movq 112(%rsp), %r11
movq 56(%rsp), %rdx
movq 120(%rsp), %rsi
movq %r9, (%r10)
movq %r11, (%rax)
movq %rdx, (%rcx)
movq %rsi, (%rdi)
movq %rbp, %rsp
popq %rbp
ret
.endm
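/* _ZGVeN8vvv_sincos: same 8-lane sincos, but with all three
   parameters passed as vectors ('vvv' mangling), again delegating to
   the 4-lane AVX2 kernel.  */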
.globl _ZGVeN8vvv_sincos
.type _ZGVeN8vvv_sincos,@function
.align 1<<4
_ZGVeN8vvv_sincos:
WRAPPER_IMPL_AVX512_fFF_vvv _ZGVdN4vl8l8_sincos
.size _ZGVeN8vvv_sincos,.-_ZGVeN8vvv_sincos
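/* Usage sketch (not part of the original file; a minimal C
   illustration, assuming GCC with libmvec support): a loop such as

       #include <math.h>
       void many_sincos (double *x, double *s, double *c, int n)
       {
         #pragma omp simd
         for (int i = 0; i < n; i++)
           sincos (x[i], &s[i], &c[i]);
       }

   compiled with, e.g., -O2 -march=skylake-avx512 -fopenmp-simd
   -ffast-math may be vectorized into calls to the entry points
   defined above.  Whether the compiler actually emits the vector
   call depends on its version and flags.  */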