# elf/dl-trampoline.s
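# NOTE: this appears to be the preprocessed (macro-expanded) form of glibc's
# sysdeps/x86_64/dl-trampoline.S.  It contains the PLT trampolines used by
# the dynamic linker for lazy symbol resolution: the _dl_runtime_profile_*
# variants call _dl_profile_fixup and support the LD_AUDIT/LD_PROFILE
# pltenter/pltexit hooks, while the _dl_runtime_resolve_* variants call
# _dl_fixup and differ only in how caller state is preserved around the
# resolver (fxsave vs. xsave vs. xsavec).

# _dl_runtime_profile_avx512: profiling trampoline for CPUs with AVX-512;
# the full %zmm argument registers are preserved across the fixup call.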
	.text
	.globl _dl_runtime_profile_avx512
	.hidden _dl_runtime_profile_avx512
	.type _dl_runtime_profile_avx512, @function
	.align 16
_dl_runtime_profile_avx512:
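	# Frame layout relative to %rbx (the entry %rsp minus 32):
	#   0(%rbx)  saved %rbx           32(%rbx)  link_map (pushed by PLT)
	#   8(%rbx)  saved %rax           40(%rbx)  reloc_index (pushed by PLT)
	#  16(%rbx)  framesize out-arg    48(%rbx)  return address
	#  24(%rbx)  pointer to the La_x86_64_regs block built below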
	sub $32, %rsp			# Allocate the local storage.
	movq %rbx, (%rsp)
	movq %rax, 8(%rsp)
	mov %rsp, %rbx
	and $-64, %rsp
	sub $(768 + 16*8), %rsp
	movq %rsp, 24(%rbx)
	movq %rdx, 0(%rsp)
	movq %r8, 8(%rsp)
	movq %r9, 16(%rsp)
	movq %rcx, 24(%rsp)
	movq %rsi, 32(%rsp)
	movq %rdi, 40(%rsp)
	movq %rbp, 48(%rsp)
	lea 48(%rbx), %rax
	movq %rax, 56(%rsp)
	movaps %xmm0, (64)(%rsp)
	movaps %xmm1, (64 + 16)(%rsp)
	movaps %xmm2, (64 + 16*2)(%rsp)
	movaps %xmm3, (64 + 16*3)(%rsp)
	movaps %xmm4, (64 + 16*4)(%rsp)
	movaps %xmm5, (64 + 16*5)(%rsp)
	movaps %xmm6, (64 + 16*6)(%rsp)
	movaps %xmm7, (64 + 16*7)(%rsp)
	bndmov %bnd0, (704)(%rsp)	# Preserve bound
	bndmov %bnd1, (704 + 16)(%rsp)	# registers.  Nops if
	bndmov %bnd2, (704 + 16*2)(%rsp)	# MPX not available
	bndmov %bnd3, (704 + 16*3)(%rsp)	# or disabled.
	vmovdqa64 %zmm0, (192)(%rsp)
	vmovdqa64 %zmm1, (192 + 64)(%rsp)
	vmovdqa64 %zmm2, (192 + 64*2)(%rsp)
	vmovdqa64 %zmm3, (192 + 64*3)(%rsp)
	vmovdqa64 %zmm4, (192 + 64*4)(%rsp)
	vmovdqa64 %zmm5, (192 + 64*5)(%rsp)
	vmovdqa64 %zmm6, (192 + 64*6)(%rsp)
	vmovdqa64 %zmm7, (192 + 64*7)(%rsp)
	vmovdqa %xmm0, (768)(%rsp)
	vmovdqa %xmm1, (768 + 16)(%rsp)
	vmovdqa %xmm2, (768 + 16*2)(%rsp)
	vmovdqa %xmm3, (768 + 16*3)(%rsp)
	vmovdqa %xmm4, (768 + 16*4)(%rsp)
	vmovdqa %xmm5, (768 + 16*5)(%rsp)
	vmovdqa %xmm6, (768 + 16*6)(%rsp)
	vmovdqa %xmm7, (768 + 16*7)(%rsp)
	mov %rsp, %rcx			# La_x86_64_regs pointer to %rcx.
	mov 48(%rbx), %rdx		# Load return address if needed.
	mov 40(%rbx), %rsi		# Copy args pushed by PLT in register.
	mov 32(%rbx), %rdi		# %rdi: link_map, %rsi: reloc_index
	lea 16(%rbx), %r8		# Address of framesize
	call _dl_profile_fixup		# Call resolver.
	mov %rax, %r11			# Save return value.
	movq 8(%rbx), %rax		# Get back register content.
	movq 0(%rsp), %rdx
	movq 8(%rsp), %r8
	movq 16(%rsp), %r9
	movaps (64)(%rsp), %xmm0
	movaps (64 + 16)(%rsp), %xmm1
	movaps (64 + 16*2)(%rsp), %xmm2
	movaps (64 + 16*3)(%rsp), %xmm3
	movaps (64 + 16*4)(%rsp), %xmm4
	movaps (64 + 16*5)(%rsp), %xmm5
	movaps (64 + 16*6)(%rsp), %xmm6
	movaps (64 + 16*7)(%rsp), %xmm7
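	# The audit hooks run by _dl_profile_fixup may modify the argument
	# registers.  For each vector argument, compare the live %xmmN with
	# the reference copy saved at 768(%rsp).  If it differs, the hook
	# supplied a new value: keep %xmmN and record it in the register
	# array.  If it is unchanged, reload the full-width %zmmN saved
	# before the call so the upper bits are preserved.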
	vpcmpeqq (768)(%rsp), %xmm0, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm0, (192)(%rsp)
	jmp 1f
2:	vmovdqa64 (192)(%rsp), %zmm0
	vmovdqa %xmm0, (64)(%rsp)
1:	vpcmpeqq (768 + 16)(%rsp), %xmm1, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm1, (192 + 64)(%rsp)
	jmp 1f
2:	vmovdqa64 (192 + 64)(%rsp), %zmm1
	vmovdqa %xmm1, (64 + 16)(%rsp)
1:	vpcmpeqq (768 + 16*2)(%rsp), %xmm2, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm2, (192 + 64*2)(%rsp)
	jmp 1f
2:	vmovdqa64 (192 + 64*2)(%rsp), %zmm2
	vmovdqa %xmm2, (64 + 16*2)(%rsp)
1:	vpcmpeqq (768 + 16*3)(%rsp), %xmm3, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm3, (192 + 64*3)(%rsp)
	jmp 1f
2:	vmovdqa64 (192 + 64*3)(%rsp), %zmm3
	vmovdqa %xmm3, (64 + 16*3)(%rsp)
1:	vpcmpeqq (768 + 16*4)(%rsp), %xmm4, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm4, (192 + 64*4)(%rsp)
	jmp 1f
2:	vmovdqa64 (192 + 64*4)(%rsp), %zmm4
	vmovdqa %xmm4, (64 + 16*4)(%rsp)
1:	vpcmpeqq (768 + 16*5)(%rsp), %xmm5, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm5, (192 + 64*5)(%rsp)
	jmp 1f
2:	vmovdqa64 (192 + 64*5)(%rsp), %zmm5
	vmovdqa %xmm5, (64 + 16*5)(%rsp)
1:	vpcmpeqq (768 + 16*6)(%rsp), %xmm6, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm6, (192 + 64*6)(%rsp)
	jmp 1f
2:	vmovdqa64 (192 + 64*6)(%rsp), %zmm6
	vmovdqa %xmm6, (64 + 16*6)(%rsp)
1:	vpcmpeqq (768 + 16*7)(%rsp), %xmm7, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm7, (192 + 64*7)(%rsp)
	jmp 1f
2:	vmovdqa64 (192 + 64*7)(%rsp), %zmm7
	vmovdqa %xmm7, (64 + 16*7)(%rsp)
1:	bndmov (704)(%rsp), %bnd0	# Restore bound
	bndmov (704 + 16)(%rsp), %bnd1	# registers.
	bndmov (704 + 16*2)(%rsp), %bnd2
	bndmov (704 + 16*3)(%rsp), %bnd3
	mov 16(%rbx), %r10		# Anything in framesize?
	test %r10, %r10
	jns 3f
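	# A negative framesize means the audit modules do not want the
	# pltexit hook: restore the remaining argument registers, unwind
	# the local frame and tail-jump straight to the resolved target.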
	movq 24(%rsp), %rcx
	movq 32(%rsp), %rsi
	movq 40(%rsp), %rdi
	mov %rbx, %rsp
	movq (%rsp), %rbx
	add $48, %rsp			# Adjust the stack to the return value
					# (eats the reloc index and link_map)
	jmp *%r11			# Jump to function address.
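	# A non-negative framesize requests the pltexit hook: copy that many
	# bytes of the caller's stack arguments into a fresh buffer, call
	# the target there, then report the result via _dl_call_pltexit.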
3:	lea 56(%rbx), %rsi		# Source: the caller's stack arguments.
	add $8, %r10			# Align the copy size
	and $-16, %r10			# to 16 bytes.
	mov %r10, %rcx
	sub %r10, %rsp
	mov %rsp, %rdi
	shr $3, %rcx			# Convert byte count to qwords.
	rep
	movsq
	movq 24(%rdi), %rcx		# Get back register content.
	movq 32(%rdi), %rsi
	movq 40(%rdi), %rdi
	call *%r11
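	# The target has returned.  Build a La_x86_64_retval on the stack,
	# followed by reference copies of %xmm0/%xmm1 at offset 240 that
	# are used the same way as above to detect whether the pltexit
	# hooks replaced the vector return values.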
	mov 24(%rbx), %rsp		# Drop the copied stack content
	sub $(240 + 16*2), %rsp
	mov %rsp, %rcx			# La_x86_64_retval argument to %rcx.
	movq %rax, 0(%rcx)
	movq %rdx, 8(%rcx)
	movaps %xmm0, 16(%rcx)
	movaps %xmm1, 32(%rcx)
	vmovdqa64 %zmm0, 80(%rcx)
	vmovdqa64 %zmm1, 144(%rcx)
	vmovdqa %xmm0, (240)(%rcx)
	vmovdqa %xmm1, (240 + 16)(%rcx)
	bndmov %bnd0, 208(%rcx)		# Preserve returned bounds.
	bndmov %bnd1, 224(%rcx)
	fstpt 48(%rcx)
	fstpt 64(%rcx)
	movq 24(%rbx), %rdx		# La_x86_64_regs argument to %rdx.
	movq 40(%rbx), %rsi		# Copy args pushed by PLT in register.
	movq 32(%rbx), %rdi		# %rdi: link_map, %rsi: reloc_index
	call _dl_call_pltexit
	movq 0(%rsp), %rax
	movq 8(%rsp), %rdx
	movaps 16(%rsp), %xmm0
	movaps 32(%rsp), %xmm1
	vpcmpeqq (240)(%rsp), %xmm0, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqa64 80(%rsp), %zmm0
1:	vpcmpeqq (240 + 16)(%rsp), %xmm1, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqa64 144(%rsp), %zmm1
1:	bndmov 208(%rsp), %bnd0		# Restore bound registers.
	bndmov 224(%rsp), %bnd1
	fldt 64(%rsp)
	fldt 48(%rsp)
	mov %rbx, %rsp
	movq (%rsp), %rbx
	add $48, %rsp			# Adjust the stack to the return value
					# (eats the reloc index and link_map)
	retq
	.size _dl_runtime_profile_avx512, .-_dl_runtime_profile_avx512
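# _dl_runtime_profile_avx: identical in structure to the AVX-512 variant
# above, except that only the 256-bit %ymm registers are saved, compared
# and reloaded, and the frame is 32-byte rather than 64-byte aligned.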
	.text
	.globl _dl_runtime_profile_avx
	.hidden _dl_runtime_profile_avx
	.type _dl_runtime_profile_avx, @function
	.align 16
_dl_runtime_profile_avx:
	sub $32, %rsp			# Allocate the local storage.
	movq %rbx, (%rsp)
	movq %rax, 8(%rsp)
	mov %rsp, %rbx
	and $-32, %rsp
	sub $(768 + 16*8), %rsp
	movq %rsp, 24(%rbx)
	movq %rdx, 0(%rsp)
	movq %r8, 8(%rsp)
	movq %r9, 16(%rsp)
	movq %rcx, 24(%rsp)
	movq %rsi, 32(%rsp)
	movq %rdi, 40(%rsp)
	movq %rbp, 48(%rsp)
	lea 48(%rbx), %rax
	movq %rax, 56(%rsp)
	movaps %xmm0, (64)(%rsp)
	movaps %xmm1, (64 + 16)(%rsp)
	movaps %xmm2, (64 + 16*2)(%rsp)
	movaps %xmm3, (64 + 16*3)(%rsp)
	movaps %xmm4, (64 + 16*4)(%rsp)
	movaps %xmm5, (64 + 16*5)(%rsp)
	movaps %xmm6, (64 + 16*6)(%rsp)
	movaps %xmm7, (64 + 16*7)(%rsp)
	bndmov %bnd0, (704)(%rsp)	# Preserve bound
	bndmov %bnd1, (704 + 16)(%rsp)	# registers.  Nops if
	bndmov %bnd2, (704 + 16*2)(%rsp)	# MPX not available
	bndmov %bnd3, (704 + 16*3)(%rsp)	# or disabled.
	vmovdqa %ymm0, (192)(%rsp)
	vmovdqa %ymm1, (192 + 64)(%rsp)
	vmovdqa %ymm2, (192 + 64*2)(%rsp)
	vmovdqa %ymm3, (192 + 64*3)(%rsp)
	vmovdqa %ymm4, (192 + 64*4)(%rsp)
	vmovdqa %ymm5, (192 + 64*5)(%rsp)
	vmovdqa %ymm6, (192 + 64*6)(%rsp)
	vmovdqa %ymm7, (192 + 64*7)(%rsp)
	vmovdqa %xmm0, (768)(%rsp)
	vmovdqa %xmm1, (768 + 16)(%rsp)
	vmovdqa %xmm2, (768 + 16*2)(%rsp)
	vmovdqa %xmm3, (768 + 16*3)(%rsp)
	vmovdqa %xmm4, (768 + 16*4)(%rsp)
	vmovdqa %xmm5, (768 + 16*5)(%rsp)
	vmovdqa %xmm6, (768 + 16*6)(%rsp)
	vmovdqa %xmm7, (768 + 16*7)(%rsp)
	mov %rsp, %rcx			# La_x86_64_regs pointer to %rcx.
	mov 48(%rbx), %rdx		# Load return address if needed.
	mov 40(%rbx), %rsi		# Copy args pushed by PLT in register.
	mov 32(%rbx), %rdi		# %rdi: link_map, %rsi: reloc_index
	lea 16(%rbx), %r8		# Address of framesize
	call _dl_profile_fixup		# Call resolver.
	mov %rax, %r11			# Save return value.
	movq 8(%rbx), %rax		# Get back register content.
	movq 0(%rsp), %rdx
	movq 8(%rsp), %r8
	movq 16(%rsp), %r9
	movaps (64)(%rsp), %xmm0
	movaps (64 + 16)(%rsp), %xmm1
	movaps (64 + 16*2)(%rsp), %xmm2
	movaps (64 + 16*3)(%rsp), %xmm3
	movaps (64 + 16*4)(%rsp), %xmm4
	movaps (64 + 16*5)(%rsp), %xmm5
	movaps (64 + 16*6)(%rsp), %xmm6
	movaps (64 + 16*7)(%rsp), %xmm7
	vpcmpeqq (768)(%rsp), %xmm0, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm0, (192)(%rsp)
	jmp 1f
2:	vmovdqa (192)(%rsp), %ymm0
	vmovdqa %xmm0, (64)(%rsp)
1:	vpcmpeqq (768 + 16)(%rsp), %xmm1, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm1, (192 + 64)(%rsp)
	jmp 1f
2:	vmovdqa (192 + 64)(%rsp), %ymm1
	vmovdqa %xmm1, (64 + 16)(%rsp)
1:	vpcmpeqq (768 + 16*2)(%rsp), %xmm2, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm2, (192 + 64*2)(%rsp)
	jmp 1f
2:	vmovdqa (192 + 64*2)(%rsp), %ymm2
	vmovdqa %xmm2, (64 + 16*2)(%rsp)
1:	vpcmpeqq (768 + 16*3)(%rsp), %xmm3, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm3, (192 + 64*3)(%rsp)
	jmp 1f
2:	vmovdqa (192 + 64*3)(%rsp), %ymm3
	vmovdqa %xmm3, (64 + 16*3)(%rsp)
1:	vpcmpeqq (768 + 16*4)(%rsp), %xmm4, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm4, (192 + 64*4)(%rsp)
	jmp 1f
2:	vmovdqa (192 + 64*4)(%rsp), %ymm4
	vmovdqa %xmm4, (64 + 16*4)(%rsp)
1:	vpcmpeqq (768 + 16*5)(%rsp), %xmm5, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm5, (192 + 64*5)(%rsp)
	jmp 1f
2:	vmovdqa (192 + 64*5)(%rsp), %ymm5
	vmovdqa %xmm5, (64 + 16*5)(%rsp)
1:	vpcmpeqq (768 + 16*6)(%rsp), %xmm6, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm6, (192 + 64*6)(%rsp)
	jmp 1f
2:	vmovdqa (192 + 64*6)(%rsp), %ymm6
	vmovdqa %xmm6, (64 + 16*6)(%rsp)
1:	vpcmpeqq (768 + 16*7)(%rsp), %xmm7, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa %xmm7, (192 + 64*7)(%rsp)
	jmp 1f
2:	vmovdqa (192 + 64*7)(%rsp), %ymm7
	vmovdqa %xmm7, (64 + 16*7)(%rsp)
1:	bndmov (704)(%rsp), %bnd0	# Restore bound
	bndmov (704 + 16)(%rsp), %bnd1	# registers.
	bndmov (704 + 16*2)(%rsp), %bnd2
	bndmov (704 + 16*3)(%rsp), %bnd3
	mov 16(%rbx), %r10		# Anything in framesize?
	test %r10, %r10
	jns 3f
	movq 24(%rsp), %rcx
	movq 32(%rsp), %rsi
	movq 40(%rsp), %rdi
	mov %rbx, %rsp
	movq (%rsp), %rbx
	add $48, %rsp			# Adjust the stack to the return value
					# (eats the reloc index and link_map)
	jmp *%r11			# Jump to function address.
3:	lea 56(%rbx), %rsi		# Source: the caller's stack arguments.
	add $8, %r10			# Align the copy size
	and $-16, %r10			# to 16 bytes.
	mov %r10, %rcx
	sub %r10, %rsp
	mov %rsp, %rdi
	shr $3, %rcx			# Convert byte count to qwords.
	rep
	movsq
	movq 24(%rdi), %rcx		# Get back register content.
	movq 32(%rdi), %rsi
	movq 40(%rdi), %rdi
	call *%r11
	mov 24(%rbx), %rsp		# Drop the copied stack content
	sub $(240 + 16*2), %rsp
	mov %rsp, %rcx			# La_x86_64_retval argument to %rcx.
	movq %rax, 0(%rcx)
	movq %rdx, 8(%rcx)
	movaps %xmm0, 16(%rcx)
	movaps %xmm1, 32(%rcx)
	vmovdqa %ymm0, 80(%rcx)
	vmovdqa %ymm1, 144(%rcx)
	vmovdqa %xmm0, (240)(%rcx)
	vmovdqa %xmm1, (240 + 16)(%rcx)
	bndmov %bnd0, 208(%rcx)		# Preserve returned bounds.
	bndmov %bnd1, 224(%rcx)
	fstpt 48(%rcx)
	fstpt 64(%rcx)
	movq 24(%rbx), %rdx		# La_x86_64_regs argument to %rdx.
	movq 40(%rbx), %rsi		# Copy args pushed by PLT in register.
	movq 32(%rbx), %rdi		# %rdi: link_map, %rsi: reloc_index
	call _dl_call_pltexit
	movq 0(%rsp), %rax
	movq 8(%rsp), %rdx
	movaps 16(%rsp), %xmm0
	movaps 32(%rsp), %xmm1
	vpcmpeqq (240)(%rsp), %xmm0, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqa 80(%rsp), %ymm0
1:	vpcmpeqq (240 + 16)(%rsp), %xmm1, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	vmovdqa 144(%rsp), %ymm1
1:	bndmov 208(%rsp), %bnd0		# Restore bound registers.
	bndmov 224(%rsp), %bnd1
	fldt 64(%rsp)
	fldt 48(%rsp)
	mov %rbx, %rsp
	movq (%rsp), %rbx
	add $48, %rsp			# Adjust the stack to the return value
					# (eats the reloc index and link_map)
	retq
	.size _dl_runtime_profile_avx, .-_dl_runtime_profile_avx
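# _dl_runtime_profile_sse: baseline variant for CPUs without AVX.  Only
# %xmm0-%xmm7 carry vector arguments, so no wide state needs saving and
# the compare-and-reload sequence of the AVX variants is unnecessary.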
	.text
	.globl _dl_runtime_profile_sse
	.hidden _dl_runtime_profile_sse
	.type _dl_runtime_profile_sse, @function
	.align 16
_dl_runtime_profile_sse:
	sub $32, %rsp			# Allocate the local storage.
	movq %rbx, (%rsp)
	movq %rax, 8(%rsp)
	mov %rsp, %rbx
	and $-16, %rsp
	sub $(768 + 16*8), %rsp
	movq %rsp, 24(%rbx)
	movq %rdx, 0(%rsp)
	movq %r8, 8(%rsp)
	movq %r9, 16(%rsp)
	movq %rcx, 24(%rsp)
	movq %rsi, 32(%rsp)
	movq %rdi, 40(%rsp)
	movq %rbp, 48(%rsp)
	lea 48(%rbx), %rax
	movq %rax, 56(%rsp)
	movaps %xmm0, (64)(%rsp)
	movaps %xmm1, (64 + 16)(%rsp)
	movaps %xmm2, (64 + 16*2)(%rsp)
	movaps %xmm3, (64 + 16*3)(%rsp)
	movaps %xmm4, (64 + 16*4)(%rsp)
	movaps %xmm5, (64 + 16*5)(%rsp)
	movaps %xmm6, (64 + 16*6)(%rsp)
	movaps %xmm7, (64 + 16*7)(%rsp)
	bndmov %bnd0, (704)(%rsp)	# Preserve bound
	bndmov %bnd1, (704 + 16)(%rsp)	# registers.  Nops if
	bndmov %bnd2, (704 + 16*2)(%rsp)	# MPX not available
	bndmov %bnd3, (704 + 16*3)(%rsp)	# or disabled.
	mov %rsp, %rcx			# La_x86_64_regs pointer to %rcx.
	mov 48(%rbx), %rdx		# Load return address if needed.
	mov 40(%rbx), %rsi		# Copy args pushed by PLT in register.
	mov 32(%rbx), %rdi		# %rdi: link_map, %rsi: reloc_index
	lea 16(%rbx), %r8		# Address of framesize
	call _dl_profile_fixup		# Call resolver.
	mov %rax, %r11			# Save return value.
	movq 8(%rbx), %rax		# Get back register content.
	movq 0(%rsp), %rdx
	movq 8(%rsp), %r8
	movq 16(%rsp), %r9
	movaps (64)(%rsp), %xmm0
	movaps (64 + 16)(%rsp), %xmm1
	movaps (64 + 16*2)(%rsp), %xmm2
	movaps (64 + 16*3)(%rsp), %xmm3
	movaps (64 + 16*4)(%rsp), %xmm4
	movaps (64 + 16*5)(%rsp), %xmm5
	movaps (64 + 16*6)(%rsp), %xmm6
	movaps (64 + 16*7)(%rsp), %xmm7
	bndmov (704)(%rsp), %bnd0	# Restore bound
	bndmov (704 + 16)(%rsp), %bnd1	# registers.
	bndmov (704 + 16*2)(%rsp), %bnd2
	bndmov (704 + 16*3)(%rsp), %bnd3
	mov 16(%rbx), %r10		# Anything in framesize?
	test %r10, %r10
	jns 3f
	movq 24(%rsp), %rcx
	movq 32(%rsp), %rsi
	movq 40(%rsp), %rdi
	mov %rbx, %rsp
	movq (%rsp), %rbx
	add $48, %rsp			# Adjust the stack to the return value
					# (eats the reloc index and link_map)
	jmp *%r11			# Jump to function address.
3:	lea 56(%rbx), %rsi		# Source: the caller's stack arguments.
	add $8, %r10			# Align the copy size
	and $-16, %r10			# to 16 bytes.
	mov %r10, %rcx
	sub %r10, %rsp
	mov %rsp, %rdi
	shr $3, %rcx			# Convert byte count to qwords.
	rep
	movsq
	movq 24(%rdi), %rcx		# Get back register content.
	movq 32(%rdi), %rsi
	movq 40(%rdi), %rdi
	call *%r11
	mov 24(%rbx), %rsp		# Drop the copied stack content
	sub $240, %rsp			# sizeof(La_x86_64_retval)
	mov %rsp, %rcx			# La_x86_64_retval argument to %rcx.
	movq %rax, 0(%rcx)
	movq %rdx, 8(%rcx)
	movaps %xmm0, 16(%rcx)
	movaps %xmm1, 32(%rcx)
	bndmov %bnd0, 208(%rcx)		# Preserve returned bounds.
	bndmov %bnd1, 224(%rcx)
	fstpt 48(%rcx)
	fstpt 64(%rcx)
	movq 24(%rbx), %rdx		# La_x86_64_regs argument to %rdx.
	movq 40(%rbx), %rsi		# Copy args pushed by PLT in register.
	movq 32(%rbx), %rdi		# %rdi: link_map, %rsi: reloc_index
	call _dl_call_pltexit
	movq 0(%rsp), %rax
	movq 8(%rsp), %rdx
	movaps 16(%rsp), %xmm0
	movaps 32(%rsp), %xmm1
	bndmov 208(%rsp), %bnd0		# Restore bound registers.
	bndmov 224(%rsp), %bnd1
	fldt 64(%rsp)
	fldt 48(%rsp)
	mov %rbx, %rsp
	movq (%rsp), %rbx
	add $48, %rsp			# Adjust the stack to the return value
					# (eats the reloc index and link_map)
	retq
	.size _dl_runtime_profile_sse, .-_dl_runtime_profile_sse
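# Lazy-resolution trampolines.  These call _dl_fixup (no audit hooks) and
# only need to preserve the argument registers around the resolver.  The
# three variants differ in how vector/extended CPU state is saved:
# fxsave (fixed 512-byte x87/SSE area), xsave, or xsavec (compacted).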
	.text
	.globl _dl_runtime_resolve_fxsave
	.hidden _dl_runtime_resolve_fxsave
	.type _dl_runtime_resolve_fxsave, @function
	.align 16
_dl_runtime_resolve_fxsave:
	pushq %rbx			# push decrements %rsp by 8.
	mov %rsp, %rbx
	and $-16, %rsp
	sub $(512 + 64), %rsp		# fxsave area + register save area.
	# Preserve registers otherwise clobbered.
	movq %rax, 0(%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	fxsave 64(%rsp)
	# Copy args pushed by PLT in register.
	# %rdi: link_map, %rsi: reloc_index
	mov 16(%rbx), %rsi
	mov 8(%rbx), %rdi
	call _dl_fixup			# Call resolver.
	mov %rax, %r11			# Save return value.
	# Get register content back.
	fxrstor 64(%rsp)
	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq 0(%rsp), %rax
	mov %rbx, %rsp
	movq (%rsp), %rbx
	# Adjust stack (PLT did 2 pushes).
	add $(8 + 16), %rsp
	# Preserve bound registers across the jump; the "bnd" prefix
	# is a nop when MPX is unavailable or disabled.
	bnd jmp *%r11			# Jump to function address.
	.size _dl_runtime_resolve_fxsave, .-_dl_runtime_resolve_fxsave
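# _dl_runtime_resolve_xsave: as above, but preserves the full extended
# CPU state with XSAVE.  The save-area size is not a compile-time
# constant; it is read from the CPU-features structure at runtime.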
	.text
	.globl _dl_runtime_resolve_xsave
	.hidden _dl_runtime_resolve_xsave
	.type _dl_runtime_resolve_xsave, @function
	.align 16
_dl_runtime_resolve_xsave:
	pushq %rbx			# push decrements %rsp by 8.
	mov %rsp, %rbx
	and $-64, %rsp
	# Allocate stack space of the required size to save the state.
	sub _dl_x86_cpu_features+288(%rip), %rsp
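	# (The qword at offset 288 into _dl_x86_cpu_features presumably
	# holds the XSAVE state size, including the register save area,
	# computed at startup: the required buffer size depends on which
	# XSAVE components the CPU supports.)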
	# Preserve registers otherwise clobbered.
	movq %rax, 0(%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movl $((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)), %eax
	xorl %edx, %edx
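	# %edx:%eax is the XSAVE requested-feature bitmap: bit 1 SSE (%xmm),
	# bit 2 AVX (%ymm upper halves), bit 3 MPX bound registers, bits 5-7
	# the AVX-512 opmask and %zmm state.  x87 state (bit 0) is not
	# requested here.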
	# Clear the XSAVE Header.
	movq %rdx, (64 + 512)(%rsp)
	movq %rdx, (64 + 512 + 8)(%rsp)
	movq %rdx, (64 + 512 + 8 * 2)(%rsp)
	movq %rdx, (64 + 512 + 8 * 3)(%rsp)
	movq %rdx, (64 + 512 + 8 * 4)(%rsp)
	movq %rdx, (64 + 512 + 8 * 5)(%rsp)
	movq %rdx, (64 + 512 + 8 * 6)(%rsp)
	movq %rdx, (64 + 512 + 8 * 7)(%rsp)
	xsave 64(%rsp)
	# Copy args pushed by PLT in register.
	# %rdi: link_map, %rsi: reloc_index
	mov 16(%rbx), %rsi
	mov 8(%rbx), %rdi
	call _dl_fixup			# Call resolver.
	mov %rax, %r11			# Save return value.
	# Get register content back.
	movl $((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)), %eax
	xorl %edx, %edx
	xrstor 64(%rsp)
	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq 0(%rsp), %rax
	mov %rbx, %rsp
	movq (%rsp), %rbx
	# Adjust stack (PLT did 2 pushes).
	add $(8 + 16), %rsp
	# Preserve bound registers across the jump; the "bnd" prefix
	# is a nop when MPX is unavailable or disabled.
	bnd jmp *%r11			# Jump to function address.
	.size _dl_runtime_resolve_xsave, .-_dl_runtime_resolve_xsave
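# _dl_runtime_resolve_xsavec: same as the XSAVE variant, but uses the
# compacted XSAVEC format.  Only the last six qwords of the XSAVE header
# are pre-cleared here; XSAVEC itself writes XSTATE_BV and XCOMP_BV in
# the first two.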
	.text
	.globl _dl_runtime_resolve_xsavec
	.hidden _dl_runtime_resolve_xsavec
	.type _dl_runtime_resolve_xsavec, @function
	.align 16
_dl_runtime_resolve_xsavec:
	pushq %rbx			# push decrements %rsp by 8.
	mov %rsp, %rbx
	and $-64, %rsp
	# Allocate stack space of the required size to save the state.
	sub _dl_x86_cpu_features+288(%rip), %rsp
	# Preserve registers otherwise clobbered.
	movq %rax, 0(%rsp)
	movq %rcx, 8(%rsp)
	movq %rdx, 16(%rsp)
	movq %rsi, 24(%rsp)
	movq %rdi, 32(%rsp)
	movq %r8, 40(%rsp)
	movq %r9, 48(%rsp)
	movl $((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)), %eax
	xorl %edx, %edx
	# Clear the XSAVE Header.
	movq %rdx, (64 + 512 + 8 * 2)(%rsp)
	movq %rdx, (64 + 512 + 8 * 3)(%rsp)
	movq %rdx, (64 + 512 + 8 * 4)(%rsp)
	movq %rdx, (64 + 512 + 8 * 5)(%rsp)
	movq %rdx, (64 + 512 + 8 * 6)(%rsp)
	movq %rdx, (64 + 512 + 8 * 7)(%rsp)
	xsavec 64(%rsp)
	# Copy args pushed by PLT in register.
	# %rdi: link_map, %rsi: reloc_index
	mov 16(%rbx), %rsi
	mov 8(%rbx), %rdi
	call _dl_fixup			# Call resolver.
	mov %rax, %r11			# Save return value.
	# Get register content back.
	movl $((1 << 1) | (1 << 2) | (1 << 3) | (1 << 5) | (1 << 6) | (1 << 7)), %eax
	xorl %edx, %edx
	xrstor 64(%rsp)
	movq 48(%rsp), %r9
	movq 40(%rsp), %r8
	movq 32(%rsp), %rdi
	movq 24(%rsp), %rsi
	movq 16(%rsp), %rdx
	movq 8(%rsp), %rcx
	movq 0(%rsp), %rax
	mov %rbx, %rsp
	movq (%rsp), %rbx
	# Adjust stack (PLT did 2 pushes).
	add $(8 + 16), %rsp
	# Preserve bound registers across the jump; the "bnd" prefix
	# is a nop when MPX is unavailable or disabled.
	bnd jmp *%r11			# Jump to function address.
	.size _dl_runtime_resolve_xsavec, .-_dl_runtime_resolve_xsavec