/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2012 Joyent, Inc. All rights reserved.
 */
#include <link.h>
#include <_audit.h>
#include <sys/asm_linkage.h>
#include <sys/auxv_386.h>
	.file	"boot_elf.s"
	.text
/*
 * On entry the 'glue code' has already done the following:
 *
 *	pushq	%rbp
 *	movq	%rsp, %rbp
 *	subq	$0x10, %rsp
 *	leaq	trace_fields(%rip), %r11
 *	movq	%r11, -0x8(%rbp)
 *	movq	$elf_plt_trace, %r11
 *	jmp	*%r11
 *
 * so - -8(%rbp) contains the dyndata ptr
 *
 *	0x0	Addr	*reflmp
 *	0x8	Addr	*deflmp
 *	0x10	Word	symndx
 *	0x14	Word	sb_flags
 *	0x18	Sym	symdef.st_name
 *	0x1c		symdef.st_info
 *	0x1d		symdef.st_other
 *	0x1e		symdef.st_shndx
 *	0x20		symdef.st_value
 *	0x28		symdef.st_size
 *
 * Also note - on entry 16 bytes have already been subtracted
 * from the %rsp.  The first 8 bytes is for the dyn_data_ptr,
 * the second 8 bytes are to align the stack and are available
 * for use.
 */
#define	REFLMP_OFF		0x0
#define	DEFLMP_OFF		0x8
#define	SYMNDX_OFF		0x10
#define	SBFLAGS_OFF		0x14
#define	SYMDEF_OFF		0x18
#define	SYMDEF_VALUE_OFF	0x20
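
/*
 * For reference, the dyndata area laid out above corresponds roughly to
 * the following C sketch (illustrative only; the authoritative layout
 * lives in the link-editor headers, and the struct and field names used
 * here are hypothetical):
 *
 *	typedef struct {
 *		void		*reflmp;	/ 0x00 referencing link map
 *		void		*deflmp;	/ 0x08 defining link map
 *		uint32_t	symndx;		/ 0x10 symbol index
 *		uint32_t	sb_flags;	/ 0x14 la_symbind() flags
 *		Elf64_Sym	symdef;		/ 0x18 copy of the symbol
 *	} dyndata_t;
 *
 * Elf64_Sym starts with st_name (4 bytes), st_info and st_other (1 byte
 * each) and st_shndx (2 bytes), so symdef.st_value lands at 0x20 and
 * symdef.st_size at 0x28, matching the table above and the *_OFF defines.
 */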
/*
 * Local stack space storage for elf_plt_trace is allocated
 * as follows:
 *
 *	First - before we got here - %rsp has been decremented
 *	by 0x10 to make space for the dyndata ptr (and another
 *	free word).  In addition to that, we create space
 *	for the following:
 *
 *	La_amd64_regs	8 * 8:	64
 *	prev_stack_size	8	8
 *	Saved regs:
 *	    %rdi		8
 *	    %rsi		8
 *	    %rdx		8
 *	    %rcx		8
 *	    %r8			8
 *	    %r9			8
 *	    %r10		8
 *	    %r11		8
 *	    %rax		8
 *				=======
 *		Subtotal:	144 (32byte aligned)
 *
 *	Saved Media Regs (used to pass floating point args):
 *	    %xmm0 - %xmm7	32 * 8:	256
 *				=======
 *		Total:		400 (32byte aligned)
 *
 * So - will subtract the following to create enough space
 *
 *	-8(%rbp)	store dyndata ptr
 *	-16(%rbp)	store call destination
 *	-80(%rbp)	space for La_amd64_regs
 *	-88(%rbp)	prev stack size
 *
 * The next %rbp offsets are only true if the caller had correct stack
 * alignment.  See note above SPRDIOFF for why we use %rsp alignment to
 * access these stack fields.
 *
 *	-96(%rbp)	entering %rdi
 *	-104(%rbp)	entering %rsi
 *	-112(%rbp)	entering %rdx
 *	-120(%rbp)	entering %rcx
 *	-128(%rbp)	entering %r8
 *	-136(%rbp)	entering %r9
 *	-144(%rbp)	entering %r10
 *	-152(%rbp)	entering %r11
 *	-160(%rbp)	entering %rax
 *	-192(%rbp)	entering %xmm0
 *	-224(%rbp)	entering %xmm1
 *	-256(%rbp)	entering %xmm2
 *	-288(%rbp)	entering %xmm3
 *	-320(%rbp)	entering %xmm4
 *	-352(%rbp)	entering %xmm5
 *	-384(%rbp)	entering %xmm6
 *	-416(%rbp)	entering %xmm7
 */
#define	SPDYNOFF	-8
#define	SPDESTOFF	-16
#define	SPLAREGOFF	-80
#define	SPPRVSTKOFF	-88
/*
 * The next set of offsets are relative to %rsp.
 * We guarantee %rsp is 32-byte aligned (stricter than the 16-byte ABI
 * minimum).  This guarantees the ymm registers are saved to 32-byte
 * aligned addresses.
 * %rbp may only be 8 byte aligned if we came in from non-ABI compliant code.
 */
#define	SPRDIOFF	320
#define	SPRSIOFF	312
#define	SPRDXOFF	304
#define	SPRCXOFF	296
#define	SPR8OFF		288
#define	SPR9OFF		280
#define	SPR10OFF	272
#define	SPR11OFF	264
#define	SPRAXOFF	256
#define	SPXMM0OFF	224
#define	SPXMM1OFF	192
#define	SPXMM2OFF	160
#define	SPXMM3OFF	128
#define	SPXMM4OFF	96
#define	SPXMM5OFF	64
#define	SPXMM6OFF	32
#define	SPXMM7OFF	0
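
/*
 * A quick consistency check of the two offset schemes (assuming the
 * caller was ABI aligned and the andq below happens not to move %rsp,
 * i.e. %rbp - 16 is already 32-byte aligned): after the glue code's
 * "subq $0x10, %rsp" and the "subq $400, %rsp" below, %rsp ends up at
 * %rbp - 416, so a slot at SPxxxOFF(%rsp) sits at %rbp - (416 - SPxxxOFF):
 *
 *	SPRDIOFF  = 320  ->  %rbp - 96	(matches "entering %rdi" above)
 *	SPXMM0OFF = 224  ->  %rbp - 192	(matches "entering %xmm0" above)
 *	SPXMM7OFF = 0    ->  %rbp - 416	(matches "entering %xmm7" above)
 */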
/* See elf_rtbndr for explanation behind org_scapset */
	.extern	org_scapset
	.globl	elf_plt_trace
	.type	elf_plt_trace, @function
	.align	16
elf_plt_trace:
	/*
	 * Enforce 32-byte stack alignment here.
	 * The andq below rounds %rsp down to the nearest 32-byte
	 * boundary (subtracting 0, 8, 16 or 24 bytes as needed).
	 */
	andq	$-32, %rsp	/* enforce 32-byte stack alignment */
	subq	$400, %rsp	/ create some local storage

	movq	%rdi, SPRDIOFF(%rsp)
	movq	%rsi, SPRSIOFF(%rsp)
	movq	%rdx, SPRDXOFF(%rsp)
	movq	%rcx, SPRCXOFF(%rsp)
	movq	%r8, SPR8OFF(%rsp)
	movq	%r9, SPR9OFF(%rsp)
	movq	%r10, SPR10OFF(%rsp)
	movq	%r11, SPR11OFF(%rsp)
	movq	%rax, SPRAXOFF(%rsp)

	movq	org_scapset@GOTPCREL(%rip), %r9
	movq	(%r9), %r9
	movl	(%r9), %edx
	testl	$AV_386_AVX, %edx
	jne	.trace_save_ymm

.trace_save_xmm:
	movdqa	%xmm0, SPXMM0OFF(%rsp)
	movdqa	%xmm1, SPXMM1OFF(%rsp)
	movdqa	%xmm2, SPXMM2OFF(%rsp)
	movdqa	%xmm3, SPXMM3OFF(%rsp)
	movdqa	%xmm4, SPXMM4OFF(%rsp)
	movdqa	%xmm5, SPXMM5OFF(%rsp)
	movdqa	%xmm6, SPXMM6OFF(%rsp)
	movdqa	%xmm7, SPXMM7OFF(%rsp)
	jmp	.trace_save_finish

.trace_save_ymm:
	vmovdqa	%ymm0, SPXMM0OFF(%rsp)
	vmovdqa	%ymm1, SPXMM1OFF(%rsp)
	vmovdqa	%ymm2, SPXMM2OFF(%rsp)
	vmovdqa	%ymm3, SPXMM3OFF(%rsp)
	vmovdqa	%ymm4, SPXMM4OFF(%rsp)
	vmovdqa	%ymm5, SPXMM5OFF(%rsp)
	vmovdqa	%ymm6, SPXMM6OFF(%rsp)
	vmovdqa	%ymm7, SPXMM7OFF(%rsp)

.trace_save_finish:
	movq	SPDYNOFF(%rbp), %rax			/ %rax = dyndata
	testb	$LA_SYMB_NOPLTENTER, SBFLAGS_OFF(%rax)	/ <link.h>
	je	.start_pltenter
	movq	SYMDEF_VALUE_OFF(%rax), %rdi
	movq	%rdi, SPDESTOFF(%rbp)			/ save destination address
	jmp	.end_pltenter
.start_pltenter:
	/*
	 * save all registers into La_amd64_regs
	 */
	leaq	SPLAREGOFF(%rbp), %rsi	/ %rsi = &La_amd64_regs
	leaq	8(%rbp), %rdi
	movq	%rdi, 0(%rsi)		/ la_rsp
	movq	0(%rbp), %rdi
	movq	%rdi, 8(%rsi)		/ la_rbp
	movq	SPRDIOFF(%rsp), %rdi
	movq	%rdi, 16(%rsi)		/ la_rdi
	movq	SPRSIOFF(%rsp), %rdi
	movq	%rdi, 24(%rsi)		/ la_rsi
	movq	SPRDXOFF(%rsp), %rdi
	movq	%rdi, 32(%rsi)		/ la_rdx
	movq	SPRCXOFF(%rsp), %rdi
	movq	%rdi, 40(%rsi)		/ la_rcx
	movq	SPR8OFF(%rsp), %rdi
	movq	%rdi, 48(%rsi)		/ la_r8
	movq	SPR9OFF(%rsp), %rdi
	movq	%rdi, 56(%rsi)		/ la_r9
	/*
	 * prepare for call to la_pltenter
	 */
	movq	SPDYNOFF(%rbp), %r11	/ %r11 = &dyndata
	leaq	SBFLAGS_OFF(%r11), %r9	/ arg6 (&sb_flags)
	leaq	SPLAREGOFF(%rbp), %r8	/ arg5 (&La_amd64_regs)
	movl	SYMNDX_OFF(%r11), %ecx	/ arg4 (symndx)
	leaq	SYMDEF_OFF(%r11), %rdx	/ arg3 (&Sym)
	movq	DEFLMP_OFF(%r11), %rsi	/ arg2 (dlmp)
	movq	REFLMP_OFF(%r11), %rdi	/ arg1 (rlmp)
	call	audit_pltenter@PLT
	movq	%rax, SPDESTOFF(%rbp)	/ save calling address
.end_pltenter:
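
	/*
	 * For reference, the register setup above implies a call of
	 * roughly this shape (a sketch inferred from the argument
	 * comments; the authoritative prototype and exact types live in
	 * rtld's audit code):
	 *
	 *	Addr audit_pltenter(Rt_map *rlmp, Rt_map *dlmp, Sym *sym,
	 *	    uint_t symndx, La_amd64_regs *regs, uint_t *sb_flags);
	 *
	 * The returned address is the destination that will actually be
	 * called, which is why %rax is stored at SPDESTOFF.
	 */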
	/*
	 * If *no* la_pltexit() routines exist we do not need to keep the
	 * stack frame before we call the actual routine.  Instead we jump
	 * to it and remove our stack frame from the stack at the same time.
	 */
	movl	audit_flags(%rip), %eax
	andl	$AF_PLTEXIT, %eax	/ value of audit.h:AF_PLTEXIT
	cmpl	$0, %eax
	je	.bypass_pltexit
	/*
	 * Has the *nopltexit* flag been set for this entry point?
	 */
	movq	SPDYNOFF(%rbp), %r11	/ %r11 = &dyndata
	testb	$LA_SYMB_NOPLTEXIT, SBFLAGS_OFF(%r11)
	je	.start_pltexit
.bypass_pltexit:
	/*
	 * No PLTEXIT processing required.
	 */
	movq	0(%rbp), %r11
	movq	%r11, -8(%rbp)		/ move prev %rbp
	movq	SPDESTOFF(%rbp), %r11	/ %r11 == calling destination
	movq	%r11, 0(%rbp)		/ store destination at top
	/ Restore registers
	movq	org_scapset@GOTPCREL(%rip), %r9
	movq	(%r9), %r9
	movl	(%r9), %edx
	testl	$AV_386_AVX, %edx
	jne	.trace_restore_ymm

.trace_restore_xmm:
	movdqa	SPXMM0OFF(%rsp), %xmm0
	movdqa	SPXMM1OFF(%rsp), %xmm1
	movdqa	SPXMM2OFF(%rsp), %xmm2
	movdqa	SPXMM3OFF(%rsp), %xmm3
	movdqa	SPXMM4OFF(%rsp), %xmm4
	movdqa	SPXMM5OFF(%rsp), %xmm5
	movdqa	SPXMM6OFF(%rsp), %xmm6
	movdqa	SPXMM7OFF(%rsp), %xmm7
	jmp	.trace_restore_finish

.trace_restore_ymm:
	vmovdqa	SPXMM0OFF(%rsp), %ymm0
	vmovdqa	SPXMM1OFF(%rsp), %ymm1
	vmovdqa	SPXMM2OFF(%rsp), %ymm2
	vmovdqa	SPXMM3OFF(%rsp), %ymm3
	vmovdqa	SPXMM4OFF(%rsp), %ymm4
	vmovdqa	SPXMM5OFF(%rsp), %ymm5
	vmovdqa	SPXMM6OFF(%rsp), %ymm6
	vmovdqa	SPXMM7OFF(%rsp), %ymm7

.trace_restore_finish:
	movq	SPRDIOFF(%rsp), %rdi
	movq	SPRSIOFF(%rsp), %rsi
	movq	SPRDXOFF(%rsp), %rdx
	movq	SPRCXOFF(%rsp), %rcx
	movq	SPR8OFF(%rsp), %r8
	movq	SPR9OFF(%rsp), %r9
	movq	SPR10OFF(%rsp), %r10
	movq	SPR11OFF(%rsp), %r11
	movq	SPRAXOFF(%rsp), %rax

	subq	$8, %rbp		/ adjust %rbp for 'ret'
	movq	%rbp, %rsp		/
	/*
	 * At this point, after a little doctoring, we should
	 * have the following on the stack:
	 *
	 *	16(%rsp):  ret addr
	 *	8(%rsp):   dest_addr
	 *	0(%rsp):   Previous %rbp
	 *
	 * So - we pop the previous %rbp, and then
	 * ret to our final destination.
	 */
	popq	%rbp			/
	ret				/ jmp to final destination
					/   and clean up stack :)
.start_pltexit:
	/*
	 * In order to call the destination procedure and then return
	 * to audit_pltexit() for post analysis we must first grow
	 * our stack frame and then duplicate the original caller's
	 * stack state.  This duplicates all of the arguments
	 * that were to be passed to the destination procedure.
	 */
	movq	%rbp, %rdi		/
	addq	$16, %rdi		/ %rdi = src
	movq	(%rbp), %rdx		/
	subq	%rdi, %rdx		/ %rdx == prev frame sz
	/*
	 * If audit_argcnt > 0 then we limit the number of
	 * arguments that will be duplicated to audit_argcnt.
	 *
	 * If (prev_stack_size > (audit_argcnt * 8))
	 *	prev_stack_size = audit_argcnt * 8;
	 */
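	/*
	 * In C-like terms, the clamp and copy loop that follow amount
	 * roughly to this (an illustrative sketch, not code from this
	 * file; "src" names the caller's outgoing argument area at
	 * 16(%rbp) and "prev_frame_size" the size computed above):
	 *
	 *	size_t sz = prev_frame_size;
	 *	if (audit_argcnt > 0 && sz > (size_t)audit_argcnt * 8)
	 *		sz = (size_t)audit_argcnt * 8;
	 *	dest = alloca(sz);		/ subq %rdx, %rsp
	 *	(void) memcpy(dest, src, sz);	/ the .while_base loop
	 */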
	movl	audit_argcnt(%rip), %eax	/ %eax = audit_argcnt
	cmpl	$0, %eax
	jle	.grow_stack
	leaq	(,%rax,8), %rax			/ %rax = %rax * 8
	cmpq	%rax, %rdx
	jle	.grow_stack
	movq	%rax, %rdx
	/*
	 * Grow the stack and duplicate the arguments of the
	 * original caller.
	 *
	 * We save %rsp in %r11 since we need to use the current rsp for
	 * accessing the registers saved in our stack frame.
	 */
.grow_stack:
	movq	%rsp, %r11
	subq	%rdx, %rsp		/ grow the stack
	movq	%rdx, SPPRVSTKOFF(%rbp)	/ -88(%rbp) == prev frame sz
	movq	%rsp, %rcx		/ %rcx = dest
	addq	%rcx, %rdx		/ %rdx == tail of dest
.while_base:
	cmpq	%rdx, %rcx		/ while (dest < tail of dest) {
	jge	.end_while		/
	movq	(%rdi), %rsi
	movq	%rsi, (%rcx)		/	*dest = *src
	addq	$8, %rdi		/	src++
	addq	$8, %rcx		/	dest++
	jmp	.while_base		/ }
	/*
	 * The above stack is now an exact duplicate of
	 * the stack of the original calling procedure.
	 */
.end_while:
	/ Restore registers using %r11 which contains our old %rsp value
	/ before growing the stack.

	/ Yes, we have to do this dance again.  Sorry.
	movq	org_scapset@GOTPCREL(%rip), %r9
	movq	(%r9), %r9
	movl	(%r9), %edx
	testl	$AV_386_AVX, %edx
	jne	.trace_r2_ymm
.trace_r2_xmm:
	movdqa	SPXMM0OFF(%r11), %xmm0
	movdqa	SPXMM1OFF(%r11), %xmm1
	movdqa	SPXMM2OFF(%r11), %xmm2
	movdqa	SPXMM3OFF(%r11), %xmm3
	movdqa	SPXMM4OFF(%r11), %xmm4
	movdqa	SPXMM5OFF(%r11), %xmm5
	movdqa	SPXMM6OFF(%r11), %xmm6
	movdqa	SPXMM7OFF(%r11), %xmm7
	jmp	.trace_r2_finish

.trace_r2_ymm:
	vmovdqa	SPXMM0OFF(%r11), %ymm0
	vmovdqa	SPXMM1OFF(%r11), %ymm1
	vmovdqa	SPXMM2OFF(%r11), %ymm2
	vmovdqa	SPXMM3OFF(%r11), %ymm3
	vmovdqa	SPXMM4OFF(%r11), %ymm4
	vmovdqa	SPXMM5OFF(%r11), %ymm5
	vmovdqa	SPXMM6OFF(%r11), %ymm6
	vmovdqa	SPXMM7OFF(%r11), %ymm7

.trace_r2_finish:
	movq	SPRDIOFF(%r11), %rdi
	movq	SPRSIOFF(%r11), %rsi
	movq	SPRDXOFF(%r11), %rdx
	movq	SPRCXOFF(%r11), %rcx
	movq	SPR8OFF(%r11), %r8
	movq	SPR9OFF(%r11), %r9
	movq	SPR10OFF(%r11), %r10
	movq	SPRAXOFF(%r11), %rax
	movq	SPR11OFF(%r11), %r11	/ restore %r11 last
	/*
	 * Call to destination function - we'll return here
	 * for pltexit monitoring.
	 */
	call	*SPDESTOFF(%rbp)

	addq	SPPRVSTKOFF(%rbp), %rsp	/ cleanup dupped stack
	/ prepare for call to audit_pltexit()
	movq	SPDYNOFF(%rbp), %r11	/ %r11 = &dyndata
	movq	SYMNDX_OFF(%r11), %r8	/ arg5 (symndx)
	leaq	SYMDEF_OFF(%r11), %rcx	/ arg4 (&Sym)
	movq	DEFLMP_OFF(%r11), %rdx	/ arg3 (dlmp)
	movq	REFLMP_OFF(%r11), %rsi	/ arg2 (rlmp)
	movq	%rax, %rdi		/ arg1 (returnval)
	call	audit_pltexit@PLT
	/*
	 * Clean up after ourselves and return to the
	 * original calling procedure.
	 */

	/ Restore registers
	movq	SPRDIOFF(%rsp), %rdi
	movq	SPRSIOFF(%rsp), %rsi
	movq	SPRDXOFF(%rsp), %rdx
	movq	SPRCXOFF(%rsp), %rcx
	movq	SPR8OFF(%rsp), %r8
	movq	SPR9OFF(%rsp), %r9
	movq	SPR10OFF(%rsp), %r10
	movq	SPR11OFF(%rsp), %r11
	// rax already contains return value
	movdqa	SPXMM0OFF(%rsp), %xmm0
	movdqa	SPXMM1OFF(%rsp), %xmm1
	movdqa	SPXMM2OFF(%rsp), %xmm2
	movdqa	SPXMM3OFF(%rsp), %xmm3
	movdqa	SPXMM4OFF(%rsp), %xmm4
	movdqa	SPXMM5OFF(%rsp), %xmm5
	movdqa	SPXMM6OFF(%rsp), %xmm6
	movdqa	SPXMM7OFF(%rsp), %xmm7

	movq	%rbp, %rsp		/
	popq	%rbp			/
	ret				/ return to caller
	.size	elf_plt_trace, .-elf_plt_trace
/*
 * We got here because a call to a function resolved to a procedure
 * linkage table entry.  That entry did a jmp to the first PLT entry, which
 * in turn did a call to elf_rtbndr.
 *
 * The code sequence that got us here was:
 *
 * .PLT0:
 *	pushq	GOT+8(%rip)	#GOT[1]
 *	jmp	*GOT+16(%rip)	#GOT[2]
 *	nop
 *	nop
 *	nop
 *	nop
 *	...
 * PLT entry for foo:
 *	jmp	*name1@GOTPCREL(%rip)
 *	pushl	$rel.plt.foo
 *	jmp	PLT0
 *
 * At entry, the stack looks like this:
 *
 *	return address			16(%rsp)
 *	$rel.plt.foo (plt index)	8(%rsp)
 *	lmp				0(%rsp)
 */
/*
 * The PLT code that landed us here placed 2 arguments on the stack as
 * arguments to elf_rtbndr.
 * Additionally, the pc of the caller is below these 2 args.
 * Our stack will look like this after we establish a stack frame with
 * the "pushq %rbp; movq %rsp, %rbp" sequence:
 *
 *	8(%rbp)			arg1 - *lmp
 *	16(%rbp), %rsi		arg2 - reloc index
 *	24(%rbp), %rdx		arg3 - pc of caller
 */
#define	LBPLMPOFF	8	/* arg1 - *lmp */
#define	LBPRELOCOFF	16	/* arg2 - reloc index */
#define	LBRPCOFF	24	/* arg3 - pc of caller */
/*
 * Possible arguments for the resolved function are in registers as per
 * the AMD64 ABI.  We must save on the local stack all possible register
 * arguments before interposing functions to resolve the called function.
 * Possible arguments must be restored before invoking the resolved function.
 *
 * Before the AVX instruction set enhancements to AMD64, the set of registers
 * and their sizes did not change across processors.  With AVX, the xmm
 * registers became the lower 128 bits of the ymm registers.  Because of
 * this, we need to conditionally save 256 bits instead of 128 bits.
 * Regardless of whether we have ymm registers or not, we always reserve
 * stack space as if we do, to simplify the code.
 *
 * Local stack space storage for elf_rtbndr is allocated as follows:
 *
 *	Saved regs:
 *	    %rax		8
 *	    %rdi		8
 *	    %rsi		8
 *	    %rdx		8
 *	    %rcx		8
 *	    %r8			8
 *	    %r9			8
 *	    %r10		8
 *				=======
 *		Subtotal:	64 (32byte aligned)
 *
 *	Saved Media Regs (used to pass floating point args):
 *	    %ymm0 - %ymm7	32 * 8:	256
 *				=======
 *		Total:		320 (32byte aligned)
 *
 * So - will subtract the following to create enough space
 *
 *	0(%rsp)		save %rax
 *	8(%rsp)		save %rdi
 *	16(%rsp)	save %rsi
 *	24(%rsp)	save %rdx
 *	32(%rsp)	save %rcx
 *	40(%rsp)	save %r8
 *	48(%rsp)	save %r9
 *	56(%rsp)	save %r10
 *	64(%rsp)	save %ymm0
 *	96(%rsp)	save %ymm1
 *	128(%rsp)	save %ymm2
 *	160(%rsp)	save %ymm3
 *	192(%rsp)	save %ymm4
 *	224(%rsp)	save %ymm5
 *	256(%rsp)	save %ymm6
 *	288(%rsp)	save %ymm7
 *
 * Note: Some callers may use 8-byte stack alignment instead of the
 * ABI required 16-byte alignment.  We use %rsp offsets to save/restore
 * registers because %rbp may not be properly aligned.  We guarantee %rsp
 * is 32-byte aligned in the function preamble, which the aligned ymm
 * saves below rely on.
 *
 * As the registers may either be xmm or ymm, we've left the name as xmm, but
 * increased the offset between them to always cover the xmm and ymm cases.
 */
#define	LS_SIZE	$320		/* local stack space to save all possible arguments */
#define	LSRAXOFF	0	/* for SSE register count */
#define	LSRDIOFF	8	/* arg 0 ... */
#define	LSRSIOFF	16
#define	LSRDXOFF	24
#define	LSRCXOFF	32
#define	LSR8OFF		40
#define	LSR9OFF		48
#define	LSR10OFF	56	/* ... arg 5 */
#define	LSXMM0OFF	64	/* SSE arg 0 ... */
#define	LSXMM1OFF	96
#define	LSXMM2OFF	128
#define	LSXMM3OFF	160
#define	LSXMM4OFF	192
#define	LSXMM5OFF	224
#define	LSXMM6OFF	256
#define	LSXMM7OFF	288	/* ... SSE arg 7 */
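
/*
 * For reference, the offset arithmetic above works out as follows: the
 * eight integer slots (LSRAXOFF through LSR10OFF) occupy 8 * 8 = 64
 * bytes, and each media register slot is 32 bytes wide so it can hold a
 * full ymm value, giving LSXMMnOFF = 64 + n * 32 (e.g. LSXMM7OFF =
 * 64 + 7 * 32 = 288).  The total is 64 + 8 * 32 = 320 bytes, which is
 * exactly LS_SIZE.
 */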
/*
 * The org_scapset is a global variable that is a part of rtld.  It
 * contains the capabilities that the kernel has told us are supported
 * (auxv_hwcap).  This is necessary for determining whether or not we
 * need to save and restore AVX registers or simple SSE registers.  Note
 * that the field we care about is currently at offset 0; if that
 * changes, this code will have to be updated.
 */
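
/*
 * In C, the repeated sequence below that loads org_scapset and tests for
 * AVX corresponds roughly to this sketch (illustrative only; the pointer
 * type is simplified here, rtld's real declaration uses its capabilities
 * structure, whose first 32-bit word is the one we test):
 *
 *	extern uint32_t *org_scapset;	/ simplified type
 *
 *	static int
 *	have_avx(void)
 *	{
 *		return ((*org_scapset & AV_386_AVX) != 0);
 *	}
 */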
	.extern	org_scapset
	.weak	_elf_rtbndr
_elf_rtbndr = elf_rtbndr
	ENTRY(elf_rtbndr)

	pushq	%rbp
	movq	%rsp, %rbp
	/*
	 * Some libraries may (incorrectly) use non-ABI compliant 8-byte stack
	 * alignment.  Enforce 32-byte stack alignment here: the andq below
	 * rounds %rsp down to the nearest 32-byte boundary (subtracting 0,
	 * 8, 16 or 24 bytes as needed) so the aligned ymm saves are safe.
	 */
	andq	$-32, %rsp	/* enforce 32-byte stack alignment */
	subq	LS_SIZE, %rsp	/* save all ABI defined argument registers */

	movq	%rax, LSRAXOFF(%rsp)	/* for SSE register count */
	movq	%rdi, LSRDIOFF(%rsp)	/* arg 0 .. */
	movq	%rsi, LSRSIOFF(%rsp)
	movq	%rdx, LSRDXOFF(%rsp)
	movq	%rcx, LSRCXOFF(%rsp)
	movq	%r8, LSR8OFF(%rsp)
	movq	%r9, LSR9OFF(%rsp)	/* .. arg 5 */
	movq	%r10, LSR10OFF(%rsp)	/* call chain reg */

	/*
	 * Our xmm registers could secretly be ymm registers in disguise.
	 */
	movq	org_scapset@GOTPCREL(%rip), %r9
	movq	(%r9), %r9
	movl	(%r9), %edx
	testl	$AV_386_AVX, %edx
	jne	.save_ymm

.save_xmm:
	movdqa	%xmm0, LSXMM0OFF(%rsp)	/* SSE arg 0 ... */
	movdqa	%xmm1, LSXMM1OFF(%rsp)
	movdqa	%xmm2, LSXMM2OFF(%rsp)
	movdqa	%xmm3, LSXMM3OFF(%rsp)
	movdqa	%xmm4, LSXMM4OFF(%rsp)
	movdqa	%xmm5, LSXMM5OFF(%rsp)
	movdqa	%xmm6, LSXMM6OFF(%rsp)
	movdqa	%xmm7, LSXMM7OFF(%rsp)	/* ... SSE arg 7 */
	jmp	.save_finish

.save_ymm:
	vmovdqa	%ymm0, LSXMM0OFF(%rsp)	/* SSE arg 0 ... */
	vmovdqa	%ymm1, LSXMM1OFF(%rsp)
	vmovdqa	%ymm2, LSXMM2OFF(%rsp)
	vmovdqa	%ymm3, LSXMM3OFF(%rsp)
	vmovdqa	%ymm4, LSXMM4OFF(%rsp)
	vmovdqa	%ymm5, LSXMM5OFF(%rsp)
	vmovdqa	%ymm6, LSXMM6OFF(%rsp)
	vmovdqa	%ymm7, LSXMM7OFF(%rsp)	/* ... SSE arg 7 */

.save_finish:
	movq	LBPLMPOFF(%rbp), %rdi	/* arg1 - *lmp */
	movq	LBPRELOCOFF(%rbp), %rsi	/* arg2 - reloc index */
	movq	LBRPCOFF(%rbp), %rdx	/* arg3 - pc of caller */
	call	elf_bndr@PLT		/* call elf_bndr(lmp, relndx, pc) */
	movq	%rax, LBPRELOCOFF(%rbp)	/* store final destination */

	/*
	 * Restore possible arguments before invoking the resolved function.
	 * We check the xmm vs. ymm regs first so we can use the others.
	 */
	movq	org_scapset@GOTPCREL(%rip), %r9
	movq	(%r9), %r9
	movl	(%r9), %edx
	testl	$AV_386_AVX, %edx
	jne	.restore_ymm

.restore_xmm:
	movdqa	LSXMM0OFF(%rsp), %xmm0
	movdqa	LSXMM1OFF(%rsp), %xmm1
	movdqa	LSXMM2OFF(%rsp), %xmm2
	movdqa	LSXMM3OFF(%rsp), %xmm3
	movdqa	LSXMM4OFF(%rsp), %xmm4
	movdqa	LSXMM5OFF(%rsp), %xmm5
	movdqa	LSXMM6OFF(%rsp), %xmm6
	movdqa	LSXMM7OFF(%rsp), %xmm7
	jmp	.restore_finish

.restore_ymm:
	vmovdqa	LSXMM0OFF(%rsp), %ymm0
	vmovdqa	LSXMM1OFF(%rsp), %ymm1
	vmovdqa	LSXMM2OFF(%rsp), %ymm2
	vmovdqa	LSXMM3OFF(%rsp), %ymm3
	vmovdqa	LSXMM4OFF(%rsp), %ymm4
	vmovdqa	LSXMM5OFF(%rsp), %ymm5
	vmovdqa	LSXMM6OFF(%rsp), %ymm6
	vmovdqa	LSXMM7OFF(%rsp), %ymm7

.restore_finish:
	movq	LSRAXOFF(%rsp), %rax
	movq	LSRDIOFF(%rsp), %rdi
	movq	LSRSIOFF(%rsp), %rsi
	movq	LSRDXOFF(%rsp), %rdx
	movq	LSRCXOFF(%rsp), %rcx
	movq	LSR8OFF(%rsp), %r8
	movq	LSR9OFF(%rsp), %r9
	movq	LSR10OFF(%rsp), %r10

	movq	%rbp, %rsp
	popq	%rbp

	addq	$8, %rsp	/* pop 1st plt-pushed arg */
				/* the second argument is used */
				/* for the 'return' address to our */
				/* final destination */

	ret			/* invoke resolved function */
	.size	elf_rtbndr, .-elf_rtbndr