1 /* PLT trampolines. ia64 version.
2 Copyright (C) 2005 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
23 /* This code is used in dl-runtime.c to call the `_dl_fixup' function
24 and then redirect to the address it returns. `_dl_fixup()' takes two
25 arguments, however _dl_profile_fixup() takes five.
27 The ABI specifies that we will never see more than 8 input
28 registers to a function call, thus it is safe to simply allocate
29 those, and simpler than playing stack games. */
31 /* Used to save and restore 8 incoming fp registers */
32 #define RESOLVE_FRAME_SIZE (16*8)
/* _dl_runtime_resolve: PLT trampoline that calls _dl_fixup.
   What this listing shows: a register frame and a RESOLVE_FRAME_SIZE
   byte stack frame are set up, r8-r11 are parked in loc2-loc5, the
   incoming fp registers are spilled to the stack, _dl_fixup is called,
   and afterwards the fp registers, the stack pointer and r8-r11 are
   put back.
   NOTE(review): the listing appears to be an excerpt.  The setup of
   out0 and of the initial out1 value, the save of b0, and the final
   branch to the resolved function are not visible here - confirm
   against the complete file.  */
34 ENTRY(_dl_runtime_resolve)
/* alloc operands after ar.pfs: 8 inputs, 6 locals, 2 outputs, 0
   rotating.  The previous ar.pfs value is written to loc0.  */
38 alloc loc0 = ar.pfs, 8, 6, 2, 0
39 /* Use the 16 byte scratch area. r2 will start at f8 and
40 r3 will start at f9. */
/* Both pointers are computed from the old sp.  Once r12 has been
   lowered below they sit at new sp + 16 and new sp + 32.  Every spill
   post-increments its pointer by 32, so r2 and r3 interleave
   consecutive 16 byte slots.  */
41 adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12
42 adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12
/* Record the fixed frame size for the unwinder, then allocate it.  */
45 .fframe RESOLVE_FRAME_SIZE
46 adds r12 = -RESOLVE_FRAME_SIZE, r12
/* r8-r11 are copied into locals so that they can be put back after
   the call to _dl_fixup.  */
50 mov loc2 = r8 /* preserve struct value register */
54 mov loc3 = r9 /* preserve language specific register */
55 mov loc4 = r10 /* preserve language specific register */
56 mov loc5 = r11 /* preserve language specific register */
/* Spill the incoming fp registers, even ones through r2 and odd ones
   through r3.
   NOTE(review): f14 and f15 are filled after the call, but no spill of
   them is visible in this listing - confirm.  */
59 stf.spill [r2] = f8, 32
60 stf.spill [r3] = f9, 32
65 stf.spill [r2] = f10, 32
66 stf.spill [r3] = f11, 32
71 stf.spill [r2] = f12, 32
72 stf.spill [r3] = f13, 32
73 /* Relocation record is 24 byte. */
/* shladd computes out1 = (r15 << 3) + out1, i.e. it adds r15 * 8.
   NOTE(review): the instruction that gives out1 its initial value is
   not visible in this listing.  */
74 shladd out1 = r15, 3, out1
80 br.call.sptk.many b0 = _dl_fixup
83 /* Skip the 16byte scratch area. */
/* NOTE(review): the instructions that point r2 and r3 back at the
   spill area after the call are not visible in this listing.  */
90 ldf.fill f8 = [r2], 32
91 ldf.fill f9 = [r3], 32
96 ldf.fill f10 = [r2], 32
97 ldf.fill f11 = [r3], 32
102 ldf.fill f12 = [r2], 32
103 ldf.fill f13 = [r3], 32
108 ldf.fill f14 = [r2], 32
109 ldf.fill f15 = [r3], 32
110 .restore sp /* pop the unwind frame state */
/* Release the RESOLVE_FRAME_SIZE bytes allocated on entry.  */
111 adds r12 = RESOLVE_FRAME_SIZE, r12
115 mov r9 = loc3 /* restore language specific register */
116 mov r10 = loc4 /* restore language specific register */
117 mov r11 = loc5 /* restore language specific register */
120 mov r8 = loc2 /* restore struct value register */
123 /* An alloc is needed for the break system call to work.
124 We don't care about the old value of the pfs register. */
/* The new frame has 0 inputs, 0 locals and 8 outputs; the old ar.pfs
   value lands in r2 and is not used afterwards in this listing.  */
128 alloc r2 = ar.pfs, 0, 0, 8, 0
132 END(_dl_runtime_resolve)
135 /* The fourth argument to _dl_profile_fixup and the third one to
136 _dl_call_pltexit are a pointer to La_ia64_regs.
161 The fifth argument to _dl_profile_fixup is a pointer to long int.
162 The fourth argument to _dl_call_pltexit is a pointer to
La_ia64_retval.
178 Since the stack has to be 16 byte aligned, the stack allocation is
179 in 16 byte increments. Before calling _dl_profile_fixup, the stack
pointer will be lowered by PLTENTER_FRAME_SIZE bytes. */
188 #define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
189 #define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)
/* _dl_runtime_profile: PLT trampoline used when profiling/auditing.
   What this listing shows:
   1. A PLTENTER_FRAME_SIZE byte frame is allocated and the incoming
      general and fp argument registers, ar.unat and loc10 are stored
      into it.  out3 points at that area (La_ia64_regs).
   2. _dl_profile_fixup is called with out0-out4.
   3. The 8 byte value at [loc10] is loaded into r15 and selects one of
      three paths: -1 branches to .Lresolved, 0 branches to
      .Lno_new_frame, anything else lowers the stack by r15, copies
      data and calls the resolved function through b6.
   4. After that call the fp return registers are stored,
      _dl_call_pltexit is called, the registers are reloaded and the
      routine returns through b0.
   NOTE(review): the listing appears to be an excerpt.  The labels
   .Lresolved, .Lno_new_frame and .Lcopy, and the setup of loc1, loc6,
   loc10, r16 and r17, are not visible here - confirm against the
   complete file.  */
192 ENTRY(_dl_runtime_profile)
/* alloc operands after ar.pfs: 8 inputs, 12 locals, 8 outputs, 0
   rotating.  The previous ar.pfs value is written to loc0.  */
196 alloc loc0 = ar.pfs, 8, 12, 8, 0
211 /* There is a 16 byte scratch area. r2 will start at r8 and
212 r3 will start at r9 for La_ia64_regs. */
/* Both pointers are computed from the old sp.  Once r12 has been
   lowered they sit at new sp + 16 and new sp + 24, so r2 and r3
   interleave consecutive 8 byte slots while stepping by 16.  */
213 adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12
214 adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12
215 adds r12 = -PLTENTER_FRAME_SIZE, r12
221 mov out2 = b0 /* needed by _dl_profile_fixup */
227 adds out3 = 16, r12 /* pointer to La_ia64_regs */
/* NOTE(review): the comment above says r2/r3 start at r8/r9, but the
   stores of r8-r11 and of in6 are not visible in this listing.  */
232 st8.spill [r2] = in0, 16
234 st8.spill [r3] = in1, 16
235 mov out4 = loc10 /* pointer to new frame size */
240 st8.spill [r2] = in2, 16
242 st8.spill [r3] = in3, 16
243 mov loc2 = r8 /* preserve struct value register */
248 st8.spill [r2] = in4, 16
250 st8.spill [r3] = in5, 16
251 mov loc3 = r9 /* preserve language specific register */
/* r3 is post-incremented by 24 instead of 16 here, which moves it
   from the 8 byte slots to the fp slot used for f9 below.  */
258 st8 [r3] = in7, 24 /* adjust for f9 */
259 mov loc4 = r10 /* preserve language specific register */
263 mov r18 = ar.unat /* save it in La_ia64_regs */
264 mov loc7 = out3 /* save it for _dl_call_pltexit */
265 mov loc5 = r11 /* preserve language specific register */
/* Spill the incoming fp registers, even ones through r2 and odd ones
   through r3, each pointer stepping by 32.  */
268 stf.spill [r2] = f8, 32
269 stf.spill [r3] = f9, 32
270 mov out0 = r16 /* needed by _dl_profile_fixup */
/* NOTE(review): the instruction that loads r17 is not visible in this
   listing.  */
274 mov ar.unat = r17 /* restore it for function call */
275 mov loc8 = r16 /* save it for _dl_call_pltexit */
279 stf.spill [r2] = f10, 32
280 stf.spill [r3] = f11, 32
285 stf.spill [r2] = f12, 32
286 stf.spill [r3] = f13, 32
287 /* Relocation record is 24 byte. */
/* shladd computes out1 = (r15 << 3) + out1, i.e. it adds r15 * 8.
   NOTE(review): the instruction that gives out1 its initial value is
   not visible in this listing.  */
288 shladd out1 = r15, 3, out1
/* After f14 the pointer r2 moves on by 32.  After f15 the pointer r3
   moves on by only 24, so it ends up 8 bytes past r2.  The two 8 byte
   stores below therefore fill adjacent slots.  */
292 stf.spill [r2] = f14, 32
293 stf.spill [r3] = f15, 24
294 mov loc9 = out1 /* save it for _dl_call_pltexit */
298 st8 [r2] = r18 /* store ar.unat */
299 st8 [r3] = loc10 /* store sp */
300 br.call.sptk.many b0 = _dl_profile_fixup
303 /* Skip the 16byte scratch area, 4 language specific GRs and
304 8 incoming GRs to restore incoming fp registers. */
/* r2 = sp + 112 and r3 = sp + 128 for the f8 and f9 slots.  */
305 adds r2 = (4*8 + 8*8 + 16), r12
306 adds r3 = (4*8 + 8*8 + 32), r12
311 ldf.fill f8 = [r2], 32
312 ldf.fill f9 = [r3], 32
317 ldf.fill f10 = [r2], 32
318 ldf.fill f11 = [r3], 32
319 mov r8 = loc2 /* restore struct value register */
323 ldf.fill f12 = [r2], 32
324 ldf.fill f13 = [r3], 32
325 mov r9 = loc3 /* restore language specific register */
329 ldf.fill f14 = [r2], 32
330 ldf.fill f15 = [r3], 32
331 mov r10 = loc4 /* restore language specific register */
335 ld8 r15 = [loc10] /* load the new frame size */
336 mov r11 = loc5 /* restore language specific register */
/* p6 is set when r15 equals -1, p7 when it does not.  */
338 cmp.eq p6, p7 = -1, r15
/* Only evaluated under p7: p8 is set when r15 equals 0, p9 when it
   does not.  */
342 (p7) cmp.eq p8, p9 = 0, r15
/* r15 equal to -1: put ar.lc and ar.pfs back from loc6 and loc0 and
   leave through .Lresolved.  */
344 (p6) mov ar.lc = loc6
348 (p6) mov ar.pfs = loc0
349 (p6) br.cond.dptk.many .Lresolved
353 /* At this point, the stack looks like
359 We need to keep the current stack and call the resolved
360 function by copying the r15 byte from sp + PLTENTER_FRAME_SIZE
361 + 16 (scratch area) to sp + 16 (scratch area). Since stack
362 has to be 16byte aligned, we round r15 up to 16byte. */
/* Adding 15 is the first half of the round-up.
   NOTE(review): the masking step is not visible in this listing.  */
365 (p9) adds r15 = 15, r15
/* r15 equal to 0: nothing to copy, go to .Lno_new_frame.  */
366 (p8) br.cond.dptk.many .Lno_new_frame
373 /* We don't copy the 16byte scratch area. Prepare r16/r17 as
382 sub r12 = r12, r15 /* Adjust stack */
393 /* Skip the 16byte scratch area. Prepare r2/r3 as source. */
/* Counted loop back to .Lcopy.
   NOTE(review): the loop body and the ar.lc setup are not visible in
   this listing.  */
409 br.cloop.sptk.few .Lcopy
425 /* Call the resolved function */
426 br.call.sptk.many b0 = b6
429 /* Prepare stack for _dl_call_pltexit. Loc10 has the original
431 adds r12 = -PLTEXIT_FRAME_SIZE, loc10
432 adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10
433 adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10
437 /* Load all possible return values into buffer. */
/* Store the fp return registers f8-f13, even ones through r2 and odd
   ones through r3, and set up out2 and out3 for _dl_call_pltexit.  */
450 stf.spill [r2] = f8, 32
451 stf.spill [r3] = f9, 32
452 mov out2 = loc7 /* Pointer to La_ia64_regs */
456 stf.spill [r2] = f10, 32
457 stf.spill [r3] = f11, 32
458 adds out3 = 16, r12 /* Pointer to La_ia64_retval */
462 stf.spill [r2] = f12, 32
463 stf.spill [r3] = f13, 32
464 /* We need to restore gp for _dl_call_pltexit. */
471 br.call.sptk.many b0 = _dl_call_pltexit
474 /* Load all the non-floating and floating return values. Skip
475 the 16byte scratch area. */
/* NOTE(review): the instructions that point r2 and r3 back at the
   return value buffer after the call are not visible in this
   listing.  */
494 ldf.fill f8 = [r2], 32
495 ldf.fill f9 = [r3], 32
500 ldf.fill f10 = [r2], 32
501 ldf.fill f11 = [r3], 32
506 ldf.fill f12 = [r2], 32
507 ldf.fill f13 = [r3], 32
514 /* We know that the previous stack pointer, loc10, isn't 0.
515 We use it to reload p7. */
/* p7 becomes true because loc10 is nonzero, so the br.ret below is
   taken.  */
516 cmp.ne p7, p0 = 0, loc10
523 (p7) br.ret.sptk.many b0
526 /* An alloc is needed for the break system call to work. We
527 don't care about the old value of the pfs register. After
528 this alloc, we can't use any rotating registers. Otherwise
529 assembler won't be happy. This has to be at the end. */
533 alloc r2 = ar.pfs, 0, 0, 8, 0
537 END(_dl_runtime_profile)