2 /*--------------------------------------------------------------------*/
3 /*--- The core dispatch loop, for jumping to a code address. ---*/
4 /*--- dispatch-ppc64-linux.S ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of Valgrind, a dynamic binary instrumentation
11 Copyright (C) 2005-2017 Cerion Armour-Brown <cerion@open-works.co.uk>
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 #include "pub_core_basics_asm.h"
31 #if defined(VGP_ppc64le_linux)
33 #include "pub_core_dispatch_asm.h"
34 #include "pub_core_transtab_asm.h"
35 #include "libvex_guest_offsets.h" /* for OFFSET_ppc64_CIA */
37 /* NOTE: PPC64 supports Big Endian and Little Endian. It also supports the
38 ELF version 1 and ELF version 2 APIs.
40 Currently LE uses ELF version 2 and BE uses ELF version 1. However,
41 BE and LE may support the other ELF version in the future. So, the
42 _CALL_ELF is used in the assembly function to enable code for a
43 specific ELF version independently of the Endianness of the machine.
44 The test "#if _CALL_ELF == 2" checks if ELF version 2 is being used.
47 /* References to globals via the TOC */
/* Each ".tc sym[TC],sym" line below creates a TOC entry so the code can
   load the address of a global with "ld rN, .tocent__sym@toc(2)"
   (r2 = TOC pointer).  Entries cover the fast-lookup cache
   VG_(tt_fast), the xindir statistics counters, and the has-VMX flag.
   NOTE(review): the embedded original line numbers (50, 51, 52, 56, ...)
   show gaps — some lines of the original file are missing from this
   extraction.  Verify against the upstream file before assembling. */
50 .globl vgPlain_tt_fast
/* Reserve 4 bytes, 4-aligned, for vgPlain_tt_fast (= VG_(tt_fast) after
   Z-encoding).  NOTE(review): 4 bytes looks like a placeholder; the real
   table is defined elsewhere — confirm against m_transtab. */
51 .lcomm vgPlain_tt_fast,4,4
52 .type vgPlain_tt_fast, @object
56 .tocent__vgPlain_tt_fast:
57 .tc vgPlain_tt_fast[TC],vgPlain_tt_fast
59 .tocent__vgPlain_stats__n_xIndirs_32:
60 .tc vgPlain_stats__n_xIndirs_32[TC], vgPlain_stats__n_xIndirs_32
62 .tocent__vgPlain_stats__n_xIndir_hits1_32:
63 .tc vgPlain_stats__n_xIndir_hits1_32[TC], vgPlain_stats__n_xIndir_hits1_32
65 .tocent__vgPlain_stats__n_xIndir_hits2_32:
66 .tc vgPlain_stats__n_xIndir_hits2_32[TC], vgPlain_stats__n_xIndir_hits2_32
68 .tocent__vgPlain_stats__n_xIndir_hits3_32:
69 .tc vgPlain_stats__n_xIndir_hits3_32[TC], vgPlain_stats__n_xIndir_hits3_32
71 .tocent__vgPlain_stats__n_xIndir_misses_32:
72 .tc vgPlain_stats__n_xIndir_misses_32[TC], vgPlain_stats__n_xIndir_misses_32
74 .tocent__vgPlain_machine_ppc64_has_VMX:
75 .tc vgPlain_machine_ppc64_has_VMX[TC], vgPlain_machine_ppc64_has_VMX
77 /*------------------------------------------------------------*/
79 /*--- The dispatch loop. VG_(disp_run_translations) is ---*/
80 /*--- used to run all translations, ---*/
81 /*--- including no-redir ones. ---*/
83 /*------------------------------------------------------------*/
85 /*----------------------------------------------------*/
86 /*--- Entry and preamble (set everything up) ---*/
87 /*----------------------------------------------------*/
/*--------------------------------------------------------------------*/
/* VG_(disp_run_translations) — C-callable entry point that runs
   translations from the code cache until one of the VG_(disp_cp_*)
   continuation points is reached, then returns two words to the
   caller via *two_words.
     In:  r3 = two_words (UWord*), r4 = guest_state, r5 = host_addr
   NOTE(review): this listing is a lossy extraction — the embedded
   original line numbers jump (e.g. 132 -> 134 -> 154), so many
   instructions (register save/restore loops, branches) and the
   "#if _CALL_ELF == 2 / #else" guards that normally separate the two
   VG_(disp_run_translations): labels below are missing from this
   view.  Do not assemble as-is; restore from the upstream file.    */
/*--------------------------------------------------------------------*/
90 void VG_(disp_run_translations)( UWord* two_words,
97 .globl VG_(disp_run_translations)
98 /* ELFv2-style function symbol (single entry, .localentry below). */
99 .type VG_(disp_run_translations),@function
100 VG_(disp_run_translations):
101 .type .VG_(disp_run_translations),@function
/* ELFv1-style alternative: an opd (official procedure descriptor)
   entry — (code addr, TOC base, 0) — plus the dot-symbol label.
   NOTE(review): the preprocessor conditional selecting between this
   and the label above is missing from this extraction. */
105 VG_(disp_run_translations):
106 .quad .VG_(disp_run_translations),.TOC.@tocbase,0
108 .type .VG_(disp_run_translations),@function
110 .globl .VG_(disp_run_translations)
111 .VG_(disp_run_translations):
/* Global entry point: establish r2 (TOC pointer) from r12 (our own
   entry address, per ELFv2) — "addis 2,12,..@ha" plus a missing
   "addi 2,2,..@l" (extraction gap). */
113 0: addis 2, 12,.TOC.-0b@ha
115 .localentry VG_(disp_run_translations), .-VG_(disp_run_translations)
118 /* r3 holds two_words */
119 /* r4 holds guest_state */
120 /* r5 holds host_addr */
122 /* ----- entry point to VG_(disp_run_translations) ----- */
123 /* PPC64 ABI saves LR->16(prt_sp), CR->8(prt_sp)) */
131 /* New stack frame */
132 stdu 1,-624(1) /* sp should maintain 16-byte alignment */
134 /* General reg save area : 152 bytes */
154 std 3,104(1) /* save two_words for later */
156 /* Save callee-saved registers... */
157 /* Floating-point reg save area : 144 bytes */
177 /* It's necessary to save/restore VRSAVE in the AIX / Darwin ABI.
178 The Linux kernel might not actually use VRSAVE for its intended
179 purpose, but it should be harmless to preserve anyway. */
180 /* r3, r4, r5 are live here, so use r6 */
/* r6 = VG_(machine_ppc64_has_VMX) flag, loaded via its TOC entry;
   the test/branch on it is missing from this extraction. */
181 ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
186 /* VRSAVE save word : 32 bytes */
187 mfspr 6,256 /* vrsave reg is spr number 256 */
190 /* Alignment padding : 4 bytes */
192 /* Vector reg save area (quadword aligned) : 192 bytes */
219 /* Local variable space... */
221 /* r3 holds two_words */
222 /* r4 holds guest_state */
223 /* r5 holds host_addr */
225 /* 96(sp) used later to check FPSCR[RM] */
226 /* 88(sp) used later to load fpscr with zero */
229 /* Linkage Area (reserved) BE ABI
231 32(sp) : link editor doubleword
232 24(sp) : compiler doubleword
238 /* set host FPU control word to the default mode expected
239 by VEX-generated code. See comments in libvex.h for
241 /* => get zero into f3 (tedious)
242 fsub 3,3,3 is not a reliable way to do this, since if
243 f3 holds a NaN or similar then we don't necessarily
244 wind up with zero. */
248 mtfsf 0xFF,3 /* fpscr = lo32 of f3 */
250 /* set host AltiVec control word to the default mode expected
251 by VEX-generated code. */
252 ld 6,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
257 vspltisw 3,0x0 /* generate zero */
258 mtvscr 3 /* sets VSCR[NJ]=0 */
262 /* make a stack frame for the code we are calling */
265 /* Set up the guest state ptr */
266 mr 31,4 /* r31 (generated code gsp) = r4 */
268 /* for the LE ABI need to setup r2 and r12 */
/* Re-derive the TOC pointer before entering generated code (ELFv2);
   the matching low-half addi and the mtctr/bctr jump via r5 are
   missing from this extraction. */
269 0: addis 2, 12,.TOC.-0b@ha
273 /* and jump into the code cache. Chained translations in
274 the code cache run, until for whatever reason, they can't
275 continue. When that happens, the translation in question
276 will jump (or call) to one of the continuation points
277 VG_(cp_...) below. */
282 /*----------------------------------------------------*/
283 /*--- Postamble and exit. ---*/
284 /*----------------------------------------------------*/
287 /* At this point, r6 and r7 contain two
288 words to be returned to the caller. r6
289 holds a TRC value, and r7 optionally may
290 hold another word (for CHAIN_ME exits, the
291 address of the place to patch.) */
293 /* undo the "make a stack frame for the code we are calling" */
296 /* We're leaving. Check that nobody messed with
297 VSCR or FPSCR in ways we don't expect. */
298 /* Using r11 - value used again further on, so don't trash! */
299 ld 11,.tocent__vgPlain_machine_ppc64_has_VMX@toc(2)
302 /* Set fpscr back to a known state, since vex-generated code
303 may have messed with fpscr[rm]. */
309 mtfsf 0xFF,3 /* fpscr = f3 */
311 cmpldi 11,0 /* Do we have altivec? */
314 /* Expect VSCR[NJ] to be 0, call invariant_violation if
316 /* first generate 4x 0x00010000 */
317 vspltisw 4,0x1 /* 4x 0x00000001 */
318 vspltisw 5,0x0 /* zero */
319 vsldoi 6,4,5,0x2 /* <<2*8 => 4x 0x00010000 */
320 /* retrieve VSCR and mask wanted bits */
/* NOTE(review): the mfvscr that should load v7 before this vand is
   missing from this extraction. */
322 vand 7,7,6 /* gives NJ flag */
323 vspltw 7,7,0x3 /* flags-word to all lanes */
324 vcmpequw. 8,6,7 /* CR[24] = 1 if v6 == v7 */
325 bt 24,.invariant_violation /* branch if all_equal, i.e. NJ=1 */
328 /* otherwise we're OK */
/* Failure path: return VG_TRC_INVARIANT_FAILED to the caller so the
   core can report that generated code perturbed VSCR/FPSCR. */
331 .invariant_violation:
332 li 6,VG_TRC_INVARIANT_FAILED
337 /* r11 already holds VG_(machine_ppc32_has_VMX) value */
341 /* Restore Altivec regs.
342 Use r5 as scratch since r6/r7 are live. */
345 mfspr 5,256 /* VRSAVE reg is spr number 256 */
374 /* Restore FP regs */
375 /* Floating-point regs */
395 /* restore int regs, including importantly r3 (two_words) */
416 /* Stash return values */
420 /* restore lr & sp, and leave */
/* Offsets are relative to the pre-stdu sp: frame is 624 bytes, so the
   caller's LR/CR save slots sit at 624+8 and 624+16. */
421 ld 0,632(1) /* stack_size + 8 */
423 ld 0,640(1) /* stack_size + 16 */
425 addi 1,1,624 /* stack_size */
428 .size VG_(disp_run_translations),.-VG_(disp_run_translations)
432 /*----------------------------------------------------*/
433 /*--- Continuation points ---*/
434 /*----------------------------------------------------*/
436 /* ------ Chain me to slow entry point ------ */
/* Continuation point reached (by call) from an unchained translation.
   Returns to VG_(disp_run_translations)'s caller with
   r6 = VG_TRC_CHAIN_ME_TO_SLOW_EP and (per the comment below) the
   patch address derived from the return address.
   NOTE(review): the two entry labels below are normally separated by
   "#if _CALL_ELF == 2 / #else" guards, and several instructions
   (mflr/patch-address computation, branch to postamble) are missing
   from this extraction — embedded line numbers jump 464 -> 471. */
439 .globl VG_(disp_cp_chain_me_to_slowEP)
441 .type VG_(disp_cp_chain_me_to_slowEP),@function
442 VG_(disp_cp_chain_me_to_slowEP):
/* ELFv1 variant: procedure descriptor + dot-symbol entry. */
446 VG_(disp_cp_chain_me_to_slowEP):
447 .quad .VG_(disp_cp_chain_me_to_slowEP),.TOC.@tocbase,0
450 .type .VG_(disp_cp_chain_me_to_slowEP),@function
451 .globl .VG_(disp_cp_chain_me_to_slowEP)
452 .VG_(disp_cp_chain_me_to_slowEP):
/* ELFv2 global entry: derive TOC pointer (r2) from entry addr (r12). */
454 0: addis 2, 12,.TOC.-0b@ha
456 .localentry VG_(disp_cp_chain_me_to_slowEP), .-VG_(disp_cp_chain_me_to_slowEP)
458 /* We got called. The return address indicates
459 where the patching needs to happen. Collect
460 the return address and, exit back to C land,
461 handing the caller the pair (Chain_me_S, RA) */
462 li 6, VG_TRC_CHAIN_ME_TO_SLOW_EP
464 /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_slowEP
471 .size VG_(disp_cp_chain_me_to_slowEP),.-VG_(disp_cp_chain_me_to_slowEP)
474 /* ------ Chain me to fast entry point ------ */
/* Same shape as the slowEP continuation above, but reports
   VG_TRC_CHAIN_ME_TO_FAST_EP so the core patches the call site to
   chain to the fast entry point instead.
   NOTE(review): as with slowEP, the ELFv1/ELFv2 conditionals and the
   return-address-collection instructions are missing from this
   extraction (embedded line numbers jump 502 -> 509). */
477 .globl VG_(disp_cp_chain_me_to_fastEP)
479 .type VG_(disp_cp_chain_me_to_fastEP),@function
480 VG_(disp_cp_chain_me_to_fastEP):
/* ELFv1 variant: procedure descriptor + dot-symbol entry. */
484 VG_(disp_cp_chain_me_to_fastEP):
485 .quad .VG_(disp_cp_chain_me_to_fastEP),.TOC.@tocbase,0
488 .type .VG_(disp_cp_chain_me_to_fastEP),@function
489 .globl .VG_(disp_cp_chain_me_to_fastEP)
490 .VG_(disp_cp_chain_me_to_fastEP):
/* ELFv2 global entry: derive TOC pointer (r2) from entry addr (r12). */
492 0: addis 2, 12,.TOC.-0b@ha
494 .localentry VG_(disp_cp_chain_me_to_fastEP), .-VG_(disp_cp_chain_me_to_fastEP)
496 /* We got called. The return address indicates
497 where the patching needs to happen. Collect
498 the return address and, exit back to C land,
499 handing the caller the pair (Chain_me_F, RA) */
500 li 6, VG_TRC_CHAIN_ME_TO_FAST_EP
502 /* 20 = imm64-fixed5 r30, disp_cp_chain_me_to_fastEP
509 .size VG_(disp_cp_chain_me_to_fastEP),.-VG_(disp_cp_chain_me_to_fastEP)
512 /* ------ Indirect but boring jump ------ */
/* Continuation point for an indirect guest jump: look the guest CIA up
   in the 4-way fast translation cache VG_(tt_fast).  On a hit, promote
   the entry one way toward way 0 (move-to-front-ish) and jump to the
   cached host address; on a miss, return VG_TRC_INNER_FASTMISS to C.
   NOTE(review): this extraction has gaps — the increments of the stats
   counters after each "ld 24, .tocent__...@toc(2)", the branch/jump
   instructions between the way comparisons, and the 1:/2:/3: labels
   are missing (embedded line numbers jump, e.g. 567 -> 577). */
515 .globl VG_(disp_cp_xindir)
517 .type VG_(disp_cp_xindir),@function
/* ELFv1 procedure descriptor + dot-symbol entry. */
523 .quad .VG_(disp_cp_xindir),.TOC.@tocbase,0
526 .type .VG_(disp_cp_xindir),@function
527 .globl .VG_(disp_cp_xindir)
528 .VG_(disp_cp_xindir):
/* ELFv2 global entry: derive TOC pointer (r2) from entry addr (r12). */
530 0: addis 2, 12,.TOC.-0b@ha
532 .localentry VG_(disp_cp_xindir), .-VG_(disp_cp_xindir)
534 /* Where are we going? */
535 ld 20, OFFSET_ppc64_CIA(31)
/* Bump VG_(stats__n_xIndirs_32) (load of its address; the actual
   lwz/addi/stw sequence is missing from this extraction). */
538 ld 24, .tocent__vgPlain_stats__n_xIndirs_32@toc(2)
543 // LIVE: r31 (guest state ptr), r20 (guest address to go to).
544 // We use 6 temporaries:
545 // r26 (to point at the relevant FastCacheSet),
546 // r21, r22, r23 (scratch, for swapping entries within a set)
547 // r24, r25 (other scratch)
549 /* Try a fast lookup in the translation cache. This is pretty much
550 a handcoded version of VG_(lookupInFastCache). */
552 // Compute r26 = VG_TT_FAST_HASH(guest)
553 srdi 26, 20, 2 // g2 = guest >> 2
554 srdi 25, 20, (VG_TT_FAST_BITS + 2) // (g2 >> VG_TT_FAST_BITS)
555 xor 26, 26, 25 // (g2 >> VG_TT_FAST_BITS) ^ g2
556 andi. 26, 26, VG_TT_FAST_MASK // setNo
558 // Compute r6 = &VG_(tt_fast)[r6]
559 ld 25, .tocent__vgPlain_tt_fast@toc(2)
/* setNo -> byte offset of the FastCacheSet (each set is
   1 << VG_FAST_CACHE_SET_BITS bytes); the add of r25 into r26 is
   missing from this extraction. */
560 sldi 26, 26, VG_FAST_CACHE_SET_BITS
563 // LIVE: r31 (guest state ptr), r20 (guest addr), r26 (cache set)
/* Way 0 probe: guest/host pair at FCS_g0/FCS_h0. */
565 ld 24, FCS_g0(26) // .guest0
566 ld 25, FCS_h0(26) // .host0
567 cmpd 24, 20 // cmp against .guest0
/* Way 1 probe (the intervening hit-at-way-0 fast path and the load of
   .guest1 into r24 are missing from this extraction). */
577 cmpd 24, 20 // cmp against .guest1
579 // hit at way 1; swap upwards
580 ld 21, FCS_g0(26) // 21 = old .guest0
581 ld 22, FCS_h0(26) // 22 = old .host0
582 ld 23, FCS_h1(26) // 23 = old .host1
583 std 20, FCS_g0(26) // new .guest0 = guest
584 std 23, FCS_h0(26) // new .host0 = old .host1
585 std 21, FCS_g1(26) // new .guest1 = old .guest0
586 std 22, FCS_h1(26) // new .host1 = old .host0
587 /* bump the way-1 hit counter (increment sequence missing here). */
588 ld 24, .tocent__vgPlain_stats__n_xIndir_hits1_32@toc(2)
592 // goto old .host1 a.k.a. new .host0
/* Way 2 probe and swap (loads/stores elided in this extraction). */
599 cmpd 24, 20 // cmp against .guest2
601 // hit at way 2; swap upwards
610 ld 24, .tocent__vgPlain_stats__n_xIndir_hits2_32@toc(2)
614 // goto old .host2 a.k.a. new .host1
/* Way 3 probe and swap (loads/stores elided in this extraction). */
621 cmpd 24, 20 // cmp against .guest3
623 // hit at way 3; swap upwards
632 ld 24, .tocent__vgPlain_stats__n_xIndir_hits3_32@toc(2)
636 // goto old .host3 a.k.a. new .host2
641 4: // fast lookup failed:
643 ld 24, .tocent__vgPlain_stats__n_xIndir_misses_32@toc(2)
/* Miss: hand VG_TRC_INNER_FASTMISS back to C so the slow lookup and
   (if needed) translation can run. */
648 li 6,VG_TRC_INNER_FASTMISS
653 .size VG_(disp_cp_xindir),.-VG_(disp_cp_xindir)
656 /* ------ Assisted jump ------ */
/* Continuation point for an assisted transfer: the generated code has
   already placed the TRC in r31 (see comment below); the (elided)
   tail moves it to the return registers and exits via the postamble.
   NOTE(review): ELFv1/ELFv2 conditionals and the body instructions
   are missing from this extraction (line numbers jump 678 -> 683). */
659 .globl VG_(disp_cp_xassisted)
661 .type VG_(disp_cp_xassisted),@function
662 VG_(disp_cp_xassisted):
/* ELFv1 variant: procedure descriptor + dot-symbol entry. */
666 VG_(disp_cp_xassisted):
667 .quad .VG_(disp_cp_xassisted),.TOC.@tocbase,0
/* ELFv2 global entry: derive TOC pointer (r2) from entry addr (r12). */
671 0: addis 2, 12,.TOC.-0b@ha
673 .localentry VG_(disp_cp_xassisted), .-VG_(disp_cp_xassisted)
675 .type .VG_(disp_cp_xassisted),@function
676 .globl .VG_(disp_cp_xassisted)
677 .VG_(disp_cp_xassisted):
678 /* r31 contains the TRC */
683 .size VG_(disp_cp_xassisted),.-VG_(disp_cp_xassisted)
686 /* ------ Event check failed ------ */
/* Continuation point taken when a translation's event-counter check
   fails: return VG_TRC_INNER_COUNTERZERO so the scheduler can regain
   control.  NOTE(review): ELFv1/ELFv2 conditionals and the branch to
   the postamble are missing from this extraction. */
689 .globl VG_(disp_cp_evcheck_fail)
691 .type VG_(disp_cp_evcheck_fail),@function
692 VG_(disp_cp_evcheck_fail):
/* ELFv1 variant: procedure descriptor + dot-symbol entry. */
696 VG_(disp_cp_evcheck_fail):
697 .quad .VG_(disp_cp_evcheck_fail),.TOC.@tocbase,0
/* ELFv2 global entry: derive TOC pointer (r2) from entry addr (r12). */
701 0: addis 2, 12,.TOC.-0b@ha
703 .localentry VG_(disp_cp_evcheck_fail), .-VG_(disp_cp_evcheck_fail)
705 .type .VG_(disp_cp_evcheck_fail),@function
706 .globl .VG_(disp_cp_evcheck_fail)
707 .VG_(disp_cp_evcheck_fail):
708 li 6,VG_TRC_INNER_COUNTERZERO
712 .size VG_(disp_cp_evcheck_fail),.-VG_(disp_cp_evcheck_fail)
/* Size of the ELFv1 dot-symbol (code) entry of disp_run_translations;
   normally guarded by the ELFv1 branch of a _CALL_ELF conditional
   that is missing from this extraction. */
715 .size .VG_(disp_run_translations), .-.VG_(disp_run_translations)
717 #endif // defined(VGP_ppc64le_linux)
719 /* Let the linker know we don't need an executable stack */
722 /*--------------------------------------------------------------------*/
724 /*--------------------------------------------------------------------*/