/*----------------------------------------------------------------------------*/
/*--- begin                                    host_generic_reg_alloc3.c ---*/
/*----------------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2017-2017 Ivo Raisr

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/
#include "libvex_basictypes.h"
#include "libvex.h"

#include "main_util.h"
#include "host_generic_regs.h"
/* Set to 1 for lots of debugging output. */
#define DEBUG_REGALLOC 0

/* Set to 1 for sanity checking at every instruction.
   Set to 0 for sanity checking only every 17th one and the last one. */
#define SANITY_CHECKS_EVERY_INSTR 0


#define INVALID_INSTRNO (-2)
#define INVALID_INDEX (-2)
/* Register allocator state is kept in an array of VRegState's.
   There is an element for every virtual register (vreg).
   Elements are indexed [0 .. n_vregs-1].
   Records information about vreg live range and its state. */
typedef
   struct {
      /* Live range, register class and spill offset are computed during the
         first register allocator pass and remain unchanged after that. */

      /* This vreg becomes live with this instruction (inclusive). Contains
         either an instruction number or INVALID_INSTRNO. */
      Short live_after;

      /* This vreg becomes dead before this instruction (exclusive). Contains
         either an instruction number or INVALID_INSTRNO. */
      Short dead_before;

      /* What kind of register this is. */
      HRegClass reg_class;

      /* What is its current disposition? */
      enum { Unallocated, /* Neither spilled nor assigned to a real reg. */
             Assigned,    /* Assigned to a real register, viz rreg. */
             Spilled      /* Spilled to the spill slot. */
           } disp;

      /* If .disp == Assigned, what rreg is it bound to? */
      HReg rreg;

      /* The "home" spill slot. The offset is relative to the beginning of
         the guest state. */
      UShort spill_offset;

      /* This vreg (vregS) is coalesced to another vreg
         if |coalescedTo| != INVALID_HREG.
         Coalescing means that there is a MOV instruction which occurs in the
         instruction stream right at vregS' dead_before
         and vregD's live_after. */
      HReg coalescedTo;    /* Which vreg it is coalesced to. */
      HReg coalescedFirst; /* First vreg in the coalescing chain. */

      /* If this vregS is coalesced to another vregD, what is the combined
         dead_before for vregS+vregD. Used to effectively allocate
         registers. */
      Short effective_dead_before;
   }
   VRegState;
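
/* Illustrative example (not in the original source): a vreg first written
   by instruction 3 and last read by instruction 7 has live_after == 3 and
   dead_before == 8, i.e. the half-open interval [3, 8). A vreg that is
   never mentioned keeps live_after == INVALID_INSTRNO. */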
/* The allocator also maintains a redundant array of indexes (rreg_state) from
   rreg numbers back to entries in vreg_state. It is redundant because iff
   rreg_state[r] == v then hregNumber(vreg_state[v].rreg) == r -- that is, the
   two entries point at each other. The purpose of this is to speed up
   activities which involve looking for a particular rreg: there is no need to
   scan the vreg_state looking for it, just index directly into rreg_state.
   The FAQ "does this rreg already have an associated vreg" is the main
   beneficiary.
   The identity of the real register is not recorded here, because the index
   of this structure in |rreg_state| is the index number of the register, and
   the register itself can be extracted from the RRegUniverse (univ). */
typedef
   struct {
      /* What is its current disposition? */
      enum { Free,     /* Not bound to any vreg. */
             Bound,    /* Bound to a vreg, viz vreg. */
             Reserved  /* Reserved for an instruction. */
           } disp;

      /* If .disp == Bound, what vreg is it bound to? */
      HReg vreg;

      /* If .disp == Bound, has the associated vreg been reloaded from its
         spill slot recently and is this rreg still equal to that spill slot?
         Avoids unnecessarily spilling that vreg later, when this rreg needs
         to be reserved. */
      Bool eq_spill_slot;
   }
   RRegState;
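
/* Illustrative example (not in the original source): if vreg 5 is Assigned
   to rreg 2, then rreg_state[2].disp == Bound and rreg_state[2].vreg
   denotes vreg 5, while hregIndex(vreg_state[5].rreg) == 2. The question
   "is rreg 2 already taken?" is then a single array lookup. */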
/* Records information on a real-register live range, associated with
   a particular real register. Computed once; does not change. */
typedef
   struct {
      /* This rreg becomes live with this instruction (inclusive). Contains
         either an instruction number or INVALID_INSTRNO. */
      Short live_after;

      /* This rreg becomes dead before this instruction (exclusive). Contains
         either an instruction number or INVALID_INSTRNO. */
      Short dead_before;
   }
   RRegLR;

/* Live ranges for a single rreg and the current one.
   Live ranges are computed during the first register allocator pass and
   remain unchanged after that.
   The identity of the real register is not recorded here, because the index
   of this structure in |rreg_lr_state| is the index number of the register,
   and the register itself can be extracted from the RRegUniverse (univ). */
typedef
   struct {
      /* All live ranges for this rreg. */
      RRegLR* lrs;
      UInt    lrs_size; /* Number of elements allocated in |lrs|. */
      UInt    lrs_used; /* Number of elements of |lrs| actually used. */

      /* Live range corresponding to the currently processed instruction.
         Points into |lrs| array. */
      RRegLR* lr_current;
      UInt    lr_current_idx; /* Index of |lr_current| in |lrs|. */
   }
   RRegLRState;
/* v and r are always unsigned, wish we could static assert that */
#define IS_VALID_VREGNO(v) ((v) < n_vregs)
#define IS_VALID_RREGNO(r) ((r) < n_rregs)

#define FREE_VREG(v)             \
   do {                          \
      (v)->disp = Unallocated;   \
      (v)->rreg = INVALID_HREG;  \
   } while (0)

#define FREE_RREG(r)                      \
   do {                                   \
      (r)->disp          = Free;          \
      (r)->vreg          = INVALID_HREG;  \
      (r)->eq_spill_slot = False;         \
   } while (0)
/* Compute the index of the highest and lowest 1 in a ULong, respectively.
   Results are undefined if the argument is zero. Don't pass it zero :) */
static inline UInt ULong__maxIndex ( ULong w64 ) {
   return 63 - __builtin_clzll(w64);
}

static inline UInt ULong__minIndex ( ULong w64 ) {
   return __builtin_ctzll(w64);
}
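
/* Worked example (not in the original source): for w64 == 0x12
   (binary 10010), ULong__minIndex returns 1 and ULong__maxIndex returns 4.
   Stage 1 below uses these to bound its scan over the rRead/rWritten
   bitmasks. */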
static inline void enlarge_rreg_lrs(RRegLRState* rreg_lrs)
{
   vassert(rreg_lrs->lrs_used == rreg_lrs->lrs_size);

   RRegLR* lr2 = LibVEX_Alloc_inline(2 * rreg_lrs->lrs_used * sizeof(RRegLR));
   for (UInt l = 0; l < rreg_lrs->lrs_used; l++) {
      lr2[l] = rreg_lrs->lrs[l];
   }

   rreg_lrs->lrs = lr2;
   rreg_lrs->lrs_size = 2 * rreg_lrs->lrs_used;
}
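
/* Note (added commentary, not in the original source): the live-range array
   grows by doubling, so a sequence of n appends costs O(n) element copies in
   total. LibVEX_Alloc_inline draws from VEX's per-translation temporary
   storage, so the old block is simply abandoned rather than freed. */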
#define PRINT_STATE                                                \
   do {                                                            \
      print_state(con, vreg_state, n_vregs, rreg_state, n_rregs,   \
                  rreg_lr_state, ii);                              \
   } while (0)
static inline void print_state(
   const RegAllocControl* con,
   const VRegState* vreg_state, UInt n_vregs,
   const RRegState* rreg_state, UInt n_rregs,
   const RRegLRState* rreg_lr_state,
   UShort current_ii)
{
#  define RIGHT_JUSTIFY(_total, _written)                    \
      do {                                                   \
         for (Int w = (_total) - (_written); w > 0; w--) {   \
            vex_printf(" ");                                 \
         }                                                   \
      } while (0)

   for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
      const VRegState* vreg = &vreg_state[v_idx];

      if (vreg->live_after == INVALID_INSTRNO) {
         continue; /* This is a dead vreg. It never becomes live. */
      }
      vex_printf("vreg_state[%3u] ", v_idx);

      UInt written;
      switch (vreg->disp) {
      case Unallocated:
         written = vex_printf("unallocated");
         break;
      case Assigned:
         written = vex_printf("assigned to ");
         written += con->ppReg(vreg->rreg);
         break;
      case Spilled:
         written = vex_printf("spilled at offset %u", vreg->spill_offset);
         break;
      default:
         vassert(0);
      }
      RIGHT_JUSTIFY(25, written);

      written = vex_printf("lr: [%d, %d) ",
                           vreg->live_after, vreg->dead_before);
      RIGHT_JUSTIFY(15, written);

      written = vex_printf("effective lr: [%d, %d)",
                           vreg->live_after, vreg->effective_dead_before);
      RIGHT_JUSTIFY(25, written);

      if (vreg->live_after > (Short) current_ii) {
         vex_printf("[not live yet]\n");
      } else if ((Short) current_ii >= vreg->dead_before) {
         if (hregIsInvalid(vreg->coalescedTo)) {
            vex_printf("[now dead]\n");
         } else {
            vex_printf("[now dead, coalesced to ");
            con->ppReg(vreg->coalescedTo);
            vex_printf("]\n");
         }
      } else {
         vex_printf("[live]\n");
      }
   }

   for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
      const RRegState*   rreg     = &rreg_state[r_idx];
      const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
      vex_printf("rreg_state[%2u] = ", r_idx);
      UInt written = con->ppReg(con->univ->regs[r_idx]);
      RIGHT_JUSTIFY(10, written);

      switch (rreg->disp) {
      case Free:
         vex_printf("free\n");
         break;
      case Bound:
         vex_printf("bound for ");
         con->ppReg(rreg->vreg);
         if (rreg->eq_spill_slot) {
            vex_printf(" (equal to its spill slot)");
         }
         vex_printf("\n");
         break;
      case Reserved:
         vex_printf("reserved - live range [%d, %d)\n",
                    rreg_lrs->lr_current->live_after,
                    rreg_lrs->lr_current->dead_before);
         break;
      }
   }

#  undef RIGHT_JUSTIFY
}
static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out,
                              const RegAllocControl* con, const HChar* why)
{
   if (DEBUG_REGALLOC) {
      con->ppInstr(instr, con->mode64);
      if (why != NULL) {
         vex_printf(" (%s)", why);
      }
      vex_printf("\n");
   }

   addHInstr(instrs_out, instr);
}
/* Updates register allocator state after a vreg has been spilled. */
static inline void mark_vreg_spilled(
   UInt v_idx, VRegState* vreg_state, UInt n_vregs,
   RRegState* rreg_state, UInt n_rregs)
{
   HReg rreg = vreg_state[v_idx].rreg;
   UInt r_idx = hregIndex(rreg);

   vreg_state[v_idx].disp = Spilled;
   vreg_state[v_idx].rreg = INVALID_HREG;
   FREE_RREG(&rreg_state[r_idx]);
}
/* Spills a vreg assigned to some rreg.
   The vreg is spilled and the rreg is freed.
   Returns rreg's index. */
static inline UInt spill_vreg(
   HReg vreg, UInt v_idx, UInt current_ii, VRegState* vreg_state, UInt n_vregs,
   RRegState* rreg_state, UInt n_rregs, HInstrArray* instrs_out,
   const RegAllocControl* con)
{
   /* Check some invariants first. */
   vassert(IS_VALID_VREGNO((v_idx)));
   vassert(vreg_state[v_idx].disp == Assigned);
   HReg rreg = vreg_state[v_idx].rreg;
   UInt r_idx = hregIndex(rreg);
   vassert(IS_VALID_RREGNO(r_idx));
   vassert(hregClass(con->univ->regs[r_idx]) == hregClass(vreg));
   vassert(vreg_state[v_idx].dead_before > (Short) current_ii);
   vassert(vreg_state[v_idx].reg_class != HRcINVALID);

   /* Generate spill. */
   HInstr* spill1 = NULL;
   HInstr* spill2 = NULL;
   con->genSpill(&spill1, &spill2, rreg, vreg_state[v_idx].spill_offset,
                 con->mode64);
   vassert(spill1 != NULL || spill2 != NULL); /* cannot be both NULL */
   if (spill1 != NULL) {
      emit_instr(spill1, instrs_out, con, "spill1");
   }
   if (spill2 != NULL) {
      emit_instr(spill2, instrs_out, con, "spill2");
   }

   mark_vreg_spilled(v_idx, vreg_state, n_vregs, rreg_state, n_rregs);
   return r_idx;
}
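
/* Note (added commentary, not in the original source): genSpill may emit one
   or two instructions; a target backend can need two, for example when a
   large spill offset cannot be encoded in a single store. At least one of
   |spill1|/|spill2| must be produced, as asserted above. */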
/* Chooses a vreg to be spilled based on various criteria.
   The vreg must not be from the instruction being processed, that is, it must
   not be listed in reg_usage->vRegs. */
static inline HReg find_vreg_to_spill(
   VRegState* vreg_state, UInt n_vregs,
   RRegState* rreg_state, UInt n_rregs,
   const HRegUsage* instr_regusage, HRegClass target_hregclass,
   const HRegUsage* reg_usage, UInt scan_forward_from, UInt scan_forward_max,
   const RegAllocControl* con)
{
   /* Scan forwards a few instructions to find the most distant mentioned
      use of a vreg. We can scan in the range of (inclusive):
      - reg_usage[scan_forward_from]
      - reg_usage[scan_forward_end], where scan_forward_end
           = MIN(scan_forward_max, scan_forward_from + FEW_INSTRUCTIONS). */
#  define FEW_INSTRUCTIONS 20
   UInt scan_forward_end
      = (scan_forward_max <= scan_forward_from + FEW_INSTRUCTIONS) ?
        scan_forward_max : scan_forward_from + FEW_INSTRUCTIONS;
#  undef FEW_INSTRUCTIONS

   HReg vreg_found = INVALID_HREG;
   UInt distance_so_far = 0;

   for (UInt r_idx = con->univ->allocable_start[target_hregclass];
        r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) {
      if (rreg_state[r_idx].disp == Bound) {
         HReg vreg = rreg_state[r_idx].vreg;
         if (! HRegUsage__contains(instr_regusage, vreg)) {
            UInt ii = scan_forward_from;
            for ( ; ii <= scan_forward_end; ii++) {
               if (HRegUsage__contains(&reg_usage[ii], vreg)) {
                  break;
               }
            }

            if (ii >= distance_so_far) {
               distance_so_far = ii;
               vreg_found = vreg;
               if (distance_so_far == scan_forward_end) {
                  break; /* We are at the end. Nothing could be better. */
               }
            }
         }
      }
   }

   if (hregIsInvalid(vreg_found)) {
      vex_printf("doRegisterAllocation_v3: cannot find a register in class: ");
      ppHRegClass(target_hregclass);
      vex_printf("\n");
      vpanic("doRegisterAllocation_v3: cannot find a register.");
   }

   return vreg_found;
}
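
/* Illustrative example (not in the original source): with
   scan_forward_from == 10, suppose bound vreg A is next mentioned at
   instruction 12 and bound vreg B not before instruction 25. B is chosen
   as the spill victim, since its next use lies furthest ahead (a
   Belady-style heuristic, cut off after FEW_INSTRUCTIONS). */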
/* Finds a free rreg of the correct class.
   Tries to find an rreg whose hard live range (if any) starts after the
   vreg's live range ends. If that is not possible, then at least one whose
   live range is as far ahead in the incoming instruction stream as possible.
   An ideal rreg candidate is a caller-save register for short-lived vregs
   and a callee-save register for long-lived vregs because it won't need to
   be spilled around helper calls. */
static Int find_free_rreg(
   const VRegState* vreg_state, UInt n_vregs,
   const RRegState* rreg_state, UInt n_rregs,
   const RRegLRState* rreg_lr_state,
   UInt v_idx, UInt current_ii, HRegClass target_hregclass,
   Bool reserve_phase, const RegAllocControl* con)
{
   Int r_idx_found = INVALID_INDEX;
   UInt distance_so_far = 0; /* running max for |live_after - current_ii| */
   const VRegState* vreg = &vreg_state[v_idx];

   /* Assume the majority of vregs are short-lived. Start scanning from the
      caller-save registers first. */
   for (Int r_idx = (Int) con->univ->allocable_end[target_hregclass];
        r_idx >= (Int) con->univ->allocable_start[target_hregclass]; r_idx--) {
      const RRegState*   rreg     = &rreg_state[r_idx];
      const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
      if (rreg->disp == Free) {
         if (rreg_lrs->lrs_used == 0) {
            r_idx_found = r_idx;
            break; /* There could be nothing better, so break now. */
         } else {
            const RRegLR* lr = rreg_lrs->lr_current;
            if (lr->live_after > (Short) current_ii) {
               /* The rreg's hard live range is not live, yet. */
               if (vreg->effective_dead_before <= lr->live_after) {
                  r_idx_found = r_idx;
                  break; /* The vreg is short-lived; it fits in. */
               }
               if ((lr->live_after - (Short) current_ii) > distance_so_far) {
                  distance_so_far = lr->live_after - (Short) current_ii;
                  r_idx_found = r_idx;
               }
            } else if ((Short) current_ii >= lr->dead_before) {
               /* Now dead. Effectively as if there is no LR now. */
               r_idx_found = r_idx;
               break; /* There could be nothing better, so break now. */
            } else {
               /* Going live for this instruction. This could happen only when
                  rregs are being reserved en masse, for example before
                  a call. */
               vassert(reserve_phase);
            }
         }
      }
   }

   return r_idx_found;
}
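
/* Note (added commentary, not in the original source): scanning downwards
   from allocable_end assumes the RRegUniverse orders callee-save registers
   before caller-save ones within each class, so caller-save candidates are
   tried first and short-lived vregs tend not to occupy registers that must
   survive helper calls. */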
/* A target-independent register allocator (v3). Requires various functions
   which it uses to deal abstractly with instructions and registers, since it
   cannot have any target-specific knowledge.

   Returns a new list of instructions, which, as a result of the behaviour of
   mapRegs, will be in-place modifications of the original instructions.

   Requires that the incoming code has been generated using vreg numbers
   0, 1 .. n_vregs-1. Appearance of a vreg outside that range is a checked
   error.

   Takes unallocated instructions and returns allocated instructions. */
HInstrArray* doRegisterAllocation_v3(
   /* Incoming virtual-registerised code. */
   HInstrArray* instrs_in,

   /* Register allocator controls to use. */
   const RegAllocControl* con
)
{
   vassert((con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN) == 0);
   /* The main register allocator state. */
   UInt n_vregs = instrs_in->n_vregs;
   VRegState* vreg_state = NULL;
   if (n_vregs > 0) {
      vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(VRegState));
   }

   /* If this is not so, the universe we have is nonsensical. */
   UInt n_rregs = con->univ->allocable;
   vassert(n_rregs > 0);
   STATIC_ASSERT(N_RREGUNIVERSE_REGS == 64);

   /* Redundant rreg -> vreg state. */
   RRegState* rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));

   /* Info on rreg live ranges. */
   RRegLRState* rreg_lr_state
      = LibVEX_Alloc_inline(n_rregs * sizeof(RRegLRState));

   /* Info on register usage in the incoming instruction array. Computed once
      and remains unchanged, more or less; updated sometimes by the
      direct-reload optimisation. */
   HRegUsage* reg_usage
      = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);

   /* Mark the vreg indexes where coalesce chains start. */
   UInt* coalesce_heads = LibVEX_Alloc_inline(n_vregs * sizeof(UInt));
   UInt nr_coalesce_heads = 0;

   /* The live range numbers are signed shorts, so limiting the number of
      instructions to 15000 comfortably guards against them overflowing
      32767. */
   vassert(instrs_in->arr_used <= 15000);

   /* The output array of instructions. */
   HInstrArray* instrs_out = newHInstrArray();
#  define OFFENDING_VREG(_v_idx, _instr, _mode)                          \
   do {                                                                  \
      vex_printf("\n\nOffending vreg = %u\n", (_v_idx));                 \
      vex_printf("\nOffending instruction = ");                          \
      con->ppInstr((_instr), con->mode64);                               \
      vex_printf("\n");                                                  \
      vpanic("doRegisterAllocation_v3: first event for vreg is "#_mode   \
             " (should be Write)");                                      \
   } while (0)

#  define OFFENDING_RREG(_r_idx, _instr, _mode)                          \
   do {                                                                  \
      vex_printf("\n\nOffending rreg = ");                               \
      con->ppReg(con->univ->regs[(_r_idx)]);                             \
      vex_printf("\nOffending instruction = ");                          \
      con->ppInstr((_instr), con->mode64);                               \
      vex_printf("\n");                                                  \
      vpanic("doRegisterAllocation_v3: first event for rreg is "#_mode   \
             " (should be Write)");                                      \
   } while (0)
/* Finds an rreg of the correct class.
   If a free rreg is not found, then spills a vreg not used by the current
   instruction and makes free the corresponding rreg. */
#  define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase)  \
   ({                                                                      \
      Int _r_free_idx = find_free_rreg(                                    \
         vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state,          \
         (_v_idx), (_ii), (_reg_class), (_reserve_phase), con);            \
      if (_r_free_idx == INVALID_INDEX) {                                  \
         HReg vreg_to_spill = find_vreg_to_spill(                          \
            vreg_state, n_vregs, rreg_state, n_rregs,                      \
            &reg_usage[(_ii)], (_reg_class),                               \
            reg_usage, (_ii) + 1,                                          \
            instrs_in->arr_used - 1, con);                                 \
         _r_free_idx = spill_vreg(vreg_to_spill, hregIndex(vreg_to_spill), \
                                  (_ii), vreg_state, n_vregs,              \
                                  rreg_state, n_rregs,                     \
                                  instrs_out, con);                        \
      }                                                                    \
      vassert(IS_VALID_RREGNO(_r_free_idx));                               \
      _r_free_idx;                                                         \
   })
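
/* Note (added commentary, not in the original source): the macro is written
   so that its result can be consumed directly as a value, e.g.

      UInt r_idx = FIND_OR_MAKE_FREE_RREG(
                      ii, v_idx, vreg_state[v_idx].reg_class, False);

   which is exactly how stage 4 below uses it. */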
   /* --- Stage 0. Initialize the state. --- */
   for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
      vreg_state[v_idx].live_after            = INVALID_INSTRNO;
      vreg_state[v_idx].dead_before           = INVALID_INSTRNO;
      vreg_state[v_idx].reg_class             = HRcINVALID;
      vreg_state[v_idx].disp                  = Unallocated;
      vreg_state[v_idx].rreg                  = INVALID_HREG;
      vreg_state[v_idx].spill_offset          = 0;
      vreg_state[v_idx].coalescedTo           = INVALID_HREG;
      vreg_state[v_idx].coalescedFirst        = INVALID_HREG;
      vreg_state[v_idx].effective_dead_before = INVALID_INSTRNO;
   }

   for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
      rreg_state[r_idx].disp          = Free;
      rreg_state[r_idx].vreg          = INVALID_HREG;
      rreg_state[r_idx].eq_spill_slot = False;
   }

   for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
      RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
      rreg_lrs->lrs_size = 4;
      rreg_lrs->lrs = LibVEX_Alloc_inline(rreg_lrs->lrs_size
                                          * sizeof(RRegLR));
      rreg_lrs->lrs_used = 0;
      rreg_lrs->lr_current = &rreg_lrs->lrs[0];
      rreg_lrs->lr_current_idx = 0;
   }
   /* --- Stage 1. Scan the incoming instructions. --- */
   for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
      const HInstr* instr = instrs_in->arr[ii];

      con->getRegUsage(&reg_usage[ii], instr, con->mode64);
      reg_usage[ii].isVregVregMove
         = reg_usage[ii].isRegRegMove
           && hregIsVirtual(reg_usage[ii].regMoveSrc)
           && hregIsVirtual(reg_usage[ii].regMoveDst);

      if (DEBUG_REGALLOC) {
         vex_printf("\n%u stage 1: ", ii);
         con->ppInstr(instr, con->mode64);
         vex_printf("\n");
         ppHRegUsage(con->univ, &reg_usage[ii]);
      }

      /* Process virtual registers mentioned in the instruction. */
      for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
         HReg vreg = reg_usage[ii].vRegs[j];
         vassert(hregIsVirtual(vreg));

         UInt v_idx = hregIndex(vreg);
         if (!IS_VALID_VREGNO(v_idx)) {
            vex_printf("\n");
            con->ppInstr(instr, con->mode64);
            vex_printf("\n");
            vex_printf("vreg %u (n_vregs %u)\n", v_idx, n_vregs);
            vpanic("doRegisterAllocation_v3: out-of-range vreg");
         }

         /* Note the register class. */
         if (vreg_state[v_idx].reg_class == HRcINVALID) {
            /* First mention of this vreg. */
            vreg_state[v_idx].reg_class = hregClass(vreg);
         } else {
            /* Seen it before, so check for consistency. */
            vassert(vreg_state[v_idx].reg_class == hregClass(vreg));
         }

         /* Consider live ranges. */
         switch (reg_usage[ii].vMode[j]) {
         case HRmRead:
            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
               OFFENDING_VREG(v_idx, instr, "Read");
            }
            break;
         case HRmWrite:
            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
               vreg_state[v_idx].live_after = toShort(ii);
            }
            break;
         case HRmModify:
            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
               OFFENDING_VREG(v_idx, instr, "Modify");
            }
            break;
         default:
            vassert(0);
         }

         vreg_state[v_idx].dead_before = toShort(ii + 1);
         vreg_state[v_idx].effective_dead_before
            = vreg_state[v_idx].dead_before;
      }
      /* Process real registers mentioned in the instruction. */
      const ULong rRead      = reg_usage[ii].rRead;
      const ULong rWritten   = reg_usage[ii].rWritten;
      const ULong rMentioned = rRead | rWritten;

      if (rMentioned != 0) {
         UInt rReg_minIndex = ULong__minIndex(rMentioned);
         UInt rReg_maxIndex = ULong__maxIndex(rMentioned);
         /* Don't bother to look at registers which are not available to the
            allocator, such as the stack or guest state pointers; we never
            visit them. We asserted above that n_rregs > 0, so (n_rregs - 1)
            is safe. */
         if (rReg_maxIndex >= n_rregs) {
            rReg_maxIndex = n_rregs - 1;
         }

         for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) {
            const ULong jMask = 1ULL << r_idx;

            if (LIKELY((rMentioned & jMask) == 0)) {
               continue;
            }

            RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
            const Bool isR = (rRead    & jMask) != 0;
            const Bool isW = (rWritten & jMask) != 0;

            if (isW && !isR) {
               if (rreg_lrs->lrs_used == rreg_lrs->lrs_size) {
                  enlarge_rreg_lrs(rreg_lrs);
               }

               rreg_lrs->lrs[rreg_lrs->lrs_used].live_after  = toShort(ii);
               rreg_lrs->lrs[rreg_lrs->lrs_used].dead_before = toShort(ii + 1);
               rreg_lrs->lrs_used += 1;
            } else if (!isW && isR) {
               if ((rreg_lrs->lrs_used == 0)
                   || (rreg_lrs->lrs[rreg_lrs->lrs_used - 1].live_after
                       == INVALID_INSTRNO)) {
                  OFFENDING_RREG(r_idx, instr, "Read");
               }
               rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before
                  = toShort(ii + 1);
            } else {
               vassert(isR && isW);
               if ((rreg_lrs->lrs_used == 0)
                   || (rreg_lrs->lrs[rreg_lrs->lrs_used - 1].live_after
                       == INVALID_INSTRNO)) {
                  OFFENDING_RREG(r_idx, instr, "Modify");
               }
               rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before
                  = toShort(ii + 1);
            }
         }
      }
   }
   if (DEBUG_REGALLOC) {
      for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
         vex_printf("vreg %3u: [%3d, %3d)\n",
                    v_idx, vreg_state[v_idx].live_after,
                    vreg_state[v_idx].dead_before);
      }

      for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
         vex_printf("rreg %2u (", r_idx);
         UInt written = con->ppReg(con->univ->regs[r_idx]);
         vex_printf("):");
         for (Int t = 15 - written; t > 0; t--) {
            vex_printf(" ");
         }

         const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
         for (UInt l = 0; l < rreg_lrs->lrs_used; l++) {
            vex_printf("[%3d, %3d) ",
                       rreg_lrs->lrs[l].live_after,
                       rreg_lrs->lrs[l].dead_before);
         }
         vex_printf("\n");
      }
   }
   /* --- Stage 2. MOV coalescing (preparation). --- */
   /* Optimise register coalescing:
         MOV v <-> v coalescing (done here).
         MOV v <-> r coalescing (TODO: not yet, not here). */
   /* If doing a reg-reg move between two vregs, and the src's live range ends
      here and the dst's live range starts here, coalesce the src vreg
      into the dst vreg. */
   Bool coalesce_happened = False;
   for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
      if (reg_usage[ii].isVregVregMove) {
         HReg vregS = reg_usage[ii].regMoveSrc;
         HReg vregD = reg_usage[ii].regMoveDst;

         /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
         vassert(hregClass(vregS) == hregClass(vregD));
         UInt vs_idx = hregIndex(vregS);
         UInt vd_idx = hregIndex(vregD);
         vassert(IS_VALID_VREGNO(vs_idx));
         vassert(IS_VALID_VREGNO(vd_idx));
         vassert(! sameHReg(vregS, vregD));
         VRegState* vs_st = &vreg_state[vs_idx];
         VRegState* vd_st = &vreg_state[vd_idx];

         if ((vs_st->dead_before == ii + 1) && (vd_st->live_after == ii)) {
            /* Live ranges are adjacent. */

            vs_st->coalescedTo = vregD;
            if (hregIsInvalid(vs_st->coalescedFirst)) {
               vd_st->coalescedFirst = vregS;
               coalesce_heads[nr_coalesce_heads] = vs_idx;
               nr_coalesce_heads += 1;
            } else {
               vd_st->coalescedFirst = vs_st->coalescedFirst;
            }

            vreg_state[hregIndex(vd_st->coalescedFirst)].effective_dead_before
               = vd_st->dead_before;

            if (DEBUG_REGALLOC) {
               vex_printf("vreg coalescing: ");
               con->ppReg(vregS);
               vex_printf(" -> ");
               con->ppReg(vregD);
               vex_printf("\n");
            }

            coalesce_happened = True;
         }
      }
   }
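
   /* Illustrative example (not in the original source): for an instruction
      "ii: MOV v1 -> v2" where v1 has live range [4, ii+1) and v2 has
      [ii, 20), the two ranges are adjacent, so v1 is coalesced into v2 and
      the pair can later share one rreg and one spill slot. */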
   /* --- Stage 3. Allocate spill slots. --- */

   /* Each spill slot is 8 bytes long. For vregs which take more than 64 bits
      to spill (for example classes Flt64 and Vec128), we have to allocate two
      consecutive spill slots. For 256 bit registers (class Vec256), we have
      to allocate four consecutive spill slots.

      For Vec128-class on PowerPC, the spill slot's actual address must be
      16-byte aligned. Since the spill slot's address is computed as an offset
      from the guest state pointer, and since the user of the generated code
      must set that pointer to a 32-byte aligned value, we have the residual
      obligation here of choosing a 16-byte aligned spill slot offset for
      Vec128-class values. Since each spill slot is 8 bytes long, that means
      for Vec128-class values we must allocate a spill slot number which is
      zero mod 2.

      Similarly, for Vec256 class on amd64, find a spill slot number which is
      zero mod 4. This guarantees it will be 32-byte aligned, which isn't
      actually necessary on amd64 (we use movUpd etc to spill), but seems like
      a good idea.

      Do a rank-based allocation of vregs to spill slot numbers. We put as few
      values as possible in spill slots, but nevertheless need to have a spill
      slot available for all vregs, just in case. */
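
   /* Worked example (not in the original source; guest_sizeB == 1024 is an
      assumed value): spill offsets start at 3 * 1024 == 3072, past the guest
      state and its two shadow areas. Slot 0 is at offset 3072, slot 1 at
      3080, and an even slot number gives an offset that is 0 mod 16, as
      required for Vec128-class values. */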
#  define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
   STATIC_ASSERT((N_SPILL64S % 2) == 0);
   STATIC_ASSERT((LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN) == 0);

   Short ss_busy_until_before[N_SPILL64S];
   vex_bzero(&ss_busy_until_before, sizeof(ss_busy_until_before));

   for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
      /* True iff this vreg is unused. In which case we also expect that the
         reg_class field for it has not been set. */
      if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
         vassert(vreg_state[v_idx].reg_class == HRcINVALID);
         continue;
      }
      if (! hregIsInvalid(vreg_state[v_idx].coalescedFirst)) {
         /* Coalesced vregs should share the same spill slot with the first
            vreg in the coalescing chain. But we don't have that information,
            yet. */
         continue;
      }

      /* The spill slots are 64 bits in size. As per the comment on the
         definition of HRegClass in host_generic_regs.h, that means, to spill
         a vreg of class Flt64 or Vec128, we'll need to find two adjacent
         spill slots to use. For Vec256, we'll need to find four adjacent
         slots to use. Note, this logic needs to be kept in sync with the
         size info on the definition of HRegClass. */
      UInt ss_no;
      switch (vreg_state[v_idx].reg_class) {
      case HRcFlt64:
      case HRcVec128:
         /* Find two adjacent free slots which provide up to 128 bits to
            spill the vreg. Since we are trying to find an even:odd pair,
            move along in steps of 2 (slots). */
         for (ss_no = 0; ss_no < N_SPILL64S - 1; ss_no += 2)
            if (ss_busy_until_before[ss_no + 0] <= vreg_state[v_idx].live_after
                && ss_busy_until_before[ss_no + 1]
                   <= vreg_state[v_idx].live_after)
               break;
         if (ss_no >= N_SPILL64S - 1) {
            vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
         }
         ss_busy_until_before[ss_no + 0]
            = vreg_state[v_idx].effective_dead_before;
         ss_busy_until_before[ss_no + 1]
            = vreg_state[v_idx].effective_dead_before;
         break;
      default:
         /* The ordinary case -- just find a single lowest-numbered spill
            slot which is available at the start point of this interval,
            and assign the interval to it. */
         for (ss_no = 0; ss_no < N_SPILL64S; ss_no++) {
            if (ss_busy_until_before[ss_no] <= vreg_state[v_idx].live_after)
               break;
         }
         if (ss_no == N_SPILL64S) {
            vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
         }
         ss_busy_until_before[ss_no]
            = vreg_state[v_idx].effective_dead_before;
         break;
      }

      /* This reflects VEX's hard-wired knowledge of the guest state layout:
         the guest state itself, then two equal sized areas following it for
         two sets of shadow state, and then the spill area. */
      vreg_state[v_idx].spill_offset
         = toShort(con->guest_sizeB * 3 + ss_no * 8);

      /* Independent check that we've made a sane choice of the slot. */
      switch (vreg_state[v_idx].reg_class) {
      case HRcVec128: case HRcFlt64:
         vassert((vreg_state[v_idx].spill_offset % 16) == 0);
         break;
      default:
         vassert((vreg_state[v_idx].spill_offset % 8) == 0);
         break;
      }
   }
   /* Fill in the spill offsets and effective_dead_before for coalesced
      vregs. */
   for (UInt i = 0; i < nr_coalesce_heads; i++) {
      UInt vs_idx = coalesce_heads[i];
      Short effective_dead_before = vreg_state[vs_idx].effective_dead_before;
      UShort spill_offset = vreg_state[vs_idx].spill_offset;
      HReg vregD = vreg_state[vs_idx].coalescedTo;
      while (! hregIsInvalid(vregD)) {
         UInt vd_idx = hregIndex(vregD);
         vreg_state[vd_idx].effective_dead_before = effective_dead_before;
         vreg_state[vd_idx].spill_offset = spill_offset;
         vregD = vreg_state[vd_idx].coalescedTo;
      }
   }
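
   /* Illustrative example (not in the original source): for a chain
      v1 -> v5 -> v9 built in stage 2, v1 is the coalesce head; v5 and v9
      inherit v1's spill_offset and the chain-wide effective_dead_before,
      so the whole chain behaves as one value with a single home slot. */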
   if (DEBUG_REGALLOC && coalesce_happened) {
      vex_printf("After vreg<->vreg MOV coalescing:\n");

      for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
         if (vreg_state[v_idx].live_after != INVALID_INSTRNO) {
            vex_printf("vreg %3u --> spill offset %u\n",
                       v_idx, vreg_state[v_idx].spill_offset);
         }
      }
   }
   /* --- Stage 4. Process instructions. --- */
   for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
      HInstr* instr = instrs_in->arr[ii];

      if (DEBUG_REGALLOC) {
         vex_printf("\n====----====---- Instr %d ----====----====\n", ii);
         con->ppInstr(instrs_in->arr[ii], con->mode64);
         vex_printf("\n\nInitial state:\n");
         PRINT_STATE;
         vex_printf("\n");
      }
      /* ------------ Sanity checks ------------ */

      /* Sanity checks are relatively expensive. So they are done only once
         every 17 instructions, and just before the last instruction. */
      Bool do_sanity_check
         = toBool(
              SANITY_CHECKS_EVERY_INSTR
              || ii == instrs_in->arr_used - 1
              || (ii > 0 && (ii % 17) == 0)
           );

      if (do_sanity_check) {
         /* Sanity check: the vreg_state and rreg_state mutually-redundant
            mappings are consistent. If vreg_state[v].rreg points at some
            rreg_state entry then that rreg_state entry should point back at
            vreg_state[v]. */
         for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
            if (vreg_state[v_idx].disp == Assigned) {
               vassert(!hregIsVirtual(vreg_state[v_idx].rreg));

               UInt r_idx = hregIndex(vreg_state[v_idx].rreg);
               vassert(IS_VALID_RREGNO(r_idx));
               vassert(rreg_state[r_idx].disp == Bound);
               vassert(hregIndex(rreg_state[r_idx].vreg) == v_idx);

               vassert(hregClass(vreg_state[v_idx].rreg)
                       == hregClass(con->univ->regs[r_idx]));
            }
         }

         for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
            if (rreg_state[r_idx].disp == Bound) {
               vassert(hregIsVirtual(rreg_state[r_idx].vreg));

               UInt v_idx = hregIndex(rreg_state[r_idx].vreg);
               vassert(IS_VALID_VREGNO(v_idx));
               vassert(vreg_state[v_idx].disp == Assigned);
               vassert(hregIndex(vreg_state[v_idx].rreg) == r_idx);
            } else {
               vassert(rreg_state[r_idx].eq_spill_slot == False);
            }
         }

         /* Sanity check: if an rreg has been marked as Reserved, there must
            be a corresponding hard live range for it. */
         for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
            if (rreg_state[r_idx].disp == Reserved) {
               const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
               vassert(rreg_lrs->lrs_used > 0);
               vassert(rreg_lrs->lr_current_idx < rreg_lrs->lrs_used);
               vassert(rreg_lrs->lr_current->live_after <= (Short) ii);
               vassert((Short) ii < rreg_lrs->lr_current->dead_before);
            }
         }

         /* Sanity check: if vregS has been marked as coalesced to vregD,
            then the effective live range of vregS must also cover the live
            range of vregD. */
         /* The following sanity check is quite expensive. Some basic blocks
            contain very lengthy coalescing chains... */
         if (SANITY_CHECKS_EVERY_INSTR) {
            for (UInt vs_idx = 0; vs_idx < n_vregs; vs_idx++) {
               const VRegState* vS_st = &vreg_state[vs_idx];
               HReg vregD = vS_st->coalescedTo;
               while (! hregIsInvalid(vregD)) {
                  const VRegState* vD_st = &vreg_state[hregIndex(vregD)];
                  vassert(vS_st->live_after <= vD_st->live_after);
                  vassert(vS_st->effective_dead_before >= vD_st->dead_before);
                  vregD = vD_st->coalescedTo;
               }
            }
         }
      }
      /* --- MOV coalescing (finishing) --- */
      /* Optimise register coalescing:
            MOV v <-> v coalescing (finished here).
            MOV v <-> r coalescing (TODO: not yet). */
      if (reg_usage[ii].isVregVregMove) {
         HReg vregS = reg_usage[ii].regMoveSrc;
         HReg vregD = reg_usage[ii].regMoveDst;
         UInt vs_idx = hregIndex(vregS);
         UInt vd_idx = hregIndex(vregD);

         if (sameHReg(vreg_state[vs_idx].coalescedTo, vregD)) {
            /* Finally do the coalescing. */

            HReg rreg = vreg_state[vs_idx].rreg;
            switch (vreg_state[vs_idx].disp) {
            case Assigned: {
               vreg_state[vd_idx].rreg = rreg;
               UInt r_idx = hregIndex(rreg);
               vassert(rreg_state[r_idx].disp == Bound);
               rreg_state[r_idx].vreg = vregD;
               break;
            }
            case Spilled:
               vassert(hregIsInvalid(vreg_state[vs_idx].rreg));
               break;
            default:
               vassert(0);
            }

            vreg_state[vd_idx].disp = vreg_state[vs_idx].disp;
            FREE_VREG(&vreg_state[vs_idx]);

            if (DEBUG_REGALLOC) {
               vex_printf("coalesced: ");
               con->ppReg(vregS);
               vex_printf(" -> ");
               con->ppReg(vregD);
               vex_printf("\n\n");
            }

            /* In rare cases it can happen that vregD's live range ends here.
               Check, and if so, free the vreg and the rreg. This effectively
               means that either the translated program contained dead code
               (though VEX iropt passes are pretty good at eliminating it) or
               the VEX backend generated dead code. */
            if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) {
               if (vreg_state[vd_idx].disp == Assigned) {
                  UInt r_idx = hregIndex(rreg);
                  FREE_RREG(&rreg_state[r_idx]);
               }
               FREE_VREG(&vreg_state[vd_idx]);
            }

            /* Move on to the next instruction. We skip the post-instruction
               stuff because all required house-keeping was done here. */
            continue;
         }
      }
      /* --- Reserve and free rregs if needed. --- */
      /* If the rreg enters its hard live range and is not free:
         1. If the corresponding vreg is not used by the instruction, spill it.
         2. If the corresponding vreg is used by the instruction, then:
         2a. If there are no free rregs, spill a vreg not used by this
             instruction.
         2b. Move the corresponding vreg to a free rreg. This is better than
             spilling it and immediately reloading it.
      */
      const ULong rRead      = reg_usage[ii].rRead;
      const ULong rWritten   = reg_usage[ii].rWritten;
      const ULong rMentioned = rRead | rWritten;

      if (rMentioned != 0) {
         UInt rReg_minIndex = ULong__minIndex(rMentioned);
         UInt rReg_maxIndex = ULong__maxIndex(rMentioned);
         if (rReg_maxIndex >= n_rregs) {
            rReg_maxIndex = n_rregs - 1;
         }

         for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) {
            const ULong jMask = 1ULL << r_idx;

            if (LIKELY((rMentioned & jMask) == 0)) {
               continue;
            }

            RRegState* rreg = &rreg_state[r_idx];
            const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
            if (LIKELY(rreg_lrs->lrs_used == 0)) {
               continue;
            }
            if (rreg->disp == Reserved) {
               continue;
            }

            if ((rreg_lrs->lr_current->live_after <= (Short) ii)
                && ((Short) ii < rreg_lrs->lr_current->dead_before)) {
               switch (rreg->disp) {
               case Bound: {
                  /* Yes, there is an associated vreg. We need to deal with
                     it now, somehow. */
                  HReg vreg = rreg->vreg;
                  UInt v_idx = hregIndex(vreg);

                  if (! HRegUsage__contains(&reg_usage[ii], vreg)) {
                     if (rreg->eq_spill_slot) {
                        mark_vreg_spilled(v_idx, vreg_state, n_vregs,
                                          rreg_state, n_rregs);
                     } else {
                        /* Spill the vreg. It is not used by this
                           instruction. */
                        spill_vreg(vreg, v_idx, ii, vreg_state, n_vregs,
                                   rreg_state, n_rregs, instrs_out, con);
                     }
                  } else {
                     /* Find or make a free rreg to move this vreg to. */
                     UInt r_free_idx = FIND_OR_MAKE_FREE_RREG(
                        ii, v_idx, vreg_state[v_idx].reg_class, True);

                     /* Generate a "move" between real registers. */
                     HInstr* move = con->genMove(con->univ->regs[r_idx],
                                                 con->univ->regs[r_free_idx],
                                                 con->mode64);
                     vassert(move != NULL);
                     emit_instr(move, instrs_out, con, "move");

                     /* Update the register allocator state. */
                     vassert(vreg_state[v_idx].disp == Assigned);
                     vreg_state[v_idx].rreg = con->univ->regs[r_free_idx];
                     rreg_state[r_free_idx].disp          = Bound;
                     rreg_state[r_free_idx].vreg          = vreg;
                     rreg_state[r_free_idx].eq_spill_slot = rreg->eq_spill_slot;
                  }
                  break;
               }
               case Free:
                  break;
               default:
                  vassert(0);
               }

               /* Finally claim the rreg as reserved. */
               rreg->disp = Reserved;

               if (DEBUG_REGALLOC) {
                  vex_printf("rreg has been reserved: ");
                  con->ppReg(con->univ->regs[r_idx]);
                  vex_printf("\n");
               }
            }
         }
      }
      /* --- Direct reload optimisation. --- */
      /* If the instruction reads exactly one vreg which is currently spilled,
         and this is the last use of that vreg, see if we can convert
         the instruction into one that reads directly from the spill slot.
         This is clearly only possible for x86, amd64, and s390x targets,
         since ppc and arm are load-store architectures. If successful, replace
         instrs_in->arr[ii] with this new instruction, and recompute
         its reg_usage, so that the change is invisible to the standard-case
         handling that follows. */
      if ((con->directReload != NULL) && (reg_usage[ii].n_vRegs <= 2)) {
         Bool debug_direct_reload = False;
         UInt nreads = 0;
         HReg vreg_found = INVALID_HREG;
         Short spill_offset = 0;

         for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
            HReg vreg = reg_usage[ii].vRegs[j];
            vassert(hregIsVirtual(vreg));

            if (reg_usage[ii].vMode[j] == HRmRead) {
               nreads += 1;
               UInt v_idx = hregIndex(vreg);
               vassert(IS_VALID_VREGNO(v_idx));
               if (vreg_state[v_idx].disp == Spilled) {
                  /* Is this its last use? */
                  vassert(vreg_state[v_idx].dead_before >= (Short) (ii + 1));
                  if ((vreg_state[v_idx].dead_before == (Short) (ii + 1))
                      && hregIsInvalid(vreg_found)) {
                     vreg_found = vreg;
                     spill_offset = vreg_state[v_idx].spill_offset;
                  }
               }
            }
         }

         if (!hregIsInvalid(vreg_found) && (nreads == 1)) {
            if (reg_usage[ii].n_vRegs == 2) {
               vassert(! sameHReg(reg_usage[ii].vRegs[0],
                                  reg_usage[ii].vRegs[1]));
            }

            HInstr* reloaded = con->directReload(instrs_in->arr[ii],
                                                 vreg_found, spill_offset);
            if (debug_direct_reload && (reloaded != NULL)) {
               vex_printf("[%3d] ", spill_offset);
               con->ppInstr(instr, con->mode64);
            }
            if (reloaded != NULL) {
               /* Update info about the instruction, so it looks as if it had
                  been in this form all along. */
               instr = reloaded;
               instrs_in->arr[ii] = reloaded;
               con->getRegUsage(&reg_usage[ii], instr, con->mode64);
               if (debug_direct_reload) {
                  vex_printf(" --> ");
                  con->ppInstr(reloaded, con->mode64);
               }
            }
            if (debug_direct_reload && (reloaded != NULL)) {
               vex_printf("\n");
            }
         }
      }
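
      /* Illustrative example (not in the original source; amd64 syntax and
         the guest-state-pointer register are assumptions): if v5 is spilled
         at offset 3072 and "add v5, v7" is v5's last use, directReload can
         rewrite it as "add 3072(GSP), v7", avoiding a reload into a scratch
         register. */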
      /* The vreg -> rreg map constructed and then applied to each
         instruction. */
      HRegRemap remap;
      initHRegRemap(&remap);
      /* --- Allocate vregs used by the instruction. --- */
      /* Vregs used by the instruction can be in the following states:
         - Unallocated: vreg is entering its live range. Find a free rreg.
         - Assigned: we do nothing; rreg has been allocated previously.
         - Spilled: Find a free rreg and reload vreg into it.
         Naturally, finding a free rreg may involve spilling a vreg not used
         by the instruction. */
      for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
         HReg vreg = reg_usage[ii].vRegs[j];
         vassert(hregIsVirtual(vreg));

         if (DEBUG_REGALLOC) {
            vex_printf("considering "); con->ppReg(vreg); vex_printf("\n");
         }

         UInt v_idx = hregIndex(vreg);
         vassert(IS_VALID_VREGNO(v_idx));
         HReg rreg = vreg_state[v_idx].rreg;
         UInt r_idx;
         if (vreg_state[v_idx].disp == Assigned) {
            r_idx = hregIndex(rreg);
            vassert(rreg_state[r_idx].disp == Bound);
            addToHRegRemap(&remap, vreg, rreg);
         } else {
            vassert(hregIsInvalid(rreg));

            /* Find or make a free rreg of the correct class. */
            r_idx = FIND_OR_MAKE_FREE_RREG(
                       ii, v_idx, vreg_state[v_idx].reg_class, False);
            rreg = con->univ->regs[r_idx];

            /* Generate a reload only if the vreg is spilled and is about to
               be read or modified. If it is merely written then reloading
               it first would be pointless. */
            if ((vreg_state[v_idx].disp == Spilled)
                && (reg_usage[ii].vMode[j] != HRmWrite)) {

               HInstr* reload1 = NULL;
               HInstr* reload2 = NULL;
               con->genReload(&reload1, &reload2, rreg,
                              vreg_state[v_idx].spill_offset, con->mode64);
               vassert(reload1 != NULL || reload2 != NULL);
               if (reload1 != NULL) {
                  emit_instr(reload1, instrs_out, con, "reload1");
               }
               if (reload2 != NULL) {
                  emit_instr(reload2, instrs_out, con, "reload2");
               }
            }

            rreg_state[r_idx].disp          = Bound;
            rreg_state[r_idx].vreg          = vreg;
            rreg_state[r_idx].eq_spill_slot = True;
            vreg_state[v_idx].disp          = Assigned;
            vreg_state[v_idx].rreg          = rreg;
            addToHRegRemap(&remap, vreg, rreg);
         }

         /* If this vreg is written or modified, mark it so. */
         if (reg_usage[ii].vMode[j] != HRmRead) {
            rreg_state[r_idx].eq_spill_slot = False;
         }
      }

      con->mapRegs(&remap, instr, con->mode64);
      emit_instr(instr, instrs_out, con, NULL);
      if (DEBUG_REGALLOC) {
         vex_printf("After dealing with current instruction:\n");
         PRINT_STATE;
         vex_printf("\n");
      }

      /* ------ Post-instruction actions. ------ */
      /* Free rregs which:
         - Have been reserved and whose hard live range ended.
         - Have been bound to vregs whose live range ended. */
      for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
         RRegState*   rreg     = &rreg_state[r_idx];
         RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
         switch (rreg->disp) {
         case Free:
            break;
         case Reserved:
            if (rreg_lrs->lrs_used > 0) {
               /* Consider "dead before" the next instruction. */
               if (rreg_lrs->lr_current->dead_before <= (Short) ii + 1) {
                  FREE_RREG(&rreg_state[r_idx]);
                  if (rreg_lrs->lr_current_idx < rreg_lrs->lrs_used - 1) {
                     rreg_lrs->lr_current_idx += 1;
                     rreg_lrs->lr_current
                        = &rreg_lrs->lrs[rreg_lrs->lr_current_idx];
                  }
               }
            }
            break;
         case Bound: {
            UInt v_idx = hregIndex(rreg->vreg);
            /* Consider "dead before" the next instruction. */
            if (vreg_state[v_idx].dead_before <= (Short) ii + 1) {
               FREE_VREG(&vreg_state[v_idx]);
               FREE_RREG(&rreg_state[r_idx]);
            }
            break;
         }
         default:
            vassert(0);
         }
      }
   }

   return instrs_out;
}
/*----------------------------------------------------------------------------*/
/*--- end                                      host_generic_reg_alloc3.c ---*/
/*----------------------------------------------------------------------------*/