/*----------------------------------------------------------------------------*/
/*--- begin                                      host_generic_reg_alloc3.c ---*/
/*----------------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2017-2017 Ivo Raisr
      ivosh@ivosh.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/
#include "libvex_basictypes.h"
#include "libvex.h"

#include "main_util.h"
#include "host_generic_regs.h"

/* Set to 1 for lots of debugging output. */
#define DEBUG_REGALLOC 0

/* Set to 1 for sanity checking at every instruction.
   Set to 0 for sanity checking only every 17th one and the last one. */
#define SANITY_CHECKS_EVERY_INSTR 0


#define INVALID_INSTRNO (-2)
#define INVALID_INDEX   (-2)
/* Register allocator state is kept in an array of VRegState's.
   There is an element for every virtual register (vreg).
   Elements are indexed [0 .. n_vregs-1].
   Records information about vreg live range and its state. */
typedef
   struct {
      /* Live range, register class and spill offset are computed during the
         first register allocator pass and remain unchanged after that. */

      /* This vreg becomes live with this instruction (inclusive). Contains
         either an instruction number or INVALID_INSTRNO. */
      Short live_after;
      /* This vreg becomes dead before this instruction (exclusive). Contains
         either an instruction number or INVALID_INSTRNO. */
      Short dead_before;
      /* What kind of register this is. */
      HRegClass reg_class;

      /* What is its current disposition? */
      enum { Unallocated, /* Neither spilled nor assigned to a real reg. */
             Assigned,    /* Assigned to a real register, viz rreg. */
             Spilled      /* Spilled to the spill slot. */
           } disp;

      /* If .disp == Assigned, what rreg is it bound to? */
      HReg rreg;

      /* The "home" spill slot. The offset is relative to the beginning of
         the guest state. */
      UShort spill_offset;

      /* This vreg (vregS) is coalesced to another vreg
         if |coalescedTo| != INVALID_HREG.
         Coalescing means that there is a MOV instruction which occurs in the
         instruction stream right at vregS' dead_before
         and vregD's live_after. */
      HReg coalescedTo;    /* Which vreg it is coalesced to. */
      HReg coalescedFirst; /* First vreg in the coalescing chain. */

      /* If this vregS is coalesced to another vregD, what is the combined
         dead_before for vregS+vregD. Used to effectively allocate
         registers. */
      Short effective_dead_before;
   }
   VRegState;
/* The allocator also maintains a redundant array of indexes (rreg_state) from
   rreg numbers back to entries in vreg_state. It is redundant because iff
   rreg_state[r] == v then hregNumber(vreg_state[v].rreg) == r -- that is, the
   two entries point at each other. The purpose of this is to speed up
   activities which involve looking for a particular rreg: there is no need to
   scan the vreg_state looking for it, just index directly into rreg_state.
   The FAQ "does this rreg already have an associated vreg" is the main
   beneficiary.
   The identity of the real register is not recorded here, because the index
   of this structure in |rreg_state| is the index number of the register, and
   the register itself can be extracted from the RRegUniverse (univ). */
typedef
   struct {
      /* What is its current disposition? */
      enum { Free,     /* Not bound to any vreg. */
             Bound,    /* Bound to a vreg, viz vreg. */
             Reserved  /* Reserved for an instruction. */
           } disp;

      /* If .disp == Bound, what vreg is it bound to? */
      HReg vreg;

      /* If .disp == Bound, has the associated vreg been reloaded from its
         spill slot recently and is this rreg still equal to that spill slot?
         Avoids unnecessary spilling of that vreg later, when this rreg needs
         to be reserved. */
      Bool eq_spill_slot;
   }
   RRegState;
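
/* Example of the mutual-pointer invariant described above: if
   vreg_state[5].disp == Assigned and vreg_state[5].rreg denotes the rreg with
   index 3, then rreg_state[3].disp must be Bound and rreg_state[3].vreg must
   denote vreg 5. The sanity checks in doRegisterAllocation_v3 below verify
   exactly this relationship, in both directions. */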
/* Records information on a real-register live range, associated with
   a particular real register. Computed once; does not change. */
typedef
   struct {
      /* This rreg becomes live with this instruction (inclusive). Contains
         either an instruction number or INVALID_INSTRNO. */
      Short live_after;
      /* This rreg becomes dead before this instruction (exclusive). Contains
         either an instruction number or INVALID_INSTRNO. */
      Short dead_before;
   }
   RRegLR;

/* Live ranges for a single rreg and the current one.
   Live ranges are computed during the first register allocator pass and
   remain unchanged after that.
   The identity of the real register is not recorded here, because the index
   of this structure in |rreg_lr_state| is the index number of the register,
   and the register itself can be extracted from the RRegUniverse (univ). */
typedef
   struct {
      RRegLR* lrs;
      UInt    lrs_size;
      UInt    lrs_used;

      /* Live range corresponding to the currently processed instruction.
         Points into the |lrs| array. */
      RRegLR* lr_current;
      UInt    lr_current_idx;
   }
   RRegLRState;
#define IS_VALID_VREGNO(v) ((v) >= 0 && (v) < n_vregs)
#define IS_VALID_RREGNO(r) ((r) >= 0 && (r) < n_rregs)

#define FREE_VREG(v)              \
   do {                           \
      (v)->disp = Unallocated;    \
      (v)->rreg = INVALID_HREG;   \
   } while (0)

#define FREE_RREG(r)                \
   do {                             \
      (r)->disp = Free;             \
      (r)->vreg = INVALID_HREG;     \
      (r)->eq_spill_slot = False;   \
   } while (0)
/* Compute the index of the highest and lowest 1 in a ULong, respectively.
   Results are undefined if the argument is zero. Don't pass it zero :) */
static inline UInt ULong__maxIndex ( ULong w64 ) {
   return 63 - __builtin_clzll(w64);
}

static inline UInt ULong__minIndex ( ULong w64 ) {
   return __builtin_ctzll(w64);
}
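
/* Worked example: for w64 == 0x28 (binary 101000), ULong__minIndex returns 3
   (the lowest set bit) and ULong__maxIndex returns 5 (the highest set bit).
   Stage 1 below uses this pair to bound its scan over the rreg bitmasks. */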
static inline void enlarge_rreg_lrs(RRegLRState* rreg_lrs)
{
   vassert(rreg_lrs->lrs_used == rreg_lrs->lrs_size);

   RRegLR* lr2 = LibVEX_Alloc_inline(2 * rreg_lrs->lrs_used * sizeof(RRegLR));
   for (UInt l = 0; l < rreg_lrs->lrs_used; l++) {
      lr2[l] = rreg_lrs->lrs[l];
   }

   rreg_lrs->lrs = lr2;
   rreg_lrs->lrs_size = 2 * rreg_lrs->lrs_used;
}

#define PRINT_STATE                                               \
   do {                                                           \
      print_state(con, vreg_state, n_vregs, rreg_state, n_rregs,  \
                  rreg_lr_state, ii);                             \
   } while (0)
static inline void print_state(
   const RegAllocControl* con,
   const VRegState* vreg_state, UInt n_vregs,
   const RRegState* rreg_state, UInt n_rregs,
   const RRegLRState* rreg_lr_state,
   UShort current_ii)
{
#  define RIGHT_JUSTIFY(_total, _written)                 \
   do {                                                   \
      for (Int w = (_total) - (_written); w > 0; w--) {   \
         vex_printf(" ");                                 \
      }                                                   \
   } while (0)

   for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
      const VRegState* vreg = &vreg_state[v_idx];

      if (vreg->live_after == INVALID_INSTRNO) {
         continue; /* This is a dead vreg. It never becomes live. */
      }
      vex_printf("vreg_state[%3u] ", v_idx);

      UInt written;
      switch (vreg->disp) {
      case Unallocated:
         written = vex_printf("unallocated");
         break;
      case Assigned:
         written = vex_printf("assigned to ");
         written += con->ppReg(vreg->rreg);
         break;
      case Spilled:
         written = vex_printf("spilled at offset %u", vreg->spill_offset);
         break;
      default:
         vassert(0);
      }
      RIGHT_JUSTIFY(25, written);

      written = vex_printf("lr: [%d, %d) ",
                           vreg->live_after, vreg->dead_before);
      RIGHT_JUSTIFY(15, written);

      written = vex_printf("effective lr: [%d, %d)",
                           vreg->live_after, vreg->effective_dead_before);
      RIGHT_JUSTIFY(25, written);

      if (vreg->live_after > (Short) current_ii) {
         vex_printf("[not live yet]\n");
      } else if ((Short) current_ii >= vreg->dead_before) {
         if (hregIsInvalid(vreg->coalescedTo)) {
            vex_printf("[now dead]\n");
         } else {
            vex_printf("[now dead, coalesced to ");
            con->ppReg(vreg->coalescedTo);
            vex_printf("]\n");
         }
      } else {
         vex_printf("[live]\n");
      }
   }

   for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
      const RRegState*   rreg     = &rreg_state[r_idx];
      const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
      vex_printf("rreg_state[%2u] = ", r_idx);
      UInt written = con->ppReg(con->univ->regs[r_idx]);
      RIGHT_JUSTIFY(10, written);

      switch (rreg->disp) {
      case Free:
         vex_printf("free\n");
         break;
      case Bound:
         vex_printf("bound for ");
         con->ppReg(rreg->vreg);
         if (rreg->eq_spill_slot) {
            vex_printf(" (equals to its spill slot)");
         }
         vex_printf("\n");
         break;
      case Reserved:
         vex_printf("reserved - live range [%d, %d)\n",
                    rreg_lrs->lr_current->live_after,
                    rreg_lrs->lr_current->dead_before);
         break;
      }
   }

#  undef RIGHT_JUSTIFY
}
static inline void emit_instr(HInstr* instr, HInstrArray* instrs_out,
                              const RegAllocControl* con, const HChar* why)
{
   if (DEBUG_REGALLOC) {
      vex_printf("**  ");
      con->ppInstr(instr, con->mode64);
      if (why != NULL) {
         vex_printf(" (%s)", why);
      }
      vex_printf("\n\n");
   }

   addHInstr(instrs_out, instr);
}
/* Updates register allocator state after vreg has been spilled. */
static inline void mark_vreg_spilled(
   UInt v_idx, VRegState* vreg_state, UInt n_vregs,
   RRegState* rreg_state, UInt n_rregs)
{
   HReg rreg = vreg_state[v_idx].rreg;
   UInt r_idx = hregIndex(rreg);

   vreg_state[v_idx].disp = Spilled;
   vreg_state[v_idx].rreg = INVALID_HREG;
   FREE_RREG(&rreg_state[r_idx]);
}
/* Spills a vreg assigned to some rreg.
   The vreg is spilled and the rreg is freed.
   Returns the rreg's index. */
static inline UInt spill_vreg(
   HReg vreg, UInt v_idx, UInt current_ii, VRegState* vreg_state, UInt n_vregs,
   RRegState* rreg_state, UInt n_rregs, HInstrArray* instrs_out,
   const RegAllocControl* con)
{
   /* Check some invariants first. */
   vassert(IS_VALID_VREGNO((v_idx)));
   vassert(vreg_state[v_idx].disp == Assigned);
   HReg rreg = vreg_state[v_idx].rreg;
   UInt r_idx = hregIndex(rreg);
   vassert(IS_VALID_RREGNO(r_idx));
   vassert(hregClass(con->univ->regs[r_idx]) == hregClass(vreg));
   vassert(vreg_state[v_idx].dead_before > (Short) current_ii);
   vassert(vreg_state[v_idx].reg_class != HRcINVALID);

   /* Generate spill. */
   HInstr* spill1 = NULL;
   HInstr* spill2 = NULL;
   con->genSpill(&spill1, &spill2, rreg, vreg_state[v_idx].spill_offset,
                 con->mode64);
   vassert(spill1 != NULL || spill2 != NULL); /* cannot be both NULL */
   if (spill1 != NULL) {
      emit_instr(spill1, instrs_out, con, "spill1");
   }
   if (spill2 != NULL) {
      emit_instr(spill2, instrs_out, con, "spill2");
   }

   mark_vreg_spilled(v_idx, vreg_state, n_vregs, rreg_state, n_rregs);
   return r_idx;
}
/* Chooses a vreg to be spilled based on various criteria.
   The vreg must not be from the instruction being processed, that is, it must
   not be listed in reg_usage->vRegs. */
static inline HReg find_vreg_to_spill(
   VRegState* vreg_state, UInt n_vregs,
   RRegState* rreg_state, UInt n_rregs,
   const HRegUsage* instr_regusage, HRegClass target_hregclass,
   const HRegUsage* reg_usage, UInt scan_forward_from, UInt scan_forward_max,
   const RegAllocControl* con)
{
   /* Scan forwards a few instructions to find the most distant mentioned
      use of a vreg. We can scan in the range of (inclusive):
      - reg_usage[scan_forward_from]
      - reg_usage[scan_forward_end], where scan_forward_end
          = MIN(scan_forward_max, scan_forward_from + FEW_INSTRUCTIONS). */
#  define FEW_INSTRUCTIONS 20
   UInt scan_forward_end
      = (scan_forward_max <= scan_forward_from + FEW_INSTRUCTIONS) ?
        scan_forward_max : scan_forward_from + FEW_INSTRUCTIONS;
#  undef FEW_INSTRUCTIONS

   HReg vreg_found = INVALID_HREG;
   UInt distance_so_far = 0;

   for (UInt r_idx = con->univ->allocable_start[target_hregclass];
        r_idx <= con->univ->allocable_end[target_hregclass]; r_idx++) {
      if (rreg_state[r_idx].disp == Bound) {
         HReg vreg = rreg_state[r_idx].vreg;
         if (! HRegUsage__contains(instr_regusage, vreg)) {
            UInt ii = scan_forward_from;
            for ( ; ii <= scan_forward_end; ii++) {
               if (HRegUsage__contains(&reg_usage[ii], vreg)) {
                  break;
               }
            }

            if (ii >= distance_so_far) {
               distance_so_far = ii;
               vreg_found = vreg;
               if (distance_so_far == scan_forward_end) {
                  break; /* We are at the end. Nothing could be better. */
               }
            }
         }
      }
   }

   if (hregIsInvalid(vreg_found)) {
      vex_printf("doRegisterAllocation_v3: cannot find a register in class: ");
      ppHRegClass(target_hregclass);
      vex_printf("\n");
      vpanic("doRegisterAllocation_v3: cannot find a register.");
   }

   return vreg_found;
}
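
/* Example of the distance heuristic above (register names illustrative
   only): suppose at instruction ii an Int64-class rreg is needed, r3 holds
   v17 whose next use is at ii+12, and r4 holds v9 whose next use is at ii+3.
   Scanning reg_usage[] forward finds v17's use to be the more distant one,
   so v17 is the spill candidate: it can stay in memory longest before being
   needed again. */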
/* Finds a free rreg of the correct class.
   Tries to find an rreg whose hard live range (if any) starts after the
   vreg's live range ends. If that is not possible, then at least one whose
   hard live range is as far ahead in the incoming instruction stream as
   possible.
   An ideal rreg candidate is a caller-save register for short-lived vregs
   and a callee-save register for long-lived vregs because it won't need to
   be spilled around helper calls. */
static Int find_free_rreg(
   const VRegState* vreg_state, UInt n_vregs,
   const RRegState* rreg_state, UInt n_rregs,
   const RRegLRState* rreg_lr_state,
   UInt v_idx, UInt current_ii, HRegClass target_hregclass,
   Bool reserve_phase, const RegAllocControl* con)
{
   Int r_idx_found = INVALID_INDEX;
   UInt distance_so_far = 0; /* running max for |live_after - current_ii| */
   const VRegState* vreg = &vreg_state[v_idx];

   /* Assume the majority of vregs are short-lived. Start scanning from the
      caller-save registers first. */
   for (Int r_idx = (Int) con->univ->allocable_end[target_hregclass];
        r_idx >= (Int) con->univ->allocable_start[target_hregclass]; r_idx--) {
      const RRegState*   rreg     = &rreg_state[r_idx];
      const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
      if (rreg->disp == Free) {
         if (rreg_lrs->lrs_used == 0) {
            r_idx_found = r_idx;
            break; /* There could be nothing better, so break now. */
         } else {
            const RRegLR* lr = rreg_lrs->lr_current;
            if (lr->live_after > (Short) current_ii) {
               /* RReg's hard live range is not live, yet. */
               if (vreg->effective_dead_before <= lr->live_after) {
                  r_idx_found = r_idx;
                  break; /* VReg is short-lived; it fits in. */
               }
               if ((lr->live_after - (Short) current_ii) > distance_so_far) {
                  distance_so_far = lr->live_after - (Short) current_ii;
                  r_idx_found = r_idx;
               }
            } else if ((Short) current_ii >= lr->dead_before) {
               /* Now dead. Effectively as if there is no LR now. */
               r_idx_found = r_idx;
               break; /* There could be nothing better, so break now. */
            } else {
               /* Going live for this instruction. This could happen only when
                  rregs are being reserved en masse, for example before
                  a helper call. */
               vassert(reserve_phase);
            }
         }
      }
   }

   return r_idx_found;
}
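
/* Note on the scan direction above: the loop walks the allocable range
   backwards, from allocable_end down to allocable_start. This matches the
   "start with caller-save registers" comment, assuming the universe orders
   callee-save registers before caller-save ones within each class; see the
   RRegUniverse definition in host_generic_regs.h. */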
/* A target-independent register allocator (v3). Requires various functions
   which it uses to deal abstractly with instructions and registers, since it
   cannot have any target-specific knowledge.

   Returns a new list of instructions, which, as a result of the behaviour of
   mapRegs, will be in-place modifications of the original instructions.

   Requires that the incoming code has been generated using vreg numbers
   0, 1 .. n_vregs-1. Appearance of a vreg outside that range is a checked
   run-time error.

   Takes unallocated instructions and returns allocated instructions.
*/
HInstrArray* doRegisterAllocation_v3(
   /* Incoming virtual-registerised code. */
   HInstrArray* instrs_in,

   /* Register allocator controls to use. */
   const RegAllocControl* con
)
{
   vassert((con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN) == 0);

   /* The main register allocator state. */
   UInt n_vregs = instrs_in->n_vregs;
   VRegState* vreg_state = NULL;
   if (n_vregs > 0) {
      vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(VRegState));
   }

   /* If this is not so, the universe we have is nonsensical. */
   UInt n_rregs = con->univ->allocable;
   vassert(n_rregs > 0);
   STATIC_ASSERT(N_RREGUNIVERSE_REGS == 64);

   /* Redundant rreg -> vreg state. */
   RRegState* rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));

   /* Info on rreg live ranges. */
   RRegLRState* rreg_lr_state
      = LibVEX_Alloc_inline(n_rregs * sizeof(RRegLRState));

   /* Info on register usage in the incoming instruction array. Computed once
      and remains unchanged, more or less; updated sometimes by the
      direct-reload optimisation. */
   HRegUsage* reg_usage
      = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);

   /* Mark vreg indexes where coalesce chains start at. */
   UInt* coalesce_heads = LibVEX_Alloc_inline(n_vregs * sizeof(UInt));
   UInt nr_coalesce_heads = 0;

   /* The live range numbers are signed shorts, and so limiting the
      number of instructions to 15000 comfortably guards against them
      overflowing 32k. */
   vassert(instrs_in->arr_used <= 15000);

   /* The output array of instructions. */
   HInstrArray* instrs_out = newHInstrArray();
#  define OFFENDING_VREG(_v_idx, _instr, _mode)                        \
   do {                                                                \
      vex_printf("\n\nOffending vreg = %u\n", (_v_idx));               \
      vex_printf("\nOffending instruction = ");                        \
      con->ppInstr((_instr), con->mode64);                             \
      vex_printf("\n");                                                \
      vpanic("doRegisterAllocation_v3: first event for vreg is "#_mode \
             " (should be Write)");                                    \
   } while (0)

#  define OFFENDING_RREG(_r_idx, _instr, _mode)                        \
   do {                                                                \
      vex_printf("\n\nOffending rreg = ");                             \
      con->ppReg(con->univ->regs[(_r_idx)]);                           \
      vex_printf("\nOffending instruction = ");                        \
      con->ppInstr((_instr), con->mode64);                             \
      vex_printf("\n");                                                \
      vpanic("doRegisterAllocation_v3: first event for rreg is "#_mode \
             " (should be Write)");                                    \
   } while (0)
/* Finds an rreg of the correct class.
   If a free rreg is not found, then spills a vreg not used by the current
   instruction and makes free the corresponding rreg. */
#  define FIND_OR_MAKE_FREE_RREG(_ii, _v_idx, _reg_class, _reserve_phase)  \
   ({                                                                      \
      Int _r_free_idx = find_free_rreg(                                    \
         vreg_state, n_vregs, rreg_state, n_rregs, rreg_lr_state,          \
         (_v_idx), (_ii), (_reg_class), (_reserve_phase), con);            \
      if (_r_free_idx == INVALID_INDEX) {                                  \
         HReg vreg_to_spill = find_vreg_to_spill(                          \
            vreg_state, n_vregs, rreg_state, n_rregs,                      \
            &reg_usage[(_ii)], (_reg_class),                               \
            reg_usage, (_ii) + 1,                                          \
            instrs_in->arr_used - 1, con);                                 \
         _r_free_idx = spill_vreg(vreg_to_spill, hregIndex(vreg_to_spill), \
                                  (_ii), vreg_state, n_vregs,              \
                                  rreg_state, n_rregs,                     \
                                  instrs_out, con);                        \
      }                                                                    \
                                                                           \
      vassert(IS_VALID_RREGNO(_r_free_idx));                               \
      _r_free_idx;                                                         \
   })
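
/* Note: the ({ ... }) form above is a GNU C statement expression; the block
   evaluates to its final expression, _r_free_idx, so the macro can be used
   as an rvalue. A minimal, illustrative use for an Int64-class vreg:

      UInt r_idx = FIND_OR_MAKE_FREE_RREG(ii, v_idx, HRcInt64, False);
*/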
   /* --- Stage 0. Initialize the state. --- */
   for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
      vreg_state[v_idx].live_after            = INVALID_INSTRNO;
      vreg_state[v_idx].dead_before           = INVALID_INSTRNO;
      vreg_state[v_idx].reg_class             = HRcINVALID;
      vreg_state[v_idx].disp                  = Unallocated;
      vreg_state[v_idx].rreg                  = INVALID_HREG;
      vreg_state[v_idx].spill_offset          = 0;
      vreg_state[v_idx].coalescedTo           = INVALID_HREG;
      vreg_state[v_idx].coalescedFirst        = INVALID_HREG;
      vreg_state[v_idx].effective_dead_before = INVALID_INSTRNO;
   }

   for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
      rreg_state[r_idx].disp          = Free;
      rreg_state[r_idx].vreg          = INVALID_HREG;
      rreg_state[r_idx].eq_spill_slot = False;
   }

   for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
      RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
      rreg_lrs->lrs_size = 4;
      rreg_lrs->lrs = LibVEX_Alloc_inline(rreg_lrs->lrs_size
                                          * sizeof(RRegLR));
      rreg_lrs->lrs_used = 0;
      rreg_lrs->lr_current = &rreg_lrs->lrs[0];
      rreg_lrs->lr_current_idx = 0;
   }
   /* --- Stage 1. Scan the incoming instructions. --- */
   for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
      const HInstr* instr = instrs_in->arr[ii];

      con->getRegUsage(&reg_usage[ii], instr, con->mode64);
      reg_usage[ii].isVregVregMove
         = reg_usage[ii].isRegRegMove
           && hregIsVirtual(reg_usage[ii].regMoveSrc)
           && hregIsVirtual(reg_usage[ii].regMoveDst);

      if (0) {
         vex_printf("\n%u stage 1: ", ii);
         con->ppInstr(instr, con->mode64);
         vex_printf("\n");
         ppHRegUsage(con->univ, &reg_usage[ii]);
      }

      /* Process virtual registers mentioned in the instruction. */
      for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
         HReg vreg = reg_usage[ii].vRegs[j];
         vassert(hregIsVirtual(vreg));

         UInt v_idx = hregIndex(vreg);
         if (!IS_VALID_VREGNO(v_idx)) {
            vex_printf("\n");
            con->ppInstr(instr, con->mode64);
            vex_printf("\n");
            vex_printf("vreg %u (n_vregs %u)\n", v_idx, n_vregs);
            vpanic("doRegisterAllocation_v3: out-of-range vreg");
         }

         /* Note the register class. */
         if (vreg_state[v_idx].reg_class == HRcINVALID) {
            /* First mention of this vreg. */
            vreg_state[v_idx].reg_class = hregClass(vreg);
         } else {
            /* Seen it before, so check for consistency. */
            vassert(vreg_state[v_idx].reg_class == hregClass(vreg));
         }

         /* Consider live ranges. */
         switch (reg_usage[ii].vMode[j]) {
         case HRmRead:
            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
               OFFENDING_VREG(v_idx, instr, "Read");
            }
            break;
         case HRmWrite:
            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
               vreg_state[v_idx].live_after = toShort(ii);
            }
            break;
         case HRmModify:
            if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
               OFFENDING_VREG(v_idx, instr, "Modify");
            }
            break;
         default:
            vassert(0);
         }

         vreg_state[v_idx].dead_before = toShort(ii + 1);
         vreg_state[v_idx].effective_dead_before
            = vreg_state[v_idx].dead_before;
      }

      /* Process real registers mentioned in the instruction. */
      const ULong rRead      = reg_usage[ii].rRead;
      const ULong rWritten   = reg_usage[ii].rWritten;
      const ULong rMentioned = rRead | rWritten;

      if (rMentioned != 0) {
         UInt rReg_minIndex = ULong__minIndex(rMentioned);
         UInt rReg_maxIndex = ULong__maxIndex(rMentioned);
         /* Don't bother to look at registers which are not available
            to the allocator such as the stack or guest state pointers. These
            are unavailable to the register allocator and so we never visit
            them. We asserted above that n_rregs > 0, so (n_rregs - 1) is
            safe. */
         if (rReg_maxIndex >= n_rregs) {
            rReg_maxIndex = n_rregs - 1;
         }

         for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) {
            const ULong jMask = 1ULL << r_idx;

            if (LIKELY((rMentioned & jMask) == 0)) {
               continue;
            }

            RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
            const Bool isR = (rRead    & jMask) != 0;
            const Bool isW = (rWritten & jMask) != 0;

            if (isW && !isR) {
               if (rreg_lrs->lrs_used == rreg_lrs->lrs_size) {
                  enlarge_rreg_lrs(rreg_lrs);
               }

               rreg_lrs->lrs[rreg_lrs->lrs_used].live_after = toShort(ii);
               rreg_lrs->lrs[rreg_lrs->lrs_used].dead_before = toShort(ii + 1);
               rreg_lrs->lrs_used += 1;
            } else if (!isW && isR) {
               if ((rreg_lrs->lrs_used == 0)
                   || (rreg_lrs->lrs[rreg_lrs->lrs_used - 1].live_after
                       == INVALID_INSTRNO)) {
                  OFFENDING_RREG(r_idx, instr, "Read");
               }
               rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before
                  = toShort(ii + 1);
            } else {
               vassert(isR && isW);
               if ((rreg_lrs->lrs_used == 0)
                   || (rreg_lrs->lrs[rreg_lrs->lrs_used - 1].live_after
                       == INVALID_INSTRNO)) {
                  OFFENDING_RREG(r_idx, instr, "Modify");
               }
               rreg_lrs->lrs[rreg_lrs->lrs_used - 1].dead_before
                  = toShort(ii + 1);
            }
         }
      }
   }
   if (DEBUG_REGALLOC) {
      for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
         vex_printf("vreg %3u: [%3d, %3d)\n",
                    v_idx, vreg_state[v_idx].live_after,
                    vreg_state[v_idx].dead_before);
      }

      for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
         vex_printf("rreg %2u (", r_idx);
         UInt written = con->ppReg(con->univ->regs[r_idx]);
         vex_printf("):");
         for (Int t = 15 - written; t > 0; t--) {
            vex_printf(" ");
         }

         const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
         for (UInt l = 0; l < rreg_lrs->lrs_used; l++) {
            vex_printf("[%3d, %3d) ",
                       rreg_lrs->lrs[l].live_after,
                       rreg_lrs->lrs[l].dead_before);
         }
         vex_printf("\n");
      }
   }
   /* --- Stage 2. MOV coalescing (preparation). --- */
   /* Optimise register coalescing:
         MOV v <-> v coalescing (done here).
         MOV v <-> r coalescing (TODO: not yet, not here). */
   /* If doing a reg-reg move between two vregs, and the src's live range ends
      here and the dst's live range starts here, coalesce the src vreg
      to the dst vreg. */
   Bool coalesce_happened = False;
   for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
      if (reg_usage[ii].isVregVregMove) {
         HReg vregS = reg_usage[ii].regMoveSrc;
         HReg vregD = reg_usage[ii].regMoveDst;

         /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
         vassert(hregClass(vregS) == hregClass(vregD));
         UInt vs_idx = hregIndex(vregS);
         UInt vd_idx = hregIndex(vregD);
         vassert(IS_VALID_VREGNO(vs_idx));
         vassert(IS_VALID_VREGNO(vd_idx));
         vassert(! sameHReg(vregS, vregD));
         VRegState* vs_st = &vreg_state[vs_idx];
         VRegState* vd_st = &vreg_state[vd_idx];

         if ((vs_st->dead_before == ii + 1) && (vd_st->live_after == ii)) {
            /* Live ranges are adjacent. */

            vs_st->coalescedTo = vregD;
            if (hregIsInvalid(vs_st->coalescedFirst)) {
               vd_st->coalescedFirst = vregS;
               coalesce_heads[nr_coalesce_heads] = vs_idx;
               nr_coalesce_heads += 1;
            } else {
               vd_st->coalescedFirst = vs_st->coalescedFirst;
            }

            vreg_state[hregIndex(vd_st->coalescedFirst)].effective_dead_before
               = vd_st->dead_before;

            if (DEBUG_REGALLOC) {
               vex_printf("vreg coalescing: ");
               con->ppReg(vregS);
               vex_printf(" -> ");
               con->ppReg(vregD);
               vex_printf("\n");
            }

            coalesce_happened = True;
         }
      }
   }
   /* --- Stage 3. Allocate spill slots. --- */

   /* Each spill slot is 8 bytes long. For vregs which take more than 64 bits
      to spill (for example classes Flt64 and Vec128), we have to allocate
      two consecutive spill slots. For 256 bit registers (class Vec256), we
      have to allocate four consecutive spill slots.

      For Vec128-class on PowerPC, the spill slot's actual address must be
      16-byte aligned. Since the spill slot's address is computed as an offset
      from the guest state pointer, and since the user of the generated code
      must set that pointer to a 32-byte aligned value, we have the residual
      obligation here of choosing a 16-byte aligned spill slot offset for
      Vec128-class values. Since each spill slot is 8 bytes long, that means
      for Vec128-class values we must allocate a spill slot number which is
      zero mod 2.

      Similarly, for Vec256 class on amd64, find a spill slot number which is
      zero mod 4. This guarantees it will be 32-byte aligned, which isn't
      actually necessary on amd64 (we use movUpd etc to spill), but seems like
      a good practice.

      Do a rank-based allocation of vregs to spill slot numbers. We put as
      few values as possible in spill slots, but nevertheless need to have a
      spill slot available for all vregs, just in case. */

#  define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
   STATIC_ASSERT((N_SPILL64S % 2) == 0);
   STATIC_ASSERT((LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN) == 0);
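
   /* Worked example (assuming the default LibVEX_N_SPILL_BYTES of 4096):
      N_SPILL64S is then 512. An Int64-class vreg takes any single free slot;
      a Vec128-class vreg takes an even:odd pair such as slots 10 and 11,
      whose byte offset within the spill area (10 * 8 == 80) is 16-byte
      aligned as required. */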
   Short ss_busy_until_before[N_SPILL64S];
   vex_bzero(&ss_busy_until_before, sizeof(ss_busy_until_before));

   for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
      /* True iff this vreg is unused. In which case we also expect that the
         reg_class field for it has not been set. */
      if (vreg_state[v_idx].live_after == INVALID_INSTRNO) {
         vassert(vreg_state[v_idx].reg_class == HRcINVALID);
         continue;
      }
      if (! hregIsInvalid(vreg_state[v_idx].coalescedFirst)) {
         /* Coalesced vregs should share the same spill slot with the first
            vreg in the coalescing chain. But we don't have that information,
            yet. */
         continue;
      }

      /* The spill slots are 64 bits in size. As per the comment on definition
         of HRegClass in host_generic_regs.h, that means, to spill a vreg of
         class Flt64 or Vec128, we'll need to find two adjacent spill slots to
         use. For Vec256, we'll need to find four adjacent slots to use. Note,
         this logic needs to be kept in sync with the size info on the
         definition of HRegClass. */
      UInt ss_no;
      switch (vreg_state[v_idx].reg_class) {
      case HRcFlt64:
      case HRcVec128:
         /* Find two adjacent free slots which provide up to 128 bits to
            spill the vreg. Since we are trying to find an even:odd pair,
            move along in steps of 2 (slots). */
         for (ss_no = 0; ss_no < N_SPILL64S - 1; ss_no += 2)
            if (ss_busy_until_before[ss_no + 0] <= vreg_state[v_idx].live_after
                && ss_busy_until_before[ss_no + 1]
                   <= vreg_state[v_idx].live_after)
               break;
         if (ss_no >= N_SPILL64S - 1) {
            vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
         }
         ss_busy_until_before[ss_no + 0]
            = vreg_state[v_idx].effective_dead_before;
         ss_busy_until_before[ss_no + 1]
            = vreg_state[v_idx].effective_dead_before;
         break;
      default:
         /* The ordinary case -- just find a single lowest-numbered spill
            slot which is available at the start point of this interval,
            and assign the interval to it. */
         for (ss_no = 0; ss_no < N_SPILL64S; ss_no++) {
            if (ss_busy_until_before[ss_no] <= vreg_state[v_idx].live_after)
               break;
         }
         if (ss_no == N_SPILL64S) {
            vpanic("N_SPILL64S is too low in VEX. Increase and recompile.");
         }
         ss_busy_until_before[ss_no]
            = vreg_state[v_idx].effective_dead_before;
         break;
      }

      /* This reflects VEX's hard-wired knowledge of the guest state layout:
         the guest state itself, then two equal sized areas following it for
         two sets of shadow state, and then the spill area. */
      vreg_state[v_idx].spill_offset
         = toShort(con->guest_sizeB * 3 + ss_no * 8);
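
      /* For example, with a hypothetical guest_sizeB of 1024, slot 2 would
         land at offset 3 * 1024 + 2 * 8 == 3088 from the guest state
         pointer. */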

      /* Independent check that we've made a sane choice of the slot. */
      switch (vreg_state[v_idx].reg_class) {
      case HRcVec128: case HRcFlt64:
         vassert((vreg_state[v_idx].spill_offset % 16) == 0);
         break;
      default:
         vassert((vreg_state[v_idx].spill_offset % 8) == 0);
         break;
      }
   }
   /* Fill in the spill offsets and effective_dead_before for coalesced
      vregs. */
   for (UInt i = 0; i < nr_coalesce_heads; i++) {
      UInt vs_idx = coalesce_heads[i];
      Short effective_dead_before = vreg_state[vs_idx].effective_dead_before;
      UShort spill_offset = vreg_state[vs_idx].spill_offset;
      HReg vregD = vreg_state[vs_idx].coalescedTo;
      while (! hregIsInvalid(vregD)) {
         UInt vd_idx = hregIndex(vregD);
         vreg_state[vd_idx].effective_dead_before = effective_dead_before;
         vreg_state[vd_idx].spill_offset = spill_offset;
         vregD = vreg_state[vd_idx].coalescedTo;
      }
   }

   if (DEBUG_REGALLOC && coalesce_happened) {
      UInt ii = 0;
      vex_printf("After vreg<->vreg MOV coalescing:\n");
      PRINT_STATE;
   }

   if (0) {
      vex_printf("\n\n");
      for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
         if (vreg_state[v_idx].live_after != INVALID_INSTRNO) {
            vex_printf("vreg %3u --> spill offset %u\n",
                       v_idx, vreg_state[v_idx].spill_offset);
         }
      }
   }
   /* --- Stage 4. Process instructions. --- */
   for (UShort ii = 0; ii < instrs_in->arr_used; ii++) {
      HInstr* instr = instrs_in->arr[ii];

      if (DEBUG_REGALLOC) {
         vex_printf("\n====----====---- Instr %d ----====----====\n", ii);
         vex_printf("---- ");
         con->ppInstr(instrs_in->arr[ii], con->mode64);
         vex_printf("\n\nInitial state:\n");
         PRINT_STATE;
         vex_printf("\n");
      }

      /* ------------ Sanity checks ------------ */

      /* Sanity checks are relatively expensive. So they are done only once
         every 17 instructions, and just before the last instruction. */
      Bool do_sanity_check
         = toBool(
              SANITY_CHECKS_EVERY_INSTR
              || ii == instrs_in->arr_used - 1
              || (ii > 0 && (ii % 17) == 0)
           );

      if (do_sanity_check) {
         /* Sanity check: the vreg_state and rreg_state mutually-redundant
            mappings are consistent. If vreg_state[v].rreg points at some
            rreg_state entry then that rreg_state entry should point back at
            vreg_state[v]. */
         for (UInt v_idx = 0; v_idx < n_vregs; v_idx++) {
            if (vreg_state[v_idx].disp == Assigned) {
               vassert(!hregIsVirtual(vreg_state[v_idx].rreg));

               UInt r_idx = hregIndex(vreg_state[v_idx].rreg);
               vassert(IS_VALID_RREGNO(r_idx));
               vassert(rreg_state[r_idx].disp == Bound);
               vassert(hregIndex(rreg_state[r_idx].vreg) == v_idx);

               vassert(hregClass(vreg_state[v_idx].rreg)
                       == hregClass(con->univ->regs[r_idx]));
            }
         }

         for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
            if (rreg_state[r_idx].disp == Bound) {
               vassert(hregIsVirtual(rreg_state[r_idx].vreg));

               UInt v_idx = hregIndex(rreg_state[r_idx].vreg);
               vassert(IS_VALID_VREGNO(v_idx));
               vassert(vreg_state[v_idx].disp == Assigned);
               vassert(hregIndex(vreg_state[v_idx].rreg) == r_idx);
            } else {
               vassert(rreg_state[r_idx].eq_spill_slot == False);
            }
         }

         /* Sanity check: if rreg has been marked as Reserved, there must be
            a corresponding hard live range for it. */
         for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
            if (rreg_state[r_idx].disp == Reserved) {
               const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
               vassert(rreg_lrs->lrs_used > 0);
               vassert(rreg_lrs->lr_current_idx < rreg_lrs->lrs_used);
               vassert(rreg_lrs->lr_current->live_after <= (Short) ii);
               vassert((Short) ii < rreg_lrs->lr_current->dead_before);
            }
         }

         /* Sanity check: if vregS has been marked as coalesced to vregD,
            then the effective live range of vregS must also cover the live
            range of vregD. */
         /* The following sanity check is quite expensive. Some basic blocks
            contain very lengthy coalescing chains... */
         if (SANITY_CHECKS_EVERY_INSTR) {
            for (UInt vs_idx = 0; vs_idx < n_vregs; vs_idx++) {
               const VRegState* vS_st = &vreg_state[vs_idx];
               HReg vregD = vS_st->coalescedTo;
               while (! hregIsInvalid(vregD)) {
                  const VRegState* vD_st = &vreg_state[hregIndex(vregD)];
                  vassert(vS_st->live_after <= vD_st->live_after);
                  vassert(vS_st->effective_dead_before >= vD_st->dead_before);
                  vregD = vD_st->coalescedTo;
               }
            }
         }
      }
      /* --- MOV coalescing (finishing) --- */
      /* Optimise register coalescing:
            MOV v <-> v coalescing (finished here).
            MOV v <-> r coalescing (TODO: not yet). */
      if (reg_usage[ii].isVregVregMove) {
         HReg vregS = reg_usage[ii].regMoveSrc;
         HReg vregD = reg_usage[ii].regMoveDst;
         UInt vs_idx = hregIndex(vregS);
         UInt vd_idx = hregIndex(vregD);

         if (sameHReg(vreg_state[vs_idx].coalescedTo, vregD)) {
            /* Finally do the coalescing. */

            HReg rreg = vreg_state[vs_idx].rreg;
            switch (vreg_state[vs_idx].disp) {
            case Assigned:
               vreg_state[vd_idx].rreg = rreg;
               UInt r_idx = hregIndex(rreg);
               vassert(rreg_state[r_idx].disp == Bound);
               rreg_state[r_idx].vreg = vregD;
               break;
            case Spilled:
               vassert(hregIsInvalid(vreg_state[vs_idx].rreg));
               break;
            default:
               vassert(0);
            }

            vreg_state[vd_idx].disp = vreg_state[vs_idx].disp;
            FREE_VREG(&vreg_state[vs_idx]);

            if (DEBUG_REGALLOC) {
               vex_printf("coalesced: ");
               con->ppReg(vregS);
               vex_printf(" -> ");
               con->ppReg(vregD);
               vex_printf("\n\n");
            }

            /* In rare cases it can happen that vregD's live range ends here.
               Check and eventually free the vreg and rreg.
               This effectively means that either the translated program
               contained dead code (but VEX iropt passes are pretty good
               at eliminating it) or the VEX backend generated dead code. */
            if (vreg_state[vd_idx].dead_before <= (Short) ii + 1) {
               if (vreg_state[vd_idx].disp == Assigned) {
                  UInt r_idx = hregIndex(rreg);
                  FREE_RREG(&rreg_state[r_idx]);
               }
               FREE_VREG(&vreg_state[vd_idx]);
            }

            /* Move on to the next instruction. We skip the post-instruction
               stuff because all required house-keeping was done here. */
            continue;
         }
      }
      /* --- Reserve and free rregs if needed. --- */
      /* If the rreg enters its hard live range and is not free:
         1. If the corresponding vreg is not used by the instruction, spill it.
         2. If the corresponding vreg is used by the instruction, then:
         2a. If there are no free rregs, spill a vreg not used by this
             instruction.
         2b. Move the corresponding vreg to a free rreg. This is better than
             spilling it and immediately reloading it. */
      const ULong rRead      = reg_usage[ii].rRead;
      const ULong rWritten   = reg_usage[ii].rWritten;
      const ULong rMentioned = rRead | rWritten;

      if (rMentioned != 0) {
         UInt rReg_minIndex = ULong__minIndex(rMentioned);
         UInt rReg_maxIndex = ULong__maxIndex(rMentioned);
         if (rReg_maxIndex >= n_rregs) {
            rReg_maxIndex = n_rregs - 1;
         }

         for (UInt r_idx = rReg_minIndex; r_idx <= rReg_maxIndex; r_idx++) {
            const ULong jMask = 1ULL << r_idx;

            if (LIKELY((rMentioned & jMask) == 0)) {
               continue;
            }

            RRegState* rreg = &rreg_state[r_idx];
            const RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
            if (LIKELY(rreg_lrs->lrs_used == 0)) {
               continue;
            }
            if (rreg->disp == Reserved) {
               continue;
            }

            if ((rreg_lrs->lr_current->live_after <= (Short) ii)
                && ((Short) ii < rreg_lrs->lr_current->dead_before)) {

               switch (rreg->disp) {
               case Bound: {
                  /* Yes, there is an associated vreg. We need to deal with
                     it now somehow. */
                  HReg vreg = rreg->vreg;
                  UInt v_idx = hregIndex(vreg);

                  if (! HRegUsage__contains(&reg_usage[ii], vreg)) {
                     if (rreg->eq_spill_slot) {
                        mark_vreg_spilled(v_idx, vreg_state, n_vregs,
                                          rreg_state, n_rregs);
                     } else {
                        /* Spill the vreg. It is not used by this
                           instruction. */
                        spill_vreg(vreg, v_idx, ii, vreg_state, n_vregs,
                                   rreg_state, n_rregs, instrs_out, con);
                     }
                  } else {
                     /* Find or make a free rreg where to move this vreg to. */
                     UInt r_free_idx = FIND_OR_MAKE_FREE_RREG(
                                  ii, v_idx, vreg_state[v_idx].reg_class, True);

                     /* Generate "move" between real registers. */
                     HInstr* move = con->genMove(con->univ->regs[r_idx],
                                  con->univ->regs[r_free_idx], con->mode64);
                     vassert(move != NULL);
                     emit_instr(move, instrs_out, con, "move");

                     /* Update the register allocator state. */
                     vassert(vreg_state[v_idx].disp == Assigned);
                     vreg_state[v_idx].rreg = con->univ->regs[r_free_idx];
                     rreg_state[r_free_idx].disp = Bound;
                     rreg_state[r_free_idx].vreg = vreg;
                     rreg_state[r_free_idx].eq_spill_slot = rreg->eq_spill_slot;
                     FREE_RREG(rreg);
                  }
                  break;
               }
               case Free:
                  break;
               default:
                  vassert(0);
               }

               /* Finally claim the rreg as reserved. */
               rreg->disp = Reserved;

               if (DEBUG_REGALLOC) {
                  vex_printf("rreg has been reserved: ");
                  con->ppReg(con->univ->regs[r_idx]);
                  vex_printf("\n\n");
               }
            }
         }
      }
      /* --- Direct reload optimisation. --- */
      /* If the instruction reads exactly one vreg which is currently spilled,
         and this is the last use of that vreg, see if we can convert
         the instruction into one that reads directly from the spill slot.
         This is clearly only possible for x86 and amd64 targets, since ppc
         and arm are load-store architectures. If successful, replace
         instrs_in->arr[ii] with this new instruction, and recompute
         its reg_usage, so that the change is invisible to the standard-case
         handling that follows. */
      if ((con->directReload != NULL) && (reg_usage[ii].n_vRegs <= 2)) {
         Bool debug_direct_reload = False;
         UInt nreads = 0;
         HReg vreg_found = INVALID_HREG;
         Short spill_offset = 0;

         for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
            HReg vreg = reg_usage[ii].vRegs[j];
            vassert(hregIsVirtual(vreg));

            if (reg_usage[ii].vMode[j] == HRmRead) {
               nreads++;
               UInt v_idx = hregIndex(vreg);
               vassert(IS_VALID_VREGNO(v_idx));
               if (vreg_state[v_idx].disp == Spilled) {
                  /* Is this its last use? */
                  vassert(vreg_state[v_idx].dead_before >= (Short) (ii + 1));
                  if ((vreg_state[v_idx].dead_before == (Short) (ii + 1))
                      && hregIsInvalid(vreg_found)) {
                     vreg_found = vreg;
                     spill_offset = vreg_state[v_idx].spill_offset;
                  }
               }
            }
         }

         if (!hregIsInvalid(vreg_found) && (nreads == 1)) {
            if (reg_usage[ii].n_vRegs == 2) {
               vassert(! sameHReg(reg_usage[ii].vRegs[0],
                                  reg_usage[ii].vRegs[1]));
            }

            HInstr* reloaded = con->directReload(instrs_in->arr[ii],
                                                 vreg_found, spill_offset);
            if (debug_direct_reload && (reloaded != NULL)) {
               vex_printf("[%3d] ", spill_offset);
               ppHReg(vreg_found);
               vex_printf(": ");
               con->ppInstr(instr, con->mode64);
            }
            if (reloaded != NULL) {
               /* Update info about the instruction, so it looks as if it had
                  been in this form all along. */
               instr = reloaded;
               instrs_in->arr[ii] = reloaded;
               con->getRegUsage(&reg_usage[ii], instr, con->mode64);
               if (debug_direct_reload) {
                  vex_printf(" --> ");
                  con->ppInstr(reloaded, con->mode64);
               }
            }
            if (debug_direct_reload && (reloaded != NULL)) {
               vex_printf("\n");
            }
         }
      }
      /* The vreg -> rreg map constructed and then applied to each
         instruction. */
      HRegRemap remap;
      initHRegRemap(&remap);

      /* --- Allocate vregs used by the instruction. --- */
      /* Vregs used by the instruction can be in the following states:
         - Unallocated: vreg is entering its live range. Find a free rreg.
         - Assigned: we do nothing; rreg has been allocated previously.
         - Spilled: Find a free rreg and reload vreg into it.
         Naturally, finding a free rreg may involve spilling a vreg not used by
         the instruction. */
      for (UInt j = 0; j < reg_usage[ii].n_vRegs; j++) {
         HReg vreg = reg_usage[ii].vRegs[j];
         vassert(hregIsVirtual(vreg));

         if (0) {
            vex_printf("considering "); con->ppReg(vreg); vex_printf("\n");
         }

         UInt v_idx = hregIndex(vreg);
         vassert(IS_VALID_VREGNO(v_idx));
         HReg rreg = vreg_state[v_idx].rreg;
         UInt r_idx;
         if (vreg_state[v_idx].disp == Assigned) {
            r_idx = hregIndex(rreg);
            vassert(rreg_state[r_idx].disp == Bound);
            addToHRegRemap(&remap, vreg, rreg);
         } else {
            vassert(hregIsInvalid(rreg));

            /* Find or make a free rreg of the correct class. */
            r_idx = FIND_OR_MAKE_FREE_RREG(
                             ii, v_idx, vreg_state[v_idx].reg_class, False);
            rreg = con->univ->regs[r_idx];

            /* Generate a reload only if the vreg is spilled and is about to
               be read or modified. If it is merely written then reloading it
               first would be pointless. */
            if ((vreg_state[v_idx].disp == Spilled)
                && (reg_usage[ii].vMode[j] != HRmWrite)) {

               HInstr* reload1 = NULL;
               HInstr* reload2 = NULL;
               con->genReload(&reload1, &reload2, rreg,
                              vreg_state[v_idx].spill_offset, con->mode64);
               vassert(reload1 != NULL || reload2 != NULL);
               if (reload1 != NULL) {
                  emit_instr(reload1, instrs_out, con, "reload1");
               }
               if (reload2 != NULL) {
                  emit_instr(reload2, instrs_out, con, "reload2");
               }
            }

            rreg_state[r_idx].disp = Bound;
            rreg_state[r_idx].vreg = vreg;
            rreg_state[r_idx].eq_spill_slot = True;
            vreg_state[v_idx].disp = Assigned;
            vreg_state[v_idx].rreg = rreg;
            addToHRegRemap(&remap, vreg, rreg);
         }

         /* If this vreg is written or modified, mark it so. */
         if (reg_usage[ii].vMode[j] != HRmRead) {
            rreg_state[r_idx].eq_spill_slot = False;
         }
      }

      con->mapRegs(&remap, instr, con->mode64);
      emit_instr(instr, instrs_out, con, NULL);
      if (DEBUG_REGALLOC) {
         vex_printf("After dealing with current instruction:\n");
         PRINT_STATE;
         vex_printf("\n");
      }

      /* ------ Post-instruction actions. ------ */
      /* Free rregs which:
         - Have been reserved and whose hard live range ended.
         - Have been bound to vregs whose live range ended. */
      for (UInt r_idx = 0; r_idx < n_rregs; r_idx++) {
         RRegState*   rreg     = &rreg_state[r_idx];
         RRegLRState* rreg_lrs = &rreg_lr_state[r_idx];
         switch (rreg->disp) {
         case Free:
            break;
         case Reserved:
            if (rreg_lrs->lrs_used > 0) {
               /* Consider "dead before" the next instruction. */
               if (rreg_lrs->lr_current->dead_before <= (Short) ii + 1) {
                  FREE_RREG(&rreg_state[r_idx]);
                  if (rreg_lrs->lr_current_idx < rreg_lrs->lrs_used - 1) {
                     rreg_lrs->lr_current_idx += 1;
                     rreg_lrs->lr_current
                        = &rreg_lrs->lrs[rreg_lrs->lr_current_idx];
                  }
               }
            }
            break;
         case Bound: {
            UInt v_idx = hregIndex(rreg->vreg);
            /* Consider "dead before" the next instruction. */
            if (vreg_state[v_idx].dead_before <= (Short) ii + 1) {
               FREE_VREG(&vreg_state[v_idx]);
               FREE_RREG(&rreg_state[r_idx]);
            }
            break;
         }
         default:
            vassert(0);
         }
      }
   }

   return instrs_out;
}
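
/* Usage sketch (illustrative, not part of this file): a backend fills in the
   RegAllocControl callbacks declared in host_generic_regs.h and hands over
   its vreg-numbered code. Assuming the amd64 backend, this looks roughly
   like:

      RegAllocControl con = { 0 };
      con.univ = getRRegUniverse_AMD64();
      // ... also set mode64, guest_sizeB, getRegUsage, mapRegs, genSpill,
      //     genReload, genMove, directReload, ppInstr and ppReg ...
      HInstrArray* out = doRegisterAllocation_v3(vcode, &con);

   where getRRegUniverse_AMD64 is that backend's register-universe
   constructor and vcode is its virtual-registerised HInstrArray. */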
/*----------------------------------------------------------------------------*/
/*--- end                                        host_generic_reg_alloc3.c ---*/
/*----------------------------------------------------------------------------*/