Bug 497723 - forgot to restore callgrind output cleanup
[valgrind.git] / VEX / priv / host_generic_reg_alloc2.c
blobb731720f22effac7c9773d072164724f09ef4f0b
2 /*---------------------------------------------------------------*/
3 /*--- begin host_reg_alloc2.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
28 Neither the names of the U.S. Department of Energy nor the
29 University of California nor the names of its contributors may be
30 used to endorse or promote products derived from this software
31 without prior written permission.
34 #include "libvex_basictypes.h"
35 #include "libvex.h"
37 #include "main_util.h"
38 #include "host_generic_regs.h"
40 /* Set to 1 for lots of debugging output. */
41 #define DEBUG_REGALLOC 0
44 /* TODO 27 Oct 04:
46 We can possibly do V-V coalescing even when the src is spilled,
47 providing we can arrange for the dst to have the same spill slot.
49 Note that state[].hreg is the same as the available real regs.
51 Generally rationalise data structures. */
54 /* Records information on virtual register live ranges. Computed once
55 and remains unchanged after that. */
56 typedef
57 struct {
58 /* Becomes live for the first time after this insn ... */
59 Short live_after;
60 /* Becomes dead for the last time before this insn ... */
61 Short dead_before;
62 /* The "home" spill slot, if needed. Never changes. */
63 Short spill_offset;
64 Short spill_size;
65 /* What kind of register this is. */
66 HRegClass reg_class;
68 VRegLR;
71 /* Records information on real-register live ranges. Computed once
72 and remains unchanged after that. */
73 typedef
74 struct {
75 HReg rreg;
76 /* Becomes live after this insn ... */
77 Short live_after;
78 /* Becomes dead before this insn ... */
79 Short dead_before;
81 RRegLR;
84 /* An array of the following structs (rreg_state) comprises the
85 running state of the allocator. It indicates what the current
86 disposition of each allocatable real register is. The array gets
87 updated as the allocator processes instructions. The identity of
88 the register is not recorded here, because the index of this
89 structure in doRegisterAllocation()'s |rreg_state| is the index
90 number of the register, and the register itself can be extracted
91 from the RRegUniverse supplied to doRegisterAllocation(). */
92 typedef
93 struct {
94 /* ------ FIELDS WHICH DO NOT CHANGE ------ */
95 /* Is this involved in any HLRs? (only an optimisation hint) */
96 Bool has_hlrs;
97 /* ------ FIELDS WHICH DO CHANGE ------ */
98 /* 6 May 07: rearranged fields below so the whole struct fits
99 into 16 bytes on both x86 and amd64. */
100 /* Used when .disp == Bound and we are looking for vregs to
101 spill. */
102 Bool is_spill_cand;
103 /* Optimisation: used when .disp == Bound. Indicates when the
104 rreg has the same value as the spill slot for the associated
105 vreg. Is safely left at False, and becomes True after a
106 spill store or reload for this rreg. */
107 Bool eq_spill_slot;
108 /* What's it's current disposition? */
109 enum { Free, /* available for use */
110 Unavail, /* in a real-reg live range */
111 Bound /* in use (holding value of some vreg) */
113 disp;
114 /* If .disp == Bound, what vreg is it bound to? */
115 HReg vreg;
117 RRegState;
120 /* The allocator also maintains a redundant array of indexes
121 (vreg_state) from vreg numbers back to entries in rreg_state. It
122 is redundant because iff vreg_state[i] == j then
123 hregNumber(rreg_state[j].vreg) == i -- that is, the two entries
124 point at each other. The purpose of this is to speed up activities
125 which involve looking for a particular vreg: there is no need to
126 scan the rreg_state looking for it, just index directly into
127 vreg_state. The FAQ "does this vreg already have an associated
128 rreg" is the main beneficiary.
130 To indicate, in vreg_state[i], that a given vreg is not currently
131 associated with any rreg, that entry can be set to INVALID_RREG_NO.
133 Because the vreg_state entries are signed Shorts, the max number
134 of vregs that can be handed by regalloc is 32767.
137 #define INVALID_RREG_NO ((Short)(-1))
139 #define IS_VALID_VREGNO(_zz) ((_zz) >= 0 && (_zz) < n_vregs)
140 #define IS_VALID_UNSIGNED_VREGNO(_zz) ((_zz) < n_vregs)
141 #define IS_VALID_RREGNO(_zz) ((_zz) >= 0 && (_zz) < n_rregs)
144 /* Search forward from some given point in the incoming instruction
145 sequence. Point is to select a virtual register to spill, by
146 finding the vreg which is mentioned as far ahead as possible, in
147 the hope that this will minimise the number of consequent reloads.
149 Only do the search for vregs which are Bound in the running state,
150 and for which the .is_spill_cand field is set. This allows the
151 caller to arbitrarily restrict the set of spill candidates to be
152 considered.
154 To do this we don't actually need to see the incoming instruction
155 stream. Rather, what we need us the HRegUsage records for the
156 incoming instruction stream. Hence that is passed in.
158 Returns an index into the state array indicating the (v,r) pair to
159 spill, or -1 if none was found. */
160 static
161 Int findMostDistantlyMentionedVReg (
162 HRegUsage* reg_usages_in,
163 Int search_from_instr,
164 Int num_instrs,
165 RRegState* rreg_state,
166 HRegClass hreg_class,
167 const RegAllocControl* con
170 Int m;
171 Int furthest_k = -1;
172 Int furthest = -1;
173 vassert(search_from_instr >= 0);
174 for (UInt k = con->univ->allocable_start[hreg_class];
175 k <= con->univ->allocable_end[hreg_class]; k++) {
176 if (!rreg_state[k].is_spill_cand)
177 continue;
178 vassert(rreg_state[k].disp == Bound);
179 for (m = search_from_instr; m < num_instrs; m++) {
180 if (HRegUsage__contains(&reg_usages_in[m], rreg_state[k].vreg))
181 break;
183 if (m > furthest) {
184 furthest = m;
185 furthest_k = k;
188 return furthest_k;
192 /* Check that this vreg has been assigned a sane spill offset. */
193 inline
194 static void sanity_check_spill_offset ( VRegLR* vreg )
196 switch (vreg->reg_class) {
197 case HRcVec128: case HRcFlt64:
198 vassert(0 == ((UShort)vreg->spill_offset % 16)); break;
199 default:
200 vassert(0 == ((UShort)vreg->spill_offset % 8)); break;
205 /* Double the size of the real-reg live-range array, if needed. */
206 __attribute__((noinline))
207 static void ensureRRLRspace_SLOW ( RRegLR** info, Int* size, Int used )
209 Int k;
210 RRegLR* arr2;
211 if (0)
212 vex_printf("ensureRRLRspace: %d -> %d\n", *size, 2 * *size);
213 vassert(used == *size);
214 arr2 = LibVEX_Alloc_inline(2 * *size * sizeof(RRegLR));
215 for (k = 0; k < *size; k++)
216 arr2[k] = (*info)[k];
217 *size *= 2;
218 *info = arr2;
220 inline
221 static void ensureRRLRspace ( RRegLR** info, Int* size, Int used )
223 if (LIKELY(used < *size)) return;
224 ensureRRLRspace_SLOW(info, size, used);
228 /* Sort an array of RRegLR entries by either the .live_after or
229 .dead_before fields. This is performance-critical. */
230 static void sortRRLRarray ( RRegLR* arr,
231 Int size, Bool by_live_after )
233 Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
234 9841, 29524, 88573, 265720,
235 797161, 2391484 };
236 Int lo = 0;
237 Int hi = size-1;
238 Int i, j, h, bigN, hp;
239 RRegLR v;
241 vassert(size >= 0);
242 if (size == 0)
243 return;
245 bigN = hi - lo + 1; if (bigN < 2) return;
246 hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--;
248 if (by_live_after) {
250 for ( ; hp >= 0; hp--) {
251 h = incs[hp];
252 for (i = lo + h; i <= hi; i++) {
253 v = arr[i];
254 j = i;
255 while (arr[j-h].live_after > v.live_after) {
256 arr[j] = arr[j-h];
257 j = j - h;
258 if (j <= (lo + h - 1)) break;
260 arr[j] = v;
264 } else {
266 for ( ; hp >= 0; hp--) {
267 h = incs[hp];
268 for (i = lo + h; i <= hi; i++) {
269 v = arr[i];
270 j = i;
271 while (arr[j-h].dead_before > v.dead_before) {
272 arr[j] = arr[j-h];
273 j = j - h;
274 if (j <= (lo + h - 1)) break;
276 arr[j] = v;
284 /* Compute the index of the highest and lowest 1 in a ULong,
285 respectively. Results are undefined if the argument is zero.
286 Don't pass it zero :) */
287 static inline UInt ULong__maxIndex ( ULong w64 ) {
288 return 63 - __builtin_clzll(w64);
291 static inline UInt ULong__minIndex ( ULong w64 ) {
292 return __builtin_ctzll(w64);
296 /* A target-independent register allocator. Requires various
297 functions which it uses to deal abstractly with instructions and
298 registers, since it cannot have any target-specific knowledge.
300 Returns a new list of instructions, which, as a result of the
301 behaviour of mapRegs, will be in-place modifications of the
302 original instructions.
304 Requires that the incoming code has been generated using
305 vreg numbers 0, 1 .. n_vregs-1. Appearance of a vreg outside
306 that range is a checked run-time error.
308 Takes an expandable array of pointers to unallocated insns.
309 Returns an expandable array of pointers to allocated insns.
311 HInstrArray* doRegisterAllocation_v2 (
313 /* Incoming virtual-registerised code. */
314 HInstrArray* instrs_in,
316 /* Register allocator controls to use. */
317 const RegAllocControl* con
320 # define N_SPILL64S (LibVEX_N_SPILL_BYTES / 8)
322 const Bool eq_spill_opt = True;
324 /* Info on vregs and rregs. Computed once and remains
325 unchanged. */
326 Int n_vregs;
327 VRegLR* vreg_lrs; /* [0 .. n_vregs-1] */
329 /* We keep two copies of the real-reg live range info, one sorted
330 by .live_after and the other by .dead_before. First the
331 unsorted info is created in the _la variant is copied into the
332 _db variant. Once that's done both of them are sorted.
333 We also need two integer cursors which record the next
334 location in the two arrays to consider. */
335 RRegLR* rreg_lrs_la;
336 RRegLR* rreg_lrs_db;
337 Int rreg_lrs_size;
338 Int rreg_lrs_used;
339 Int rreg_lrs_la_next;
340 Int rreg_lrs_db_next;
342 /* Info on register usage in the incoming instruction array.
343 Computed once and remains unchanged, more or less; updated
344 sometimes by the direct-reload optimisation. */
345 HRegUsage* reg_usage_arr; /* [0 .. instrs_in->arr_used-1] */
347 /* Used when constructing vreg_lrs (for allocating stack
348 slots). */
349 Short ss_busy_until_before[N_SPILL64S];
351 /* Used when constructing rreg_lrs. */
352 Int* rreg_live_after;
353 Int* rreg_dead_before;
355 /* Running state of the core allocation algorithm. */
356 RRegState* rreg_state; /* [0 .. n_rregs-1] */
357 Int n_rregs;
359 /* .. and the redundant backward map */
360 /* Each value is 0 .. n_rregs-1 or is INVALID_RREG_NO.
361 This implies n_rregs must be <= 32768. */
362 Short* vreg_state; /* [0 .. n_vregs-1] */
364 /* The vreg -> rreg map constructed and then applied to each
365 instr. */
366 HRegRemap remap;
368 /* The output array of instructions. */
369 HInstrArray* instrs_out;
371 /* Sanity checks are expensive. They are only done periodically,
372 not at each insn processed. */
373 Bool do_sanity_check;
375 vassert(0 == (con->guest_sizeB % LibVEX_GUEST_STATE_ALIGN));
376 vassert(0 == (LibVEX_N_SPILL_BYTES % LibVEX_GUEST_STATE_ALIGN));
377 vassert(0 == (N_SPILL64S % 2));
379 /* The live range numbers are signed shorts, and so limiting the
380 number of insns to 15000 comfortably guards against them
381 overflowing 32k. */
382 vassert(instrs_in->arr_used <= 15000);
384 # define INVALID_INSTRNO (-2)
386 # define EMIT_INSTR(_instr) \
387 do { \
388 HInstr* _tmp = (_instr); \
389 if (DEBUG_REGALLOC) { \
390 vex_printf("** "); \
391 con->ppInstr(_tmp, con->mode64); \
392 vex_printf("\n\n"); \
394 addHInstr ( instrs_out, _tmp ); \
395 } while (0)
397 # define PRINT_STATE \
398 do { \
399 Int z, q; \
400 for (z = 0; z < n_rregs; z++) { \
401 vex_printf(" rreg_state[%2d] = ", z); \
402 con->ppReg(con->univ->regs[z]); \
403 vex_printf(" \t"); \
404 switch (rreg_state[z].disp) { \
405 case Free: vex_printf("Free\n"); break; \
406 case Unavail: vex_printf("Unavail\n"); break; \
407 case Bound: vex_printf("BoundTo "); \
408 con->ppReg(rreg_state[z].vreg); \
409 vex_printf("\n"); break; \
412 vex_printf("\n vreg_state[0 .. %d]:\n ", n_vregs-1); \
413 q = 0; \
414 for (z = 0; z < n_vregs; z++) { \
415 if (vreg_state[z] == INVALID_RREG_NO) \
416 continue; \
417 vex_printf("[%d] -> %d ", z, vreg_state[z]); \
418 q++; \
419 if (q > 0 && (q % 6) == 0) \
420 vex_printf("\n "); \
422 vex_printf("\n"); \
423 } while (0)
426 /* --------- Stage 0: set up output array --------- */
427 /* --------- and allocate/initialise running state. --------- */
429 instrs_out = newHInstrArray();
431 /* ... and initialise running state. */
432 /* n_rregs is no more than a short name for n_available_real_regs. */
433 n_rregs = con->univ->allocable;
434 n_vregs = instrs_in->n_vregs;
436 /* If this is not so, vreg_state entries will overflow. */
437 vassert(n_vregs < 32767);
439 /* If this is not so, the universe we have is nonsensical. */
440 vassert(n_rregs > 0);
442 rreg_state = LibVEX_Alloc_inline(n_rregs * sizeof(RRegState));
443 vreg_state = LibVEX_Alloc_inline(n_vregs * sizeof(Short));
445 for (Int j = 0; j < n_rregs; j++) {
446 rreg_state[j].has_hlrs = False;
447 rreg_state[j].disp = Free;
448 rreg_state[j].vreg = INVALID_HREG;
449 rreg_state[j].is_spill_cand = False;
450 rreg_state[j].eq_spill_slot = False;
453 for (Int j = 0; j < n_vregs; j++)
454 vreg_state[j] = INVALID_RREG_NO;
457 /* --------- Stage 1: compute vreg live ranges. --------- */
458 /* --------- Stage 2: compute rreg live ranges. --------- */
460 /* ------ start of SET UP TO COMPUTE VREG LIVE RANGES ------ */
462 /* This is relatively simple, because (1) we only seek the complete
463 end-to-end live range of each vreg, and are not interested in
464 any holes in it, and (2) the vregs are conveniently numbered 0
465 .. n_vregs-1, so we can just dump the results in a
466 pre-allocated array. */
468 vreg_lrs = NULL;
469 if (n_vregs > 0)
470 vreg_lrs = LibVEX_Alloc_inline(sizeof(VRegLR) * n_vregs);
472 for (Int j = 0; j < n_vregs; j++) {
473 vreg_lrs[j].live_after = INVALID_INSTRNO;
474 vreg_lrs[j].dead_before = INVALID_INSTRNO;
475 vreg_lrs[j].spill_offset = 0;
476 vreg_lrs[j].spill_size = 0;
477 vreg_lrs[j].reg_class = HRcINVALID;
480 /* An array to hold the reg-usage info for the incoming
481 instructions. */
482 reg_usage_arr = LibVEX_Alloc_inline(sizeof(HRegUsage) * instrs_in->arr_used);
484 /* ------ end of SET UP TO COMPUTE VREG LIVE RANGES ------ */
486 /* ------ start of SET UP TO COMPUTE RREG LIVE RANGES ------ */
488 /* This is more complex than Stage 1, because we need to compute
489 exactly all the live ranges of all the allocatable real regs,
490 and we don't know in advance how many there will be. */
492 rreg_lrs_used = 0;
493 rreg_lrs_size = 4;
494 rreg_lrs_la = LibVEX_Alloc_inline(rreg_lrs_size * sizeof(RRegLR));
495 rreg_lrs_db = NULL; /* we'll create this later */
497 /* We'll need to track live range start/end points separately for
498 each rreg. Sigh. */
499 vassert(n_rregs > 0);
500 rreg_live_after = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
501 rreg_dead_before = LibVEX_Alloc_inline(n_rregs * sizeof(Int));
503 for (Int j = 0; j < n_rregs; j++) {
504 rreg_live_after[j] =
505 rreg_dead_before[j] = INVALID_INSTRNO;
508 /* ------ end of SET UP TO COMPUTE RREG LIVE RANGES ------ */
510 /* ------ start of ITERATE OVER INSNS ------ */
512 for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
514 con->getRegUsage(&reg_usage_arr[ii], instrs_in->arr[ii], con->mode64);
515 reg_usage_arr[ii].isVregVregMove
516 = reg_usage_arr[ii].isRegRegMove
517 && hregIsVirtual(reg_usage_arr[ii].regMoveSrc)
518 && hregIsVirtual(reg_usage_arr[ii].regMoveDst);
520 if (0) {
521 vex_printf("\n%d stage1: ", ii);
522 con->ppInstr(instrs_in->arr[ii], con->mode64);
523 vex_printf("\n");
524 ppHRegUsage(con->univ, &reg_usage_arr[ii]);
527 /* ------ start of DEAL WITH VREG LIVE RANGES ------ */
529 /* for each virtual reg mentioned in the insn ... */
530 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
532 HReg vreg = reg_usage_arr[ii].vRegs[j];
533 vassert(hregIsVirtual(vreg));
535 Int k = hregIndex(vreg);
536 if (k < 0 || k >= n_vregs) {
537 vex_printf("\n");
538 con->ppInstr(instrs_in->arr[ii], con->mode64);
539 vex_printf("\n");
540 vex_printf("vreg %d, n_vregs %d\n", k, n_vregs);
541 vpanic("doRegisterAllocation: out-of-range vreg");
544 /* Take the opportunity to note its regclass. We'll need
545 that when allocating spill slots. */
546 if (vreg_lrs[k].reg_class == HRcINVALID) {
547 /* First mention of this vreg. */
548 vreg_lrs[k].reg_class = hregClass(vreg);
549 } else {
550 /* Seen it before, so check for consistency. */
551 vassert(vreg_lrs[k].reg_class == hregClass(vreg));
554 /* Now consider live ranges. */
555 switch (reg_usage_arr[ii].vMode[j]) {
556 case HRmRead:
557 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
558 vex_printf("\n\nOFFENDING VREG = %d\n", k);
559 vpanic("doRegisterAllocation: "
560 "first event for vreg is Read");
562 vreg_lrs[k].dead_before = toShort(ii + 1);
563 break;
564 case HRmWrite:
565 if (vreg_lrs[k].live_after == INVALID_INSTRNO)
566 vreg_lrs[k].live_after = toShort(ii);
567 vreg_lrs[k].dead_before = toShort(ii + 1);
568 break;
569 case HRmModify:
570 if (vreg_lrs[k].live_after == INVALID_INSTRNO) {
571 vex_printf("\n\nOFFENDING VREG = %d\n", k);
572 vpanic("doRegisterAllocation: "
573 "first event for vreg is Modify");
575 vreg_lrs[k].dead_before = toShort(ii + 1);
576 break;
577 default:
578 vpanic("doRegisterAllocation(1)");
579 } /* switch */
581 } /* iterate over virtual registers */
583 /* ------ end of DEAL WITH VREG LIVE RANGES ------ */
585 /* ------ start of DEAL WITH RREG LIVE RANGES ------ */
587 /* If this doesn't hold, the following iteration over real registers
588 will fail miserably. */
589 vassert(N_RREGUNIVERSE_REGS == 64);
591 const ULong rRead = reg_usage_arr[ii].rRead;
592 const ULong rWritten = reg_usage_arr[ii].rWritten;
593 const ULong rMentioned = rRead | rWritten;
595 UInt rReg_minIndex;
596 UInt rReg_maxIndex;
597 if (rMentioned == 0) {
598 /* There are no real register uses in this insn. Set
599 rReg_{min,max}Index so that the following loop doesn't iterate
600 at all, so as to avoid wasting time. */
601 rReg_minIndex = 1;
602 rReg_maxIndex = 0;
603 } else {
604 rReg_minIndex = ULong__minIndex(rMentioned);
605 rReg_maxIndex = ULong__maxIndex(rMentioned);
606 /* Don't bother to look at registers which are not available
607 to the allocator. We asserted above that n_rregs > 0, so
608 n_rregs-1 is safe. */
609 if (rReg_maxIndex >= n_rregs)
610 rReg_maxIndex = n_rregs-1;
613 /* for each allocator-available real reg mentioned in the insn ... */
614 /* Note. We are allocating only over the real regs available to
615 the allocator. Others, eg the stack or baseblock pointers,
616 are unavailable to allocation and so we never visit them.
617 Hence the iteration is cut off at n_rregs-1, since n_rregs ==
618 univ->allocable. */
619 for (Int j = rReg_minIndex; j <= rReg_maxIndex; j++) {
621 const ULong jMask = 1ULL << j;
622 if (LIKELY((rMentioned & jMask) == 0))
623 continue;
625 const Bool isR = (rRead & jMask) != 0;
626 const Bool isW = (rWritten & jMask) != 0;
628 /* Dummy initialisations of flush_la and flush_db to avoid
629 possible bogus uninit-var warnings from gcc. */
630 Int flush_la = INVALID_INSTRNO, flush_db = INVALID_INSTRNO;
631 Bool flush = False;
633 if (isW && !isR) {
634 flush_la = rreg_live_after[j];
635 flush_db = rreg_dead_before[j];
636 if (flush_la != INVALID_INSTRNO && flush_db != INVALID_INSTRNO)
637 flush = True;
638 rreg_live_after[j] = ii;
639 rreg_dead_before[j] = ii+1;
640 } else if (!isW && isR) {
641 if (rreg_live_after[j] == INVALID_INSTRNO) {
642 vex_printf("\nOFFENDING RREG = ");
643 con->ppReg(con->univ->regs[j]);
644 vex_printf("\n");
645 vex_printf("\nOFFENDING instr = ");
646 con->ppInstr(instrs_in->arr[ii], con->mode64);
647 vex_printf("\n");
648 vpanic("doRegisterAllocation: "
649 "first event for rreg is Read");
651 rreg_dead_before[j] = ii+1;
652 } else {
653 vassert(isR && isW);
654 if (rreg_live_after[j] == INVALID_INSTRNO) {
655 vex_printf("\nOFFENDING RREG = ");
656 con->ppReg(con->univ->regs[j]);
657 vex_printf("\n");
658 vex_printf("\nOFFENDING instr = ");
659 con->ppInstr(instrs_in->arr[ii], con->mode64);
660 vex_printf("\n");
661 vpanic("doRegisterAllocation: "
662 "first event for rreg is Modify");
664 rreg_dead_before[j] = ii+1;
667 if (flush) {
668 vassert(flush_la != INVALID_INSTRNO);
669 vassert(flush_db != INVALID_INSTRNO);
670 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
671 if (0)
672 vex_printf("FLUSH 1 (%d,%d)\n", flush_la, flush_db);
673 rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j];
674 rreg_lrs_la[rreg_lrs_used].live_after = toShort(flush_la);
675 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(flush_db);
676 rreg_lrs_used++;
679 } /* iterate over rregs in the instr */
681 /* ------ end of DEAL WITH RREG LIVE RANGES ------ */
683 } /* iterate over insns */
685 /* ------ end of ITERATE OVER INSNS ------ */
687 /* ------ start of FINALISE RREG LIVE RANGES ------ */
689 /* Now finish up any live ranges left over. */
690 for (Int j = 0; j < n_rregs; j++) {
692 if (0) {
693 vex_printf("residual %d: %d %d\n", j, rreg_live_after[j],
694 rreg_dead_before[j]);
696 vassert( (rreg_live_after[j] == INVALID_INSTRNO
697 && rreg_dead_before[j] == INVALID_INSTRNO)
699 (rreg_live_after[j] != INVALID_INSTRNO
700 && rreg_dead_before[j] != INVALID_INSTRNO)
703 if (rreg_live_after[j] == INVALID_INSTRNO)
704 continue;
706 ensureRRLRspace(&rreg_lrs_la, &rreg_lrs_size, rreg_lrs_used);
707 if (0)
708 vex_printf("FLUSH 2 (%d,%d)\n",
709 rreg_live_after[j], rreg_dead_before[j]);
710 rreg_lrs_la[rreg_lrs_used].rreg = con->univ->regs[j];
711 rreg_lrs_la[rreg_lrs_used].live_after = toShort(rreg_live_after[j]);
712 rreg_lrs_la[rreg_lrs_used].dead_before = toShort(rreg_dead_before[j]);
713 rreg_lrs_used++;
716 /* Compute summary hints for choosing real regs. If a real reg is
717 involved in a hard live range, record that fact in the fixed
718 part of the running rreg_state. Later, when offered a choice between
719 rregs, it's better to choose one which is not marked as having
720 any HLRs, since ones with HLRs may need to be spilled around
721 their HLRs. Correctness of final assignment is unaffected by
722 this mechanism -- it is only an optimisation. */
724 for (Int j = 0; j < rreg_lrs_used; j++) {
725 HReg rreg = rreg_lrs_la[j].rreg;
726 vassert(!hregIsVirtual(rreg));
727 /* rreg is involved in a HLR. Record this info in the array, if
728 there is space. */
729 UInt ix = hregIndex(rreg);
730 vassert(ix < n_rregs);
731 rreg_state[ix].has_hlrs = True;
733 if (0) {
734 for (Int j = 0; j < n_rregs; j++) {
735 if (!rreg_state[j].has_hlrs)
736 continue;
737 con->ppReg(con->univ->regs[j]);
738 vex_printf(" hinted\n");
742 /* Finally, copy the _la variant into the _db variant and
743 sort both by their respective fields. */
744 rreg_lrs_db = LibVEX_Alloc_inline(rreg_lrs_used * sizeof(RRegLR));
745 for (Int j = 0; j < rreg_lrs_used; j++)
746 rreg_lrs_db[j] = rreg_lrs_la[j];
748 sortRRLRarray( rreg_lrs_la, rreg_lrs_used, True /* by .live_after*/ );
749 sortRRLRarray( rreg_lrs_db, rreg_lrs_used, False/* by .dead_before*/ );
751 /* And set up the cursors. */
752 rreg_lrs_la_next = 0;
753 rreg_lrs_db_next = 0;
755 for (Int j = 1; j < rreg_lrs_used; j++) {
756 vassert(rreg_lrs_la[j-1].live_after <= rreg_lrs_la[j].live_after);
757 vassert(rreg_lrs_db[j-1].dead_before <= rreg_lrs_db[j].dead_before);
760 /* ------ end of FINALISE RREG LIVE RANGES ------ */
762 if (DEBUG_REGALLOC) {
763 for (Int j = 0; j < n_vregs; j++) {
764 vex_printf("vreg %d: la = %d, db = %d\n",
765 j, vreg_lrs[j].live_after, vreg_lrs[j].dead_before );
769 if (DEBUG_REGALLOC) {
770 vex_printf("RRegLRs by LA:\n");
771 for (Int j = 0; j < rreg_lrs_used; j++) {
772 vex_printf(" ");
773 con->ppReg(rreg_lrs_la[j].rreg);
774 vex_printf(" la = %d, db = %d\n",
775 rreg_lrs_la[j].live_after, rreg_lrs_la[j].dead_before );
777 vex_printf("RRegLRs by DB:\n");
778 for (Int j = 0; j < rreg_lrs_used; j++) {
779 vex_printf(" ");
780 con->ppReg(rreg_lrs_db[j].rreg);
781 vex_printf(" la = %d, db = %d\n",
782 rreg_lrs_db[j].live_after, rreg_lrs_db[j].dead_before );
786 /* --------- Stage 3: allocate spill slots. --------- */
788 /* Each spill slot is 8 bytes long. For vregs which take more than
789 64 bits to spill (classes Flt64 and Vec128), we have to allocate
790 two consecutive spill slots. For 256 bit registers (class
791 Vec256), we have to allocate four consecutive spill slots.
793 For Vec128-class on PowerPC, the spill slot's actual address
794 must be 16-byte aligned. Since the spill slot's address is
795 computed as an offset from the guest state pointer, and since
796 the user of the generated code must set that pointer to a
797 32-aligned value, we have the residual obligation here of
798 choosing a 16-aligned spill slot offset for Vec128-class values.
799 Since each spill slot is 8 bytes long, that means for
800 Vec128-class values we must allocated a spill slot number which
801 is zero mod 2.
803 Similarly, for Vec256 class on amd64, find a spill slot number
804 which is zero mod 4. This guarantees it will be 32 byte
805 aligned, which isn't actually necessary on amd64 (we use movUpd
806 etc to spill), but seems like good practice.
808 Do a rank-based allocation of vregs to spill slot numbers. We
809 put as few values as possible in spill slots, but nevertheless
810 need to have a spill slot available for all vregs, just in case.
812 /* Int max_ss_no = -1; */
814 vex_bzero(ss_busy_until_before, sizeof(ss_busy_until_before));
816 for (Int j = 0; j < n_vregs; j++) {
818 /* True iff this vreg is unused. In which case we also expect
819 that the reg_class field for it has not been set. */
820 if (vreg_lrs[j].live_after == INVALID_INSTRNO) {
821 vassert(vreg_lrs[j].reg_class == HRcINVALID);
822 continue;
825 /* The spill slots are 64 bits in size. As per the comment on
826 definition of HRegClass in host_generic_regs.h, that means,
827 to spill a vreg of class Flt64 or Vec128, we'll need to find
828 two adjacent spill slots to use. For Vec256, we'll need to
829 find four adjacent slots to use. Note, this logic needs to
830 kept in sync with the size info on the definition of
831 HRegClass. */
832 Int ss_no = -1;
833 switch (vreg_lrs[j].reg_class) {
835 case HRcVec128: case HRcFlt64:
836 /* Find two adjacent free slots in which between them
837 provide up to 128 bits in which to spill the vreg.
838 Since we are trying to find an even:odd pair, move
839 along in steps of 2 (slots). */
840 for (ss_no = 0; ss_no < N_SPILL64S-1; ss_no += 2)
841 if (ss_busy_until_before[ss_no+0] <= vreg_lrs[j].live_after
842 && ss_busy_until_before[ss_no+1] <= vreg_lrs[j].live_after)
843 break;
844 if (ss_no >= N_SPILL64S-1) {
845 vpanic("LibVEX_N_SPILL_BYTES is too low. "
846 "Increase and recompile.");
848 ss_busy_until_before[ss_no+0] = vreg_lrs[j].dead_before;
849 ss_busy_until_before[ss_no+1] = vreg_lrs[j].dead_before;
850 break;
852 default:
853 /* The ordinary case -- just find a single spill slot. */
854 /* Find the lowest-numbered spill slot which is available
855 at the start point of this interval, and assign the
856 interval to it. */
857 for (ss_no = 0; ss_no < N_SPILL64S; ss_no++)
858 if (ss_busy_until_before[ss_no] <= vreg_lrs[j].live_after)
859 break;
860 if (ss_no == N_SPILL64S) {
861 vpanic("LibVEX_N_SPILL_BYTES is too low. "
862 "Increase and recompile.");
864 ss_busy_until_before[ss_no] = vreg_lrs[j].dead_before;
865 break;
867 } /* switch (vreg_lrs[j].reg_class) */
869 /* This reflects LibVEX's hard-wired knowledge of the baseBlock
870 layout: the guest state, then two equal sized areas following
871 it for two sets of shadow state, and then the spill area. */
872 vreg_lrs[j].spill_offset = toShort(con->guest_sizeB * 3 + ss_no * 8);
874 /* Independent check that we've made a sane choice of slot */
875 sanity_check_spill_offset( &vreg_lrs[j] );
876 /* if (j > max_ss_no) */
877 /* max_ss_no = j; */
880 if (0) {
881 vex_printf("\n\n");
882 for (Int j = 0; j < n_vregs; j++)
883 vex_printf("vreg %d --> spill offset %d\n",
884 j, vreg_lrs[j].spill_offset);
887 /* --------- Stage 4: establish rreg preferences --------- */
889 /* It may be advantageous to allocating certain vregs to specific
890 rregs, as a way of avoiding reg-reg moves later. Here we
891 establish which, if any, rreg each vreg would prefer to be in.
892 Note that this constrains the allocator -- ideally we end up
893 with as few as possible vregs expressing a preference.
895 This is an optimisation: if the .preferred_rreg field is never
896 set to anything different from INVALID_HREG, the allocator still
897 works. */
899 /* 30 Dec 04: removed this mechanism as it does not seem to
900 help. */
902 /* --------- Stage 5: process instructions --------- */
904 /* This is the main loop of the allocator. First, we need to
905 correctly set up our running state, which tracks the status of
906 each real register. */
908 /* ------ BEGIN: Process each insn in turn. ------ */
910 for (Int ii = 0; ii < instrs_in->arr_used; ii++) {
912 if (DEBUG_REGALLOC) {
913 vex_printf("\n====----====---- Insn %d ----====----====\n", ii);
914 vex_printf("---- ");
915 con->ppInstr(instrs_in->arr[ii], con->mode64);
916 vex_printf("\n\nInitial state:\n");
917 PRINT_STATE;
918 vex_printf("\n");
921 /* ------------ Sanity checks ------------ */
923 /* Sanity checks are expensive. So they are done only once
924 every 17 instructions, and just before the last
925 instruction. */
926 do_sanity_check
927 = toBool(
928 False /* Set to True for sanity checking of all insns. */
929 || ii == instrs_in->arr_used-1
930 || (ii > 0 && (ii % 17) == 0)
933 if (do_sanity_check) {
935 /* Sanity check 1: all rregs with a hard live range crossing
936 this insn must be marked as unavailable in the running
937 state. */
938 for (Int j = 0; j < rreg_lrs_used; j++) {
939 if (rreg_lrs_la[j].live_after < ii
940 && ii < rreg_lrs_la[j].dead_before) {
941 /* ii is the middle of a hard live range for some real
942 reg. Check it's marked as such in the running
943 state. */
944 HReg reg = rreg_lrs_la[j].rreg;
946 if (0) {
947 vex_printf("considering la %d .. db %d reg = ",
948 rreg_lrs_la[j].live_after,
949 rreg_lrs_la[j].dead_before);
950 con->ppReg(reg);
951 vex_printf("\n");
954 /* assert that this rreg is marked as unavailable */
955 vassert(!hregIsVirtual(reg));
956 vassert(rreg_state[hregIndex(reg)].disp == Unavail);
960 /* Sanity check 2: conversely, all rregs marked as
961 unavailable in the running rreg_state must have a
962 corresponding hard live range entry in the rreg_lrs
963 array. */
964 for (Int j = 0; j < n_rregs; j++) {
965 vassert(rreg_state[j].disp == Bound
966 || rreg_state[j].disp == Free
967 || rreg_state[j].disp == Unavail);
968 if (rreg_state[j].disp != Unavail)
969 continue;
970 Int k;
971 for (k = 0; k < rreg_lrs_used; k++) {
972 HReg reg = rreg_lrs_la[k].rreg;
973 vassert(!hregIsVirtual(reg));
974 if (hregIndex(reg) == j
975 && rreg_lrs_la[k].live_after < ii
976 && ii < rreg_lrs_la[k].dead_before)
977 break;
979 /* If this vassertion fails, we couldn't find a
980 corresponding HLR. */
981 vassert(k < rreg_lrs_used);
984 /* Sanity check 3: all vreg-rreg bindings must bind registers
985 of the same class. */
986 for (Int j = 0; j < n_rregs; j++) {
987 if (rreg_state[j].disp != Bound) {
988 vassert(rreg_state[j].eq_spill_slot == False);
989 continue;
991 vassert(hregClass(con->univ->regs[j])
992 == hregClass(rreg_state[j].vreg));
993 vassert( hregIsVirtual(rreg_state[j].vreg));
996 /* Sanity check 4: the vreg_state and rreg_state
997 mutually-redundant mappings are consistent. If
998 rreg_state[j].vreg points at some vreg_state entry then
999 that vreg_state entry should point back at
1000 rreg_state[j]. */
1001 for (Int j = 0; j < n_rregs; j++) {
1002 if (rreg_state[j].disp != Bound)
1003 continue;
1004 Int k = hregIndex(rreg_state[j].vreg);
1005 vassert(IS_VALID_VREGNO(k));
1006 vassert(vreg_state[k] == j);
1008 for (Int j = 0; j < n_vregs; j++) {
1009 Int k = vreg_state[j];
1010 if (k == INVALID_RREG_NO)
1011 continue;
1012 vassert(IS_VALID_RREGNO(k));
1013 vassert(rreg_state[k].disp == Bound);
1014 vassert(hregIndex(rreg_state[k].vreg) == j);
1017 } /* if (do_sanity_check) */
1019 /* ------------ end of Sanity checks ------------ */
1021 /* Do various optimisations pertaining to register coalescing
1022 and preferencing:
1023 MOV v <-> v coalescing (done here).
1024 MOV v <-> r coalescing (not yet, if ever)
1026 /* If doing a reg-reg move between two vregs, and the src's live
1027 range ends here and the dst's live range starts here, bind
1028 the dst to the src's rreg, and that's all. */
1029 if (reg_usage_arr[ii].isVregVregMove) {
1030 HReg vregS = reg_usage_arr[ii].regMoveSrc;
1031 HReg vregD = reg_usage_arr[ii].regMoveDst;
1032 /* Check that |isVregVregMove| is not telling us a bunch of lies ... */
1033 vassert(hregClass(vregS) == hregClass(vregD));
1034 Int k = hregIndex(vregS);
1035 Int m = hregIndex(vregD);
1036 vassert(IS_VALID_VREGNO(k));
1037 vassert(IS_VALID_VREGNO(m));
1038 if (vreg_lrs[k].dead_before != ii + 1) goto cannot_coalesce;
1039 if (vreg_lrs[m].live_after != ii) goto cannot_coalesce;
1040 if (DEBUG_REGALLOC) {
1041 vex_printf("COALESCE ");
1042 con->ppReg(vregS);
1043 vex_printf(" -> ");
1044 con->ppReg(vregD);
1045 vex_printf("\n\n");
1047 /* Find the state entry for vregS. */
1048 Int n = vreg_state[k]; /* k is the index of vregS */
1049 if (n == INVALID_RREG_NO) {
1050 /* vregS is not currently in a real register. So we can't
1051 do the coalescing. Give up. */
1052 goto cannot_coalesce;
1054 vassert(IS_VALID_RREGNO(n));
1056 /* Finally, we can do the coalescing. It's trivial -- merely
1057 claim vregS's register for vregD. */
1058 rreg_state[n].vreg = vregD;
1059 vassert(IS_VALID_UNSIGNED_VREGNO(hregIndex(vregD)));
1060 vassert(IS_VALID_UNSIGNED_VREGNO(hregIndex(vregS)));
1061 vreg_state[hregIndex(vregD)] = toShort(n);
1062 vreg_state[hregIndex(vregS)] = INVALID_RREG_NO;
1064 /* This rreg has become associated with a different vreg and
1065 hence with a different spill slot. Play safe. */
1066 rreg_state[n].eq_spill_slot = False;
1068 /* Move on to the next insn. We skip the post-insn stuff for
1069 fixed registers, since this move should not interact with
1070 them in any way. */
1071 continue;
1073 cannot_coalesce:
1075 /* ------ Free up rregs bound to dead vregs ------ */
1077 /* Look for vregs whose live range has just ended, and
1078 mark the associated rreg as free. */
1080 for (Int j = 0; j < n_rregs; j++) {
1081 if (rreg_state[j].disp != Bound)
1082 continue;
1083 UInt vregno = hregIndex(rreg_state[j].vreg);
1084 vassert(IS_VALID_UNSIGNED_VREGNO(vregno));
1085 if (vreg_lrs[vregno].dead_before <= ii) {
1086 rreg_state[j].disp = Free;
1087 rreg_state[j].eq_spill_slot = False;
1088 Int m = hregIndex(rreg_state[j].vreg);
1089 vassert(IS_VALID_VREGNO(m));
1090 vreg_state[m] = INVALID_RREG_NO;
1091 if (DEBUG_REGALLOC) {
1092 vex_printf("free up ");
1093 con->ppReg(con->univ->regs[j]);
1094 vex_printf("\n");
1099 /* ------ Pre-instruction actions for fixed rreg uses ------ */
1101 /* Now we have to deal with rregs which are about to be made
1102 live by this instruction -- in other words, are entering into
1103 one of their live ranges. If any such rreg holds a vreg, we
1104 will have to free up the rreg. The simplest solution which
1105 is correct is to spill the rreg.
1107 Note we could do better:
1108 * Could move it into some other free rreg, if one is available
1110 Do this efficiently, by incrementally stepping along an array
1111 of rreg HLRs that are known to be sorted by start point
1112 (their .live_after field).
1114 while (True) {
1115 vassert(rreg_lrs_la_next >= 0);
1116 vassert(rreg_lrs_la_next <= rreg_lrs_used);
1117 if (rreg_lrs_la_next == rreg_lrs_used)
1118 break; /* no more real reg live ranges to consider */
1119 if (ii < rreg_lrs_la[rreg_lrs_la_next].live_after)
1120 break; /* next live range does not yet start */
1121 vassert(ii == rreg_lrs_la[rreg_lrs_la_next].live_after);
1122 /* rreg_lrs_la[rreg_lrs_la_next].rreg needs to be freed up.
1123 Find the associated rreg_state entry. */
1124 /* Note, re ii == rreg_lrs_la[rreg_lrs_la_next].live_after.
1125 Real register live ranges are guaranteed to be well-formed
1126 in that they start with a write to the register -- Stage 2
1127 rejects any code not satisfying this. So the correct
1128 question to ask is whether
1129 rreg_lrs_la[rreg_lrs_la_next].live_after == ii, that is,
1130 whether the reg becomes live after this insn -- rather
1131 than before it. */
1132 if (DEBUG_REGALLOC) {
1133 vex_printf("need to free up rreg: ");
1134 con->ppReg(rreg_lrs_la[rreg_lrs_la_next].rreg);
1135 vex_printf("\n\n");
1137 Int k = hregIndex(rreg_lrs_la[rreg_lrs_la_next].rreg);
1139 /* If this fails, we don't have an entry for this rreg.
1140 Which we should. */
1141 vassert(IS_VALID_RREGNO(k));
1142 Int m = hregIndex(rreg_state[k].vreg);
1143 if (rreg_state[k].disp == Bound) {
1144 /* Yes, there is an associated vreg. Spill it if it's
1145 still live. */
1146 vassert(IS_VALID_VREGNO(m));
1147 vreg_state[m] = INVALID_RREG_NO;
1148 if (vreg_lrs[m].dead_before > ii) {
1149 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1150 if ((!eq_spill_opt) || !rreg_state[k].eq_spill_slot) {
1151 HInstr* spill1 = NULL;
1152 HInstr* spill2 = NULL;
1153 con->genSpill(&spill1, &spill2, con->univ->regs[k],
1154 vreg_lrs[m].spill_offset, con->mode64);
1155 vassert(spill1 || spill2); /* can't both be NULL */
1156 if (spill1)
1157 EMIT_INSTR(spill1);
1158 if (spill2)
1159 EMIT_INSTR(spill2);
1161 rreg_state[k].eq_spill_slot = True;
1164 rreg_state[k].disp = Unavail;
1165 rreg_state[k].vreg = INVALID_HREG;
1166 rreg_state[k].eq_spill_slot = False;
1168 /* check for further rregs entering HLRs at this point */
1169 rreg_lrs_la_next++;
1172 if (DEBUG_REGALLOC) {
1173 vex_printf("After pre-insn actions for fixed regs:\n");
1174 PRINT_STATE;
1175 vex_printf("\n");
1178 /* ------ Deal with the current instruction. ------ */
1180 /* Finally we can begin the processing of this instruction
1181 itself. The aim is to free up enough rregs for this insn.
1182 This may generate spill stores since we may have to evict
1183 some vregs currently in rregs. Also generates spill loads.
1184 We also build up the final vreg->rreg mapping to be applied
1185 to the insn. */
1187 initHRegRemap(&remap);
1189 /* ------------ BEGIN directReload optimisation ----------- */
1191 /* If the instruction reads exactly one vreg which is currently
1192 in a spill slot, and this is last use of that vreg, see if we
1193 can convert the instruction into one that reads directly from
1194 the spill slot. This is clearly only possible for x86 and
1195 amd64 targets, since ppc and arm are load-store
1196 architectures. If successful, replace instrs_in->arr[ii]
1197 with this new instruction, and recompute its reg usage, so
1198 that the change is invisible to the standard-case handling
1199 that follows. */
1201 if (con->directReload != NULL && reg_usage_arr[ii].n_vRegs <= 2) {
1202 Bool debug_direct_reload = False;
1203 HReg cand = INVALID_HREG;
1204 Bool nreads = 0;
1205 Short spilloff = 0;
1207 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
1209 HReg vreg = reg_usage_arr[ii].vRegs[j];
1210 vassert(hregIsVirtual(vreg));
1212 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1213 nreads++;
1214 Int m = hregIndex(vreg);
1215 vassert(IS_VALID_VREGNO(m));
1216 Int k = vreg_state[m];
1217 if (!IS_VALID_RREGNO(k)) {
1218 /* ok, it is spilled. Now, is this its last use? */
1219 vassert(vreg_lrs[m].dead_before >= ii+1);
1220 if (vreg_lrs[m].dead_before == ii+1
1221 && hregIsInvalid(cand)) {
1222 spilloff = vreg_lrs[m].spill_offset;
1223 cand = vreg;
1229 if (nreads == 1 && ! hregIsInvalid(cand)) {
1230 HInstr* reloaded;
1231 if (reg_usage_arr[ii].n_vRegs == 2)
1232 vassert(! sameHReg(reg_usage_arr[ii].vRegs[0],
1233 reg_usage_arr[ii].vRegs[1]));
1235 reloaded = con->directReload(instrs_in->arr[ii], cand, spilloff);
1236 if (debug_direct_reload && !reloaded) {
1237 vex_printf("[%3d] ", spilloff); ppHReg(cand); vex_printf(" ");
1238 con->ppInstr(instrs_in->arr[ii], con->mode64);
1240 if (reloaded) {
1241 /* Update info about the insn, so it looks as if it had
1242 been in this form all along. */
1243 instrs_in->arr[ii] = reloaded;
1244 con->getRegUsage(&reg_usage_arr[ii], instrs_in->arr[ii],
1245 con->mode64);
1246 if (debug_direct_reload && !reloaded) {
1247 vex_printf(" --> ");
1248 con->ppInstr(reloaded, con->mode64);
1252 if (debug_direct_reload && !reloaded)
1253 vex_printf("\n");
1258 /* ------------ END directReload optimisation ------------ */
1260 /* for each virtual reg mentioned in the insn ... */
1261 for (Int j = 0; j < reg_usage_arr[ii].n_vRegs; j++) {
1263 HReg vreg = reg_usage_arr[ii].vRegs[j];
1264 vassert(hregIsVirtual(vreg));
1266 if (0) {
1267 vex_printf("considering "); con->ppReg(vreg); vex_printf("\n");
1270 /* Now we're trying to find a rreg for "vreg". First of all,
1271 if it already has an rreg assigned, we don't need to do
1272 anything more. Inspect the current state to find out. */
1273 Int m = hregIndex(vreg);
1274 vassert(IS_VALID_VREGNO(m));
1275 Int n = vreg_state[m];
1276 if (IS_VALID_RREGNO(n)) {
1277 vassert(rreg_state[n].disp == Bound);
1278 addToHRegRemap(&remap, vreg, con->univ->regs[n]);
1279 /* If this rreg is written or modified, mark it as different
1280 from any spill slot value. */
1281 if (reg_usage_arr[ii].vMode[j] != HRmRead)
1282 rreg_state[n].eq_spill_slot = False;
1283 continue;
1284 } else {
1285 vassert(n == INVALID_RREG_NO);
1288 /* No luck. The next thing to do is see if there is a
1289 currently free rreg available, of the correct class. If
1290 so, bag it. NOTE, we could improve this by selecting an
1291 rreg for which the next live-range event is as far ahead
1292 as possible. */
1293 Int k_suboptimal = -1;
1294 Int k;
1295 for (k = con->univ->allocable_start[hregClass(vreg)];
1296 k <= con->univ->allocable_end[hregClass(vreg)]; k++) {
1297 if (rreg_state[k].disp != Free)
1298 continue;
1299 if (rreg_state[k].has_hlrs) {
1300 /* Well, at least we can use k_suboptimal if we really
1301 have to. Keep on looking for a better candidate. */
1302 k_suboptimal = k;
1303 } else {
1304 /* Found a preferable reg. Use it. */
1305 k_suboptimal = -1;
1306 break;
1309 if (k_suboptimal >= 0)
1310 k = k_suboptimal;
1312 if (k <= con->univ->allocable_end[hregClass(vreg)]) {
1313 rreg_state[k].disp = Bound;
1314 rreg_state[k].vreg = vreg;
1315 Int p = hregIndex(vreg);
1316 vassert(IS_VALID_VREGNO(p));
1317 vreg_state[p] = toShort(k);
1318 addToHRegRemap(&remap, vreg, con->univ->regs[k]);
1319 /* Generate a reload if needed. This only creates needed
1320 reloads because the live range builder for vregs will
1321 guarantee that the first event for a vreg is a write.
1322 Hence, if this reference is not a write, it cannot be
1323 the first reference for this vreg, and so a reload is
1324 indeed needed. */
1325 if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
1326 vassert(vreg_lrs[p].reg_class != HRcINVALID);
1327 HInstr* reload1 = NULL;
1328 HInstr* reload2 = NULL;
1329 con->genReload(&reload1, &reload2, con->univ->regs[k],
1330 vreg_lrs[p].spill_offset, con->mode64);
1331 vassert(reload1 || reload2); /* can't both be NULL */
1332 if (reload1)
1333 EMIT_INSTR(reload1);
1334 if (reload2)
1335 EMIT_INSTR(reload2);
1336 /* This rreg is read or modified by the instruction.
1337 If it's merely read we can claim it now equals the
1338 spill slot, but not so if it is modified. */
1339 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1340 rreg_state[k].eq_spill_slot = True;
1341 } else {
1342 vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
1343 rreg_state[k].eq_spill_slot = False;
1345 } else {
1346 rreg_state[k].eq_spill_slot = False;
1349 continue;
1352 /* Well, now we have no option but to spill a vreg. It's
1353 important to make a good choice of vreg to spill, and of
1354 course we need to be careful not to spill a vreg which is
1355 needed by this insn. */
1357 /* First, mark in the rreg_state, those rregs which are not spill
1358 candidates, due to holding a vreg mentioned by this
1359 instruction. Or being of the wrong class. */
1360 for (k = con->univ->allocable_start[hregClass(vreg)];
1361 k <= con->univ->allocable_end[hregClass(vreg)]; k++) {
1362 rreg_state[k].is_spill_cand = False;
1363 if (rreg_state[k].disp != Bound)
1364 continue;
1365 rreg_state[k].is_spill_cand = True;
1366 /* Note, the following loop visits only the virtual regs
1367 mentioned by the instruction. */
1368 for (m = 0; m < reg_usage_arr[ii].n_vRegs; m++) {
1369 if (sameHReg(rreg_state[k].vreg, reg_usage_arr[ii].vRegs[m])) {
1370 rreg_state[k].is_spill_cand = False;
1371 break;
1376 /* We can choose to spill any rreg satisfying
1377 rreg_state[r].is_spill_cand (so to speak). Choose r so that
1378 the next use of its associated vreg is as far ahead as
1379 possible, in the hope that this will minimise the number
1380 of consequent reloads required. */
1381 Int spillee
1382 = findMostDistantlyMentionedVReg (
1383 reg_usage_arr, ii+1, instrs_in->arr_used, rreg_state,
1384 hregClass(vreg), con);
1386 if (spillee == -1) {
1387 /* Hmmmmm. There don't appear to be any spill candidates.
1388 We're hosed. */
1389 vex_printf("reg_alloc: can't find a register in class: ");
1390 ppHRegClass(hregClass(vreg));
1391 vex_printf("\n");
1392 vpanic("reg_alloc: can't create a free register.");
1395 /* Right. So we're going to spill rreg_state[spillee]. */
1396 vassert(IS_VALID_RREGNO(spillee));
1397 vassert(rreg_state[spillee].disp == Bound);
1398 /* check it's the right class */
1399 vassert(hregClass(con->univ->regs[spillee]) == hregClass(vreg));
1400 /* check we're not ejecting the vreg for which we are trying
1401 to free up a register. */
1402 vassert(! sameHReg(rreg_state[spillee].vreg, vreg));
1404 m = hregIndex(rreg_state[spillee].vreg);
1405 vassert(IS_VALID_VREGNO(m));
1407 /* So here's the spill store. Assert that we're spilling a
1408 live vreg. */
1409 vassert(vreg_lrs[m].dead_before > ii);
1410 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1411 if ((!eq_spill_opt) || !rreg_state[spillee].eq_spill_slot) {
1412 HInstr* spill1 = NULL;
1413 HInstr* spill2 = NULL;
1414 con->genSpill(&spill1, &spill2, con->univ->regs[spillee],
1415 vreg_lrs[m].spill_offset, con->mode64);
1416 vassert(spill1 || spill2); /* can't both be NULL */
1417 if (spill1)
1418 EMIT_INSTR(spill1);
1419 if (spill2)
1420 EMIT_INSTR(spill2);
1423 /* Update the rreg_state to reflect the new assignment for this
1424 rreg. */
1425 rreg_state[spillee].vreg = vreg;
1426 vreg_state[m] = INVALID_RREG_NO;
1428 rreg_state[spillee].eq_spill_slot = False; /* be safe */
1430 m = hregIndex(vreg);
1431 vassert(IS_VALID_VREGNO(m));
1432 vreg_state[m] = toShort(spillee);
1434 /* Now, if this vreg is being read or modified (as opposed to
1435 written), we have to generate a reload for it. */
1436 if (reg_usage_arr[ii].vMode[j] != HRmWrite) {
1437 vassert(vreg_lrs[m].reg_class != HRcINVALID);
1438 HInstr* reload1 = NULL;
1439 HInstr* reload2 = NULL;
1440 con->genReload(&reload1, &reload2, con->univ->regs[spillee],
1441 vreg_lrs[m].spill_offset, con->mode64);
1442 vassert(reload1 || reload2); /* can't both be NULL */
1443 if (reload1)
1444 EMIT_INSTR(reload1);
1445 if (reload2)
1446 EMIT_INSTR(reload2);
1447 /* This rreg is read or modified by the instruction.
1448 If it's merely read we can claim it now equals the
1449 spill slot, but not so if it is modified. */
1450 if (reg_usage_arr[ii].vMode[j] == HRmRead) {
1451 rreg_state[spillee].eq_spill_slot = True;
1452 } else {
1453 vassert(reg_usage_arr[ii].vMode[j] == HRmModify);
1454 rreg_state[spillee].eq_spill_slot = False;
1458 /* So after much twisting and turning, we have vreg mapped to
1459 rreg_state[spillee].rreg. Note that in the map. */
1460 addToHRegRemap(&remap, vreg, con->univ->regs[spillee]);
1462 } /* iterate over virtual registers in this instruction. */
1464 /* We've finished clowning around with registers in this instruction.
1465 Three results:
1466 - the running rreg_state[] has been updated
1467 - a suitable vreg->rreg mapping for this instruction has been
1468 constructed
1469 - spill and reload instructions may have been emitted.
1471 The final step is to apply the mapping to the instruction,
1472 and emit that.
1475 /* NOTE, DESTRUCTIVELY MODIFIES instrs_in->arr[ii]. */
1476 con->mapRegs(&remap, instrs_in->arr[ii], con->mode64);
1477 EMIT_INSTR( instrs_in->arr[ii] );
1479 if (DEBUG_REGALLOC) {
1480 vex_printf("After dealing with current insn:\n");
1481 PRINT_STATE;
1482 vex_printf("\n");
1485 /* ------ Post-instruction actions for fixed rreg uses ------ */
1487 /* Now we need to check for rregs exiting fixed live ranges
1488 after this instruction, and if so mark them as free. */
1489 while (True) {
1490 vassert(rreg_lrs_db_next >= 0);
1491 vassert(rreg_lrs_db_next <= rreg_lrs_used);
1492 if (rreg_lrs_db_next == rreg_lrs_used)
1493 break; /* no more real reg live ranges to consider */
1494 if (ii+1 < rreg_lrs_db[rreg_lrs_db_next].dead_before)
1495 break; /* next live range does not yet start */
1496 vassert(ii+1 == rreg_lrs_db[rreg_lrs_db_next].dead_before);
1497 /* rreg_lrs_db[[rreg_lrs_db_next].rreg is exiting a hard live
1498 range. Mark it as such in the main rreg_state array. */
1499 HReg reg = rreg_lrs_db[rreg_lrs_db_next].rreg;
1500 vassert(!hregIsVirtual(reg));
1501 Int k = hregIndex(reg);
1502 vassert(IS_VALID_RREGNO(k));
1503 vassert(rreg_state[k].disp == Unavail);
1504 rreg_state[k].disp = Free;
1505 rreg_state[k].vreg = INVALID_HREG;
1506 rreg_state[k].eq_spill_slot = False;
1508 /* check for further rregs leaving HLRs at this point */
1509 rreg_lrs_db_next++;
1512 if (DEBUG_REGALLOC) {
1513 vex_printf("After post-insn actions for fixed regs:\n");
1514 PRINT_STATE;
1515 vex_printf("\n");
1518 } /* iterate over insns */
1520 /* ------ END: Process each insn in turn. ------ */
1522 /* free(rreg_state); */
1523 /* free(rreg_lrs); */
1524 /* if (vreg_lrs) free(vreg_lrs); */
1526 /* Paranoia */
1527 vassert(rreg_lrs_la_next == rreg_lrs_used);
1528 vassert(rreg_lrs_db_next == rreg_lrs_used);
1530 return instrs_out;
1532 # undef INVALID_INSTRNO
1533 # undef EMIT_INSTR
1534 # undef PRINT_STATE
1539 /*---------------------------------------------------------------*/
1540 /*--- host_reg_alloc2.c ---*/
1541 /*---------------------------------------------------------------*/