revert 213 commits (to 56092) from the last month. 10 still need work to resolve...
[AROS.git] / workbench / libs / mesa / src / gallium / drivers / nvc0 / nvc0_pc_regalloc.c
blob15bebb2134abf9c3b49f5c0c71394652204771d3
1 /*
2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
23 #if NV50_DEBUG & NV50_DEBUG_PROG_RA
24 # define NVC0_RA_DEBUG_LIVEI
25 # define NVC0_RA_DEBUG_LIVE_SETS
26 # define NVC0_RA_DEBUG_JOIN
27 #endif
29 #include "nvc0_pc.h"
30 #include "util/u_simple_list.h"
32 #define NVC0_NUM_REGISTER_FILES 3
34 /* @unit_shift: log2 of min allocation unit for register */
35 struct register_set {
36 uint32_t bits[NVC0_NUM_REGISTER_FILES][2];
37 uint32_t last[NVC0_NUM_REGISTER_FILES];
38 int log2_unit[NVC0_NUM_REGISTER_FILES];
39 struct nv_pc *pc;
42 /* aliasing is allowed */
43 static void
44 intersect_register_sets(struct register_set *dst,
45 struct register_set *src1, struct register_set *src2)
47 int i;
49 for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) {
50 dst->bits[i][0] = src1->bits[i][0] | src2->bits[i][0];
51 dst->bits[i][1] = src1->bits[i][1] | src2->bits[i][1];
55 static void
56 mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask)
58 int i;
60 for (i = 0; i < NVC0_NUM_REGISTER_FILES; ++i) {
61 set->bits[i][0] = (set->bits[i][0] | mask) & umask;
62 set->bits[i][1] = (set->bits[i][1] | mask) & umask;
66 struct nv_pc_pass {
67 struct nv_pc *pc;
68 struct nv_instruction **insns;
69 uint num_insns;
70 uint pass_seq;
73 static void
74 ranges_coalesce(struct nv_range *range)
76 while (range->next && range->end >= range->next->bgn) {
77 struct nv_range *rnn = range->next->next;
78 assert(range->bgn <= range->next->bgn);
79 range->end = MAX2(range->end, range->next->end);
80 FREE(range->next);
81 range->next = rnn;
85 static boolean
86 add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
88 struct nv_range *range, **nextp = &val->livei;
90 if (bgn == end) /* [a, a) is invalid / empty */
91 return TRUE;
93 for (range = val->livei; range; range = range->next) {
94 if (end < range->bgn)
95 break; /* insert before */
97 if (bgn > range->end) {
98 nextp = &range->next;
99 continue; /* insert after */
102 /* overlap */
103 if (bgn < range->bgn) {
104 range->bgn = bgn;
105 if (end > range->end)
106 range->end = end;
107 ranges_coalesce(range);
108 return TRUE;
110 if (end > range->end) {
111 range->end = end;
112 ranges_coalesce(range);
113 return TRUE;
115 assert(bgn >= range->bgn);
116 assert(end <= range->end);
117 return TRUE;
120 if (!new_range)
121 new_range = CALLOC_STRUCT(nv_range);
123 new_range->bgn = bgn;
124 new_range->end = end;
125 new_range->next = range;
126 *(nextp) = new_range;
127 return FALSE;
130 static void
131 add_range(struct nv_value *val, struct nv_basic_block *b, int end)
133 int bgn;
135 if (!val->insn) /* ignore non-def values */
136 return;
137 assert(b->entry->serial <= b->exit->serial);
138 assert(b->phi->serial <= end);
139 assert(b->exit->serial + 1 >= end);
141 bgn = val->insn->serial;
142 if (bgn < b->entry->serial || bgn > b->exit->serial)
143 bgn = b->entry->serial;
145 assert(bgn <= end);
147 add_range_ex(val, bgn, end, NULL);
150 #if defined(NVC0_RA_DEBUG_JOIN) || defined(NVC0_RA_DEBUG_LIVEI)
151 static void
152 livei_print(struct nv_value *a)
154 struct nv_range *r = a->livei;
156 debug_printf("livei %i: ", a->n);
157 while (r) {
158 debug_printf("[%i, %i) ", r->bgn, r->end);
159 r = r->next;
161 debug_printf("\n");
163 #endif
165 static void
166 livei_unify(struct nv_value *dst, struct nv_value *src)
168 struct nv_range *range, *next;
170 for (range = src->livei; range; range = next) {
171 next = range->next;
172 if (add_range_ex(dst, range->bgn, range->end, range))
173 FREE(range);
175 src->livei = NULL;
178 static void
179 livei_release(struct nv_value *val)
181 struct nv_range *range, *next;
183 for (range = val->livei; range; range = next) {
184 next = range->next;
185 FREE(range);
189 static boolean
190 livei_have_overlap(struct nv_value *a, struct nv_value *b)
192 struct nv_range *r_a, *r_b;
194 for (r_a = a->livei; r_a; r_a = r_a->next) {
195 for (r_b = b->livei; r_b; r_b = r_b->next) {
196 if (r_b->bgn < r_a->end &&
197 r_b->end > r_a->bgn)
198 return TRUE;
201 return FALSE;
204 static int
205 livei_end(struct nv_value *a)
207 struct nv_range *r = a->livei;
209 assert(r);
210 while (r->next)
211 r = r->next;
212 return r->end;
215 static boolean
216 livei_contains(struct nv_value *a, int pos)
218 struct nv_range *r;
220 for (r = a->livei; r && r->bgn <= pos; r = r->next)
221 if (r->end > pos)
222 return TRUE;
223 return FALSE;
226 static boolean
227 reg_assign(struct register_set *set, struct nv_value **def, int n)
229 int i, id, s, k;
230 uint32_t m;
231 int f = def[0]->reg.file;
233 k = n;
234 if (k == 3)
235 k = 4;
236 s = (k * def[0]->reg.size) >> set->log2_unit[f];
237 m = (1 << s) - 1;
239 id = set->last[f];
241 for (i = 0; i * 32 < set->last[f]; ++i) {
242 if (set->bits[f][i] == 0xffffffff)
243 continue;
245 for (id = 0; id < 32; id += s)
246 if (!(set->bits[f][i] & (m << id)))
247 break;
248 if (id < 32)
249 break;
251 if (i * 32 + id > set->last[f])
252 return FALSE;
254 set->bits[f][i] |= m << id;
256 id += i * 32;
258 set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);
260 for (i = 0; i < n; ++i)
261 if (def[i]->livei)
262 def[i]->reg.id = id++;
264 return TRUE;
267 static INLINE void
268 reg_occupy(struct register_set *set, struct nv_value *val)
270 int id = val->reg.id, f = val->reg.file;
271 uint32_t m;
273 if (id < 0)
274 return;
275 m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
277 set->bits[f][id / 32] |= m << (id % 32);
279 if (set->pc->max_reg[f] < id)
280 set->pc->max_reg[f] = id;
283 static INLINE void
284 reg_release(struct register_set *set, struct nv_value *val)
286 int id = val->reg.id, f = val->reg.file;
287 uint32_t m;
289 if (id < 0)
290 return;
291 m = (1 << (val->reg.size >> set->log2_unit[f])) - 1;
293 set->bits[f][id / 32] &= ~(m << (id % 32));
296 static INLINE boolean
297 join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
299 int i;
300 struct nv_value *val;
302 if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
303 return FALSE;
305 if (a->join->reg.id == b->join->reg.id)
306 return TRUE;
308 /* either a or b or both have been assigned */
310 if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
311 return FALSE;
312 else
313 if (b->join->reg.id >= 0) {
314 if (b->join->reg.id == 63)
315 return FALSE;
316 val = a;
317 a = b;
318 b = val;
319 } else
320 if (a->join->reg.id == 63)
321 return FALSE;
323 for (i = 0; i < ctx->pc->num_values; ++i) {
324 val = &ctx->pc->values[i];
326 if (val->join->reg.id != a->join->reg.id)
327 continue;
328 if (val->join != a->join && livei_have_overlap(val->join, b->join))
329 return FALSE;
331 return TRUE;
334 static INLINE void
335 do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
337 int j;
338 struct nv_value *bjoin = b->join;
340 if (b->join->reg.id >= 0)
341 a->join->reg.id = b->join->reg.id;
343 livei_unify(a->join, b->join);
345 #ifdef NVC0_RA_DEBUG_JOIN
346 debug_printf("joining %i to %i\n", b->n, a->n);
347 #endif
349 /* make a->join the new representative */
350 for (j = 0; j < ctx->pc->num_values; ++j)
351 if (ctx->pc->values[j].join == bjoin)
352 ctx->pc->values[j].join = a->join;
354 assert(b->join == a->join);
357 static INLINE boolean
358 try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
360 if (!join_allowed(ctx, a, b)) {
361 #ifdef NVC0_RA_DEBUG_JOIN
362 debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
363 #endif
364 return FALSE;
366 if (livei_have_overlap(a->join, b->join)) {
367 #ifdef NVC0_RA_DEBUG_JOIN
368 debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
369 livei_print(a);
370 livei_print(b);
371 #endif
372 return FALSE;
375 do_join_values(ctx, a, b);
377 return TRUE;
380 static void
381 join_values_nofail(struct nv_pc_pass *ctx,
382 struct nv_value *a, struct nv_value *b, boolean type_only)
384 if (type_only) {
385 assert(join_allowed(ctx, a, b));
386 do_join_values(ctx, a, b);
387 } else {
388 boolean ok = try_join_values(ctx, a, b);
389 if (!ok) {
390 NOUVEAU_ERR("failed to coalesce values\n");
395 static INLINE boolean
396 need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
398 int i = 0, n = 0;
400 for (; i < 2; ++i)
401 if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
402 ++n;
404 return (b->num_in > 1) && (n == 2);
407 /* Look for the @phi's operand whose definition reaches @b. */
408 static int
409 phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
410 struct nv_basic_block *tb)
412 struct nv_ref *srci, *srcj;
413 int i, j;
415 for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
416 srci = phi->src[i];
417 /* if already replaced, check with original source first */
418 if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV)
419 srci = srci->value->insn->src[0];
420 if (!nvc0_bblock_reachable_by(b, srci->value->insn->bb, NULL))
421 continue;
422 /* NOTE: back-edges are ignored by the reachable-by check */
423 if (j < 0 || !nvc0_bblock_reachable_by(srcj->value->insn->bb,
424 srci->value->insn->bb, NULL)) {
425 j = i;
426 srcj = srci;
429 if (j >= 0 && nvc0_bblock_reachable_by(b, phi->def[0]->insn->bb, NULL))
430 if (!nvc0_bblock_reachable_by(srcj->value->insn->bb,
431 phi->def[0]->insn->bb, NULL))
432 j = -1;
433 return j;
436 /* For each operand of each PHI in b, generate a new value by inserting a MOV
437 * at the end of the block it is coming from and replace the operand with its
438 * result. This eliminates liveness conflicts and enables us to let values be
439 * copied to the right register if such a conflict exists nonetheless.
441 * These MOVs are also crucial in making sure the live intervals of phi srces
442 * are extended until the end of the loop, since they are not included in the
443 * live-in sets.
445 static int
446 pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
448 struct nv_instruction *i, *ni;
449 struct nv_value *val;
450 struct nv_basic_block *p, *pn;
451 int n, j;
453 b->pass_seq = ctx->pc->pass_seq;
455 for (n = 0; n < b->num_in; ++n) {
456 p = pn = b->in[n];
457 assert(p);
459 if (need_new_else_block(b, p)) {
460 pn = new_basic_block(ctx->pc);
462 if (p->out[0] == b)
463 p->out[0] = pn;
464 else
465 p->out[1] = pn;
467 if (p->exit->target == b) /* target to new else-block */
468 p->exit->target = pn;
470 b->in[n] = pn;
472 pn->out[0] = b;
473 pn->in[0] = p;
474 pn->num_in = 1;
476 ctx->pc->current_block = pn;
478 for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
479 j = phi_opnd_for_bb(i, p, b);
481 if (j < 0) {
482 val = i->def[0];
483 } else {
484 val = i->src[j]->value;
485 if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) {
486 j = -1;
487 /* use original value, we already encountered & replaced it */
488 val = val->insn->src[0]->value;
491 if (j < 0) /* need an additional source ? */
492 for (j = 0; j < 6 && i->src[j] && i->src[j]->value != val; ++j);
493 assert(j < 6); /* XXX: really ugly shaders */
495 ni = new_instruction(ctx->pc, NV_OP_MOV);
496 if (ni->prev && ni->prev->target)
497 nvc0_insns_permute(ni->prev, ni);
499 ni->def[0] = new_value_like(ctx->pc, val);
500 ni->def[0]->insn = ni;
501 nv_reference(ctx->pc, ni, 0, val);
502 nv_reference(ctx->pc, i, j, ni->def[0]); /* new phi source = MOV def */
503 i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV;
506 if (pn != p && pn->exit) {
507 assert(!b->in[!n]->exit || b->in[!n]->exit->terminator);
508 /* insert terminator (branch to ENDIF) in new else block */
509 ctx->pc->current_block = pn;
510 ni = new_instruction(ctx->pc, NV_OP_BRA);
511 ni->target = b;
512 ni->terminator = 1;
516 for (j = 0; j < 2; ++j)
517 if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
518 pass_generate_phi_movs(ctx, b->out[j]);
520 return 0;
523 #define JOIN_MASK_PHI (1 << 0)
524 #define JOIN_MASK_SELECT (1 << 1)
525 #define JOIN_MASK_MOV (1 << 2)
526 #define JOIN_MASK_BIND (1 << 3)
528 static int
529 pass_join_values(struct nv_pc_pass *ctx, unsigned mask)
531 int c, n;
533 for (n = 0; n < ctx->num_insns; ++n) {
534 struct nv_instruction *i = ctx->insns[n];
536 switch (i->opcode) {
537 case NV_OP_PHI:
538 if (!(mask & JOIN_MASK_PHI))
539 break;
540 for (c = 0; c < 6 && i->src[c]; ++c)
541 join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE);
542 break;
543 case NV_OP_MOV:
544 if (!(mask & JOIN_MASK_MOV))
545 break;
546 if (i->src[0]->value->insn && !i->src[0]->value->insn->def[1])
547 try_join_values(ctx, i->def[0], i->src[0]->value);
548 break;
549 case NV_OP_SELECT:
550 if (!(mask & JOIN_MASK_SELECT))
551 break;
552 for (c = 0; c < 6 && i->src[c]; ++c)
553 join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE);
554 break;
555 case NV_OP_BIND:
556 if (!(mask & JOIN_MASK_BIND))
557 break;
558 for (c = 0; c < 4 && i->src[c]; ++c)
559 join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE);
560 break;
561 case NV_OP_TEX:
562 case NV_OP_TXB:
563 case NV_OP_TXL:
564 case NV_OP_TXQ: /* on nvc0, TEX src and dst can differ */
565 default:
566 break;
569 return 0;
572 /* Order the instructions so that live intervals can be expressed in numbers. */
573 static void
574 pass_order_instructions(void *priv, struct nv_basic_block *b)
576 struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
577 struct nv_instruction *i;
579 b->pass_seq = ctx->pc->pass_seq;
581 assert(!b->exit || !b->exit->next);
582 for (i = b->phi; i; i = i->next) {
583 i->serial = ctx->num_insns;
584 ctx->insns[ctx->num_insns++] = i;
588 static void
589 bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
591 #ifdef NVC0_RA_DEBUG_LIVE_SETS
592 struct nv_value *val;
593 int j;
595 debug_printf("LIVE-INs of BB:%i: ", b->id);
597 for (j = 0; j < pc->num_values; ++j) {
598 if (!(b->live_set[j / 32] & (1 << (j % 32))))
599 continue;
600 val = &pc->values[j];
601 if (!val->insn)
602 continue;
603 debug_printf("%i ", val->n);
605 debug_printf("\n");
606 #endif
609 static INLINE void
610 live_set_add(struct nv_basic_block *b, struct nv_value *val)
612 if (!val->insn) /* don't add non-def values */
613 return;
614 b->live_set[val->n / 32] |= 1 << (val->n % 32);
617 static INLINE void
618 live_set_rem(struct nv_basic_block *b, struct nv_value *val)
620 b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
623 static INLINE boolean
624 live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
626 int n = ref->value->n;
627 return b->live_set[n / 32] & (1 << (n % 32));
630 /* The live set of a block contains those values that are live immediately
631 * before the beginning of the block, so do a backwards scan.
633 static int
634 pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
636 struct nv_instruction *i;
637 int j, n, ret = 0;
639 if (b->pass_seq >= ctx->pc->pass_seq)
640 return 0;
641 b->pass_seq = ctx->pc->pass_seq;
643 /* slight hack for undecidedness: set phi = entry if it's undefined */
644 if (!b->phi)
645 b->phi = b->entry;
647 for (n = 0; n < 2; ++n) {
648 if (!b->out[n] || b->out[n] == b)
649 continue;
650 ret = pass_build_live_sets(ctx, b->out[n]);
651 if (ret)
652 return ret;
654 if (n == 0) {
655 for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
656 b->live_set[j] = b->out[n]->live_set[j];
657 } else {
658 for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
659 b->live_set[j] |= b->out[n]->live_set[j];
663 if (!b->entry)
664 return 0;
666 bb_live_set_print(ctx->pc, b);
668 for (i = b->exit; i != b->entry->prev; i = i->prev) {
669 for (j = 0; j < 5 && i->def[j]; j++)
670 live_set_rem(b, i->def[j]);
671 for (j = 0; j < 6 && i->src[j]; j++)
672 live_set_add(b, i->src[j]->value);
674 for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
675 live_set_rem(b, i->def[0]);
677 bb_live_set_print(ctx->pc, b);
679 return 0;
682 static void collect_live_values(struct nv_basic_block *b, const int n)
684 int i;
686 /* XXX: what to do about back/fake-edges (used to include both here) ? */
687 if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) {
688 if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
689 for (i = 0; i < n; ++i)
690 b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
691 } else {
692 memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
694 } else
695 if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
696 memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
697 } else {
698 memset(b->live_set, 0, n * sizeof(uint32_t));
702 /* NOTE: the live intervals of phi functions start at the first non-phi insn. */
703 static int
704 pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
706 struct nv_instruction *i, *i_stop;
707 int j, s;
708 const int n = (ctx->pc->num_values + 31) / 32;
710 /* verify that first block does not have live-in values */
711 if (b->num_in == 0)
712 for (j = 0; j < n; ++j)
713 assert(b->live_set[j] == 0);
715 collect_live_values(b, n);
717 /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
718 for (j = 0; j < 2; ++j) {
719 if (!b->out[j] || !b->out[j]->phi)
720 continue;
721 for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
722 live_set_rem(b, i->def[0]);
724 for (s = 0; s < 6 && i->src[s]; ++s) {
725 assert(i->src[s]->value->insn);
726 if (nvc0_bblock_reachable_by(b, i->src[s]->value->insn->bb,
727 b->out[j]))
728 live_set_add(b, i->src[s]->value);
729 else
730 live_set_rem(b, i->src[s]->value);
735 /* remaining live-outs are live until the end */
736 if (b->exit) {
737 for (j = 0; j < ctx->pc->num_values; ++j) {
738 if (!(b->live_set[j / 32] & (1 << (j % 32))))
739 continue;
740 add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
741 #ifdef NVC0_RA_DEBUG_LIVEI
742 debug_printf("adding range for live value %i: ", j);
743 livei_print(&ctx->pc->values[j]);
744 #endif
748 i_stop = b->entry ? b->entry->prev : NULL;
750 /* don't have to include phi functions here (will have 0 live range) */
751 for (i = b->exit; i != i_stop; i = i->prev) {
752 assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
753 for (j = 0; j < 4 && i->def[j]; ++j)
754 live_set_rem(b, i->def[j]);
756 for (j = 0; j < 6 && i->src[j]; ++j) {
757 if (!live_set_test(b, i->src[j])) {
758 live_set_add(b, i->src[j]->value);
759 add_range(i->src[j]->value, b, i->serial);
760 #ifdef NVC0_RA_DEBUG_LIVEI
761 debug_printf("adding range for source %i (ends living): ",
762 i->src[j]->value->n);
763 livei_print(i->src[j]->value);
764 #endif
769 b->pass_seq = ctx->pc->pass_seq;
771 if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
772 pass_build_intervals(ctx, b->out[0]);
774 if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
775 pass_build_intervals(ctx, b->out[1]);
777 return 0;
780 static INLINE void
781 nvc0_ctor_register_set(struct nv_pc *pc, struct register_set *set)
783 memset(set, 0, sizeof(*set));
785 set->last[NV_FILE_GPR] = 62;
786 set->last[NV_FILE_PRED] = 6;
787 set->last[NV_FILE_COND] = 1;
789 set->log2_unit[NV_FILE_GPR] = 2;
790 set->log2_unit[NV_FILE_COND] = 0;
791 set->log2_unit[NV_FILE_PRED] = 0;
793 set->pc = pc;
796 static void
797 insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
799 struct nv_value *elem;
801 for (elem = list->prev;
802 elem != list && elem->livei->bgn > nval->livei->bgn;
803 elem = elem->prev);
804 /* now elem begins before or at the same time as val */
806 nval->prev = elem;
807 nval->next = elem->next;
808 elem->next->prev = nval;
809 elem->next = nval;
812 static void
813 collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head,
814 boolean assigned_only)
816 struct nv_value *val;
817 int k, n;
819 make_empty_list(head);
821 for (n = 0; n < ctx->num_insns; ++n) {
822 struct nv_instruction *i = ctx->insns[n];
824 /* for joined values, only the representative will have livei != NULL */
825 for (k = 0; k < 5; ++k) {
826 if (i->def[k] && i->def[k]->livei)
827 if (!assigned_only || i->def[k]->reg.id >= 0)
828 insert_ordered_tail(head, i->def[k]);
832 for (val = head->next; val != head->prev; val = val->next) {
833 assert(val->join == val);
834 assert(val->livei->bgn <= val->next->livei->bgn);
838 static int
839 pass_linear_scan(struct nv_pc_pass *ctx)
841 struct register_set f, free;
842 struct nv_value *cur, *val, *tmp[2];
843 struct nv_value active, inactive, handled, unhandled;
845 make_empty_list(&active);
846 make_empty_list(&inactive);
847 make_empty_list(&handled);
849 nvc0_ctor_register_set(ctx->pc, &free);
851 collect_register_values(ctx, &unhandled, FALSE);
853 foreach_s(cur, tmp[0], &unhandled) {
854 remove_from_list(cur);
856 foreach_s(val, tmp[1], &active) {
857 if (livei_end(val) <= cur->livei->bgn) {
858 reg_release(&free, val);
859 move_to_head(&handled, val);
860 } else
861 if (!livei_contains(val, cur->livei->bgn)) {
862 reg_release(&free, val);
863 move_to_head(&inactive, val);
867 foreach_s(val, tmp[1], &inactive) {
868 if (livei_end(val) <= cur->livei->bgn)
869 move_to_head(&handled, val);
870 else
871 if (livei_contains(val, cur->livei->bgn)) {
872 reg_occupy(&free, val);
873 move_to_head(&active, val);
877 f = free;
879 foreach(val, &inactive)
880 if (livei_have_overlap(val, cur))
881 reg_occupy(&f, val);
883 foreach(val, &unhandled)
884 if (val->reg.id >= 0 && livei_have_overlap(val, cur))
885 reg_occupy(&f, val);
887 if (cur->reg.id < 0) {
888 boolean mem = !reg_assign(&f, &cur, 1);
890 if (mem) {
891 NOUVEAU_ERR("out of registers\n");
892 abort();
895 insert_at_head(&active, cur);
896 reg_occupy(&free, cur);
899 return 0;
902 /* Allocate values defined by instructions such as TEX, which have to be
903 * assigned to consecutive registers.
904 * Linear scan doesn't really work here since the values can have different
905 * live intervals.
907 static int
908 pass_allocate_constrained_values(struct nv_pc_pass *ctx)
910 struct nv_value regvals, *val;
911 struct nv_instruction *i;
912 struct nv_value *defs[4];
913 struct register_set regs[4];
914 int n, vsize, c;
915 uint32_t mask;
916 boolean mem;
918 collect_register_values(ctx, &regvals, TRUE);
920 for (n = 0; n < ctx->num_insns; ++n) {
921 i = ctx->insns[n];
922 vsize = nvi_vector_size(i);
923 if (!(vsize > 1))
924 continue;
925 assert(vsize <= 4);
927 for (c = 0; c < vsize; ++c)
928 defs[c] = i->def[c]->join;
930 if (defs[0]->reg.id >= 0) {
931 for (c = 1; c < vsize; ++c)
932 assert(defs[c]->reg.id >= 0);
933 continue;
936 for (c = 0; c < vsize; ++c) {
937 nvc0_ctor_register_set(ctx->pc, &regs[c]);
939 foreach(val, &regvals) {
940 if (val->reg.id >= 0 && livei_have_overlap(val, defs[c]))
941 reg_occupy(&regs[c], val);
943 mask = 0x11111111;
944 if (vsize == 2) /* granularity is 2 and not 4 */
945 mask |= 0x11111111 << 2;
946 mask_register_set(&regs[c], 0, mask << c);
948 if (defs[c]->livei)
949 insert_ordered_tail(&regvals, defs[c]);
951 for (c = 1; c < vsize; ++c)
952 intersect_register_sets(&regs[0], &regs[0], &regs[c]);
954 mem = !reg_assign(&regs[0], &defs[0], vsize);
956 if (mem) {
957 NOUVEAU_ERR("out of registers\n");
958 abort();
961 return 0;
964 static int
965 nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
967 struct nv_pc_pass *ctx;
968 int i, ret;
970 NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - entering\n");
972 ctx = CALLOC_STRUCT(nv_pc_pass);
973 if (!ctx)
974 return -1;
975 ctx->pc = pc;
977 ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
978 if (!ctx->insns) {
979 FREE(ctx);
980 return -1;
983 pc->pass_seq++;
984 ret = pass_generate_phi_movs(ctx, root);
985 assert(!ret);
987 #ifdef NVC0_RA_DEBUG_LIVEI
988 nvc0_print_function(root);
989 #endif
991 for (i = 0; i < pc->loop_nesting_bound; ++i) {
992 pc->pass_seq++;
993 ret = pass_build_live_sets(ctx, root);
994 assert(!ret && "live sets");
995 if (ret) {
996 NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
997 goto out;
1001 pc->pass_seq++;
1002 nvc0_pc_pass_in_order(root, pass_order_instructions, ctx);
1004 pc->pass_seq++;
1005 ret = pass_build_intervals(ctx, root);
1006 assert(!ret && "build intervals");
1007 if (ret) {
1008 NOUVEAU_ERR("failed to build live intervals\n");
1009 goto out;
1012 #ifdef NVC0_RA_DEBUG_LIVEI
1013 for (i = 0; i < pc->num_values; ++i)
1014 livei_print(&pc->values[i]);
1015 #endif
1017 ret = pass_join_values(ctx, JOIN_MASK_PHI);
1018 if (ret)
1019 goto out;
1020 ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_BIND);
1021 if (ret)
1022 goto out;
1023 ret = pass_join_values(ctx, JOIN_MASK_MOV);
1024 if (ret)
1025 goto out;
1026 ret = pass_allocate_constrained_values(ctx);
1027 if (ret)
1028 goto out;
1029 ret = pass_linear_scan(ctx);
1030 if (ret)
1031 goto out;
1033 for (i = 0; i < pc->num_values; ++i)
1034 livei_release(&pc->values[i]);
1036 NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - leaving\n");
1038 out:
1039 FREE(ctx->insns);
1040 FREE(ctx);
1041 return ret;
1045 nvc0_pc_exec_pass1(struct nv_pc *pc)
1047 int i, ret;
1049 for (i = 0; i < pc->num_subroutines + 1; ++i)
1050 if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
1051 return ret;
1052 return 0;