/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#if NV50_DEBUG & NV50_DEBUG_PROG_RA
# define NV50_RA_DEBUG_LIVEI
# define NV50_RA_DEBUG_LIVE_SETS
# define NV50_RA_DEBUG_JOIN
#endif
#include "nv50_context.h"

#include "util/u_simple_list.h"
#define NUM_REGISTER_FILES 4
#define MAX_REGISTER_COUNT 256

struct register_set {
   struct nv_pc *pc;

   uint32_t last[NUM_REGISTER_FILES];
   uint32_t bits[NUM_REGISTER_FILES][(MAX_REGISTER_COUNT + 31) / 32];
};
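/* Illustrative note (not part of the original source): each bit in bits[f]
 * stands for one allocation unit of file f. For GPRs the unit is a 16-bit
 * half-register (see the granularity comment in
 * pass_allocate_constrained_values), so a 32-bit value with register id 3
 * occupies bits 6 and 7 of bits[NV_FILE_GPR][0]; reg_occupy()/reg_release()
 * below shift the id left by (nv_type_order(type) - 1) before indexing.
 */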
/* using OR because a set bit means occupied/unavailable, aliasing is allowed */
static void
intersect_register_sets(struct register_set *dst,
                        struct register_set *src1, struct register_set *src2)
{
   int i, j;

   for (i = 0; i < NUM_REGISTER_FILES; ++i) {
      for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j)
         dst->bits[i][j] = src1->bits[i][j] | src2->bits[i][j];
   }
}
static void
mask_register_set(struct register_set *set, uint32_t mask, uint32_t umask)
{
   int i, j;

   for (i = 0; i < NUM_REGISTER_FILES; ++i) {
      for (j = 0; j < (MAX_REGISTER_COUNT + 31) / 32; ++j)
         set->bits[i][j] = (set->bits[i][j] | mask) & umask;
   }
}
struct nv_pc_pass {
   struct nv_pc *pc;

   struct nv_instruction **insns;
   int num_insns;
};
static void
ranges_coalesce(struct nv_range *range)
{
   while (range->next && range->end >= range->next->bgn) {
      struct nv_range *rnn = range->next->next;
      assert(range->bgn <= range->next->bgn);
      range->end = MAX2(range->end, range->next->end);
      FREE(range->next);
      range->next = rnn;
   }
}
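/* Example (illustrative): if a value's interval list holds [2, 5) followed
 * by [4, 8), the two ranges touch, so ranges_coalesce merges them into
 * [2, 8) and frees the second node.
 */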
/* @return: TRUE if @new_range can be freed (i.e. was not reused) */
static boolean
add_range_ex(struct nv_value *val, int bgn, int end, struct nv_range *new_range)
{
   struct nv_range *range, **nextp = &val->livei;

   if (bgn == end) /* [a, a) is invalid / empty */
      return TRUE;

   for (range = val->livei; range; range = range->next) {
      if (end < range->bgn)
         break; /* insert before */

      if (bgn > range->end) {
         nextp = &range->next;
         continue; /* insert after */
      }

      /* overlap */
      if (bgn < range->bgn) {
         range->bgn = bgn;
         if (end > range->end)
            range->end = end;
         ranges_coalesce(range);
         return TRUE;
      }
      if (end > range->end) {
         range->end = end;
         ranges_coalesce(range);
         return TRUE;
      }
      assert(bgn >= range->bgn);
      assert(end <= range->end);
      return TRUE; /* already fully contained */
   }

   if (!new_range)
      new_range = CALLOC_STRUCT(nv_range);

   new_range->bgn = bgn;
   new_range->end = end;
   new_range->next = range;
   *(nextp) = new_range;
   return FALSE;
}
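/* Usage sketch (illustrative): add_range_ex(val, 10, 20, NULL) either
 * extends an existing range of val->livei and reports TRUE, or links a
 * freshly allocated node into the sorted list and reports FALSE; callers
 * such as livei_unify() use the return value to decide whether the node
 * they passed in can be freed.
 */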
static void
add_range(struct nv_value *val, struct nv_basic_block *b, int end)
{
   int bgn;

   if (!val->insn) /* ignore non-def values */
      return;

   assert(b->entry->serial <= b->exit->serial);
   assert(b->phi->serial <= end);
   assert(b->exit->serial + 1 >= end);

   bgn = val->insn->serial;
   if (bgn < b->entry->serial || bgn > b->exit->serial)
      bgn = b->entry->serial;

   add_range_ex(val, bgn, end, NULL);
}
#if defined(NV50_RA_DEBUG_JOIN) || defined(NV50_RA_DEBUG_LIVEI)
static void
livei_print(struct nv_value *a)
{
   struct nv_range *r = a->livei;

   debug_printf("livei %i: ", a->n);
   while (r) {
      debug_printf("[%i, %i) ", r->bgn, r->end);
      r = r->next;
   }
   debug_printf("\n");
}
#endif
static void
livei_unify(struct nv_value *dst, struct nv_value *src)
{
   struct nv_range *range, *next;

   for (range = src->livei; range; range = next) {
      next = range->next;
      if (add_range_ex(dst, range->bgn, range->end, range))
         FREE(range);
   }
   src->livei = NULL;
}
static void
livei_release(struct nv_value *val)
{
   struct nv_range *range, *next;

   for (range = val->livei; range; range = next) {
      next = range->next;
      FREE(range);
   }
}
static boolean
livei_have_overlap(struct nv_value *a, struct nv_value *b)
{
   struct nv_range *r_a, *r_b;

   for (r_a = a->livei; r_a; r_a = r_a->next) {
      for (r_b = b->livei; r_b; r_b = r_b->next) {
         if (r_b->bgn < r_a->end &&
             r_b->end > r_a->bgn)
            return TRUE;
      }
   }
   return FALSE;
}
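/* Note (illustrative): intervals are half-open, so [3, 7) and [7, 9) do not
 * overlap; two ranges conflict exactly when r_b->bgn < r_a->end and
 * r_b->end > r_a->bgn both hold.
 */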
static int
livei_end(struct nv_value *a)
{
   struct nv_range *r = a->livei;

   assert(r);
   while (r->next)
      r = r->next;
   return r->end;
}
static boolean
livei_contains(struct nv_value *a, int pos)
{
   struct nv_range *r;

   for (r = a->livei; r && r->bgn <= pos; r = r->next)
      if (r->end > pos)
         return TRUE;
   return FALSE;
}
static boolean
reg_assign(struct register_set *set, struct nv_value **def, int n)
{
   int i, id = 0, s;
   uint32_t m;
   int f = def[0]->reg.file;

   s = n << (nv_type_order(def[0]->reg.type) - 1);
   m = (1 << s) - 1;

   for (i = 0; i * 32 < set->last[f]; ++i) {
      if (set->bits[f][i] == 0xffffffff)
         continue;

      for (id = 0; id < 32; id += s)
         if (!(set->bits[f][i] & (m << id)))
            break;
      if (id < 32)
         break;
   }
   if (i * 32 + id > set->last[f])
      return FALSE;

   set->bits[f][i] |= m << id;

   id += i * 32;

   set->pc->max_reg[f] = MAX2(set->pc->max_reg[f], id + s - 1);

   id >>= nv_type_order(def[0]->reg.type) - 1;

   for (i = 0; i < n; ++i)
      if (def[i]->livei)
         def[i]->reg.id = id++;

   return TRUE;
}
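/* Worked example (illustrative): assigning n = 4 consecutive 32-bit TEX
 * results in the GPR file gives s = 4 << 1 = 8 allocation units, hence
 * m = 0xff, and the scan above looks for a free 8-bit-aligned byte in the
 * occupancy words, i.e. four consecutive 32-bit registers aligned to 4.
 */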
static INLINE void
reg_occupy(struct register_set *set, struct nv_value *val)
{
   int s, id = val->reg.id, f = val->reg.file;
   uint32_t m;

   if (id < 0)
      return;

   s = nv_type_order(val->reg.type) - 1;
   id <<= s;
   m = (1 << (1 << s)) - 1;

   assert(s >= 0); /* XXX: remove me */

   set->bits[f][id / 32] |= m << (id % 32);

   if (set->pc->max_reg[f] < id)
      set->pc->max_reg[f] = id;
}
static INLINE void
reg_release(struct register_set *set, struct nv_value *val)
{
   int s, id = val->reg.id, f = val->reg.file;
   uint32_t m;

   if (id < 0)
      return;

   s = nv_type_order(val->reg.type) - 1;
   id <<= s;
   m = (1 << (1 << s)) - 1;

   set->bits[f][id / 32] &= ~(m << (id % 32));
}
static INLINE boolean
join_allowed(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   int i;
   struct nv_value *val;

   if (a->reg.file != b->reg.file ||
       nv_type_sizeof(a->reg.type) != nv_type_sizeof(b->reg.type))
      return FALSE;

   if (a->join->reg.id == b->join->reg.id)
      return TRUE;

   /* either a or b or both have been assigned */

   if (a->join->reg.id >= 0 && b->join->reg.id >= 0)
      return FALSE;
   if (b->join->reg.id >= 0) {
      /* swap so that a is the value with the fixed register */
      val = a;
      a = b;
      b = val;
   }

   for (i = 0; i < ctx->pc->num_values; ++i) {
      val = &ctx->pc->values[i];

      if (val->join->reg.id != a->join->reg.id)
         continue;
      if (val->join != a->join && livei_have_overlap(val->join, b->join))
         return FALSE;
   }
   return TRUE;
}
static INLINE void
do_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   int j;
   struct nv_value *bjoin = b->join;

   if (b->join->reg.id >= 0)
      a->join->reg.id = b->join->reg.id;

   livei_unify(a->join, b->join);

#ifdef NV50_RA_DEBUG_JOIN
   debug_printf("joining %i to %i\n", b->n, a->n);
#endif

   /* make a->join the new representative */
   for (j = 0; j < ctx->pc->num_values; ++j)
      if (ctx->pc->values[j].join == bjoin)
         ctx->pc->values[j].join = a->join;

   assert(b->join == a->join);
}
static INLINE boolean
try_join_values(struct nv_pc_pass *ctx, struct nv_value *a, struct nv_value *b)
{
   if (!join_allowed(ctx, a, b)) {
#ifdef NV50_RA_DEBUG_JOIN
      debug_printf("cannot join %i to %i: not allowed\n", b->n, a->n);
#endif
      return FALSE;
   }
   if (livei_have_overlap(a->join, b->join)) {
#ifdef NV50_RA_DEBUG_JOIN
      debug_printf("cannot join %i to %i: livei overlap\n", b->n, a->n);
#endif
      return FALSE;
   }

   do_join_values(ctx, a, b);
   return TRUE;
}
static void
join_values_nofail(struct nv_pc_pass *ctx,
                   struct nv_value *a, struct nv_value *b, boolean type_only)
{
   if (type_only) {
      assert(join_allowed(ctx, a, b));
      do_join_values(ctx, a, b);
   } else {
      boolean ok = try_join_values(ctx, a, b);
      if (!ok)
         NOUVEAU_ERR("failed to coalesce values\n");
   }
}
static INLINE boolean
need_new_else_block(struct nv_basic_block *b, struct nv_basic_block *p)
{
   int i = 0, n = 0;

   for (; i < 2; ++i)
      if (p->out[i] && !IS_LOOP_EDGE(p->out_kind[i]))
         ++n;

   return (b->num_in > 1) && (n == 2);
}
/* Look for the @phi's operand whose definition reaches @b. */
static int
phi_opnd_for_bb(struct nv_instruction *phi, struct nv_basic_block *b,
                struct nv_basic_block *tb)
{
   struct nv_ref *srci, *srcj;
   int i, j;

   for (j = -1, i = 0; i < 6 && phi->src[i]; ++i) {
      srci = phi->src[i];
      /* if already replaced, check with original source first */
      if (srci->flags & NV_REF_FLAG_REGALLOC_PRIV)
         srci = srci->value->insn->src[0];
      if (!nvbb_reachable_by(b, srci->value->insn->bb, NULL))
         continue;
      /* NOTE: back-edges are ignored by the reachable-by check */
      if (j < 0 || !nvbb_reachable_by(srcj->value->insn->bb,
                                      srci->value->insn->bb, NULL)) {
         j = i;
         srcj = srci;
      }
   }
   if (j >= 0 && nvbb_reachable_by(b, phi->def[0]->insn->bb, NULL))
      if (!nvbb_reachable_by(srcj->value->insn->bb,
                             phi->def[0]->insn->bb, NULL))
         j = -1;
   return j;
}
/* For each operand of each PHI in b, generate a new value by inserting a MOV
 * at the end of the block it is coming from and replace the operand with its
 * result. This eliminates liveness conflicts and enables us to let values be
 * copied to the right register if such a conflict exists nonetheless.
 *
 * These MOVs are also crucial in making sure the live intervals of phi
 * sources are extended until the end of the loop, since they are not
 * included in the live-in sets.
 */
static int
pass_generate_phi_movs(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i, *ni;
   struct nv_value *val;
   struct nv_basic_block *p, *pn;
   int n, j;

   b->pass_seq = ctx->pc->pass_seq;

   for (n = 0; n < b->num_in; ++n) {
      p = pn = b->in[n];
      assert(p);

      if (need_new_else_block(b, p)) {
         pn = new_basic_block(ctx->pc);

         if (p->out[0] == b)
            p->out[0] = pn;
         else
            p->out[1] = pn;

         if (p->exit->target == b) /* target to new else-block */
            p->exit->target = pn;

         b->in[n] = pn;

         pn->out[0] = b;
         pn->in[0] = p;
         pn->num_in = 1;
      }
      ctx->pc->current_block = pn;

      for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next) {
         j = phi_opnd_for_bb(i, p, b);
         if (j < 0)
            continue;

         val = i->src[j]->value;
         if (i->src[j]->flags & NV_REF_FLAG_REGALLOC_PRIV) {
            j = -1;
            /* use original value, we already encountered & replaced it */
            val = val->insn->src[0]->value;
         }
         if (j < 0) /* need an additional source ? */
            for (j = 0; j < 5 && i->src[j] && i->src[j]->value != val; ++j);

         ni = new_instruction(ctx->pc, NV_OP_MOV);

         /* TODO: insert instruction at correct position in the first place */
         if (ni->prev && ni->prev->target)
            nv_nvi_permute(ni->prev, ni);

         ni->def[0] = new_value(ctx->pc, val->reg.file, val->reg.type);
         ni->def[0]->insn = ni;
         ni->src[0] = new_ref(ctx->pc, val);

         nv_reference(ctx->pc, &i->src[j], ni->def[0]);

         i->src[j]->flags |= NV_REF_FLAG_REGALLOC_PRIV;
      }

      if (pn != p && pn->exit) {
         assert(!b->in[!n]->exit || b->in[!n]->exit->is_terminator);
         /* insert terminator (branch to ENDIF) in new else block */
         ctx->pc->current_block = pn;
         ni = new_instruction(ctx->pc, NV_OP_BRA);
         ni->target = b;
         ni->is_terminator = 1;
      }
   }

   for (j = 0; j < 2; ++j)
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq)
         pass_generate_phi_movs(ctx, b->out[j]);

   return 0;
}
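/* Illustrative before/after (not from the original source):
 *
 *    BB1: ...             BB1: t1 = MOV a
 *    BB2: ...      ==>    BB2: t2 = MOV b
 *    BB3: x = PHI(a, b)   BB3: x = PHI(t1, t2)
 *
 * The copies give every phi operand a private value whose live interval
 * ends at the edge into BB3, so operands can later be coalesced with the
 * phi destination without overlapping each other.
 */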
#define JOIN_MASK_PHI    (1 << 0)
#define JOIN_MASK_SELECT (1 << 1)
#define JOIN_MASK_MOV    (1 << 2)
#define JOIN_MASK_TEX    (1 << 3)
static int
pass_join_values(struct nv_pc_pass *ctx, unsigned mask)
{
   int c, n;

   for (n = 0; n < ctx->num_insns; ++n) {
      struct nv_instruction *nvi, *i = ctx->insns[n];

      switch (i->opcode) {
      case NV_OP_PHI:
         if (!(mask & JOIN_MASK_PHI))
            break;
         for (c = 0; c < 5 && i->src[c]; ++c)
            join_values_nofail(ctx, i->def[0], i->src[c]->value, FALSE);
         break;
      case NV_OP_MOV:
         if (!(mask & JOIN_MASK_MOV))
            break;
         nvi = i->src[0]->value->join->insn;
         if (nvi && !nv_is_vector_op(nvi->opcode))
            try_join_values(ctx, i->def[0], i->src[0]->value);
         break;
      case NV_OP_SELECT:
         if (!(mask & JOIN_MASK_SELECT))
            break;
         for (c = 0; c < 5 && i->src[c]; ++c)
            join_values_nofail(ctx, i->def[0], i->src[c]->value, TRUE);
         break;
      case NV_OP_TEX:
      case NV_OP_TXB:
         if (!(mask & JOIN_MASK_TEX))
            break;
         /* This should work without conflicts because we always generate
          * extra MOVs for the sources of a TEX.
          */
         for (c = 0; c < 4 && i->src[c]; ++c)
            join_values_nofail(ctx, i->def[c], i->src[c]->value, TRUE);
         break;
      default:
         break;
      }
   }
   return 0;
}
/* Order the instructions so that live intervals can be expressed in numbers. */
static void
pass_order_instructions(void *priv, struct nv_basic_block *b)
{
   struct nv_pc_pass *ctx = (struct nv_pc_pass *)priv;
   struct nv_instruction *i;

   b->pass_seq = ctx->pc->pass_seq;

   assert(!b->exit || !b->exit->next);
   for (i = b->phi; i; i = i->next) {
      i->serial = ctx->num_insns;
      ctx->insns[ctx->num_insns++] = i;
   }
}
static void
bb_live_set_print(struct nv_pc *pc, struct nv_basic_block *b)
{
#ifdef NV50_RA_DEBUG_LIVE_SETS
   int j;
   struct nv_value *val;

   debug_printf("LIVE-INs of BB:%i: ", b->id);

   for (j = 0; j < pc->num_values; ++j) {
      if (!(b->live_set[j / 32] & (1 << (j % 32))))
         continue;
      val = &pc->values[j];
      if (!val->insn)
         continue;
      debug_printf("%i ", val->n);
   }
   debug_printf("\n");
#endif
}
static INLINE void
live_set_add(struct nv_basic_block *b, struct nv_value *val)
{
   if (!val->insn) /* don't add non-def values */
      return;
   b->live_set[val->n / 32] |= 1 << (val->n % 32);
}
static INLINE void
live_set_rem(struct nv_basic_block *b, struct nv_value *val)
{
   b->live_set[val->n / 32] &= ~(1 << (val->n % 32));
}
static INLINE boolean
live_set_test(struct nv_basic_block *b, struct nv_ref *ref)
{
   int n = ref->value->n;
   return b->live_set[n / 32] & (1 << (n % 32));
}
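/* Example (illustrative): value number 37 maps to word 37 / 32 = 1 and bit
 * 37 % 32 = 5 of the block's live_set, i.e. liveness is tracked with one
 * bit per SSA value per basic block.
 */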
/* The live set of a block contains those values that are live immediately
 * before the beginning of the block, so do a backwards scan.
 */
static int
pass_build_live_sets(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i;
   int j, n, ret = 0;

   if (b->pass_seq >= ctx->pc->pass_seq)
      return 0;
   b->pass_seq = ctx->pc->pass_seq;

   /* slight hack for undecidedness: set phi = entry if it's undefined */
   if (!b->phi)
      b->phi = b->entry;

   for (n = 0; n < 2; ++n) {
      if (!b->out[n] || b->out[n] == b)
         continue;
      ret = pass_build_live_sets(ctx, b->out[n]);
      if (ret)
         return ret;

      if (n == 0) {
         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
            b->live_set[j] = b->out[n]->live_set[j];
      } else {
         for (j = 0; j < (ctx->pc->num_values + 31) / 32; ++j)
            b->live_set[j] |= b->out[n]->live_set[j];
      }
   }

   if (!b->entry)
      return 0;

   bb_live_set_print(ctx->pc, b);

   for (i = b->exit; i != b->entry->prev; i = i->prev) {
      for (j = 0; j < 4; j++) {
         if (!i->def[j])
            break;
         live_set_rem(b, i->def[j]);
      }
      for (j = 0; j < 4; j++) {
         if (!i->src[j])
            break;
         live_set_add(b, i->src[j]->value);
      }
      if (i->src[4])
         live_set_add(b, i->src[4]->value);
      if (i->flags_def)
         live_set_rem(b, i->flags_def);
      if (i->flags_src)
         live_set_add(b, i->flags_src->value);
   }
   for (i = b->phi; i && i->opcode == NV_OP_PHI; i = i->next)
      live_set_rem(b, i->def[0]);

   bb_live_set_print(ctx->pc, b);

   return 0;
}
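/* In dataflow terms (illustrative summary): live-in(b) is the union of the
 * successors' live-ins, minus the values defined in b, plus the values used
 * in b. The pass_seq guard makes each invocation visit a block once, and
 * nv_pc_pass1() below repeats this pass up to the loop nesting bound so
 * that liveness information converges around back-edges.
 */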
static void collect_live_values(struct nv_basic_block *b, const int n)
{
   int i;

   if (b->out[0] && b->out_kind[0] != CFG_EDGE_FAKE) {
      if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
         for (i = 0; i < n; ++i)
            b->live_set[i] = b->out[0]->live_set[i] | b->out[1]->live_set[i];
      } else {
         memcpy(b->live_set, b->out[0]->live_set, n * sizeof(uint32_t));
      }
   } else
   if (b->out[1] && b->out_kind[1] != CFG_EDGE_FAKE) {
      memcpy(b->live_set, b->out[1]->live_set, n * sizeof(uint32_t));
   } else {
      memset(b->live_set, 0, n * sizeof(uint32_t));
   }
}
/* NOTE: the live intervals of phi functions start at the first non-phi insn. */
static int
pass_build_intervals(struct nv_pc_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *i, *i_stop;
   int j, s;
   const int n = (ctx->pc->num_values + 31) / 32;

   /* verify that first block does not have live-in values */
   if (b->num_in == 0)
      for (j = 0; j < n; ++j)
         assert(b->live_set[j] == 0);

   collect_live_values(b, n);

   /* remove live-outs def'd in a parallel block, hopefully they're all phi'd */
   for (j = 0; j < 2; ++j) {
      if (!b->out[j] || !b->out[j]->phi)
         continue;
      for (i = b->out[j]->phi; i->opcode == NV_OP_PHI; i = i->next) {
         live_set_rem(b, i->def[0]);

         for (s = 0; s < 4; ++s) {
            if (!i->src[s])
               break;
            assert(i->src[s]->value->insn);
            if (nvbb_reachable_by(b, i->src[s]->value->insn->bb, b->out[j]))
               live_set_add(b, i->src[s]->value);
            else
               live_set_rem(b, i->src[s]->value);
         }
      }
   }

   /* remaining live-outs are live until the end */
   if (b->exit) {
      for (j = 0; j < ctx->pc->num_values; ++j) {
         if (!(b->live_set[j / 32] & (1 << (j % 32))))
            continue;
         add_range(&ctx->pc->values[j], b, b->exit->serial + 1);
#ifdef NV50_RA_DEBUG_LIVEI
         debug_printf("adding range for live value %i: ", j);
         livei_print(&ctx->pc->values[j]);
#endif
      }
   }

   i_stop = b->entry ? b->entry->prev : NULL;

   /* don't have to include phi functions here (will have 0 live range) */
   for (i = b->exit; i != i_stop; i = i->prev) {
      assert(i->serial >= b->phi->serial && i->serial <= b->exit->serial);
      for (j = 0; j < 4; ++j) {
         if (i->def[j])
            live_set_rem(b, i->def[j]);
      }
      if (i->flags_def)
         live_set_rem(b, i->flags_def);

      for (j = 0; j < 5; ++j) {
         if (i->src[j] && !live_set_test(b, i->src[j])) {
            live_set_add(b, i->src[j]->value);
            add_range(i->src[j]->value, b, i->serial);
#ifdef NV50_RA_DEBUG_LIVEI
            debug_printf("adding range for source %i (ends living): ",
                         i->src[j]->value->n);
            livei_print(i->src[j]->value);
#endif
         }
      }
      if (i->flags_src && !live_set_test(b, i->flags_src)) {
         live_set_add(b, i->flags_src->value);
         add_range(i->flags_src->value, b, i->serial);
#ifdef NV50_RA_DEBUG_LIVEI
         debug_printf("adding range for source %i (ends living): ",
                      i->flags_src->value->n);
         livei_print(i->flags_src->value);
#endif
      }
   }

   b->pass_seq = ctx->pc->pass_seq;

   if (b->out[0] && b->out[0]->pass_seq < ctx->pc->pass_seq)
      pass_build_intervals(ctx, b->out[0]);

   if (b->out[1] && b->out[1]->pass_seq < ctx->pc->pass_seq)
      pass_build_intervals(ctx, b->out[1]);

   return 0;
}
static void
nv50_ctor_register_set(struct nv_pc *pc, struct register_set *set)
{
   memset(set, 0, sizeof(*set));

   set->last[NV_FILE_GPR] = 255;
   set->last[NV_FILE_OUT] = 127;
   set->last[NV_FILE_FLAGS] = 4;
   set->last[NV_FILE_ADDR] = 4;

   set->pc = pc;
}
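/* Note (illustrative): last[] is expressed in allocation units of each file,
 * so 255 for NV_FILE_GPR covers 256 16-bit units (MAX_REGISTER_COUNT),
 * i.e. 128 full 32-bit GPRs; the limits mirror what the rest of the driver
 * assumes for these register files.
 */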
static void
insert_ordered_tail(struct nv_value *list, struct nv_value *nval)
{
   struct nv_value *elem;

   for (elem = list->prev;
        elem != list && elem->livei->bgn > nval->livei->bgn;
        elem = elem->prev);
   /* now elem begins before or at the same time as val */

   nval->prev = elem;
   nval->next = elem->next;
   elem->next->prev = nval;
   elem->next = nval;
}
static void
collect_register_values(struct nv_pc_pass *ctx, struct nv_value *head,
                        boolean assigned_only)
{
   struct nv_value *val;
   int k, n;

   make_empty_list(head);

   for (n = 0; n < ctx->num_insns; ++n) {
      struct nv_instruction *i = ctx->insns[n];

      /* for joined values, only the representative will have livei != NULL */
      for (k = 0; k < 4; ++k) {
         if (i->def[k] && i->def[k]->livei)
            if (!assigned_only || i->def[k]->reg.id >= 0)
               insert_ordered_tail(head, i->def[k]);
      }

      if (i->flags_def && i->flags_def->livei)
         if (!assigned_only || i->flags_def->reg.id >= 0)
            insert_ordered_tail(head, i->flags_def);
   }

   for (val = head->next; val != head->prev; val = val->next) {
      assert(val->join == val);
      assert(val->livei->bgn <= val->next->livei->bgn);
   }
}
static int
pass_linear_scan(struct nv_pc_pass *ctx, int iter)
{
   struct register_set f, free;
   struct nv_value *cur, *val, *tmp[2];
   struct nv_value active, inactive, handled, unhandled;

   make_empty_list(&active);
   make_empty_list(&inactive);
   make_empty_list(&handled);

   nv50_ctor_register_set(ctx->pc, &free);

   collect_register_values(ctx, &unhandled, FALSE);

   foreach_s(cur, tmp[0], &unhandled) {
      remove_from_list(cur);

      /* expire intervals that ended, deactivate those with a hole here */
      foreach_s(val, tmp[1], &active) {
         if (livei_end(val) <= cur->livei->bgn) {
            reg_release(&free, val);
            move_to_head(&handled, val);
         } else
         if (!livei_contains(val, cur->livei->bgn)) {
            reg_release(&free, val);
            move_to_head(&inactive, val);
         }
      }

      foreach_s(val, tmp[1], &inactive) {
         if (livei_end(val) <= cur->livei->bgn)
            move_to_head(&handled, val);
         else
         if (livei_contains(val, cur->livei->bgn)) {
            reg_occupy(&free, val);
            move_to_head(&active, val);
         }
      }

      f = free;

      foreach(val, &inactive)
         if (livei_have_overlap(val, cur))
            reg_occupy(&f, val);

      foreach(val, &unhandled)
         if (val->reg.id >= 0 && livei_have_overlap(val, cur))
            reg_occupy(&f, val);

      if (cur->reg.id < 0) {
         boolean mem = !reg_assign(&f, &cur, 1);

         if (mem) {
            NOUVEAU_ERR("out of registers\n");
            abort();
         }
      }
      insert_at_head(&active, cur);
      reg_occupy(&free, cur);
   }

   return 0;
}
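/* Summary (illustrative): this is the classic linear-scan state machine.
 * "unhandled" holds intervals sorted by start position, "active" those live
 * at the current point and holding a register, "inactive" those currently
 * in a lifetime hole, and "handled" the expired ones. Inactive values and
 * pre-assigned unhandled values that overlap 'cur' are additionally masked
 * into the scratch set f before a register is picked for 'cur'.
 */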
/* Allocate values defined by instructions such as TEX, which have to be
 * assigned to consecutive registers.
 * Linear scan doesn't really work here since the values can have different
 * live intervals.
 */
static int
pass_allocate_constrained_values(struct nv_pc_pass *ctx)
{
   struct nv_value regvals, *val;
   struct nv_instruction *i;
   struct nv_value *defs[4];
   struct register_set regs[4];
   int n, vsize, c;
   uint32_t mask;
   boolean mem;

   collect_register_values(ctx, &regvals, TRUE);

   for (n = 0; n < ctx->num_insns; ++n) {
      i = ctx->insns[n];
      vsize = nvi_vector_size(i);
      if (vsize < 2)
         continue;
      assert(vsize <= 4);

      for (c = 0; c < vsize; ++c)
         defs[c] = i->def[c]->join;

      if (defs[0]->reg.id >= 0) {
         for (c = 1; c < vsize; ++c)
            assert(defs[c]->reg.id >= 0);
         continue;
      }

      /* Compute registers available for this "vector" of consecutive registers.
       * Each value (component) has its own independent live interval.
       */
      for (c = 0; c < vsize; ++c) {
         nv50_ctor_register_set(ctx->pc, &regs[c]);

         foreach(val, &regvals) {
            if (val->reg.id >= 0 && livei_have_overlap(val, defs[c]))
               reg_occupy(&regs[c], val);
         }
         /* Only 32 bit GPRs will be allocated here, but register set
          * granularity for GPRs is 16 bit.
          */
         mask = 0x03030303;
         if (vsize == 2) /* granularity is 2 and not 4 */
            mask |= 0x03030303 << 4;
         mask_register_set(&regs[c], 0, mask << (c * 2));

         if (defs[c]->livei)
            insert_ordered_tail(&regvals, defs[c]);
      }
      for (c = 1; c < vsize; ++c)
         intersect_register_sets(&regs[0], &regs[0], &regs[c]);

      mem = !reg_assign(&regs[0], &defs[0], vsize);

      if (mem) {
         NOUVEAU_ERR("out of registers\n");
         abort();
      }
   }
   return 0;
}
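/* Worked example (illustrative): for a 4-component TEX, mask 0x03030303
 * keeps lanes 0-1 of every 8 half-register units; shifting it by c * 2
 * keeps exactly the lanes component c would occupy. OR-ing the per-
 * component sets via intersect_register_sets then leaves an 8-unit block
 * free only where all four components fit, which reg_assign picks up with
 * n = vsize.
 */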
static int
nv_pc_pass1(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pc_pass *ctx;
   int i, ret;

   NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - entering\n");

   ctx = CALLOC_STRUCT(nv_pc_pass);
   if (!ctx)
      return -1;
   ctx->pc = pc;

   ctx->insns = CALLOC(NV_PC_MAX_INSTRUCTIONS, sizeof(struct nv_instruction *));
   if (!ctx->insns) {
      FREE(ctx);
      return -1;
   }

   pc->pass_seq++;
   ret = pass_generate_phi_movs(ctx, root);
   if (ret)
      goto out;

   for (i = 0; i < pc->loop_nesting_bound; ++i) {
      pc->pass_seq++;
      ret = pass_build_live_sets(ctx, root);
      assert(!ret && "live sets");
      if (ret) {
         NOUVEAU_ERR("failed to build live sets (iteration %d)\n", i);
         goto out;
      }
   }

   pc->pass_seq++;
   nv_pc_pass_in_order(root, pass_order_instructions, ctx);

   pc->pass_seq++;
   ret = pass_build_intervals(ctx, root);
   assert(!ret && "build intervals");
   if (ret) {
      NOUVEAU_ERR("failed to build live intervals\n");
      goto out;
   }

#ifdef NV50_RA_DEBUG_LIVEI
   for (i = 0; i < pc->num_values; ++i)
      livei_print(&pc->values[i]);
#endif

   ret = pass_join_values(ctx, JOIN_MASK_PHI);
   if (ret)
      goto out;
   ret = pass_join_values(ctx, JOIN_MASK_SELECT | JOIN_MASK_TEX);
   if (ret)
      goto out;
   ret = pass_join_values(ctx, JOIN_MASK_MOV);
   if (ret)
      goto out;

   ret = pass_allocate_constrained_values(ctx);
   if (ret)
      goto out;

   ret = pass_linear_scan(ctx, 1);
   if (ret)
      goto out;

   for (i = 0; i < pc->num_values; ++i)
      livei_release(&pc->values[i]);

   NV50_DBGMSG(PROG_RA, "REGISTER ALLOCATION - leaving\n");

out:
   FREE(ctx->insns);
   FREE(ctx);
   return ret;
}
int
nv_pc_exec_pass1(struct nv_pc *pc)
{
   int i, ret;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass1(pc, pc->root[i])))
         return ret;
   return 0;
}