2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "nv50_program.h"
28 /* returns TRUE if operands 0 and 1 can be swapped */
30 nv_op_commutative(uint opcode
)
48 /* return operand to which the address register applies */
50 nv50_indirect_opnd(struct nv_instruction
*i
)
66 nv50_nvi_can_use_imm(struct nv_instruction
*nvi
, int s
)
68 if (nvi
->flags_src
|| nvi
->flags_def
)
71 switch (nvi
->opcode
) {
79 return (s
== 1) && (nvi
->src
[0]->value
->reg
.file
== NV_FILE_GPR
) &&
80 (nvi
->def
[0]->reg
.file
== NV_FILE_GPR
);
83 return (nvi
->def
[0]->reg
.file
== NV_FILE_GPR
);
90 nv50_nvi_can_load(struct nv_instruction
*nvi
, int s
, struct nv_value
*value
)
94 for (i
= 0; i
< 3 && nvi
->src
[i
]; ++i
)
95 if (nvi
->src
[i
]->value
->reg
.file
== NV_FILE_IMM
)
98 switch (nvi
->opcode
) {
112 if (s
== 0 && (value
->reg
.file
== NV_FILE_MEM_S
||
113 value
->reg
.file
== NV_FILE_MEM_P
))
115 if (value
->reg
.file
< NV_FILE_MEM_C(0) ||
116 value
->reg
.file
> NV_FILE_MEM_C(15))
119 ((s
== 2) && (nvi
->src
[1]->value
->reg
.file
== NV_FILE_GPR
));
122 return /* TRUE */ FALSE
; /* don't turn MOVs into loads */
128 /* Return whether this instruction can be executed conditionally. */
130 nv50_nvi_can_predicate(struct nv_instruction
*nvi
)
136 for (i
= 0; i
< 4 && nvi
->src
[i
]; ++i
)
137 if (nvi
->src
[i
]->value
->reg
.file
== NV_FILE_IMM
)
143 nv50_supported_src_mods(uint opcode
, int s
)
147 return NV_MOD_NEG
| NV_MOD_ABS
; /* obviously */
166 return NV_MOD_ABS
| NV_MOD_NEG
;
172 /* We may want an opcode table. */
174 nv50_op_can_write_flags(uint opcode
)
176 if (nv_is_vector_op(opcode
))
178 switch (opcode
) { /* obvious ones like KIL, CALL, etc. not included */
189 if (opcode
>= NV_OP_RCP
&& opcode
<= NV_OP_PREEX2
)
195 nv_nvi_refcount(struct nv_instruction
*nvi
)
199 rc
= nvi
->flags_def
? nvi
->flags_def
->refc
: 0;
201 for (i
= 0; i
< 4; ++i
) {
204 rc
+= nvi
->def
[i
]->refc
;
210 nvcg_replace_value(struct nv_pc
*pc
, struct nv_value
*old_val
,
211 struct nv_value
*new_val
)
215 if (old_val
== new_val
)
216 return old_val
->refc
;
218 for (i
= 0, n
= 0; i
< pc
->num_refs
; ++i
) {
219 if (pc
->refs
[i
]->value
== old_val
) {
221 nv_reference(pc
, &pc
->refs
[i
], new_val
);
228 nvcg_find_constant(struct nv_ref
*ref
)
230 struct nv_value
*src
;
236 while (src
->insn
&& src
->insn
->opcode
== NV_OP_MOV
) {
237 assert(!src
->insn
->src
[0]->mod
);
238 src
= src
->insn
->src
[0]->value
;
240 if ((src
->reg
.file
== NV_FILE_IMM
) ||
241 (src
->insn
&& src
->insn
->opcode
== NV_OP_LDA
&&
242 src
->insn
->src
[0]->value
->reg
.file
>= NV_FILE_MEM_C(0) &&
243 src
->insn
->src
[0]->value
->reg
.file
<= NV_FILE_MEM_C(15)))
249 nvcg_find_immediate(struct nv_ref
*ref
)
251 struct nv_value
*src
= nvcg_find_constant(ref
);
253 return (src
&& src
->reg
.file
== NV_FILE_IMM
) ? src
: NULL
;
257 nv_pc_free_refs(struct nv_pc
*pc
)
260 for (i
= 0; i
< pc
->num_refs
; i
+= 64)
266 edge_name(ubyte type
)
269 case CFG_EDGE_FORWARD
: return "forward";
270 case CFG_EDGE_BACK
: return "back";
271 case CFG_EDGE_LOOP_ENTER
: return "loop";
272 case CFG_EDGE_LOOP_LEAVE
: return "break";
273 case CFG_EDGE_FAKE
: return "fake";
280 nv_pc_pass_in_order(struct nv_basic_block
*root
, nv_pc_pass_func f
, void *priv
)
282 struct nv_basic_block
*bb
[64], *bbb
[16], *b
;
293 for (j
= 1; j
>= 0; --j
) {
297 switch (b
->out_kind
[j
]) {
300 case CFG_EDGE_FORWARD
:
302 if (++b
->out
[j
]->priv
== b
->out
[j
]->num_in
)
305 case CFG_EDGE_LOOP_ENTER
:
308 case CFG_EDGE_LOOP_LEAVE
:
309 if (!b
->out
[j
]->priv
) {
310 bbb
[pp
++] = b
->out
[j
];
325 bb
[pp
- 1] = bbb
[pp
- 1];
331 nv_do_print_function(void *priv
, struct nv_basic_block
*b
)
333 struct nv_instruction
*i
;
335 debug_printf("=== BB %i ", b
->id
);
337 debug_printf("[%s -> %i] ", edge_name(b
->out_kind
[0]), b
->out
[0]->id
);
339 debug_printf("[%s -> %i] ", edge_name(b
->out_kind
[1]), b
->out
[1]->id
);
340 debug_printf("===\n");
345 for (; i
; i
= i
->next
)
346 nv_print_instruction(i
);
350 nv_print_function(struct nv_basic_block
*root
)
352 if (root
->subroutine
)
353 debug_printf("SUBROUTINE %i\n", root
->subroutine
);
355 debug_printf("MAIN\n");
357 nv_pc_pass_in_order(root
, nv_do_print_function
, root
);
361 nv_print_program(struct nv_pc
*pc
)
364 for (i
= 0; i
< pc
->num_subroutines
+ 1; ++i
)
366 nv_print_function(pc
->root
[i
]);
369 #if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
371 nv_do_print_cfgraph(struct nv_pc
*pc
, FILE *f
, struct nv_basic_block
*b
)
375 b
->pass_seq
= pc
->pass_seq
;
377 fprintf(f
, "\t%i [shape=box]\n", b
->id
);
379 for (i
= 0; i
< 2; ++i
) {
382 switch (b
->out_kind
[i
]) {
383 case CFG_EDGE_FORWARD
:
384 fprintf(f
, "\t%i -> %i;\n", b
->id
, b
->out
[i
]->id
);
386 case CFG_EDGE_LOOP_ENTER
:
387 fprintf(f
, "\t%i -> %i [color=green];\n", b
->id
, b
->out
[i
]->id
);
389 case CFG_EDGE_LOOP_LEAVE
:
390 fprintf(f
, "\t%i -> %i [color=red];\n", b
->id
, b
->out
[i
]->id
);
393 fprintf(f
, "\t%i -> %i;\n", b
->id
, b
->out
[i
]->id
);
396 fprintf(f
, "\t%i -> %i [style=dotted];\n", b
->id
, b
->out
[i
]->id
);
402 if (b
->out
[i
]->pass_seq
< pc
->pass_seq
)
403 nv_do_print_cfgraph(pc
, f
, b
->out
[i
]);
407 /* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
409 nv_print_cfgraph(struct nv_pc
*pc
, const char *filepath
, int subr
)
413 f
= fopen(filepath
, "a");
417 fprintf(f
, "digraph G {\n");
421 nv_do_print_cfgraph(pc
, f
, pc
->root
[subr
]);
427 #endif /* NV50_DEBUG_PROG_CFLOW */
430 nvcg_show_bincode(struct nv_pc
*pc
)
434 for (i
= 0; i
< pc
->bin_size
/ 4; ++i
) {
435 debug_printf("0x%08x ", pc
->emit
[i
]);
443 nv50_emit_program(struct nv_pc
*pc
)
445 uint32_t *code
= pc
->emit
;
448 NV50_DBGMSG(SHADER
, "emitting program: size = %u\n", pc
->bin_size
);
450 for (n
= 0; n
< pc
->num_blocks
; ++n
) {
451 struct nv_instruction
*i
;
452 struct nv_basic_block
*b
= pc
->bb_list
[n
];
454 for (i
= b
->entry
; i
; i
= i
->next
) {
455 nv50_emit_instruction(pc
, i
);
457 pc
->bin_pos
+= 1 + (pc
->emit
[0] & 1);
458 pc
->emit
+= 1 + (pc
->emit
[0] & 1);
461 assert(pc
->emit
== &code
[pc
->bin_size
/ 4]);
463 /* XXX: we can do better than this ... */
465 !(pc
->emit
[-2] & 1) || (pc
->emit
[-2] & 2) || (pc
->emit
[-1] & 3)) {
466 pc
->emit
[0] = 0xf0000001;
467 pc
->emit
[1] = 0xe0000000;
472 code
[pc
->bin_size
/ 4 - 1] |= 1;
474 #if NV50_DEBUG & NV50_DEBUG_SHADER
475 nvcg_show_bincode(pc
);
482 nv50_generate_code(struct nv50_translation_info
*ti
)
488 pc
= CALLOC_STRUCT(nv_pc
);
492 pc
->root
= CALLOC(ti
->subr_nr
+ 1, sizeof(pc
->root
[0]));
497 pc
->num_subroutines
= ti
->subr_nr
;
499 ret
= nv50_tgsi_to_nc(pc
, ti
);
502 #if NV50_DEBUG & NV50_DEBUG_PROG_IR
503 nv_print_program(pc
);
506 pc
->opt_reload_elim
= ti
->store_to_memory
? FALSE
: TRUE
;
509 ret
= nv_pc_exec_pass0(pc
);
512 #if NV50_DEBUG & NV50_DEBUG_PROG_IR
513 nv_print_program(pc
);
516 /* register allocation */
517 ret
= nv_pc_exec_pass1(pc
);
520 #if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
521 nv_print_program(pc
);
522 nv_print_cfgraph(pc
, "nv50_shader_cfgraph.dot", 0);
525 /* prepare for emission */
526 ret
= nv_pc_exec_pass2(pc
);
529 assert(!(pc
->bin_size
% 8));
531 pc
->emit
= CALLOC(pc
->bin_size
/ 4 + 2, 4);
536 ret
= nv50_emit_program(pc
);
540 ti
->p
->code_size
= pc
->bin_size
;
541 ti
->p
->code
= pc
->emit
;
543 ti
->p
->immd_size
= pc
->immd_count
* 4;
544 ti
->p
->immd
= pc
->immd_buf
;
546 /* highest 16 bit reg to num of 32 bit regs, limit to >= 4 */
547 ti
->p
->max_gpr
= MAX2(4, (pc
->max_reg
[NV_FILE_GPR
] >> 1) + 1);
549 ti
->p
->fixups
= pc
->fixups
;
550 ti
->p
->num_fixups
= pc
->num_fixups
;
552 ti
->p
->uses_lmem
= ti
->store_to_memory
;
554 NV50_DBGMSG(SHADER
, "SHADER TRANSLATION - %s\n", ret
? "failed" : "success");
559 for (i
= 0; i
< pc
->num_blocks
; ++i
)
560 FREE(pc
->bb_list
[i
]);
563 if (ret
) { /* on success, these will be referenced by nv50_program */
576 nvbb_insert_phi(struct nv_basic_block
*b
, struct nv_instruction
*i
)
583 assert(!b
->entry
->prev
&& b
->exit
);
591 if (b
->entry
->opcode
== NV_OP_PHI
) { /* insert after entry */
592 assert(b
->entry
== b
->exit
);
597 } else { /* insert before entry */
598 assert(b
->entry
->prev
&& b
->exit
);
600 i
->prev
= b
->entry
->prev
;
608 nvbb_insert_tail(struct nv_basic_block
*b
, struct nv_instruction
*i
)
610 if (i
->opcode
== NV_OP_PHI
) {
611 nvbb_insert_phi(b
, i
);
620 if (i
->prev
&& i
->prev
->opcode
== NV_OP_PHI
)
625 b
->num_instructions
++;
627 if (i
->prev
&& i
->prev
->is_terminator
)
628 nv_nvi_permute(i
->prev
, i
);
632 nvi_insert_after(struct nv_instruction
*at
, struct nv_instruction
*ni
)
635 nvbb_insert_tail(at
->bb
, ni
);
645 nv_nvi_delete(struct nv_instruction
*nvi
)
647 struct nv_basic_block
*b
= nvi
->bb
;
650 /* debug_printf("REM: "); nv_print_instruction(nvi); */
652 for (j
= 0; j
< 5; ++j
)
653 nv_reference(NULL
, &nvi
->src
[j
], NULL
);
654 nv_reference(NULL
, &nvi
->flags_src
, NULL
);
657 nvi
->next
->prev
= nvi
->prev
;
659 assert(nvi
== b
->exit
);
664 nvi
->prev
->next
= nvi
->next
;
666 if (nvi
== b
->entry
) {
667 /* PHIs don't get hooked to b->entry */
668 b
->entry
= nvi
->next
;
669 assert(!nvi
->prev
|| nvi
->prev
->opcode
== NV_OP_PHI
);
673 if (nvi
->opcode
!= NV_OP_PHI
)
674 NV50_DBGMSG(PROG_IR
, "NOTE: b->phi points to non-PHI instruction\n");
677 if (!nvi
->next
|| nvi
->next
->opcode
!= NV_OP_PHI
)
685 nv_nvi_permute(struct nv_instruction
*i1
, struct nv_instruction
*i2
)
687 struct nv_basic_block
*b
= i1
->bb
;
689 assert(i1
->opcode
!= NV_OP_PHI
&&
690 i2
->opcode
!= NV_OP_PHI
);
691 assert(i1
->next
== i2
);
711 nvbb_attach_block(struct nv_basic_block
*parent
,
712 struct nv_basic_block
*b
, ubyte edge_kind
)
714 assert(b
->num_in
< 8);
716 if (parent
->out
[0]) {
717 assert(!parent
->out
[1]);
719 parent
->out_kind
[1] = edge_kind
;
722 parent
->out_kind
[0] = edge_kind
;
725 b
->in
[b
->num_in
] = parent
;
726 b
->in_kind
[b
->num_in
++] = edge_kind
;
729 /* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
732 nvbb_dominated_by(struct nv_basic_block
*b
, struct nv_basic_block
*d
)
739 for (j
= 0; j
< b
->num_in
; ++j
)
740 if ((b
->in_kind
[j
] != CFG_EDGE_BACK
) && !nvbb_dominated_by(b
->in
[j
], d
))
743 return j
? TRUE
: FALSE
;
746 /* check if @bf (future) can be reached from @bp (past), stop at @bt */
748 nvbb_reachable_by(struct nv_basic_block
*bf
, struct nv_basic_block
*bp
,
749 struct nv_basic_block
*bt
)
751 struct nv_basic_block
*q
[NV_PC_MAX_BASIC_BLOCKS
], *b
;
765 assert(n
<= (1024 - 2));
767 for (i
= 0; i
< 2; ++i
) {
768 if (b
->out
[i
] && !IS_WALL_EDGE(b
->out_kind
[i
]) && !b
->out
[i
]->priv
) {
774 for (--n
; n
>= 0; --n
)
780 static struct nv_basic_block
*
781 nvbb_find_dom_frontier(struct nv_basic_block
*b
, struct nv_basic_block
*df
)
783 struct nv_basic_block
*out
;
786 if (!nvbb_dominated_by(df
, b
)) {
787 for (i
= 0; i
< df
->num_in
; ++i
) {
788 if (df
->in_kind
[i
] == CFG_EDGE_BACK
)
790 if (nvbb_dominated_by(df
->in
[i
], b
))
794 for (i
= 0; i
< 2 && df
->out
[i
]; ++i
) {
795 if (df
->out_kind
[i
] == CFG_EDGE_BACK
)
797 if ((out
= nvbb_find_dom_frontier(b
, df
->out
[i
])))
803 struct nv_basic_block
*
804 nvbb_dom_frontier(struct nv_basic_block
*b
)
806 struct nv_basic_block
*df
;
809 for (i
= 0; i
< 2 && b
->out
[i
]; ++i
)
810 if ((df
= nvbb_find_dom_frontier(b
, b
->out
[i
])))