/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
24 #include "nvc0_program.h"
/*
 * Remap the low three bits of a condition code through a fixed table that
 * exchanges bit 0 with bit 2 and leaves bit 1 alone (1 <-> 4, 3 <-> 6,
 * all other values map to themselves).  Bits 3 and above of @cc are passed
 * through unchanged.  Applying the function twice yields the input again.
 *
 * NOTE(review): judging by the names, this is presumably the condition to
 * use once the two compared operands have been exchanged - confirm against
 * the callers.
 */
uint8_t
nvc0_ir_reverse_cc(uint8_t cc)
{
   static const uint8_t cc_swapped[8] = { 0, 4, 2, 6, 1, 5, 3, 7 };

   /* the operands are promoted to int, so narrow the result explicitly */
   return (uint8_t)(cc_swapped[cc & 7] | (cc & ~7));
}
35 nvc0_insn_can_load(struct nv_instruction
*nvi
, int s
,
36 struct nv_instruction
*ld
)
40 if (ld
->opcode
== NV_OP_MOV
&& ld
->src
[0]->value
->reg
.file
== NV_FILE_IMM
) {
41 if (s
> 1 || !(nvc0_op_info_table
[nvi
->opcode
].immediate
& (1 << s
)))
43 if (!(nvc0_op_info_table
[nvi
->opcode
].immediate
& 4))
44 if (ld
->src
[0]->value
->reg
.imm
.u32
& 0xfff)
47 if (!(nvc0_op_info_table
[nvi
->opcode
].memory
& (1 << s
)))
50 if (ld
->indirect
>= 0)
53 /* a few ops can use g[] sources directly, but we don't support g[] yet */
54 if (ld
->src
[0]->value
->reg
.file
== NV_FILE_MEM_L
||
55 ld
->src
[0]->value
->reg
.file
== NV_FILE_MEM_G
)
58 for (i
= 0; i
< 3 && nvi
->src
[i
]; ++i
)
59 if (nvi
->src
[i
]->value
->reg
.file
== NV_FILE_IMM
)
65 /* Return whether this instruction can be executed conditionally. */
67 nvc0_insn_is_predicateable(struct nv_instruction
*nvi
)
69 if (nvi
->predicate
>= 0) /* already predicated */
71 if (!nvc0_op_info_table
[nvi
->opcode
].predicate
&&
72 !nvc0_op_info_table
[nvi
->opcode
].pseudo
)
78 nvc0_insn_refcount(struct nv_instruction
*nvi
)
82 for (i
= 0; i
< 5 && nvi
->def
[i
]; ++i
) {
85 rc
+= nvi
->def
[i
]->refc
;
91 nvc0_pc_replace_value(struct nv_pc
*pc
,
92 struct nv_value
*old_val
,
93 struct nv_value
*new_val
)
97 if (old_val
== new_val
)
100 for (i
= 0, n
= 0; i
< pc
->num_refs
; ++i
) {
101 if (pc
->refs
[i
]->value
== old_val
) {
103 for (s
= 0; s
< 6 && pc
->refs
[i
]->insn
->src
[s
]; ++s
)
104 if (pc
->refs
[i
]->insn
->src
[s
] == pc
->refs
[i
])
107 nv_reference(pc
, pc
->refs
[i
]->insn
, s
, new_val
);
113 static INLINE boolean
114 is_gpr63(struct nv_value
*val
)
116 return (val
->reg
.file
== NV_FILE_GPR
&& val
->reg
.id
== 63);
120 nvc0_pc_find_constant(struct nv_ref
*ref
)
122 struct nv_value
*src
;
128 while (src
->insn
&& src
->insn
->opcode
== NV_OP_MOV
) {
129 assert(!src
->insn
->src
[0]->mod
);
130 src
= src
->insn
->src
[0]->value
;
132 if ((src
->reg
.file
== NV_FILE_IMM
) || is_gpr63(src
) ||
134 src
->insn
->opcode
== NV_OP_LD
&&
135 src
->insn
->src
[0]->value
->reg
.file
>= NV_FILE_MEM_C(0) &&
136 src
->insn
->src
[0]->value
->reg
.file
<= NV_FILE_MEM_C(15)))
142 nvc0_pc_find_immediate(struct nv_ref
*ref
)
144 struct nv_value
*src
= nvc0_pc_find_constant(ref
);
146 return (src
&& (src
->reg
.file
== NV_FILE_IMM
|| is_gpr63(src
))) ? src
: NULL
;
150 nv_pc_free_refs(struct nv_pc
*pc
)
153 for (i
= 0; i
< pc
->num_refs
; i
+= 64)
159 edge_name(ubyte type
)
162 case CFG_EDGE_FORWARD
: return "forward";
163 case CFG_EDGE_BACK
: return "back";
164 case CFG_EDGE_LOOP_ENTER
: return "loop";
165 case CFG_EDGE_LOOP_LEAVE
: return "break";
166 case CFG_EDGE_FAKE
: return "fake";
173 nvc0_pc_pass_in_order(struct nv_basic_block
*root
, nv_pc_pass_func f
,
176 struct nv_basic_block
*bb
[64], *bbb
[16], *b
;
187 for (j
= 1; j
>= 0; --j
) {
191 switch (b
->out_kind
[j
]) {
194 case CFG_EDGE_FORWARD
:
196 if (++b
->out
[j
]->priv
== b
->out
[j
]->num_in
)
199 case CFG_EDGE_LOOP_ENTER
:
202 case CFG_EDGE_LOOP_LEAVE
:
203 if (!b
->out
[j
]->priv
) {
204 bbb
[pp
++] = b
->out
[j
];
219 bb
[pp
- 1] = bbb
[pp
- 1];
225 nv_do_print_function(void *priv
, struct nv_basic_block
*b
)
227 struct nv_instruction
*i
;
229 debug_printf("=== BB %i ", b
->id
);
231 debug_printf("[%s -> %i] ", edge_name(b
->out_kind
[0]), b
->out
[0]->id
);
233 debug_printf("[%s -> %i] ", edge_name(b
->out_kind
[1]), b
->out
[1]->id
);
234 debug_printf("===\n");
239 for (; i
; i
= i
->next
)
240 nvc0_print_instruction(i
);
244 nvc0_print_function(struct nv_basic_block
*root
)
246 if (root
->subroutine
)
247 debug_printf("SUBROUTINE %i\n", root
->subroutine
);
249 debug_printf("MAIN\n");
251 nvc0_pc_pass_in_order(root
, nv_do_print_function
, root
);
255 nvc0_print_program(struct nv_pc
*pc
)
258 for (i
= 0; i
< pc
->num_subroutines
+ 1; ++i
)
260 nvc0_print_function(pc
->root
[i
]);
263 #if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
265 nv_do_print_cfgraph(struct nv_pc
*pc
, FILE *f
, struct nv_basic_block
*b
)
269 b
->pass_seq
= pc
->pass_seq
;
271 fprintf(f
, "\t%i [shape=box]\n", b
->id
);
273 for (i
= 0; i
< 2; ++i
) {
276 switch (b
->out_kind
[i
]) {
277 case CFG_EDGE_FORWARD
:
278 fprintf(f
, "\t%i -> %i;\n", b
->id
, b
->out
[i
]->id
);
280 case CFG_EDGE_LOOP_ENTER
:
281 fprintf(f
, "\t%i -> %i [color=green];\n", b
->id
, b
->out
[i
]->id
);
283 case CFG_EDGE_LOOP_LEAVE
:
284 fprintf(f
, "\t%i -> %i [color=red];\n", b
->id
, b
->out
[i
]->id
);
287 fprintf(f
, "\t%i -> %i;\n", b
->id
, b
->out
[i
]->id
);
290 fprintf(f
, "\t%i -> %i [style=dotted];\n", b
->id
, b
->out
[i
]->id
);
296 if (b
->out
[i
]->pass_seq
< pc
->pass_seq
)
297 nv_do_print_cfgraph(pc
, f
, b
->out
[i
]);
301 /* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
303 nv_print_cfgraph(struct nv_pc
*pc
, const char *filepath
, int subr
)
307 f
= fopen(filepath
, "a");
311 fprintf(f
, "digraph G {\n");
315 nv_do_print_cfgraph(pc
, f
, pc
->root
[subr
]);
324 nvc0_pc_print_binary(struct nv_pc
*pc
)
328 NV50_DBGMSG(SHADER
, "nvc0_pc_print_binary(%u ops)\n", pc
->emit_size
/ 8);
330 for (i
= 0; i
< pc
->emit_size
/ 4; i
+= 2) {
331 debug_printf("0x%08x ", pc
->emit
[i
+ 0]);
332 debug_printf("0x%08x ", pc
->emit
[i
+ 1]);
340 nvc0_emit_program(struct nv_pc
*pc
)
342 uint32_t *code
= pc
->emit
;
345 NV50_DBGMSG(SHADER
, "emitting program: size = %u\n", pc
->emit_size
);
348 for (n
= 0; n
< pc
->num_blocks
; ++n
) {
349 struct nv_instruction
*i
;
350 struct nv_basic_block
*b
= pc
->bb_list
[n
];
352 for (i
= b
->entry
; i
; i
= i
->next
) {
353 nvc0_emit_instruction(pc
, i
);
358 assert(pc
->emit
== &code
[pc
->emit_size
/ 4]);
360 pc
->emit
[0] = 0x00001de7;
361 pc
->emit
[1] = 0x80000000;
366 #if NV50_DEBUG & NV50_DEBUG_SHADER
367 nvc0_pc_print_binary(pc
);
374 nvc0_generate_code(struct nvc0_translation_info
*ti
)
380 pc
= CALLOC_STRUCT(nv_pc
);
384 pc
->is_fragprog
= ti
->prog
->type
== PIPE_SHADER_FRAGMENT
;
386 pc
->root
= CALLOC(ti
->num_subrs
+ 1, sizeof(pc
->root
[0]));
391 pc
->num_subroutines
= ti
->num_subrs
;
393 ret
= nvc0_tgsi_to_nc(pc
, ti
);
396 #if NV50_DEBUG & NV50_DEBUG_PROG_IR
397 nvc0_print_program(pc
);
400 pc
->opt_reload_elim
= ti
->require_stores
? FALSE
: TRUE
;
403 ret
= nvc0_pc_exec_pass0(pc
);
406 #if NV50_DEBUG & NV50_DEBUG_PROG_IR
407 nvc0_print_program(pc
);
410 /* register allocation */
411 ret
= nvc0_pc_exec_pass1(pc
);
414 #if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
415 nvc0_print_program(pc
);
416 nv_print_cfgraph(pc
, "nvc0_shader_cfgraph.dot", 0);
419 /* prepare for emission */
420 ret
= nvc0_pc_exec_pass2(pc
);
423 assert(!(pc
->emit_size
% 8));
425 pc
->emit
= CALLOC(pc
->emit_size
/ 4 + 2, 4);
430 ret
= nvc0_emit_program(pc
);
434 ti
->prog
->code
= pc
->emit
;
435 ti
->prog
->code_base
= 0;
436 ti
->prog
->code_size
= pc
->emit_size
;
437 ti
->prog
->parm_size
= 0;
439 ti
->prog
->max_gpr
= MAX2(4, pc
->max_reg
[NV_FILE_GPR
] + 1);
441 ti
->prog
->relocs
= pc
->reloc_entries
;
442 ti
->prog
->num_relocs
= pc
->num_relocs
;
444 NV50_DBGMSG(SHADER
, "SHADER TRANSLATION - %s\n", ret
? "failed" : "success");
449 for (i
= 0; i
< pc
->num_blocks
; ++i
)
450 FREE(pc
->bb_list
[i
]);
454 /* on success, these will be referenced by struct nvc0_program */
459 if (pc
->reloc_entries
)
460 FREE(pc
->reloc_entries
);
467 nvbb_insert_phi(struct nv_basic_block
*b
, struct nv_instruction
*i
)
474 assert(!b
->entry
->prev
&& b
->exit
);
482 if (b
->entry
->opcode
== NV_OP_PHI
) { /* insert after entry */
483 assert(b
->entry
== b
->exit
);
488 } else { /* insert before entry */
489 assert(b
->entry
->prev
&& b
->exit
);
491 i
->prev
= b
->entry
->prev
;
499 nvc0_insn_append(struct nv_basic_block
*b
, struct nv_instruction
*i
)
501 if (i
->opcode
== NV_OP_PHI
) {
502 nvbb_insert_phi(b
, i
);
511 if (i
->prev
&& i
->prev
->opcode
== NV_OP_PHI
)
516 b
->num_instructions
++;
518 if (i
->prev
&& i
->prev
->terminator
)
519 nvc0_insns_permute(i
->prev
, i
);
523 nvc0_insn_insert_after(struct nv_instruction
*at
, struct nv_instruction
*ni
)
526 nvc0_insn_append(at
->bb
, ni
);
534 ni
->bb
->num_instructions
++;
/*
 * Insert instruction @ni directly in front of @at.
 *
 * Done in two steps: @ni is first linked in behind @at, then the two
 * neighbours are exchanged.  nvc0_insns_permute() asserts that neither
 * instruction is a PHI, so this must not be used with PHI instructions.
 */
void
nvc0_insn_insert_before(struct nv_instruction *at, struct nv_instruction *ni)
{
   nvc0_insn_insert_after(at, ni);
   nvc0_insns_permute(at, ni);
}
545 nvc0_insn_delete(struct nv_instruction
*nvi
)
547 struct nv_basic_block
*b
= nvi
->bb
;
550 /* debug_printf("REM: "); nv_print_instruction(nvi); */
552 for (s
= 0; s
< 6 && nvi
->src
[s
]; ++s
)
553 nv_reference(NULL
, nvi
, s
, NULL
);
556 nvi
->next
->prev
= nvi
->prev
;
558 assert(nvi
== b
->exit
);
563 nvi
->prev
->next
= nvi
->next
;
565 if (nvi
== b
->entry
) {
566 /* PHIs don't get hooked to b->entry */
567 b
->entry
= nvi
->next
;
568 assert(!nvi
->prev
|| nvi
->prev
->opcode
== NV_OP_PHI
);
572 if (nvi
->opcode
!= NV_OP_PHI
)
573 NV50_DBGMSG(PROG_IR
, "NOTE: b->phi points to non-PHI instruction\n");
576 if (!nvi
->next
|| nvi
->next
->opcode
!= NV_OP_PHI
)
584 nvc0_insns_permute(struct nv_instruction
*i1
, struct nv_instruction
*i2
)
586 struct nv_basic_block
*b
= i1
->bb
;
588 assert(i1
->opcode
!= NV_OP_PHI
&&
589 i2
->opcode
!= NV_OP_PHI
);
590 assert(i1
->next
== i2
);
610 nvc0_bblock_attach(struct nv_basic_block
*parent
,
611 struct nv_basic_block
*b
, ubyte edge_kind
)
613 assert(b
->num_in
< 8);
615 if (parent
->out
[0]) {
616 assert(!parent
->out
[1]);
618 parent
->out_kind
[1] = edge_kind
;
621 parent
->out_kind
[0] = edge_kind
;
624 b
->in
[b
->num_in
] = parent
;
625 b
->in_kind
[b
->num_in
++] = edge_kind
;
628 /* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
631 nvc0_bblock_dominated_by(struct nv_basic_block
*b
, struct nv_basic_block
*d
)
638 for (j
= 0; j
< b
->num_in
; ++j
)
639 if ((b
->in_kind
[j
] != CFG_EDGE_BACK
) &&
640 !nvc0_bblock_dominated_by(b
->in
[j
], d
))
643 return j
? TRUE
: FALSE
;
646 /* check if @bf (future) can be reached from @bp (past), stop at @bt */
648 nvc0_bblock_reachable_by(struct nv_basic_block
*bf
, struct nv_basic_block
*bp
,
649 struct nv_basic_block
*bt
)
651 struct nv_basic_block
*q
[NV_PC_MAX_BASIC_BLOCKS
], *b
;
665 assert(n
<= (1024 - 2));
667 for (i
= 0; i
< 2; ++i
) {
668 if (b
->out
[i
] && !IS_WALL_EDGE(b
->out_kind
[i
]) && !b
->out
[i
]->priv
) {
674 for (--n
; n
>= 0; --n
)
680 static struct nv_basic_block
*
681 nvbb_find_dom_frontier(struct nv_basic_block
*b
, struct nv_basic_block
*df
)
683 struct nv_basic_block
*out
;
686 if (!nvc0_bblock_dominated_by(df
, b
)) {
687 for (i
= 0; i
< df
->num_in
; ++i
) {
688 if (df
->in_kind
[i
] == CFG_EDGE_BACK
)
690 if (nvc0_bblock_dominated_by(df
->in
[i
], b
))
694 for (i
= 0; i
< 2 && df
->out
[i
]; ++i
) {
695 if (df
->out_kind
[i
] == CFG_EDGE_BACK
)
697 if ((out
= nvbb_find_dom_frontier(b
, df
->out
[i
])))
703 struct nv_basic_block
*
704 nvc0_bblock_dom_frontier(struct nv_basic_block
*b
)
706 struct nv_basic_block
*df
;
709 for (i
= 0; i
< 2 && b
->out
[i
]; ++i
)
710 if ((df
= nvbb_find_dom_frontier(b
, b
->out
[i
])))