revert 213 commits (to 56092) from the last month. 10 still need work to resolve...
[AROS.git] / workbench / libs / mesa / src / gallium / drivers / nv50 / nv50_pc.c
blob7900bf811dfe85d3259e8eb4acf7186aacee114f
1 /*
2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
23 #include "nv50_pc.h"
24 #include "nv50_program.h"
26 #include <stdio.h>
28 /* returns TRUE if operands 0 and 1 can be swapped */
29 boolean
30 nv_op_commutative(uint opcode)
32 switch (opcode) {
33 case NV_OP_ADD:
34 case NV_OP_MUL:
35 case NV_OP_MAD:
36 case NV_OP_AND:
37 case NV_OP_OR:
38 case NV_OP_XOR:
39 case NV_OP_MIN:
40 case NV_OP_MAX:
41 case NV_OP_SAD:
42 return TRUE;
43 default:
44 return FALSE;
48 /* return operand to which the address register applies */
49 int
50 nv50_indirect_opnd(struct nv_instruction *i)
52 if (!i->src[4])
53 return -1;
55 switch (i->opcode) {
56 case NV_OP_MOV:
57 case NV_OP_LDA:
58 case NV_OP_STA:
59 return 0;
60 default:
61 return 1;
65 boolean
66 nv50_nvi_can_use_imm(struct nv_instruction *nvi, int s)
68 if (nvi->flags_src || nvi->flags_def)
69 return FALSE;
71 switch (nvi->opcode) {
72 case NV_OP_ADD:
73 case NV_OP_MUL:
74 case NV_OP_AND:
75 case NV_OP_OR:
76 case NV_OP_XOR:
77 case NV_OP_SHL:
78 case NV_OP_SHR:
79 return (s == 1) && (nvi->src[0]->value->reg.file == NV_FILE_GPR) &&
80 (nvi->def[0]->reg.file == NV_FILE_GPR);
81 case NV_OP_MOV:
82 assert(s == 0);
83 return (nvi->def[0]->reg.file == NV_FILE_GPR);
84 default:
85 return FALSE;
89 boolean
90 nv50_nvi_can_load(struct nv_instruction *nvi, int s, struct nv_value *value)
92 int i;
94 for (i = 0; i < 3 && nvi->src[i]; ++i)
95 if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
96 return FALSE;
98 switch (nvi->opcode) {
99 case NV_OP_ABS:
100 case NV_OP_ADD:
101 case NV_OP_CEIL:
102 case NV_OP_FLOOR:
103 case NV_OP_TRUNC:
104 case NV_OP_CVT:
105 case NV_OP_NEG:
106 case NV_OP_MAD:
107 case NV_OP_MUL:
108 case NV_OP_SAT:
109 case NV_OP_SUB:
110 case NV_OP_MAX:
111 case NV_OP_MIN:
112 if (s == 0 && (value->reg.file == NV_FILE_MEM_S ||
113 value->reg.file == NV_FILE_MEM_P))
114 return TRUE;
115 if (value->reg.file < NV_FILE_MEM_C(0) ||
116 value->reg.file > NV_FILE_MEM_C(15))
117 return FALSE;
118 return (s == 1) ||
119 ((s == 2) && (nvi->src[1]->value->reg.file == NV_FILE_GPR));
120 case NV_OP_MOV:
121 assert(s == 0);
122 return /* TRUE */ FALSE; /* don't turn MOVs into loads */
123 default:
124 return FALSE;
128 /* Return whether this instruction can be executed conditionally. */
129 boolean
130 nv50_nvi_can_predicate(struct nv_instruction *nvi)
132 int i;
134 if (nvi->flags_src)
135 return FALSE;
136 for (i = 0; i < 4 && nvi->src[i]; ++i)
137 if (nvi->src[i]->value->reg.file == NV_FILE_IMM)
138 return FALSE;
139 return TRUE;
142 ubyte
143 nv50_supported_src_mods(uint opcode, int s)
145 switch (opcode) {
146 case NV_OP_ABS:
147 return NV_MOD_NEG | NV_MOD_ABS; /* obviously */
148 case NV_OP_ADD:
149 case NV_OP_MUL:
150 case NV_OP_MAD:
151 return NV_MOD_NEG;
152 case NV_OP_DFDX:
153 case NV_OP_DFDY:
154 assert(s == 0);
155 return NV_MOD_NEG;
156 case NV_OP_MAX:
157 case NV_OP_MIN:
158 return NV_MOD_ABS;
159 case NV_OP_CVT:
160 case NV_OP_LG2:
161 case NV_OP_NEG:
162 case NV_OP_PREEX2:
163 case NV_OP_PRESIN:
164 case NV_OP_RCP:
165 case NV_OP_RSQ:
166 return NV_MOD_ABS | NV_MOD_NEG;
167 default:
168 return 0;
172 /* We may want an opcode table. */
173 boolean
174 nv50_op_can_write_flags(uint opcode)
176 if (nv_is_vector_op(opcode))
177 return FALSE;
178 switch (opcode) { /* obvious ones like KIL, CALL, etc. not included */
179 case NV_OP_PHI:
180 case NV_OP_MOV:
181 case NV_OP_SELECT:
182 case NV_OP_LINTERP:
183 case NV_OP_PINTERP:
184 case NV_OP_LDA:
185 return FALSE;
186 default:
187 break;
189 if (opcode >= NV_OP_RCP && opcode <= NV_OP_PREEX2)
190 return FALSE;
191 return TRUE;
195 nv_nvi_refcount(struct nv_instruction *nvi)
197 int i, rc;
199 rc = nvi->flags_def ? nvi->flags_def->refc : 0;
201 for (i = 0; i < 4; ++i) {
202 if (!nvi->def[i])
203 return rc;
204 rc += nvi->def[i]->refc;
206 return rc;
210 nvcg_replace_value(struct nv_pc *pc, struct nv_value *old_val,
211 struct nv_value *new_val)
213 int i, n;
215 if (old_val == new_val)
216 return old_val->refc;
218 for (i = 0, n = 0; i < pc->num_refs; ++i) {
219 if (pc->refs[i]->value == old_val) {
220 ++n;
221 nv_reference(pc, &pc->refs[i], new_val);
224 return n;
227 struct nv_value *
228 nvcg_find_constant(struct nv_ref *ref)
230 struct nv_value *src;
232 if (!ref)
233 return NULL;
235 src = ref->value;
236 while (src->insn && src->insn->opcode == NV_OP_MOV) {
237 assert(!src->insn->src[0]->mod);
238 src = src->insn->src[0]->value;
240 if ((src->reg.file == NV_FILE_IMM) ||
241 (src->insn && src->insn->opcode == NV_OP_LDA &&
242 src->insn->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
243 src->insn->src[0]->value->reg.file <= NV_FILE_MEM_C(15)))
244 return src;
245 return NULL;
248 struct nv_value *
249 nvcg_find_immediate(struct nv_ref *ref)
251 struct nv_value *src = nvcg_find_constant(ref);
253 return (src && src->reg.file == NV_FILE_IMM) ? src : NULL;
256 static void
257 nv_pc_free_refs(struct nv_pc *pc)
259 int i;
260 for (i = 0; i < pc->num_refs; i += 64)
261 FREE(pc->refs[i]);
262 FREE(pc->refs);
265 static const char *
266 edge_name(ubyte type)
268 switch (type) {
269 case CFG_EDGE_FORWARD: return "forward";
270 case CFG_EDGE_BACK: return "back";
271 case CFG_EDGE_LOOP_ENTER: return "loop";
272 case CFG_EDGE_LOOP_LEAVE: return "break";
273 case CFG_EDGE_FAKE: return "fake";
274 default:
275 return "?";
279 void
280 nv_pc_pass_in_order(struct nv_basic_block *root, nv_pc_pass_func f, void *priv)
282 struct nv_basic_block *bb[64], *bbb[16], *b;
283 int j, p, pp;
285 bb[0] = root;
286 p = 1;
287 pp = 0;
289 while (p > 0) {
290 b = bb[--p];
291 b->priv = 0;
293 for (j = 1; j >= 0; --j) {
294 if (!b->out[j])
295 continue;
297 switch (b->out_kind[j]) {
298 case CFG_EDGE_BACK:
299 continue;
300 case CFG_EDGE_FORWARD:
301 case CFG_EDGE_FAKE:
302 if (++b->out[j]->priv == b->out[j]->num_in)
303 bb[p++] = b->out[j];
304 break;
305 case CFG_EDGE_LOOP_ENTER:
306 bb[p++] = b->out[j];
307 break;
308 case CFG_EDGE_LOOP_LEAVE:
309 if (!b->out[j]->priv) {
310 bbb[pp++] = b->out[j];
311 b->out[j]->priv = 1;
313 break;
314 default:
315 assert(0);
316 break;
320 f(priv, b);
322 if (!p) {
323 p = pp;
324 for (; pp > 0; --pp)
325 bb[pp - 1] = bbb[pp - 1];
330 static void
331 nv_do_print_function(void *priv, struct nv_basic_block *b)
333 struct nv_instruction *i;
335 debug_printf("=== BB %i ", b->id);
336 if (b->out[0])
337 debug_printf("[%s -> %i] ", edge_name(b->out_kind[0]), b->out[0]->id);
338 if (b->out[1])
339 debug_printf("[%s -> %i] ", edge_name(b->out_kind[1]), b->out[1]->id);
340 debug_printf("===\n");
342 i = b->phi;
343 if (!i)
344 i = b->entry;
345 for (; i; i = i->next)
346 nv_print_instruction(i);
349 void
350 nv_print_function(struct nv_basic_block *root)
352 if (root->subroutine)
353 debug_printf("SUBROUTINE %i\n", root->subroutine);
354 else
355 debug_printf("MAIN\n");
357 nv_pc_pass_in_order(root, nv_do_print_function, root);
360 void
361 nv_print_program(struct nv_pc *pc)
363 int i;
364 for (i = 0; i < pc->num_subroutines + 1; ++i)
365 if (pc->root[i])
366 nv_print_function(pc->root[i]);
369 #if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
370 static void
371 nv_do_print_cfgraph(struct nv_pc *pc, FILE *f, struct nv_basic_block *b)
373 int i;
375 b->pass_seq = pc->pass_seq;
377 fprintf(f, "\t%i [shape=box]\n", b->id);
379 for (i = 0; i < 2; ++i) {
380 if (!b->out[i])
381 continue;
382 switch (b->out_kind[i]) {
383 case CFG_EDGE_FORWARD:
384 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
385 break;
386 case CFG_EDGE_LOOP_ENTER:
387 fprintf(f, "\t%i -> %i [color=green];\n", b->id, b->out[i]->id);
388 break;
389 case CFG_EDGE_LOOP_LEAVE:
390 fprintf(f, "\t%i -> %i [color=red];\n", b->id, b->out[i]->id);
391 break;
392 case CFG_EDGE_BACK:
393 fprintf(f, "\t%i -> %i;\n", b->id, b->out[i]->id);
394 continue;
395 case CFG_EDGE_FAKE:
396 fprintf(f, "\t%i -> %i [style=dotted];\n", b->id, b->out[i]->id);
397 break;
398 default:
399 assert(0);
400 break;
402 if (b->out[i]->pass_seq < pc->pass_seq)
403 nv_do_print_cfgraph(pc, f, b->out[i]);
407 /* Print the control flow graph of subroutine @subr (0 == MAIN) to a file. */
408 static void
409 nv_print_cfgraph(struct nv_pc *pc, const char *filepath, int subr)
411 FILE *f;
413 f = fopen(filepath, "a");
414 if (!f)
415 return;
417 fprintf(f, "digraph G {\n");
419 ++pc->pass_seq;
421 nv_do_print_cfgraph(pc, f, pc->root[subr]);
423 fprintf(f, "}\n");
425 fclose(f);
427 #endif /* NV50_DEBUG_PROG_CFLOW */
429 static INLINE void
430 nvcg_show_bincode(struct nv_pc *pc)
432 unsigned i;
434 for (i = 0; i < pc->bin_size / 4; ++i) {
435 debug_printf("0x%08x ", pc->emit[i]);
436 if ((i % 16) == 15)
437 debug_printf("\n");
439 debug_printf("\n");
442 static int
443 nv50_emit_program(struct nv_pc *pc)
445 uint32_t *code = pc->emit;
446 int n;
448 NV50_DBGMSG(SHADER, "emitting program: size = %u\n", pc->bin_size);
450 for (n = 0; n < pc->num_blocks; ++n) {
451 struct nv_instruction *i;
452 struct nv_basic_block *b = pc->bb_list[n];
454 for (i = b->entry; i; i = i->next) {
455 nv50_emit_instruction(pc, i);
457 pc->bin_pos += 1 + (pc->emit[0] & 1);
458 pc->emit += 1 + (pc->emit[0] & 1);
461 assert(pc->emit == &code[pc->bin_size / 4]);
463 /* XXX: we can do better than this ... */
464 if (!pc->bin_size ||
465 !(pc->emit[-2] & 1) || (pc->emit[-2] & 2) || (pc->emit[-1] & 3)) {
466 pc->emit[0] = 0xf0000001;
467 pc->emit[1] = 0xe0000000;
468 pc->bin_size += 8;
471 pc->emit = code;
472 code[pc->bin_size / 4 - 1] |= 1;
474 #if NV50_DEBUG & NV50_DEBUG_SHADER
475 nvcg_show_bincode(pc);
476 #endif
478 return 0;
482 nv50_generate_code(struct nv50_translation_info *ti)
484 struct nv_pc *pc;
485 int ret;
486 int i;
488 pc = CALLOC_STRUCT(nv_pc);
489 if (!pc)
490 return 1;
492 pc->root = CALLOC(ti->subr_nr + 1, sizeof(pc->root[0]));
493 if (!pc->root) {
494 FREE(pc);
495 return 1;
497 pc->num_subroutines = ti->subr_nr;
499 ret = nv50_tgsi_to_nc(pc, ti);
500 if (ret)
501 goto out;
502 #if NV50_DEBUG & NV50_DEBUG_PROG_IR
503 nv_print_program(pc);
504 #endif
506 pc->opt_reload_elim = ti->store_to_memory ? FALSE : TRUE;
508 /* optimization */
509 ret = nv_pc_exec_pass0(pc);
510 if (ret)
511 goto out;
512 #if NV50_DEBUG & NV50_DEBUG_PROG_IR
513 nv_print_program(pc);
514 #endif
516 /* register allocation */
517 ret = nv_pc_exec_pass1(pc);
518 if (ret)
519 goto out;
520 #if NV50_DEBUG & NV50_DEBUG_PROG_CFLOW
521 nv_print_program(pc);
522 nv_print_cfgraph(pc, "nv50_shader_cfgraph.dot", 0);
523 #endif
525 /* prepare for emission */
526 ret = nv_pc_exec_pass2(pc);
527 if (ret)
528 goto out;
529 assert(!(pc->bin_size % 8));
531 pc->emit = CALLOC(pc->bin_size / 4 + 2, 4);
532 if (!pc->emit) {
533 ret = 3;
534 goto out;
536 ret = nv50_emit_program(pc);
537 if (ret)
538 goto out;
540 ti->p->code_size = pc->bin_size;
541 ti->p->code = pc->emit;
543 ti->p->immd_size = pc->immd_count * 4;
544 ti->p->immd = pc->immd_buf;
546 /* highest 16 bit reg to num of 32 bit regs, limit to >= 4 */
547 ti->p->max_gpr = MAX2(4, (pc->max_reg[NV_FILE_GPR] >> 1) + 1);
549 ti->p->fixups = pc->fixups;
550 ti->p->num_fixups = pc->num_fixups;
552 ti->p->uses_lmem = ti->store_to_memory;
554 NV50_DBGMSG(SHADER, "SHADER TRANSLATION - %s\n", ret ? "failed" : "success");
556 out:
557 nv_pc_free_refs(pc);
559 for (i = 0; i < pc->num_blocks; ++i)
560 FREE(pc->bb_list[i]);
561 if (pc->root)
562 FREE(pc->root);
563 if (ret) { /* on success, these will be referenced by nv50_program */
564 if (pc->emit)
565 FREE(pc->emit);
566 if (pc->immd_buf)
567 FREE(pc->immd_buf);
568 if (pc->fixups)
569 FREE(pc->fixups);
571 FREE(pc);
572 return ret;
575 static void
576 nvbb_insert_phi(struct nv_basic_block *b, struct nv_instruction *i)
578 if (!b->phi) {
579 i->prev = NULL;
580 b->phi = i;
581 i->next = b->entry;
582 if (b->entry) {
583 assert(!b->entry->prev && b->exit);
584 b->entry->prev = i;
585 } else {
586 b->entry = i;
587 b->exit = i;
589 } else {
590 assert(b->entry);
591 if (b->entry->opcode == NV_OP_PHI) { /* insert after entry */
592 assert(b->entry == b->exit);
593 b->entry->next = i;
594 i->prev = b->entry;
595 b->entry = i;
596 b->exit = i;
597 } else { /* insert before entry */
598 assert(b->entry->prev && b->exit);
599 i->next = b->entry;
600 i->prev = b->entry->prev;
601 b->entry->prev = i;
602 i->prev->next = i;
607 void
608 nvbb_insert_tail(struct nv_basic_block *b, struct nv_instruction *i)
610 if (i->opcode == NV_OP_PHI) {
611 nvbb_insert_phi(b, i);
612 } else {
613 i->prev = b->exit;
614 if (b->exit)
615 b->exit->next = i;
616 b->exit = i;
617 if (!b->entry)
618 b->entry = i;
619 else
620 if (i->prev && i->prev->opcode == NV_OP_PHI)
621 b->entry = i;
624 i->bb = b;
625 b->num_instructions++;
627 if (i->prev && i->prev->is_terminator)
628 nv_nvi_permute(i->prev, i);
631 void
632 nvi_insert_after(struct nv_instruction *at, struct nv_instruction *ni)
634 if (!at->next) {
635 nvbb_insert_tail(at->bb, ni);
636 return;
638 ni->next = at->next;
639 ni->prev = at;
640 ni->next->prev = ni;
641 ni->prev->next = ni;
644 void
645 nv_nvi_delete(struct nv_instruction *nvi)
647 struct nv_basic_block *b = nvi->bb;
648 int j;
650 /* debug_printf("REM: "); nv_print_instruction(nvi); */
652 for (j = 0; j < 5; ++j)
653 nv_reference(NULL, &nvi->src[j], NULL);
654 nv_reference(NULL, &nvi->flags_src, NULL);
656 if (nvi->next)
657 nvi->next->prev = nvi->prev;
658 else {
659 assert(nvi == b->exit);
660 b->exit = nvi->prev;
663 if (nvi->prev)
664 nvi->prev->next = nvi->next;
666 if (nvi == b->entry) {
667 /* PHIs don't get hooked to b->entry */
668 b->entry = nvi->next;
669 assert(!nvi->prev || nvi->prev->opcode == NV_OP_PHI);
672 if (nvi == b->phi) {
673 if (nvi->opcode != NV_OP_PHI)
674 NV50_DBGMSG(PROG_IR, "NOTE: b->phi points to non-PHI instruction\n");
676 assert(!nvi->prev);
677 if (!nvi->next || nvi->next->opcode != NV_OP_PHI)
678 b->phi = NULL;
679 else
680 b->phi = nvi->next;
684 void
685 nv_nvi_permute(struct nv_instruction *i1, struct nv_instruction *i2)
687 struct nv_basic_block *b = i1->bb;
689 assert(i1->opcode != NV_OP_PHI &&
690 i2->opcode != NV_OP_PHI);
691 assert(i1->next == i2);
693 if (b->exit == i2)
694 b->exit = i1;
696 if (b->entry == i1)
697 b->entry = i2;
699 i2->prev = i1->prev;
700 i1->next = i2->next;
701 i2->next = i1;
702 i1->prev = i2;
704 if (i2->prev)
705 i2->prev->next = i2;
706 if (i1->next)
707 i1->next->prev = i1;
710 void
711 nvbb_attach_block(struct nv_basic_block *parent,
712 struct nv_basic_block *b, ubyte edge_kind)
714 assert(b->num_in < 8);
716 if (parent->out[0]) {
717 assert(!parent->out[1]);
718 parent->out[1] = b;
719 parent->out_kind[1] = edge_kind;
720 } else {
721 parent->out[0] = b;
722 parent->out_kind[0] = edge_kind;
725 b->in[b->num_in] = parent;
726 b->in_kind[b->num_in++] = edge_kind;
729 /* NOTE: all BRKs are treated as conditional, so there are 2 outgoing BBs */
731 boolean
732 nvbb_dominated_by(struct nv_basic_block *b, struct nv_basic_block *d)
734 int j;
736 if (b == d)
737 return TRUE;
739 for (j = 0; j < b->num_in; ++j)
740 if ((b->in_kind[j] != CFG_EDGE_BACK) && !nvbb_dominated_by(b->in[j], d))
741 return FALSE;
743 return j ? TRUE : FALSE;
746 /* check if @bf (future) can be reached from @bp (past), stop at @bt */
747 boolean
748 nvbb_reachable_by(struct nv_basic_block *bf, struct nv_basic_block *bp,
749 struct nv_basic_block *bt)
751 struct nv_basic_block *q[NV_PC_MAX_BASIC_BLOCKS], *b;
752 int i, p, n;
754 p = 0;
755 n = 1;
756 q[0] = bp;
758 while (p < n) {
759 b = q[p++];
761 if (b == bf)
762 break;
763 if (b == bt)
764 continue;
765 assert(n <= (1024 - 2));
767 for (i = 0; i < 2; ++i) {
768 if (b->out[i] && !IS_WALL_EDGE(b->out_kind[i]) && !b->out[i]->priv) {
769 q[n] = b->out[i];
770 q[n++]->priv = 1;
774 for (--n; n >= 0; --n)
775 q[n]->priv = 0;
777 return (b == bf);
780 static struct nv_basic_block *
781 nvbb_find_dom_frontier(struct nv_basic_block *b, struct nv_basic_block *df)
783 struct nv_basic_block *out;
784 int i;
786 if (!nvbb_dominated_by(df, b)) {
787 for (i = 0; i < df->num_in; ++i) {
788 if (df->in_kind[i] == CFG_EDGE_BACK)
789 continue;
790 if (nvbb_dominated_by(df->in[i], b))
791 return df;
794 for (i = 0; i < 2 && df->out[i]; ++i) {
795 if (df->out_kind[i] == CFG_EDGE_BACK)
796 continue;
797 if ((out = nvbb_find_dom_frontier(b, df->out[i])))
798 return out;
800 return NULL;
803 struct nv_basic_block *
804 nvbb_dom_frontier(struct nv_basic_block *b)
806 struct nv_basic_block *df;
807 int i;
809 for (i = 0; i < 2 && b->out[i]; ++i)
810 if ((df = nvbb_find_dom_frontier(b, b->out[i])))
811 return df;
812 return NULL;