revert 213 commits (to 56092) from the last month. 10 still need work to resolve...
[AROS.git] / workbench / libs / mesa / src / gallium / drivers / nvc0 / nvc0_pc_optimize.c
bloba1ef6ba163b75363de4d8b5080edd470b7cabfd7
1 /*
2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
20 * SOFTWARE.
23 #include "nvc0_pc.h"
24 #include "nvc0_program.h"
/* Stamp basic block `b` with the current pass sequence number, then apply
 * pass function `f` to each of its two possible successors whose sequence
 * number is still older than the current one.
 * Expects `b` and `ctx` to be in scope at the expansion site; `j` is an
 * integer lvalue used as the successor index.
 */
#define DESCEND_ARBITRARY(j, f)                                      \
do {                                                                 \
   b->pass_seq = ctx->pc->pass_seq;                                  \
                                                                     \
   for (j = 0; j < 2; ++j) {                                         \
      if (!b->out[j] || b->out[j]->pass_seq >= ctx->pc->pass_seq)    \
         continue;                                                   \
      f(ctx, b->out[j]);                                             \
   }                                                                 \
} while (0)
35 static INLINE boolean
36 registers_interfere(struct nv_value *a, struct nv_value *b)
38 if (a->reg.file != b->reg.file)
39 return FALSE;
40 if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
41 return FALSE;
43 assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);
45 if (a->join->reg.id < b->join->reg.id) {
46 return (a->join->reg.id + a->reg.size >= b->join->reg.id);
47 } else
48 if (a->join->reg.id > b->join->reg.id) {
49 return (b->join->reg.id + b->reg.size >= a->join->reg.id);
52 return FALSE;
55 static INLINE boolean
56 values_equal(struct nv_value *a, struct nv_value *b)
58 if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
59 return FALSE;
60 if (NV_IS_MEMORY_FILE(a->reg.file))
61 return a->reg.address == b->reg.address;
62 else
63 return a->join->reg.id == b->join->reg.id;
#if 0
/* TRUE if no definition of @a interferes with any source of @b. */
static INLINE boolean
inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
{
   int d;

   for (d = 0; d < 4 && a->def[d]; ++d) {
      int s;

      for (s = 0; s < 5 && b->src[s]; ++s) {
         if (registers_interfere(a->def[d], b->src[s]->value))
            return FALSE;
      }
   }
   return TRUE;
}

/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static boolean
inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
{
   if (!inst_commutation_check(a, b))
      return FALSE;
   return inst_commutation_check(b, a);
}
#endif
90 static INLINE boolean
91 inst_removable(struct nv_instruction *nvi)
93 if (nvi->opcode == NV_OP_ST)
94 return FALSE;
95 return (!(nvi->terminator ||
96 nvi->join ||
97 nvi->target ||
98 nvi->fixed ||
99 nvc0_insn_refcount(nvi)));
102 /* Check if we do not actually have to emit this instruction. */
103 static INLINE boolean
104 inst_is_noop(struct nv_instruction *nvi)
106 if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
107 return TRUE;
108 if (nvi->terminator || nvi->join)
109 return FALSE;
110 if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
111 return TRUE;
112 if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
113 return FALSE;
114 if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
115 return FALSE;
117 if (nvi->src[0]->value->join->reg.id < 0) {
118 NV50_DBGMSG(PROG_IR, "inst_is_noop: orphaned value detected\n");
119 return TRUE;
122 if (nvi->opcode == NV_OP_SELECT)
123 if (!values_equal(nvi->def[0], nvi->src[1]->value))
124 return FALSE;
125 return values_equal(nvi->def[0], nvi->src[0]->value);
/* Context handed to the per-basic-block pass functions in this file. */
struct nv_pass {
   struct nv_pc *pc;   /* program being processed */
   int n;
   void *priv;
};

/* defined further below, needed by nv_pc_pass2 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
137 static void
138 nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
140 struct nv_pc *pc = (struct nv_pc *)priv;
141 struct nv_basic_block *in;
142 struct nv_instruction *nvi, *next;
143 int j;
145 /* find first non-empty block emitted before b */
146 for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);
148 for (; j >= 0; --j) {
149 in = pc->bb_list[j];
151 /* check for no-op branches (BRA $PC+8) */
152 if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
153 in->emit_size -= 8;
154 pc->emit_size -= 8;
156 for (++j; j < pc->num_blocks; ++j)
157 pc->bb_list[j]->emit_pos -= 8;
159 nvc0_insn_delete(in->exit);
161 b->emit_pos = in->emit_pos + in->emit_size;
163 if (in->emit_size) /* no more no-op branches to b */
164 break;
167 pc->bb_list[pc->num_blocks++] = b;
169 /* visit node */
171 for (nvi = b->entry; nvi; nvi = next) {
172 next = nvi->next;
173 if (inst_is_noop(nvi) ||
174 (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
175 nvc0_insn_delete(nvi);
176 } else
177 b->emit_size += 8;
179 pc->emit_size += b->emit_size;
181 #if NV50_DEBUG & NV50_DEBUG_PROG_IR
182 if (!b->entry)
183 debug_printf("BB:%i is now empty\n", b->id);
184 else
185 debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
186 #endif
189 static int
190 nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
192 struct nv_pass pass;
194 pass.pc = pc;
196 pc->pass_seq++;
197 nv_pass_flatten(&pass, root);
199 nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);
201 return 0;
205 nvc0_pc_exec_pass2(struct nv_pc *pc)
207 int i, ret;
209 NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks);
211 pc->num_blocks = 0; /* will reorder bb_list */
213 for (i = 0; i < pc->num_subroutines + 1; ++i)
214 if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
215 return ret;
216 return 0;
219 static INLINE boolean
220 is_cspace_load(struct nv_instruction *nvi)
222 if (!nvi)
223 return FALSE;
224 assert(nvi->indirect != 0);
225 return (nvi->opcode == NV_OP_LD &&
226 nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
227 nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
230 static INLINE boolean
231 is_immd32_load(struct nv_instruction *nvi)
233 if (!nvi)
234 return FALSE;
235 return (nvi->opcode == NV_OP_MOV &&
236 nvi->src[0]->value->reg.file == NV_FILE_IMM &&
237 nvi->src[0]->value->reg.size == 4);
240 static INLINE void
241 check_swap_src_0_1(struct nv_instruction *nvi)
243 struct nv_ref *src0 = nvi->src[0];
244 struct nv_ref *src1 = nvi->src[1];
246 if (!nv_op_commutative(nvi->opcode) &&
247 NV_BASEOP(nvi->opcode) != NV_OP_SET &&
248 NV_BASEOP(nvi->opcode) != NV_OP_SLCT)
249 return;
250 assert(src0 && src1 && src0->value && src1->value);
252 if (src1->value->reg.file != NV_FILE_GPR)
253 return;
255 if (is_cspace_load(src0->value->insn)) {
256 if (!is_cspace_load(src1->value->insn)) {
257 nvi->src[0] = src1;
258 nvi->src[1] = src0;
260 } else
261 if (is_immd32_load(src0->value->insn)) {
262 if (!is_cspace_load(src1->value->insn) &&
263 !is_immd32_load(src1->value->insn)) {
264 nvi->src[0] = src1;
265 nvi->src[1] = src0;
269 if (nvi->src[0] != src0) {
270 if (NV_BASEOP(nvi->opcode) == NV_OP_SET)
271 nvi->set_cond = nvc0_ir_reverse_cc(nvi->set_cond);
272 else
273 if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT)
274 nvi->set_cond = NV_CC_INVERSE(nvi->set_cond);
278 static void
279 nvi_set_indirect_load(struct nv_pc *pc,
280 struct nv_instruction *nvi, struct nv_value *val)
282 for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
283 ++nvi->indirect);
284 assert(nvi->indirect < 6);
285 nv_reference(pc, nvi, nvi->indirect, val);
288 static int
289 nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
291 struct nv_instruction *nvi, *ld;
292 int s;
294 for (nvi = b->entry; nvi; nvi = nvi->next) {
295 check_swap_src_0_1(nvi);
297 for (s = 0; s < 3 && nvi->src[s]; ++s) {
298 ld = nvi->src[s]->value->insn;
299 if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
300 continue;
301 if (!nvc0_insn_can_load(nvi, s, ld))
302 continue;
304 /* fold it ! */
305 nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
306 if (ld->indirect >= 0)
307 nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);
309 if (!nvc0_insn_refcount(ld))
310 nvc0_insn_delete(ld);
313 DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);
315 return 0;
318 /* NOTE: Assumes loads have not yet been folded. */
319 static int
320 nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
322 struct nv_instruction *nvi, *mi, *next;
323 int j;
324 uint8_t mod;
326 for (nvi = b->entry; nvi; nvi = next) {
327 next = nvi->next;
328 if (nvi->opcode == NV_OP_SUB) {
329 nvi->src[1]->mod ^= NV_MOD_NEG;
330 nvi->opcode = NV_OP_ADD;
333 for (j = 0; j < 3 && nvi->src[j]; ++j) {
334 mi = nvi->src[j]->value->insn;
335 if (!mi)
336 continue;
337 if (mi->def[0]->refc > 1 || mi->predicate >= 0)
338 continue;
340 if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
341 else
342 if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
343 else
344 continue;
345 assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));
347 mod |= mi->src[0]->mod;
349 if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
350 /* abs neg [abs] = abs */
351 mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
352 } else
353 if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
354 /* neg as opcode and modifier on same insn cannot occur */
355 /* neg neg abs = abs, neg neg = identity */
356 assert(j == 0);
357 if (mod & NV_MOD_ABS)
358 nvi->opcode = NV_OP_ABS;
359 else
360 nvi->opcode = NV_OP_MOV;
361 mod = 0;
364 if ((nv_op_supported_src_mods(nvi->opcode, j) & mod) != mod)
365 continue;
367 nv_reference(ctx->pc, nvi, j, mi->src[0]->value);
369 nvi->src[j]->mod ^= mod;
372 if (nvi->opcode == NV_OP_SAT) {
373 mi = nvi->src[0]->value->insn;
375 if (mi->def[0]->refc > 1 ||
376 (mi->opcode != NV_OP_ADD &&
377 mi->opcode != NV_OP_MUL &&
378 mi->opcode != NV_OP_MAD))
379 continue;
380 mi->saturate = 1;
381 mi->def[0] = nvi->def[0];
382 mi->def[0]->insn = mi;
383 nvc0_insn_delete(nvi);
386 DESCEND_ARBITRARY(j, nv_pass_lower_mods);
388 return 0;
/* non-zero if value `s` has a defining instruction and that is a NV_OP_MUL */
#define SRC_IS_MUL(s) \
   ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
393 static void
394 apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
396 if (mod & NV_MOD_ABS) {
397 if (type == NV_TYPE_F32)
398 *val &= 0x7fffffff;
399 else
400 if ((*val) & (1 << 31))
401 *val = ~(*val) + 1;
403 if (mod & NV_MOD_NEG) {
404 if (type == NV_TYPE_F32)
405 *val ^= 0x80000000;
406 else
407 *val = ~(*val) + 1;
409 if (mod & NV_MOD_SAT) {
410 union {
411 float f;
412 uint32_t u;
413 int32_t i;
414 } u;
415 u.u = *val;
416 if (type == NV_TYPE_F32) {
417 u.f = CLAMP(u.f, -1.0f, 1.0f);
418 } else
419 if (type == NV_TYPE_U16) {
420 u.u = MIN2(u.u, 0xffff);
421 } else
422 if (type == NV_TYPE_S16) {
423 u.i = CLAMP(u.i, -32768, 32767);
425 *val = u.u;
427 if (mod & NV_MOD_NOT)
428 *val = ~*val;
431 static void
432 constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
433 struct nv_value *src0, struct nv_value *src1)
435 struct nv_value *val;
436 union {
437 float f32;
438 uint32_t u32;
439 int32_t s32;
440 } u0, u1, u;
441 ubyte type;
443 if (!nvi->def[0])
444 return;
445 type = NV_OPTYPE(nvi->opcode);
447 u.u32 = 0;
448 u0.u32 = src0->reg.imm.u32;
449 u1.u32 = src1->reg.imm.u32;
451 apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
452 apply_modifiers(&u1.u32, type, nvi->src[1]->mod);
454 switch (nvi->opcode) {
455 case NV_OP_MAD_F32:
456 if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
457 return;
458 /* fall through */
459 case NV_OP_MUL_F32:
460 u.f32 = u0.f32 * u1.f32;
461 break;
462 case NV_OP_MUL_B32:
463 u.u32 = u0.u32 * u1.u32;
464 break;
465 case NV_OP_ADD_F32:
466 u.f32 = u0.f32 + u1.f32;
467 break;
468 case NV_OP_ADD_B32:
469 u.u32 = u0.u32 + u1.u32;
470 break;
471 case NV_OP_SUB_F32:
472 u.f32 = u0.f32 - u1.f32;
473 break;
475 case NV_OP_SUB_B32:
476 u.u32 = u0.u32 - u1.u32;
477 break;
479 default:
480 return;
483 val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
484 val->reg.imm.u32 = u.u32;
486 nv_reference(pc, nvi, 1, NULL);
487 nv_reference(pc, nvi, 0, val);
489 if (nvi->opcode == NV_OP_MAD_F32) {
490 nvi->src[1] = nvi->src[0];
491 nvi->src[0] = nvi->src[2];
492 nvi->src[2] = NULL;
493 nvi->opcode = NV_OP_ADD_F32;
495 if (val->reg.imm.u32 == 0) {
496 nvi->src[1] = NULL;
497 nvi->opcode = NV_OP_MOV;
499 } else {
500 nvi->opcode = NV_OP_MOV;
504 static void
505 constant_operand(struct nv_pc *pc,
506 struct nv_instruction *nvi, struct nv_value *val, int s)
508 union {
509 float f32;
510 uint32_t u32;
511 int32_t s32;
512 } u;
513 int shift;
514 int t = s ? 0 : 1;
515 uint op;
516 ubyte type;
518 if (!nvi->def[0])
519 return;
520 type = NV_OPTYPE(nvi->opcode);
522 u.u32 = val->reg.imm.u32;
523 apply_modifiers(&u.u32, type, nvi->src[s]->mod);
525 if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
526 nvi->opcode = NV_OP_MOV;
527 nv_reference(pc, nvi, t, NULL);
528 if (s) {
529 nvi->src[0] = nvi->src[1];
530 nvi->src[1] = NULL;
532 return;
535 switch (nvi->opcode) {
536 case NV_OP_MUL_F32:
537 if (u.f32 == 1.0f || u.f32 == -1.0f) {
538 if (u.f32 == -1.0f)
539 nvi->src[t]->mod ^= NV_MOD_NEG;
540 switch (nvi->src[t]->mod) {
541 case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
542 case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
543 case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
544 default:
545 return;
547 nvi->opcode = op;
548 nv_reference(pc, nvi, 0, nvi->src[t]->value);
549 nv_reference(pc, nvi, 1, NULL);
550 nvi->src[0]->mod = 0;
551 } else
552 if (u.f32 == 2.0f || u.f32 == -2.0f) {
553 if (u.f32 == -2.0f)
554 nvi->src[t]->mod ^= NV_MOD_NEG;
555 nvi->opcode = NV_OP_ADD_F32;
556 nv_reference(pc, nvi, s, nvi->src[t]->value);
557 nvi->src[s]->mod = nvi->src[t]->mod;
559 break;
560 case NV_OP_ADD_F32:
561 if (u.u32 == 0) {
562 switch (nvi->src[t]->mod) {
563 case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
564 case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
565 case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
566 case NV_MOD_NEG | NV_MOD_ABS:
567 op = NV_OP_CVT;
568 nvi->ext.cvt.s = nvi->ext.cvt.d = type;
569 break;
570 default:
571 return;
573 nvi->opcode = op;
574 nv_reference(pc, nvi, 0, nvi->src[t]->value);
575 nv_reference(pc, nvi, 1, NULL);
576 if (nvi->opcode != NV_OP_CVT)
577 nvi->src[0]->mod = 0;
579 break;
580 case NV_OP_ADD_B32:
581 if (u.u32 == 0) {
582 assert(nvi->src[t]->mod == 0);
583 nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
584 nvi->ext.cvt.s = nvi->ext.cvt.d = type;
585 nv_reference(pc, nvi, 0, nvi->src[t]->value);
586 nv_reference(pc, nvi, 1, NULL);
588 break;
589 case NV_OP_MUL_B32:
590 /* multiplication by 0 already handled above */
591 assert(nvi->src[s]->mod == 0);
592 shift = ffs(u.s32) - 1;
593 if (shift == 0) {
594 nvi->opcode = NV_OP_MOV;
595 nv_reference(pc, nvi, 0, nvi->src[t]->value);
596 nv_reference(pc, nvi, 1, NULL);
597 } else
598 if (u.s32 > 0 && u.s32 == (1 << shift)) {
599 nvi->opcode = NV_OP_SHL;
600 (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift;
601 nv_reference(pc, nvi, 0, nvi->src[t]->value);
602 nv_reference(pc, nvi, 1, val);
603 break;
605 break;
606 case NV_OP_RCP:
607 u.f32 = 1.0f / u.f32;
608 (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
609 nvi->opcode = NV_OP_MOV;
610 assert(s == 0);
611 nv_reference(pc, nvi, 0, val);
612 break;
613 case NV_OP_RSQ:
614 u.f32 = 1.0f / sqrtf(u.f32);
615 (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
616 nvi->opcode = NV_OP_MOV;
617 assert(s == 0);
618 nv_reference(pc, nvi, 0, val);
619 break;
620 default:
621 break;
625 static void
626 handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
628 struct nv_value *src0 = nvi->src[0]->value;
629 struct nv_value *src1 = nvi->src[1]->value;
631 if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
632 return;
633 if (src0->reg.file != NV_FILE_GPR)
634 return;
635 nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
636 nvc0_insn_delete(nvi);
639 /* check if we can MUL + ADD -> MAD/FMA */
640 static void
641 handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
643 struct nv_value *src0 = nvi->src[0]->value;
644 struct nv_value *src1 = nvi->src[1]->value;
645 struct nv_value *src;
646 int s;
647 uint8_t mod[4];
649 if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
650 else
651 if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
652 else
653 return;
655 if ((src0->insn && src0->insn->bb != nvi->bb) ||
656 (src1->insn && src1->insn->bb != nvi->bb))
657 return;
659 /* check for immediates from prior constant folding */
660 if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
661 return;
662 src = nvi->src[s]->value;
664 mod[0] = nvi->src[0]->mod;
665 mod[1] = nvi->src[1]->mod;
666 mod[2] = src->insn->src[0]->mod;
667 mod[3] = src->insn->src[1]->mod;
669 if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
670 return;
672 nvi->opcode = NV_OP_MAD_F32;
674 nv_reference(ctx->pc, nvi, s, NULL);
675 nvi->src[2] = nvi->src[!s];
676 nvi->src[!s] = NULL;
678 nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
679 nvi->src[0]->mod = mod[2] ^ mod[s];
680 nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
681 nvi->src[1]->mod = mod[3];
684 static int
685 nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
687 struct nv_instruction *nvi, *next;
688 int j;
690 for (nvi = b->entry; nvi; nvi = next) {
691 struct nv_value *src0, *src1;
692 uint baseop = NV_BASEOP(nvi->opcode);
694 next = nvi->next;
696 src0 = nvc0_pc_find_immediate(nvi->src[0]);
697 src1 = nvc0_pc_find_immediate(nvi->src[1]);
699 if (src0 && src1) {
700 constant_expression(ctx->pc, nvi, src0, src1);
701 } else {
702 if (src0)
703 constant_operand(ctx->pc, nvi, src0, 0);
704 else
705 if (src1)
706 constant_operand(ctx->pc, nvi, src1, 1);
709 if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
710 handle_min_max(ctx, nvi);
711 else
712 if (nvi->opcode == NV_OP_ADD_F32)
713 handle_add_mul(ctx, nvi);
715 DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);
717 return 0;
/* TODO: redundant store elimination */

/* One remembered memory access; records of the same kind form a singly
 * linked list.
 */
struct mem_record {
   struct mem_record *next;
   struct nv_instruction *insn;  /* instruction performing the access */
   uint32_t ofst;                /* address of the access */
   uint32_t base;                /* id of the indirect address value, or 0 */
   uint32_t size;                /* size of the access in bytes */
};

#define MEM_RECORD_POOL_SIZE 1024

/* Context of the memory access optimization passes. */
struct pass_reld_elim {
   struct nv_pc *pc;

   /* list heads, one per kind of access */
   struct mem_record *imm;
   struct mem_record *mem_v;
   struct mem_record *mem_a;
   struct mem_record *mem_c[16];
   struct mem_record *mem_l;

   /* records are handed out from this fixed array, `alloc` counts them */
   struct mem_record pool[MEM_RECORD_POOL_SIZE];
   int alloc;
};
745 /* Extend the load operation in @rec to also cover the data loaded by @ld.
746 * The two loads may not overlap but reference adjacent memory locations.
748 static void
749 combine_load(struct nv_pc *pc, struct mem_record *rec,
750 struct nv_instruction *ld)
752 struct nv_instruction *fv = rec->insn;
753 struct nv_value *mem = ld->src[0]->value;
754 uint32_t size = rec->size + mem->reg.size;
755 int j;
756 int d = rec->size / 4;
758 assert(rec->size < 16);
759 if (rec->ofst > mem->reg.address) {
760 if ((size == 8 && mem->reg.address & 3) ||
761 (size > 8 && mem->reg.address & 7))
762 return;
763 rec->ofst = mem->reg.address;
764 for (j = 0; j < d; ++j)
765 fv->def[mem->reg.size / 4 + j] = fv->def[j];
766 d = 0;
767 } else
768 if ((size == 8 && rec->ofst & 3) ||
769 (size > 8 && rec->ofst & 7)) {
770 return;
773 for (j = 0; j < mem->reg.size / 4; ++j) {
774 fv->def[d] = ld->def[j];
775 fv->def[d++]->insn = fv;
778 if (fv->src[0]->value->refc > 1)
779 nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
780 fv->src[0]->value->reg.address = rec->ofst;
781 fv->src[0]->value->reg.size = rec->size = size;
783 nvc0_insn_delete(ld);
/* Placeholder: combining of adjacent exports is not implemented. */
static void
combine_export(struct mem_record *rec, struct nv_instruction *ex)
{
   (void)rec;
   (void)ex;
}
792 static INLINE void
793 add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
794 uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
796 struct mem_record *it = &ctx->pool[ctx->alloc++];
798 it->next = *rec;
799 *rec = it;
800 it->base = base;
801 it->ofst = ofst;
802 it->insn = nvi;
803 it->size = nvi->src[0]->value->reg.size;
806 /* vectorize and reuse loads from memory or of immediates */
807 static int
808 nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
810 struct mem_record **rec, *it;
811 struct nv_instruction *ld, *next;
812 struct nv_value *mem;
813 uint32_t base, ofst;
814 int s;
816 for (ld = b->entry; ld; ld = next) {
817 next = ld->next;
819 if (is_cspace_load(ld)) {
820 mem = ld->src[0]->value;
821 rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
822 } else
823 if (ld->opcode == NV_OP_VFETCH) {
824 mem = ld->src[0]->value;
825 rec = &ctx->mem_a;
826 } else
827 if (ld->opcode == NV_OP_EXPORT) {
828 mem = ld->src[0]->value;
829 if (mem->reg.file != NV_FILE_MEM_V)
830 continue;
831 rec = &ctx->mem_v;
832 } else {
833 continue;
835 if (ld->def[0] && ld->def[0]->refc == 0)
836 continue;
837 ofst = mem->reg.address;
838 base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;
840 for (it = *rec; it; it = it->next) {
841 if (it->base == base &&
842 ((it->ofst >> 4) == (ofst >> 4)) &&
843 ((it->ofst + it->size == ofst) ||
844 (it->ofst - mem->reg.size == ofst))) {
845 /* only NV_OP_VFETCH can load exactly 12 bytes */
846 if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
847 continue;
848 if (it->ofst < ofst) {
849 if ((it->ofst & 0xf) == 4)
850 continue;
851 } else
852 if ((ofst & 0xf) == 4)
853 continue;
854 break;
857 if (it) {
858 switch (ld->opcode) {
859 case NV_OP_EXPORT: combine_export(it, ld); break;
860 default:
861 combine_load(ctx->pc, it, ld);
862 break;
864 } else
865 if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
866 add_mem_record(ctx, rec, base, ofst, ld);
870 ctx->alloc = 0;
871 ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
872 for (s = 0; s < 16; ++s)
873 ctx->mem_c[s] = NULL;
875 DESCEND_ARBITRARY(s, nv_pass_mem_opt);
876 return 0;
#ifdef USE_UNUSED_CODE
/* Placeholder: removal of a store made redundant by @rec is not implemented. */
static void
eliminate_store(struct mem_record *rec, struct nv_instruction *st)
{
   (void)rec;
   (void)st;
}

/* elimination of redundant stores
 *
 * Records ST instructions and EXPORTs to NV_FILE_MEM_V of @b; a store that
 * hits an already recorded location with the same base is passed to
 * eliminate_store().  Always returns 0.
 */
static int
pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *st, *next;
   struct nv_value *mem;
   uint32_t base, ofst, size;
   int s;

   for (st = b->entry; st; st = next) {
      next = st->next;

      if (st->opcode == NV_OP_ST) {
         mem = st->src[0]->value;
         rec = &ctx->mem_l;
      } else
      if (st->opcode == NV_OP_EXPORT) {
         mem = st->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else {
         /* Not a store: `mem` and `rec` would be used uninitialized below.
          * TODO: purge records this instruction invalidates
          */
         continue;
      }
      ofst = mem->reg.address;
      base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
      size = mem->reg.size;

      /* NOTE(review): the range test uses the size of the new store, not
       * the size of the recorded one - confirm which is intended.
       */
      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             (it->ofst <= ofst && (it->ofst + size) > ofst))
            break;
      }
      if (it)
         eliminate_store(it, st);
      else
      if (ctx->alloc < MEM_RECORD_POOL_SIZE) /* pool has a fixed size */
         add_mem_record(ctx, rec, base, ofst, st);
   }

   /* continue with this pass (not the load pass) in the successors */
   DESCEND_ARBITRARY(s, pass_store_elim);
   return 0;
}
#endif
/* TODO: properly handle loads from l[] memory in the presence of stores
 *
 * The implementation is currently compiled out; the function does nothing
 * and returns 0.
 */
static int
nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
#if 0
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data[2];
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data[0] = val->reg.id;
         data[1] = 0;
         rec = &ctx->mem_v;
      } else if (ld->opcode == NV_OP_LDA) {
         data[0] = val->reg.id;
         data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_s;
         else if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data[0] = val->reg.imm.u32;
         data[1] = 0;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data[0] == data[0] && it->data[1] == data[1])
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else if (!ld->fixed)
            nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data[0] = data[0];
         it->data[1] = data[1];
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_s = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);
#endif
   return 0;
}
1011 static int
1012 nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
1014 int i, c, j;
1016 for (i = 0; i < ctx->pc->num_instructions; ++i) {
1017 struct nv_instruction *nvi = &ctx->pc->instructions[i];
1018 struct nv_value *def[4];
1020 if (!nv_is_texture_op(nvi->opcode))
1021 continue;
1022 nvi->tex_mask = 0;
1024 for (c = 0; c < 4; ++c) {
1025 if (nvi->def[c]->refc)
1026 nvi->tex_mask |= 1 << c;
1027 def[c] = nvi->def[c];
1030 j = 0;
1031 for (c = 0; c < 4; ++c)
1032 if (nvi->tex_mask & (1 << c))
1033 nvi->def[j++] = def[c];
1034 for (c = 0; c < 4; ++c)
1035 if (!(nvi->tex_mask & (1 << c)))
1036 nvi->def[j++] = def[c];
1037 assert(j == 4);
1039 return 0;
1042 struct nv_pass_dce {
1043 struct nv_pc *pc;
1044 uint removed;
1047 static int
1048 nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
1050 int j;
1051 struct nv_instruction *nvi, *next;
1053 for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
1054 next = nvi->next;
1056 if (inst_removable(nvi)) {
1057 nvc0_insn_delete(nvi);
1058 ++ctx->removed;
1061 DESCEND_ARBITRARY(j, nv_pass_dce);
1063 return 0;
1066 /* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
1067 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
1068 * BREAK and dummy ELSE block.
1070 static INLINE boolean
1071 bb_is_if_else_endif(struct nv_basic_block *bb)
1073 if (!bb->out[0] || !bb->out[1])
1074 return FALSE;
1076 if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
1077 return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
1078 !bb->out[1]->out[1]);
1079 } else {
1080 return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
1081 !bb->out[0]->out[1] &&
1082 !bb->out[1]->out[1]);
1086 /* Predicate instructions and delete any branch at the end if it is
1087 * not a break from a loop.
1089 static void
1090 predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
1091 struct nv_value *pred, uint8_t cc)
1093 struct nv_instruction *nvi, *prev;
1094 int s;
1096 if (!b->entry)
1097 return;
1098 for (nvi = b->entry; nvi; nvi = nvi->next) {
1099 prev = nvi;
1100 if (inst_is_noop(nvi))
1101 continue;
1102 for (s = 0; nvi->src[s]; ++s);
1103 assert(s < 6);
1104 nvi->predicate = s;
1105 nvi->cc = cc;
1106 nv_reference(pc, nvi, nvi->predicate, pred);
1108 if (prev->opcode == NV_OP_BRA &&
1109 b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
1110 b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
1111 nvc0_insn_delete(prev);
1114 static INLINE boolean
1115 may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
1117 if (nvi->def[0] && values_equal(nvi->def[0], pred))
1118 return FALSE;
1119 return nvc0_insn_is_predicateable(nvi);
1122 /* Transform IF/ELSE/ENDIF constructs into predicated instructions
1123 * where feasible.
1125 static int
1126 nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
1128 struct nv_instruction *nvi;
1129 struct nv_value *pred;
1130 int k;
1131 int n0, n1; /* instruction counts of outgoing blocks */
1133 if (bb_is_if_else_endif(b)) {
1134 assert(b->exit && b->exit->opcode == NV_OP_BRA);
1136 assert(b->exit->predicate >= 0);
1137 pred = b->exit->src[b->exit->predicate]->value;
1139 n1 = n0 = 0;
1140 for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
1141 if (!may_predicate_insn(nvi, pred))
1142 break;
1143 if (!nvi) {
1144 /* we're after register allocation, so there always is an ELSE block */
1145 for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
1146 if (!may_predicate_insn(nvi, pred))
1147 break;
1150 /* 12 is an arbitrary limit */
1151 if (!nvi && n0 < 12 && n1 < 12) {
1152 predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
1153 predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);
1155 nvc0_insn_delete(b->exit); /* delete the branch */
1157 /* and a potential joinat before it */
1158 if (b->exit && b->exit->opcode == NV_OP_JOINAT)
1159 nvc0_insn_delete(b->exit);
1161 /* remove join operations at the end of the conditional */
1162 k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
1163 if ((nvi = b->out[0]->out[k]->entry)) {
1164 nvi->join = 0;
1165 if (nvi->opcode == NV_OP_JOIN)
1166 nvc0_insn_delete(nvi);
1170 DESCEND_ARBITRARY(k, nv_pass_flatten);
1172 return 0;
1175 /* Tests instructions for equality, but independently of sources. */
1176 static boolean
1177 is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
1179 if (a->opcode != b->opcode)
1180 return FALSE;
1181 if (nv_is_texture_op(a->opcode)) {
1182 if (a->ext.tex.t != b->ext.tex.t ||
1183 a->ext.tex.s != b->ext.tex.s)
1184 return FALSE;
1185 if (a->tex_dim != b->tex_dim ||
1186 a->tex_array != b->tex_array ||
1187 a->tex_cube != b->tex_cube ||
1188 a->tex_shadow != b->tex_shadow ||
1189 a->tex_live != b->tex_live)
1190 return FALSE;
1191 } else
1192 if (a->opcode == NV_OP_CVT) {
1193 if (a->ext.cvt.s != b->ext.cvt.s ||
1194 a->ext.cvt.d != b->ext.cvt.d)
1195 return FALSE;
1196 } else
1197 if (NV_BASEOP(a->opcode) == NV_OP_SET ||
1198 NV_BASEOP(a->opcode) == NV_OP_SLCT) {
1199 if (a->set_cond != b->set_cond)
1200 return FALSE;
1201 } else
1202 if (a->opcode == NV_OP_LINTERP ||
1203 a->opcode == NV_OP_PINTERP) {
1204 if (a->centroid != b->centroid ||
1205 a->flat != b->flat)
1206 return FALSE;
1208 if (a->cc != b->cc)
1209 return FALSE;
1210 if (a->lanes != b->lanes ||
1211 a->patch != b->patch ||
1212 a->saturate != b->saturate)
1213 return FALSE;
1214 if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
1215 return FALSE;
1216 return TRUE;
1219 /* local common subexpression elimination, stupid O(n^2) implementation */
1220 static int
1221 nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
1223 struct nv_instruction *ir, *ik, *next;
1224 struct nv_instruction *entry = b->phi ? b->phi : b->entry;
1225 int s, d;
1226 unsigned int reps;
1228 do {
1229 reps = 0;
1230 for (ir = entry; ir; ir = next) {
1231 next = ir->next;
1232 if (ir->fixed)
1233 continue;
1234 for (ik = entry; ik != ir; ik = ik->next) {
1235 if (!is_operation_equal(ir, ik))
1236 continue;
1237 if (!ir->def[0] || !ik->def[0])
1238 continue;
1240 if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
1241 continue;
1243 for (d = 0; d < 4; ++d) {
1244 if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
1245 break;
1246 if (ir->def[d]) {
1247 if (!values_equal(ik->def[0], ir->def[0]))
1248 break;
1249 } else {
1250 d = 4;
1251 break;
1254 if (d != 4)
1255 continue;
1257 for (s = 0; s < 5; ++s) {
1258 struct nv_value *a, *b;
1260 if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
1261 break;
1262 if (!ir->src[s]) {
1263 s = 5;
1264 break;
1267 if (ik->src[s]->mod != ir->src[s]->mod)
1268 break;
1269 a = ik->src[s]->value;
1270 b = ir->src[s]->value;
1271 if (a == b)
1272 continue;
1273 if (a->reg.file != b->reg.file ||
1274 a->reg.id < 0 || /* this excludes memory loads/stores */
1275 a->reg.id != b->reg.id)
1276 break;
1278 if (s == 5) {
1279 nvc0_insn_delete(ir);
1280 for (d = 0; d < 4 && ir->def[d]; ++d)
1281 nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
1282 ++reps;
1283 break;
1287 } while(reps);
1289 DESCEND_ARBITRARY(s, nv_pass_cse);
1291 return 0;
1294 /* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
1295 * neighbouring registers. CSE might have messed this up.
1296 * Just generate a MOV for each source to avoid conflicts if they're used in
1297 * multiple NV_OP_BIND at different positions.
1299 * Add a dummy use of the pointer source of >= 8 byte loads after the load
1300 * to prevent it from being assigned a register which overlaps the load's
1301 * destination, which would produce random corruptions.
1303 static int
1304 nv_pass_fixups(struct nv_pass *ctx, struct nv_basic_block *b)
1306 struct nv_value *val;
1307 struct nv_instruction *fix, *nvi, *next;
1308 int s;
1310 for (fix = b->entry; fix; fix = next) {
1311 next = fix->next;
1313 if (fix->opcode == NV_OP_LD) {
1314 if (fix->indirect >= 0 && fix->src[0]->value->reg.size >= 8) {
1315 nvi = nv_alloc_instruction(ctx->pc, NV_OP_UNDEF);
1316 nv_reference(ctx->pc, nvi, 0, fix->src[fix->indirect]->value);
1318 nvc0_insn_insert_after(fix, nvi);
1320 continue;
1321 } else
1322 if (fix->opcode == NV_OP_BIND) {
1323 for (s = 0; s < 4 && fix->src[s]; ++s) {
1324 val = fix->src[s]->value;
1326 nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
1327 nvi->def[0] = new_value_like(ctx->pc, val);
1328 nvi->def[0]->insn = nvi;
1329 nv_reference(ctx->pc, nvi, 0, val);
1330 nv_reference(ctx->pc, fix, s, nvi->def[0]);
1332 nvc0_insn_insert_before(fix, nvi);
1336 DESCEND_ARBITRARY(s, nv_pass_fixups);
1338 return 0;
1341 static int
1342 nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
1344 struct pass_reld_elim *reldelim = NULL;
1345 struct nv_pass pass;
1346 struct nv_pass_dce dce;
1347 int ret;
1349 pass.n = 0;
1350 pass.pc = pc;
1352 /* Do CSE so we can just compare values by pointer in subsequent passes. */
1353 pc->pass_seq++;
1354 ret = nv_pass_cse(&pass, root);
1355 if (ret)
1356 return ret;
1358 /* Do this first, so we don't have to pay attention
1359 * to whether sources are supported memory loads.
1361 pc->pass_seq++;
1362 ret = nv_pass_algebraic_opt(&pass, root);
1363 if (ret)
1364 return ret;
1366 pc->pass_seq++;
1367 ret = nv_pass_lower_mods(&pass, root);
1368 if (ret)
1369 return ret;
1371 pc->pass_seq++;
1372 ret = nvc0_pass_fold_loads(&pass, root);
1373 if (ret)
1374 return ret;
1376 if (pc->opt_reload_elim) {
1377 reldelim = CALLOC_STRUCT(pass_reld_elim);
1378 reldelim->pc = pc;
1380 pc->pass_seq++;
1381 ret = nv_pass_reload_elim(reldelim, root);
1382 if (ret) {
1383 FREE(reldelim);
1384 return ret;
1386 memset(reldelim, 0, sizeof(struct pass_reld_elim));
1387 reldelim->pc = pc;
1390 /* May run DCE before load-combining since that pass will clean up
1391 * after itself.
1393 dce.pc = pc;
1394 do {
1395 dce.removed = 0;
1396 pc->pass_seq++;
1397 ret = nv_pass_dce(&dce, root);
1398 if (ret)
1399 return ret;
1400 } while (dce.removed);
1402 if (pc->opt_reload_elim) {
1403 pc->pass_seq++;
1404 ret = nv_pass_mem_opt(reldelim, root);
1405 if (!ret) {
1406 memset(reldelim, 0, sizeof(struct pass_reld_elim));
1407 reldelim->pc = pc;
1409 pc->pass_seq++;
1410 ret = nv_pass_mem_opt(reldelim, root);
1412 FREE(reldelim);
1413 if (ret)
1414 return ret;
1417 ret = nv_pass_tex_mask(&pass, root);
1418 if (ret)
1419 return ret;
1421 pc->pass_seq++;
1422 ret = nv_pass_fixups(&pass, root);
1424 return ret;
1428 nvc0_pc_exec_pass0(struct nv_pc *pc)
1430 int i, ret;
1432 for (i = 0; i < pc->num_subroutines + 1; ++i)
1433 if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
1434 return ret;
1435 return 0;