/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include "nvc0_program.h"
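/* Walk the control flow graph: visit both successors of basic block @b that
 * have not yet been processed in the current pass (tracked via pass_seq),
 * calling pass function @f on each of them.
 */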
#define DESCEND_ARBITRARY(j, f)                                 \
do {                                                            \
   b->pass_seq = ctx->pc->pass_seq;                             \
                                                                \
   for (j = 0; j < 2; ++j)                                      \
      if (b->out[j] && b->out[j]->pass_seq < ctx->pc->pass_seq) \
         f(ctx, b->out[j]);                                     \
} while (0)
static INLINE boolean
registers_interfere(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file) || NV_IS_MEMORY_FILE(b->reg.file))
      return FALSE;

   assert(a->join->reg.id >= 0 && b->join->reg.id >= 0);

   if (a->join->reg.id < b->join->reg.id) {
      return (a->join->reg.id + a->reg.size >= b->join->reg.id);
   } else
   if (a->join->reg.id > b->join->reg.id) {
      return (b->join->reg.id + b->reg.size >= a->join->reg.id);
   }
   return TRUE;
}
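/* Two values are considered equal if they reside at the same memory address,
 * or, for register values, if their (joined) register ids match.
 */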
static INLINE boolean
values_equal(struct nv_value *a, struct nv_value *b)
{
   if (a->reg.file != b->reg.file || a->reg.size != b->reg.size)
      return FALSE;
   if (NV_IS_MEMORY_FILE(a->reg.file))
      return a->reg.address == b->reg.address;

   return a->join->reg.id == b->join->reg.id;
}
static INLINE boolean
inst_commutation_check(struct nv_instruction *a, struct nv_instruction *b)
{
   int si, di;

   for (di = 0; di < 4 && a->def[di]; ++di)
      for (si = 0; si < 5 && b->src[si]; ++si)
         if (registers_interfere(a->def[di], b->src[si]->value))
            return FALSE;

   return TRUE;
}

/* Check whether we can swap the order of the instructions,
 * where a & b may be either the earlier or the later one.
 */
static boolean
inst_commutation_legal(struct nv_instruction *a, struct nv_instruction *b)
{
   return inst_commutation_check(a, b) && inst_commutation_check(b, a);
}
static INLINE boolean
inst_removable(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_ST)
      return FALSE;
   return (!(nvi->terminator ||
             nvi->join ||
             nvi->target ||
             nvi->fixed ||
             nvc0_insn_refcount(nvi)));
}
/* Check if we do not actually have to emit this instruction. */
static INLINE boolean
inst_is_noop(struct nv_instruction *nvi)
{
   if (nvi->opcode == NV_OP_UNDEF || nvi->opcode == NV_OP_BIND)
      return TRUE;
   if (nvi->terminator || nvi->join)
      return FALSE;
   if (nvi->def[0] && nvi->def[0]->join->reg.id < 0)
      return TRUE;
   if (nvi->opcode != NV_OP_MOV && nvi->opcode != NV_OP_SELECT)
      return FALSE;
   if (nvi->def[0]->reg.file != nvi->src[0]->value->reg.file)
      return FALSE;

   if (nvi->src[0]->value->join->reg.id < 0) {
      NV50_DBGMSG(PROG_IR, "inst_is_noop: orphaned value detected\n");
      return TRUE;
   }

   if (nvi->opcode == NV_OP_SELECT)
      if (!values_equal(nvi->def[0], nvi->src[1]->value))
         return FALSE;
   return values_equal(nvi->def[0], nvi->src[0]->value);
}
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b);
static void
nv_pc_pass_pre_emission(void *priv, struct nv_basic_block *b)
{
   struct nv_pc *pc = (struct nv_pc *)priv;
   struct nv_basic_block *in;
   struct nv_instruction *nvi, *next;
   int j;

   /* find first non-empty block emitted before b */
   for (j = pc->num_blocks - 1; j >= 0 && !pc->bb_list[j]->emit_size; --j);

   for (; j >= 0; --j) {
      in = pc->bb_list[j];

      /* check for no-op branches (BRA $PC+8) */
      if (in->exit && in->exit->opcode == NV_OP_BRA && in->exit->target == b) {
         in->emit_size -= 8;
         pc->emit_size -= 8;

         for (++j; j < pc->num_blocks; ++j)
            pc->bb_list[j]->emit_pos -= 8;

         nvc0_insn_delete(in->exit);
      }
      b->emit_pos = in->emit_pos + in->emit_size;

      if (in->emit_size) /* no more no-op branches to b */
         break;
   }
   pc->bb_list[pc->num_blocks++] = b;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;
      if (inst_is_noop(nvi) ||
          (pc->is_fragprog && nvi->opcode == NV_OP_EXPORT)) {
         nvc0_insn_delete(nvi);
      } else
         b->emit_size += 8;
   }
   pc->emit_size += b->emit_size;

#if NV50_DEBUG & NV50_DEBUG_PROG_IR
   if (!b->entry)
      debug_printf("BB:%i is now empty\n", b->id);
   else
      debug_printf("BB:%i size = %u\n", b->id, b->emit_size);
#endif
}
static int
nv_pc_pass2(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct nv_pass pass;

   pass.pc = pc;

   pc->pass_seq++;
   nv_pass_flatten(&pass, root);

   nvc0_pc_pass_in_order(root, nv_pc_pass_pre_emission, pc);

   return 0;
}
int
nvc0_pc_exec_pass2(struct nv_pc *pc)
{
   int i, ret;

   NV50_DBGMSG(PROG_IR, "preparing %u blocks for emission\n", pc->num_blocks);

   pc->num_blocks = 0; /* will reorder bb_list */

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass2(pc, pc->root[i])))
         return ret;
   return 0;
}
static INLINE boolean
is_cspace_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   assert(nvi->indirect != 0);
   return (nvi->opcode == NV_OP_LD &&
           nvi->src[0]->value->reg.file >= NV_FILE_MEM_C(0) &&
           nvi->src[0]->value->reg.file <= NV_FILE_MEM_C(15));
}
static INLINE boolean
is_immd32_load(struct nv_instruction *nvi)
{
   if (!nvi)
      return FALSE;
   return (nvi->opcode == NV_OP_MOV &&
           nvi->src[0]->value->reg.file == NV_FILE_IMM &&
           nvi->src[0]->value->reg.size == 4);
}
static INLINE void
check_swap_src_0_1(struct nv_instruction *nvi)
{
   struct nv_ref *src0 = nvi->src[0];
   struct nv_ref *src1 = nvi->src[1];

   if (!nv_op_commutative(nvi->opcode) &&
       NV_BASEOP(nvi->opcode) != NV_OP_SET &&
       NV_BASEOP(nvi->opcode) != NV_OP_SLCT)
      return;
   assert(src0 && src1 && src0->value && src1->value);

   if (src1->value->reg.file != NV_FILE_GPR)
      return;

   if (is_cspace_load(src0->value->insn)) {
      if (!is_cspace_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
      }
   } else
   if (is_immd32_load(src0->value->insn)) {
      if (!is_cspace_load(src1->value->insn) &&
          !is_immd32_load(src1->value->insn)) {
         nvi->src[0] = src1;
         nvi->src[1] = src0;
      }
   }

   if (nvi->src[0] != src0) {
      if (NV_BASEOP(nvi->opcode) == NV_OP_SET)
         nvi->set_cond = nvc0_ir_reverse_cc(nvi->set_cond);
      else
      if (NV_BASEOP(nvi->opcode) == NV_OP_SLCT)
         nvi->set_cond = NV_CC_INVERSE(nvi->set_cond);
   }
}
static void
nvi_set_indirect_load(struct nv_pc *pc,
                      struct nv_instruction *nvi, struct nv_value *val)
{
   for (nvi->indirect = 0; nvi->indirect < 6 && nvi->src[nvi->indirect];
        ++nvi->indirect);
   assert(nvi->indirect < 6);
   nv_reference(pc, nvi, nvi->indirect, val);
}
static int
nvc0_pass_fold_loads(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *ld;
   int s;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      check_swap_src_0_1(nvi);

      for (s = 0; s < 3 && nvi->src[s]; ++s) {
         ld = nvi->src[s]->value->insn;
         if (!ld || (ld->opcode != NV_OP_LD && ld->opcode != NV_OP_MOV))
            continue;
         if (!nvc0_insn_can_load(nvi, s, ld))
            continue;

         nv_reference(ctx->pc, nvi, s, ld->src[0]->value);
         if (ld->indirect >= 0)
            nvi_set_indirect_load(ctx->pc, nvi, ld->src[ld->indirect]->value);

         if (!nvc0_insn_refcount(ld))
            nvc0_insn_delete(ld);
      }
   }
   DESCEND_ARBITRARY(s, nvc0_pass_fold_loads);

   return 0;
}
/* NOTE: Assumes loads have not yet been folded. */
static int
nv_pass_lower_mods(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *mi, *next;
   int j;
   uint8_t mod;

   for (nvi = b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (nvi->opcode == NV_OP_SUB) {
         nvi->src[1]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD;
      }

      for (j = 0; j < 3 && nvi->src[j]; ++j) {
         mi = nvi->src[j]->value->insn;
         if (!mi)
            continue;
         if (mi->def[0]->refc > 1 || mi->predicate >= 0)
            continue;

         if (NV_BASEOP(mi->opcode) == NV_OP_NEG) mod = NV_MOD_NEG;
         else
         if (NV_BASEOP(mi->opcode) == NV_OP_ABS) mod = NV_MOD_ABS;
         else
            continue;
         assert(!(mod & mi->src[0]->mod & NV_MOD_NEG));

         mod |= mi->src[0]->mod;

         if ((nvi->opcode == NV_OP_ABS) || (nvi->src[j]->mod & NV_MOD_ABS)) {
            /* abs neg [abs] = abs */
            mod &= ~(NV_MOD_NEG | NV_MOD_ABS);
         } else
         if ((nvi->opcode == NV_OP_NEG) && (mod & NV_MOD_NEG)) {
            /* neg as opcode and modifier on same insn cannot occur */
            /* neg neg abs = abs, neg neg = identity */
            assert(j == 0);
            if (mod & NV_MOD_ABS)
               nvi->opcode = NV_OP_ABS;
            else
               nvi->opcode = NV_OP_MOV;
            mod = 0;
         }

         if ((nv_op_supported_src_mods(nvi->opcode, j) & mod) != mod)
            continue;

         nv_reference(ctx->pc, nvi, j, mi->src[0]->value);

         nvi->src[j]->mod ^= mod;
      }

      if (nvi->opcode == NV_OP_SAT) {
         mi = nvi->src[0]->value->insn;

         if (mi->def[0]->refc > 1 ||
             (mi->opcode != NV_OP_ADD &&
              mi->opcode != NV_OP_MUL &&
              mi->opcode != NV_OP_MAD))
            continue;
         mi->saturate = 1;
         mi->def[0] = nvi->def[0];
         mi->def[0]->insn = mi;
         nvc0_insn_delete(nvi);
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_lower_mods);

   return 0;
}
#define SRC_IS_MUL(s) ((s)->insn && (s)->insn->opcode == NV_OP_MUL)
static void
apply_modifiers(uint32_t *val, uint8_t type, uint8_t mod)
{
   if (mod & NV_MOD_ABS) {
      if (type == NV_TYPE_F32)
         *val &= 0x7fffffff;
      else
      if ((*val) & (1 << 31))
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_NEG) {
      if (type == NV_TYPE_F32)
         *val ^= 0x80000000;
      else
         *val = ~(*val) + 1;
   }
   if (mod & NV_MOD_SAT) {
      union {
         float f;
         uint32_t u;
         int32_t i;
      } u;
      u.u = *val;
      if (type == NV_TYPE_F32) {
         u.f = CLAMP(u.f, -1.0f, 1.0f);
      } else
      if (type == NV_TYPE_U16) {
         u.u = MIN2(u.u, 0xffff);
      } else
      if (type == NV_TYPE_S16) {
         u.i = CLAMP(u.i, -32768, 32767);
      }
      *val = u.u;
   }
   if (mod & NV_MOD_NOT)
      *val = ~*val;
}
static void
constant_expression(struct nv_pc *pc, struct nv_instruction *nvi,
                    struct nv_value *src0, struct nv_value *src1)
{
   struct nv_value *val;
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u0, u1, u;
   ubyte type;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = 0;
   u0.u32 = src0->reg.imm.u32;
   u1.u32 = src1->reg.imm.u32;

   apply_modifiers(&u0.u32, type, nvi->src[0]->mod);
   apply_modifiers(&u1.u32, type, nvi->src[1]->mod);

   switch (nvi->opcode) {
   case NV_OP_MAD_F32:
      if (nvi->src[2]->value->reg.file != NV_FILE_GPR)
         return;
      /* fall through */
   case NV_OP_MUL_F32:
      u.f32 = u0.f32 * u1.f32;
      break;
   case NV_OP_MUL_B32:
      u.u32 = u0.u32 * u1.u32;
      break;
   case NV_OP_ADD_F32:
      u.f32 = u0.f32 + u1.f32;
      break;
   case NV_OP_ADD_B32:
      u.u32 = u0.u32 + u1.u32;
      break;
   case NV_OP_SUB_F32:
      u.f32 = u0.f32 - u1.f32;
      break;
      /*
   case NV_OP_SUB_B32:
      u.u32 = u0.u32 - u1.u32;
      break;
      */
   default:
      return;
   }

   val = new_value(pc, NV_FILE_IMM, nv_type_sizeof(type));
   val->reg.imm.u32 = u.u32;

   nv_reference(pc, nvi, 1, NULL);
   nv_reference(pc, nvi, 0, val);

   if (nvi->opcode == NV_OP_MAD_F32) {
      nvi->src[1] = nvi->src[0];
      nvi->src[0] = nvi->src[2];
      nvi->src[2] = NULL;
      nvi->opcode = NV_OP_ADD_F32;

      if (val->reg.imm.u32 == 0) {
         nvi->src[1] = NULL;
         nvi->opcode = NV_OP_MOV;
      }
   } else {
      nvi->opcode = NV_OP_MOV;
   }
}
static void
constant_operand(struct nv_pc *pc,
                 struct nv_instruction *nvi, struct nv_value *val, int s)
{
   union {
      float f32;
      uint32_t u32;
      int32_t s32;
   } u;
   ubyte type;
   uint op;
   int shift;
   int t = s ? 0 : 1;

   if (!nvi->def[0])
      return;
   type = NV_OPTYPE(nvi->opcode);

   u.u32 = val->reg.imm.u32;
   apply_modifiers(&u.u32, type, nvi->src[s]->mod);

   if (u.u32 == 0 && NV_BASEOP(nvi->opcode) == NV_OP_MUL) {
      nvi->opcode = NV_OP_MOV;
      nv_reference(pc, nvi, t, NULL);
      if (s) {
         nvi->src[0] = nvi->src[1];
         nvi->src[1] = NULL;
      }
      return;
   }

   switch (nvi->opcode) {
   case NV_OP_MUL_F32:
      if (u.f32 == 1.0f || u.f32 == -1.0f) {
         if (u.f32 == -1.0f)
            nvi->src[t]->mod ^= NV_MOD_NEG;
         switch (nvi->src[t]->mod) {
         case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
         case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
         case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
         default:
            return;
         }
         nvi->opcode = op;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
         nvi->src[0]->mod = 0;
      } else
      if (u.f32 == 2.0f || u.f32 == -2.0f) {
         if (u.f32 == -2.0f)
            nvi->src[t]->mod ^= NV_MOD_NEG;
         nvi->opcode = NV_OP_ADD_F32;
         nv_reference(pc, nvi, s, nvi->src[t]->value);
         nvi->src[s]->mod = nvi->src[t]->mod;
      }
      break;
   case NV_OP_ADD_F32:
      if (u.u32 == 0) {
         switch (nvi->src[t]->mod) {
         case 0: op = nvi->saturate ? NV_OP_SAT : NV_OP_MOV; break;
         case NV_MOD_NEG: op = NV_OP_NEG_F32; break;
         case NV_MOD_ABS: op = NV_OP_ABS_F32; break;
         case NV_MOD_NEG | NV_MOD_ABS:
            op = NV_OP_CVT;
            nvi->ext.cvt.s = nvi->ext.cvt.d = type;
            break;
         default:
            return;
         }
         nvi->opcode = op;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
         if (nvi->opcode != NV_OP_CVT)
            nvi->src[0]->mod = 0;
      }
      break;
   case NV_OP_ADD_B32:
      if (u.u32 == 0) {
         assert(nvi->src[t]->mod == 0);
         nvi->opcode = nvi->saturate ? NV_OP_CVT : NV_OP_MOV;
         nvi->ext.cvt.s = nvi->ext.cvt.d = type;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
      }
      break;
   case NV_OP_MUL_B32:
      /* multiplication by 0 already handled above */
      assert(nvi->src[s]->mod == 0);
      shift = ffs(u.s32) - 1;
      if (shift == 0) {
         nvi->opcode = NV_OP_MOV;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, NULL);
      } else
      if (u.s32 > 0 && u.s32 == (1 << shift)) {
         nvi->opcode = NV_OP_SHL;
         (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.s32 = shift;
         nv_reference(pc, nvi, 0, nvi->src[t]->value);
         nv_reference(pc, nvi, 1, val);
      }
      break;
   case NV_OP_RCP:
      u.f32 = 1.0f / u.f32;
      (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;

      nv_reference(pc, nvi, 0, val);
      break;
   case NV_OP_RSQ:
      u.f32 = 1.0f / sqrtf(u.f32);
      (val = new_value(pc, NV_FILE_IMM, 4))->reg.imm.f32 = u.f32;
      nvi->opcode = NV_OP_MOV;

      nv_reference(pc, nvi, 0, val);
      break;
   default:
      break;
   }
}
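/* MIN(a, a) and MAX(a, a) are just a: replace the result with the source and
 * delete the instruction (only done for plain GPR sources without modifiers).
 */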
static void
handle_min_max(struct nv_pass *ctx, struct nv_instruction *nvi)
{
   struct nv_value *src0 = nvi->src[0]->value;
   struct nv_value *src1 = nvi->src[1]->value;

   if (src0 != src1 || (nvi->src[0]->mod | nvi->src[1]->mod))
      return;
   if (src0->reg.file != NV_FILE_GPR)
      return;
   nvc0_pc_replace_value(ctx->pc, nvi->def[0], src0);
   nvc0_insn_delete(nvi);
}
/* check if we can MUL + ADD -> MAD/FMA */
static void
handle_add_mul(struct nv_pass *ctx, struct nv_instruction *nvi)
{
   struct nv_value *src0 = nvi->src[0]->value;
   struct nv_value *src1 = nvi->src[1]->value;
   struct nv_value *src;
   int s;
   uint8_t mod[4];

   if (SRC_IS_MUL(src0) && src0->refc == 1) s = 0;
   else
   if (SRC_IS_MUL(src1) && src1->refc == 1) s = 1;
   else
      return;

   if ((src0->insn && src0->insn->bb != nvi->bb) ||
       (src1->insn && src1->insn->bb != nvi->bb))
      return;

   /* check for immediates from prior constant folding */
   if (src0->reg.file != NV_FILE_GPR || src1->reg.file != NV_FILE_GPR)
      return;
   src = nvi->src[s]->value;

   mod[0] = nvi->src[0]->mod;
   mod[1] = nvi->src[1]->mod;
   mod[2] = src->insn->src[0]->mod;
   mod[3] = src->insn->src[1]->mod;

   if ((mod[0] | mod[1] | mod[2] | mod[3]) & ~NV_MOD_NEG)
      return;

   nvi->opcode = NV_OP_MAD_F32;

   nv_reference(ctx->pc, nvi, s, NULL);
   nvi->src[2] = nvi->src[!s];
   nvi->src[!s] = NULL;

   nv_reference(ctx->pc, nvi, 0, src->insn->src[0]->value);
   nvi->src[0]->mod = mod[2] ^ mod[s];
   nv_reference(ctx->pc, nvi, 1, src->insn->src[1]->value);
   nvi->src[1]->mod = mod[3];
}
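/* Fold constant expressions and constant operands, simplify MIN/MAX of
 * identical sources, and fuse MUL + ADD into MAD where the routines above
 * allow it.
 */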
static int
nv_pass_algebraic_opt(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi, *next;
   int j;

   for (nvi = b->entry; nvi; nvi = next) {
      struct nv_value *src0, *src1;
      uint baseop = NV_BASEOP(nvi->opcode);

      next = nvi->next;

      src0 = nvc0_pc_find_immediate(nvi->src[0]);
      src1 = nvc0_pc_find_immediate(nvi->src[1]);

      if (src0 && src1)
         constant_expression(ctx->pc, nvi, src0, src1);
      else
      if (src0)
         constant_operand(ctx->pc, nvi, src0, 0);
      else
      if (src1)
         constant_operand(ctx->pc, nvi, src1, 1);

      if (baseop == NV_OP_MIN || baseop == NV_OP_MAX)
         handle_min_max(ctx, nvi);
      else
      if (nvi->opcode == NV_OP_ADD_F32)
         handle_add_mul(ctx, nvi);
   }
   DESCEND_ARBITRARY(j, nv_pass_algebraic_opt);

   return 0;
}
/* TODO: redundant store elimination */

struct mem_record {
   struct mem_record *next;
   struct nv_instruction *insn;
   uint32_t ofst;
   uint32_t base;
   uint32_t size;
};

#define MEM_RECORD_POOL_SIZE 1024

struct pass_reld_elim {
   struct nv_pc *pc;

   struct mem_record *imm;
   struct mem_record *mem_v;
   struct mem_record *mem_a;
   struct mem_record *mem_c[16];
   struct mem_record *mem_l;

   struct mem_record pool[MEM_RECORD_POOL_SIZE];
   int alloc;
};
/* Extend the load operation in @rec to also cover the data loaded by @ld.
 * The two loads may not overlap but reference adjacent memory locations.
 */
static void
combine_load(struct nv_pc *pc, struct mem_record *rec,
             struct nv_instruction *ld)
{
   struct nv_instruction *fv = rec->insn;
   struct nv_value *mem = ld->src[0]->value;
   uint32_t size = rec->size + mem->reg.size;
   int j;
   int d = rec->size / 4;

   assert(rec->size < 16);
   if (rec->ofst > mem->reg.address) {
      if ((size == 8 && mem->reg.address & 3) ||
          (size > 8 && mem->reg.address & 7))
         return;
      rec->ofst = mem->reg.address;
      for (j = 0; j < d; ++j)
         fv->def[mem->reg.size / 4 + j] = fv->def[j];
      d = 0;
   } else
   if ((size == 8 && rec->ofst & 3) ||
       (size > 8 && rec->ofst & 7)) {
      return;
   }

   for (j = 0; j < mem->reg.size / 4; ++j) {
      fv->def[d] = ld->def[j];
      fv->def[d++]->insn = fv;
   }

   if (fv->src[0]->value->refc > 1)
      nv_reference(pc, fv, 0, new_value_like(pc, fv->src[0]->value));
   fv->src[0]->value->reg.address = rec->ofst;
   fv->src[0]->value->reg.size = rec->size = size;

   nvc0_insn_delete(ld);
}
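/* Combining of adjacent exports is not implemented, this is only a stub. */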
static void
combine_export(struct mem_record *rec, struct nv_instruction *ex)
{

}
static void
add_mem_record(struct pass_reld_elim *ctx, struct mem_record **rec,
               uint32_t base, uint32_t ofst, struct nv_instruction *nvi)
{
   struct mem_record *it = &ctx->pool[ctx->alloc++];

   it->next = *rec;
   *rec = it;
   it->base = base;
   it->ofst = ofst;
   it->insn = nvi;
   it->size = nvi->src[0]->value->reg.size;
}
/* vectorize and reuse loads from memory or of immediates */
static int
nv_pass_mem_opt(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *ld, *next;
   struct nv_value *mem;
   uint32_t base, ofst;
   int s;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;

      if (is_cspace_load(ld)) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_c[ld->src[0]->value->reg.file - NV_FILE_MEM_C(0)];
      } else
      if (ld->opcode == NV_OP_VFETCH) {
         mem = ld->src[0]->value;
         rec = &ctx->mem_a;
      } else
      if (ld->opcode == NV_OP_EXPORT) {
         mem = ld->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else
         continue;

      if (ld->def[0] && ld->def[0]->refc == 0)
         continue;
      ofst = mem->reg.address;
      base = (ld->indirect >= 0) ? ld->src[ld->indirect]->value->n : 0;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             ((it->ofst >> 4) == (ofst >> 4)) &&
             ((it->ofst + it->size == ofst) ||
              (it->ofst - mem->reg.size == ofst))) {
            /* only NV_OP_VFETCH can load exactly 12 bytes */
            if (ld->opcode == NV_OP_LD && it->size + mem->reg.size == 12)
               continue;
            if (it->ofst < ofst) {
               if ((it->ofst & 0xf) == 4)
                  continue;
            } else
            if ((ofst & 0xf) == 4)
               continue;
            break;
         }
      }
      if (it) {
         switch (ld->opcode) {
         case NV_OP_EXPORT: combine_export(it, ld); break;
         default:
            combine_load(ctx->pc, it, ld);
            break;
         }
      } else
      if (ctx->alloc < MEM_RECORD_POOL_SIZE) {
         add_mem_record(ctx, rec, base, ofst, ld);
      }
   }

   ctx->alloc = 0;
   ctx->mem_a = ctx->mem_v = ctx->mem_l = NULL;
   for (s = 0; s < 16; ++s)
      ctx->mem_c[s] = NULL;

   DESCEND_ARBITRARY(s, nv_pass_mem_opt);
   return 0;
}
#ifdef USE_UNUSED_CODE
static void
eliminate_store(struct mem_record *rec, struct nv_instruction *st)
{
}

/* elimination of redundant stores */
static int
pass_store_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct mem_record **rec, *it;
   struct nv_instruction *st, *next;
   struct nv_value *mem;
   uint32_t base, ofst, size;
   int s;

   for (st = b->entry; st; st = next) {
      next = st->next;

      if (st->opcode == NV_OP_ST) {
         mem = st->src[0]->value;
         rec = &ctx->mem_l;
      } else
      if (st->opcode == NV_OP_EXPORT) {
         mem = st->src[0]->value;
         if (mem->reg.file != NV_FILE_MEM_V)
            continue;
         rec = &ctx->mem_v;
      } else
      if (st->opcode == NV_OP_ST) {
         continue;
      } else
         continue;

      ofst = mem->reg.address;
      base = (st->indirect >= 0) ? st->src[st->indirect]->value->n : 0;
      size = mem->reg.size;

      for (it = *rec; it; it = it->next) {
         if (it->base == base &&
             (it->ofst <= ofst && (it->ofst + size) > ofst))
            break;
      }
      if (it)
         eliminate_store(it, st);
      else
         add_mem_record(ctx, rec, base, ofst, st);
   }

   DESCEND_ARBITRARY(s, nv_pass_mem_opt);
   return 0;
}
#endif
/* TODO: properly handle loads from l[] memory in the presence of stores */
static int
nv_pass_reload_elim(struct pass_reld_elim *ctx, struct nv_basic_block *b)
{
   struct load_record **rec, *it;
   struct nv_instruction *ld, *next;
   uint64_t data[2];
   struct nv_value *val;
   int j;

   for (ld = b->entry; ld; ld = next) {
      next = ld->next;
      if (!ld->src[0])
         continue;
      val = ld->src[0]->value;
      rec = NULL;

      if (ld->opcode == NV_OP_LINTERP || ld->opcode == NV_OP_PINTERP) {
         data[0] = val->reg.id;
         data[1] = 0;
         rec = &ctx->mem_v;
      } else
      if (ld->opcode == NV_OP_LDA) {
         data[0] = val->reg.id;
         data[1] = ld->src[4] ? ld->src[4]->value->n : ~0ULL;
         if (val->reg.file >= NV_FILE_MEM_C(0) &&
             val->reg.file <= NV_FILE_MEM_C(15))
            rec = &ctx->mem_c[val->reg.file - NV_FILE_MEM_C(0)];
         else
         if (val->reg.file == NV_FILE_MEM_S)
            rec = &ctx->mem_a;
         else
         if (val->reg.file == NV_FILE_MEM_L)
            rec = &ctx->mem_l;
      } else
      if ((ld->opcode == NV_OP_MOV) && (val->reg.file == NV_FILE_IMM)) {
         data[0] = val->reg.imm.u32;
         data[1] = 0;
         rec = &ctx->imm;
      }

      if (!rec || !ld->def[0]->refc)
         continue;

      for (it = *rec; it; it = it->next)
         if (it->data[0] == data[0] && it->data[1] == data[1])
            break;

      if (it) {
         if (ld->def[0]->reg.id >= 0)
            it->value = ld->def[0];
         else
            nvc0_pc_replace_value(ctx->pc, ld->def[0], it->value);
      } else {
         if (ctx->alloc == LOAD_RECORD_POOL_SIZE)
            continue;
         it = &ctx->pool[ctx->alloc++];
         it->next = *rec;
         it->data[0] = data[0];
         it->data[1] = data[1];
         it->value = ld->def[0];
         *rec = it;
      }
   }

   ctx->imm = NULL;
   ctx->mem_v = NULL;
   for (j = 0; j < 16; ++j)
      ctx->mem_c[j] = NULL;
   ctx->mem_a = ctx->mem_l = NULL;
   ctx->alloc = 0;

   DESCEND_ARBITRARY(j, nv_pass_reload_elim);

   return 0;
}
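/* Determine which components of a texture result are actually used and
 * compact the definitions so that the live components come first.
 */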
static int
nv_pass_tex_mask(struct nv_pass *ctx, struct nv_basic_block *b)
{
   int i, c, j;

   for (i = 0; i < ctx->pc->num_instructions; ++i) {
      struct nv_instruction *nvi = &ctx->pc->instructions[i];
      struct nv_value *def[4];

      if (!nv_is_texture_op(nvi->opcode))
         continue;
      nvi->tex_mask = 0;

      for (c = 0; c < 4; ++c) {
         if (nvi->def[c]->refc)
            nvi->tex_mask |= 1 << c;
         def[c] = nvi->def[c];
      }

      j = 0;
      for (c = 0; c < 4; ++c)
         if (nvi->tex_mask & (1 << c))
            nvi->def[j++] = def[c];
      for (c = 0; c < 4; ++c)
         if (!(nvi->tex_mask & (1 << c)))
            nvi->def[j++] = def[c];
   }

   return 0;
}
struct nv_pass_dce {
   struct nv_pc *pc;
   uint removed;
};

static int
nv_pass_dce(struct nv_pass_dce *ctx, struct nv_basic_block *b)
{
   int j;
   struct nv_instruction *nvi, *next;

   for (nvi = b->phi ? b->phi : b->entry; nvi; nvi = next) {
      next = nvi->next;

      if (inst_removable(nvi)) {
         nvc0_insn_delete(nvi);
         ++ctx->removed;
      }
   }
   DESCEND_ARBITRARY(j, nv_pass_dce);

   return 0;
}
/* Register allocation inserted ELSE blocks for all IF/ENDIF without ELSE.
 * Returns TRUE if @bb initiates an IF/ELSE/ENDIF clause, or is an IF with
 * BREAK and dummy ELSE block.
 */
static INLINE boolean
bb_is_if_else_endif(struct nv_basic_block *bb)
{
   if (!bb->out[0] || !bb->out[1])
      return FALSE;

   if (bb->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) {
      return (bb->out[0]->out[1] == bb->out[1]->out[0] &&
              !bb->out[1]->out[1]);
   } else {
      return (bb->out[0]->out[0] == bb->out[1]->out[0] &&
              !bb->out[0]->out[1] &&
              !bb->out[1]->out[1]);
   }
}
/* Predicate instructions and delete any branch at the end if it is
 * not a break from a loop.
 */
static void
predicate_instructions(struct nv_pc *pc, struct nv_basic_block *b,
                       struct nv_value *pred, uint8_t cc)
{
   struct nv_instruction *nvi, *prev;
   int s;

   if (!b->entry)
      return;

   for (nvi = b->entry; nvi; nvi = nvi->next) {
      prev = nvi;
      if (inst_is_noop(nvi))
         continue;
      for (s = 0; nvi->src[s]; ++s);
      assert(s < 6);
      nvi->predicate = s;
      nvi->cc = cc;
      nv_reference(pc, nvi, nvi->predicate, pred);
   }
   if (prev->opcode == NV_OP_BRA &&
       b->out_kind[0] != CFG_EDGE_LOOP_LEAVE &&
       b->out_kind[1] != CFG_EDGE_LOOP_LEAVE)
      nvc0_insn_delete(prev);
}
static INLINE boolean
may_predicate_insn(struct nv_instruction *nvi, struct nv_value *pred)
{
   if (nvi->def[0] && values_equal(nvi->def[0], pred))
      return FALSE;
   return nvc0_insn_is_predicateable(nvi);
}
/* Transform IF/ELSE/ENDIF constructs into predicated instructions
 * where feasible.
 */
static int
nv_pass_flatten(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *nvi;
   struct nv_value *pred;
   int k;
   int n0, n1; /* instruction counts of outgoing blocks */

   if (bb_is_if_else_endif(b)) {
      assert(b->exit && b->exit->opcode == NV_OP_BRA);

      assert(b->exit->predicate >= 0);
      pred = b->exit->src[b->exit->predicate]->value;

      n0 = n1 = 0;
      for (nvi = b->out[0]->entry; nvi; nvi = nvi->next, ++n0)
         if (!may_predicate_insn(nvi, pred))
            break;
      if (!nvi) {
         /* we're after register allocation, so there always is an ELSE block */
         for (nvi = b->out[1]->entry; nvi; nvi = nvi->next, ++n1)
            if (!may_predicate_insn(nvi, pred))
               break;
      }

      /* 12 is an arbitrary limit */
      if (!nvi && n0 < 12 && n1 < 12) {
         predicate_instructions(ctx->pc, b->out[0], pred, !b->exit->cc);
         predicate_instructions(ctx->pc, b->out[1], pred, b->exit->cc);

         nvc0_insn_delete(b->exit); /* delete the branch */

         /* and a potential joinat before it */
         if (b->exit && b->exit->opcode == NV_OP_JOINAT)
            nvc0_insn_delete(b->exit);

         /* remove join operations at the end of the conditional */
         k = (b->out[0]->out_kind[0] == CFG_EDGE_LOOP_LEAVE) ? 1 : 0;
         if ((nvi = b->out[0]->out[k]->entry)) {
            nvi->join = 0;
            if (nvi->opcode == NV_OP_JOIN)
               nvc0_insn_delete(nvi);
         }
      }
   }
   DESCEND_ARBITRARY(k, nv_pass_flatten);

   return 0;
}
/* Tests instructions for equality, but independently of sources. */
static boolean
is_operation_equal(struct nv_instruction *a, struct nv_instruction *b)
{
   if (a->opcode != b->opcode)
      return FALSE;
   if (nv_is_texture_op(a->opcode)) {
      if (a->ext.tex.t != b->ext.tex.t ||
          a->ext.tex.s != b->ext.tex.s)
         return FALSE;
      if (a->tex_dim != b->tex_dim ||
          a->tex_array != b->tex_array ||
          a->tex_cube != b->tex_cube ||
          a->tex_shadow != b->tex_shadow ||
          a->tex_live != b->tex_live)
         return FALSE;
   } else
   if (a->opcode == NV_OP_CVT) {
      if (a->ext.cvt.s != b->ext.cvt.s ||
          a->ext.cvt.d != b->ext.cvt.d)
         return FALSE;
   } else
   if (NV_BASEOP(a->opcode) == NV_OP_SET ||
       NV_BASEOP(a->opcode) == NV_OP_SLCT) {
      if (a->set_cond != b->set_cond)
         return FALSE;
   } else
   if (a->opcode == NV_OP_LINTERP ||
       a->opcode == NV_OP_PINTERP) {
      if (a->centroid != b->centroid ||
          a->flat != b->flat)
         return FALSE;
   }

   if (a->lanes != b->lanes ||
       a->patch != b->patch ||
       a->saturate != b->saturate)
      return FALSE;
   if (a->opcode == NV_OP_QUADOP) /* beware quadon ! */
      return FALSE;

   return TRUE;
}
/* local common subexpression elimination, stupid O(n^2) implementation */
static int
nv_pass_cse(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_instruction *ir, *ik, *next;
   struct nv_instruction *entry = b->phi ? b->phi : b->entry;
   int s, d;

   for (ir = entry; ir; ir = next) {
      next = ir->next;

      for (ik = entry; ik != ir; ik = ik->next) {
         if (!is_operation_equal(ir, ik))
            continue;
         if (!ir->def[0] || !ik->def[0])
            continue;

         if (ik->indirect != ir->indirect || ik->predicate != ir->predicate)
            continue;

         for (d = 0; d < 4; ++d) {
            if ((ir->def[d] ? 1 : 0) != (ik->def[d] ? 1 : 0))
               break;
            if (ir->def[d] &&
                !values_equal(ik->def[0], ir->def[0]))
               break;
         }
         if (d < 4)
            continue;

         for (s = 0; s < 5; ++s) {
            struct nv_value *a, *b;

            if ((ir->src[s] ? 1 : 0) != (ik->src[s] ? 1 : 0))
               break;
            if (!ir->src[s]) {
               s = 5;
               break;
            }
            if (ik->src[s]->mod != ir->src[s]->mod)
               break;

            a = ik->src[s]->value;
            b = ir->src[s]->value;
            if (a == b)
               continue;
            if (a->reg.file != b->reg.file ||
                a->reg.id < 0 || /* this excludes memory loads/stores */
                a->reg.id != b->reg.id)
               break;
         }
         if (s < 5)
            continue;

         nvc0_insn_delete(ir);
         for (d = 0; d < 4 && ir->def[d]; ++d)
            nvc0_pc_replace_value(ctx->pc, ir->def[d], ik->def[d]);
         break;
      }
   }
   DESCEND_ARBITRARY(s, nv_pass_cse);

   return 0;
}
/* Make sure all sources of an NV_OP_BIND are distinct, they need to occupy
 * neighbouring registers. CSE might have messed this up.
 * Just generate a MOV for each source to avoid conflicts if they're used in
 * multiple NV_OP_BIND at different positions.
 *
 * Add a dummy use of the pointer source of >= 8 byte loads after the load
 * to prevent it from being assigned a register which overlaps the load's
 * destination, which would produce random corruptions.
 */
static int
nv_pass_fixups(struct nv_pass *ctx, struct nv_basic_block *b)
{
   struct nv_value *val;
   struct nv_instruction *fix, *nvi, *next;
   int s;

   for (fix = b->entry; fix; fix = next) {
      next = fix->next;

      if (fix->opcode == NV_OP_LD) {
         if (fix->indirect >= 0 && fix->src[0]->value->reg.size >= 8) {
            nvi = nv_alloc_instruction(ctx->pc, NV_OP_UNDEF);
            nv_reference(ctx->pc, nvi, 0, fix->src[fix->indirect]->value);

            nvc0_insn_insert_after(fix, nvi);
         }
         continue;
      }
      if (fix->opcode == NV_OP_BIND) {
         for (s = 0; s < 4 && fix->src[s]; ++s) {
            val = fix->src[s]->value;

            nvi = nv_alloc_instruction(ctx->pc, NV_OP_MOV);
            nvi->def[0] = new_value_like(ctx->pc, val);
            nvi->def[0]->insn = nvi;
            nv_reference(ctx->pc, nvi, 0, val);
            nv_reference(ctx->pc, fix, s, nvi->def[0]);

            nvc0_insn_insert_before(fix, nvi);
         }
      }
   }
   DESCEND_ARBITRARY(s, nv_pass_fixups);

   return 0;
}
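/* Main optimization pipeline: CSE, algebraic simplification, modifier
 * lowering, load folding, reload elimination, DCE, memory access combining,
 * texture mask computation and final fixups, in that order.
 */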
static int
nv_pc_pass0(struct nv_pc *pc, struct nv_basic_block *root)
{
   struct pass_reld_elim *reldelim = NULL;
   struct nv_pass pass;
   struct nv_pass_dce dce;
   int ret;

   pass.pc = pc;
   dce.pc = pc;

   /* Do CSE so we can just compare values by pointer in subsequent passes. */
   pc->pass_seq++;
   ret = nv_pass_cse(&pass, root);
   if (ret)
      return ret;

   /* Do this first, so we don't have to pay attention
    * to whether sources are supported memory loads.
    */
   pc->pass_seq++;
   ret = nv_pass_algebraic_opt(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_lower_mods(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nvc0_pass_fold_loads(&pass, root);
   if (ret)
      return ret;

   if (pc->opt_reload_elim) {
      reldelim = CALLOC_STRUCT(pass_reld_elim);
      reldelim->pc = pc;

      pc->pass_seq++;
      ret = nv_pass_reload_elim(reldelim, root);
      if (ret) {
         FREE(reldelim);
         return ret;
      }
      memset(reldelim, 0, sizeof(struct pass_reld_elim));
      reldelim->pc = pc;
   }

   /* May run DCE before load-combining since that pass will clean up
    * after itself.
    */
   do {
      dce.removed = 0;
      pc->pass_seq++;
      ret = nv_pass_dce(&dce, root);
      if (ret)
         return ret;
   } while (dce.removed);

   if (pc->opt_reload_elim) {
      pc->pass_seq++;
      ret = nv_pass_mem_opt(reldelim, root);
      if (!ret) {
         memset(reldelim, 0, sizeof(struct pass_reld_elim));
         reldelim->pc = pc;

         pc->pass_seq++;
         ret = nv_pass_mem_opt(reldelim, root);
      }
      FREE(reldelim);
      if (ret)
         return ret;
   }

   ret = nv_pass_tex_mask(&pass, root);
   if (ret)
      return ret;

   pc->pass_seq++;
   ret = nv_pass_fixups(&pass, root);

   return ret;
}
int
nvc0_pc_exec_pass0(struct nv_pc *pc)
{
   int i, ret;

   for (i = 0; i < pc->num_subroutines + 1; ++i)
      if (pc->root[i] && (ret = nv_pc_pass0(pc, pc->root[i])))
         return ret;
   return 0;
}