1 #include "util/u_math.h"
4 #include "brw_context.h"
9 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
10 const struct brw_fp_instruction
*inst
,
15 reclaim_temps(struct brw_wm_compile
*c
);
18 /** Mark GRF register as used. */
/**
 * Sets c->used_grf[r] to GL_TRUE so that alloc_grf() will skip GRF r.
 * r is used as an array index without a range check in this function.
 */
20 prealloc_grf(struct brw_wm_compile
*c
, int r
)
22 c
->used_grf
[r
] = GL_TRUE
;
26 /** Mark given GRF register as not in use. */
/**
 * Clears c->used_grf[r] and lowers c->first_free_grf to r when r is the
 * smaller of the two (MIN2), so a later search that starts at
 * first_free_grf can see the freed slot.  The assert that r was actually
 * in use is commented out, so releasing an already-free register is not
 * detected.
 */
28 release_grf(struct brw_wm_compile
*c
, int r
)
30 /*assert(c->used_grf[r]);*/
31 c
->used_grf
[r
] = GL_FALSE
;
32 c
->first_free_grf
= MIN2(c
->first_free_grf
, r
);
36 /** Return index of a free GRF, mark it as used. */
/**
 * Searches c->used_grf[] for a free slot, starting at c->first_free_grf
 * and stopping at BRW_WM_MAX_GRF.  On a hit the slot is marked GL_TRUE and
 * first_free_grf is advanced to r + 1 as a hint for the next search.
 * When the first pass finds nothing, first_free_grf is reset to 0 and the
 * same scan is repeated (the inline comment says temps are reclaimed
 * first; the reclaim call itself is not visible here).  If that also
 * fails, every slot is asserted to be in use and a warning is printed
 * once per compile, guarded by c->out_of_regs.
 * NOTE(review): the warning format string has no trailing newline, unlike
 * the "Unexpected file type" message in get_reg() -- confirm whether
 * debug_printf() appends one.
 * NOTE(review): the value returned on failure is not visible here; the
 * callers alloc_tmp() and get_reg() fall back to fixed GRFs 50 and 51 --
 * confirm the sentinel they test for.
 */
38 alloc_grf(struct brw_wm_compile
*c
)
41 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
42 if (!c
->used_grf
[r
]) {
43 c
->used_grf
[r
] = GL_TRUE
;
44 c
->first_free_grf
= r
+ 1; /* a guess */
49 /* no free temps, try to reclaim some */
51 c
->first_free_grf
= 0;
54 for (r
= c
->first_free_grf
; r
< BRW_WM_MAX_GRF
; r
++) {
55 if (!c
->used_grf
[r
]) {
56 c
->used_grf
[r
] = GL_TRUE
;
57 c
->first_free_grf
= r
+ 1; /* a guess */
62 for (r
= 0; r
< BRW_WM_MAX_GRF
; r
++) {
63 assert(c
->used_grf
[r
]);
66 /* really, no free GRF regs found */
67 if (!c
->out_of_regs
) {
68 /* print warning once per compilation */
69 debug_printf("%s: ran out of registers for fragment program", __FUNCTION__
);
70 c
->out_of_regs
= GL_TRUE
;
77 /** Return number of GRF registers used */
/**
 * Scans GRF indices downward from BRW_WM_MAX_GRF - 1 to 0.
 * NOTE(review): the loop body is not visible here; presumably it returns
 * one past the highest index whose used_grf entry is set -- confirm.
 */
79 num_grf_used(const struct brw_wm_compile
*c
)
82 for (r
= BRW_WM_MAX_GRF
- 1; r
>= 0; r
--)
91 * Record the mapping of a Mesa register to a hardware register.
/**
 * Stores reg in c->wm_regs[file][index][component].reg and sets the
 * matching .inited flag to GL_TRUE, which get_reg() tests in order to
 * reuse the mapping.  file, index and component are used as array indices
 * without range checks here.
 */
93 static void set_reg(struct brw_wm_compile
*c
, int file
, int index
,
94 int component
, struct brw_reg reg
)
96 c
->wm_regs
[file
][index
][component
].reg
= reg
;
97 c
->wm_regs
[file
][index
][component
].inited
= GL_TRUE
;
/**
 * Returns the next temporary as a vec8 GRF built from
 * c->tmp_regs[c->tmp_index], post-incrementing tmp_index.
 * When tmp_index has reached tmp_max, a fresh GRF from alloc_grf() is
 * appended to tmp_regs[] and tmp_max grows by one.
 * The assignment r = 50 substitutes a fixed register ("XXX random
 * register!").  NOTE(review): the condition guarding it is not visible
 * here -- presumably an alloc_grf() failure; confirm.
 * The resulting reg.nr is asserted to be below BRW_WM_MAX_GRF.
 */
100 static struct brw_reg
alloc_tmp(struct brw_wm_compile
*c
)
104 /* if we need to allocate another temp, grow the tmp_regs[] array */
105 if (c
->tmp_index
== c
->tmp_max
) {
106 int r
= alloc_grf(c
);
108 /*printf("Out of temps in %s\n", __FUNCTION__);*/
109 r
= 50; /* XXX random register! */
111 c
->tmp_regs
[ c
->tmp_max
++ ] = r
;
114 /* form the GRF register */
115 reg
= brw_vec8_grf(c
->tmp_regs
[ c
->tmp_index
++ ], 0);
116 /*printf("alloc_temp %d\n", reg.nr);*/
117 assert(reg
.nr
< BRW_WM_MAX_GRF
);
123 * Save current temp register info.
124 * There must be a matching call to release_tmps().
/**
 * Returns an int mark; invoke_subroutine() passes it back to
 * release_tmps() once its temporaries are no longer needed.
 * NOTE(review): the body is not visible here; presumably the mark is the
 * current c->tmp_index -- confirm.
 */
126 static int mark_tmps(struct brw_wm_compile
*c
)
/**
 * Builds a vec8 GRF register from c->tmp_regs[index], passing 0 as the
 * second argument of brw_vec8_grf().  index is not range-checked here.
 */
131 static struct brw_reg
lookup_tmp( struct brw_wm_compile
*c
, int index
)
133 return brw_vec8_grf( c
->tmp_regs
[ index
], 0 );
/**
 * Counterpart of mark_tmps(): takes the mark value that mark_tmps()
 * returned.
 * NOTE(review): the body is not visible here; presumably it rewinds
 * c->tmp_index to mark so temps handed out since then can be reused --
 * confirm.
 */
136 static void release_tmps(struct brw_wm_compile
*c
, int mark
)
142 * Convert Mesa src register to brw register.
144 * Since we're running in SOA mode each Mesa register corresponds to four
145 * hardware registers. We allocate the hardware registers as needed here.
147 * \param file register file, one of PROGRAM_x
148 * \param index register number
149 * \param component src component (X=0, Y=1, Z=2, W=3)
150 * \param nr not used?!?
151 * \param neg negate value?
152 * \param abs take absolute value?
/**
 * Maps (file, index, component) to a hardware register through
 * c->wm_regs[][][].  If the entry is already .inited its cached reg is
 * used; otherwise a GRF is taken from alloc_grf(), wrapped with
 * brw_vec8_grf(grf, 0) and recorded through set_reg().
 * An unexpected file logs "Unexpected file type" and yields
 * brw_null_reg(); an earlier path (its case label is not visible here)
 * also returns brw_null_reg().  component is asserted to be below 4.
 * The assignment grf = 51 substitutes a fixed register when no GRF is
 * left ("totally out of temps").
 * neg is tested as a per-component bit mask (1 << component).
 * NOTE(review): the case list names BRW_FILE_PAYLOAD while prealloc_reg()
 * registers payload entries under TGSI_FILE_PAYLOAD -- confirm both names
 * resolve to the same file index.
 * NOTE(review): the handling of abs is not visible here.
 */
154 static struct brw_reg
155 get_reg(struct brw_wm_compile
*c
, int file
, int index
, int component
,
156 int nr
, GLuint neg
, GLuint abs
)
161 return brw_null_reg();
163 case TGSI_FILE_CONSTANT
:
164 case TGSI_FILE_TEMPORARY
:
165 case TGSI_FILE_INPUT
:
166 case TGSI_FILE_OUTPUT
:
167 case BRW_FILE_PAYLOAD
:
171 debug_printf("%s: Unexpected file type\n", __FUNCTION__
);
172 return brw_null_reg();
176 assert(component
< 4);
178 /* see if we've already allocated a HW register for this Mesa register */
179 if (c
->wm_regs
[file
][index
][component
].inited
) {
181 reg
= c
->wm_regs
[file
][index
][component
].reg
;
184 /* no, allocate new register */
185 int grf
= alloc_grf(c
);
186 /*printf("alloc grf %d for reg %d:%d.%d\n", grf, file, index, component);*/
188 /* totally out of temps */
189 grf
= 51; /* XXX random register! */
192 reg
= brw_vec8_grf(grf
, 0);
193 /*printf("Alloc new grf %d for %d.%d\n", reg.nr, index, component);*/
195 set_reg(c
, file
, index
, component
, reg
);
198 if (neg
& (1 << component
)) {
210 * Find first/last instruction that references each temporary register.
/**
 * Resets every intBegin[] / intEnd[] entry (MAX_PROGRAM_TEMPS of each) to
 * -1, then walks the instructions and calls update_interval() for each
 * PROGRAM_TEMPORARY source (numSrc = 3 per instruction) and destination
 * register, using the instruction number i.
 * While inside a loop (loopStackDepth > 0) the interval is also extended
 * to the End of the innermost loopStack entry, which is taken from the
 * BGNLOOP instruction's BranchTarget.
 * NOTE(review): no check of loopStackDepth against MAX_LOOP_NESTING is
 * visible before loopStack[loopStackDepth] is written -- confirm.
 * NOTE(review): the bodies of the RelAddr, ENDLOOP and CAL branches are
 * not visible here.
 */
213 _mesa_find_temp_intervals(const struct prog_instruction
*instructions
,
214 GLuint numInstructions
,
215 GLint intBegin
[MAX_PROGRAM_TEMPS
],
216 GLint intEnd
[MAX_PROGRAM_TEMPS
])
220 GLuint Start
, End
; /**< Start, end instructions of loop */
222 struct loop_info loopStack
[MAX_LOOP_NESTING
];
223 GLuint loopStackDepth
= 0;
226 for (i
= 0; i
< MAX_PROGRAM_TEMPS
; i
++){
227 intBegin
[i
] = intEnd
[i
] = -1;
230 /* Scan instructions looking for temporary registers */
231 for (i
= 0; i
< numInstructions
; i
++) {
232 const struct prog_instruction
*inst
= instructions
+ i
;
233 if (inst
->Opcode
== OPCODE_BGNLOOP
) {
234 loopStack
[loopStackDepth
].Start
= i
;
235 loopStack
[loopStackDepth
].End
= inst
->BranchTarget
;
238 else if (inst
->Opcode
== OPCODE_ENDLOOP
) {
241 else if (inst
->Opcode
== OPCODE_CAL
) {
245 const GLuint numSrc
= 3;
247 for (j
= 0; j
< numSrc
; j
++) {
248 if (inst
->SrcReg
[j
].File
== PROGRAM_TEMPORARY
) {
249 const GLuint index
= inst
->SrcReg
[j
].Index
;
250 if (inst
->SrcReg
[j
].RelAddr
)
252 update_interval(intBegin
, intEnd
, index
, i
);
253 if (loopStackDepth
> 0) {
254 /* extend temp register's interval to end of loop */
255 GLuint loopEnd
= loopStack
[loopStackDepth
- 1].End
;
256 update_interval(intBegin
, intEnd
, index
, loopEnd
);
260 if (inst
->DstReg
.File
== PROGRAM_TEMPORARY
) {
261 const GLuint index
= inst
->DstReg
.Index
;
262 if (inst
->DstReg
.RelAddr
)
264 update_interval(intBegin
, intEnd
, index
, i
);
265 if (loopStackDepth
> 0) {
266 /* extend temp register's interval to end of loop */
267 GLuint loopEnd
= loopStack
[loopStackDepth
- 1].End
;
268 update_interval(intBegin
, intEnd
, index
, loopEnd
);
279 * This is called if we run out of GRF registers. Examine the live intervals
280 * of temp regs in the program and free those which won't be used again.
/**
 * Computes temp live intervals with _mesa_find_temp_intervals() and, for
 * every temp whose intEnd is not -1 and lies before c->cur_inst, clears
 * the .inited flag of each of its four component mappings in
 * c->wm_regs[TGSI_FILE_TEMPORARY] that is currently set.
 * NOTE(review): the hardware register number r is read from the mapping;
 * presumably it is handed to release_grf() on a line not visible here --
 * confirm.
 * NOTE(review): intBegin / intEnd are sized BRW_WM_MAX_TEMPS, while
 * _mesa_find_temp_intervals() initialises MAX_PROGRAM_TEMPS entries --
 * confirm BRW_WM_MAX_TEMPS >= MAX_PROGRAM_TEMPS.
 */
283 reclaim_temps(struct brw_wm_compile
*c
)
285 GLint intBegin
[BRW_WM_MAX_TEMPS
];
286 GLint intEnd
[BRW_WM_MAX_TEMPS
];
289 /*printf("Reclaim temps:\n");*/
291 _mesa_find_temp_intervals(c
->fp_instructions
, c
->nr_fp_insns
,
294 for (index
= 0; index
< BRW_WM_MAX_TEMPS
; index
++) {
295 if (intEnd
[index
] != -1 && intEnd
[index
] < c
->cur_inst
) {
296 /* program temp[i] can be freed */
298 /*printf(" temp[%d] is dead\n", index);*/
299 for (component
= 0; component
< 4; component
++) {
300 if (c
->wm_regs
[TGSI_FILE_TEMPORARY
][index
][component
].inited
) {
301 int r
= c
->wm_regs
[TGSI_FILE_TEMPORARY
][index
][component
].reg
.nr
;
304 printf(" Reclaim temp %d, reg %d at inst %d\n",
305 index, r, c->cur_inst);
307 c
->wm_regs
[TGSI_FILE_TEMPORARY
][index
][component
].inited
= GL_FALSE
;
318 * Preallocate registers. This sets up the Mesa to hardware register
319 * mapping for certain registers, such as constants (uniforms/state vars)
/**
 * Builds the initial register layout for a compile:
 *  - clears c->used_grf[] and resets first_free_grf to 0;
 *  - maps the four PAYLOAD_DEPTH components (GRF i * 2 for
 *    i < key.nr_depth_regs, GRF 0 in the other visible assignment) and
 *    advances reg_index by 2 * nr_depth_regs;
 *  - selects use_const_buffer when (nr_params + nr_temps) * 4 + reg_index
 *    exceeds 80; without a constant buffer each parameter channel is
 *    mapped to a vec1 GRF and nr_creg GRFs are consumed;
 *  - maps enabled fragment inputs, with one vec8 GRF recorded for all
 *    four components of an input;
 *  - reserves GRFs 126 and 127 via prealloc_grf();
 *  - resolves the four destination channels of selected instructions up
 *    front and asserts they are consecutive GRFs (the inline comment says
 *    this is for texture results);
 *  - with a constant buffer, allocates three GRFs for c->current_const[]
 *    and sets each index to -1.
 * NOTE(review): the printf() calls near the end look like debug output;
 * any guard around them is not visible here -- confirm.
 * NOTE(review): payload entries are stored under TGSI_FILE_PAYLOAD while
 * get_reg() lists BRW_FILE_PAYLOAD -- confirm the two are the same value.
 */
322 static void prealloc_reg(struct brw_wm_compile
*c
)
326 int urb_read_length
= 0;
327 GLuint inputs
= FRAG_BIT_WPOS
| c
->fp_interp_emitted
;
328 GLuint reg_index
= 0;
330 memset(c
->used_grf
, GL_FALSE
, sizeof(c
->used_grf
));
331 c
->first_free_grf
= 0;
333 for (i
= 0; i
< 4; i
++) {
334 if (i
< c
->key
.nr_depth_regs
)
335 reg
= brw_vec8_grf(i
* 2, 0);
337 reg
= brw_vec8_grf(0, 0);
338 set_reg(c
, TGSI_FILE_PAYLOAD
, PAYLOAD_DEPTH
, i
, reg
);
340 reg_index
+= 2 * c
->key
.nr_depth_regs
;
344 const GLuint nr_params
= c
->fp
->program
.Base
.Parameters
->NumParameters
;
345 const GLuint nr_temps
= c
->fp
->program
.Base
.NumTemporaries
;
347 /* use a real constant buffer, or just use a section of the GRF? */
348 /* XXX this heuristic may need adjustment... */
349 if ((nr_params
+ nr_temps
) * 4 + reg_index
> 80)
350 c
->fp
->use_const_buffer
= GL_TRUE
;
352 c
->fp
->use_const_buffer
= GL_FALSE
;
353 /*printf("WM use_const_buffer = %d\n", c->fp->use_const_buffer);*/
355 if (c
->fp
->use_const_buffer
) {
356 /* We'll use a real constant buffer and fetch constants from
357 * it with a dataport read message.
360 /* number of float constants in CURBE */
361 c
->prog_data
.nr_params
= 0;
364 const struct gl_program_parameter_list
*plist
=
365 c
->fp
->program
.Base
.Parameters
;
368 /* number of float constants in CURBE */
369 c
->prog_data
.nr_params
= 4 * nr_params
;
371 /* loop over program constants (float[4]) */
372 for (i
= 0; i
< nr_params
; i
++) {
373 /* loop over XYZW channels */
374 for (j
= 0; j
< 4; j
++, index
++) {
375 reg
= brw_vec1_grf(reg_index
+ index
/ 8, index
% 8);
376 /* Save pointer to parameter/constant value.
377 * Constants will be copied in prepare_constant_buffer()
379 c
->prog_data
.param
[index
] = &plist
->ParameterValues
[i
][j
];
380 set_reg(c
, TGSI_FILE_STATE_VAR
, i
, j
, reg
);
383 /* number of constant regs used (each reg is float[8]) */
384 c
->nr_creg
= 2 * ((4 * nr_params
+ 15) / 16);
385 reg_index
+= c
->nr_creg
;
389 /* fragment shader inputs */
390 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
393 if (i
>= VERT_RESULT_VAR0
)
394 fp_input
= i
- VERT_RESULT_VAR0
+ FRAG_ATTRIB_VAR0
;
395 else if (i
<= VERT_RESULT_TEX7
)
400 if (fp_input
>= 0 && inputs
& (1 << fp_input
)) {
401 urb_read_length
= reg_index
;
402 reg
= brw_vec8_grf(reg_index
, 0);
403 for (j
= 0; j
< 4; j
++)
404 set_reg(c
, TGSI_FILE_PAYLOAD
, fp_input
, j
, reg
);
406 if (c
->key
.nr_vp_outputs
> i
) {
411 c
->prog_data
.first_curbe_grf
= c
->key
.nr_depth_regs
* 2;
412 c
->prog_data
.urb_read_length
= urb_read_length
;
413 c
->prog_data
.curb_read_length
= c
->nr_creg
;
414 c
->emit_mask_reg
= brw_uw1_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
416 c
->stack
= brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, reg_index
, 0);
419 /* mark GRF regs [0..reg_index-1] as in-use */
420 for (i
= 0; i
< reg_index
; i
++)
423 /* Don't use GRF 126, 127. Using them seems to lead to GPU lock-ups */
424 prealloc_grf(c
, 126);
425 prealloc_grf(c
, 127);
427 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
428 const struct brw_fp_instruction
*inst
= &c
->fp_instructions
[i
];
429 struct brw_reg dst
[4];
431 switch (inst
->Opcode
) {
434 /* Allocate the channels of texture results contiguously,
435 * since they are written out that way by the sampler unit.
437 for (j
= 0; j
< 4; j
++) {
438 dst
[j
] = get_dst_reg(c
, inst
, j
);
440 assert(dst
[j
].nr
== dst
[j
- 1].nr
+ 1);
448 /* An instruction may reference up to three constants.
449 * They'll be found in these registers.
450 * XXX alloc these on demand!
452 if (c
->fp
->use_const_buffer
) {
453 for (i
= 0; i
< 3; i
++) {
454 c
->current_const
[i
].index
= -1;
455 c
->current_const
[i
].reg
= brw_vec8_grf(alloc_grf(c
), 0);
459 printf("USE CONST BUFFER? %d\n", c
->fp
->use_const_buffer
);
460 printf("AFTER PRE_ALLOC, reg_index = %d\n", reg_index
);
466 * Check if any of the instruction's src registers are constants, uniforms,
467 * or statevars. If so, fetch any constants that we don't already have in
468 * the three GRF slots.
/**
 * For each of the three source operands whose File is TGSI_FILE_IMMEDIATE
 * or TGSI_FILE_CONSTANT, records src->Index in c->current_const[i].index
 * and issues the fetch into c->current_const[i].reg using byte offset
 * 16 * src->Index and binding table entry SURF_INDEX_FRAG_CONST_BUFFER
 * (the name of the fetch call is not visible here).
 * NOTE(review): get_src_reg() also sends TGSI_FILE_STATE_VAR and
 * TGSI_FILE_UNIFORM operands to get_src_reg_const(), but those files are
 * not fetched here -- confirm they cannot occur.
 */
470 static void fetch_constants(struct brw_wm_compile
*c
,
471 const struct brw_fp_instruction
*inst
)
473 struct brw_compile
*p
= &c
->func
;
476 /* loop over instruction src regs */
477 for (i
= 0; i
< 3; i
++) {
478 const struct prog_src_register
*src
= &inst
->SrcReg
[i
];
479 if (src
->File
== TGSI_FILE_IMMEDIATE
||
480 src
->File
== TGSI_FILE_CONSTANT
) {
481 c
->current_const
[i
].index
= src
->Index
;
484 printf(" fetch const[%d] for arg %d into reg %d\n",
485 src
->Index
, i
, c
->current_const
[i
].reg
.nr
);
488 /* need to fetch the constant now */
490 c
->current_const
[i
].reg
, /* writeback dest */
491 src
->RelAddr
, /* relative indexing? */
492 16 * src
->Index
, /* byte offset */
493 SURF_INDEX_FRAG_CONST_BUFFER
/* binding table index */
501 * Convert Mesa dst register to brw register.
/**
 * Resolves the destination operand of inst by forwarding
 * inst->DstReg.File and inst->DstReg.Index, together with component and
 * nr, to get_reg().
 * NOTE(review): the remaining parameters and the trailing get_reg()
 * arguments are not visible here.
 */
503 static struct brw_reg
get_dst_reg(struct brw_wm_compile
*c
,
504 const struct brw_fp_instruction
*inst
,
508 return get_reg(c
, inst
->DstReg
.File
, inst
->DstReg
.Index
, component
, nr
,
/**
 * Returns a register description for one component of a constant held in
 * c->current_const[srcRegIndex].reg.  The register is given
 * stride(reg, 0, 1, 0) and subnr = component * 4 to select a single
 * float, and negate() / brw_abs() are applied on paths whose conditions
 * are not visible here.
 * Asserts component < 4, srcRegIndex < 3 and that the slot's index is
 * not -1.
 */
513 static struct brw_reg
514 get_src_reg_const(struct brw_wm_compile
*c
,
515 const struct brw_fp_instruction
*inst
,
516 GLuint srcRegIndex
, GLuint component
)
518 /* We should have already fetched the constant from the constant
519 * buffer in fetch_constants(). Now we just have to return a
520 * register description that extracts the needed component and
521 * smears it across all eight vector components.
523 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
524 struct brw_reg const_reg
;
526 assert(component
< 4);
527 assert(srcRegIndex
< 3);
528 assert(c
->current_const
[srcRegIndex
].index
!= -1);
529 const_reg
= c
->current_const
[srcRegIndex
].reg
;
531 /* extract desired float from the const_reg, and smear */
532 const_reg
= stride(const_reg
, 0, 1, 0);
533 const_reg
.subnr
= component
* 4;
536 const_reg
= negate(const_reg
);
538 const_reg
= brw_abs(const_reg
);
541 printf(" form const[%d].%d for arg %d, reg %d\n",
542 c
->current_const
[srcRegIndex
].index
,
553 * Convert Mesa src register to brw register.
/**
 * Resolves source operand srcRegIndex of inst for the given channel.
 * The channel is first mapped through src->Swizzle with BRW_GET_SWZ;
 * SWIZZLE_ZERO and SWIZZLE_ONE become the immediates 0.0F and 1.0F.
 * With c->fp->use_const_buffer set, operands in the STATE_VAR, CONSTANT
 * and UNIFORM files are served by get_src_reg_const(); everything else
 * goes to get_reg() with src->Negate and src->Abs.
 */
555 static struct brw_reg
get_src_reg(struct brw_wm_compile
*c
,
556 const struct brw_fp_instruction
*inst
,
557 GLuint srcRegIndex
, GLuint channel
)
559 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
561 const GLuint component
= BRW_GET_SWZ(src
->Swizzle
, channel
);
563 /* Extended swizzle terms */
564 if (component
== SWIZZLE_ZERO
) {
565 return brw_imm_f(0.0F
);
567 else if (component
== SWIZZLE_ONE
) {
568 return brw_imm_f(1.0F
);
571 if (c
->fp
->use_const_buffer
&&
572 (src
->File
== TGSI_FILE_STATE_VAR
||
573 src
->File
== TGSI_FILE_CONSTANT
||
574 src
->File
== TGSI_FILE_UNIFORM
)) {
575 return get_src_reg_const(c
, inst
, srcRegIndex
, component
);
578 /* other type of source register */
579 return get_reg(c
, src
->File
, src
->Index
, component
, nr
,
580 src
->Negate
, src
->Abs
);
586 * Same as \sa get_src_reg() but if the register is an immediate, emit
587 * a brw_reg encoding the immediate.
588 * Note that a brw instruction only allows one src operand to be an immediate.
589 * For instructions with more than one operand, only the second can be an
590 * immediate. This means that we treat some immediates as constants
591 * (which is why TGSI_FILE_IMMEDIATE is checked in fetch_constants()).
/**
 * For a TGSI_FILE_IMMEDIATE operand, reads the swizzled component from
 * c->fp->program.Base.Parameters->ParameterValues[src->Index] and returns
 * it as brw_imm_f(value); FABSF() is applied on a path whose condition is
 * not visible here.  Any other file is delegated to get_src_reg().
 * NOTE(review): unlike get_src_reg(), component is not compared against
 * SWIZZLE_ZERO / SWIZZLE_ONE before it indexes param[] in the visible
 * code -- confirm extended swizzles cannot reach this path.
 */
594 static struct brw_reg
get_src_reg_imm(struct brw_wm_compile
*c
,
595 const struct brw_fp_instruction
*inst
,
596 GLuint srcRegIndex
, GLuint channel
)
598 const struct prog_src_register
*src
= &inst
->SrcReg
[srcRegIndex
];
599 if (src
->File
== TGSI_FILE_IMMEDIATE
) {
601 const int component
= BRW_GET_SWZ(src
->Swizzle
, channel
);
602 const GLfloat
*param
=
603 c
->fp
->program
.Base
.Parameters
->ParameterValues
[src
->Index
];
604 GLfloat value
= param
[component
];
608 value
= FABSF(value
);
610 printf(" form immed value %f for chan %d\n", value
, channel
);
612 return brw_imm_f(value
);
615 return get_src_reg(c
, inst
, srcRegIndex
, channel
);
621 * Subroutines are minimal support for reusable instruction sequences.
622 * They are implemented as simply as possible to minimise overhead: there
623 * is no explicit support for communication between the caller and callee
624 * other than saving the return address in a temporary register, nor is
625 * there any automatic local storage. This implies that great care is
626 * required before attempting reentrancy or any kind of nested
627 * subroutine invocations.
/**
 * Calls the given subroutine, emitting its body on first use.
 * If c->subroutines[subroutine] is already non-zero, a return address
 * (IP + (2 << 4)) is stored in a temp and IP is advanced by an offset
 * derived from the recorded position.  Otherwise an ADD with a
 * placeholder immediate 0 is emitted, the current p->nr_insn is recorded
 * in c->subroutines[], a MOV of the return address into IP closes the
 * body, and the placeholder is patched to (p->nr_insn - base) << 4.
 * The IP manipulation runs with BRW_MASK_DISABLE between push/pop of the
 * instruction state, and both paths release their temp via
 * release_tmps().
 * NOTE(review): a subroutine recorded at instruction index 0 would look
 * the same as "not yet emitted" -- confirm this cannot happen.
 * NOTE(review): the call of the emit() callback is not visible here.
 */
629 static void invoke_subroutine( struct brw_wm_compile
*c
,
630 enum _subroutine subroutine
,
631 void (*emit
)( struct brw_wm_compile
* ) )
633 struct brw_compile
*p
= &c
->func
;
635 assert( subroutine
< BRW_WM_MAX_SUBROUTINE
);
637 if( c
->subroutines
[ subroutine
] ) {
638 /* subroutine previously emitted: reuse existing instructions */
640 int mark
= mark_tmps( c
);
641 struct brw_reg return_address
= retype( alloc_tmp( c
),
642 BRW_REGISTER_TYPE_UD
);
643 int here
= p
->nr_insn
;
645 brw_push_insn_state(p
);
646 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
647 brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 2 << 4 ) );
649 brw_ADD( p
, brw_ip_reg(), brw_ip_reg(),
650 brw_imm_d( ( c
->subroutines
[ subroutine
] -
652 brw_pop_insn_state(p
);
654 release_tmps( c
, mark
);
656 /* previously unused subroutine: emit, and mark for later reuse */
658 int mark
= mark_tmps( c
);
659 struct brw_reg return_address
= retype( alloc_tmp( c
),
660 BRW_REGISTER_TYPE_UD
);
661 struct brw_instruction
*calc
;
662 int base
= p
->nr_insn
;
664 brw_push_insn_state(p
);
665 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
666 calc
= brw_ADD( p
, return_address
, brw_ip_reg(), brw_imm_ud( 0 ) );
667 brw_pop_insn_state(p
);
669 c
->subroutines
[ subroutine
] = p
->nr_insn
;
673 brw_push_insn_state(p
);
674 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
675 brw_MOV( p
, brw_ip_reg(), return_address
);
676 brw_pop_insn_state(p
);
678 brw_set_src1( calc
, brw_imm_ud( ( p
->nr_insn
- base
) << 4 ) );
680 release_tmps( c
, mark
);
/**
 * Emits brw_RNDZ(dst, src) for channels i in 0..3 of source operand 0,
 * with saturation set from inst->SaturateMode before the loop and cleared
 * afterwards.
 * NOTE(review): mask is read from DstReg.WriteMask but the per-channel
 * test is not visible here.
 */
684 static void emit_trunc( struct brw_wm_compile
*c
,
685 const struct brw_fp_instruction
*inst
)
688 struct brw_compile
*p
= &c
->func
;
689 GLuint mask
= inst
->DstReg
.WriteMask
;
690 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
691 for (i
= 0; i
< 4; i
++) {
693 struct brw_reg src
, dst
;
694 dst
= get_dst_reg(c
, inst
, i
);
695 src
= get_src_reg(c
, inst
, 0, i
);
696 brw_RNDZ(p
, dst
, src
);
699 brw_set_saturate(p
, 0);
/**
 * Emits brw_MOV(dst, src) for channels i in 0..3 of source operand 0,
 * with saturation set from inst->SaturateMode before the loop and cleared
 * afterwards.  The source is fetched with get_src_reg(); the
 * get_src_reg_imm() variant is commented out because some moves from
 * immediates were found unreliable.
 * NOTE(review): mask is read from DstReg.WriteMask but the per-channel
 * test is not visible here.
 */
702 static void emit_mov( struct brw_wm_compile
*c
,
703 const struct brw_fp_instruction
*inst
)
706 struct brw_compile
*p
= &c
->func
;
707 GLuint mask
= inst
->DstReg
.WriteMask
;
708 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
709 for (i
= 0; i
< 4; i
++) {
711 struct brw_reg src
, dst
;
712 dst
= get_dst_reg(c
, inst
, i
);
713 /* XXX some moves from immediate value don't work reliably!!! */
714 /*src = get_src_reg_imm(c, inst, 0, i);*/
715 src
= get_src_reg(c
, inst
, 0, i
);
716 brw_MOV(p
, dst
, src
);
719 brw_set_saturate(p
, 0);
/**
 * Computes pixel X / Y into destination channels 0 and 1 when the
 * WRITEMASK_X / WRITEMASK_Y bit is set.  The operands are UW views of r1
 * (brw_vec1_grf(1, 0)) at sub-offsets 4 and 5 with stride(2, 4, 0), and
 * the packed immediates 0x10101010 (X) and 0x11001100 (Y).
 * NOTE(review): the emitted opcode is not visible here.
 */
722 static void emit_pixel_xy(struct brw_wm_compile
*c
,
723 const struct brw_fp_instruction
*inst
)
725 struct brw_reg r1
= brw_vec1_grf(1, 0);
726 struct brw_reg r1_uw
= retype(r1
, BRW_REGISTER_TYPE_UW
);
728 struct brw_reg dst0
, dst1
;
729 struct brw_compile
*p
= &c
->func
;
730 GLuint mask
= inst
->DstReg
.WriteMask
;
732 dst0
= get_dst_reg(c
, inst
, 0);
733 dst1
= get_dst_reg(c
, inst
, 1);
734 /* Calculate pixel centers by adding 1 or 0 to each of the
735 * micro-tile coordinates passed in r1.
737 if (mask
& WRITEMASK_X
) {
739 vec8(retype(dst0
, BRW_REGISTER_TYPE_UW
)),
740 stride(suboffset(r1_uw
, 4), 2, 4, 0),
741 brw_imm_v(0x10101010));
744 if (mask
& WRITEMASK_Y
) {
746 vec8(retype(dst1
, BRW_REGISTER_TYPE_UW
)),
747 stride(suboffset(r1_uw
, 5), 2, 4, 0),
748 brw_imm_v(0x11001100));
/**
 * Computes delta X / Y into destination channels 0 and 1 (per WRITEMASK_X
 * and WRITEMASK_Y) from source operand 0 channels 0 and 1 retyped to UW
 * and the origin held in r1; the Y path uses negate(suboffset(r1, 1)).
 * NOTE(review): the emitted opcode and the r1 operand of the X path are
 * not visible here.
 */
752 static void emit_delta_xy(struct brw_wm_compile
*c
,
753 const struct brw_fp_instruction
*inst
)
755 struct brw_reg r1
= brw_vec1_grf(1, 0);
756 struct brw_reg dst0
, dst1
, src0
, src1
;
757 struct brw_compile
*p
= &c
->func
;
758 GLuint mask
= inst
->DstReg
.WriteMask
;
760 dst0
= get_dst_reg(c
, inst
, 0);
761 dst1
= get_dst_reg(c
, inst
, 1);
762 src0
= get_src_reg(c
, inst
, 0, 0);
763 src1
= get_src_reg(c
, inst
, 0, 1);
764 /* Calc delta X,Y by subtracting origin in r1 from the pixel
767 if (mask
& WRITEMASK_X
) {
770 retype(src0
, BRW_REGISTER_TYPE_UW
),
774 if (mask
& WRITEMASK_Y
) {
777 retype(src1
, BRW_REGISTER_TYPE_UW
),
778 negate(suboffset(r1
,1)));
/**
 * Copies control information into message register base_reg + 1 with
 * masking disabled (inside push/pop of the instruction state), then
 * issues the framebuffer write with a null UW destination and
 * brw_vec8_grf(0, 0) retyped to UW as an operand.
 * NOTE(review): the parameter list after c and the remaining send
 * arguments are not visible here; emit_fb_write() calls this as
 * fire_fb_write(c, 0, nr, target, eot).
 */
783 static void fire_fb_write( struct brw_wm_compile
*c
,
789 struct brw_compile
*p
= &c
->func
;
790 /* Pass through control information:
792 /* mov (8) m1.0<1>:ud r1.0<8;8,1>:ud { Align1 NoMask } */
794 brw_push_insn_state(p
);
795 brw_set_mask_control(p
, BRW_MASK_DISABLE
); /* ? */
797 brw_message_reg(base_reg
+ 1),
799 brw_pop_insn_state(p
);
801 /* Send framebuffer write message: */
803 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
805 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
/**
 * Assembles and sends a framebuffer write message:
 *  - copies the four channels of source 0 into message registers
 *    nr + channel;
 *  - when key.source_depth_to_render_target is set, moves a depth value
 *    into message register nr: source 2 channel 2 under
 *    key.computes_depth, with source 1 channel 1 as the other visible
 *    variant;
 *  - when key.dest_depth_reg is set, moves destination depth taken from
 *    source 1 (component dest_depth_reg / 2, with dest_depth_reg % 2
 *    selecting between the visible variants);
 *  - derives target from inst->Aux >> 1 and calls fire_fb_write().
 * NOTE(review): how nr advances between these steps and how eot is
 * derived are not visible here.
 */
812 static void emit_fb_write(struct brw_wm_compile
*c
,
813 const struct brw_fp_instruction
*inst
)
815 struct brw_compile
*p
= &c
->func
;
821 /* Reserve a space for AA - may not be needed:
823 if (c
->key
.aa_dest_stencil_reg
)
826 brw_push_insn_state(p
);
827 for (channel
= 0; channel
< 4; channel
++) {
828 src0
= get_src_reg(c
, inst
, 0, channel
);
829 /* mov (8) m2.0<1>:ud r28.0<8;8,1>:ud { Align1 } */
830 /* mov (8) m6.0<1>:ud r29.0<8;8,1>:ud { Align1 SecHalf } */
831 brw_MOV(p
, brw_message_reg(nr
+ channel
), src0
);
833 /* skip over the regs populated above: */
835 brw_pop_insn_state(p
);
837 if (c
->key
.source_depth_to_render_target
) {
838 if (c
->key
.computes_depth
) {
839 src0
= get_src_reg(c
, inst
, 2, 2);
840 brw_MOV(p
, brw_message_reg(nr
), src0
);
843 src0
= get_src_reg(c
, inst
, 1, 1);
844 brw_MOV(p
, brw_message_reg(nr
), src0
);
850 if (c
->key
.dest_depth_reg
) {
851 const GLuint comp
= c
->key
.dest_depth_reg
/ 2;
852 const GLuint off
= c
->key
.dest_depth_reg
% 2;
855 /* XXX this code needs review/testing */
856 struct brw_reg arg1_0
= get_src_reg(c
, inst
, 1, comp
);
857 struct brw_reg arg1_1
= get_src_reg(c
, inst
, 1, comp
+1);
859 brw_push_insn_state(p
);
860 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
862 brw_MOV(p
, brw_message_reg(nr
), offset(arg1_0
, 1));
864 brw_MOV(p
, brw_message_reg(nr
+1), arg1_1
);
865 brw_pop_insn_state(p
);
869 struct brw_reg src
= get_src_reg(c
, inst
, 1, 1);
870 brw_MOV(p
, brw_message_reg(nr
), src
);
875 target
= inst
->Aux
>> 1;
877 fire_fb_write(c
, 0, nr
, target
, eot
);
/**
 * When WRITEMASK_W is set: evaluates brw_LINE / brw_MAC into message
 * register 2 using interp3 = brw_vec1_grf(src0.nr + 1, 4) (and its
 * suboffset 1) with the deltas from source 1 channels 0 and 1, then
 * issues a math operation with BRW_MATH_FUNCTION_INV,
 * BRW_MATH_SATURATE_NONE and BRW_MATH_PRECISION_FULL.
 * NOTE(review): the math call's destination and message operands are not
 * visible here.
 */
880 static void emit_pixel_w( struct brw_wm_compile
*c
,
881 const struct brw_fp_instruction
*inst
)
883 struct brw_compile
*p
= &c
->func
;
884 GLuint mask
= inst
->DstReg
.WriteMask
;
885 if (mask
& WRITEMASK_W
) {
886 struct brw_reg dst
, src0
, delta0
, delta1
;
887 struct brw_reg interp3
;
889 dst
= get_dst_reg(c
, inst
, 3);
890 src0
= get_src_reg(c
, inst
, 0, 0);
891 delta0
= get_src_reg(c
, inst
, 1, 0);
892 delta1
= get_src_reg(c
, inst
, 1, 1);
894 interp3
= brw_vec1_grf(src0
.nr
+1, 4);
895 /* Calc 1/w - just linterp wpos[3] optimized by putting the
896 * result straight into a message reg.
898 brw_LINE(p
, brw_null_reg(), interp3
, delta0
);
899 brw_MAC(p
, brw_message_reg(2), suboffset(interp3
, 1), delta1
);
903 BRW_MATH_FUNCTION_INV
,
904 BRW_MATH_SATURATE_NONE
,
906 BRW_MATH_PRECISION_FULL
);
/**
 * Linear interpolation: for channels i in 0..3 emits
 * brw_LINE(null, interp[i], delta0) followed by
 * brw_MAC(dst, suboffset(interp[i], 1), delta1).
 * interp[0..3] are vec1 GRF views at (nr, 0), (nr, 4), (nr + 1, 0) and
 * (nr + 1, 4); the deltas come from source 1 channels 0 and 1.
 * NOTE(review): the assignment of nr (presumably src0.nr) and the
 * write-mask test are not visible here.
 */
910 static void emit_linterp(struct brw_wm_compile
*c
,
911 const struct brw_fp_instruction
*inst
)
913 struct brw_compile
*p
= &c
->func
;
914 GLuint mask
= inst
->DstReg
.WriteMask
;
915 struct brw_reg interp
[4];
916 struct brw_reg dst
, delta0
, delta1
;
920 src0
= get_src_reg(c
, inst
, 0, 0);
921 delta0
= get_src_reg(c
, inst
, 1, 0);
922 delta1
= get_src_reg(c
, inst
, 1, 1);
925 interp
[0] = brw_vec1_grf(nr
, 0);
926 interp
[1] = brw_vec1_grf(nr
, 4);
927 interp
[2] = brw_vec1_grf(nr
+1, 0);
928 interp
[3] = brw_vec1_grf(nr
+1, 4);
930 for(i
= 0; i
< 4; i
++ ) {
932 dst
= get_dst_reg(c
, inst
, i
);
933 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
934 brw_MAC(p
, dst
, suboffset(interp
[i
],1), delta1
);
/**
 * Constant interpolation: for channels i in 0..3 moves
 * suboffset(interp[i], 3) into the destination channel.
 * interp[0..3] are vec1 GRF views at (nr, 0), (nr, 4), (nr + 1, 0) and
 * (nr + 1, 4).
 * NOTE(review): the assignment of nr (presumably src0.nr) and the
 * write-mask test are not visible here.
 */
939 static void emit_cinterp(struct brw_wm_compile
*c
,
940 const struct brw_fp_instruction
*inst
)
942 struct brw_compile
*p
= &c
->func
;
943 GLuint mask
= inst
->DstReg
.WriteMask
;
945 struct brw_reg interp
[4];
946 struct brw_reg dst
, src0
;
949 src0
= get_src_reg(c
, inst
, 0, 0);
952 interp
[0] = brw_vec1_grf(nr
, 0);
953 interp
[1] = brw_vec1_grf(nr
, 4);
954 interp
[2] = brw_vec1_grf(nr
+1, 0);
955 interp
[3] = brw_vec1_grf(nr
+1, 4);
957 for(i
= 0; i
< 4; i
++ ) {
959 dst
= get_dst_reg(c
, inst
, i
);
960 brw_MOV(p
, dst
, suboffset(interp
[i
],3));
/**
 * Perspective interpolation: for channels i in 0..3 emits
 * brw_LINE(null, interp[i], delta0), a brw_MAC into dst from
 * suboffset(interp[i], 1), and finally brw_MUL(dst, dst, w), where w is
 * source 2 channel 3.
 * interp[0..3] are vec1 GRF views at (nr, 0), (nr, 4), (nr + 1, 0) and
 * (nr + 1, 4); the deltas come from source 1 channels 0 and 1.
 * NOTE(review): the assignment of nr, the last brw_MAC operand and the
 * write-mask test are not visible here.
 */
965 static void emit_pinterp(struct brw_wm_compile
*c
,
966 const struct brw_fp_instruction
*inst
)
968 struct brw_compile
*p
= &c
->func
;
969 GLuint mask
= inst
->DstReg
.WriteMask
;
971 struct brw_reg interp
[4];
972 struct brw_reg dst
, delta0
, delta1
;
973 struct brw_reg src0
, w
;
976 src0
= get_src_reg(c
, inst
, 0, 0);
977 delta0
= get_src_reg(c
, inst
, 1, 0);
978 delta1
= get_src_reg(c
, inst
, 1, 1);
979 w
= get_src_reg(c
, inst
, 2, 3);
982 interp
[0] = brw_vec1_grf(nr
, 0);
983 interp
[1] = brw_vec1_grf(nr
, 4);
984 interp
[2] = brw_vec1_grf(nr
+1, 0);
985 interp
[3] = brw_vec1_grf(nr
+1, 4);
987 for(i
= 0; i
< 4; i
++ ) {
989 dst
= get_dst_reg(c
, inst
, i
);
990 brw_LINE(p
, brw_null_reg(), interp
[i
], delta0
);
991 brw_MAC(p
, dst
, suboffset(interp
[i
],1),
993 brw_MUL(p
, dst
, dst
, w
);
998 /* Sets the destination channels to 1.0 or 0.0 according to glFrontFacing. */
/**
 * Writes 0.0 to every masked destination channel, compares the UD view of
 * brw_vec1_grf(1, 6) against 1 << 31 with BRW_CONDITIONAL_L, then writes
 * 1.0 to the masked channels (presumably predicated on the compare --
 * confirm) and finally sets the predicate control flag value to 0xff.
 * NOTE(review): 1 << 31 shifts into the sign bit of a signed int, which
 * is undefined behaviour in ISO C; 1u << 31 would express the same
 * constant safely.
 */
999 static void emit_frontfacing(struct brw_wm_compile
*c
,
1000 const struct brw_fp_instruction
*inst
)
1002 struct brw_compile
*p
= &c
->func
;
1003 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
1005 GLuint mask
= inst
->DstReg
.WriteMask
;
1008 for (i
= 0; i
< 4; i
++) {
1009 if (mask
& (1<<i
)) {
1010 dst
= get_dst_reg(c
, inst
, i
);
1011 brw_MOV(p
, dst
, brw_imm_f(0.0));
1015 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
1018 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, r1_6ud
, brw_imm_ud(1 << 31));
1019 for (i
= 0; i
< 4; i
++) {
1020 if (mask
& (1<<i
)) {
1021 dst
= get_dst_reg(c
, inst
, i
);
1022 brw_MOV(p
, dst
, brw_imm_f(1.0));
1025 brw_set_predicate_control_flag_value(p
, 0xff);
/**
 * Cross product: for each masked channel i, with i1 = (i + 1) % 3 and
 * i2 = (i + 2) % 3, emits MUL(null, -src0[i2], src1[i1]) followed by
 * MAC(dst, src0[i1], src1[i2]).  Saturation per inst->SaturateMode is
 * enabled only around the MAC.  src1 operands are fetched with
 * get_src_reg_imm().
 * NOTE(review): the loop runs to i < 4, so a W write-mask bit yields
 * i1 = 1 and i2 = 2, the same operands as X -- confirm that is intended.
 */
1028 static void emit_xpd(struct brw_wm_compile
*c
,
1029 const struct brw_fp_instruction
*inst
)
1032 struct brw_compile
*p
= &c
->func
;
1033 GLuint mask
= inst
->DstReg
.WriteMask
;
1034 for (i
= 0; i
< 4; i
++) {
1035 GLuint i2
= (i
+2)%3;
1036 GLuint i1
= (i
+1)%3;
1037 if (mask
& (1<<i
)) {
1038 struct brw_reg src0
, src1
, dst
;
1039 dst
= get_dst_reg(c
, inst
, i
);
1040 src0
= negate(get_src_reg(c
, inst
, 0, i2
));
1041 src1
= get_src_reg_imm(c
, inst
, 1, i1
);
1042 brw_MUL(p
, brw_null_reg(), src0
, src1
);
1043 src0
= get_src_reg(c
, inst
, 0, i1
);
1044 src1
= get_src_reg_imm(c
, inst
, 1, i2
);
1045 brw_set_saturate(p
, inst
->SaturateMode
!= SATURATE_OFF
);
1046 brw_MAC(p
, dst
, src0
, src1
);
1047 brw_set_saturate(p
, 0);
1050 brw_set_saturate(p
, 0);
/**
 * Three-component dot product written to the single channel selected by
 * the write mask: dst_chan = ffs(mask & WRITEMASK_XYZW) - 1, with the
 * masked value asserted to be a power of two.  Emits MUL and MAC into the
 * null register and a final MAC into dst, with saturation per
 * inst->SaturateMode enabled only for that last instruction.
 * src1 operands are fetched with get_src_reg_imm().
 * NOTE(review): the statement under the empty-mask test is not visible
 * here (presumably an early return).
 */
1053 static void emit_dp3(struct brw_wm_compile
*c
,
1054 const struct brw_fp_instruction
*inst
)
1056 struct brw_reg src0
[3], src1
[3], dst
;
1058 struct brw_compile
*p
= &c
->func
;
1059 GLuint mask
= inst
->DstReg
.WriteMask
;
1060 int dst_chan
= ffs(mask
& WRITEMASK_XYZW
) - 1;
1062 if (!(mask
& WRITEMASK_XYZW
))
1065 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
1067 for (i
= 0; i
< 3; i
++) {
1068 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1069 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1072 dst
= get_dst_reg(c
, inst
, dst_chan
);
1073 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1074 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1075 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1076 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1077 brw_set_saturate(p
, 0);
/**
 * Four-component dot product written to the single channel selected by
 * the write mask: dst_chan = ffs(mask & WRITEMASK_XYZW) - 1, with the
 * masked value asserted to be a power of two.  Emits MUL, MAC, MAC into
 * the null register and a final MAC into dst, with saturation per
 * inst->SaturateMode enabled only for that last instruction.
 * src1 operands are fetched with get_src_reg_imm().
 * NOTE(review): the statement under the empty-mask test is not visible
 * here (presumably an early return).
 */
1080 static void emit_dp4(struct brw_wm_compile
*c
,
1081 const struct brw_fp_instruction
*inst
)
1083 struct brw_reg src0
[4], src1
[4], dst
;
1085 struct brw_compile
*p
= &c
->func
;
1086 GLuint mask
= inst
->DstReg
.WriteMask
;
1087 int dst_chan
= ffs(mask
& WRITEMASK_XYZW
) - 1;
1089 if (!(mask
& WRITEMASK_XYZW
))
1092 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
1094 for (i
= 0; i
< 4; i
++) {
1095 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1096 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1098 dst
= get_dst_reg(c
, inst
, dst_chan
);
1099 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1100 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1101 brw_MAC(p
, brw_null_reg(), src0
[2], src1
[2]);
1102 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1103 brw_MAC(p
, dst
, src0
[3], src1
[3]);
1104 brw_set_saturate(p
, 0);
/**
 * Homogeneous dot product written to the single channel selected by the
 * write mask (dst_chan = ffs(mask & WRITEMASK_XYZW) - 1, asserted to be
 * one bit).  Emits MUL and MAC into the null register, a MAC into dst for
 * component 2, then ADD(dst, dst, src1[3]) with saturation per
 * inst->SaturateMode enabled only for the ADD.
 * NOTE(review): src0[3] is fetched by the loop but not used by the
 * visible arithmetic.
 * NOTE(review): the statement under the empty-mask test is not visible
 * here (presumably an early return).
 */
1107 static void emit_dph(struct brw_wm_compile
*c
,
1108 const struct brw_fp_instruction
*inst
)
1110 struct brw_reg src0
[4], src1
[4], dst
;
1112 struct brw_compile
*p
= &c
->func
;
1113 GLuint mask
= inst
->DstReg
.WriteMask
;
1114 int dst_chan
= ffs(mask
& WRITEMASK_XYZW
) - 1;
1116 if (!(mask
& WRITEMASK_XYZW
))
1119 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
1121 for (i
= 0; i
< 4; i
++) {
1122 src0
[i
] = get_src_reg(c
, inst
, 0, i
);
1123 src1
[i
] = get_src_reg_imm(c
, inst
, 1, i
);
1125 dst
= get_dst_reg(c
, inst
, dst_chan
);
1126 brw_MUL(p
, brw_null_reg(), src0
[0], src1
[0]);
1127 brw_MAC(p
, brw_null_reg(), src0
[1], src1
[1]);
1128 brw_MAC(p
, dst
, src0
[2], src1
[2]);
1129 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1130 brw_ADD(p
, dst
, dst
, src1
[3]);
1131 brw_set_saturate(p
, 0);
1135 * Emit a scalar instruction, like RCP, RSQ, LOG, EXP.
1136 * Note that the result of the function is smeared across the dest
1137 * register's X, Y, Z and W channels (subject to writemasking of course).
/**
 * Shared emitter used by emit_rcp(), emit_rsq(), emit_sin(), emit_cos(),
 * emit_ex2() and emit_lg2(), which pass the math function code in func.
 * Moves source 0 channel 0 into message register 2 and issues a math
 * operation for the destination channel selected by the write mask
 * (dst_chan = ffs(mask & WRITEMASK_XYZW) - 1, asserted to be one bit),
 * with the saturate mode taken from inst->SaturateMode,
 * BRW_MATH_DATA_VECTOR and BRW_MATH_PRECISION_FULL.
 * NOTE(review): the math call itself, including how func is passed, is
 * not visible here.
 */
1139 static void emit_math1(struct brw_wm_compile
*c
,
1140 const struct brw_fp_instruction
*inst
, GLuint func
)
1142 struct brw_compile
*p
= &c
->func
;
1143 struct brw_reg src0
, dst
;
1144 GLuint mask
= inst
->DstReg
.WriteMask
;
1145 int dst_chan
= ffs(mask
& WRITEMASK_XYZW
) - 1;
1147 if (!(mask
& WRITEMASK_XYZW
))
1150 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
1152 /* Get first component of source register */
1153 dst
= get_dst_reg(c
, inst
, dst_chan
);
1154 src0
= get_src_reg(c
, inst
, 0, 0);
1156 brw_MOV(p
, brw_message_reg(2), src0
);
1160 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1163 BRW_MATH_DATA_VECTOR
,
1164 BRW_MATH_PRECISION_FULL
);
/** Delegates to emit_math1() with BRW_MATH_FUNCTION_INV. */
1167 static void emit_rcp(struct brw_wm_compile
*c
,
1168 const struct brw_fp_instruction
*inst
)
1170 emit_math1(c
, inst
, BRW_MATH_FUNCTION_INV
);
/** Delegates to emit_math1() with BRW_MATH_FUNCTION_RSQ. */
1173 static void emit_rsq(struct brw_wm_compile
*c
,
1174 const struct brw_fp_instruction
*inst
)
1176 emit_math1(c
, inst
, BRW_MATH_FUNCTION_RSQ
);
/** Delegates to emit_math1() with BRW_MATH_FUNCTION_SIN. */
1179 static void emit_sin(struct brw_wm_compile
*c
,
1180 const struct brw_fp_instruction
*inst
)
1182 emit_math1(c
, inst
, BRW_MATH_FUNCTION_SIN
);
/** Delegates to emit_math1() with BRW_MATH_FUNCTION_COS. */
1185 static void emit_cos(struct brw_wm_compile
*c
,
1186 const struct brw_fp_instruction
*inst
)
1188 emit_math1(c
, inst
, BRW_MATH_FUNCTION_COS
);
/** Delegates to emit_math1() with BRW_MATH_FUNCTION_EXP. */
1191 static void emit_ex2(struct brw_wm_compile
*c
,
1192 const struct brw_fp_instruction
*inst
)
1194 emit_math1(c
, inst
, BRW_MATH_FUNCTION_EXP
);
/** Delegates to emit_math1() with BRW_MATH_FUNCTION_LOG. */
1197 static void emit_lg2(struct brw_wm_compile
*c
,
1198 const struct brw_fp_instruction
*inst
)
1200 emit_math1(c
, inst
, BRW_MATH_FUNCTION_LOG
);
/**
 * For every channel enabled in DstReg.WriteMask emits
 * brw_ADD(dst, src0, src1), with src1 fetched through get_src_reg_imm().
 * Saturation is set from inst->SaturateMode before the loop and cleared
 * afterwards.
 */
1203 static void emit_add(struct brw_wm_compile
*c
,
1204 const struct brw_fp_instruction
*inst
)
1206 struct brw_compile
*p
= &c
->func
;
1207 struct brw_reg src0
, src1
, dst
;
1208 GLuint mask
= inst
->DstReg
.WriteMask
;
1210 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1211 for (i
= 0 ; i
< 4; i
++) {
1212 if (mask
& (1<<i
)) {
1213 dst
= get_dst_reg(c
, inst
, i
);
1214 src0
= get_src_reg(c
, inst
, 0, i
);
1215 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1216 brw_ADD(p
, dst
, src0
, src1
);
1219 brw_set_saturate(p
, 0);
/**
 * Moves source 0 channel 0 into the address register, formed as
 * brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE, BRW_ARF_ADDRESS, 0).
 * Saturation is set from inst->SaturateMode around the MOV and cleared
 * afterwards.  The destination operand of inst is not consulted in the
 * visible code.
 */
1222 static void emit_arl(struct brw_wm_compile
*c
,
1223 const struct brw_fp_instruction
*inst
)
1225 struct brw_compile
*p
= &c
->func
;
1226 struct brw_reg src0
, addr_reg
;
1227 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1228 addr_reg
= brw_uw8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
1229 BRW_ARF_ADDRESS
, 0);
1230 src0
= get_src_reg(c
, inst
, 0, 0); /* channel 0 */
1231 brw_MOV(p
, addr_reg
, src0
);
1232 brw_set_saturate(p
, 0);
/**
 * For every channel enabled in DstReg.WriteMask emits
 * brw_MUL(dst, src0, src1), with src1 fetched through get_src_reg_imm().
 * Saturation is set from inst->SaturateMode before the loop and cleared
 * afterwards.
 */
1236 static void emit_mul(struct brw_wm_compile
*c
,
1237 const struct brw_fp_instruction
*inst
)
1239 struct brw_compile
*p
= &c
->func
;
1240 struct brw_reg src0
, src1
, dst
;
1241 GLuint mask
= inst
->DstReg
.WriteMask
;
1243 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1244 for (i
= 0 ; i
< 4; i
++) {
1245 if (mask
& (1<<i
)) {
1246 dst
= get_dst_reg(c
, inst
, i
);
1247 src0
= get_src_reg(c
, inst
, 0, i
);
1248 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1249 brw_MUL(p
, dst
, src0
, src1
);
1252 brw_set_saturate(p
, 0);
1255 static void emit_frc(struct brw_wm_compile
*c
,
1256 const struct brw_fp_instruction
*inst
)
1258 struct brw_compile
*p
= &c
->func
;
1259 struct brw_reg src0
, dst
;
1260 GLuint mask
= inst
->DstReg
.WriteMask
;
1262 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1263 for (i
= 0 ; i
< 4; i
++) {
1264 if (mask
& (1<<i
)) {
1265 dst
= get_dst_reg(c
, inst
, i
);
1266 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1267 brw_FRC(p
, dst
, src0
);
1270 if (inst
->SaturateMode
!= SATURATE_OFF
)
1271 brw_set_saturate(p
, 0);
1274 static void emit_flr(struct brw_wm_compile
*c
,
1275 const struct brw_fp_instruction
*inst
)
1277 struct brw_compile
*p
= &c
->func
;
1278 struct brw_reg src0
, dst
;
1279 GLuint mask
= inst
->DstReg
.WriteMask
;
1281 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1282 for (i
= 0 ; i
< 4; i
++) {
1283 if (mask
& (1<<i
)) {
1284 dst
= get_dst_reg(c
, inst
, i
);
1285 src0
= get_src_reg_imm(c
, inst
, 0, i
);
1286 brw_RNDD(p
, dst
, src0
);
1289 brw_set_saturate(p
, 0);
1293 static void emit_min_max(struct brw_wm_compile
*c
,
1294 const struct brw_fp_instruction
*inst
)
1296 struct brw_compile
*p
= &c
->func
;
1297 const GLuint mask
= inst
->DstReg
.WriteMask
;
1298 const int mark
= mark_tmps(c
);
1300 brw_push_insn_state(p
);
1301 for (i
= 0; i
< 4; i
++) {
1302 if (mask
& (1<<i
)) {
1303 struct brw_reg real_dst
= get_dst_reg(c
, inst
, i
);
1304 struct brw_reg src0
= get_src_reg(c
, inst
, 0, i
);
1305 struct brw_reg src1
= get_src_reg(c
, inst
, 1, i
);
1307 /* if dst==src0 or dst==src1 we need to use a temp reg */
1308 GLboolean use_temp
= brw_same_reg(dst
, src0
) ||
1309 brw_same_reg(dst
, src1
);
1316 printf(" Min/max: dst %d src0 %d src1 %d\n",
1317 dst.nr, src0.nr, src1.nr);
1319 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1320 brw_MOV(p
, dst
, src0
);
1321 brw_set_saturate(p
, 0);
1323 if (inst
->Opcode
== OPCODE_MIN
)
1324 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_L
, src1
, src0
);
1326 brw_CMP(p
, brw_null_reg(), BRW_CONDITIONAL_G
, src1
, src0
);
1328 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1329 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1330 brw_MOV(p
, dst
, src1
);
1331 brw_set_saturate(p
, 0);
1332 brw_set_predicate_control_flag_value(p
, 0xff);
1334 brw_MOV(p
, real_dst
, dst
);
1337 brw_pop_insn_state(p
);
1338 release_tmps(c
, mark
);
/**
 * Emit code for a power instruction using BRW_MATH_FUNCTION_POW.
 *
 * Channel 0 of source operands 0 and 1 is copied into message registers
 * 2 and 3 respectively.  The result goes to the single destination channel
 * selected by the write mask; the assert requires that exactly one of the
 * XYZW bits is set.  The math operation saturates when SaturateMode is not
 * SATURATE_OFF and uses BRW_MATH_DATA_VECTOR / BRW_MATH_PRECISION_FULL.
 *
 * NOTE(review): the body of the empty-write-mask check and the opening of
 * the math call (callee, destination, message length/source arguments) are
 * not visible in this excerpt -- confirm against the complete file.
 *
 * \param c     fragment program compile state
 * \param inst  the instruction being translated
 */
1341 static void emit_pow(struct brw_wm_compile
*c
,
1342 const struct brw_fp_instruction
*inst
)
1344 struct brw_compile
*p
= &c
->func
;
1345 struct brw_reg dst
, src0
, src1
;
1346 GLuint mask
= inst
->DstReg
.WriteMask
;
/* ffs() is 1-based, so dst_chan is the index of the lowest enabled channel
 * (or -1 when no XYZW bit is set).
 */
1347 int dst_chan
= ffs(mask
& WRITEMASK_XYZW
) - 1;
1349 if (!(mask
& WRITEMASK_XYZW
))
1352 assert(is_power_of_two(mask
& WRITEMASK_XYZW
));
1354 dst
= get_dst_reg(c
, inst
, dst_chan
);
1355 src0
= get_src_reg_imm(c
, inst
, 0, 0);
1356 src1
= get_src_reg_imm(c
, inst
, 1, 0);
/* Operands are passed to the math function through message registers. */
1358 brw_MOV(p
, brw_message_reg(2), src0
);
1359 brw_MOV(p
, brw_message_reg(3), src1
);
1363 BRW_MATH_FUNCTION_POW
,
1364 (inst
->SaturateMode
!= SATURATE_OFF
) ? BRW_MATH_SATURATE_SATURATE
: BRW_MATH_SATURATE_NONE
,
1367 BRW_MATH_DATA_VECTOR
,
1368 BRW_MATH_PRECISION_FULL
);
1371 static void emit_lrp(struct brw_wm_compile
*c
,
1372 const struct brw_fp_instruction
*inst
)
1374 struct brw_compile
*p
= &c
->func
;
1375 GLuint mask
= inst
->DstReg
.WriteMask
;
1376 struct brw_reg dst
, tmp1
, tmp2
, src0
, src1
, src2
;
1378 int mark
= mark_tmps(c
);
1379 for (i
= 0; i
< 4; i
++) {
1380 if (mask
& (1<<i
)) {
1381 dst
= get_dst_reg(c
, inst
, i
);
1382 src0
= get_src_reg(c
, inst
, 0, i
);
1384 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1386 if (src1
.nr
== dst
.nr
) {
1387 tmp1
= alloc_tmp(c
);
1388 brw_MOV(p
, tmp1
, src1
);
1392 src2
= get_src_reg(c
, inst
, 2, i
);
1393 if (src2
.nr
== dst
.nr
) {
1394 tmp2
= alloc_tmp(c
);
1395 brw_MOV(p
, tmp2
, src2
);
1399 brw_ADD(p
, dst
, negate(src0
), brw_imm_f(1.0));
1400 brw_MUL(p
, brw_null_reg(), dst
, tmp2
);
1401 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1402 brw_MAC(p
, dst
, src0
, tmp1
);
1403 brw_set_saturate(p
, 0);
1405 release_tmps(c
, mark
);
1410 * For GLSL shaders, this KIL will be unconditional.
1411 * It may be contained inside an IF/ENDIF structure of course.
1413 static void emit_kil(struct brw_wm_compile
*c
)
1415 struct brw_compile
*p
= &c
->func
;
1416 struct brw_reg depth
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1417 brw_push_insn_state(p
);
1418 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1419 brw_NOT(p
, c
->emit_mask_reg
, brw_mask_reg(1)); //IMASK
1420 brw_AND(p
, depth
, c
->emit_mask_reg
, depth
);
1421 brw_pop_insn_state(p
);
1424 static void emit_mad(struct brw_wm_compile
*c
,
1425 const struct brw_fp_instruction
*inst
)
1427 struct brw_compile
*p
= &c
->func
;
1428 GLuint mask
= inst
->DstReg
.WriteMask
;
1429 struct brw_reg dst
, src0
, src1
, src2
;
1432 for (i
= 0; i
< 4; i
++) {
1433 if (mask
& (1<<i
)) {
1434 dst
= get_dst_reg(c
, inst
, i
);
1435 src0
= get_src_reg(c
, inst
, 0, i
);
1436 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1437 src2
= get_src_reg_imm(c
, inst
, 2, i
);
1438 brw_MUL(p
, dst
, src0
, src1
);
1440 brw_set_saturate(p
, (inst
->SaturateMode
!= SATURATE_OFF
) ? 1 : 0);
1441 brw_ADD(p
, dst
, dst
, src2
);
1442 brw_set_saturate(p
, 0);
1447 static void emit_sop(struct brw_wm_compile
*c
,
1448 const struct brw_fp_instruction
*inst
, GLuint cond
)
1450 struct brw_compile
*p
= &c
->func
;
1451 GLuint mask
= inst
->DstReg
.WriteMask
;
1452 struct brw_reg dst
, src0
, src1
;
1455 for (i
= 0; i
< 4; i
++) {
1456 if (mask
& (1<<i
)) {
1457 dst
= get_dst_reg(c
, inst
, i
);
1458 src0
= get_src_reg(c
, inst
, 0, i
);
1459 src1
= get_src_reg_imm(c
, inst
, 1, i
);
1460 brw_push_insn_state(p
);
1461 brw_CMP(p
, brw_null_reg(), cond
, src0
, src1
);
1462 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1463 brw_MOV(p
, dst
, brw_imm_f(0.0));
1464 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
1465 brw_MOV(p
, dst
, brw_imm_f(1.0));
1466 brw_pop_insn_state(p
);
1471 static void emit_slt(struct brw_wm_compile
*c
,
1472 const struct brw_fp_instruction
*inst
)
1474 emit_sop(c
, inst
, BRW_CONDITIONAL_L
);
1477 static void emit_sle(struct brw_wm_compile
*c
,
1478 const struct brw_fp_instruction
*inst
)
1480 emit_sop(c
, inst
, BRW_CONDITIONAL_LE
);
1483 static void emit_sgt(struct brw_wm_compile
*c
,
1484 const struct brw_fp_instruction
*inst
)
1486 emit_sop(c
, inst
, BRW_CONDITIONAL_G
);
1489 static void emit_sge(struct brw_wm_compile
*c
,
1490 const struct brw_fp_instruction
*inst
)
1492 emit_sop(c
, inst
, BRW_CONDITIONAL_GE
);
1495 static void emit_seq(struct brw_wm_compile
*c
,
1496 const struct brw_fp_instruction
*inst
)
1498 emit_sop(c
, inst
, BRW_CONDITIONAL_EQ
);
1501 static void emit_sne(struct brw_wm_compile
*c
,
1502 const struct brw_fp_instruction
*inst
)
1504 emit_sop(c
, inst
, BRW_CONDITIONAL_NEQ
);
/**
 * Return a strided view of reg retyped to BRW_REGISTER_TYPE_W and advanced
 * by one element with suboffset(..., 1).
 *
 * NOTE(review): the trailing stride() arguments are not visible in this
 * excerpt; presumably they match low_words() -- confirm against the
 * complete file.
 */
1507 static INLINE
struct brw_reg
high_words( struct brw_reg reg
)
1509 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_W
), 1 ),
1513 static INLINE
struct brw_reg
low_words( struct brw_reg reg
)
1515 return stride( retype( reg
, BRW_REGISTER_TYPE_W
), 0, 8, 2 );
1518 static INLINE
struct brw_reg
even_bytes( struct brw_reg reg
)
1520 return stride( retype( reg
, BRW_REGISTER_TYPE_B
), 0, 16, 2 );
/**
 * Return a strided view of reg retyped to BRW_REGISTER_TYPE_B and advanced
 * by one element with suboffset(..., 1).
 *
 * NOTE(review): the trailing stride() arguments are not visible in this
 * excerpt; presumably they match even_bytes() -- confirm against the
 * complete file.
 */
1523 static INLINE
struct brw_reg
odd_bytes( struct brw_reg reg
)
1525 return stride( suboffset( retype( reg
, BRW_REGISTER_TYPE_B
), 1 ),
/**
 * Emit code computing the window-position X and Y values.
 *
 * Destination channels 0 and 1 and channels 0 and 1 of source operand 0
 * are looked up unconditionally.  When WRITEMASK_X is set, the X value is
 * derived from src0[0] viewed as BRW_REGISTER_TYPE_W.  When WRITEMASK_Y is
 * set, Y is flipped: Y' = height - 1 - Y, using the negated word view of
 * src0[1] and the immediate c->key.drawable_height - 1.
 *
 * NOTE(review): the instructions that consume these operands (and their
 * destination arguments) are not visible in this excerpt -- confirm
 * against the complete file.
 *
 * \param c     fragment program compile state
 * \param inst  the instruction being translated
 */
1531 static void emit_wpos_xy(struct brw_wm_compile
*c
,
1532 const struct brw_fp_instruction
*inst
)
1534 struct brw_compile
*p
= &c
->func
;
1535 GLuint mask
= inst
->DstReg
.WriteMask
;
1536 struct brw_reg src0
[2], dst
[2];
1538 dst
[0] = get_dst_reg(c
, inst
, 0);
1539 dst
[1] = get_dst_reg(c
, inst
, 1);
1541 src0
[0] = get_src_reg(c
, inst
, 0, 0);
1542 src0
[1] = get_src_reg(c
, inst
, 0, 1);
1544 /* Calculate the pixel offset from window bottom left into destination
1547 if (mask
& WRITEMASK_X
) {
1551 retype(src0
[0], BRW_REGISTER_TYPE_W
));
1554 if (mask
& WRITEMASK_Y
) {
1555 /* Y' = height - 1 - Y */
1558 negate(retype(src0
[1], BRW_REGISTER_TYPE_W
)),
1559 brw_imm_d(c
->key
.drawable_height
- 1));
/**
 * Emit code for TXB (texture sample with LOD bias).
 *
 * The texture coordinates are loaded into message registers 2..4 according
 * to inst->tex_target (unused coordinates are set to 0), the bias taken
 * from src[3] goes into message register 5 and 0 into message register 6.
 * The sampler message type is chosen with BRW_IS_IGDNG(); the sample is
 * issued in SIMD8 mode with a response length of 4, using dst[0] as the
 * destination and the PAYLOAD_DEPTH payload register as src0.
 *
 * NOTE(review): the declarations of i and msg_type, the break/default
 * statements of the switch, the else of the IGDNG test and the opening of
 * the sample call are not visible in this excerpt -- confirm against the
 * complete file.
 *
 * \param c     fragment program compile state
 * \param inst  the instruction being translated
 */
1564 BIAS on SIMD8 not working yet...
1566 static void emit_txb(struct brw_wm_compile
*c
,
1567 const struct brw_fp_instruction
*inst
)
1569 struct brw_compile
*p
= &c
->func
;
1570 struct brw_reg dst
[4], src
[4], payload_reg
;
1571 /* Note: tex_unit was already looked up through SamplerTextures[] */
1572 const GLuint unit
= inst
->tex_unit
;
1576 assert(unit
< BRW_MAX_TEX_UNIT
);
1578 payload_reg
= get_reg(c
, TGSI_FILE_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
/* Look up all four destination and source channels up front. */
1580 for (i
= 0; i
< 4; i
++)
1581 dst
[i
] = get_dst_reg(c
, inst
, i
);
1582 for (i
= 0; i
< 4; i
++)
1583 src
[i
] = get_src_reg(c
, inst
, 0, i
);
1585 switch (inst
->tex_target
) {
1586 case TEXTURE_1D_INDEX
:
1587 brw_MOV(p
, brw_message_reg(2), src
[0]); /* s coord */
1588 brw_MOV(p
, brw_message_reg(3), brw_imm_f(0)); /* t coord */
1589 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0)); /* r coord */
1591 case TEXTURE_2D_INDEX
:
1592 case TEXTURE_RECT_INDEX
:
1593 brw_MOV(p
, brw_message_reg(2), src
[0]);
1594 brw_MOV(p
, brw_message_reg(3), src
[1]);
1595 brw_MOV(p
, brw_message_reg(4), brw_imm_f(0));
1597 case TEXTURE_3D_INDEX
:
1598 case TEXTURE_CUBE_INDEX
:
1599 brw_MOV(p
, brw_message_reg(2), src
[0]);
1600 brw_MOV(p
, brw_message_reg(3), src
[1]);
1601 brw_MOV(p
, brw_message_reg(4), src
[2]);
1604 /* invalid target */
1607 brw_MOV(p
, brw_message_reg(5), src
[3]); /* bias */
1608 brw_MOV(p
, brw_message_reg(6), brw_imm_f(0)); /* ref (unused?) */
1610 if (BRW_IS_IGDNG(p
->brw
)) {
1611 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_IGDNG
;
1613 /* Does it work well on SIMD8? */
1614 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
1618 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
1620 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
1621 SURF_INDEX_TEXTURE(unit
),
1623 inst
->DstReg
.WriteMask
, /* writemask */
1624 msg_type
, /* msg_type */
1625 4, /* response_length */
1629 BRW_SAMPLER_SIMD_MODE_SIMD8
);
/**
 * Emit code for TEX (texture sample, optionally with shadow comparison).
 *
 * `shadow` is set when the texture unit's bit is present in
 * c->key.shadowtex_mask.  The set of coordinates to send (`emit`) depends
 * on inst->tex_target: XY for 2D/RECT, XYZ for 3D/CUBE.  Coordinates are
 * moved into consecutive message registers (missing ones are filled with
 * 0); additionally 0 is written to message register 5 (lod / bias) and
 * src[2] to message register 6 (reference value / R coord).  The sample is
 * issued in SIMD8 mode with a response length of 4 and a message length of
 * 6 for shadow lookups, 4 otherwise.  A 1.0 may finally be written to
 * dst[3].
 *
 * NOTE(review): many original lines are not visible in this excerpt (the
 * declarations of i, nr, emit, msg_len and msg_type, the 1D case body, the
 * conditions guarding the message-register 5/6 writes and the final dst[3]
 * write, and the opening of the sample call) -- confirm against the
 * complete file before relying on the details above.
 *
 * \param c     fragment program compile state
 * \param inst  the instruction being translated
 */
1633 static void emit_tex(struct brw_wm_compile
*c
,
1634 const struct brw_fp_instruction
*inst
)
1636 struct brw_compile
*p
= &c
->func
;
1637 struct brw_reg dst
[4], src
[4], payload_reg
;
1638 /* Note: tex_unit was already looked up through SamplerTextures[] */
1639 const GLuint unit
= inst
->tex_unit
;
1643 GLboolean shadow
= (c
->key
.shadowtex_mask
& (1<<unit
)) ? 1 : 0;
1646 assert(unit
< BRW_MAX_TEX_UNIT
);
1648 payload_reg
= get_reg(c
, TGSI_FILE_PAYLOAD
, PAYLOAD_DEPTH
, 0, 1, 0, 0);
1650 for (i
= 0; i
< 4; i
++)
1651 dst
[i
] = get_dst_reg(c
, inst
, i
);
1652 for (i
= 0; i
< 4; i
++)
1653 src
[i
] = get_src_reg(c
, inst
, 0, i
);
1655 switch (inst
->tex_target
) {
1656 case TEXTURE_1D_INDEX
:
1660 case TEXTURE_2D_INDEX
:
1661 case TEXTURE_RECT_INDEX
:
1662 emit
= WRITEMASK_XY
;
1665 case TEXTURE_3D_INDEX
:
1666 case TEXTURE_CUBE_INDEX
:
1667 emit
= WRITEMASK_XYZ
;
1671 /* invalid target */
1676 /* move/load S, T, R coords */
1677 for (i
= 0; i
< nr
; i
++) {
1678 static const GLuint swz
[4] = {0,1,2,2};
1680 brw_MOV(p
, brw_message_reg(msg_len
+1), src
[swz
[i
]]);
1682 brw_MOV(p
, brw_message_reg(msg_len
+1), brw_imm_f(0));
1687 brw_MOV(p
, brw_message_reg(5), brw_imm_f(0)); /* lod / bias */
1688 brw_MOV(p
, brw_message_reg(6), src
[2]); /* ref value / R coord */
1691 if (BRW_IS_IGDNG(p
->brw
)) {
1693 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_COMPARE_IGDNG
;
1695 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_IGDNG
;
1697 /* Does it work for shadow on SIMD8 ? */
1698 msg_type
= BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE
;
1702 retype(vec8(dst
[0]), BRW_REGISTER_TYPE_UW
), /* dest */
1704 retype(payload_reg
, BRW_REGISTER_TYPE_UW
), /* src0 */
1705 SURF_INDEX_TEXTURE(unit
),
1707 inst
->DstReg
.WriteMask
, /* writemask */
1708 msg_type
, /* msg_type */
1709 4, /* response_length */
1710 shadow
? 6 : 4, /* msg_length */
1713 BRW_SAMPLER_SIMD_MODE_SIMD8
);
1716 brw_MOV(p
, dst
[3], brw_imm_f(1.0));
1721 * Resolve subroutine calls after code emit is done.
1723 static void post_wm_emit( struct brw_wm_compile
*c
)
1725 brw_resolve_cals(&c
->func
);
/**
 * Fetch the source registers of one operand of an instruction.
 *
 * For each of the four channels whose bit is set in `mask`, regs[i] is
 * filled with get_src_reg(c, inst, index, i).  Entries for channels that
 * are not selected are left untouched.
 *
 * NOTE(review): the return type and the declarations of the `index` and
 * `mask` parameters and of `i` are not visible in this excerpt -- confirm
 * against the complete file.
 *
 * \param c     fragment program compile state
 * \param inst  the instruction whose operand is fetched
 * \param regs  output array indexed by channel
 */
1729 get_argument_regs(struct brw_wm_compile
*c
,
1730 const struct brw_fp_instruction
*inst
,
1732 struct brw_reg
*regs
,
1737 for (i
= 0; i
< 4; i
++) {
1738 if (mask
& (1 << i
))
1739 regs
[i
] = get_src_reg(c
, inst
, index
, i
);
/**
 * Main code-generation loop for fragment programs that use flow control.
 *
 * Setup: clears c->out_of_regs, disables instruction compression and loads
 * the address register used as the call-stack pointer (stack_index) with
 * the address of c->stack.
 *
 * Then, for each of the c->nr_fp_insns instructions:
 *  - constants are fetched first when c->fp->use_const_buffer is set;
 *  - the conditional modifier is set from inst->CondUpdate;
 *  - dst_flags is built from the write mask, plus SATURATE when
 *    SaturateMode == SATURATE_ZERO_ONE;
 *  - the opcode is dispatched to the matching emit_*() helper or handled
 *    inline (IF/ELSE/ENDIF via the if_inst[] stack, subroutine call/return
 *    via the stack at stack_index, loops via the loop_inst[] stack with
 *    BREAK/CONTINUE jump counts patched at ENDLOOP);
 *  - predicate control is set from inst->CondUpdate afterwards.
 *
 * Finally the generated code is disassembled to stderr when
 * BRW_DEBUG & DEBUG_WM is set.
 *
 * NOTE(review): most case labels, break statements and several statements
 * of this function are not visible in this excerpt; the comments below
 * describe only what the visible lines establish.
 *
 * \param brw  driver context (used for the BRW_IS_IGDNG() check)
 * \param c    fragment program compile state
 */
1743 static void brw_wm_emit_branching_shader(struct brw_context
*brw
, struct brw_wm_compile
*c
)
1745 #define MAX_IF_DEPTH 32
1746 #define MAX_LOOP_DEPTH 32
1747 struct brw_instruction
*if_inst
[MAX_IF_DEPTH
], *loop_inst
[MAX_LOOP_DEPTH
];
1748 GLuint i
, if_depth
= 0, loop_depth
= 0;
1749 struct brw_compile
*p
= &c
->func
;
1750 struct brw_indirect stack_index
= brw_indirect(0, 0);
1752 c
->out_of_regs
= GL_FALSE
;
1755 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
/* Point the call-stack address register at c->stack. */
1756 brw_MOV(p
, get_addr_reg(stack_index
), brw_address(c
->stack
));
1758 for (i
= 0; i
< c
->nr_fp_insns
; i
++) {
1759 const struct brw_fp_instruction
*inst
= &c
->fp_instructions
[i
];
1761 struct brw_reg args
[3][4], dst
[4];
1767 debug_printf("Inst %d: ", i
);
1768 _mesa_print_instruction(inst
);
1771 /* fetch any constants that this instruction needs */
1772 if (c
->fp
->use_const_buffer
)
1773 fetch_constants(c
, inst
);
1775 if (inst
->CondUpdate
)
1776 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NZ
);
1778 brw_set_conditionalmod(p
, BRW_CONDITIONAL_NONE
);
1780 dst_flags
= inst
->DstReg
.WriteMask
;
1781 if (inst
->SaturateMode
== SATURATE_ZERO_ONE
)
1782 dst_flags
|= SATURATE
;
1784 switch (inst
->Opcode
) {
1786 emit_pixel_xy(c
, inst
);
1789 emit_delta_xy(c
, inst
);
1792 emit_pixel_w(c
, inst
);
1795 emit_linterp(c
, inst
);
1798 emit_pinterp(c
, inst
);
1801 emit_cinterp(c
, inst
);
1804 emit_wpos_xy(c
, inst
);
1807 emit_fb_write(c
, inst
);
1809 case WM_FRONTFACING
:
1810 emit_frontfacing(c
, inst
);
1828 emit_trunc(c
, inst
);
1865 emit_min_max(c
, inst
);
/* Derivatives: channels outside the write mask get the null register. */
1869 for (j
= 0; j
< 4; j
++) {
1870 if (inst
->DstReg
.WriteMask
& (1 << j
))
1871 dst
[j
] = get_dst_reg(c
, inst
, j
);
1873 dst
[j
] = brw_null_reg();
1875 get_argument_regs(c
, inst
, 0, args
[0], WRITEMASK_XYZW
);
1876 emit_ddxy(p
, dst
, dst_flags
, (inst
->Opcode
== OPCODE_DDX
),
/* IF: push the new IF instruction onto the if_inst[] stack. */
1916 assert(if_depth
< MAX_IF_DEPTH
);
1917 if_inst
[if_depth
++] = brw_IF(p
, BRW_EXECUTE_8
);
/* ELSE: replaces the top of the if_inst[] stack.
 * NOTE(review): no visible assert that if_depth > 0 here -- confirm.
 */
1920 if_inst
[if_depth
-1] = brw_ELSE(p
, if_inst
[if_depth
-1]);
1923 assert(if_depth
> 0);
1924 brw_ENDIF(p
, if_inst
[--if_depth
]);
1927 brw_save_label(p
, inst
->Comment
, p
->nr_insn
);
/* Subroutine call: store IP + 3*16 at the stack slot addressed by
 * stack_index, advance the stack pointer by 4, record the call site for
 * later resolution, then emit the IP-relative jump.
 */
1933 brw_push_insn_state(p
);
1934 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1935 brw_set_access_mode(p
, BRW_ALIGN_1
);
1936 brw_ADD(p
, deref_1ud(stack_index
, 0), brw_ip_reg(), brw_imm_d(3*16));
1937 brw_set_access_mode(p
, BRW_ALIGN_16
);
1938 brw_ADD(p
, get_addr_reg(stack_index
),
1939 get_addr_reg(stack_index
), brw_imm_d(4));
1940 brw_save_call(&c
->func
, inst
->label
, p
->nr_insn
);
1941 brw_ADD(p
, brw_ip_reg(), brw_ip_reg(), brw_imm_d(1*16));
1942 brw_pop_insn_state(p
);
/* Subroutine return: move the stack pointer back by 4 and reload IP from
 * the slot it now addresses.
 */
1946 brw_push_insn_state(p
);
1947 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1948 brw_ADD(p
, get_addr_reg(stack_index
),
1949 get_addr_reg(stack_index
), brw_imm_d(-4));
1950 brw_set_access_mode(p
, BRW_ALIGN_1
);
1951 brw_MOV(p
, brw_ip_reg(), deref_1ud(stack_index
, 0));
1952 brw_set_access_mode(p
, BRW_ALIGN_16
);
1953 brw_pop_insn_state(p
);
1956 case OPCODE_BGNLOOP
:
1957 /* XXX may need to invalidate the current_constant regs */
/* NOTE(review): no visible bound check of loop_depth against
 * MAX_LOOP_DEPTH before the push (the IF path asserts) -- confirm.
 */
1958 loop_inst
[loop_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
1962 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1966 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
1968 case OPCODE_ENDLOOP
:
1970 struct brw_instruction
*inst0
, *inst1
;
1973 if (BRW_IS_IGDNG(brw
))
/* NOTE(review): `br` (jump-count scale) and the loop_depth decrement are
 * presumably set on lines not visible here -- confirm.
 */
1977 inst0
= inst1
= brw_WHILE(p
, loop_inst
[loop_depth
]);
1978 /* patch all the BREAK/CONT instructions from last BGNLOOP */
1979 while (inst0
> loop_inst
[loop_depth
]) {
1981 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
) {
1982 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
1983 inst0
->bits3
.if_else
.pop_count
= 0;
1985 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
) {
1986 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
1987 inst0
->bits3
.if_else
.pop_count
= 0;
1993 debug_printf("unsupported IR in fragment shader %d\n",
1997 if (inst
->CondUpdate
)
1998 brw_set_predicate_control(p
, BRW_PREDICATE_NORMAL
);
2000 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2004 if (BRW_DEBUG
& DEBUG_WM
) {
2005 debug_printf("wm-native:\n");
2006 brw_disasm(stderr
, p
->store
, p
->nr_insn
);
/**
 * Public entry point for compiling fragment shaders that use flow control.
 *
 * Prints the function name when BRW_DEBUG & DEBUG_WM is set, runs code
 * generation through brw_wm_emit_branching_shader(), optionally prints the
 * resulting program, and records the register usage in c->prog_data:
 * total_grf is taken from num_grf_used(c) and total_scratch is set to 0.
 *
 * NOTE(review): the statement following the "initial instruction
 * translation/simplification" comment and the end of the function are not
 * visible in this excerpt -- confirm against the complete file.
 *
 * \param brw  driver context
 * \param c    fragment program compile state
 */
2011 * Do GPU code generation for shaders that use GLSL features such as
2012 * flow control. Other shaders will be compiled with the
2014 void brw_wm_branching_shader_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2016 if (BRW_DEBUG
& DEBUG_WM
) {
2017 debug_printf("%s:\n", __FUNCTION__
);
2020 /* initial instruction translation/simplification */
2023 /* actual code generation */
2024 brw_wm_emit_branching_shader(brw
, c
);
2026 if (BRW_DEBUG
& DEBUG_WM
) {
2027 brw_wm_print_program(c
, "brw_wm_branching_shader_emit done");
2030 c
->prog_data
.total_grf
= num_grf_used(c
);
2031 c
->prog_data
.total_scratch
= 0;