2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
33 #include "pipe/p_shader_tokens.h"
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
38 #include "tgsi/tgsi_parse.h"
39 #include "tgsi/tgsi_dump.h"
40 #include "tgsi/tgsi_info.h"
41 #include "tgsi/tgsi_util.h"
44 #include "brw_debug.h"
47 /***********************************************************************
51 static struct brw_fp_src
src_reg(GLuint file
, GLuint idx
)
53 struct brw_fp_src reg
;
56 reg
.swizzle
= BRW_SWIZZLE_XYZW
;
63 static struct brw_fp_src
src_reg_from_dst(struct brw_fp_dst dst
)
65 return src_reg(dst
.file
, dst
.index
);
68 static struct brw_fp_src
src_undef( void )
70 return src_reg(TGSI_FILE_NULL
, 0);
73 static GLboolean
src_is_undef(struct brw_fp_src src
)
75 return src
.file
== TGSI_FILE_NULL
;
78 static struct brw_fp_src
src_swizzle( struct brw_fp_src reg
, int x
, int y
, int z
, int w
)
80 unsigned swz
= reg
.swizzle
;
82 reg
.swizzle
= ( BRW_GET_SWZ(swz
, x
) << 0 |
83 BRW_GET_SWZ(swz
, y
) << 2 |
84 BRW_GET_SWZ(swz
, z
) << 4 |
85 BRW_GET_SWZ(swz
, w
) << 6 );
90 static struct brw_fp_src
src_scalar( struct brw_fp_src reg
, int x
)
92 return src_swizzle(reg
, x
, x
, x
, x
);
95 static struct brw_fp_src
src_abs( struct brw_fp_src src
)
102 static struct brw_fp_src
src_negate( struct brw_fp_src src
)
110 static int match_or_expand_immediate( const float *v
,
120 for (i
= 0; i
< nr
; i
++) {
121 boolean found
= FALSE
;
123 for (j
= 0; j
< *nr2
&& !found
; j
++) {
125 *swizzle
|= j
<< (i
* 2);
135 *swizzle
|= *nr2
<< (i
* 2);
145 /* Internally generated immediates: overkill...
147 static struct brw_fp_src
src_imm( struct brw_wm_compile
*c
,
154 /* Could do a first pass where we examine all existing immediates
158 for (i
= 0; i
< c
->nr_immediates
; i
++) {
159 if (match_or_expand_immediate( v
,
167 if (c
->nr_immediates
< Elements(c
->immediate
)) {
168 i
= c
->nr_immediates
++;
169 if (match_or_expand_immediate( v
,
181 /* Make sure that all referenced elements are from this immediate.
182 * Has the effect of making size-one immediates into scalars.
184 for (j
= nr
; j
< 4; j
++)
185 swizzle
|= (swizzle
& 0x3) << (j
* 2);
187 return src_swizzle( src_reg( TGSI_FILE_IMMEDIATE
, i
),
188 BRW_GET_SWZ(swizzle
, X
),
189 BRW_GET_SWZ(swizzle
, Y
),
190 BRW_GET_SWZ(swizzle
, Z
),
191 BRW_GET_SWZ(swizzle
, W
) );
196 static struct brw_fp_src
src_imm1f( struct brw_wm_compile
*c
,
199 return src_imm(c
, &f
, 1);
202 static struct brw_fp_src
src_imm4f( struct brw_wm_compile
*c
,
208 GLfloat f
[4] = {x
,y
,z
,w
};
209 return src_imm(c
, f
, 4);
214 /***********************************************************************
218 static struct brw_fp_dst
dst_reg(GLuint file
, GLuint idx
)
220 struct brw_fp_dst reg
;
223 reg
.writemask
= BRW_WRITEMASK_XYZW
;
229 static struct brw_fp_dst
dst_mask( struct brw_fp_dst reg
, int mask
)
231 reg
.writemask
&= mask
;
235 static struct brw_fp_dst
dst_undef( void )
237 return dst_reg(TGSI_FILE_NULL
, 0);
240 static boolean
dst_is_undef( struct brw_fp_dst dst
)
242 return dst
.file
== TGSI_FILE_NULL
;
245 static struct brw_fp_dst
dst_saturate( struct brw_fp_dst reg
, boolean flag
)
251 static struct brw_fp_dst
get_temp( struct brw_wm_compile
*c
)
253 int bit
= ffs( ~c
->fp_temp
);
256 debug_printf("%s: out of temporaries\n", __FILE__
);
259 c
->fp_temp
|= 1<<(bit
-1);
260 return dst_reg(TGSI_FILE_TEMPORARY
, c
->fp_first_internal_temp
+(bit
-1));
264 static void release_temp( struct brw_wm_compile
*c
, struct brw_fp_dst temp
)
266 c
->fp_temp
&= ~(1 << (temp
.index
- c
->fp_first_internal_temp
));
270 /***********************************************************************
274 static struct brw_fp_instruction
*get_fp_inst(struct brw_wm_compile
*c
)
276 return &c
->fp_instructions
[c
->nr_fp_insns
++];
279 static struct brw_fp_instruction
* emit_tex_op(struct brw_wm_compile
*c
,
281 struct brw_fp_dst dest
,
285 struct brw_fp_src src0
,
286 struct brw_fp_src src1
,
287 struct brw_fp_src src2
)
289 struct brw_fp_instruction
*inst
= get_fp_inst(c
);
291 if (tex_unit
|| target
)
292 assert(op
== TGSI_OPCODE_TXP
||
293 op
== TGSI_OPCODE_TXB
||
294 op
== TGSI_OPCODE_TEX
||
299 inst
->tex_unit
= tex_unit
;
300 inst
->target
= target
;
301 inst
->sampler
= sampler
;
310 static INLINE
void emit_op3(struct brw_wm_compile
*c
,
312 struct brw_fp_dst dest
,
313 struct brw_fp_src src0
,
314 struct brw_fp_src src1
,
315 struct brw_fp_src src2
)
317 emit_tex_op(c
, op
, dest
, 0, 0, 0, src0
, src1
, src2
);
321 static INLINE
void emit_op2(struct brw_wm_compile
*c
,
323 struct brw_fp_dst dest
,
324 struct brw_fp_src src0
,
325 struct brw_fp_src src1
)
327 emit_tex_op(c
, op
, dest
, 0, 0, 0, src0
, src1
, src_undef());
330 static INLINE
void emit_op1(struct brw_wm_compile
*c
,
332 struct brw_fp_dst dest
,
333 struct brw_fp_src src0
)
335 emit_tex_op(c
, op
, dest
, 0, 0, 0, src0
, src_undef(), src_undef());
338 static INLINE
void emit_op0(struct brw_wm_compile
*c
,
340 struct brw_fp_dst dest
)
342 emit_tex_op(c
, op
, dest
, 0, 0, 0, src_undef(), src_undef(), src_undef());
347 /* Many opcodes produce the same value across all the result channels.
348 * We'd rather not have to support that splatting in the opcode implementations,
349 * and brw_wm_pass*.c wants to optimize them out by shuffling references around
350 * anyway. We can easily get both by emitting the opcode to one channel, and
351 * then MOVing it to the others, which brw_wm_pass*.c already understands.
353 static void emit_scalar_insn(struct brw_wm_compile
*c
,
355 struct brw_fp_dst dst
,
356 struct brw_fp_src src0
,
357 struct brw_fp_src src1
,
358 struct brw_fp_src src2
)
360 unsigned first_chan
= ffs(dst
.writemask
) - 1;
361 unsigned first_mask
= 1 << first_chan
;
363 if (dst
.writemask
== 0)
367 dst_mask(dst
, first_mask
),
370 if (dst
.writemask
!= first_mask
) {
371 emit_op1(c
, TGSI_OPCODE_MOV
,
372 dst_mask(dst
, ~first_mask
),
373 src_scalar(src_reg_from_dst(dst
), first_chan
));
378 /***********************************************************************
379 * Special instructions for interpolation and other tasks
382 static struct brw_fp_src
get_pixel_xy( struct brw_wm_compile
*c
)
384 if (src_is_undef(c
->fp_pixel_xy
)) {
385 struct brw_fp_dst pixel_xy
= get_temp(c
);
386 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
389 /* Emit the out calculations, and hold onto the results. Use
390 * two instructions as a temporary is required.
392 /* pixel_xy.xy = PIXELXY payload[0];
396 dst_mask(pixel_xy
, BRW_WRITEMASK_XY
),
399 c
->fp_pixel_xy
= src_reg_from_dst(pixel_xy
);
402 return c
->fp_pixel_xy
;
405 static struct brw_fp_src
get_delta_xy( struct brw_wm_compile
*c
)
407 if (src_is_undef(c
->fp_delta_xy
)) {
408 struct brw_fp_dst delta_xy
= get_temp(c
);
409 struct brw_fp_src pixel_xy
= get_pixel_xy(c
);
410 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
412 /* deltas.xy = DELTAXY pixel_xy, payload[0]
416 dst_mask(delta_xy
, BRW_WRITEMASK_XY
),
421 c
->fp_delta_xy
= src_reg_from_dst(delta_xy
);
424 return c
->fp_delta_xy
;
427 static struct brw_fp_src
get_pixel_w( struct brw_wm_compile
*c
)
429 if (src_is_undef(c
->fp_pixel_w
)) {
430 struct brw_fp_dst pixel_w
= get_temp(c
);
431 struct brw_fp_src deltas
= get_delta_xy(c
);
433 /* XXX: assuming position is always first -- valid?
435 struct brw_fp_src interp_wpos
= src_reg(BRW_FILE_PAYLOAD
, 0);
437 /* deltas.xyw = DELTAS2 deltas.xy, payload.interp_wpos.x
441 dst_mask(pixel_w
, BRW_WRITEMASK_W
),
447 c
->fp_pixel_w
= src_reg_from_dst(pixel_w
);
450 return c
->fp_pixel_w
;
454 /***********************************************************************
455 * Emit INTERP instructions ahead of first use of each attrib.
458 static void emit_interp( struct brw_wm_compile
*c
,
463 struct brw_fp_dst dst
= dst_reg(TGSI_FILE_INPUT
, idx
);
464 struct brw_fp_src interp
= src_reg(BRW_FILE_PAYLOAD
, idx
);
465 struct brw_fp_src deltas
= get_delta_xy(c
);
467 /* Need to use PINTERP on attributes which have been
468 * multiplied by 1/W in the SF program, and LINTERP on those
472 case TGSI_SEMANTIC_POSITION
:
473 /* Have to treat wpos.xy specially:
477 dst_mask(dst
, BRW_WRITEMASK_XY
),
480 /* TGSI_FILE_INPUT.attr.xyzw = INTERP payload.interp[attr].x, deltas.xyw
484 dst_mask(dst
, BRW_WRITEMASK_ZW
),
489 case TGSI_SEMANTIC_COLOR
:
490 if (c
->key
.flat_shade
) {
496 else if (interp_mode
== TGSI_INTERPOLATE_LINEAR
) {
514 case TGSI_SEMANTIC_FOG
:
515 /* Interpolate the fog coordinate */
518 dst_mask(dst
, BRW_WRITEMASK_X
),
525 dst_mask(dst
, BRW_WRITEMASK_YZ
),
530 dst_mask(dst
, BRW_WRITEMASK_W
),
534 case TGSI_SEMANTIC_FACE
:
535 /* XXX review/test this case */
538 dst_mask(dst
, BRW_WRITEMASK_X
));
542 dst_mask(dst
, BRW_WRITEMASK_YZ
),
547 dst_mask(dst
, BRW_WRITEMASK_W
),
551 case TGSI_SEMANTIC_PSIZE
:
552 /* XXX review/test this case */
555 dst_mask(dst
, BRW_WRITEMASK_XY
),
562 dst_mask(dst
, BRW_WRITEMASK_Z
),
567 dst_mask(dst
, BRW_WRITEMASK_W
),
572 switch (interp_mode
) {
573 case TGSI_INTERPOLATE_CONSTANT
:
580 case TGSI_INTERPOLATE_LINEAR
:
588 case TGSI_INTERPOLATE_PERSPECTIVE
:
602 /***********************************************************************
603 * Expand various instructions here to simpler forms.
605 static void precalc_dst( struct brw_wm_compile
*c
,
606 struct brw_fp_dst dst
,
607 struct brw_fp_src src0
,
608 struct brw_fp_src src1
)
610 if (dst
.writemask
& BRW_WRITEMASK_Y
) {
611 /* dst.y = mul src0.y, src1.y
615 dst_mask(dst
, BRW_WRITEMASK_Y
),
620 if (dst
.writemask
& BRW_WRITEMASK_XZ
) {
621 /* dst.z = mov src0.zzzz
625 dst_mask(dst
, BRW_WRITEMASK_Z
),
626 src_scalar(src0
, Z
));
628 /* dst.x = imm1f(1.0)
632 dst_saturate(dst_mask(dst
, BRW_WRITEMASK_X
), 0),
635 if (dst
.writemask
& BRW_WRITEMASK_W
) {
636 /* dst.w = mov src1.w
640 dst_mask(dst
, BRW_WRITEMASK_W
),
646 static void precalc_lit( struct brw_wm_compile
*c
,
647 struct brw_fp_dst dst
,
648 struct brw_fp_src src0
)
650 if (dst
.writemask
& BRW_WRITEMASK_XW
) {
651 /* dst.xw = imm(1.0f)
655 dst_saturate(dst_mask(dst
, BRW_WRITEMASK_XW
), 0),
659 if (dst
.writemask
& BRW_WRITEMASK_YZ
) {
662 dst_mask(dst
, BRW_WRITEMASK_YZ
),
669 * Some TEX instructions require extra code, cube map coordinate
670 * normalization, or coordinate scaling for RECT textures, etc.
671 * This function emits those extra instructions and the TEX
672 * instruction itself.
674 static void precalc_tex( struct brw_wm_compile
*c
,
675 struct brw_fp_dst dst
,
678 struct brw_fp_src src0
,
679 struct brw_fp_src sampler
)
681 struct brw_fp_src coord
= src_undef();
682 struct brw_fp_dst tmp
= dst_undef();
684 assert(unit
< BRW_MAX_TEX_UNIT
);
686 /* Cubemap: find longest component of coord vector and normalize
689 if (target
== TGSI_TEXTURE_CUBE
) {
690 struct brw_fp_src tmpsrc
;
693 tmpsrc
= src_reg_from_dst(tmp
);
695 /* tmp = abs(src0) */
701 /* tmp.X = MAX(tmp.X, tmp.Y) */
702 emit_op2(c
, TGSI_OPCODE_MAX
,
703 dst_mask(tmp
, BRW_WRITEMASK_X
),
704 src_scalar(tmpsrc
, X
),
705 src_scalar(tmpsrc
, Y
));
707 /* tmp.X = MAX(tmp.X, tmp.Z) */
708 emit_op2(c
, TGSI_OPCODE_MAX
,
709 dst_mask(tmp
, BRW_WRITEMASK_X
),
711 src_scalar(tmpsrc
, Z
));
713 /* tmp.X = 1 / tmp.X */
714 emit_op1(c
, TGSI_OPCODE_RCP
,
715 dst_mask(tmp
, BRW_WRITEMASK_X
),
718 /* tmp = src0 * tmp.xxxx */
719 emit_op2(c
, TGSI_OPCODE_MUL
,
722 src_scalar(tmpsrc
, X
));
726 else if (target
== TGSI_TEXTURE_RECT
||
727 target
== TGSI_TEXTURE_SHADOWRECT
) {
728 /* XXX: need a mechanism for internally generated constants.
736 /* Need to emit YUV texture conversions by hand. Probably need to
737 * do this here - the alternative is in brw_wm_emit.c, but the
738 * conversion requires allocating a temporary variable which we
739 * don't have the facility to do that late in the compilation.
741 if (c
->key
.yuvtex_mask
& (1 << unit
)) {
742 /* convert ycbcr to RGBA */
743 GLboolean swap_uv
= c
->key
.yuvtex_swap_mask
& (1<<unit
);
744 struct brw_fp_dst tmp
= get_temp(c
);
745 struct brw_fp_src tmpsrc
= src_reg_from_dst(tmp
);
746 struct brw_fp_src C0
= src_imm4f( c
, -.5, -.0625, -.5, 1.164 );
747 struct brw_fp_src C1
= src_imm4f( c
, 1.596, -0.813, 2.018, -.391 );
753 dst_saturate(tmp
, dst
.saturate
),
761 /* tmp.xyz = ADD TMP, C0
763 emit_op2(c
, TGSI_OPCODE_ADD
,
764 dst_mask(tmp
, BRW_WRITEMASK_XYZ
),
768 /* YUV.y = MUL YUV.y, C0.w
770 emit_op2(c
, TGSI_OPCODE_MUL
,
771 dst_mask(tmp
, BRW_WRITEMASK_Y
),
777 * RGB.xyz = MAD YUV.zzx, C1, YUV.y
779 * RGB.xyz = MAD YUV.xxz, C1, YUV.y
782 emit_op3(c
, TGSI_OPCODE_MAD
,
783 dst_mask(dst
, BRW_WRITEMASK_XYZ
),
785 src_swizzle(tmpsrc
, Z
,Z
,X
,X
) :
786 src_swizzle(tmpsrc
, X
,X
,Z
,Z
)),
788 src_scalar(tmpsrc
, Y
));
790 /* RGB.y = MAD YUV.z, C1.w, RGB.y
794 dst_mask(dst
, BRW_WRITEMASK_Y
),
795 src_scalar(tmpsrc
, Z
),
797 src_scalar(src_reg_from_dst(dst
), Y
));
799 release_temp(c
, tmp
);
802 /* ordinary RGBA tex instruction */
814 /* XXX: add GL_EXT_texture_swizzle support to gallium -- by
815 * generating shader varients in mesa state tracker.
818 /* Release this temp if we ended up allocating it:
820 if (!dst_is_undef(tmp
))
821 release_temp(c
, tmp
);
826 * Check if the given TXP instruction really needs the divide-by-W step.
828 static GLboolean
projtex( struct brw_wm_compile
*c
,
830 struct brw_fp_src src
)
832 /* Only try to detect the simplest cases. Could detect (later)
833 * cases where we are trying to emit code like RCP {1.0}, MUL x,
836 * More complex cases than this typically only arise from
837 * user-provided fragment programs anyway:
839 if (target
== TGSI_TEXTURE_CUBE
)
840 return GL_FALSE
; /* ut2004 gun rendering !?! */
842 if (src
.file
== TGSI_FILE_INPUT
&&
843 BRW_GET_SWZ(src
.swizzle
, W
) == W
&&
844 c
->fp
->info
.input_interpolate
[src
.index
] != TGSI_INTERPOLATE_PERSPECTIVE
)
854 static void precalc_txp( struct brw_wm_compile
*c
,
855 struct brw_fp_dst dst
,
858 struct brw_fp_src src0
,
859 struct brw_fp_src sampler
)
861 if (projtex(c
, target
, src0
)) {
862 struct brw_fp_dst tmp
= get_temp(c
);
864 /* tmp0.w = RCP inst.arg[0][3]
868 dst_mask(tmp
, BRW_WRITEMASK_W
),
869 src_scalar(src0
, W
));
871 /* tmp0.xyz = MUL inst.arg[0], tmp0.wwww
875 dst_mask(tmp
, BRW_WRITEMASK_XYZ
),
877 src_scalar(src_reg_from_dst(tmp
), W
));
885 src_reg_from_dst(tmp
),
888 release_temp(c
, tmp
);
894 precalc_tex(c
, dst
, target
, unit
, src0
, sampler
);
899 /* XXX: note this returns a src_reg.
901 static struct brw_fp_src
902 find_output_by_semantic( struct brw_wm_compile
*c
,
906 const struct tgsi_shader_info
*info
= &c
->fp
->info
;
909 for (i
= 0; i
< info
->num_outputs
; i
++)
910 if (info
->output_semantic_name
[i
] == semantic
&&
911 info
->output_semantic_index
[i
] == index
)
912 return src_reg( TGSI_FILE_OUTPUT
, i
);
914 /* If not found, return some arbitrary immediate value:
916 * XXX: this is a good idea but immediates are up generating extra
917 * curbe entries atm, as they would have in the original driver.
919 return src_reg( TGSI_FILE_OUTPUT
, 0 ); /* src_imm1f(c, 1.0); */
923 static void emit_fb_write( struct brw_wm_compile
*c
)
925 struct brw_fp_src payload_r0_depth
= src_reg(BRW_FILE_PAYLOAD
, PAYLOAD_DEPTH
);
926 struct brw_fp_src outdepth
= find_output_by_semantic(c
, TGSI_SEMANTIC_POSITION
, 0);
930 outdepth
= src_scalar(outdepth
, Z
);
932 for (i
= 0 ; i
< c
->key
.nr_cbufs
; i
++) {
933 struct brw_fp_src outcolor
;
935 outcolor
= find_output_by_semantic(c
, TGSI_SEMANTIC_COLOR
, i
);
937 /* Use emit_tex_op so that we can specify the inst->target
938 * field, which is abused to contain the FB write target and the
941 emit_tex_op(c
, WM_FB_WRITE
,
943 (i
== c
->key
.nr_cbufs
- 1), /* EOT */
953 static struct brw_fp_dst
translate_dst( struct brw_wm_compile
*c
,
954 const struct tgsi_full_dst_register
*dst
,
957 struct brw_fp_dst out
;
959 out
.file
= dst
->Register
.File
;
960 out
.index
= dst
->Register
.Index
;
961 out
.writemask
= dst
->Register
.WriteMask
;
962 out
.indirect
= dst
->Register
.Indirect
;
963 out
.saturate
= (saturate
== TGSI_SAT_ZERO_ONE
);
966 assert(dst
->Indirect
.File
== TGSI_FILE_ADDRESS
);
967 assert(dst
->Indirect
.Index
== 0);
974 static struct brw_fp_src
translate_src( struct brw_wm_compile
*c
,
975 const struct tgsi_full_src_register
*src
)
977 struct brw_fp_src out
;
979 out
.file
= src
->Register
.File
;
980 out
.index
= src
->Register
.Index
;
981 out
.indirect
= src
->Register
.Indirect
;
983 out
.swizzle
= ((src
->Register
.SwizzleX
<< 0) |
984 (src
->Register
.SwizzleY
<< 2) |
985 (src
->Register
.SwizzleZ
<< 4) |
986 (src
->Register
.SwizzleW
<< 6));
988 switch (tgsi_util_get_full_src_register_sign_mode( src
, 0 )) {
989 case TGSI_UTIL_SIGN_CLEAR
:
994 case TGSI_UTIL_SIGN_SET
:
999 case TGSI_UTIL_SIGN_TOGGLE
:
1004 case TGSI_UTIL_SIGN_KEEP
:
1012 assert(src
->Indirect
.File
== TGSI_FILE_ADDRESS
);
1013 assert(src
->Indirect
.Index
== 0);
1021 static void emit_insn( struct brw_wm_compile
*c
,
1022 const struct tgsi_full_instruction
*inst
)
1024 unsigned opcode
= inst
->Instruction
.Opcode
;
1025 struct brw_fp_dst dst
;
1026 struct brw_fp_src src
[3];
1029 dst
= translate_dst( c
, &inst
->Dst
[0],
1030 inst
->Instruction
.Saturate
);
1032 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++)
1033 src
[i
] = translate_src( c
, &inst
->Src
[i
] );
1036 case TGSI_OPCODE_ABS
:
1037 emit_op1(c
, TGSI_OPCODE_MOV
,
1042 case TGSI_OPCODE_SUB
:
1043 emit_op2(c
, TGSI_OPCODE_ADD
,
1046 src_negate(src
[1]));
1049 case TGSI_OPCODE_SCS
:
1050 emit_op1(c
, TGSI_OPCODE_SCS
,
1051 dst_mask(dst
, BRW_WRITEMASK_XY
),
1055 case TGSI_OPCODE_DST
:
1056 precalc_dst(c
, dst
, src
[0], src
[1]);
1059 case TGSI_OPCODE_LIT
:
1060 precalc_lit(c
, dst
, src
[0]);
1063 case TGSI_OPCODE_TEX
:
1065 inst
->Texture
.Texture
,
1066 src
[1].index
, /* use sampler unit for tex idx */
1068 src
[1]); /* sampler */
1071 case TGSI_OPCODE_TXP
:
1073 inst
->Texture
.Texture
,
1074 src
[1].index
, /* use sampler unit for tex idx */
1076 src
[1]); /* sampler */
1079 case TGSI_OPCODE_TXB
:
1080 /* XXX: TXB not done
1083 inst
->Texture
.Texture
,
1084 src
[1].index
, /* use sampler unit for tex idx*/
1089 case TGSI_OPCODE_XPD
:
1090 emit_op2(c
, TGSI_OPCODE_XPD
,
1091 dst_mask(dst
, BRW_WRITEMASK_XYZ
),
1096 case TGSI_OPCODE_KIL
:
1097 emit_op1(c
, TGSI_OPCODE_KIL
,
1098 dst_mask(dst_undef(), 0),
1102 case TGSI_OPCODE_END
:
1106 if (!c
->key
.has_flow_control
&&
1107 brw_wm_is_scalar_result(opcode
))
1108 emit_scalar_insn(c
, opcode
, dst
, src
[0], src
[1], src
[2]);
1110 emit_op3(c
, opcode
, dst
, src
[0], src
[1], src
[2]);
1116 * Initial pass for fragment program code generation.
1117 * This function is used by both the GLSL and non-GLSL paths.
1119 int brw_wm_pass_fp( struct brw_wm_compile
*c
)
1121 struct brw_fragment_shader
*fs
= c
->fp
;
1122 struct tgsi_parse_context parse
;
1123 struct tgsi_full_instruction
*inst
;
1124 struct tgsi_full_declaration
*decl
;
1129 if (BRW_DEBUG
& DEBUG_WM
) {
1130 debug_printf("pre-fp:\n");
1131 tgsi_dump(fs
->tokens
, 0);
1134 c
->fp_pixel_xy
= src_undef();
1135 c
->fp_delta_xy
= src_undef();
1136 c
->fp_pixel_w
= src_undef();
1138 c
->nr_immediates
= 0;
1141 /* Loop over all instructions doing assorted simplifications and
1144 tgsi_parse_init( &parse
, fs
->tokens
);
1145 while( !tgsi_parse_end_of_tokens( &parse
) ) {
1146 tgsi_parse_token( &parse
);
1148 switch( parse
.FullToken
.Token
.Type
) {
1149 case TGSI_TOKEN_TYPE_DECLARATION
:
1150 /* Turn intput declarations into special WM_* instructions.
1152 * XXX: For non-branching shaders, consider deferring variable
1153 * initialization as late as possible to minimize register
1154 * usage. This is how the original BRW driver worked.
1156 * In a branching shader, must preamble instructions at decl
1157 * time, as instruction order in the shader does not
1158 * correspond to the order instructions are executed in the
1161 * This is where special instructions such as WM_CINTERP,
1162 * WM_LINTERP, WM_PINTERP and WM_WPOSXY are emitted to
1163 * compute shader inputs from the payload registers and pixel
1166 decl
= &parse
.FullToken
.FullDeclaration
;
1167 if( decl
->Declaration
.File
== TGSI_FILE_INPUT
) {
1168 unsigned first
, last
, mask
;
1171 first
= decl
->Range
.First
;
1172 last
= decl
->Range
.Last
;
1173 mask
= decl
->Declaration
.UsageMask
;
1175 for (attrib
= first
; attrib
<= last
; attrib
++) {
1178 decl
->Semantic
.Name
,
1179 decl
->Declaration
.Interpolate
);
1185 case TGSI_TOKEN_TYPE_IMMEDIATE
:
1186 /* Unlike VS programs we can probably manage fine encoding
1187 * immediate values directly into the emitted EU
1188 * instructions, as we probably only need to reference one
1189 * float value per instruction. Just save the data for now
1190 * and use directly later.
1192 i
= c
->nr_immediates
++;
1193 imm
= &parse
.FullToken
.FullImmediate
.u
[i
].Float
;
1194 size
= parse
.FullToken
.FullImmediate
.Immediate
.NrTokens
- 1;
1196 if (c
->nr_immediates
>= BRW_WM_MAX_CONST
)
1197 return PIPE_ERROR_OUT_OF_MEMORY
;
1199 for (i
= 0; i
< size
; i
++)
1200 c
->immediate
[c
->nr_immediates
].v
[i
] = imm
[i
];
1203 c
->immediate
[c
->nr_immediates
].v
[i
] = 0.0;
1205 c
->immediate
[c
->nr_immediates
].nr
= size
;
1209 case TGSI_TOKEN_TYPE_INSTRUCTION
:
1210 inst
= &parse
.FullToken
.FullInstruction
;
1216 if (BRW_DEBUG
& DEBUG_WM
) {
1217 brw_wm_print_fp_program( c
, "pass_fp" );