2 * Pixel and vertex shaders implementation using ARB_vertex_program
3 * and ARB_fragment_program GL extensions.
5 * Copyright 2002-2003 Jason Edmeades
6 * Copyright 2002-2003 Raphael Junqueira
7 * Copyright 2004 Christian Costa
8 * Copyright 2005 Oliver Stieber
9 * Copyright 2006 Ivan Gyurdiev
10 * Copyright 2006 Jason Green
11 * Copyright 2006 Henri Verbeet
13 * This library is free software; you can redistribute it and/or
14 * modify it under the terms of the GNU Lesser General Public
15 * License as published by the Free Software Foundation; either
16 * version 2.1 of the License, or (at your option) any later version.
18 * This library is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 * Lesser General Public License for more details.
23 * You should have received a copy of the GNU Lesser General Public
24 * License along with this library; if not, write to the Free Software
25 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
33 #include "wined3d_private.h"
35 WINE_DEFAULT_DEBUG_CHANNEL(d3d_shader
);
36 WINE_DECLARE_DEBUG_CHANNEL(d3d_constants
);
38 #define GLINFO_LOCATION (*gl_info)
40 /********************************************************
41 * ARB_[vertex/fragment]_program helper functions follow
42 ********************************************************/
45 * Loads floating point constants into the currently set ARB_vertex/fragment_program.
46 * When constant_list == NULL, it will load all the constants.
48 * @target_type should be either GL_VERTEX_PROGRAM_ARB (for vertex shaders)
49 * or GL_FRAGMENT_PROGRAM_ARB (for pixel shaders)
51 static void shader_arb_load_constantsF(IWineD3DBaseShaderImpl
* This
, WineD3D_GL_Info
*gl_info
, GLuint target_type
,
52 unsigned int max_constants
, float* constants
, struct list
*constant_list
) {
53 constant_entry
*constant
;
54 local_constant
* lconst
;
57 if (TRACE_ON(d3d_shader
)) {
58 LIST_FOR_EACH_ENTRY(constant
, constant_list
, constant_entry
, entry
) {
60 TRACE_(d3d_constants
)("Loading constants %i: %f, %f, %f, %f\n", i
,
61 constants
[i
* 4 + 0], constants
[i
* 4 + 1],
62 constants
[i
* 4 + 2], constants
[i
* 4 + 3]);
65 LIST_FOR_EACH_ENTRY(constant
, constant_list
, constant_entry
, entry
) {
67 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type
, i
, constants
+ (i
* 4)));
69 checkGLcall("glProgramEnvParameter4fvARB()");
71 /* Load immediate constants */
72 if (TRACE_ON(d3d_shader
)) {
73 LIST_FOR_EACH_ENTRY(lconst
, &This
->baseShader
.constantsF
, local_constant
, entry
) {
74 GLfloat
* values
= (GLfloat
*)lconst
->value
;
75 TRACE_(d3d_constants
)("Loading local constants %i: %f, %f, %f, %f\n", lconst
->idx
,
76 values
[0], values
[1], values
[2], values
[3]);
79 LIST_FOR_EACH_ENTRY(lconst
, &This
->baseShader
.constantsF
, local_constant
, entry
) {
80 GL_EXTCALL(glProgramEnvParameter4fvARB(target_type
, lconst
->idx
, (GLfloat
*)lconst
->value
));
82 checkGLcall("glProgramEnvParameter4fvARB()");
86 * Loads the app-supplied constants into the currently set ARB_[vertex/fragment]_programs.
88 * We only support float constants in ARB at the moment, so don't
89 * worry about the Integers or Booleans
91 void shader_arb_load_constants(
92 IWineD3DDevice
* device
,
94 char useVertexShader
) {
96 IWineD3DDeviceImpl
* deviceImpl
= (IWineD3DDeviceImpl
*) device
;
97 IWineD3DStateBlockImpl
* stateBlock
= deviceImpl
->stateBlock
;
98 WineD3D_GL_Info
*gl_info
= &((IWineD3DImpl
*)deviceImpl
->wineD3D
)->gl_info
;
100 if (useVertexShader
) {
101 IWineD3DBaseShaderImpl
* vshader
= (IWineD3DBaseShaderImpl
*) stateBlock
->vertexShader
;
103 /* Load DirectX 9 float constants for vertex shader */
104 shader_arb_load_constantsF(vshader
, gl_info
, GL_VERTEX_PROGRAM_ARB
,
105 GL_LIMITS(vshader_constantsF
),
106 stateBlock
->vertexShaderConstantF
,
107 &stateBlock
->set_vconstantsF
);
109 /* Upload the position fixup */
110 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_VERTEX_PROGRAM_ARB
, ARB_SHADER_PRIVCONST_POS
, deviceImpl
->posFixup
));
113 if (usePixelShader
) {
115 IWineD3DBaseShaderImpl
* pshader
= (IWineD3DBaseShaderImpl
*) stateBlock
->pixelShader
;
117 /* Load DirectX 9 float constants for pixel shader */
118 shader_arb_load_constantsF(pshader
, gl_info
, GL_FRAGMENT_PROGRAM_ARB
,
119 GL_LIMITS(pshader_constantsF
),
120 stateBlock
->pixelShaderConstantF
,
121 &stateBlock
->set_pconstantsF
);
122 if(((IWineD3DPixelShaderImpl
*) pshader
)->bumpenvmatconst
) {
123 /* needsbumpmat stores the stage number from where to load the matrix. bumpenvmatconst stores the
124 * number of the constant to load the matrix into.
125 * The state manager takes care that this function is always called if the bump env matrix changes
127 IWineD3DPixelShaderImpl
*psi
= (IWineD3DPixelShaderImpl
*) pshader
;
128 float *data
= (float *) &stateBlock
->textureState
[(int) psi
->needsbumpmat
][WINED3DTSS_BUMPENVMAT00
];
129 GL_EXTCALL(glProgramEnvParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB
, psi
->bumpenvmatconst
, data
));
134 /* Generate the variable & register declarations for the ARB_vertex_program and ARB_fragment_program output targets */
135 void shader_generate_arb_declarations(
136 IWineD3DBaseShader
*iface
,
137 shader_reg_maps
* reg_maps
,
138 SHADER_BUFFER
* buffer
,
139 WineD3D_GL_Info
* gl_info
) {
141 IWineD3DBaseShaderImpl
* This
= (IWineD3DBaseShaderImpl
*) iface
;
143 char pshader
= shader_is_pshader_version(This
->baseShader
.hex_version
);
144 unsigned max_constantsF
= min(This
->baseShader
.limits
.constant_float
,
145 (pshader
? GL_LIMITS(pshader_constantsF
) : GL_LIMITS(vshader_constantsF
)));
147 /* Temporary Output register */
148 shader_addline(buffer
, "TEMP TMP_OUT;\n");
150 for(i
= 0; i
< This
->baseShader
.limits
.temporary
; i
++) {
151 if (reg_maps
->temporary
[i
])
152 shader_addline(buffer
, "TEMP R%u;\n", i
);
155 for (i
= 0; i
< This
->baseShader
.limits
.address
; i
++) {
156 if (reg_maps
->address
[i
])
157 shader_addline(buffer
, "ADDRESS A%d;\n", i
);
160 for(i
= 0; i
< This
->baseShader
.limits
.texcoord
; i
++) {
161 if (reg_maps
->texcoord
[i
])
162 shader_addline(buffer
,"TEMP T%u;\n", i
);
165 /* Texture coordinate registers must be pre-loaded */
166 for (i
= 0; i
< This
->baseShader
.limits
.texcoord
; i
++) {
167 if (reg_maps
->texcoord
[i
])
168 shader_addline(buffer
, "MOV T%u, fragment.texcoord[%u];\n", i
, i
);
171 if(reg_maps
->bumpmat
/* Only a pshader can use texbem */) {
172 /* If the shader does not use all available constants, use the next free constant to load the bump mapping environment matrix from
173 * the stateblock into the shader. If no constant is available don't load, texbem will then just sample the texture without applying
176 if(max_constantsF
< GL_LIMITS(pshader_constantsF
)) {
177 ((IWineD3DPixelShaderImpl
*)This
)->bumpenvmatconst
= max_constantsF
;
178 shader_addline(buffer
, "PARAM bumpenvmat = program.env[%d];\n", ((IWineD3DPixelShaderImpl
*)This
)->bumpenvmatconst
);
180 FIXME("No free constant found to load environemnt bump mapping matrix into the shader. texbem instruction will not apply bump mapping\n");
184 /* Need to PARAM the environment parameters (constants) so we can use relative addressing */
185 shader_addline(buffer
, "PARAM C[%d] = { program.env[0..%d] };\n",
186 max_constantsF
, max_constantsF
- 1);
189 static const char * const shift_tab
[] = {
190 "dummy", /* 0 (none) */
191 "coefmul.x", /* 1 (x2) */
192 "coefmul.y", /* 2 (x4) */
193 "coefmul.z", /* 3 (x8) */
194 "coefmul.w", /* 4 (x16) */
195 "dummy", /* 5 (x32) */
196 "dummy", /* 6 (x64) */
197 "dummy", /* 7 (x128) */
198 "dummy", /* 8 (d256) */
199 "dummy", /* 9 (d128) */
200 "dummy", /* 10 (d64) */
201 "dummy", /* 11 (d32) */
202 "coefdiv.w", /* 12 (d16) */
203 "coefdiv.z", /* 13 (d8) */
204 "coefdiv.y", /* 14 (d4) */
205 "coefdiv.x" /* 15 (d2) */
208 static void shader_arb_get_write_mask(const DWORD param
, char *write_mask
) {
209 char *ptr
= write_mask
;
211 if ((param
& WINED3DSP_WRITEMASK_ALL
) != WINED3DSP_WRITEMASK_ALL
) {
213 if (param
& WINED3DSP_WRITEMASK_0
) *ptr
++ = 'x';
214 if (param
& WINED3DSP_WRITEMASK_1
) *ptr
++ = 'y';
215 if (param
& WINED3DSP_WRITEMASK_2
) *ptr
++ = 'z';
216 if (param
& WINED3DSP_WRITEMASK_3
) *ptr
++ = 'w';
222 static void shader_arb_get_swizzle(const DWORD param
, BOOL fixup
, char *swizzle_str
) {
223 /* For registers of type WINED3DDECLTYPE_D3DCOLOR, data is stored as "bgra",
224 * but addressed as "rgba". To fix this we need to swap the register's x
225 * and z components. */
226 const char *swizzle_chars
= fixup
? "zyxw" : "xyzw";
227 char *ptr
= swizzle_str
;
229 /* swizzle bits fields: wwzzyyxx */
230 DWORD swizzle
= (param
& WINED3DSP_SWIZZLE_MASK
) >> WINED3DSP_SWIZZLE_SHIFT
;
231 DWORD swizzle_x
= swizzle
& 0x03;
232 DWORD swizzle_y
= (swizzle
>> 2) & 0x03;
233 DWORD swizzle_z
= (swizzle
>> 4) & 0x03;
234 DWORD swizzle_w
= (swizzle
>> 6) & 0x03;
236 /* If the swizzle is the default swizzle (ie, "xyzw"), we don't need to
237 * generate a swizzle string. Unless we need to do our own swizzling. */
238 if ((WINED3DSP_NOSWIZZLE
>> WINED3DSP_SWIZZLE_SHIFT
) != swizzle
|| fixup
) {
240 if (swizzle_x
== swizzle_y
&& swizzle_x
== swizzle_z
&& swizzle_x
== swizzle_w
) {
241 *ptr
++ = swizzle_chars
[swizzle_x
];
243 *ptr
++ = swizzle_chars
[swizzle_x
];
244 *ptr
++ = swizzle_chars
[swizzle_y
];
245 *ptr
++ = swizzle_chars
[swizzle_z
];
246 *ptr
++ = swizzle_chars
[swizzle_w
];
253 static void pshader_get_register_name(
254 const DWORD param
, char* regstr
) {
256 DWORD reg
= param
& WINED3DSP_REGNUM_MASK
;
257 DWORD regtype
= shader_get_regtype(param
);
260 case WINED3DSPR_TEMP
:
261 sprintf(regstr
, "R%u", reg
);
263 case WINED3DSPR_INPUT
:
265 strcpy(regstr
, "fragment.color.primary");
267 strcpy(regstr
, "fragment.color.secondary");
270 case WINED3DSPR_CONST
:
271 sprintf(regstr
, "C[%u]", reg
);
273 case WINED3DSPR_TEXTURE
: /* case WINED3DSPR_ADDR: */
274 sprintf(regstr
,"T%u", reg
);
276 case WINED3DSPR_COLOROUT
:
278 sprintf(regstr
, "result.color");
280 /* TODO: See GL_ARB_draw_buffers */
281 FIXME("Unsupported write to render target %u\n", reg
);
282 sprintf(regstr
, "unsupported_register");
285 case WINED3DSPR_DEPTHOUT
:
286 sprintf(regstr
, "result.depth");
288 case WINED3DSPR_ATTROUT
:
289 sprintf(regstr
, "oD[%u]", reg
);
291 case WINED3DSPR_TEXCRDOUT
:
292 sprintf(regstr
, "oT[%u]", reg
);
295 FIXME("Unhandled register name Type(%d)\n", regtype
);
296 sprintf(regstr
, "unrecognized_register");
301 /* TODO: merge with pixel shader */
302 static void vshader_program_add_param(SHADER_OPCODE_ARG
*arg
, const DWORD param
, BOOL is_input
, char *hwLine
) {
304 IWineD3DVertexShaderImpl
* This
= (IWineD3DVertexShaderImpl
*) arg
->shader
;
306 /* oPos, oFog and oPts in D3D */
307 static const char * const hwrastout_reg_names
[] = { "TMP_OUT", "TMP_FOG", "result.pointsize" };
309 DWORD reg
= param
& WINED3DSP_REGNUM_MASK
;
310 DWORD regtype
= shader_get_regtype(param
);
312 BOOL is_color
= FALSE
;
314 if ((param
& WINED3DSP_SRCMOD_MASK
) == WINED3DSPSM_NEG
) {
315 strcat(hwLine
, " -");
321 case WINED3DSPR_TEMP
:
322 sprintf(tmpReg
, "R%u", reg
);
323 strcat(hwLine
, tmpReg
);
325 case WINED3DSPR_INPUT
:
327 if (vshader_input_is_color((IWineD3DVertexShader
*) This
, reg
))
330 sprintf(tmpReg
, "vertex.attrib[%u]", reg
);
331 strcat(hwLine
, tmpReg
);
333 case WINED3DSPR_CONST
:
334 sprintf(tmpReg
, "C[%s%u]", (param
& WINED3DSHADER_ADDRMODE_RELATIVE
) ? "A0.x + " : "", reg
);
335 strcat(hwLine
, tmpReg
);
337 case WINED3DSPR_ADDR
: /*case D3DSPR_TEXTURE:*/
338 sprintf(tmpReg
, "A%u", reg
);
339 strcat(hwLine
, tmpReg
);
341 case WINED3DSPR_RASTOUT
:
342 sprintf(tmpReg
, "%s", hwrastout_reg_names
[reg
]);
343 strcat(hwLine
, tmpReg
);
345 case WINED3DSPR_ATTROUT
:
347 strcat(hwLine
, "result.color.primary");
349 strcat(hwLine
, "result.color.secondary");
352 case WINED3DSPR_TEXCRDOUT
:
353 sprintf(tmpReg
, "result.texcoord[%u]", reg
);
354 strcat(hwLine
, tmpReg
);
357 FIXME("Unknown reg type %d %d\n", regtype
, reg
);
358 strcat(hwLine
, "unrecognized_register");
364 shader_arb_get_write_mask(param
, write_mask
);
365 strcat(hwLine
, write_mask
);
368 shader_arb_get_swizzle(param
, is_color
, swizzle
);
369 strcat(hwLine
, swizzle
);
373 static void shader_hw_sample(SHADER_OPCODE_ARG
* arg
, DWORD sampler_idx
, const char *dst_str
, const char *coord_reg
) {
374 IWineD3DPixelShaderImpl
* This
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
375 IWineD3DDeviceImpl
* deviceImpl
= (IWineD3DDeviceImpl
*) This
->baseShader
.device
;
377 SHADER_BUFFER
* buffer
= arg
->buffer
;
378 DWORD sampler_type
= arg
->reg_maps
->samplers
[sampler_idx
] & WINED3DSP_TEXTURETYPE_MASK
;
379 const char *tex_type
;
381 switch(sampler_type
) {
390 case WINED3DSTT_VOLUME
:
394 case WINED3DSTT_CUBE
:
399 ERR("Unexpected texture type %d\n", sampler_type
);
403 if (deviceImpl
->stateBlock
->textureState
[sampler_idx
][WINED3DTSS_TEXTURETRANSFORMFLAGS
] & WINED3DTTFF_PROJECTED
) {
404 shader_addline(buffer
, "TXP %s, %s, texture[%u], %s;\n", dst_str
, coord_reg
, sampler_idx
, tex_type
);
406 shader_addline(buffer
, "TEX %s, %s, texture[%u], %s;\n", dst_str
, coord_reg
, sampler_idx
, tex_type
);
411 static void pshader_gen_input_modifier_line (
412 SHADER_BUFFER
* buffer
,
417 /* Generate a line that does the input modifier computation and return the input register to use */
422 /* Assume a new line will be added */
425 /* Get register name */
426 pshader_get_register_name(instr
, regstr
);
427 shader_arb_get_swizzle(instr
, FALSE
, swzstr
);
429 switch (instr
& WINED3DSP_SRCMOD_MASK
) {
430 case WINED3DSPSM_NONE
:
431 sprintf(outregstr
, "%s%s", regstr
, swzstr
);
434 case WINED3DSPSM_NEG
:
435 sprintf(outregstr
, "-%s%s", regstr
, swzstr
);
438 case WINED3DSPSM_BIAS
:
439 shader_addline(buffer
, "ADD T%c, %s, -coefdiv.x;\n", 'A' + tmpreg
, regstr
);
441 case WINED3DSPSM_BIASNEG
:
442 shader_addline(buffer
, "ADD T%c, -%s, coefdiv.x;\n", 'A' + tmpreg
, regstr
);
444 case WINED3DSPSM_SIGN
:
445 shader_addline(buffer
, "MAD T%c, %s, coefmul.x, -one.x;\n", 'A' + tmpreg
, regstr
);
447 case WINED3DSPSM_SIGNNEG
:
448 shader_addline(buffer
, "MAD T%c, %s, -coefmul.x, one.x;\n", 'A' + tmpreg
, regstr
);
450 case WINED3DSPSM_COMP
:
451 shader_addline(buffer
, "SUB T%c, one.x, %s;\n", 'A' + tmpreg
, regstr
);
454 shader_addline(buffer
, "ADD T%c, %s, %s;\n", 'A' + tmpreg
, regstr
, regstr
);
456 case WINED3DSPSM_X2NEG
:
457 shader_addline(buffer
, "ADD T%c, -%s, -%s;\n", 'A' + tmpreg
, regstr
, regstr
);
460 shader_addline(buffer
, "RCP T%c, %s.z;\n", 'A' + tmpreg
, regstr
);
461 shader_addline(buffer
, "MUL T%c, %s, T%c;\n", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
464 shader_addline(buffer
, "RCP T%c, %s.w;\n", 'A' + tmpreg
, regstr
);
465 shader_addline(buffer
, "MUL T%c, %s, T%c;\n", 'A' + tmpreg
, regstr
, 'A' + tmpreg
);
468 sprintf(outregstr
, "%s%s", regstr
, swzstr
);
472 /* Return modified or original register, with swizzle */
474 sprintf(outregstr
, "T%c%s", 'A' + tmpreg
, swzstr
);
477 inline static void pshader_gen_output_modifier_line(
478 SHADER_BUFFER
* buffer
,
484 /* Generate a line that does the output modifier computation */
485 shader_addline(buffer
, "MUL%s %s%s, %s, %s;\n", saturate
? "_SAT" : "",
486 regstr
, write_mask
, regstr
, shift_tab
[shift
]);
489 void pshader_hw_cnd(SHADER_OPCODE_ARG
* arg
) {
491 SHADER_BUFFER
* buffer
= arg
->buffer
;
494 char src_name
[3][50];
496 /* FIXME: support output modifiers */
498 /* Handle output register */
499 pshader_get_register_name(arg
->dst
, dst_name
);
500 shader_arb_get_write_mask(arg
->dst
, dst_wmask
);
501 strcat(dst_name
, dst_wmask
);
503 /* Generate input register names (with modifiers) */
504 pshader_gen_input_modifier_line(buffer
, arg
->src
[0], 0, src_name
[0]);
505 pshader_gen_input_modifier_line(buffer
, arg
->src
[1], 1, src_name
[1]);
506 pshader_gen_input_modifier_line(buffer
, arg
->src
[2], 2, src_name
[2]);
508 shader_addline(buffer
, "ADD TMP, -%s, coefdiv.x;\n", src_name
[0]);
509 shader_addline(buffer
, "CMP %s, TMP, %s, %s;\n", dst_name
, src_name
[1], src_name
[2]);
512 void pshader_hw_cmp(SHADER_OPCODE_ARG
* arg
) {
514 SHADER_BUFFER
* buffer
= arg
->buffer
;
517 char src_name
[3][50];
519 /* FIXME: support output modifiers */
521 /* Handle output register */
522 pshader_get_register_name(arg
->dst
, dst_name
);
523 shader_arb_get_write_mask(arg
->dst
, dst_wmask
);
524 strcat(dst_name
, dst_wmask
);
526 /* Generate input register names (with modifiers) */
527 pshader_gen_input_modifier_line(buffer
, arg
->src
[0], 0, src_name
[0]);
528 pshader_gen_input_modifier_line(buffer
, arg
->src
[1], 1, src_name
[1]);
529 pshader_gen_input_modifier_line(buffer
, arg
->src
[2], 2, src_name
[2]);
531 shader_addline(buffer
, "CMP %s, %s, %s, %s;\n", dst_name
,
532 src_name
[0], src_name
[2], src_name
[1]);
535 /* Map the opcode 1-to-1 to the GL code */
536 void pshader_hw_map2gl(SHADER_OPCODE_ARG
* arg
) {
538 CONST SHADER_OPCODE
* curOpcode
= arg
->opcode
;
539 SHADER_BUFFER
* buffer
= arg
->buffer
;
540 DWORD dst
= arg
->dst
;
541 DWORD
* src
= arg
->src
;
546 /* Output token related */
547 char output_rname
[256];
548 char output_wmask
[20];
549 BOOL saturate
= FALSE
;
550 BOOL centroid
= FALSE
;
551 BOOL partialprecision
= FALSE
;
554 strcpy(tmpLine
, curOpcode
->glname
);
556 /* Process modifiers */
557 if (0 != (dst
& WINED3DSP_DSTMOD_MASK
)) {
558 DWORD mask
= dst
& WINED3DSP_DSTMOD_MASK
;
560 saturate
= mask
& WINED3DSPDM_SATURATE
;
561 centroid
= mask
& WINED3DSPDM_MSAMPCENTROID
;
562 partialprecision
= mask
& WINED3DSPDM_PARTIALPRECISION
;
563 mask
&= ~(WINED3DSPDM_MSAMPCENTROID
| WINED3DSPDM_PARTIALPRECISION
| WINED3DSPDM_SATURATE
);
565 FIXME("Unrecognized modifier(0x%#x)\n", mask
>> WINED3DSP_DSTMOD_SHIFT
);
568 FIXME("Unhandled modifier(0x%#x)\n", mask
>> WINED3DSP_DSTMOD_SHIFT
);
570 shift
= (dst
& WINED3DSP_DSTSHIFT_MASK
) >> WINED3DSP_DSTSHIFT_SHIFT
;
572 /* Generate input and output registers */
573 if (curOpcode
->num_params
> 0) {
574 char operands
[4][100];
576 /* Generate input register names (with modifiers) */
577 for (i
= 1; i
< curOpcode
->num_params
; ++i
)
578 pshader_gen_input_modifier_line(buffer
, src
[i
-1], i
-1, operands
[i
]);
580 /* Handle output register */
581 pshader_get_register_name(dst
, output_rname
);
582 strcpy(operands
[0], output_rname
);
583 shader_arb_get_write_mask(dst
, output_wmask
);
584 strcat(operands
[0], output_wmask
);
586 if (saturate
&& (shift
== 0))
587 strcat(tmpLine
, "_SAT");
588 strcat(tmpLine
, " ");
589 strcat(tmpLine
, operands
[0]);
590 for (i
= 1; i
< curOpcode
->num_params
; i
++) {
591 strcat(tmpLine
, ", ");
592 strcat(tmpLine
, operands
[i
]);
594 strcat(tmpLine
,";\n");
595 shader_addline(buffer
, tmpLine
);
597 /* A shift requires another line. */
599 pshader_gen_output_modifier_line(buffer
, saturate
, output_wmask
, shift
, output_rname
);
603 void pshader_hw_tex(SHADER_OPCODE_ARG
* arg
) {
605 IWineD3DPixelShaderImpl
* This
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
607 DWORD dst
= arg
->dst
;
608 DWORD
* src
= arg
->src
;
609 SHADER_BUFFER
* buffer
= arg
->buffer
;
610 DWORD hex_version
= This
->baseShader
.hex_version
;
615 DWORD reg_sampler_code
;
617 /* All versions have a destination register */
618 reg_dest_code
= dst
& WINED3DSP_REGNUM_MASK
;
619 pshader_get_register_name(dst
, reg_dest
);
621 /* 1.0-1.3: Use destination register as coordinate source.
622 1.4+: Use provided coordinate source register. */
623 if (hex_version
< WINED3DPS_VERSION(1,4))
624 strcpy(reg_coord
, reg_dest
);
626 pshader_gen_input_modifier_line(buffer
, src
[0], 0, reg_coord
);
628 /* 1.0-1.4: Use destination register number as texture code.
629 2.0+: Use provided sampler number as texture code. */
630 if (hex_version
< WINED3DPS_VERSION(2,0))
631 reg_sampler_code
= reg_dest_code
;
633 reg_sampler_code
= src
[1] & WINED3DSP_REGNUM_MASK
;
635 shader_hw_sample(arg
, reg_sampler_code
, reg_dest
, reg_coord
);
638 void pshader_hw_texcoord(SHADER_OPCODE_ARG
* arg
) {
640 IWineD3DPixelShaderImpl
* This
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
641 DWORD dst
= arg
->dst
;
642 DWORD
* src
= arg
->src
;
643 SHADER_BUFFER
* buffer
= arg
->buffer
;
644 DWORD hex_version
= This
->baseShader
.hex_version
;
647 shader_arb_get_write_mask(dst
, tmp
);
648 if (hex_version
!= WINED3DPS_VERSION(1,4)) {
649 DWORD reg
= dst
& WINED3DSP_REGNUM_MASK
;
650 shader_addline(buffer
, "MOV_SAT T%u%s, fragment.texcoord[%u];\n", reg
, tmp
, reg
);
652 DWORD reg1
= dst
& WINED3DSP_REGNUM_MASK
;
653 DWORD reg2
= src
[0] & WINED3DSP_REGNUM_MASK
;
654 shader_addline(buffer
, "MOV R%u%s, fragment.texcoord[%u];\n", reg1
, tmp
, reg2
);
658 void pshader_hw_texreg2ar(SHADER_OPCODE_ARG
* arg
) {
660 SHADER_BUFFER
* buffer
= arg
->buffer
;
662 DWORD reg1
= arg
->dst
& WINED3DSP_REGNUM_MASK
;
663 DWORD reg2
= arg
->src
[0] & WINED3DSP_REGNUM_MASK
;
666 sprintf(dst_str
, "T%u", reg1
);
667 shader_addline(buffer
, "MOV TMP.r, T%u.a;\n", reg2
);
668 shader_addline(buffer
, "MOV TMP.g, T%u.r;\n", reg2
);
669 shader_hw_sample(arg
, reg1
, dst_str
, "TMP");
672 void pshader_hw_texreg2gb(SHADER_OPCODE_ARG
* arg
) {
674 SHADER_BUFFER
* buffer
= arg
->buffer
;
676 DWORD reg1
= arg
->dst
& WINED3DSP_REGNUM_MASK
;
677 DWORD reg2
= arg
->src
[0] & WINED3DSP_REGNUM_MASK
;
680 sprintf(dst_str
, "T%u", reg1
);
681 shader_addline(buffer
, "MOV TMP.r, T%u.g;\n", reg2
);
682 shader_addline(buffer
, "MOV TMP.g, T%u.b;\n", reg2
);
683 shader_hw_sample(arg
, reg1
, dst_str
, "TMP");
686 void pshader_hw_texbem(SHADER_OPCODE_ARG
* arg
) {
688 SHADER_BUFFER
* buffer
= arg
->buffer
;
689 DWORD reg1
= arg
->dst
& WINED3DSP_REGNUM_MASK
;
690 DWORD reg2
= arg
->src
[0] & WINED3DSP_REGNUM_MASK
;
693 /* FIXME: Should apply the BUMPMAPENV matrix */
694 sprintf(dst_str
, "T%u", reg1
);
695 shader_addline(buffer
, "ADD TMP.rg, fragment.texcoord[%u], T%u;\n", reg1
, reg2
);
696 shader_hw_sample(arg
, reg1
, dst_str
, "TMP");
698 IWineD3DPixelShaderImpl
* This
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
700 DWORD dst
= arg
->dst
;
701 DWORD src
= arg
->src
[0] & WINED3DSP_REGNUM_MASK
;
702 SHADER_BUFFER
* buffer
= arg
->buffer
;
707 /* All versions have a destination register */
708 reg_dest_code
= dst
& WINED3DSP_REGNUM_MASK
;
709 /* Can directly use the name because texbem is only valid for <= 1.3 shaders */
710 pshader_get_register_name(dst
, reg_coord
);
712 if(This
->bumpenvmatconst
) {
713 /*shader_addline(buffer, "MOV T%u, fragment.texcoord[%u];\n", 1, 1); Not needed - done already */
714 shader_addline(buffer
, "SWZ TMP2, bumpenvmat, x, z, 0, 0;\n");
715 shader_addline(buffer
, "DP3 TMP.r, TMP2, T%u;\n", src
);
716 shader_addline(buffer
, "SWZ TMP2, bumpenvmat, y, w, 0, 0;\n");
717 shader_addline(buffer
, "DP3 TMP.g, TMP2, T%u;\n", src
);
718 shader_addline(buffer
, "ADD TMP.rg, TMP, %s;\n", reg_coord
);
719 /* Not sure about this, but hl2 needs it. It uses a projected texture with texbem and depends on the 4th coordinate */
720 shader_addline(buffer
, "MOV TMP.a, %s;\n", reg_coord
);
721 shader_hw_sample(arg
, reg_dest_code
, reg_coord
, "TMP");
723 /* Without a bump matrix loaded, just sample with the unmodified coordinates */
724 shader_hw_sample(arg
, reg_dest_code
, reg_coord
, reg_coord
);
728 void pshader_hw_texm3x2pad(SHADER_OPCODE_ARG
* arg
) {
730 DWORD reg
= arg
->dst
& WINED3DSP_REGNUM_MASK
;
731 SHADER_BUFFER
* buffer
= arg
->buffer
;
734 pshader_gen_input_modifier_line(buffer
, arg
->src
[0], 0, src0_name
);
735 shader_addline(buffer
, "DP3 TMP.x, T%u, %s;\n", reg
, src0_name
);
738 void pshader_hw_texm3x2tex(SHADER_OPCODE_ARG
* arg
) {
740 DWORD reg
= arg
->dst
& WINED3DSP_REGNUM_MASK
;
741 SHADER_BUFFER
* buffer
= arg
->buffer
;
745 sprintf(dst_str
, "T%u", reg
);
746 pshader_gen_input_modifier_line(buffer
, arg
->src
[0], 0, src0_name
);
747 shader_addline(buffer
, "DP3 TMP.y, T%u, %s;\n", reg
, src0_name
);
748 shader_hw_sample(arg
, reg
, dst_str
, "TMP");
751 void pshader_hw_texm3x3pad(SHADER_OPCODE_ARG
* arg
) {
753 IWineD3DPixelShaderImpl
* This
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
754 DWORD reg
= arg
->dst
& WINED3DSP_REGNUM_MASK
;
755 SHADER_BUFFER
* buffer
= arg
->buffer
;
756 SHADER_PARSE_STATE
* current_state
= &This
->baseShader
.parse_state
;
759 pshader_gen_input_modifier_line(buffer
, arg
->src
[0], 0, src0_name
);
760 shader_addline(buffer
, "DP3 TMP.%c, T%u, %s;\n", 'x' + current_state
->current_row
, reg
, src0_name
);
761 current_state
->texcoord_w
[current_state
->current_row
++] = reg
;
764 void pshader_hw_texm3x3tex(SHADER_OPCODE_ARG
* arg
) {
766 IWineD3DPixelShaderImpl
* This
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
767 DWORD reg
= arg
->dst
& WINED3DSP_REGNUM_MASK
;
768 SHADER_BUFFER
* buffer
= arg
->buffer
;
769 SHADER_PARSE_STATE
* current_state
= &This
->baseShader
.parse_state
;
773 pshader_gen_input_modifier_line(buffer
, arg
->src
[0], 0, src0_name
);
774 shader_addline(buffer
, "DP3 TMP.z, T%u, %s;\n", reg
, src0_name
);
776 /* Sample the texture using the calculated coordinates */
777 sprintf(dst_str
, "T%u", reg
);
778 shader_hw_sample(arg
, reg
, dst_str
, "TMP");
779 current_state
->current_row
= 0;
782 void pshader_hw_texm3x3vspec(SHADER_OPCODE_ARG
* arg
) {
784 IWineD3DPixelShaderImpl
* This
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
785 DWORD reg
= arg
->dst
& WINED3DSP_REGNUM_MASK
;
786 SHADER_BUFFER
* buffer
= arg
->buffer
;
787 SHADER_PARSE_STATE
* current_state
= &This
->baseShader
.parse_state
;
791 pshader_gen_input_modifier_line(buffer
, arg
->src
[0], 0, src0_name
);
792 shader_addline(buffer
, "DP3 TMP.z, T%u, %s;\n", reg
, src0_name
);
794 /* Construct the eye-ray vector from w coordinates */
795 shader_addline(buffer
, "MOV TMP2.x, fragment.texcoord[%u].w;\n", current_state
->texcoord_w
[0]);
796 shader_addline(buffer
, "MOV TMP2.y, fragment.texcoord[%u].w;\n", current_state
->texcoord_w
[1]);
797 shader_addline(buffer
, "MOV TMP2.z, fragment.texcoord[%u].w;\n", reg
);
799 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
800 shader_addline(buffer
, "DP3 TMP.w, TMP, TMP2;\n");
801 shader_addline(buffer
, "MUL TMP, TMP.w, TMP;\n");
802 shader_addline(buffer
, "MAD TMP, coefmul.x, TMP, -TMP2;\n");
804 /* Sample the texture using the calculated coordinates */
805 sprintf(dst_str
, "T%u", reg
);
806 shader_hw_sample(arg
, reg
, dst_str
, "TMP");
807 current_state
->current_row
= 0;
810 void pshader_hw_texm3x3spec(SHADER_OPCODE_ARG
* arg
) {
812 IWineD3DPixelShaderImpl
* This
= (IWineD3DPixelShaderImpl
*) arg
->shader
;
813 DWORD reg
= arg
->dst
& WINED3DSP_REGNUM_MASK
;
814 DWORD reg3
= arg
->src
[1] & WINED3DSP_REGNUM_MASK
;
815 SHADER_PARSE_STATE
* current_state
= &This
->baseShader
.parse_state
;
816 SHADER_BUFFER
* buffer
= arg
->buffer
;
820 pshader_gen_input_modifier_line(buffer
, arg
->src
[0], 0, src0_name
);
821 shader_addline(buffer
, "DP3 TMP.z, T%u, %s;\n", reg
, src0_name
);
823 /* Calculate reflection vector (Assume normal is normalized): RF = 2*(N.E)*N -E */
824 shader_addline(buffer
, "DP3 TMP.w, TMP, C[%u];\n", reg3
);
825 shader_addline(buffer
, "MUL TMP, TMP.w, TMP;\n");
826 shader_addline(buffer
, "MAD TMP, coefmul.x, TMP, -C[%u];\n", reg3
);
828 /* Sample the texture using the calculated coordinates */
829 sprintf(dst_str
, "T%u", reg
);
830 shader_hw_sample(arg
, reg
, dst_str
, "TMP");
831 current_state
->current_row
= 0;
834 /** Handles transforming all WINED3DSIO_M?x? opcodes for
835 Vertex shaders to ARB_vertex_program codes */
836 void vshader_hw_mnxn(SHADER_OPCODE_ARG
* arg
) {
840 SHADER_OPCODE_ARG tmpArg
;
842 memset(&tmpArg
, 0, sizeof(SHADER_OPCODE_ARG
));
844 /* Set constants for the temporary argument */
845 tmpArg
.shader
= arg
->shader
;
846 tmpArg
.buffer
= arg
->buffer
;
847 tmpArg
.src
[0] = arg
->src
[0];
848 tmpArg
.src_addr
[0] = arg
->src_addr
[0];
849 tmpArg
.src_addr
[1] = arg
->src_addr
[1];
850 tmpArg
.reg_maps
= arg
->reg_maps
;
852 switch(arg
->opcode
->opcode
) {
853 case WINED3DSIO_M4x4
:
855 tmpArg
.opcode
= shader_get_opcode(arg
->shader
, WINED3DSIO_DP4
);
857 case WINED3DSIO_M4x3
:
859 tmpArg
.opcode
= shader_get_opcode(arg
->shader
, WINED3DSIO_DP4
);
861 case WINED3DSIO_M3x4
:
863 tmpArg
.opcode
= shader_get_opcode(arg
->shader
, WINED3DSIO_DP3
);
865 case WINED3DSIO_M3x3
:
867 tmpArg
.opcode
= shader_get_opcode(arg
->shader
, WINED3DSIO_DP3
);
869 case WINED3DSIO_M3x2
:
871 tmpArg
.opcode
= shader_get_opcode(arg
->shader
, WINED3DSIO_DP3
);
877 for (i
= 0; i
< nComponents
; i
++) {
878 tmpArg
.dst
= ((arg
->dst
) & ~WINED3DSP_WRITEMASK_ALL
)|(WINED3DSP_WRITEMASK_0
<<i
);
879 tmpArg
.src
[1] = arg
->src
[1]+i
;
880 vshader_hw_map2gl(&tmpArg
);
884 void vshader_hw_rsq_rcp(SHADER_OPCODE_ARG
* arg
) {
885 CONST SHADER_OPCODE
* curOpcode
= arg
->opcode
;
886 SHADER_BUFFER
* buffer
= arg
->buffer
;
887 DWORD dst
= arg
->dst
;
888 DWORD src
= arg
->src
[0];
889 DWORD swizzle
= (src
& WINED3DSP_SWIZZLE_MASK
) >> WINED3DSP_SWIZZLE_SHIFT
;
893 strcpy(tmpLine
, curOpcode
->glname
); /* Opcode */
894 vshader_program_add_param(arg
, dst
, FALSE
, tmpLine
); /* Destination */
895 strcat(tmpLine
, ",");
896 vshader_program_add_param(arg
, src
, TRUE
, tmpLine
);
897 if ((WINED3DSP_NOSWIZZLE
>> WINED3DSP_SWIZZLE_SHIFT
) == swizzle
) {
898 /* Dx sdk says .x is used if no swizzle is given */
899 strcat(tmpLine
, ".x");
902 shader_addline(buffer
, "%s;\n", tmpLine
);
905 /* TODO: merge with pixel shader */
906 /* Map the opcode 1-to-1 to the GL code */
907 void vshader_hw_map2gl(SHADER_OPCODE_ARG
* arg
) {
909 CONST SHADER_OPCODE
* curOpcode
= arg
->opcode
;
910 SHADER_BUFFER
* buffer
= arg
->buffer
;
911 DWORD dst
= arg
->dst
;
912 DWORD
* src
= arg
->src
;
914 DWORD dst_regtype
= shader_get_regtype(dst
);
918 if ((curOpcode
->opcode
== WINED3DSIO_MOV
&& dst_regtype
== WINED3DSPR_ADDR
) || curOpcode
->opcode
== WINED3DSIO_MOVA
)
919 strcpy(tmpLine
, "ARL");
921 strcpy(tmpLine
, curOpcode
->glname
);
923 if (curOpcode
->num_params
> 0) {
924 vshader_program_add_param(arg
, dst
, FALSE
, tmpLine
);
925 for (i
= 1; i
< curOpcode
->num_params
; ++i
) {
926 strcat(tmpLine
, ",");
927 vshader_program_add_param(arg
, src
[i
-1], TRUE
, tmpLine
);
930 shader_addline(buffer
, "%s;\n", tmpLine
);
933 static GLuint
create_arb_blt_vertex_program(WineD3D_GL_Info
*gl_info
) {
934 GLuint program_id
= 0;
935 const char *blt_vprogram
=
937 "PARAM c[1] = { { 1, 0.5 } };\n"
938 "MOV result.position, vertex.position;\n"
939 "MOV result.color, c[0].x;\n"
940 "MAD result.texcoord[0].y, -vertex.position, c[0], c[0];\n"
941 "MAD result.texcoord[0].x, vertex.position, c[0].y, c[0].y;\n"
944 GL_EXTCALL(glGenProgramsARB(1, &program_id
));
945 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB
, program_id
));
946 GL_EXTCALL(glProgramStringARB(GL_VERTEX_PROGRAM_ARB
, GL_PROGRAM_FORMAT_ASCII_ARB
, strlen(blt_vprogram
), blt_vprogram
));
948 if (glGetError() == GL_INVALID_OPERATION
) {
950 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB
, &pos
);
951 FIXME("Vertex program error at position %d: %s\n", pos
,
952 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB
)));
958 static GLuint
create_arb_blt_fragment_program(WineD3D_GL_Info
*gl_info
) {
959 GLuint program_id
= 0;
960 const char *blt_fprogram
=
963 "TEX R0.x, fragment.texcoord[0], texture[0], 2D;\n"
964 "MOV result.depth.z, R0.x;\n"
967 GL_EXTCALL(glGenProgramsARB(1, &program_id
));
968 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB
, program_id
));
969 GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB
, GL_PROGRAM_FORMAT_ASCII_ARB
, strlen(blt_fprogram
), blt_fprogram
));
971 if (glGetError() == GL_INVALID_OPERATION
) {
973 glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB
, &pos
);
974 FIXME("Fragment program error at position %d: %s\n", pos
,
975 debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB
)));
981 static void shader_arb_select(IWineD3DDevice
*iface
, BOOL usePS
, BOOL useVS
) {
982 IWineD3DDeviceImpl
*This
= (IWineD3DDeviceImpl
*)iface
;
983 WineD3D_GL_Info
*gl_info
= &((IWineD3DImpl
*)(This
->wineD3D
))->gl_info
;
986 TRACE("Using vertex shader\n");
988 /* Bind the vertex program */
989 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB
,
990 ((IWineD3DVertexShaderImpl
*)This
->stateBlock
->vertexShader
)->baseShader
.prgId
));
991 checkGLcall("glBindProgramARB(GL_VERTEX_PROGRAM_ARB, vertexShader->prgId);");
993 /* Enable OpenGL vertex programs */
994 glEnable(GL_VERTEX_PROGRAM_ARB
);
995 checkGLcall("glEnable(GL_VERTEX_PROGRAM_ARB);");
996 TRACE("(%p) : Bound vertex program %u and enabled GL_VERTEX_PROGRAM_ARB\n",
997 This
, ((IWineD3DVertexShaderImpl
*)This
->stateBlock
->vertexShader
)->baseShader
.prgId
);
998 } else if(GL_SUPPORT(ARB_VERTEX_PROGRAM
)) {
999 glDisable(GL_VERTEX_PROGRAM_ARB
);
1000 checkGLcall("glDisable(GL_VERTEX_PROGRAM_ARB)");
1004 TRACE("Using pixel shader\n");
1006 /* Bind the fragment program */
1007 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB
,
1008 ((IWineD3DPixelShaderImpl
*)This
->stateBlock
->pixelShader
)->baseShader
.prgId
));
1009 checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, pixelShader->prgId);");
1011 /* Enable OpenGL fragment programs */
1012 glEnable(GL_FRAGMENT_PROGRAM_ARB
);
1013 checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB);");
1014 TRACE("(%p) : Bound fragment program %u and enabled GL_FRAGMENT_PROGRAM_ARB\n",
1015 This
, ((IWineD3DPixelShaderImpl
*)This
->stateBlock
->pixelShader
)->baseShader
.prgId
);
1016 } else if(GL_SUPPORT(ARB_FRAGMENT_PROGRAM
)) {
1017 glDisable(GL_FRAGMENT_PROGRAM_ARB
);
1018 checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
1022 static void shader_arb_select_depth_blt(IWineD3DDevice
*iface
) {
1023 IWineD3DDeviceImpl
*This
= (IWineD3DDeviceImpl
*)iface
;
1024 WineD3D_GL_Info
*gl_info
= &((IWineD3DImpl
*)(This
->wineD3D
))->gl_info
;
1025 static GLuint vprogram_id
= 0;
1026 static GLuint fprogram_id
= 0;
1028 if (!vprogram_id
) vprogram_id
= create_arb_blt_vertex_program(gl_info
);
1029 GL_EXTCALL(glBindProgramARB(GL_VERTEX_PROGRAM_ARB
, vprogram_id
));
1030 glEnable(GL_VERTEX_PROGRAM_ARB
);
1032 if (!fprogram_id
) fprogram_id
= create_arb_blt_fragment_program(gl_info
);
1033 GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB
, fprogram_id
));
1034 glEnable(GL_FRAGMENT_PROGRAM_ARB
);
1037 static void shader_arb_cleanup(IWineD3DDevice
*iface
) {
1038 IWineD3DDeviceImpl
*This
= (IWineD3DDeviceImpl
*)iface
;
1039 WineD3D_GL_Info
*gl_info
= &((IWineD3DImpl
*)(This
->wineD3D
))->gl_info
;
1040 if (GL_SUPPORT(ARB_VERTEX_PROGRAM
)) glDisable(GL_VERTEX_PROGRAM_ARB
);
1041 if (GL_SUPPORT(ARB_FRAGMENT_PROGRAM
)) glDisable(GL_FRAGMENT_PROGRAM_ARB
);
1044 const shader_backend_t arb_program_shader_backend
= {
1046 &shader_arb_select_depth_blt
,
1047 &shader_arb_load_constants
,