2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 #include "nv50_program.h"
25 #include "nv50_context.h"
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_util.h"
30 #include "tgsi/tgsi_dump.h"
/*
 * Count the set bits in the low four bits of val.
 *
 * Only bits 0..3 are examined; any higher bits of val are ignored.
 * Returns a value in the range 0..4.
 */
static inline unsigned
bitcount4(const uint32_t val)
{
   /* Population count of every possible 4-bit value, indexed by that value. */
   static const unsigned cnt[16]
      = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };

   return cnt[val & 0xf];
}
/*
 * Compute a 4-bit component mask for source operand c of inst.
 *
 * The mask starts out as the write mask of the instruction's first
 * destination and is then adjusted per opcode: some opcodes return a fixed
 * combination derived from it, TGSI_OPCODE_KIL ignores the write mask, texture
 * instructions add bit 3 (bias, lod or proj) unless the opcode is TEX or TXD
 * and then switch on the texture target, and TGSI_OPCODE_XPD maps each written
 * destination component to the two other source components.
 *
 * Callers in this file test bit n of the result for component n of the operand.
 *
 * NOTE(review): this listing omits most case labels, several return statements
 * and the function's return type; only the visible statements are described.
 */
41 nv50_tgsi_src_mask(const struct tgsi_full_instruction
*inst
, int c
)
43 unsigned mask
= inst
->Dst
[0].Register
.WriteMask
;
45 switch (inst
->Instruction
.Opcode
) {
48 return (mask
& 0x8) | ((mask
& 0x7) ? 0x1 : 0x0);
53 case TGSI_OPCODE_KIL
: /* WriteMask ignored */
56 return mask
& (c
? 0xa : 0x6);
75 const struct tgsi_instruction_texture
*tex
;
77 assert(inst
->Instruction
.Texture
);
81 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_TEX
&&
82 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXD
)
83 mask
|= 0x8; /* bias, lod or proj */
85 switch (tex
->Texture
) {
89 case TGSI_TEXTURE_SHADOW1D
:
100 case TGSI_OPCODE_XPD
:
/* Each written destination component needs the other two source components:
 * x -> y,z (0x6), y -> x,z (0x5), z -> x,y (0x3).
 */
103 if (mask
& 1) x
|= 0x6;
104 if (mask
& 2) x
|= 0x5;
105 if (mask
& 4) x
|= 0x3;
/*
 * Record an indirect input access: store id in ti->input_access for all four
 * components of every slot below PIPE_MAX_SHADER_INPUTS, then set
 * ti->indirect_inputs to TRUE.
 *
 * NOTE(review): the return type and the declarations of i and c are not
 * visible in this listing.
 */
116 nv50_indirect_inputs(struct nv50_translation_info
*ti
, int id
)
120 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; ++i
)
121 for (c
= 0; c
< 4; ++c
)
122 ti
->input_access
[i
][c
] = id
;
124 ti
->indirect_inputs
= TRUE
;
/*
 * Record an indirect output access: store id in ti->output_access for all four
 * components of every slot below PIPE_MAX_SHADER_OUTPUTS, then set
 * ti->indirect_outputs to TRUE.
 *
 * NOTE(review): the return type and the declarations of i and c are not
 * visible in this listing.
 */
128 nv50_indirect_outputs(struct nv50_translation_info
*ti
, int id
)
132 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
)
133 for (c
= 0; c
< 4; ++c
)
134 ti
->output_access
[i
][c
] = id
;
136 ti
->indirect_outputs
= TRUE
;
/*
 * Scan one TGSI instruction and update the translation info in ti.
 *
 * id is the value stored into the access tables for this instruction; the
 * caller in this file passes the instruction count after incrementing it,
 * so the instruction's array index is id - 1.
 *
 * Visible effects:
 *  - BGNSUB: records the subroutine's position (id - 1) and its id
 *    (subr_nr + 1, since id 0 is the main program).
 *  - Destination in TGSI_FILE_OUTPUT: stores id in ti->output_access for the
 *    destination register's components, and remembers the source input index
 *    as the vertex program edge flag when a MOV from an input writes the
 *    register recorded in ti->edgeflag_out.
 *  - Destination in TGSI_FILE_TEMPORARY with indirect addressing: sets
 *    ti->store_to_memory.
 *  - Each source: indirectly addressed temporaries set ti->store_to_memory;
 *    for inputs, the components selected by nv50_tgsi_src_mask() are resolved
 *    through the operand swizzle and id is stored in ti->input_access.
 *
 * NOTE(review): the statements that followed several of the visible
 * conditions (likely continue statements, and the guard before the call to
 * nv50_indirect_outputs) are not present in this listing — confirm against
 * the complete file before relying on the exact control flow.
 */
140 prog_inst(struct nv50_translation_info
*ti
,
141 const struct tgsi_full_instruction
*inst
, int id
)
143 const struct tgsi_dst_register
*dst
;
144 const struct tgsi_src_register
*src
;
148 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_BGNSUB
) {
149 ti
->subr
[ti
->subr_nr
].pos
= id
- 1;
150 ti
->subr
[ti
->subr_nr
].id
= ti
->subr_nr
+ 1; /* id 0 is main program */
154 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
155 dst
= &inst
->Dst
[0].Register
;
157 for (c
= 0; c
< 4; ++c
) {
159 nv50_indirect_outputs(ti
, id
);
160 if (!(dst
->WriteMask
& (1 << c
)))
162 ti
->output_access
[dst
->Index
][c
] = id
;
165 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
166 inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
&&
167 dst
->Index
== ti
->edgeflag_out
)
168 ti
->p
->vp
.edgeflag
= inst
->Src
[0].Register
.Index
;
170 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
171 if (inst
->Dst
[0].Register
.Indirect
)
172 ti
->store_to_memory
= TRUE
;
175 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
176 src
= &inst
->Src
[s
].Register
;
177 if (src
->File
== TGSI_FILE_TEMPORARY
)
178 if (inst
->Src
[s
].Register
.Indirect
)
179 ti
->store_to_memory
= TRUE
;
180 if (src
->File
!= TGSI_FILE_INPUT
)
182 mask
= nv50_tgsi_src_mask(inst
, s
);
184 if (inst
->Src
[s
].Register
.Indirect
)
185 nv50_indirect_inputs(ti
, id
);
187 for (c
= 0; c
< 4; ++c
) {
188 if (!(mask
& (1 << c
)))
190 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
191 if (k
<= TGSI_SWIZZLE_W
)
192 ti
->input_access
[src
->Index
][k
] = id
;
/* Probably should introduce something like struct tgsi_function_declaration
 * instead of trying to guess inputs/outputs.
 */
/*
 * Update a subroutine's argument/return bitmaps from one of its instructions.
 *
 * Temporaries are tracked one bit per register: word Index / 32, bit
 * Index % 32, in a separate row per component.
 *  - Sources: for each temporary source component that is selected by
 *    nv50_tgsi_src_mask() and has not already been written by the subroutine
 *    (its bit is clear in subr->retv), the bit is set in subr->argv.
 *  - Destination: for a temporary destination, every component in the write
 *    mask gets its bit set in subr->retv.
 *
 * Indirectly addressed sources are not supported here (asserted).
 *
 * NOTE(review): the source test uses k < TGSI_SWIZZLE_W, which leaves out the
 * W component, whereas prog_inst uses k <= TGSI_SWIZZLE_W — confirm whether
 * the exclusion is intended.
 * NOTE(review): the statement following the TGSI_FILE_TEMPORARY check on the
 * source file is not visible in this listing (presumably a continue).
 */
201 prog_subroutine_inst(struct nv50_subroutine
*subr
,
202 const struct tgsi_full_instruction
*inst
)
204 const struct tgsi_dst_register
*dst
;
205 const struct tgsi_src_register
*src
;
209 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
210 src
= &inst
->Src
[s
].Register
;
211 if (src
->File
!= TGSI_FILE_TEMPORARY
)
213 mask
= nv50_tgsi_src_mask(inst
, s
);
215 assert(!inst
->Src
[s
].Register
.Indirect
);
217 for (c
= 0; c
< 4; ++c
) {
218 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
220 if ((mask
& (1 << c
)) && k
< TGSI_SWIZZLE_W
)
221 if (!(subr
->retv
[src
->Index
/ 32][k
] & (1 << (src
->Index
% 32))))
222 subr
->argv
[src
->Index
/ 32][k
] |= 1 << (src
->Index
% 32);
226 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
227 dst
= &inst
->Dst
[0].Register
;
229 for (c
= 0; c
< 4; ++c
)
230 if (dst
->WriteMask
& (1 << c
))
231 subr
->retv
[dst
->Index
/ 32][c
] |= 1 << (dst
->Index
% 32);
/*
 * Append one TGSI immediate to the translation info.
 *
 * The immediate takes slot n = ti->immd32_nr (post-incremented). Its four
 * 32-bit values are copied into ti->immd32[n * 4 .. n * 4 + 3] and its data
 * type into ti->immd32_ty[n]. The assert checks that the running count does
 * not exceed the number of immediates reported by the shader scan.
 */
236 prog_immediate(struct nv50_translation_info
*ti
,
237 const struct tgsi_full_immediate
*imm
)
240 unsigned n
= ti
->immd32_nr
++;
242 assert(ti
->immd32_nr
<= ti
->scan
.immediate_count
);
244 for (c
= 0; c
< 4; ++c
)
245 ti
->immd32
[n
* 4 + c
] = imm
->u
[c
].Uint
;
247 ti
->immd32_ty
[n
] = imm
->Immediate
.DataType
;
/*
 * Translate the interpolation settings of a TGSI declaration into
 * NV50_INTERP_* flags.
 *
 * TGSI_INTERPOLATE_CONSTANT selects NV50_INTERP_FLAT; another visible branch
 * selects NV50_INTERP_LINEAR; NV50_INTERP_CENTROID is OR'ed in when the
 * declaration requests centroid sampling.
 *
 * NOTE(review): the declaration of mode, the else branches, the assignment
 * taken for TGSI_INTERPOLATE_PERSPECTIVE and the return statement are not
 * visible in this listing.
 */
250 static INLINE
unsigned
251 translate_interpolate(const struct tgsi_full_declaration
*decl
)
255 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_CONSTANT
)
256 mode
= NV50_INTERP_FLAT
;
258 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_PERSPECTIVE
)
261 mode
= NV50_INTERP_LINEAR
;
263 if (decl
->Declaration
.Centroid
)
264 mode
|= NV50_INTERP_CENTROID
;
/*
 * Process one TGSI declaration covering registers first..last
 * (decl->Range.First .. decl->Range.Last) and record what later stages need.
 *
 * sn/si hold the semantic name/index when the declaration carries one
 * (both default to 0). Depending on the declared file:
 *  - TGSI_FILE_INPUT: stores the translated interpolation mode for each
 *    register, copies sn/si into ti->p->in[i], and for the COLOR semantic in a
 *    fragment program stores first in ti->p->vp.bfc[si].
 *  - TGSI_FILE_OUTPUT: copies sn/si into ti->p->out[i]; BCOLOR stores first in
 *    vp.bfc[si], PSIZE in vp.psiz and EDGEFLAG in ti->edgeflag_out.
 *  - TGSI_FILE_SYSTEM_VALUE: INSTANCEID enables the instance id builtin
 *    attribute bit in vp.attrs[2] and maps the register to 2 in sysval_map.
 *  - TGSI_FILE_CONSTANT: grows ti->p->parm_size to at least (last + 1) * 16.
 *  - ADDRESS, SAMPLER and TEMPORARY declarations are accepted.
 *
 * NOTE(review): this listing omits break/return statements, the inner switch
 * headers on the semantic name, and the handling of the FACE, PRIMID,
 * PRIMIDIN and VERTEXID cases, so fall-through behavior cannot be confirmed
 * from here.
 */
270 prog_decl(struct nv50_translation_info
*ti
,
271 const struct tgsi_full_declaration
*decl
)
273 unsigned i
, first
, last
, sn
= 0, si
= 0;
275 first
= decl
->Range
.First
;
276 last
= decl
->Range
.Last
;
278 if (decl
->Declaration
.Semantic
) {
279 sn
= decl
->Semantic
.Name
;
280 si
= decl
->Semantic
.Index
;
283 switch (decl
->Declaration
.File
) {
284 case TGSI_FILE_INPUT
:
285 for (i
= first
; i
<= last
; ++i
)
286 ti
->interp_mode
[i
] = translate_interpolate(decl
);
288 if (!decl
->Declaration
.Semantic
)
291 for (i
= first
; i
<= last
; ++i
) {
292 ti
->p
->in
[i
].sn
= sn
;
293 ti
->p
->in
[i
].si
= si
;
297 case TGSI_SEMANTIC_FACE
:
299 case TGSI_SEMANTIC_COLOR
:
300 if (ti
->p
->type
== PIPE_SHADER_FRAGMENT
)
301 ti
->p
->vp
.bfc
[si
] = first
;
305 case TGSI_FILE_OUTPUT
:
306 if (!decl
->Declaration
.Semantic
)
309 for (i
= first
; i
<= last
; ++i
) {
310 ti
->p
->out
[i
].sn
= sn
;
311 ti
->p
->out
[i
].si
= si
;
315 case TGSI_SEMANTIC_BCOLOR
:
316 ti
->p
->vp
.bfc
[si
] = first
;
318 case TGSI_SEMANTIC_PSIZE
:
319 ti
->p
->vp
.psiz
= first
;
321 case TGSI_SEMANTIC_EDGEFLAG
:
322 ti
->edgeflag_out
= first
;
328 case TGSI_FILE_SYSTEM_VALUE
:
329 /* For VP/GP inputs, they are put in s[] after the last normal input.
330 * Let sysval_map reflect the order of the sysvals in s[] and fixup later.
332 switch (decl
->Semantic
.Name
) {
333 case TGSI_SEMANTIC_FACE
:
335 case TGSI_SEMANTIC_INSTANCEID
:
336 ti
->p
->vp
.attrs
[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID
;
337 ti
->sysval_map
[first
] = 2;
339 case TGSI_SEMANTIC_PRIMID
:
342 case TGSI_SEMANTIC_PRIMIDIN:
344 case TGSI_SEMANTIC_VERTEXID:
351 case TGSI_FILE_CONSTANT
:
352 ti
->p
->parm_size
= MAX2(ti
->p
->parm_size
, (last
+ 1) * 16);
354 case TGSI_FILE_ADDRESS
:
355 case TGSI_FILE_SAMPLER
:
356 case TGSI_FILE_TEMPORARY
:
/*
 * Assign hardware input/output slots for a vertex program.
 *
 * Steps visible here:
 *  - Inputs are read from NV_FILE_MEM_S and outputs written to NV_FILE_OUT.
 *  - For every declared input register, p->in[i].hw is the running input
 *    count; each component is checked against ti->input_access and mapped to
 *    the next input slot, with its bit set in the p->vp.attrs bitmask
 *    (bit 4 * i + c).
 *  - Outputs are handled the same way using p->max_out as the running
 *    counter, recording the mapped components in p->out[i].mask.
 *  - p->vp.clpd is placed after the regular outputs and p->max_out grows by
 *    p->vp.clpd_nr.
 *  - System value mappings are rebased so that they follow the regular
 *    inputs (sysval_map[i] - 1 + num_inputs).
 *  - If p->vp.psiz is below 0x40 it is replaced by the hardware slot of that
 *    output register.
 *
 * NOTE(review): the statements following the two access checks (presumably
 * continue for unused components), the case labels of the sysval switch and
 * the return statement are not visible in this listing.
 */
365 nv50_vertprog_prepare(struct nv50_translation_info
*ti
)
367 struct nv50_program
*p
= ti
->p
;
369 unsigned num_inputs
= 0;
371 ti
->input_file
= NV_FILE_MEM_S
;
372 ti
->output_file
= NV_FILE_OUT
;
374 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
376 p
->in
[i
].hw
= num_inputs
;
378 for (c
= 0; c
< 4; ++c
) {
379 if (!ti
->input_access
[i
][c
])
381 ti
->input_map
[i
][c
] = num_inputs
++;
382 p
->vp
.attrs
[(4 * i
+ c
) / 32] |= 1 << ((i
* 4 + c
) % 32);
386 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
388 p
->out
[i
].hw
= p
->max_out
;
390 for (c
= 0; c
< 4; ++c
) {
391 if (!ti
->output_access
[i
][c
])
393 ti
->output_map
[i
][c
] = p
->max_out
++;
394 p
->out
[i
].mask
|= 1 << c
;
398 p
->vp
.clpd
= p
->max_out
;
399 p
->max_out
+= p
->vp
.clpd_nr
;
401 for (i
= 0; i
< TGSI_SEMANTIC_COUNT
; ++i
) {
402 switch (ti
->sysval_map
[i
]) {
404 if (!(ti
->p
->vp
.attrs
[2] & NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID
))
405 ti
->sysval_map
[i
] = 1;
406 ti
->sysval_map
[i
] = (ti
->sysval_map
[i
] - 1) + num_inputs
;
413 if (p
->vp
.psiz
< 0x40)
414 p
->vp
.psiz
= p
->out
[p
->vp
.psiz
].hw
;
/*
 * Assign hardware input/output slots and control flags for a fragment program.
 *
 * Steps visible here:
 *  - Sets the EXPORTS_Z control flag (and flags[1] = 0x11) when the shader
 *    writes depth, and the USES_KIL flag when it uses kill.
 *  - Inputs are read from NV_FILE_MEM_V and outputs live in NV_FILE_GPR.
 *  - Counts the inputs whose interpolation mode is not flat, then reorders
 *    p->in[] so those come first; POSITION and FACE inputs are mapped
 *    specially (position components are flagged in bits 24..27 of
 *    p->fp.interp, face is mapped to 255).
 *  - Maps the accessed components of the remaining inputs to consecutive
 *    interpolant slots, records per-input masks, and accumulates the number
 *    of color components in p->fp.colors.
 *  - Stores the non-flat and total interpolant counts in p->fp.interp.
 *  - Sets the MULTIPLE_RESULTS control flag when there are more outputs than
 *    one color plus the optional depth output.
 *  - Assigns four consecutive result slots to each non-position output, puts
 *    the depth output (index depr) after them with mask 0x4, and rounds
 *    p->max_out up to at least 4.
 *
 * NOTE(review): sn[] and si[] are fixed 16-entry arrays indexed up to
 * p->in_nr; the code that fills them and any bound on in_nr are not visible
 * in this listing. Several other statements (declarations of i, j and c,
 * assignments to nintp, nvary and depr, continue statements, the return) are
 * also missing, so the description covers only what is shown.
 */
420 nv50_fragprog_prepare(struct nv50_translation_info
*ti
)
422 struct nv50_program
*p
= ti
->p
;
424 unsigned nvary
, nintp
, depr
;
425 unsigned n
= 0, m
= 0, skip
= 0;
426 ubyte sn
[16], si
[16];
430 if (ti
->scan
.writes_z
) {
431 p
->fp
.flags
[1] = 0x11;
432 p
->fp
.flags
[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z
;
435 if (ti
->scan
.uses_kill
)
436 p
->fp
.flags
[0] |= NV50_3D_FP_CONTROL_USES_KIL
;
440 ti
->input_file
= NV_FILE_MEM_V
;
441 ti
->output_file
= NV_FILE_GPR
;
443 /* count non-flat inputs, save semantic info */
444 for (i
= 0; i
< p
->in_nr
; ++i
) {
445 m
+= (ti
->interp_mode
[i
] & NV50_INTERP_FLAT
) ? 0 : 1;
450 /* reorder p->in[] so that non-flat inputs are first and
451 * kick out special inputs that don't use VP/GP_RESULT_MAP
454 for (i
= 0; i
< p
->in_nr
; ++i
) {
455 if (sn
[i
] == TGSI_SEMANTIC_POSITION
) {
456 for (c
= 0; c
< 4; ++c
) {
457 ti
->input_map
[i
][c
] = nintp
;
458 if (ti
->input_access
[i
][c
]) {
459 p
->fp
.interp
|= 1 << (24 + c
);
466 if (sn
[i
] == TGSI_SEMANTIC_FACE
) {
467 ti
->input_map
[i
][0] = 255;
472 j
= (ti
->interp_mode
[i
] & NV50_INTERP_FLAT
) ? m
++ : n
++;
474 if (sn
[i
] == TGSI_SEMANTIC_COLOR
)
475 p
->vp
.bfc
[si
[i
]] = j
;
477 p
->in
[j
].linear
= (ti
->interp_mode
[i
] & NV50_INTERP_LINEAR
) ? 1 : 0;
485 if (!(p
->fp
.interp
& (8 << 24))) {
486 p
->fp
.interp
|= (8 << 24);
490 p
->fp
.colors
= 4 << NV50_3D_MAP_SEMANTIC_0_FFC0_ID__SHIFT
; /* after HPOS */
492 for (i
= 0; i
< p
->in_nr
; ++i
) {
496 for (c
= 0; c
< 4; ++c
) {
497 if (!ti
->input_access
[j
][c
])
499 p
->in
[i
].mask
|= 1 << c
;
500 ti
->input_map
[j
][c
] = nintp
++;
502 /* count color inputs */
503 if (i
== p
->vp
.bfc
[0] || i
== p
->vp
.bfc
[1])
504 p
->fp
.colors
+= bitcount4(p
->in
[i
].mask
) << 16;
506 nintp
-= bitcount4(p
->fp
.interp
>> 24); /* subtract position inputs */
509 nvary
-= p
->in
[n
].hw
;
511 p
->fp
.interp
|= nvary
<< NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT
;
512 p
->fp
.interp
|= nintp
<< NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT
;
516 if (p
->out_nr
> (1 + (ti
->scan
.writes_z
? 1 : 0)))
517 p
->fp
.flags
[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS
;
520 for (i
= 0; i
< p
->out_nr
; ++i
) {
522 if (p
->out
[i
].sn
== TGSI_SEMANTIC_POSITION
) {
526 p
->out
[i
].hw
= p
->max_out
;
527 p
->out
[i
].mask
= 0xf;
529 for (c
= 0; c
< 4; ++c
)
530 ti
->output_map
[i
][c
] = p
->max_out
++;
532 if (depr
< p
->out_nr
) {
533 p
->out
[depr
].mask
= 0x4;
534 p
->out
[depr
].hw
= ti
->output_map
[depr
][2] = p
->max_out
++;
536 /* allowed values are 1, 4, 5, 8, 9, ... */
537 p
->max_out
= MAX2(4, p
->max_out
);
/*
 * Prepare translation info for a geometry program: inputs are read from
 * NV_FILE_MEM_S and outputs are written to NV_FILE_OUT.
 *
 * NOTE(review): any further statements and the return value are not visible
 * in this listing.
 */
544 nv50_geomprog_prepare(struct nv50_translation_info
*ti
)
546 ti
->input_file
= NV_FILE_MEM_S
;
547 ti
->output_file
= NV_FILE_OUT
;
/*
 * Scan the TGSI token stream of ti->p and fill in the translation info.
 *
 * Steps visible here:
 *  - Initializes p->vp.edgeflag to 0x40 and runs tgsi_scan_shader() to gather
 *    shader statistics into ti->scan (dumping the tokens when shader
 *    debugging is enabled).
 *  - Allocates the subroutine table (one entry per BGNSUB), the immediate
 *    value and type arrays (16 bytes resp. one ubyte per immediate) and a
 *    copy buffer for all instructions.
 *  - Walks the tokens: immediates go to prog_immediate(), declarations to
 *    prog_decl(), and each instruction is copied into ti->insns[] and passed
 *    to prog_inst() with the incremented instruction count as its id.
 *  - Runs prog_subroutine_inst() over each subroutine's instructions up to
 *    its ENDSUB to collect its inputs/outputs.
 *  - Sets p->in_nr / p->out_nr from the highest declared input/output
 *    register and dispatches to the per-shader-type prepare function;
 *    unknown types hit the "unsupported program type" assertion.
 *
 * NOTE(review): the checks on the allocation results, the break statements
 * of the token switch, the call that ends parsing and the return statement
 * are not visible in this listing.
 */
554 nv50_prog_scan(struct nv50_translation_info
*ti
)
556 struct nv50_program
*p
= ti
->p
;
557 struct tgsi_parse_context parse
;
560 p
->vp
.edgeflag
= 0x40;
566 tgsi_scan_shader(p
->pipe
.tokens
, &ti
->scan
);
568 #if NV50_DEBUG & NV50_DEBUG_SHADER
569 tgsi_dump(p
->pipe
.tokens
, 0);
573 CALLOC(ti
->scan
.opcode_count
[TGSI_OPCODE_BGNSUB
], sizeof(ti
->subr
[0]));
575 ti
->immd32
= (uint32_t *)MALLOC(ti
->scan
.immediate_count
* 16);
576 ti
->immd32_ty
= (ubyte
*)MALLOC(ti
->scan
.immediate_count
* sizeof(ubyte
));
578 ti
->insns
= MALLOC(ti
->scan
.num_instructions
* sizeof(ti
->insns
[0]));
580 tgsi_parse_init(&parse
, p
->pipe
.tokens
);
581 while (!tgsi_parse_end_of_tokens(&parse
)) {
582 tgsi_parse_token(&parse
);
584 switch (parse
.FullToken
.Token
.Type
) {
585 case TGSI_TOKEN_TYPE_IMMEDIATE
:
586 prog_immediate(ti
, &parse
.FullToken
.FullImmediate
);
588 case TGSI_TOKEN_TYPE_DECLARATION
:
589 prog_decl(ti
, &parse
.FullToken
.FullDeclaration
);
591 case TGSI_TOKEN_TYPE_INSTRUCTION
:
592 ti
->insns
[ti
->inst_nr
] = parse
.FullToken
.FullInstruction
;
593 prog_inst(ti
, &parse
.FullToken
.FullInstruction
, ++ti
->inst_nr
);
598 /* Scan to determine which registers are inputs/outputs of a subroutine. */
599 for (i
= 0; i
< ti
->subr_nr
; ++i
) {
/* NOTE(review): pc starts at subr[i].id, while prog_inst records the index of
 * the BGNSUB instruction in subr[i].pos and uses id as a 1-based subroutine
 * number — confirm which field is intended here.
 */
600 int pc
= ti
->subr
[i
].id
;
601 while (ti
->insns
[pc
].Instruction
.Opcode
!= TGSI_OPCODE_ENDSUB
)
602 prog_subroutine_inst(&ti
->subr
[i
], &ti
->insns
[pc
++]);
605 p
->in_nr
= ti
->scan
.file_max
[TGSI_FILE_INPUT
] + 1;
606 p
->out_nr
= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
] + 1;
609 case PIPE_SHADER_VERTEX
:
610 ret
= nv50_vertprog_prepare(ti
);
612 case PIPE_SHADER_FRAGMENT
:
613 ret
= nv50_fragprog_prepare(ti
);
615 case PIPE_SHADER_GEOMETRY
:
616 ret
= nv50_geomprog_prepare(ti
);
619 assert(!"unsupported program type");
/*
 * Translate program p: allocate a zeroed nv50_translation_info, scan the
 * shader with nv50_prog_scan() and generate code with nv50_generate_code().
 *
 * ti->edgeflag_out starts at PIPE_MAX_SHADER_OUTPUTS before the scan.
 * An error is logged for each of the two stages ("unsupported shader
 * program", "error during shader translation"). Returns TRUE when ret is zero
 * and FALSE otherwise.
 *
 * NOTE(review): the return type, the conditions guarding the two error
 * messages, the allocation check and the cleanup of ti are not visible in
 * this listing.
 */
629 nv50_program_translate(struct nv50_program
*p
)
631 struct nv50_translation_info
*ti
;
634 ti
= CALLOC_STRUCT(nv50_translation_info
);
637 ti
->edgeflag_out
= PIPE_MAX_SHADER_OUTPUTS
;
639 ret
= nv50_prog_scan(ti
);
641 NOUVEAU_ERR("unsupported shader program\n");
645 ret
= nv50_generate_code(ti
);
647 NOUVEAU_ERR("error during shader translation\n");
661 return ret
? FALSE
: TRUE
;
/*
 * Release the resources of program p: frees p->res through
 * nouveau_resource_free() and marks the program as not translated.
 *
 * NOTE(review): other cleanup statements between the visible lines, and any
 * use of nv50, are not visible in this listing.
 */
665 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
668 nouveau_resource_free(&p
->res
);
676 p
->translated
= FALSE
;