/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23 #include "pipe/p_shader_tokens.h"
24 #include "pipe/p_defines.h"
26 #include "tgsi/tgsi_parse.h"
27 #include "tgsi/tgsi_util.h"
28 #include "tgsi/tgsi_dump.h"
30 #include "nvc0_context.h"
/* nvc0_tgsi_src_mask: derive a 4-bit component mask for a source operand of
 * `inst`, starting from the write mask of destination 0 and adjusting it per
 * opcode. Callers in this file pass the source operand index as `c`.
 * NOTE(review): the embedded line numbers in this listing have gaps
 * (36, 38, 41, 46, 49, 68, ...), so most case labels and several return
 * statements are not visible here; the notes below cover visible lines only.
 */
34 nvc0_tgsi_src_mask(const struct tgsi_full_instruction
*inst
, int c
)
/* Start from the write mask of the first destination register. */
36 unsigned mask
= inst
->Dst
[0].Register
.WriteMask
;
38 switch (inst
->Instruction
.Opcode
) {
/* Keep bit 3 unchanged; any of bits 0..2 being set collapses to bit 0. */
41 return (mask
& 0x8) | ((mask
& 0x7) ? 0x1 : 0x0);
46 case TGSI_OPCODE_KIL
: /* WriteMask ignored */
/* The write mask is filtered with 0xa when c is non-zero, else with 0x6. */
49 return mask
& (c
? 0xa : 0x6);
68 const struct tgsi_instruction_texture
*tex
;
/* This path requires the instruction to carry texture information. */
70 assert(inst
->Instruction
.Texture
);
/* Every opcode reaching this point other than TEX and TXD also sets bit 3. */
74 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_TEX
&&
75 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXD
)
76 mask
|= 0x8; /* bias, lod or proj */
/* Further adjustment depends on the texture target; only the SHADOW1D label
 * is visible in this listing.
 */
78 switch (tex
->Texture
) {
82 case TGSI_TEXTURE_SHADOW1D
:
/* Each of write-mask bits 0..2 ORs a pattern into x that sets the other
 * two of those three bits.
 */
96 if (mask
& 1) x
|= 0x6;
97 if (mask
& 2) x
|= 0x5;
98 if (mask
& 4) x
|= 0x3;
109 nvc0_indirect_inputs(struct nvc0_translation_info
*ti
, int id
)
113 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; ++i
)
114 for (c
= 0; c
< 4; ++c
)
115 ti
->input_access
[i
][c
] = id
;
117 ti
->indirect_inputs
= TRUE
;
121 nvc0_indirect_outputs(struct nvc0_translation_info
*ti
, int id
)
125 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
)
126 for (c
= 0; c
< 4; ++c
)
127 ti
->output_access
[i
][c
] = id
;
129 ti
->indirect_outputs
= TRUE
;
/* nvc0_system_value_location: map a system-value semantic (name `sn`,
 * index `si`) to a location. prog_decl stores the result in
 * ti->sysval_loc[] and passes &ti->sysval_in[] as `is_input`.
 * NOTE(review): the switch header, the per-case return statements and any
 * writes through `is_input` fall on lines that are not visible in this
 * listing (gaps in the embedded numbering), so the actual location values
 * cannot be documented from here.
 */
132 static INLINE
unsigned
133 nvc0_system_value_location(unsigned sn
, unsigned si
, boolean
*is_input
)
135 /* NOTE: locations 0xfxx indicate special regs */
/* Semantics with a visible case label follow. */
138 case TGSI_SEMANTIC_VERTEXID:
142 case TGSI_SEMANTIC_PRIMID
:
146 case TGSI_SEMANTIC_LAYER_INDEX:
148 case TGSI_SEMANTIC_VIEWPORT_INDEX:
151 case TGSI_SEMANTIC_INSTANCEID
:
154 case TGSI_SEMANTIC_FACE
:
158 case TGSI_SEMANTIC_INVOCATIONID:
/* nvc0_varying_location: map a varying semantic (name `sn`, index `si`) to
 * a base location; prog_decl adds c * 4 for component c.
 * NOTE(review): several case labels below have no visible return statement
 * (gaps in the embedded numbering), and the block comment that starts on
 * the line numbered 186 has no visible terminator in this listing.
 */
167 static INLINE
unsigned
168 nvc0_varying_location(unsigned sn
, unsigned si
)
171 case TGSI_SEMANTIC_POSITION
:
/* COLOR locations start at 0x280 and advance by 16 per semantic index. */
173 case TGSI_SEMANTIC_COLOR
:
174 return 0x280 + (si
* 16); /* are these hard-wired ? */
/* BCOLOR locations start at 0x2a0 and advance by 16 per semantic index. */
175 case TGSI_SEMANTIC_BCOLOR
:
176 return 0x2a0 + (si
* 16);
177 case TGSI_SEMANTIC_FOG
:
179 case TGSI_SEMANTIC_PSIZE
:
182 case TGSI_SEMANTIC_PNTC:
/* GENERIC: two return expressions are visible (0x300 + si * 16 and
 * 0x80 + (si - 8) * 16); the conditions selecting between them are not.
 */
185 case TGSI_SEMANTIC_GENERIC
:
186 /* We'd really like to distinguish between TEXCOORD and GENERIC here,
187 * since only 0x300 to 0x37c can be replaced by sprite coordinates.
188 * Also, gl_PointCoord should be a system value and must be assigned to
189 * address 0x2e0. For now, let's cheat:
193 return 0x300 + si
* 16;
196 return 0x80 + ((si
- 8) * 16);
197 case TGSI_SEMANTIC_NORMAL
:
199 case TGSI_SEMANTIC_PRIMID
:
201 case TGSI_SEMANTIC_FACE
:
203 case TGSI_SEMANTIC_EDGEFLAG
: /* doesn't exist, set value like for an sreg */
/* CLIP_DISTANCE locations start at 0x2c0 and advance by 4 per index. */
206 case TGSI_SEMANTIC_CLIP_DISTANCE:
207 return 0x2c0 + (si * 4);
215 static INLINE
unsigned
216 nvc0_interp_mode(const struct tgsi_full_declaration
*decl
)
220 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_CONSTANT
)
221 mode
= NVC0_INTERP_FLAT
;
223 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_PERSPECTIVE
)
224 mode
= NVC0_INTERP_PERSPECTIVE
;
226 if (decl
->Declaration
.Semantic
&& decl
->Semantic
.Name
== TGSI_SEMANTIC_COLOR
)
227 mode
= NVC0_INTERP_PERSPECTIVE
;
229 mode
= NVC0_INTERP_LINEAR
;
231 if (decl
->Declaration
.Centroid
)
232 mode
|= NVC0_INTERP_CENTROID
;
238 prog_immediate(struct nvc0_translation_info
*ti
,
239 const struct tgsi_full_immediate
*imm
)
242 unsigned n
= ti
->immd32_nr
++;
244 assert(ti
->immd32_nr
<= ti
->scan
.immediate_count
);
246 for (c
= 0; c
< 4; ++c
)
247 ti
->immd32
[n
* 4 + c
] = imm
->u
[c
].Uint
;
249 ti
->immd32_ty
[n
] = imm
->Immediate
.DataType
;
/* prog_decl: process one TGSI declaration and record per-register location
 * information in `ti`, dispatching on the declared register file.
 * NOTE(review): the embedded line numbers have gaps (e.g. 258, 273, 286,
 * 289, 291, 306), so local declarations, else branches, break statements
 * and a few assignments are not visible in this listing.
 */
253 prog_decl(struct nvc0_translation_info
*ti
,
254 const struct tgsi_full_declaration
*decl
)
/* The semantic name defaults to GENERIC unless the declaration has one. */
257 unsigned sn
= TGSI_SEMANTIC_GENERIC
;
/* Inclusive register range covered by this declaration. */
259 const unsigned first
= decl
->Range
.First
;
260 const unsigned last
= decl
->Range
.Last
;
262 if (decl
->Declaration
.Semantic
) {
263 sn
= decl
->Semantic
.Name
;
264 si
= decl
->Semantic
.Index
;
267 switch (decl
->Declaration
.File
) {
268 case TGSI_FILE_INPUT
:
269 for (i
= first
; i
<= last
; ++i
) {
/* Vertex shader inputs: location is 0x80 + 16 per register + 4 per component. */
270 if (ti
->prog
->type
== PIPE_SHADER_VERTEX
) {
271 for (c
= 0; c
< 4; ++c
)
272 ti
->input_loc
[i
][c
] = 0x80 + i
* 16 + c
* 4;
/* Second visible form: location derived from the semantic via
 * nvc0_varying_location(); the line joining the two forms is not visible.
 */
274 for (c
= 0; c
< 4; ++c
)
275 ti
->input_loc
[i
][c
] = nvc0_varying_location(sn
, si
) + c
* 4;
276 /* for sprite coordinates: */
277 ti
->prog
->fp
.in_pos
[i
] = ti
->input_loc
[i
][0] / 4;
/* Fragment shader inputs also record an interpolation mode. */
279 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
)
280 ti
->interp_mode
[i
] = nvc0_interp_mode(decl
);
283 case TGSI_FILE_OUTPUT
:
/* The semantic index advances together with the register index. */
284 for (i
= first
; i
<= last
; ++i
, ++si
) {
285 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
) {
/* The depth output only gets component 2 assigned, at (num_outputs - 1) * 4. */
287 if (i
== ti
->fp_depth_output
) {
288 ti
->output_loc
[i
][2] = (ti
->scan
.num_outputs
- 1) * 4;
/* NOTE(review): the statement controlled by this condition (line 291) is
 * not visible in this listing.
 */
290 if (i
> ti
->fp_depth_output
)
292 for (c
= 0; c
< 4; ++c
)
293 ti
->output_loc
[i
][c
] = si
* 4 + c
;
/* Remember which output register carries the edge flag. */
296 if (sn
== TGSI_SEMANTIC_EDGEFLAG
)
297 ti
->edgeflag_out
= i
;
298 for (c
= 0; c
< 4; ++c
)
299 ti
->output_loc
[i
][c
] = nvc0_varying_location(sn
, si
) + c
* 4;
300 /* for TFB_VARYING_LOCS: */
301 ti
->prog
->vp
.out_pos
[i
] = ti
->output_loc
[i
][0] / 4;
305 case TGSI_FILE_SYSTEM_VALUE
:
/* System values: a single register is expected (see the assert below).
 * NOTE(review): how `i` is set for this case (line 306) is not visible.
 */
307 ti
->sysval_loc
[i
] = nvc0_system_value_location(sn
, si
, &ti
->sysval_in
[i
]);
308 assert(first
== last
);
310 case TGSI_FILE_TEMPORARY
:
/* Track the highest temporary index declared so far, plus one. */
311 ti
->temp128_nr
= MAX2(ti
->temp128_nr
, last
+ 1);
/* Register files listed here have no visible per-file handling. */
314 case TGSI_FILE_CONSTANT
:
315 case TGSI_FILE_SAMPLER
:
316 case TGSI_FILE_ADDRESS
:
317 case TGSI_FILE_IMMEDIATE
:
318 case TGSI_FILE_PREDICATE
:
/* Any other register file is reported as unhandled. */
321 NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl
->Declaration
.File
);
/* prog_inst: scan one TGSI instruction and update the bookkeeping in `ti`:
 * subroutine starts, which output/input components are accessed (and by
 * which instruction id), the edge-flag source, and whether indirect
 * temporary access requires stores.
 * nvc0_prog_scan passes `id` as the pre-incremented ti->num_insns, so
 * id - 1 is the index at which the instruction was stored in ti->insns.
 * NOTE(review): gaps in the embedded numbering (e.g. 339, 346, 349, 357,
 * 369, 377) hide some statements and guards, presumably including the
 * subroutine counter update and loop `continue`s - confirm against the
 * complete file.
 */
328 prog_inst(struct nvc0_translation_info
*ti
,
329 const struct tgsi_full_instruction
*inst
, int id
)
331 const struct tgsi_dst_register
*dst
;
332 const struct tgsi_src_register
*src
;
/* BGNSUB opens a new subroutine record. */
336 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_BGNSUB
) {
337 ti
->subr
[ti
->num_subrs
].first_insn
= id
- 1;
338 ti
->subr
[ti
->num_subrs
].id
= ti
->num_subrs
+ 1; /* id 0 is main program */
/* Writes to shader outputs: record the accessing instruction per component. */
342 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
343 dst
= &inst
->Dst
[0].Register
;
345 for (c
= 0; c
< 4; ++c
) {
/* NOTE(review): the condition guarding this call (line 346) is not visible. */
347 nvc0_indirect_outputs(ti
, id
);
348 if (!(dst
->WriteMask
& (1 << c
)))
350 ti
->output_access
[dst
->Index
][c
] = id
;
/* A MOV from an input register into the edge-flag output records that
 * input's index as the vertex program's edge-flag source.
 */
353 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
354 inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
&&
355 dst
->Index
== ti
->edgeflag_out
)
356 ti
->prog
->vp
.edgeflag
= inst
->Src
[0].Register
.Index
;
/* Indirectly addressed temporary destinations set require_stores. */
358 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
359 if (inst
->Dst
[0].Register
.Indirect
)
360 ti
->require_stores
= TRUE
;
/* Walk all source operands. */
363 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
364 src
= &inst
->Src
[s
].Register
;
/* Indirectly addressed temporary sources set require_stores as well. */
365 if (src
->File
== TGSI_FILE_TEMPORARY
)
366 if (inst
->Src
[s
].Register
.Indirect
)
367 ti
->require_stores
= TRUE
;
/* NOTE(review): the statement controlled by this test (line 369) is not visible. */
368 if (src
->File
!= TGSI_FILE_INPUT
)
/* Components of this source that the instruction actually reads. */
370 mask
= nvc0_tgsi_src_mask(inst
, s
);
/* An indirectly addressed input marks all inputs as accessed. */
372 if (inst
->Src
[s
].Register
.Indirect
)
373 nvc0_indirect_inputs(ti
, id
);
375 for (c
= 0; c
< 4; ++c
) {
376 if (!(mask
& (1 << c
)))
/* Resolve the swizzle: k is the input component selected for component c. */
378 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
379 if (k
<= TGSI_SWIZZLE_W
)
380 ti
->input_access
[src
->Index
][k
] = id
;
/* Probably should introduce something like struct tgsi_function_declaration
 * instead of trying to guess inputs/outputs.
 */
// prog_subroutine_inst: accumulate, for one instruction inside a subroutine,
// which TEMPORARY register components look like arguments (read before any
// recorded write) and which look like return values (written).
// Both sets are bit arrays indexed as [Index / 32][component], one bit per
// register (Index % 32).
// NOTE(review): gaps in the embedded numbering (e.g. 394-396, 400, 409-410)
// hide local declarations and at least one statement after the file test.
389 prog_subroutine_inst(struct nvc0_subroutine
*subr
,
390 const struct tgsi_full_instruction
*inst
)
392 const struct tgsi_dst_register
*dst
;
393 const struct tgsi_src_register
*src
;
// Sources: only TEMPORARY registers are of interest.
397 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
398 src
= &inst
->Src
[s
].Register
;
// NOTE(review): the statement controlled by this test (line 400) is not visible.
399 if (src
->File
!= TGSI_FILE_TEMPORARY
)
401 mask
= nvc0_tgsi_src_mask(inst
, s
);
403 for (c
= 0; c
< 4; ++c
) {
404 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
// A component that is read and not yet marked in retv is recorded in argv.
// NOTE(review): this test uses k < TGSI_SWIZZLE_W, whereas prog_inst uses
// k <= TGSI_SWIZZLE_W; as written, reads of the W component are never
// recorded here - confirm whether that is intended.
406 if ((mask
& (1 << c
)) && k
< TGSI_SWIZZLE_W
)
407 if (!(subr
->retv
[src
->Index
/ 32][k
] & (1 << (src
->Index
% 32))))
408 subr
->argv
[src
->Index
/ 32][k
] |= 1 << (src
->Index
% 32);
// Destination: every written component of a TEMPORARY is recorded in retv.
412 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
413 dst
= &inst
->Dst
[0].Register
;
415 for (c
= 0; c
< 4; ++c
)
416 if (dst
->WriteMask
& (1 << c
))
417 subr
->retv
[dst
->Index
/ 32][c
] |= 1 << (dst
->Index
% 32);
// nvc0_vp_gp_gen_header: fill the attribute-enable and export-enable bit
// fields of the program header from the access tables gathered during the
// scan. Each bit position `a` is split into header word (base + a / 32) and
// bit (a % 32).
// NOTE(review): local declarations, the statements controlled by two of the
// tests below (lines 436 and 439) and the return statement are not visible
// in this listing.
// NOTE(review): `1 << (a % 32)` shifts a signed int; when a % 32 is 31 this
// reaches the sign bit - confirm whether an unsigned constant was intended.
422 nvc0_vp_gp_gen_header(struct nvc0_program
*vp
, struct nvc0_translation_info
*ti
)
// Inputs: `a` starts at 0x80/4 and advances once per component; accessed
// components set a bit in the words starting at hdr[5].
427 for (a
= 0x80/4, i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
428 for (c
= 0; c
< 4; ++c
, ++a
)
429 if (ti
->input_access
[i
][c
])
430 vp
->hdr
[5 + a
/ 32] |= 1 << (a
% 32); /* VP_ATTR_EN */
// Outputs: the bit index is derived from the output location relative to
// 0x40; written components set a bit in the words starting at hdr[13].
433 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
434 a
= (ti
->output_loc
[i
][0] - 0x40) / 4;
// Locations at or above 0xf00 are tested separately here.
435 if (ti
->output_loc
[i
][0] >= 0xf00)
437 for (c
= 0; c
< 4; ++c
, ++a
) {
438 if (!ti
->output_access
[i
][c
])
440 vp
->hdr
[13 + a
/ 32] |= 1 << (a
% 32); /* VP_EXPORT_EN */
// System values: a location strictly between 0 and 0xf00 sets a bit in the
// input words (base 5) when sysval_in is set, else in the output words
// (base 13).
444 for (i
= 0; i
< TGSI_SEMANTIC_COUNT
; ++i
) {
445 a
= ti
->sysval_loc
[i
] / 4;
446 if (a
> 0 && a
< (0xf00 / 4))
447 vp
->hdr
[(ti
->sysval_in
[i
] ? 5 : 13) + a
/ 32] |= 1 << (a
% 32);
454 nvc0_vp_gen_header(struct nvc0_program
*vp
, struct nvc0_translation_info
*ti
)
456 vp
->hdr
[0] = 0x20461;
457 vp
->hdr
[4] = 0xff000;
459 vp
->hdr
[18] = (1 << vp
->vp
.num_ucps
) - 1;
461 return nvc0_vp_gp_gen_header(vp
, ti
);
/* nvc0_gp_gen_header: build the program header for a geometry program from
 * the shader's GS properties, then let nvc0_vp_gp_gen_header() add the
 * attribute and export enable bits. Compiled only when USE_UNUSED_CODE is
 * defined.
 * NOTE(review): break statements, default cases and the closing
 * preprocessor line are not visible in this listing (gaps in the embedded
 * numbering).
 * NOTE(review): max_output_verts and output_prim have no visible
 * initializer; they are assigned only when the corresponding property is
 * present, yet both are read afterwards - confirm against the complete file.
 */
464 #ifdef USE_UNUSED_CODE
466 nvc0_gp_gen_header(struct nvc0_program
*gp
, struct nvc0_translation_info
*ti
)
468 unsigned invocations
= 1;
469 unsigned max_output_verts
, output_prim
;
472 gp
->hdr
[0] = 0x21061;
/* Collect the GS properties reported by the scan pass. */
474 for (i
= 0; i
< ti
->scan
.num_properties
; ++i
) {
475 switch (ti
->scan
.properties
[i
].name
) {
476 case TGSI_PROPERTY_GS_OUTPUT_PRIM
:
477 output_prim
= ti
->scan
.properties
[i
].data
[0];
479 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
:
480 max_output_verts
= ti
->scan
.properties
[i
].data
[0];
481 assert(max_output_verts
< 512);
484 case TGSI_PROPERTY_GS_INVOCATIONS:
485 invocations = ti->scan.properties[i].data[0];
486 assert(invocations <= 32);
/* The invocation count, capped at 32, goes into bits 24 and up of word 2. */
494 gp
->hdr
[2] = MIN2(invocations
, 32) << 24;
/* Word 3 and the top bits of word 0 depend on the output primitive type. */
496 switch (output_prim
) {
497 case PIPE_PRIM_POINTS
:
498 gp
->hdr
[3] = 0x01000000;
499 gp
->hdr
[0] |= 0xf0000000;
501 case PIPE_PRIM_LINE_STRIP
:
502 gp
->hdr
[3] = 0x06000000;
503 gp
->hdr
[0] |= 0x10000000;
505 case PIPE_PRIM_TRIANGLE_STRIP
:
506 gp
->hdr
[3] = 0x07000000;
507 gp
->hdr
[0] |= 0x10000000;
/* Word 4 holds the maximum output vertex count, masked to 9 bits. */
514 gp
->hdr
[4] = max_output_verts
& 0x1ff;
516 return nvc0_vp_gp_gen_header(gp
, ti
);
/* nvc0_fp_gen_header: build the program header for a fragment program:
 * fixed words, kill / multiple-output flags, per-input interpolation mode
 * fields, an output mask in word 18 and FLAT interpolation for system
 * values.
 * NOTE(review): gaps in the embedded numbering (e.g. 532, 535, 544, 547,
 * 550, 553) hide statements and else branches, so how the visible
 * conditions chain together cannot be fully established from this listing.
 */
521 nvc0_fp_gen_header(struct nvc0_program
*fp
, struct nvc0_translation_info
*ti
)
526 fp
->hdr
[0] = 0x21462;
527 fp
->hdr
[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
/* Shaders that use KIL set bit 15 of word 0. */
529 if (ti
->scan
.uses_kill
)
530 fp
->hdr
[0] |= 0x8000;
/* The output count threshold for the multiple-outputs flag is 2 in the
 * first visible test and 1 in the second; presumably the second belongs to
 * the branch for shaders that do not write depth (line 535 is not visible).
 */
531 if (ti
->scan
.writes_z
) {
533 if (ti
->scan
.num_outputs
> 2)
534 fp
->hdr
[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
536 if (ti
->scan
.num_outputs
> 1)
537 fp
->hdr
[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
/* Inputs: m is the low two bits of the register's interpolation mode. */
540 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
541 m
= ti
->interp_mode
[i
] & 3;
542 for (c
= 0; c
< 4; ++c
) {
543 if (!ti
->input_access
[i
][c
])
/* Bit offset of this component's field: half the input location. */
545 a
= ti
->input_loc
[i
][c
] / 2;
/* NOTE(review): the statement controlled by this test (line 547) is not visible. */
546 if (ti
->input_loc
[i
][c
] >= 0x2c0)
/* A register located at 0x70 sets per-component bits 28.. of word 5. */
548 if (ti
->input_loc
[i
][0] == 0x70)
549 fp
->hdr
[5] |= 1 << (28 + c
); /* FRAG_COORD_UMASK */
/* A register located at 0x2e0 sets per-component bits 24.. of word 14. */
551 if (ti
->input_loc
[i
][0] == 0x2e0)
552 fp
->hdr
[14] |= 1 << (24 + c
); /* POINT_COORD */
/* Store the interpolation mode field in the words starting at hdr[4]. */
554 fp
->hdr
[4 + a
/ 32] |= m
<< (a
% 32);
/* Outputs other than the depth output contribute a 4-bit mask to word 18,
 * shifted by the location of the output's first component.
 */
558 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
559 if (i
!= ti
->fp_depth_output
)
560 fp
->hdr
[18] |= 0xf << ti
->output_loc
[i
][0];
/* System values with a location strictly between 0 and 0xf00 are marked
 * as FLAT interpolated.
 */
563 for (i
= 0; i
< TGSI_SEMANTIC_COUNT
; ++i
) {
564 a
= ti
->sysval_loc
[i
] / 2;
565 if ((a
> 0) && (a
< 0xf00 / 2))
566 fp
->hdr
[4 + a
/ 32] |= NVC0_INTERP_FLAT
<< (a
% 32);
/* nvc0_prog_scan: first pass over a shader's TGSI tokens. Runs
 * tgsi_scan_shader(), allocates the immediate / instruction / subroutine
 * arrays, feeds every token to prog_immediate / prog_decl / prog_inst,
 * analyses subroutine bodies and finally generates the program header for
 * the shader type.
 * NOTE(review): gaps in the embedded numbering (e.g. 577-579, 593, 608,
 * 611, 615-620, 632, 655-661, 665-669) hide local declarations, break
 * statements, the default case and the return; the notes below cover
 * visible lines only.
 */
573 nvc0_prog_scan(struct nvc0_translation_info
*ti
)
575 struct nvc0_program
*prog
= ti
->prog
;
576 struct tgsi_parse_context parse
;
/* Debug builds dump the TGSI tokens. */
580 #if NV50_DEBUG & NV50_DEBUG_SHADER
581 tgsi_dump(prog
->pipe
.tokens
, 0);
584 tgsi_scan_shader(prog
->pipe
.tokens
, &ti
->scan
);
/* Fragment programs: find the output with POSITION semantic (used as the
 * depth output); 255 is the initial value when there is none.
 */
586 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
) {
587 ti
->fp_depth_output
= 255;
588 for (i
= 0; i
< ti
->scan
.num_outputs
; ++i
)
589 if (ti
->scan
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
)
590 ti
->fp_depth_output
= i
;
/* One subroutine record per BGNSUB opcode counted by the scan.
 * NOTE(review): the assignment target of this CALLOC (line 593) is not
 * visible, and none of the allocations below has a visible NULL check.
 */
594 CALLOC(ti
->scan
.opcode_count
[TGSI_OPCODE_BGNSUB
], sizeof(ti
->subr
[0]));
/* 16 bytes (four 32-bit components) per immediate, plus one type byte each. */
596 ti
->immd32
= (uint32_t *)MALLOC(ti
->scan
.immediate_count
* 16);
597 ti
->immd32_ty
= (ubyte
*)MALLOC(ti
->scan
.immediate_count
* sizeof(ubyte
));
599 ti
->insns
= MALLOC(ti
->scan
.num_instructions
* sizeof(ti
->insns
[0]));
/* Walk the token stream and dispatch on the token type. */
601 tgsi_parse_init(&parse
, prog
->pipe
.tokens
);
602 while (!tgsi_parse_end_of_tokens(&parse
)) {
603 tgsi_parse_token(&parse
);
605 switch (parse
.FullToken
.Token
.Type
) {
606 case TGSI_TOKEN_TYPE_IMMEDIATE
:
607 prog_immediate(ti
, &parse
.FullToken
.FullImmediate
);
609 case TGSI_TOKEN_TYPE_DECLARATION
:
610 prog_decl(ti
, &parse
.FullToken
.FullDeclaration
);
612 case TGSI_TOKEN_TYPE_INSTRUCTION
:
/* Keep a copy of the instruction; prog_inst receives the 1-based id. */
613 ti
->insns
[ti
->num_insns
] = parse
.FullToken
.FullInstruction
;
614 prog_inst(ti
, &parse
.FullToken
.FullInstruction
, ++ti
->num_insns
);
/* Analyse each subroutine body up to its ENDSUB.
 * NOTE(review): pc starts at subr[i].id, which prog_inst sets to the
 * subroutine number (num_subrs + 1), while the instruction index is stored
 * in subr[i].first_insn - confirm which field is meant as the start index.
 */
621 for (i
= 0; i
< ti
->num_subrs
; ++i
) {
622 unsigned pc
= ti
->subr
[i
].id
;
623 while (ti
->insns
[pc
].Instruction
.Opcode
!= TGSI_OPCODE_ENDSUB
)
624 prog_subroutine_inst(&ti
->subr
[i
], &ti
->insns
[pc
++]);
/* Select register files and generate the header per shader type. */
627 switch (prog
->type
) {
628 case PIPE_SHADER_VERTEX
:
629 ti
->input_file
= NV_FILE_MEM_A
;
630 ti
->output_file
= NV_FILE_MEM_V
;
631 ret
= nvc0_vp_gen_header(prog
, ti
);
634 case PIPE_SHADER_TESSELLATION_CONTROL:
635 ret = nvc0_tcp_gen_header(ti);
637 case PIPE_SHADER_TESSELLATION_EVALUATION:
638 ret = nvc0_tep_gen_header(ti);
640 case PIPE_SHADER_GEOMETRY:
641 ret = nvc0_gp_gen_header(ti);
644 case PIPE_SHADER_FRAGMENT
:
645 ti
->input_file
= NV_FILE_MEM_V
;
646 ti
->output_file
= NV_FILE_GPR
;
648 if (ti
->scan
.writes_z
)
649 prog
->flags
[0] = 0x11; /* ? */
/* Early Z is enabled only without KIL and without global stores. */
651 if (!ti
->scan
.uses_kill
&& !ti
->global_stores
)
652 prog
->fp
.early_z
= 1;
654 ret
= nvc0_fp_gen_header(prog
, ti
);
657 assert(!"unsupported program type");
/* When stores are required, set bit 26 of word 0 and put 16 bytes per
 * temporary into word 1.
 */
662 if (ti
->require_stores
) {
663 prog
->hdr
[0] |= 1 << 26;
664 prog
->hdr
[1] |= ti
->temp128_nr
* 16; /* l[] size */
/* nvc0_program_translate: translate a program by allocating a translation
 * context, scanning the shader (nvc0_prog_scan) and generating code
 * (nvc0_generate_code). Returns TRUE when `ret` is zero, FALSE otherwise.
 * NOTE(review): gaps in the embedded numbering (e.g. 675-679, 688,
 * 690-692, 694, 696, 702-713) hide the error checks, the allocation check
 * and any cleanup code.
 */
672 nvc0_program_translate(struct nvc0_program
*prog
)
674 struct nvc0_translation_info
*ti
;
677 ti
= CALLOC_STRUCT(nvc0_translation_info
);
/* Initial "none" values for the edge-flag output and input. */
680 ti
->edgeflag_out
= PIPE_MAX_SHADER_OUTPUTS
;
682 prog
->vp
.edgeflag
= PIPE_MAX_ATTRIBS
;
/* Vertex programs with user clip planes request UCP code to be appended. */
684 if (prog
->type
== PIPE_SHADER_VERTEX
&& prog
->vp
.num_ucps
)
685 ti
->append_ucp
= TRUE
;
687 ret
= nvc0_prog_scan(ti
);
689 NOUVEAU_ERR("unsupported shader program\n");
693 ret
= nvc0_generate_code(ti
);
695 NOUVEAU_ERR("shader translation failed\n");
/* Debug builds print every header word together with its byte offset.
 * NOTE(review): the offset argument has type size_t but is printed with
 * %lx - confirm this is acceptable on all supported targets.
 */
697 #if NV50_DEBUG & NV50_DEBUG_SHADER
699 for (i
= 0; i
< sizeof(prog
->hdr
) / sizeof(prog
->hdr
[0]); ++i
)
700 debug_printf("HDR[%02lx] = 0x%08x\n",
701 i
* sizeof(prog
->hdr
[0]), prog
->hdr
[i
]);
/* A non-zero ret means failure. */
714 return ret
? FALSE
: TRUE
;
/* nvc0_program_destroy: release the resource attached to a program and
 * reset it to the untranslated state.
 * NOTE(review): lines 719-720, 722-727 and 729 of the original numbering
 * are not visible in this listing, so any guards or additional frees are
 * not documented here. The `nvc0` parameter is not used in the visible
 * lines.
 */
718 nvc0_program_destroy(struct nvc0_context
*nvc0
, struct nvc0_program
*prog
)
721 nouveau_resource_free(&prog
->res
);
/* Clear the whole program header. */
728 memset(prog
->hdr
, 0, sizeof(prog
->hdr
));
/* Mark the program as not translated. */
730 prog
->translated
= FALSE
;