src/gallium/docs/source/tgsi.rst

   1 TGSI
   2 ====
   3
   4 TGSI, Tungsten Graphics Shader Infrastructure, is an intermediate language
   5 for describing shaders. Since Gallium is inherently shaderful, shaders are
   6 an important part of the API. TGSI is the only intermediate representation
   7 used by all drivers.
   8
   9 Basics
  10 ------
  11
  12 All TGSI instructions, known as *opcodes*, operate on arbitrary-precision
  13 floating-point four-component vectors. An opcode may have up to one
  14 destination register, known as *dst*, and between zero and three source
  15 registers, called *src0* through *src2*, or simply *src* if there is only
  16 one.
  17
  18 Some instructions, like :opcode:`I2F`, permit re-interpretation of vector
  19 components as integers. Other instructions permit using registers as
  20 two-component vectors with double precision; see :ref:`doubleopcodes`.
  21
  22 When an instruction has a scalar result, the result is usually copied into
  23 each of the components of *dst*. When this happens, the result is said to be
  24 *replicated* to *dst*. :opcode:`RCP` is one such instruction.
  25
  26 Instruction Set
  27 ---------------
  28
  29 From GL_NV_vertex_program
  30 ^^^^^^^^^^^^^^^^^^^^^^^^^
  31
  32
  33 .. opcode:: ARL - Address Register Load
  34
  35 .. math::
  36
  37   dst.x = \lfloor src.x\rfloor
  38
  39   dst.y = \lfloor src.y\rfloor
  40
  41   dst.z = \lfloor src.z\rfloor
  42
  43   dst.w = \lfloor src.w\rfloor
  44
  45
  46 .. opcode:: MOV - Move
  47
  48 .. math::
  49
  50   dst.x = src.x
  51
  52   dst.y = src.y
  53
  54   dst.z = src.z
  55
  56   dst.w = src.w
  57
  58
  59 .. opcode:: LIT - Light Coefficients
  60
  61 .. math::
  62
  63   dst.x = 1
  64
  65   dst.y = max(src.x, 0)
  66
  67   dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128)} : 0
  68
  69   dst.w = 1
  70
  71
  72 .. opcode:: RCP - Reciprocal
  73
  74 This instruction replicates its result.
  75
  76 .. math::
  77
  78   dst = \frac{1}{src.x}
  79
  80
  81 .. opcode:: RSQ - Reciprocal Square Root
  82
  83 This instruction replicates its result.
  84
  85 .. math::
  86
  87   dst = \frac{1}{\sqrt{|src.x|}}
  88
  89
  90 .. opcode:: EXP - Approximate Exponential Base 2
  91
  92 .. math::
  93
  94   dst.x = 2^{\lfloor src.x\rfloor}
  95
  96   dst.y = src.x - \lfloor src.x\rfloor
  97
  98   dst.z = 2^{src.x}
  99
 100   dst.w = 1
 101
 102
 103 .. opcode:: LOG - Approximate Logarithm Base 2
 104
 105 .. math::
 106
 107   dst.x = \lfloor\log_2{|src.x|}\rfloor
 108
 109   dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
 110
 111   dst.z = \log_2{|src.x|}
 112
 113   dst.w = 1
 114
 115
 116 .. opcode:: MUL - Multiply
 117
 118 .. math::
 119
 120   dst.x = src0.x \times src1.x
 121
 122   dst.y = src0.y \times src1.y
 123
 124   dst.z = src0.z \times src1.z
 125
 126   dst.w = src0.w \times src1.w
 127
 128
 129 .. opcode:: ADD - Add
 130
 131 .. math::
 132
 133   dst.x = src0.x + src1.x
 134
 135   dst.y = src0.y + src1.y
 136
 137   dst.z = src0.z + src1.z
 138
 139   dst.w = src0.w + src1.w
 140
 141
 142 .. opcode:: DP3 - 3-component Dot Product
 143
 144 This instruction replicates its result.
 145
 146 .. math::
 147
 148   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
 149
 150
 151 .. opcode:: DP4 - 4-component Dot Product
 152
 153 This instruction replicates its result.
 154
 155 .. math::
 156
 157   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
 158
 159
 160 .. opcode:: DST - Distance Vector
 161
 162 .. math::
 163
 164   dst.x = 1
 165
 166   dst.y = src0.y \times src1.y
 167
 168   dst.z = src0.z
 169
 170   dst.w = src1.w
 171
 172
 173 .. opcode:: MIN - Minimum
 174
 175 .. math::
 176
 177   dst.x = min(src0.x, src1.x)
 178
 179   dst.y = min(src0.y, src1.y)
 180
 181   dst.z = min(src0.z, src1.z)
 182
 183   dst.w = min(src0.w, src1.w)
 184
 185
 186 .. opcode:: MAX - Maximum
 187
 188 .. math::
 189
 190   dst.x = max(src0.x, src1.x)
 191
 192   dst.y = max(src0.y, src1.y)
 193
 194   dst.z = max(src0.z, src1.z)
 195
 196   dst.w = max(src0.w, src1.w)
 197
 198
 199 .. opcode:: SLT - Set On Less Than
 200
 201 .. math::
 202
 203   dst.x = (src0.x < src1.x) ? 1 : 0
 204
 205   dst.y = (src0.y < src1.y) ? 1 : 0
 206
 207   dst.z = (src0.z < src1.z) ? 1 : 0
 208
 209   dst.w = (src0.w < src1.w) ? 1 : 0
 210
 211
 212 .. opcode:: SGE - Set On Greater Equal Than
 213
 214 .. math::
 215
 216   dst.x = (src0.x >= src1.x) ? 1 : 0
 217
 218   dst.y = (src0.y >= src1.y) ? 1 : 0
 219
 220   dst.z = (src0.z >= src1.z) ? 1 : 0
 221
 222   dst.w = (src0.w >= src1.w) ? 1 : 0
 223
 224
 225 .. opcode:: MAD - Multiply And Add
 226
 227 .. math::
 228
 229   dst.x = src0.x \times src1.x + src2.x
 230
 231   dst.y = src0.y \times src1.y + src2.y
 232
 233   dst.z = src0.z \times src1.z + src2.z
 234
 235   dst.w = src0.w \times src1.w + src2.w
 236
 237
 238 .. opcode:: SUB - Subtract
 239
 240 .. math::
 241
 242   dst.x = src0.x - src1.x
 243
 244   dst.y = src0.y - src1.y
 245
 246   dst.z = src0.z - src1.z
 247
 248   dst.w = src0.w - src1.w
 249
 250
 251 .. opcode:: LRP - Linear Interpolate
 252
 253 .. math::
 254
 255   dst.x = src0.x \times src1.x + (1 - src0.x) \times src2.x
 256
 257   dst.y = src0.y \times src1.y + (1 - src0.y) \times src2.y
 258
 259   dst.z = src0.z \times src1.z + (1 - src0.z) \times src2.z
 260
 261   dst.w = src0.w \times src1.w + (1 - src0.w) \times src2.w
 262
 263
 264 .. opcode:: CND - Condition
 265
 266 .. math::
 267
 268   dst.x = (src2.x > 0.5) ? src0.x : src1.x
 269
 270   dst.y = (src2.y > 0.5) ? src0.y : src1.y
 271
 272   dst.z = (src2.z > 0.5) ? src0.z : src1.z
 273
 274   dst.w = (src2.w > 0.5) ? src0.w : src1.w
 275
 276
 277 .. opcode:: DP2A - 2-component Dot Product And Add
 278
 279 .. math::
 280
 281   dst.x = src0.x \times src1.x + src0.y \times src1.y + src2.x
 282
 283   dst.y = src0.x \times src1.x + src0.y \times src1.y + src2.x
 284
 285   dst.z = src0.x \times src1.x + src0.y \times src1.y + src2.x
 286
 287   dst.w = src0.x \times src1.x + src0.y \times src1.y + src2.x
 288
 289
 290 .. opcode:: FRAC - Fraction
 291
 292 .. math::
 293
 294   dst.x = src.x - \lfloor src.x\rfloor
 295
 296   dst.y = src.y - \lfloor src.y\rfloor
 297
 298   dst.z = src.z - \lfloor src.z\rfloor
 299
 300   dst.w = src.w - \lfloor src.w\rfloor
 301
 302
 303 .. opcode:: CLAMP - Clamp
 304
 305 .. math::
 306
 307   dst.x = clamp(src0.x, src1.x, src2.x)
 308
 309   dst.y = clamp(src0.y, src1.y, src2.y)
 310
 311   dst.z = clamp(src0.z, src1.z, src2.z)
 312
 313   dst.w = clamp(src0.w, src1.w, src2.w)
 314
 315
 316 .. opcode:: FLR - Floor
 317
 318 This is identical to :opcode:`ARL`.
 319
 320 .. math::
 321
 322   dst.x = \lfloor src.x\rfloor
 323
 324   dst.y = \lfloor src.y\rfloor
 325
 326   dst.z = \lfloor src.z\rfloor
 327
 328   dst.w = \lfloor src.w\rfloor
 329
 330
 331 .. opcode:: ROUND - Round
 332
 333 .. math::
 334
 335   dst.x = round(src.x)
 336
 337   dst.y = round(src.y)
 338
 339   dst.z = round(src.z)
 340
 341   dst.w = round(src.w)
 342
 343
 344 .. opcode:: EX2 - Exponential Base 2
 345
 346 This instruction replicates its result.
 347
 348 .. math::
 349
 350   dst = 2^{src.x}
 351
 352
 353 .. opcode:: LG2 - Logarithm Base 2
 354
 355 This instruction replicates its result.
 356
 357 .. math::
 358
 359   dst = \log_2{src.x}
 360
 361
 362 .. opcode:: POW - Power
 363
 364 This instruction replicates its result.
 365
 366 .. math::
 367
 368   dst = src0.x^{src1.x}
 369
 370 .. opcode:: XPD - Cross Product
 371
 372 .. math::
 373
 374   dst.x = src0.y \times src1.z - src1.y \times src0.z
 375
 376   dst.y = src0.z \times src1.x - src1.z \times src0.x
 377
 378   dst.z = src0.x \times src1.y - src1.x \times src0.y
 379
 380   dst.w = 1
 381
 382
 383 .. opcode:: ABS - Absolute
 384
 385 .. math::
 386
 387   dst.x = |src.x|
 388
 389   dst.y = |src.y|
 390
 391   dst.z = |src.z|
 392
 393   dst.w = |src.w|
 394
 395
 396 .. opcode:: RCC - Reciprocal Clamped
 397
 398 This instruction replicates its result.
 399
 400 XXX cleanup on aisle three
 401
 402 .. math::
 403
 404   dst = (1 / src.x) > 0 ? clamp(1 / src.x, 5.42101e-020, 1.884467e+019) : clamp(1 / src.x, -1.884467e+019, -5.42101e-020)
 405
 406
 407 .. opcode:: DPH - Homogeneous Dot Product
 408
 409 This instruction replicates its result.
 410
 411 .. math::
 412
 413   dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src1.w
 414
 415
 416 .. opcode:: COS - Cosine
 417
 418 This instruction replicates its result.
 419
 420 .. math::
 421
 422   dst = \cos{src.x}
 423
 424
 425 .. opcode:: DDX - Derivative Relative To X
 426
 427 .. math::
 428
 429   dst.x = partialx(src.x)
 430
 431   dst.y = partialx(src.y)
 432
 433   dst.z = partialx(src.z)
 434
 435   dst.w = partialx(src.w)
 436
 437
 438 .. opcode:: DDY - Derivative Relative To Y
 439
 440 .. math::
 441
 442   dst.x = partialy(src.x)
 443
 444   dst.y = partialy(src.y)
 445
 446   dst.z = partialy(src.z)
 447
 448   dst.w = partialy(src.w)
 449
 450
 451 .. opcode:: KILP - Predicated Discard
 452
 453   discard
 454
 455
 456 .. opcode:: PK2H - Pack Two 16-bit Floats
 457
 458   TBD
 459
 460
 461 .. opcode:: PK2US - Pack Two Unsigned 16-bit Scalars
 462
 463   TBD
 464
 465
 466 .. opcode:: PK4B - Pack Four Signed 8-bit Scalars
 467
 468   TBD
 469
 470
 471 .. opcode:: PK4UB - Pack Four Unsigned 8-bit Scalars
 472
 473   TBD
 474
 475
 476 .. opcode:: RFL - Reflection Vector
 477
 478 .. math::
 479
 480   dst.x = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.x - src1.x
 481
 482   dst.y = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.y - src1.y
 483
 484   dst.z = 2 \times (src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z) / (src0.x \times src0.x + src0.y \times src0.y + src0.z \times src0.z) \times src0.z - src1.z
 485
 486   dst.w = 1
 487
 488 .. note::
 489
 490    Considered for removal.
 491
 492
 493 .. opcode:: SEQ - Set On Equal
 494
 495 .. math::
 496
 497   dst.x = (src0.x == src1.x) ? 1 : 0
 498
 499   dst.y = (src0.y == src1.y) ? 1 : 0
 500
 501   dst.z = (src0.z == src1.z) ? 1 : 0
 502
 503   dst.w = (src0.w == src1.w) ? 1 : 0
 504
 505
 506 .. opcode:: SFL - Set On False
 507
 508 This instruction replicates its result.
 509
 510 .. math::
 511
 512   dst = 0
 513
 514 .. note::
 515
 516    Considered for removal.
 517
 518
 519 .. opcode:: SGT - Set On Greater Than
 520
 521 .. math::
 522
 523   dst.x = (src0.x > src1.x) ? 1 : 0
 524
 525   dst.y = (src0.y > src1.y) ? 1 : 0
 526
 527   dst.z = (src0.z > src1.z) ? 1 : 0
 528
 529   dst.w = (src0.w > src1.w) ? 1 : 0
 530
 531
 532 .. opcode:: SIN - Sine
 533
 534 This instruction replicates its result.
 535
 536 .. math::
 537
 538   dst = \sin{src.x}
 539
 540
 541 .. opcode:: SLE - Set On Less Equal Than
 542
 543 .. math::
 544
 545   dst.x = (src0.x <= src1.x) ? 1 : 0
 546
 547   dst.y = (src0.y <= src1.y) ? 1 : 0
 548
 549   dst.z = (src0.z <= src1.z) ? 1 : 0
 550
 551   dst.w = (src0.w <= src1.w) ? 1 : 0
 552
 553
 554 .. opcode:: SNE - Set On Not Equal
 555
 556 .. math::
 557
 558   dst.x = (src0.x != src1.x) ? 1 : 0
 559
 560   dst.y = (src0.y != src1.y) ? 1 : 0
 561
 562   dst.z = (src0.z != src1.z) ? 1 : 0
 563
 564   dst.w = (src0.w != src1.w) ? 1 : 0
 565
 566
 567 .. opcode:: STR - Set On True
 568
 569 This instruction replicates its result.
 570
 571 .. math::
 572
 573   dst = 1
 574
 575
 576 .. opcode:: TEX - Texture Lookup
 577
 578   TBD
 579
 580
 581 .. opcode:: TXD - Texture Lookup with Derivatives
 582
 583   TBD
 584
 585
 586 .. opcode:: TXP - Projective Texture Lookup
 587
 588   TBD
 589
 590
 591 .. opcode:: UP2H - Unpack Two 16-Bit Floats
 592
 593   TBD
 594
 595 .. note::
 596
 597    Considered for removal.
 598
 599 .. opcode:: UP2US - Unpack Two Unsigned 16-Bit Scalars
 600
 601   TBD
 602
 603 .. note::
 604
 605    Considered for removal.
 606
 607 .. opcode:: UP4B - Unpack Four Signed 8-Bit Values
 608
 609   TBD
 610
 611 .. note::
 612
 613    Considered for removal.
 614
 615 .. opcode:: UP4UB - Unpack Four Unsigned 8-Bit Scalars
 616
 617   TBD
 618
 619 .. note::
 620
 621    Considered for removal.
 622
 623 .. opcode:: X2D - 2D Coordinate Transformation
 624
 625 .. math::
 626
 627   dst.x = src0.x + src1.x \times src2.x + src1.y \times src2.y
 628
 629   dst.y = src0.y + src1.x \times src2.z + src1.y \times src2.w
 630
 631   dst.z = src0.x + src1.x \times src2.x + src1.y \times src2.y
 632
 633   dst.w = src0.y + src1.x \times src2.z + src1.y \times src2.w
 634
 635 .. note::
 636
 637    Considered for removal.
 638
 639
 640 From GL_NV_vertex_program2
 641 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 642
 643
 644 .. opcode:: ARA - Address Register Add
 645
 646   TBD
 647
 648 .. note::
 649
 650    Considered for removal.
 651
 652 .. opcode:: ARR - Address Register Load With Round
 653
 654 .. math::
 655
 656   dst.x = round(src.x)
 657
 658   dst.y = round(src.y)
 659
 660   dst.z = round(src.z)
 661
 662   dst.w = round(src.w)
 663
 664
 665 .. opcode:: BRA - Branch
 666
 667   pc = target
 668
 669 .. note::
 670
 671    Considered for removal.
 672
 673 .. opcode:: CAL - Subroutine Call
 674
 675   push(pc)
 676   pc = target
 677
 678
 679 .. opcode:: RET - Subroutine Call Return
 680
 681   pc = pop()
 682
 683   Potential restrictions:
 684   * Only occurs at end of function.
 685
 686 .. opcode:: SSG - Set Sign
 687
 688 .. math::
 689
 690   dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0
 691
 692   dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0
 693
 694   dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0
 695
 696   dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0
 697
 698
 699 .. opcode:: CMP - Compare
 700
 701 .. math::
 702
 703   dst.x = (src0.x < 0) ? src1.x : src2.x
 704
 705   dst.y = (src0.y < 0) ? src1.y : src2.y
 706
 707   dst.z = (src0.z < 0) ? src1.z : src2.z
 708
 709   dst.w = (src0.w < 0) ? src1.w : src2.w
 710
 711
 712 .. opcode:: KIL - Conditional Discard
 713
 714 .. math::
 715
 716   if (src.x < 0 || src.y < 0 || src.z < 0 || src.w < 0)
 717     discard
 718   endif
 719
 720
 721 .. opcode:: SCS - Sine Cosine
 722
 723 .. math::
 724
 725   dst.x = \cos{src.x}
 726
 727   dst.y = \sin{src.x}
 728
 729   dst.z = 0
 730
 731   dst.w = 1
 732
 733
 734 .. opcode:: TXB - Texture Lookup With Bias
 735
 736   TBD
 737
 738
 739 .. opcode:: NRM - 3-component Vector Normalise
 740
 741 .. math::
 742
 743   dst.x = src.x / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
 744
 745   dst.y = src.y / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
 746
 747   dst.z = src.z / \sqrt{src.x \times src.x + src.y \times src.y + src.z \times src.z}
 748
 749   dst.w = 1
 750
 751
 752 .. opcode:: DIV - Divide
 753
 754 .. math::
 755
 756   dst.x = \frac{src0.x}{src1.x}
 757
 758   dst.y = \frac{src0.y}{src1.y}
 759
 760   dst.z = \frac{src0.z}{src1.z}
 761
 762   dst.w = \frac{src0.w}{src1.w}
 763
 764
 765 .. opcode:: DP2 - 2-component Dot Product
 766
 767 This instruction replicates its result.
 768
 769 .. math::
 770
 771   dst = src0.x \times src1.x + src0.y \times src1.y
 772
 773
 774 .. opcode:: TXL - Texture Lookup With LOD
 775
 776   TBD
 777
 778
 779 .. opcode:: BRK - Break
 780
 781   TBD
 782
 783
 784 .. opcode:: IF - If
 785
 786   TBD
 787
 788
 789 .. opcode:: BGNFOR - Begin a For-Loop
 790
 791   dst.x = floor(src.x)
 792   dst.y = floor(src.y)
 793   dst.z = floor(src.z)
 794
 795   if (dst.y <= 0)
 796     pc = [matching ENDFOR] + 1
 797   endif
 798
 799   Note: The destination must be a loop register.
 800         The source must be a constant register.
 801
 802 .. note::
 803
 804    Considered for cleanup.
 805
 806 .. note::
 807
 808    Considered for removal.
 809
 810
 811 .. opcode:: REP - Repeat
 812
 813   TBD
 814
 815
 816 .. opcode:: ELSE - Else
 817
 818   TBD
 819
 820
 821 .. opcode:: ENDIF - End If
 822
 823   TBD
 824
 825
 826 .. opcode:: ENDFOR - End a For-Loop
 827
 828   dst.x = dst.x + dst.z
 829   dst.y = dst.y - 1.0
 830
 831   if (dst.y > 0)
 832     pc = [matching BGNFOR instruction] + 1
 833   endif
 834
 835   Note: The destination must be a loop register.
 836
 837 .. note::
 838
 839    Considered for cleanup.
 840
 841 .. note::
 842
 843    Considered for removal.
 844
 845 .. opcode:: ENDREP - End Repeat
 846
 847   TBD
 848
 849
 850 .. opcode:: PUSHA - Push Address Register On Stack
 851
 852   push(src.x)
 853   push(src.y)
 854   push(src.z)
 855   push(src.w)
 856
 857 .. note::
 858
 859    Considered for cleanup.
 860
 861 .. note::
 862
 863    Considered for removal.
 864
 865 .. opcode:: POPA - Pop Address Register From Stack
 866
 867   dst.w = pop()
 868   dst.z = pop()
 869   dst.y = pop()
 870   dst.x = pop()
 871
 872 .. note::
 873
 874    Considered for cleanup.
 875
 876 .. note::
 877
 878    Considered for removal.
 879
 880
 881 From GL_NV_gpu_program4
 882 ^^^^^^^^^^^^^^^^^^^^^^^^
 883
 884 Support for these opcodes is indicated by a special pipe capability bit (TBD).
 885
 886 .. opcode:: CEIL - Ceiling
 887
 888 .. math::
 889
 890   dst.x = \lceil src.x\rceil
 891
 892   dst.y = \lceil src.y\rceil
 893
 894   dst.z = \lceil src.z\rceil
 895
 896   dst.w = \lceil src.w\rceil
 897
 898
 899 .. opcode:: I2F - Integer To Float
 900
 901 .. math::
 902
 903   dst.x = (float) src.x
 904
 905   dst.y = (float) src.y
 906
 907   dst.z = (float) src.z
 908
 909   dst.w = (float) src.w
 910
 911
 912 .. opcode:: NOT - Bitwise Not
 913
 914 .. math::
 915
 916   dst.x = ~src.x
 917
 918   dst.y = ~src.y
 919
 920   dst.z = ~src.z
 921
 922   dst.w = ~src.w
 923
 924
 925 .. opcode:: TRUNC - Truncate
 926
 927 .. math::
 928
 929   dst.x = trunc(src.x)
 930
 931   dst.y = trunc(src.y)
 932
 933   dst.z = trunc(src.z)
 934
 935   dst.w = trunc(src.w)
 936
 937
 938 .. opcode:: SHL - Shift Left
 939
 940 .. math::
 941
 942   dst.x = src0.x << src1.x
 943
 944   dst.y = src0.y << src1.x
 945
 946   dst.z = src0.z << src1.x
 947
 948   dst.w = src0.w << src1.x
 949
 950
 951 .. opcode:: SHR - Shift Right
 952
 953 .. math::
 954
 955   dst.x = src0.x >> src1.x
 956
 957   dst.y = src0.y >> src1.x
 958
 959   dst.z = src0.z >> src1.x
 960
 961   dst.w = src0.w >> src1.x
 962
 963
 964 .. opcode:: AND - Bitwise And
 965
 966 .. math::
 967
 968   dst.x = src0.x & src1.x
 969
 970   dst.y = src0.y & src1.y
 971
 972   dst.z = src0.z & src1.z
 973
 974   dst.w = src0.w & src1.w
 975
 976
 977 .. opcode:: OR - Bitwise Or
 978
 979 .. math::
 980
 981   dst.x = src0.x | src1.x
 982
 983   dst.y = src0.y | src1.y
 984
 985   dst.z = src0.z | src1.z
 986
 987   dst.w = src0.w | src1.w
 988
 989
 990 .. opcode:: MOD - Modulus
 991
 992 .. math::
 993
 994   dst.x = src0.x \bmod src1.x
 995
 996   dst.y = src0.y \bmod src1.y
 997
 998   dst.z = src0.z \bmod src1.z
 999
1000   dst.w = src0.w \bmod src1.w
1001
1002
1003 .. opcode:: XOR - Bitwise Xor
1004
1005 .. math::
1006
1007   dst.x = src0.x \oplus src1.x
1008
1009   dst.y = src0.y \oplus src1.y
1010
1011   dst.z = src0.z \oplus src1.z
1012
1013   dst.w = src0.w \oplus src1.w
1014
1015
1016 .. opcode:: SAD - Sum Of Absolute Differences
1017
1018 .. math::
1019
1020   dst.x = |src0.x - src1.x| + src2.x
1021
1022   dst.y = |src0.y - src1.y| + src2.y
1023
1024   dst.z = |src0.z - src1.z| + src2.z
1025
1026   dst.w = |src0.w - src1.w| + src2.w
1027
1028
1029 .. opcode:: TXF - Texel Fetch
1030
1031   TBD
1032
1033
1034 .. opcode:: TXQ - Texture Size Query
1035
1036   TBD
1037
1038
1039 .. opcode:: CONT - Continue
1040
1041   TBD
1042
1043
1044 From GL_NV_geometry_program4
1045 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1046
1047
1048 .. opcode:: EMIT - Emit
1049
1050   TBD
1051
1052
1053 .. opcode:: ENDPRIM - End Primitive
1054
1055   TBD
1056
1057
1058 From GLSL
1059 ^^^^^^^^^^
1060
1061
1062 .. opcode:: BGNLOOP - Begin a Loop
1063
1064   TBD
1065
1066
1067 .. opcode:: BGNSUB - Begin Subroutine
1068
1069   TBD
1070
1071
1072 .. opcode:: ENDLOOP - End a Loop
1073
1074   TBD
1075
1076
1077 .. opcode:: ENDSUB - End Subroutine
1078
1079   TBD
1080
1081
1082 .. opcode:: NOP - No Operation
1083
1084   Do nothing.
1085
1086
1087 .. opcode:: NRM4 - 4-component Vector Normalise
1088
1089 This instruction replicates its result.
1090
1091 .. math::
1092
1093   dst = \frac{src.x}{src.x \times src.x + src.y \times src.y + src.z \times src.z + src.w \times src.w}
1094
1095
1096 ps_2_x
1097 ^^^^^^^^^^^^
1098
1099
1100 .. opcode:: CALLNZ - Subroutine Call If Not Zero
1101
1102   TBD
1103
1104
1105 .. opcode:: IFC - If
1106
1107   TBD
1108
1109
1110 .. opcode:: BREAKC - Break Conditional
1111
1112   TBD
1113
1114 .. _doubleopcodes:
1115
1116 Double Opcodes
1117 ^^^^^^^^^^^^^^^
1118
1119 .. opcode:: DADD - Add Double
1120
1121 .. math::
1122
1123   dst.xy = src0.xy + src1.xy
1124
1125   dst.zw = src0.zw + src1.zw
1126
1127
1128 .. opcode:: DDIV - Divide Double
1129
1130 .. math::
1131
1132   dst.xy = src0.xy / src1.xy
1133
1134   dst.zw = src0.zw / src1.zw
1135
1136 .. opcode:: DSEQ - Set Double on Equal
1137
1138 .. math::
1139
1140   dst.xy = src0.xy == src1.xy ? 1.0F : 0.0F
1141
1142   dst.zw = src0.zw == src1.zw ? 1.0F : 0.0F
1143
1144 .. opcode:: DSLT - Set Double on Less than
1145
1146 .. math::
1147
1148   dst.xy = src0.xy < src1.xy ? 1.0F : 0.0F
1149
1150   dst.zw = src0.zw < src1.zw ? 1.0F : 0.0F
1151
1152 .. opcode:: DFRAC - Double Fraction
1153
1154 .. math::
1155
1156   dst.xy = src.xy - \lfloor src.xy\rfloor
1157
1158   dst.zw = src.zw - \lfloor src.zw\rfloor
1159
1160
1161 .. opcode:: DFRACEXP - Convert Double Number to Fractional and Integral Components
1162
1163 .. math::
1164
1165   dst0.xy = frexp(src.xy, dst1.xy)
1166
1167   dst0.zw = frexp(src.zw, dst1.zw)
1168
1169 .. opcode:: DLDEXP - Multiply Double Number by Integral Power of 2
1170
1171 .. math::
1172
1173   dst.xy = ldexp(src0.xy, src1.xy)
1174
1175   dst.zw = ldexp(src0.zw, src1.zw)
1176
1177 .. opcode:: DMIN - Minimum Double
1178
1179 .. math::
1180
1181   dst.xy = min(src0.xy, src1.xy)
1182
1183   dst.zw = min(src0.zw, src1.zw)
1184
1185 .. opcode:: DMAX - Maximum Double
1186
1187 .. math::
1188
1189   dst.xy = max(src0.xy, src1.xy)
1190
1191   dst.zw = max(src0.zw, src1.zw)
1192
1193 .. opcode:: DMUL - Multiply Double
1194
1195 .. math::
1196
1197   dst.xy = src0.xy \times src1.xy
1198
1199   dst.zw = src0.zw \times src1.zw
1200
1201
1202 .. opcode:: DMAD - Multiply And Add Doubles
1203
1204 .. math::
1205
1206   dst.xy = src0.xy \times src1.xy + src2.xy
1207
1208   dst.zw = src0.zw \times src1.zw + src2.zw
1209
1210
1211 .. opcode:: DRCP - Reciprocal Double
1212
1213 .. math::
1214
1215    dst.xy = \frac{1}{src.xy}
1216
1217    dst.zw = \frac{1}{src.zw}
1218
1219 .. opcode:: DSQRT - Square root double
1220
1221 .. math::
1222
1223    dst.xy = \sqrt{src.xy}
1224
1225    dst.zw = \sqrt{src.zw}
1226
1227
1228 Explanation of symbols used
1229 ------------------------------
1230
1231
1232 Functions
1233 ^^^^^^^^^^^^^^
1234
1235
1236   :math:`|x|`       Absolute value of `x`.
1237
1238   :math:`\lceil x \rceil` Ceiling of `x`.
1239
1240   clamp(x,y,z)      Clamp x between y and z.
1241                     (x < y) ? y : (x > z) ? z : x
1242
1243   :math:`\lfloor x\rfloor` Floor of `x`.
1244
1245   :math:`\log_2{x}` Logarithm of `x`, base 2.
1246
1247   max(x,y)          Maximum of x and y.
1248                     (x > y) ? x : y
1249
1250   min(x,y)          Minimum of x and y.
1251                     (x < y) ? x : y
1252
1253   partialx(x)       Derivative of x relative to fragment's X.
1254
1255   partialy(x)       Derivative of x relative to fragment's Y.
1256
1257   pop()             Pop from stack.
1258
1259   :math:`x^y`       `x` to the power `y`.
1260
1261   push(x)           Push x on stack.
1262
1263   round(x)          Round x.
1264
1265   trunc(x)          Truncate x, i.e. drop the fraction bits.
1266
1267
1268 Keywords
1269 ^^^^^^^^^^^^^
1270
1271
1272   discard           Discard fragment.
1273
1274   pc                Program counter.
1275
1276   target            Label of target instruction.
1277
1278
1279 Other tokens
1280 ---------------
1281
1282
1283 Declaration
1284 ^^^^^^^^^^^
1285
1286
1287 Declares a register that will be referenced as an operand in Instruction
1288 tokens.
1289
1290 File field contains register file that is being declared and is one
1291 of TGSI_FILE.
1292
1293 UsageMask field specifies which of the register components can be accessed
1294 and is one of TGSI_WRITEMASK.
1295
1296 Interpolate field is only valid for fragment shader INPUT register files.
1297 It specifies the way input is being interpolated by the rasteriser and is one
1298 of TGSI_INTERPOLATE.
1299
1300 If Dimension flag is set to 1, a Declaration Dimension token follows.
1301
1302 If Semantic flag is set to 1, a Declaration Semantic token follows.
1303
1304 CylindricalWrap bitfield is only valid for fragment shader INPUT register
1305 files. It specifies which register components should be subject to cylindrical
1306 wrapping when interpolating by the rasteriser. If TGSI_CYLINDRICAL_WRAP_X
1307 is set to 1, the X component should be interpolated according to cylindrical
1308 wrapping rules.
1309
1310
1311 Declaration Semantic
1312 ^^^^^^^^^^^^^^^^^^^^^^^^
1313
1314
1315   Follows Declaration token if Semantic bit is set.
1316
1317   Since its purpose is to link a shader with other stages of the pipeline,
1318   it is valid to follow only those Declaration tokens that declare a register
1319   either in INPUT or OUTPUT file.
1320
1321   SemanticName field contains the semantic name of the register being declared.
1322   There is no default value.
1323
1324   SemanticIndex is an optional subscript that can be used to distinguish
1325   different register declarations with the same semantic name. The default value
1326   is 0.
1327
1328   The meanings of the individual semantic names are explained in the following
1329   sections.
1330
1331 TGSI_SEMANTIC_POSITION
1332 """"""""""""""""""""""
1333
1334 Position, sometimes known as HPOS or WPOS for historical reasons, is the
1335 location of the vertex in space, in ``(x, y, z, w)`` format. ``x``, ``y``, and ``z``
1336 are the Cartesian coordinates, and ``w`` is the homogeneous coordinate, used
1337 for the perspective divide, if enabled.
1338
1339 As a vertex shader output, position should be scaled to the viewport. When
1340 used in fragment shaders, position will be in window coordinates. The convention
1341 used depends on the FS_COORD_ORIGIN and FS_COORD_PIXEL_CENTER properties.
1342
1343 XXX additionally, is there a way to configure the perspective divide? it's
1344 accelerated on most chipsets AFAIK...
1345
1346 Position, if not specified, usually defaults to ``(0, 0, 0, 1)``, and can
1347 be partially specified as ``(x, y, 0, 1)`` or ``(x, y, z, 1)``.
1348
1349 XXX usually? can we solidify that?
1350
1351 TGSI_SEMANTIC_COLOR
1352 """""""""""""""""""
1353
1354 Colors are used to, well, color the primitives. Colors are always in
1355 ``(r, g, b, a)`` format.
1356
1357 If alpha is not specified, it defaults to 1.
1358
1359 TGSI_SEMANTIC_BCOLOR
1360 """"""""""""""""""""
1361
1362 Back-facing colors are only used for back-facing polygons, and are only valid
1363 in vertex shader outputs. After rasterization, all polygons are front-facing
1364 and COLOR and BCOLOR end up occupying the same slots in the fragment, so
1365 all BCOLORs effectively become regular COLORs in the fragment shader.
1366
1367 TGSI_SEMANTIC_FOG
1368 """""""""""""""""
1369
1370 The fog coordinate historically has been used to replace the depth coordinate
1371 for generation of fog in dedicated fog blocks. Gallium, however, does not use
1372 dedicated fog acceleration, placing it entirely in the fragment shader
1373 instead.
1374
1375 The fog coordinate should be written in ``(f, 0, 0, 1)`` format. Only the first
1376 component matters when writing from the vertex shader; the driver will ensure
1377 that the coordinate is in this format when used as a fragment shader input.
1378
1379 TGSI_SEMANTIC_PSIZE
1380 """""""""""""""""""
1381
1382 PSIZE, or point size, is used to specify point sizes per-vertex. It should
1383 be in ``(s, 0, 0, 1)`` format, where ``s`` is the (possibly clamped) point size.
1384 Only the first component matters when writing from the vertex shader.
1385
1386 When using this semantic, be sure to set the appropriate state in the
1387 :ref:`rasterizer` first.
1388
1389 TGSI_SEMANTIC_GENERIC
1390 """""""""""""""""""""
1391
1392 Generic semantics are nearly always used for texture coordinate attributes,
1393 in ``(s, t, r, q)`` format. ``t`` and ``r`` may be unused for certain kinds
1394 of lookups, and ``q`` is the level-of-detail bias for biased sampling.
1395
1396 These attributes are called "generic" because they may be used for anything
1397 else, including parameters, texture generation information, or anything that
1398 can be stored inside a four-component vector.
1399
1400 TGSI_SEMANTIC_FACE
1401 """"""""""""""""""
1402
1403 FACE is the facing bit, to store the facing information for the fragment
1404 shader. ``(f, 0, 0, 1)`` is the format. The first component will be positive
1405 when the fragment is front-facing, and negative when the fragment is
1406 back-facing.
1407
1408 TGSI_SEMANTIC_EDGEFLAG
1409 """"""""""""""""""""""
1410
1411 XXX no clue
1412
1413
1414 Properties
1415 ^^^^^^^^^^^^^^^^^^^^^^^^
1416
1417
1418   Properties are general directives that apply to the whole TGSI program.
1419
1420 FS_COORD_ORIGIN
1421 """""""""""""""
1422
1423 Specifies the fragment shader TGSI_SEMANTIC_POSITION coordinate origin.
1424 The default value is UPPER_LEFT.
1425
1426 If UPPER_LEFT, the position will be (0,0) at the upper left corner and
1427 increase downward and rightward.
1428 If LOWER_LEFT, the position will be (0,0) at the lower left corner and
1429 increase upward and rightward.
1430
1431 OpenGL defaults to LOWER_LEFT, and is configurable with the
1432 GL_ARB_fragment_coord_conventions extension.
1433
1434 DirectX 9/10 use UPPER_LEFT.
1435
1436 FS_COORD_PIXEL_CENTER
1437 """""""""""""""""""""
1438
1439 Specifies the fragment shader TGSI_SEMANTIC_POSITION pixel center convention.
1440 The default value is HALF_INTEGER.
1441
1442 If HALF_INTEGER, the fractional part of the position will be 0.5.
1443 If INTEGER, the fractional part of the position will be 0.0.
1444
1445 Note that this does not affect the set of fragments generated by
1446 rasterization, which is instead controlled by gl_rasterization_rules in the
1447 rasterizer.
1448
1449 OpenGL defaults to HALF_INTEGER, and is configurable with the
1450 GL_ARB_fragment_coord_conventions extension.
1451
1452 DirectX 9 uses INTEGER.
1453 DirectX 10 uses HALF_INTEGER.
1454
1455
1456
1457 Texture Sampling and Texture Formats
1458 ------------------------------------
1459
1460 This table shows how texture image components are returned as (x,y,z,w) tuples
1461 by TGSI texture instructions, such as :opcode:`TEX`, :opcode:`TXD`, and
1462 :opcode:`TXP`. For reference, OpenGL and Direct3D conventions are shown as
1463 well.
1464
1465 +--------------------+--------------+--------------------+--------------+
1466 | Texture Components | Gallium      | OpenGL             | Direct3D 9   |
1467 +====================+==============+====================+==============+
1468 | R                  | XXX TBD      | (r, 0, 0, 1)       | (r, 1, 1, 1) |
1469 +--------------------+--------------+--------------------+--------------+
1470 | RG                 | XXX TBD      | (r, g, 0, 1)       | (r, g, 1, 1) |
1471 +--------------------+--------------+--------------------+--------------+
1472 | RGB                | (r, g, b, 1) | (r, g, b, 1)       | (r, g, b, 1) |
1473 +--------------------+--------------+--------------------+--------------+
1474 | RGBA               | (r, g, b, a) | (r, g, b, a)       | (r, g, b, a) |
1475 +--------------------+--------------+--------------------+--------------+
1476 | A                  | (0, 0, 0, a) | (0, 0, 0, a)       | (0, 0, 0, a) |
1477 +--------------------+--------------+--------------------+--------------+
1478 | L                  | (l, l, l, 1) | (l, l, l, 1)       | (l, l, l, 1) |
1479 +--------------------+--------------+--------------------+--------------+
1480 | LA                 | (l, l, l, a) | (l, l, l, a)       | (l, l, l, a) |
1481 +--------------------+--------------+--------------------+--------------+
1482 | I                  | (i, i, i, i) | (i, i, i, i)       | N/A          |
1483 +--------------------+--------------+--------------------+--------------+
1484 | UV                 | XXX TBD      | (0, 0, 0, 1)       | (u, v, 1, 1) |
1485 |                    |              | [#envmap-bumpmap]_ |              |
1486 +--------------------+--------------+--------------------+--------------+
1487 | Z                  | XXX TBD      | (z, z, z, 1)       | (0, z, 0, 1) |
1488 |                    |              | [#depth-tex-mode]_ |              |
1489 +--------------------+--------------+--------------------+--------------+
1490
1491 .. [#envmap-bumpmap] http://www.opengl.org/registry/specs/ATI/envmap_bumpmap.txt
1492 .. [#depth-tex-mode] the default is (z, z, z, 1) but may also be (0, 0, 0, z)
1493    or (z, z, z, z) depending on the value of GL_DEPTH_TEXTURE_MODE.