2 * (C) Copyright IBM Corporation 2008
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
27 * Generate code to perform all per-fragment operations.
29 * Code generated by these functions performs alpha, depth, and stencil
30 * testing as well as alpha blending.
33 * Occlusion query is not supported, but this is the right place to add that
36 * \author Ian Romanick <idr@us.ibm.com>
39 #include "pipe/p_defines.h"
40 #include "pipe/p_state.h"
42 #include "cell_context.h"
44 #include "rtasm/rtasm_ppc_spe.h"
48 * Generate code to perform alpha testing.
50 * The code generated by this function uses the register specified by
51 * \c mask as both an input and an output.
53 * \param dsa Current alpha-test state
54 * \param f Function to which code should be appended
55 * \param mask Index of register containing active fragment mask
56 * \param alphas Index of register containing per-fragment alpha values
58 * \note Emits a maximum of 6 instructions.
// emit_alpha_test: appends the alpha-test instructions to the SPE function f.
// Visible behaviour: when alpha testing is enabled and the function is neither
// NEVER nor ALWAYS, three scratch registers (ref, tmp_a, tmp_b) are allocated,
// the bit pattern of dsa->alpha.ref is loaded into ref, a float compare
// (spe_fceq / spe_fcgt) is emitted into tmp_a, the compare result is folded
// into mask with spe_andc or spe_and, and the scratch registers are released.
// NOTE(review): this listing prefixes statements with the original line number
// and has dropped lines wherever that numbering skips. The return type, the
// braces, the declaration of ref_val (used at 80 but never declared here), the
// ends of the switch cases and the body of the final else-if are not visible.
// Restore them from the pristine file before compiling.
61 emit_alpha_test(struct pipe_depth_stencil_alpha_state
*dsa
,
62 struct spe_function
*f
, int mask
, int alphas
)
64 /* If the alpha function is either NEVER or ALWAYS, there is no need to
65 * load the reference value into a register. ALWAYS is a fairly common
66 * case, and this optimization saves 2 instructions.
68 if (dsa
->alpha
.enabled
69 && (dsa
->alpha
.func
!= PIPE_FUNC_NEVER
)
70 && (dsa
->alpha
.func
!= PIPE_FUNC_ALWAYS
)) {
71 int ref
= spe_allocate_available_register(f
);
72 int tmp_a
= spe_allocate_available_register(f
);
73 int tmp_b
= spe_allocate_available_register(f
);
78 boolean complement
= FALSE
;
// ref_val is read through .f and .u, so it is presumably a float/unsigned
// union used to obtain the bit pattern of the reference value - TODO confirm.
80 ref_val
.f
= dsa
->alpha
.ref
;
// Load the low 16 bits, then the high 16 bits, of the reference bit pattern.
// NOTE(review): in the SPU ISA ilh writes every halfword of the target, which
// would overwrite the value loaded by il; the usual 32-bit constant load is
// ilhu followed by iohl. Verify this sequence really yields ref_val.u.
82 spe_il(f
, ref
, ref_val
.u
& 0x0000ffff);
83 spe_ilh(f
, ref
, ref_val
.u
>> 16);
// Emit the comparison selected by the alpha function into tmp_a.
85 switch (dsa
->alpha
.func
) {
86 case PIPE_FUNC_NOTEQUAL
:
91 spe_fceq(f
, tmp_a
, ref
, alphas
);
94 case PIPE_FUNC_LEQUAL
:
98 case PIPE_FUNC_GREATER
:
99 spe_fcgt(f
, tmp_a
, ref
, alphas
);
106 case PIPE_FUNC_GEQUAL
:
// Greater-or-equal is built from a greater-than and an equal compare OR-ed
// together; tmp_b holds the equal result.
107 spe_fcgt(f
, tmp_a
, ref
, alphas
);
108 spe_fceq(f
, tmp_b
, ref
, alphas
);
109 spe_or(f
, tmp_a
, tmp_b
, tmp_a
);
112 case PIPE_FUNC_ALWAYS
:
113 case PIPE_FUNC_NEVER
:
// Two alternative ways of folding tmp_a into mask follow. The conditional that
// chooses between them is on missing lines (presumably it tests complement -
// TODO confirm).
120 spe_andc(f
, mask
, mask
, tmp_a
);
122 spe_and(f
, mask
, mask
, tmp_a
);
125 spe_release_register(f
, ref
);
126 spe_release_register(f
, tmp_a
);
127 spe_release_register(f
, tmp_b
);
// Alpha test enabled with func NEVER: the body of this branch is not visible.
128 } else if (dsa
->alpha
.enabled
&& (dsa
->alpha
.func
== PIPE_FUNC_NEVER
)) {
135 * Generate code to perform Z testing. Four Z values are tested at once.
136 * \param dsa Current depth-test state
137 * \param f Function to which code should be appended
138 * \param mask Index of register to contain depth-pass mask
139 * \param stored Index of register containing values from depth buffer
140 * \param calculated Index of register containing per-fragment depth values
143 * If the calculated depth comparison mask is the actual mask, \c FALSE is
144 * returned. If the calculated depth comparison mask is the complement of
145 * the actual mask, \c TRUE is returned.
147 * \note Emits a maximum of 3 instructions.
// emit_depth_test: appends the depth-comparison instructions to f.
// func is dsa->depth.func when depth testing is enabled and PIPE_FUNC_ALWAYS
// otherwise. The visible cases compare calculated against stored with spe_ceq
// and spe_clgt and write the result into mask; tmp is a scratch register used
// by the GEQUAL case and released at the end.
// The doc comment above says TRUE is returned when the emitted mask is the
// complement of the real one. The return statement, the return type, the
// switch header and the per-case assignments are on lines missing from this
// listing (gaps in the embedded numbering).
// NOTE(review): the local below is spelled compliment; complement is meant.
150 emit_depth_test(struct pipe_depth_stencil_alpha_state
*dsa
,
151 struct spe_function
*f
, int mask
, int stored
, int calculated
)
153 unsigned func
= (dsa
->depth
.enabled
)
154 ? dsa
->depth
.func
: PIPE_FUNC_ALWAYS
;
155 int tmp
= spe_allocate_available_register(f
);
156 boolean compliment
= FALSE
;
159 case PIPE_FUNC_NEVER
:
163 case PIPE_FUNC_NOTEQUAL
:
166 case PIPE_FUNC_EQUAL
:
167 spe_ceq(f
, mask
, calculated
, stored
);
170 case PIPE_FUNC_LEQUAL
:
173 case PIPE_FUNC_GREATER
:
174 spe_clgt(f
, mask
, calculated
, stored
);
180 case PIPE_FUNC_GEQUAL
:
// Greater-or-equal: greater-than into mask, equal into tmp, then OR them.
181 spe_clgt(f
, mask
, calculated
, stored
);
182 spe_ceq(f
, tmp
, calculated
, stored
);
183 spe_or(f
, mask
, mask
, tmp
);
186 case PIPE_FUNC_ALWAYS
:
195 spe_release_register(f
, tmp
);
201 * Generate code to apply the stencil operation (after testing).
202 * \note Emits a maximum of 5 instructions.
205 * Since \c out and \c in might be the same register, this routine cannot
206 * generate code that uses \c out as a temporary.
// emit_stencil_op: appends the instructions for one stencil operation to f.
// The new value for the selected op is built in the scratch register result,
// and the final spe_selb writes mask ? result : in into out (operand roles as
// documented by the selb comment in the INCR case). out is never used as a
// temporary because out and in may be the same register. clamp, clamp_mask and
// result are allocated on entry and released on exit for every op.
// NOTE(review): the return type, braces, the switch header and the ends of the
// cases are on lines missing from this listing. In the DECR case no load of
// clamp is visible (236 is missing), so the clamp value used there cannot be
// confirmed from this text.
209 emit_stencil_op(struct spe_function
*f
,
210 int out
, int in
, int mask
, unsigned op
, unsigned ref
)
212 const int clamp
= spe_allocate_available_register(f
);
213 const int clamp_mask
= spe_allocate_available_register(f
);
214 const int result
= spe_allocate_available_register(f
);
217 case PIPE_STENCIL_OP_KEEP
:
219 case PIPE_STENCIL_OP_ZERO
:
220 spe_il(f
, result
, 0);
222 case PIPE_STENCIL_OP_REPLACE
:
223 spe_il(f
, result
, ref
);
225 case PIPE_STENCIL_OP_INCR
:
226 /* clamp = [0xff, 0xff, 0xff, 0xff] */
227 spe_il(f
, clamp
, 0x0ff);
228 /* result[i] = in[i] + 1 */
229 spe_ai(f
, result
, in
, 1);
230 /* clamp_mask[i] = (result[i] > 0xff) */
231 spe_clgti(f
, clamp_mask
, result
, 0x0ff);
232 /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
233 spe_selb(f
, result
, result
, clamp
, clamp_mask
);
235 case PIPE_STENCIL_OP_DECR
:
237 spe_ai(f
, result
, in
, -1);
239 /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned
242 spe_clgti(f
, clamp_mask
, result
, 0x0ff);
243 spe_selb(f
, result
, result
, clamp
, clamp_mask
);
245 case PIPE_STENCIL_OP_INCR_WRAP
:
// No clamp or mask is applied to the wrapped results in the visible lines.
246 spe_ai(f
, result
, in
, 1);
248 case PIPE_STENCIL_OP_DECR_WRAP
:
249 spe_ai(f
, result
, in
, -1);
251 case PIPE_STENCIL_OP_INVERT
:
// NOR of a value with itself is its bitwise complement.
252 spe_nor(f
, result
, in
, in
);
// Merge: out takes result where mask is set and in elsewhere.
258 spe_selb(f
, out
, in
, result
, mask
);
260 spe_release_register(f
, result
);
261 spe_release_register(f
, clamp_mask
);
262 spe_release_register(f
, clamp
);
267 * Generate code to do stencil test. Four pixels are tested at once.
268 * \param dsa Depth / stencil test state
269 * \param face 0 for front face, 1 for back face
270 * \param f Function to append instructions to
271 * \param mask Register containing mask of fragments passing the
273 * \param depth_mask Register containing mask of fragments passing the
275 * \param depth_complement Is \c depth_mask the complement of the actual mask?
276 * \param stencil Register containing values from stencil buffer
277 * \param depth_pass Register to store mask of fragments passing stencil test
281 * Emits a maximum of 10 + (3 * 5) = 25 instructions.
// emit_stencil_test: appends the stencil test and stencil update code for one
// face to f.
// Steps visible below:
//   1. optionally AND the stencil values with valuemask into a separate register;
//   2. emit the comparison for the stencil function into stencil_mask;
//   3. combine with the incoming fragment mask into stencil_pass, then with
//      depth_mask into depth_pass (spe_and or spe_andc variants);
//   4. when writemask is non-zero, run emit_stencil_op for the fail, zfail and
//      zpass operations that are not KEEP, chaining through stencil_src;
//   5. release scratch registers and, when writemask is not 0xff, select the
//      written bits with spe_selb.
// NOTE(review): only dsa, sr, f and depth_complement are visible in the
// parameter list. face, mask, depth_mask, stencil and depth_pass are used in
// the body and appear at the call sites in the order
// (dsa, sr, face, f, mask, depth_mask, depth_complement, stencil, depth_pass).
// Callers store the result in an int; the return statement is on a missing
// line (presumably it returns stencil_src - TODO confirm). The declaration of
// stored and several conditionals and braces are missing as well.
284 emit_stencil_test(struct pipe_depth_stencil_alpha_state
*dsa
,
285 struct pipe_stencil_ref
*sr
,
287 struct spe_function
*f
,
290 boolean depth_complement
,
294 int stencil_fail
= spe_allocate_available_register(f
);
295 int depth_fail
= spe_allocate_available_register(f
);
296 int stencil_mask
= spe_allocate_available_register(f
);
297 int stencil_pass
= spe_allocate_available_register(f
);
298 int face_stencil
= spe_allocate_available_register(f
);
// stencil_src names the register that currently holds the newest stencil
// value; it starts as the input register and is redirected to face_stencil
// after each emitted stencil operation.
299 int stencil_src
= stencil
;
// The reference value is pre-masked with the value mask for the comparisons.
300 const unsigned ref
= (sr
->ref_value
[face
]
301 & dsa
->stencil
[face
].valuemask
);
302 boolean complement
= FALSE
;
304 int tmp
= spe_allocate_available_register(f
);
// A masked copy of the stencil values is only needed when a real comparison
// is emitted and the value mask is not 0xff.
307 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_NEVER
)
308 && (dsa
->stencil
[face
].func
!= PIPE_FUNC_ALWAYS
)
309 && (dsa
->stencil
[face
].valuemask
!= 0x0ff)) {
310 stored
= spe_allocate_available_register(f
);
311 spe_andi(f
, stored
, stencil
, dsa
->stencil
[face
].valuemask
);
317 switch (dsa
->stencil
[face
].func
) {
318 case PIPE_FUNC_NEVER
:
319 spe_il(f
, stencil_mask
, 0); /* stencil_mask[0..3] = [0,0,0,0] */
322 case PIPE_FUNC_NOTEQUAL
:
325 case PIPE_FUNC_EQUAL
:
326 /* stencil_mask[i] = (stored[i] == ref) */
327 spe_ceqi(f
, stencil_mask
, stored
, ref
);
330 case PIPE_FUNC_LEQUAL
:
333 case PIPE_FUNC_GREATER
:
335 /* stencil_mask[i] = (stored[i] > ref) */
336 spe_clgti(f
, stencil_mask
, stored
, ref
);
342 case PIPE_FUNC_GEQUAL
:
343 /* stencil_mask[i] = (stored[i] > ref) */
344 spe_clgti(f
, stencil_mask
, stored
, ref
);
345 /* tmp[i] = (stored[i] == ref) */
346 spe_ceqi(f
, tmp
, stored
, ref
);
347 /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
348 spe_or(f
, stencil_mask
, stencil_mask
, tmp
);
351 case PIPE_FUNC_ALWAYS
:
352 /* See comment below. */
// stored is only released when it is a separately allocated register.
360 if (stored
!= stencil
) {
361 spe_release_register(f
, stored
);
363 spe_release_register(f
, tmp
);
366 /* ALWAYS is a very common stencil-test, so some effort is applied to
367 * optimize that case. The stencil-pass mask is the same as the input
368 * fragment mask. This makes the stencil-test (above) a no-op, and the
369 * input fragment mask can be "renamed" the stencil-pass mask.
371 if (dsa
->stencil
[face
].func
== PIPE_FUNC_ALWAYS
) {
372 spe_release_register(f
, stencil_pass
);
// The andc and and forms below are alternatives; the conditionals selecting
// between them are partly on missing lines.
376 spe_andc(f
, stencil_pass
, mask
, stencil_mask
);
378 spe_and(f
, stencil_pass
, mask
, stencil_mask
);
382 if (depth_complement
) {
383 spe_andc(f
, depth_pass
, stencil_pass
, depth_mask
);
385 spe_and(f
, depth_pass
, stencil_pass
, depth_mask
);
389 /* Conditionally emit code to update the stencil value under various
390 * condititons. Note that there is no need to generate code under the
391 * following circumstances:
393 * - Stencil write mask is zero.
394 * - For stencil-fail if the stencil test is ALWAYS
395 * - For depth-fail if the stencil test is NEVER
396 * - For depth-pass if the stencil test is NEVER
397 * - Any of the 3 conditions if the operation is KEEP
399 if (dsa
->stencil
[face
].writemask
!= 0) {
400 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_ALWAYS
)
401 && (dsa
->stencil
[face
].fail_op
!= PIPE_STENCIL_OP_KEEP
)) {
403 spe_and(f
, stencil_fail
, mask
, stencil_mask
);
405 spe_andc(f
, stencil_fail
, mask
, stencil_mask
);
// NOTE(review): emit_stencil_op receives the unmasked sr->ref_value[face]
// here, while the comparisons above use the valuemask-ed ref.
408 emit_stencil_op(f
, face_stencil
, stencil_src
, stencil_fail
,
409 dsa
->stencil
[face
].fail_op
,
410 sr
->ref_value
[face
]);
412 stencil_src
= face_stencil
;
415 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_NEVER
)
416 && (dsa
->stencil
[face
].zfail_op
!= PIPE_STENCIL_OP_KEEP
)) {
417 if (depth_complement
) {
418 spe_and(f
, depth_fail
, stencil_pass
, depth_mask
);
420 spe_andc(f
, depth_fail
, stencil_pass
, depth_mask
);
423 emit_stencil_op(f
, face_stencil
, stencil_src
, depth_fail
,
424 dsa
->stencil
[face
].zfail_op
,
425 sr
->ref_value
[face
]);
426 stencil_src
= face_stencil
;
429 if ((dsa
->stencil
[face
].func
!= PIPE_FUNC_NEVER
)
430 && (dsa
->stencil
[face
].zpass_op
!= PIPE_STENCIL_OP_KEEP
)) {
431 emit_stencil_op(f
, face_stencil
, stencil_src
, depth_pass
,
432 dsa
->stencil
[face
].zpass_op
,
433 sr
->ref_value
[face
]);
434 stencil_src
= face_stencil
;
438 spe_release_register(f
, stencil_fail
);
439 spe_release_register(f
, depth_fail
);
440 spe_release_register(f
, stencil_mask
);
// stencil_pass may have been renamed to the caller-owned mask register in the
// ALWAYS case (see the comment above), in which case it must not be released.
441 if (stencil_pass
!= mask
) {
442 spe_release_register(f
, stencil_pass
);
445 /* If all of the stencil operations were KEEP or the stencil write mask was
446 * zero, "stencil_src" will still be set to "stencil". In this case
447 * release the "face_stencil" register. Otherwise apply the stencil write
448 * mask to select bits from the calculated stencil value and the previous
451 if (stencil_src
== stencil
) {
452 spe_release_register(f
, face_stencil
);
453 } else if (dsa
->stencil
[face
].writemask
!= 0x0ff) {
// This tmp is a new declaration in the inner scope; the outer tmp was
// already released above.
454 int tmp
= spe_allocate_available_register(f
);
456 spe_il(f
, tmp
, dsa
->stencil
[face
].writemask
);
457 spe_selb(f
, stencil_src
, stencil
, stencil_src
, tmp
);
459 spe_release_register(f
, tmp
);
// cell_generate_depth_stencil_test: builds the SPE routine stored in
// cdsa->code for the alpha / depth / stencil state in cdsa->base.
// Visible sequence: initialise the function buffer for 64 instructions, bind
// registers 3 to 8 to mask, depth, stencil, zvals, frag_a and facing, emit the
// alpha test and the depth test, emit the front (and optionally back) stencil
// test and select per-facing results with spe_selb, otherwise AND the depth
// result into mask, optionally select the new depth values, and emit the
// return branch. The trailing printf statements dump the state and the
// generated instruction words.
// NOTE(review): the return type, braces, several else branches and whatever
// guards the debug dump (the declaration of i is also missing) are on lines
// absent from this listing.
467 cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state
*cdsa
,
468 struct pipe_stencil_ref
*sr
)
470 struct pipe_depth_stencil_alpha_state
*const dsa
= &cdsa
->base
;
471 struct spe_function
*const f
= &cdsa
->code
;
473 /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
474 * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
475 * up to 64 to make it a happy power-of-two.
477 spe_init_func(f
, SPE_INST_SIZE
* 64);
480 /* Allocate registers for the function's input parameters. Cleverly (and
481 * clever code is usually dangerous, but I couldn't resist) the generated
482 * function returns a structure. Returned structures start with register
483 * 3, and the structure fields are ordered to match up exactly with the
486 int mask
= spe_allocate_register(f
, 3);
487 int depth
= spe_allocate_register(f
, 4);
488 int stencil
= spe_allocate_register(f
, 5);
489 int zvals
= spe_allocate_register(f
, 6);
490 int frag_a
= spe_allocate_register(f
, 7);
491 int facing
= spe_allocate_register(f
, 8);
493 int depth_mask
= spe_allocate_available_register(f
);
495 boolean depth_complement
;
498 emit_alpha_test(dsa
, f
, mask
, frag_a
);
500 depth_complement
= emit_depth_test(dsa
, f
, depth_mask
, depth
, zvals
);
502 if (dsa
->stencil
[0].enabled
) {
503 const int front_depth_pass
= spe_allocate_available_register(f
);
504 int front_stencil
= emit_stencil_test(dsa
, sr
, 0, f
, mask
,
505 depth_mask
, depth_complement
,
506 stencil
, front_depth_pass
);
508 if (dsa
->stencil
[1].enabled
) {
509 const int back_depth_pass
= spe_allocate_available_register(f
);
510 int back_stencil
= emit_stencil_test(dsa
, sr
, 1, f
, mask
,
511 depth_mask
, depth_complement
,
512 stencil
, back_depth_pass
);
514 /* If the front facing stencil value and the back facing stencil
515 * value are stored in the same register, there is no need to select
516 * a value based on the facing. This can happen if the stencil value
517 * was not modified due to the write masks being zero, the stencil
518 * operations being KEEP, etc.
520 if (front_stencil
!= back_stencil
) {
521 spe_selb(f
, stencil
, back_stencil
, front_stencil
, facing
);
524 if (back_stencil
!= stencil
) {
525 spe_release_register(f
, back_stencil
);
528 if (front_stencil
!= stencil
) {
529 spe_release_register(f
, front_stencil
);
// Pick the front or back pass mask per fragment according to facing.
532 spe_selb(f
, mask
, back_depth_pass
, front_depth_pass
, facing
);
534 spe_release_register(f
, back_depth_pass
);
// Front-only path: OR of a register with itself is used as a register copy.
536 if (front_stencil
!= stencil
) {
537 spe_or(f
, stencil
, front_stencil
, front_stencil
);
538 spe_release_register(f
, front_stencil
);
540 spe_or(f
, mask
, front_depth_pass
, front_depth_pass
);
543 spe_release_register(f
, front_depth_pass
);
544 } else if (dsa
->depth
.enabled
) {
545 if (depth_complement
) {
546 spe_andc(f
, mask
, mask
, depth_mask
);
548 spe_and(f
, mask
, mask
, depth_mask
);
// Depth write: take zvals where mask is set, keep the old depth elsewhere.
552 if (dsa
->depth
.writemask
) {
553 spe_selb(f
, depth
, depth
, zvals
, mask
);
556 spe_bi(f
, 0, 0, 0); /* return from function call */
// Debug dump of the state and of every generated word between f->store and
// f->csr.
561 const uint32_t *p
= f
->store
;
564 printf("# alpha (%sabled)\n",
565 (dsa
->alpha
.enabled
) ? "en" : "dis");
566 printf("# func: %u\n", dsa
->alpha
.func
);
567 printf("# ref: %.2f\n", dsa
->alpha
.ref
);
569 printf("# depth (%sabled)\n",
570 (dsa
->depth
.enabled
) ? "en" : "dis");
571 printf("# func: %u\n", dsa
->depth
.func
);
573 for (i
= 0; i
< 2; i
++) {
574 printf("# %s stencil (%sabled)\n",
575 (i
== 0) ? "front" : "back",
576 (dsa
->stencil
[i
].enabled
) ? "en" : "dis");
578 printf("# func: %u\n", dsa
->stencil
[i
].func
);
579 printf("# op (sf, zf, zp): %u %u %u\n",
580 dsa
->stencil
[i
].fail_op
,
581 dsa
->stencil
[i
].zfail_op
,
582 dsa
->stencil
[i
].zpass_op
);
// NOTE(review): the format below has three conversions but only two
// arguments are visible; the first argument is on missing line 584.
583 printf("# ref value / value mask / write mask: %02x %02x %02x\n",
585 dsa
->stencil
[i
].valuemask
,
586 dsa
->stencil
[i
].writemask
);
590 for (/* empty */; p
< f
->csr
; p
++) {
591 printf("\t.long\t0x%04x\n", *p
);
600 * \note Emits a maximum of 3 instructions
// emit_alpha_factor_calculation: chooses or computes the register that holds
// the alpha blend factor and records it in factor_reg.
// Visible cases:
//   SRC_ALPHA       - allocate a register and copy src_alpha into it (spe_or of
//                     a register with itself);
//   DST_ALPHA       - use the dst_alpha register index directly;
//   CONST_ALPHA     - use the const_alpha register index directly;
//   INV_*_ALPHA     - allocate factor_reg and tmp, convert tmp with spe_cuflt,
//                     subtract the alpha register from tmp with spe_fs, then
//                     release tmp.
// NOTE(review): the instruction that loads tmp before spe_cuflt is on a
// missing line (presumably the integer 1, giving one minus alpha - TODO
// confirm). The return type, the factor parameter on missing line 604, the
// switch header, the bodies of the ONE / SRC_ALPHA_SATURATE / ZERO / SRC1 cases
// and the return statement are not visible either; callers assign the result
// to src_factor[3] and dst_factor[3].
// NOTE(review): ownership of the result is mixed. Some cases hand back a newly
// allocated register and others a register owned by the caller, so a caller
// cannot release the result unconditionally.
603 emit_alpha_factor_calculation(struct spe_function
*f
,
605 int src_alpha
, int dst_alpha
, int const_alpha
)
612 case PIPE_BLENDFACTOR_ONE
:
616 case PIPE_BLENDFACTOR_SRC_ALPHA
:
617 factor_reg
= spe_allocate_available_register(f
);
619 spe_or(f
, factor_reg
, src_alpha
, src_alpha
);
622 case PIPE_BLENDFACTOR_DST_ALPHA
:
623 factor_reg
= dst_alpha
;
626 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
630 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
631 factor_reg
= spe_allocate_available_register(f
);
633 tmp
= spe_allocate_available_register(f
);
635 spe_cuflt(f
, tmp
, tmp
, 0);
636 spe_fs(f
, factor_reg
, tmp
, const_alpha
);
637 spe_release_register(f
, tmp
);
640 case PIPE_BLENDFACTOR_CONST_ALPHA
:
641 factor_reg
= const_alpha
;
644 case PIPE_BLENDFACTOR_ZERO
:
648 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
649 tmp
= spe_allocate_available_register(f
);
650 factor_reg
= spe_allocate_available_register(f
);
653 spe_cuflt(f
, tmp
, tmp
, 0);
654 spe_fs(f
, factor_reg
, tmp
, src_alpha
);
656 spe_release_register(f
, tmp
);
659 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
660 tmp
= spe_allocate_available_register(f
);
661 factor_reg
= spe_allocate_available_register(f
);
664 spe_cuflt(f
, tmp
, tmp
, 0);
665 spe_fs(f
, factor_reg
, tmp
, dst_alpha
);
667 spe_release_register(f
, tmp
);
670 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
671 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
683 * \note Emits a maximum of 6 instructions
// emit_color_factor_calculation: fills factor[0..2] with the registers that
// hold the RGB blend factors for blend factor sF.
// Patterns visible below:
//   - per-channel factors (SRC_COLOR, INV_SRC_COLOR, INV_DST_COLOR) allocate one
//     register per channel, and only for channels whose bit is set in mask;
//   - alpha-derived factors (SRC_ALPHA, INV_*_ALPHA, SRC_ALPHA_SATURATE,
//     CONST_ALPHA) compute one register and share it across factor[0..2];
//   - CONST_COLOR reuses the const_color register indices directly;
//   - INV_* cases subtract the source register from tmp with spe_fs after
//     spe_cuflt converts tmp.
// NOTE(review): only f, sF, mask and const_color are visible in the parameter
// list. src, dst and factor are used in the body, and the call sites pass
// (f, factor, mask-argument, frag, pixel, const_color, out-array). The
// instruction that loads tmp before each spe_cuflt is on a missing line
// (presumably the integer 1 - TODO confirm). The return type, switch header,
// case terminators and the bodies of several cases are also missing.
// NOTE(review): the CONST_COLOR and INV_CONST_COLOR loops do not test mask,
// unlike the other per-channel cases.
686 emit_color_factor_calculation(struct spe_function
*f
,
687 unsigned sF
, unsigned mask
,
690 const int *const_color
,
703 case PIPE_BLENDFACTOR_ONE
:
706 case PIPE_BLENDFACTOR_SRC_COLOR
:
707 for (i
= 0; i
< 3; ++i
) {
708 if ((mask
& (1U << i
)) != 0) {
709 factor
[i
] = spe_allocate_available_register(f
);
710 spe_or(f
, factor
[i
], src
[i
], src
[i
]);
715 case PIPE_BLENDFACTOR_SRC_ALPHA
:
716 factor
[0] = spe_allocate_available_register(f
);
717 factor
[1] = factor
[0];
718 factor
[2] = factor
[0];
720 spe_or(f
, factor
[0], src
[3], src
[3]);
723 case PIPE_BLENDFACTOR_DST_ALPHA
:
729 case PIPE_BLENDFACTOR_DST_COLOR
:
735 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
736 tmp
= spe_allocate_available_register(f
);
737 factor
[0] = spe_allocate_available_register(f
);
738 factor
[1] = factor
[0];
739 factor
[2] = factor
[0];
741 /* Alpha saturate means min(As, 1-Ad).
744 spe_cuflt(f
, tmp
, tmp
, 0);
745 spe_fs(f
, tmp
, tmp
, dst
[3]);
// Compare tmp against the source alpha, then select between src[3] and tmp
// under that compare result.
746 spe_fcgt(f
, factor
[0], tmp
, src
[3]);
747 spe_selb(f
, factor
[0], src
[3], tmp
, factor
[0]);
749 spe_release_register(f
, tmp
);
752 case PIPE_BLENDFACTOR_INV_CONST_COLOR
:
753 tmp
= spe_allocate_available_register(f
);
755 spe_cuflt(f
, tmp
, tmp
, 0);
757 for (i
= 0; i
< 3; i
++) {
758 factor
[i
] = spe_allocate_available_register(f
);
760 spe_fs(f
, factor
[i
], tmp
, const_color
[i
]);
762 spe_release_register(f
, tmp
);
765 case PIPE_BLENDFACTOR_CONST_COLOR
:
766 for (i
= 0; i
< 3; i
++) {
767 factor
[i
] = const_color
[i
];
771 case PIPE_BLENDFACTOR_INV_CONST_ALPHA
:
772 factor
[0] = spe_allocate_available_register(f
);
773 factor
[1] = factor
[0];
774 factor
[2] = factor
[0];
776 tmp
= spe_allocate_available_register(f
);
778 spe_cuflt(f
, tmp
, tmp
, 0);
779 spe_fs(f
, factor
[0], tmp
, const_color
[3]);
780 spe_release_register(f
, tmp
);
783 case PIPE_BLENDFACTOR_CONST_ALPHA
:
784 factor
[0] = const_color
[3];
785 factor
[1] = factor
[0];
786 factor
[2] = factor
[0];
789 case PIPE_BLENDFACTOR_ZERO
:
792 case PIPE_BLENDFACTOR_INV_SRC_COLOR
:
793 tmp
= spe_allocate_available_register(f
);
796 spe_cuflt(f
, tmp
, tmp
, 0);
798 for (i
= 0; i
< 3; ++i
) {
799 if ((mask
& (1U << i
)) != 0) {
800 factor
[i
] = spe_allocate_available_register(f
);
801 spe_fs(f
, factor
[i
], tmp
, src
[i
]);
805 spe_release_register(f
, tmp
);
808 case PIPE_BLENDFACTOR_INV_SRC_ALPHA
:
809 tmp
= spe_allocate_available_register(f
);
810 factor
[0] = spe_allocate_available_register(f
);
811 factor
[1] = factor
[0];
812 factor
[2] = factor
[0];
815 spe_cuflt(f
, tmp
, tmp
, 0);
816 spe_fs(f
, factor
[0], tmp
, src
[3]);
818 spe_release_register(f
, tmp
);
821 case PIPE_BLENDFACTOR_INV_DST_ALPHA
:
822 tmp
= spe_allocate_available_register(f
);
823 factor
[0] = spe_allocate_available_register(f
);
824 factor
[1] = factor
[0];
825 factor
[2] = factor
[0];
828 spe_cuflt(f
, tmp
, tmp
, 0);
829 spe_fs(f
, factor
[0], tmp
, dst
[3]);
831 spe_release_register(f
, tmp
);
834 case PIPE_BLENDFACTOR_INV_DST_COLOR
:
835 tmp
= spe_allocate_available_register(f
);
838 spe_cuflt(f
, tmp
, tmp
, 0);
840 for (i
= 0; i
< 3; ++i
) {
841 if ((mask
& (1U << i
)) != 0) {
842 factor
[i
] = spe_allocate_available_register(f
);
843 spe_fs(f
, factor
[i
], tmp
, dst
[i
]);
847 spe_release_register(f
, tmp
);
850 case PIPE_BLENDFACTOR_SRC1_COLOR
:
851 case PIPE_BLENDFACTOR_SRC1_ALPHA
:
852 case PIPE_BLENDFACTOR_INV_SRC1_COLOR
:
853 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA
:
// emit_blend_calculation: appends the blend equation for one colour channel to
// f, leaving the result in the src register. tmp is a scratch register
// allocated on entry and released on exit.
// For each blend function the code first special-cases source and destination
// factors of ONE and ZERO to avoid multiplies, and otherwise falls back to a
// multiply (spe_fm) followed by a fused multiply-add or multiply-subtract
// (spe_fma / spe_fms).
// NOTE(review): the return type, the switch header, the first case label
// (presumably the ADD function), several else branches and the loads of tmp
// used by the spe_fs special cases are on lines missing from this listing.
861 emit_blend_calculation(struct spe_function
*f
,
862 unsigned func
, unsigned sF
, unsigned dF
,
863 int src
, int src_factor
, int dst
, int dst_factor
)
865 int tmp
= spe_allocate_available_register(f
);
869 if (sF
== PIPE_BLENDFACTOR_ONE
) {
870 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
872 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
873 spe_fa(f
, src
, src
, dst
);
875 } else if (sF
== PIPE_BLENDFACTOR_ZERO
) {
876 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
878 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
879 spe_or(f
, src
, dst
, dst
);
881 spe_fm(f
, src
, dst
, dst_factor
);
883 } else if (dF
== PIPE_BLENDFACTOR_ZERO
) {
884 spe_fm(f
, src
, src
, src_factor
);
// General case: tmp = dst * dst_factor, then src = src * src_factor + tmp.
886 spe_fm(f
, tmp
, dst
, dst_factor
);
887 spe_fma(f
, src
, src
, src_factor
, tmp
);
891 case PIPE_BLEND_SUBTRACT
:
892 if (sF
== PIPE_BLENDFACTOR_ONE
) {
893 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
895 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
896 spe_fs(f
, src
, src
, dst
);
898 } else if (sF
== PIPE_BLENDFACTOR_ZERO
) {
899 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
901 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
903 spe_fs(f
, src
, tmp
, dst
);
905 spe_fm(f
, src
, dst
, dst_factor
);
907 } else if (dF
== PIPE_BLENDFACTOR_ZERO
) {
908 spe_fm(f
, src
, src
, src_factor
);
// General case: tmp = dst * dst_factor, then src = src * src_factor - tmp.
910 spe_fm(f
, tmp
, dst
, dst_factor
);
911 spe_fms(f
, src
, src
, src_factor
, tmp
);
915 case PIPE_BLEND_REVERSE_SUBTRACT
:
916 if (sF
== PIPE_BLENDFACTOR_ONE
) {
917 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
919 spe_fs(f
, src
, tmp
, src
);
920 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
921 spe_fs(f
, src
, dst
, src
);
923 } else if (sF
== PIPE_BLENDFACTOR_ZERO
) {
924 if (dF
== PIPE_BLENDFACTOR_ZERO
) {
926 } else if (dF
== PIPE_BLENDFACTOR_ONE
) {
927 spe_or(f
, src
, dst
, dst
);
929 spe_fm(f
, src
, dst
, dst_factor
);
931 } else if (dF
== PIPE_BLENDFACTOR_ZERO
) {
932 spe_fm(f
, src
, src
, src_factor
);
// NOTE(review): by symmetry with the SUBTRACT case, this should compute
// dst * dst_factor - src * src_factor, but the spe_fms below multiplies src
// (not dst) by dst_factor. Likely a bug - verify against the blend equation.
// The sF == ZERO, dF == ZERO branch at 931-932 is also a duplicate of the
// SUBTRACT form and may need a negation for the reverse equation - TODO check.
934 spe_fm(f
, tmp
, src
, src_factor
);
935 spe_fms(f
, src
, src
, dst_factor
, tmp
);
// The two compare-and-select pairs below pick one of src and dst per element;
// their case labels are on missing lines (presumably the MIN and MAX blend
// functions - TODO confirm). spe_cgt is an integer compare applied to float
// data here.
940 spe_cgt(f
, tmp
, src
, dst
);
941 spe_selb(f
, src
, src
, dst
, tmp
);
945 spe_cgt(f
, tmp
, src
, dst
);
946 spe_selb(f
, src
, dst
, src
, tmp
);
953 spe_release_register(f
, tmp
);
958 * Generate code to perform alpha blending on the SPE
// cell_generate_alpha_blend: builds the SPE routine stored in cb->code for the
// blend state in cb->base (render target 0 only in the visible code).
// Visible sequence: initialise the function buffer for 64 instructions, bind
// registers 3-6 to the fragment colour, 7-10 to the framebuffer pixel and 11-14
// to the constant colour, derive per-channel source / destination factors and
// blend functions (or ONE / ZERO / ADD when blending is disabled), compute the
// alpha factors and then the colour factors, reusing an already computed
// register when two factor enumerants are equal, and finally call
// emit_blend_calculation for every channel enabled in colormask.
// NOTE(review): the return type, braces, the declarations of sF, dF, func,
// src_factor, dst_factor and i, several assignments (for example channels 1 and
// 2 of sF, dF and func), parts of the factor-calculation calls, the fragment
// mask / return emission and whatever guards the debug dump are on lines
// missing from this listing.
961 cell_generate_alpha_blend(struct cell_blend_state
*cb
)
963 struct pipe_blend_state
*const b
= &cb
->base
;
964 struct spe_function
*const f
= &cb
->code
;
966 /* This code generates a maximum of 3 (source alpha factor)
967 * + 3 (destination alpha factor) + (3 * 6) (source color factor)
968 * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
969 * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to
970 * make it a happy power-of-two.
972 spe_init_func(f
, SPE_INST_SIZE
* 64);
975 const int frag
[4] = {
976 spe_allocate_register(f
, 3),
977 spe_allocate_register(f
, 4),
978 spe_allocate_register(f
, 5),
979 spe_allocate_register(f
, 6),
981 const int pixel
[4] = {
982 spe_allocate_register(f
, 7),
983 spe_allocate_register(f
, 8),
984 spe_allocate_register(f
, 9),
985 spe_allocate_register(f
, 10),
987 const int const_color
[4] = {
988 spe_allocate_register(f
, 11),
989 spe_allocate_register(f
, 12),
990 spe_allocate_register(f
, 13),
991 spe_allocate_register(f
, 14),
1001 /* Does the selected blend mode make use of the source / destination
1002 * color (RGB) blend factors?
1004 boolean need_color_factor
= b
->rt
[0].blend_enable
1005 && (b
->rt
[0].rgb_func
!= PIPE_BLEND_MIN
)
1006 && (b
->rt
[0].rgb_func
!= PIPE_BLEND_MAX
);
1008 /* Does the selected blend mode make use of the source / destination
1009 * alpha blend factors?
1011 boolean need_alpha_factor
= b
->rt
[0].blend_enable
1012 && (b
->rt
[0].alpha_func
!= PIPE_BLEND_MIN
)
1013 && (b
->rt
[0].alpha_func
!= PIPE_BLEND_MAX
);
1016 if (b
->rt
[0].blend_enable
) {
1017 sF
[0] = b
->rt
[0].rgb_src_factor
;
// Normalise the alpha source factor: SRC_ALPHA_SATURATE becomes ONE and a
// COLOR factor is replaced by the enumerant that follows it.
// NOTE(review): the + 1 assumes every COLOR enumerant is immediately followed
// by its ALPHA counterpart; verify against p_defines.h, in particular for
// DST_COLOR.
1020 switch (b
->rt
[0].alpha_src_factor
& 0x0f) {
1021 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE
:
1022 sF
[3] = PIPE_BLENDFACTOR_ONE
;
1024 case PIPE_BLENDFACTOR_SRC_COLOR
:
1025 case PIPE_BLENDFACTOR_DST_COLOR
:
1026 case PIPE_BLENDFACTOR_CONST_COLOR
:
1027 case PIPE_BLENDFACTOR_SRC1_COLOR
:
1028 sF
[3] = b
->rt
[0].alpha_src_factor
+ 1;
1031 sF
[3] = b
->rt
[0].alpha_src_factor
;
1034 dF
[0] = b
->rt
[0].rgb_dst_factor
;
// Same normalisation for the alpha destination factor (same caveat on + 1).
1037 switch (b
->rt
[0].alpha_dst_factor
& 0x0f) {
1038 case PIPE_BLENDFACTOR_SRC_COLOR
:
1039 case PIPE_BLENDFACTOR_DST_COLOR
:
1040 case PIPE_BLENDFACTOR_CONST_COLOR
:
1041 case PIPE_BLENDFACTOR_SRC1_COLOR
:
1042 dF
[3] = b
->rt
[0].alpha_dst_factor
+ 1;
1045 dF
[3] = b
->rt
[0].alpha_dst_factor
;
1048 func
[0] = b
->rt
[0].rgb_func
;
1051 func
[3] = b
->rt
[0].alpha_func
;
// Blending disabled: source factor ONE, destination factor ZERO and function
// ADD on every channel.
1053 sF
[0] = PIPE_BLENDFACTOR_ONE
;
1054 sF
[1] = PIPE_BLENDFACTOR_ONE
;
1055 sF
[2] = PIPE_BLENDFACTOR_ONE
;
1056 sF
[3] = PIPE_BLENDFACTOR_ONE
;
1057 dF
[0] = PIPE_BLENDFACTOR_ZERO
;
1058 dF
[1] = PIPE_BLENDFACTOR_ZERO
;
1059 dF
[2] = PIPE_BLENDFACTOR_ZERO
;
1060 dF
[3] = PIPE_BLENDFACTOR_ZERO
;
1062 func
[0] = PIPE_BLEND_ADD
;
1063 func
[1] = PIPE_BLEND_ADD
;
1064 func
[2] = PIPE_BLEND_ADD
;
1065 func
[3] = PIPE_BLEND_ADD
;
1069 /* If alpha writing is enabled and the alpha blend mode requires use of
1070 * the alpha factor, calculate the alpha factor.
1072 if (((b
->rt
[0].colormask
& 8) != 0) && need_alpha_factor
) {
1073 src_factor
[3] = emit_alpha_factor_calculation(f
, sF
[3], const_color
[3],
1076 /* If the alpha destination blend factor is the same as the alpha source
1077 * blend factor, re-use the previously calculated value.
1079 dst_factor
[3] = (dF
[3] == sF
[3])
1081 : emit_alpha_factor_calculation(f
, dF
[3], const_color
[3],
// Reuse an alpha factor register for the RGB source factor when the
// enumerants match; otherwise compute the colour factor.
// NOTE(review): these reuse branches do not check whether the alpha factors
// were actually computed above - confirm src_factor[3] and dst_factor[3] are
// always initialised on this path.
1086 if (sF
[0] == sF
[3]) {
1087 src_factor
[0] = src_factor
[3];
1088 src_factor
[1] = src_factor
[3];
1089 src_factor
[2] = src_factor
[3];
1090 } else if (sF
[0] == dF
[3]) {
1091 src_factor
[0] = dst_factor
[3];
1092 src_factor
[1] = dst_factor
[3];
1093 src_factor
[2] = dst_factor
[3];
1094 } else if (need_color_factor
) {
1095 emit_color_factor_calculation(f
,
1096 b
->rt
[0].rgb_src_factor
,
1098 frag
, pixel
, const_color
, src_factor
);
1102 if (dF
[0] == sF
[3]) {
1103 dst_factor
[0] = src_factor
[3];
1104 dst_factor
[1] = src_factor
[3];
1105 dst_factor
[2] = src_factor
[3];
1106 } else if (dF
[0] == dF
[3]) {
1107 dst_factor
[0] = dst_factor
[3];
1108 dst_factor
[1] = dst_factor
[3];
1109 dst_factor
[2] = dst_factor
[3];
1110 } else if (dF
[0] == sF
[0]) {
1111 dst_factor
[0] = src_factor
[0];
1112 dst_factor
[1] = src_factor
[1];
1113 dst_factor
[2] = src_factor
[2];
1114 } else if (need_color_factor
) {
1115 emit_color_factor_calculation(f
,
1116 b
->rt
[0].rgb_dst_factor
,
1118 frag
, pixel
, const_color
, dst_factor
);
// Emit the blend equation for each channel enabled in the colour write mask.
1123 for (i
= 0; i
< 4; ++i
) {
1124 if ((b
->rt
[0].colormask
& (1U << i
)) != 0) {
1125 emit_blend_calculation(f
,
1126 func
[i
], sF
[i
], dF
[i
],
1127 frag
[i
], src_factor
[i
],
1128 pixel
[i
], dst_factor
[i
]);
// Debug dump of the blend state and of the generated words.
// NOTE(review): this section reads cb->base.blend_enable, rgb_src_factor and
// so on directly, while the code above reads the same fields through
// b->rt[0]; the dump may be stale relative to the state structure. The first
// printf also passes a pointer difference for a %u conversion.
1136 const uint32_t *p
= f
->store
;
1138 printf("# %u instructions\n", f
->csr
- f
->store
);
1139 printf("# blend (%sabled)\n",
1140 (cb
->base
.blend_enable
) ? "en" : "dis");
1141 printf("# RGB func / sf / df: %u %u %u\n",
1143 cb
->base
.rgb_src_factor
,
1144 cb
->base
.rgb_dst_factor
);
1145 printf("# ALP func / sf / df: %u %u %u\n",
1146 cb
->base
.alpha_func
,
1147 cb
->base
.alpha_src_factor
,
1148 cb
->base
.alpha_dst_factor
);
1150 printf("\t.text\n");
1151 for (/* empty */; p
< f
->csr
; p
++) {
1152 printf("\t.long\t0x%04x\n", *p
);
// PC_OFFSET: distance, in 4-byte units, from the slot at index f->num_inst in
// f->store to the address d rounded down to a 16-byte boundary.
// pc is the address of f->store[f->num_inst]; ea is d with its low four bits
// cleared; the result is (ea - pc) >> 2.
// NOTE(review): the return type and braces are on lines missing from this
// listing. Right-shifting a negative signed value is implementation-defined in
// C, so a data address below pc relies on arithmetic-shift behaviour. Other
// code in this file walks the buffer up to f->csr rather than indexing by
// f->num_inst - confirm both describe the same position.
1161 PC_OFFSET(const struct spe_function
*f
, const void *d
)
1163 const intptr_t pc
= (intptr_t) &f
->store
[f
->num_inst
];
1164 const intptr_t ea
= ~0x0f & (intptr_t) d
;
1166 return (ea
- pc
) >> 2;
1171 * Generate code to perform color conversion and logic op
1174 * The code generated by this function should also perform dithering.
1177 * The code generated by this function should also perform color-write
1181 * Only two framebuffer formats are supported at this time.
1184 cell_generate_logic_op(struct spe_function
*f
,
1185 const struct pipe_blend_state
*blend
,
1186 struct pipe_surface
*surf
)
1188 const unsigned logic_op
= (blend
->logicop_enable
)
1189 ? blend
->logicop_func
: PIPE_LOGICOP_COPY
;
1191 /* This code generates a maximum of 37 instructions. An additional 32
1192 * bytes (equiv. to 8 instructions) are needed for data storage. Round up
1193 * to 64 to make it a happy power-of-two.
1195 spe_init_func(f
, SPE_INST_SIZE
* 64);
1198 /* Pixel colors in framebuffer format in AoS layout.
1200 const int pixel
[4] = {
1201 spe_allocate_register(f
, 3),
1202 spe_allocate_register(f
, 4),
1203 spe_allocate_register(f
, 5),
1204 spe_allocate_register(f
, 6),
1207 /* Fragment colors stored as floats in SoA layout.
1209 const int frag
[4] = {
1210 spe_allocate_register(f
, 7),
1211 spe_allocate_register(f
, 8),
1212 spe_allocate_register(f
, 9),
1213 spe_allocate_register(f
, 10),
1216 const int mask
= spe_allocate_register(f
, 11);
1219 /* Short-circuit the noop and invert cases.
1221 if ((logic_op
== PIPE_LOGICOP_NOOP
) || (blend
->rt
[0].colormask
== 0)) {
1224 } else if (logic_op
== PIPE_LOGICOP_INVERT
) {
1225 spe_nor(f
, pixel
[0], pixel
[0], pixel
[0]);
1226 spe_nor(f
, pixel
[1], pixel
[1], pixel
[1]);
1227 spe_nor(f
, pixel
[2], pixel
[2], pixel
[2]);
1228 spe_nor(f
, pixel
[3], pixel
[3], pixel
[3]);
1234 const int tmp
[4] = {
1235 spe_allocate_available_register(f
),
1236 spe_allocate_available_register(f
),
1237 spe_allocate_available_register(f
),
1238 spe_allocate_available_register(f
),
1241 const int shuf_xpose_hi
= spe_allocate_available_register(f
);
1242 const int shuf_xpose_lo
= spe_allocate_available_register(f
);
1243 const int shuf_color
= spe_allocate_available_register(f
);
1246 /* Pointer to the begining of the function's private data area.
1248 uint32_t *const data
= ((uint32_t *) f
->store
) + (64 - 8);
1251 /* Convert fragment colors to framebuffer format in AoS layout.
1253 switch (surf
->format
) {
1254 case PIPE_FORMAT_B8G8R8A8_UNORM
:
1255 data
[0] = 0x00010203;
1256 data
[1] = 0x10111213;
1257 data
[2] = 0x04050607;
1258 data
[3] = 0x14151617;
1259 data
[4] = 0x0c000408;
1260 data
[5] = 0x80808080;
1261 data
[6] = 0x80808080;
1262 data
[7] = 0x80808080;
1264 case PIPE_FORMAT_A8R8G8B8_UNORM
:
1265 data
[0] = 0x03020100;
1266 data
[1] = 0x13121110;
1267 data
[2] = 0x07060504;
1268 data
[3] = 0x17161514;
1269 data
[4] = 0x0804000c;
1270 data
[5] = 0x80808080;
1271 data
[6] = 0x80808080;
1272 data
[7] = 0x80808080;
1275 fprintf(stderr
, "CELL: Bad pixel format in cell_generate_logic_op()");
1279 spe_ilh(f
, tmp
[0], 0x0808);
1280 spe_lqr(f
, shuf_xpose_hi
, PC_OFFSET(f
, data
+0));
1281 spe_lqr(f
, shuf_color
, PC_OFFSET(f
, data
+4));
1282 spe_a(f
, shuf_xpose_lo
, shuf_xpose_hi
, tmp
[0]);
1284 spe_shufb(f
, tmp
[0], frag
[0], frag
[2], shuf_xpose_hi
);
1285 spe_shufb(f
, tmp
[1], frag
[0], frag
[2], shuf_xpose_lo
);
1286 spe_shufb(f
, tmp
[2], frag
[1], frag
[3], shuf_xpose_hi
);
1287 spe_shufb(f
, tmp
[3], frag
[1], frag
[3], shuf_xpose_lo
);
1289 spe_shufb(f
, frag
[0], tmp
[0], tmp
[2], shuf_xpose_hi
);
1290 spe_shufb(f
, frag
[1], tmp
[0], tmp
[2], shuf_xpose_lo
);
1291 spe_shufb(f
, frag
[2], tmp
[1], tmp
[3], shuf_xpose_hi
);
1292 spe_shufb(f
, frag
[3], tmp
[1], tmp
[3], shuf_xpose_lo
);
1294 spe_cfltu(f
, frag
[0], frag
[0], 32);
1295 spe_cfltu(f
, frag
[1], frag
[1], 32);
1296 spe_cfltu(f
, frag
[2], frag
[2], 32);
1297 spe_cfltu(f
, frag
[3], frag
[3], 32);
1299 spe_shufb(f
, frag
[0], frag
[0], pixel
[0], shuf_color
);
1300 spe_shufb(f
, frag
[1], frag
[1], pixel
[1], shuf_color
);
1301 spe_shufb(f
, frag
[2], frag
[2], pixel
[2], shuf_color
);
1302 spe_shufb(f
, frag
[3], frag
[3], pixel
[3], shuf_color
);
1305 /* If logic op is enabled, perform the requested logical operation on the
1306 * converted fragment colors and the pixel colors.
1309 case PIPE_LOGICOP_CLEAR
:
1310 spe_il(f
, frag
[0], 0);
1311 spe_il(f
, frag
[1], 0);
1312 spe_il(f
, frag
[2], 0);
1313 spe_il(f
, frag
[3], 0);
1315 case PIPE_LOGICOP_NOR
:
1316 spe_nor(f
, frag
[0], frag
[0], pixel
[0]);
1317 spe_nor(f
, frag
[1], frag
[1], pixel
[1]);
1318 spe_nor(f
, frag
[2], frag
[2], pixel
[2]);
1319 spe_nor(f
, frag
[3], frag
[3], pixel
[3]);
1321 case PIPE_LOGICOP_AND_INVERTED
:
1322 spe_andc(f
, frag
[0], pixel
[0], frag
[0]);
1323 spe_andc(f
, frag
[1], pixel
[1], frag
[1]);
1324 spe_andc(f
, frag
[2], pixel
[2], frag
[2]);
1325 spe_andc(f
, frag
[3], pixel
[3], frag
[3]);
1327 case PIPE_LOGICOP_COPY_INVERTED
:
1328 spe_nor(f
, frag
[0], frag
[0], frag
[0]);
1329 spe_nor(f
, frag
[1], frag
[1], frag
[1]);
1330 spe_nor(f
, frag
[2], frag
[2], frag
[2]);
1331 spe_nor(f
, frag
[3], frag
[3], frag
[3]);
1333 case PIPE_LOGICOP_AND_REVERSE
:
1334 spe_andc(f
, frag
[0], frag
[0], pixel
[0]);
1335 spe_andc(f
, frag
[1], frag
[1], pixel
[1]);
1336 spe_andc(f
, frag
[2], frag
[2], pixel
[2]);
1337 spe_andc(f
, frag
[3], frag
[3], pixel
[3]);
1339 case PIPE_LOGICOP_XOR
:
1340 spe_xor(f
, frag
[0], frag
[0], pixel
[0]);
1341 spe_xor(f
, frag
[1], frag
[1], pixel
[1]);
1342 spe_xor(f
, frag
[2], frag
[2], pixel
[2]);
1343 spe_xor(f
, frag
[3], frag
[3], pixel
[3]);
1345 case PIPE_LOGICOP_NAND
:
1346 spe_nand(f
, frag
[0], frag
[0], pixel
[0]);
1347 spe_nand(f
, frag
[1], frag
[1], pixel
[1]);
1348 spe_nand(f
, frag
[2], frag
[2], pixel
[2]);
1349 spe_nand(f
, frag
[3], frag
[3], pixel
[3]);
1351 case PIPE_LOGICOP_AND
:
1352 spe_and(f
, frag
[0], frag
[0], pixel
[0]);
1353 spe_and(f
, frag
[1], frag
[1], pixel
[1]);
1354 spe_and(f
, frag
[2], frag
[2], pixel
[2]);
1355 spe_and(f
, frag
[3], frag
[3], pixel
[3]);
1357 case PIPE_LOGICOP_EQUIV
:
1358 spe_eqv(f
, frag
[0], frag
[0], pixel
[0]);
1359 spe_eqv(f
, frag
[1], frag
[1], pixel
[1]);
1360 spe_eqv(f
, frag
[2], frag
[2], pixel
[2]);
1361 spe_eqv(f
, frag
[3], frag
[3], pixel
[3]);
1363 case PIPE_LOGICOP_OR_INVERTED
:
1364 spe_orc(f
, frag
[0], pixel
[0], frag
[0]);
1365 spe_orc(f
, frag
[1], pixel
[1], frag
[1]);
1366 spe_orc(f
, frag
[2], pixel
[2], frag
[2]);
1367 spe_orc(f
, frag
[3], pixel
[3], frag
[3]);
1369 case PIPE_LOGICOP_COPY
:
1371 case PIPE_LOGICOP_OR_REVERSE
:
1372 spe_orc(f
, frag
[0], frag
[0], pixel
[0]);
1373 spe_orc(f
, frag
[1], frag
[1], pixel
[1]);
1374 spe_orc(f
, frag
[2], frag
[2], pixel
[2]);
1375 spe_orc(f
, frag
[3], frag
[3], pixel
[3]);
1377 case PIPE_LOGICOP_OR
:
1378 spe_or(f
, frag
[0], frag
[0], pixel
[0]);
1379 spe_or(f
, frag
[1], frag
[1], pixel
[1]);
1380 spe_or(f
, frag
[2], frag
[2], pixel
[2]);
1381 spe_or(f
, frag
[3], frag
[3], pixel
[3]);
1383 case PIPE_LOGICOP_SET
:
1384 spe_il(f
, frag
[0], ~0);
1385 spe_il(f
, frag
[1], ~0);
1386 spe_il(f
, frag
[2], ~0);
1387 spe_il(f
, frag
[3], ~0);
1390 /* These two cases are short-circuited above.
1392 case PIPE_LOGICOP_INVERT
:
1393 case PIPE_LOGICOP_NOOP
:
1399 /* Apply fragment mask.
1401 spe_ilh(f
, tmp
[0], 0x0000);
1402 spe_ilh(f
, tmp
[1], 0x0404);
1403 spe_ilh(f
, tmp
[2], 0x0808);
1404 spe_ilh(f
, tmp
[3], 0x0c0c);
1406 spe_shufb(f
, tmp
[0], mask
, mask
, tmp
[0]);
1407 spe_shufb(f
, tmp
[1], mask
, mask
, tmp
[1]);
1408 spe_shufb(f
, tmp
[2], mask
, mask
, tmp
[2]);
1409 spe_shufb(f
, tmp
[3], mask
, mask
, tmp
[3]);
1411 spe_selb(f
, pixel
[0], pixel
[0], frag
[0], tmp
[0]);
1412 spe_selb(f
, pixel
[1], pixel
[1], frag
[1], tmp
[1]);
1413 spe_selb(f
, pixel
[2], pixel
[2], frag
[2], tmp
[2]);
1414 spe_selb(f
, pixel
[3], pixel
[3], frag
[3], tmp
[3]);
1420 const uint32_t *p
= f
->store
;
1423 printf("# %u instructions\n", f
->csr
- f
->store
);
1425 printf("\t.text\n");
1426 for (i
= 0; i
< 64; i
++) {
1427 printf("\t.long\t0x%04x\n", p
[i
]);