winsys/sw: Add a software winsys layered on a pipe
[mesa/mesa-lb.git] / src / gallium / drivers / cell / ppu / cell_state_per_fragment.c
blobdc33e7ccc2cf94cae92e94061edf3780c06492ca
1 /*
2 * (C) Copyright IBM Corporation 2008
3 * All Rights Reserved.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 /**
26 * \file
27 * Generate code to perform all per-fragment operations.
29 * Code generated by these functions performs alpha, depth, and stencil
30 * testing as well as alpha blending.
32 * \note
33 * Occlusion query is not supported, but this is the right place to add that
34 * support.
36 * \author Ian Romanick <idr@us.ibm.com>
39 #include "pipe/p_defines.h"
40 #include "pipe/p_state.h"
42 #include "cell_context.h"
44 #include "rtasm/rtasm_ppc_spe.h"
47 /**
48 * Generate code to perform alpha testing.
50 * The code generated by this function uses the register specificed by
51 * \c mask as both an input and an output.
53 * \param dsa Current alpha-test state
54 * \param f Function to which code should be appended
55 * \param mask Index of register containing active fragment mask
56 * \param alphas Index of register containing per-fragment alpha values
58 * \note Emits a maximum of 6 instructions.
60 static void
61 emit_alpha_test(struct pipe_depth_stencil_alpha_state *dsa,
62 struct spe_function *f, int mask, int alphas)
64 /* If the alpha function is either NEVER or ALWAYS, there is no need to
65 * load the reference value into a register. ALWAYS is a fairly common
66 * case, and this optimization saves 2 instructions.
68 if (dsa->alpha.enabled
69 && (dsa->alpha.func != PIPE_FUNC_NEVER)
70 && (dsa->alpha.func != PIPE_FUNC_ALWAYS)) {
71 int ref = spe_allocate_available_register(f);
72 int tmp_a = spe_allocate_available_register(f);
73 int tmp_b = spe_allocate_available_register(f);
74 union {
75 float f;
76 unsigned u;
77 } ref_val;
78 boolean complement = FALSE;
80 ref_val.f = dsa->alpha.ref;
82 spe_il(f, ref, ref_val.u & 0x0000ffff);
83 spe_ilh(f, ref, ref_val.u >> 16);
85 switch (dsa->alpha.func) {
86 case PIPE_FUNC_NOTEQUAL:
87 complement = TRUE;
88 /* FALLTHROUGH */
90 case PIPE_FUNC_EQUAL:
91 spe_fceq(f, tmp_a, ref, alphas);
92 break;
94 case PIPE_FUNC_LEQUAL:
95 complement = TRUE;
96 /* FALLTHROUGH */
98 case PIPE_FUNC_GREATER:
99 spe_fcgt(f, tmp_a, ref, alphas);
100 break;
102 case PIPE_FUNC_LESS:
103 complement = TRUE;
104 /* FALLTHROUGH */
106 case PIPE_FUNC_GEQUAL:
107 spe_fcgt(f, tmp_a, ref, alphas);
108 spe_fceq(f, tmp_b, ref, alphas);
109 spe_or(f, tmp_a, tmp_b, tmp_a);
110 break;
112 case PIPE_FUNC_ALWAYS:
113 case PIPE_FUNC_NEVER:
114 default:
115 assert(0);
116 break;
119 if (complement) {
120 spe_andc(f, mask, mask, tmp_a);
121 } else {
122 spe_and(f, mask, mask, tmp_a);
125 spe_release_register(f, ref);
126 spe_release_register(f, tmp_a);
127 spe_release_register(f, tmp_b);
128 } else if (dsa->alpha.enabled && (dsa->alpha.func == PIPE_FUNC_NEVER)) {
129 spe_il(f, mask, 0);
135 * Generate code to perform Z testing. Four Z values are tested at once.
136 * \param dsa Current depth-test state
137 * \param f Function to which code should be appended
138 * \param mask Index of register to contain depth-pass mask
139 * \param stored Index of register containing values from depth buffer
140 * \param calculated Index of register containing per-fragment depth values
142 * \return
143 * If the calculated depth comparison mask is the actual mask, \c FALSE is
144 * returned. If the calculated depth comparison mask is the compliment of
145 * the actual mask, \c TRUE is returned.
147 * \note Emits a maximum of 3 instructions.
149 static boolean
150 emit_depth_test(struct pipe_depth_stencil_alpha_state *dsa,
151 struct spe_function *f, int mask, int stored, int calculated)
153 unsigned func = (dsa->depth.enabled)
154 ? dsa->depth.func : PIPE_FUNC_ALWAYS;
155 int tmp = spe_allocate_available_register(f);
156 boolean compliment = FALSE;
158 switch (func) {
159 case PIPE_FUNC_NEVER:
160 spe_il(f, mask, 0);
161 break;
163 case PIPE_FUNC_NOTEQUAL:
164 compliment = TRUE;
165 /* FALLTHROUGH */
166 case PIPE_FUNC_EQUAL:
167 spe_ceq(f, mask, calculated, stored);
168 break;
170 case PIPE_FUNC_LEQUAL:
171 compliment = TRUE;
172 /* FALLTHROUGH */
173 case PIPE_FUNC_GREATER:
174 spe_clgt(f, mask, calculated, stored);
175 break;
177 case PIPE_FUNC_LESS:
178 compliment = TRUE;
179 /* FALLTHROUGH */
180 case PIPE_FUNC_GEQUAL:
181 spe_clgt(f, mask, calculated, stored);
182 spe_ceq(f, tmp, calculated, stored);
183 spe_or(f, mask, mask, tmp);
184 break;
186 case PIPE_FUNC_ALWAYS:
187 spe_il(f, mask, ~0);
188 break;
190 default:
191 assert(0);
192 break;
195 spe_release_register(f, tmp);
196 return compliment;
201 * Generate code to apply the stencil operation (after testing).
202 * \note Emits a maximum of 5 instructions.
204 * \warning
205 * Since \c out and \c in might be the same register, this routine cannot
206 * generate code that uses \c out as a temporary.
208 static void
209 emit_stencil_op(struct spe_function *f,
210 int out, int in, int mask, unsigned op, unsigned ref)
212 const int clamp = spe_allocate_available_register(f);
213 const int clamp_mask = spe_allocate_available_register(f);
214 const int result = spe_allocate_available_register(f);
216 switch(op) {
217 case PIPE_STENCIL_OP_KEEP:
218 assert(0);
219 case PIPE_STENCIL_OP_ZERO:
220 spe_il(f, result, 0);
221 break;
222 case PIPE_STENCIL_OP_REPLACE:
223 spe_il(f, result, ref);
224 break;
225 case PIPE_STENCIL_OP_INCR:
226 /* clamp = [0xff, 0xff, 0xff, 0xff] */
227 spe_il(f, clamp, 0x0ff);
228 /* result[i] = in[i] + 1 */
229 spe_ai(f, result, in, 1);
230 /* clamp_mask[i] = (result[i] > 0xff) */
231 spe_clgti(f, clamp_mask, result, 0x0ff);
232 /* result[i] = clamp_mask[i] ? clamp[i] : result[i] */
233 spe_selb(f, result, result, clamp, clamp_mask);
234 break;
235 case PIPE_STENCIL_OP_DECR:
236 spe_il(f, clamp, 0);
237 spe_ai(f, result, in, -1);
239 /* If "(s-1) < 0" in signed arithemtic, then "(s-1) > MAX" in unsigned
240 * arithmetic.
242 spe_clgti(f, clamp_mask, result, 0x0ff);
243 spe_selb(f, result, result, clamp, clamp_mask);
244 break;
245 case PIPE_STENCIL_OP_INCR_WRAP:
246 spe_ai(f, result, in, 1);
247 break;
248 case PIPE_STENCIL_OP_DECR_WRAP:
249 spe_ai(f, result, in, -1);
250 break;
251 case PIPE_STENCIL_OP_INVERT:
252 spe_nor(f, result, in, in);
253 break;
254 default:
255 assert(0);
258 spe_selb(f, out, in, result, mask);
260 spe_release_register(f, result);
261 spe_release_register(f, clamp_mask);
262 spe_release_register(f, clamp);
267 * Generate code to do stencil test. Four pixels are tested at once.
268 * \param dsa Depth / stencil test state
269 * \param face 0 for front face, 1 for back face
270 * \param f Function to append instructions to
271 * \param mask Register containing mask of fragments passing the
272 * alpha test
273 * \param depth_mask Register containing mask of fragments passing the
274 * depth test
275 * \param depth_compliment Is \c depth_mask the compliment of the actual mask?
276 * \param stencil Register containing values from stencil buffer
277 * \param depth_pass Register to store mask of fragments passing stencil test
278 * and depth test
280 * \note
281 * Emits a maximum of 10 + (3 * 5) = 25 instructions.
283 static int
284 emit_stencil_test(struct pipe_depth_stencil_alpha_state *dsa,
285 struct pipe_stencil_ref *sr,
286 unsigned face,
287 struct spe_function *f,
288 int mask,
289 int depth_mask,
290 boolean depth_complement,
291 int stencil,
292 int depth_pass)
294 int stencil_fail = spe_allocate_available_register(f);
295 int depth_fail = spe_allocate_available_register(f);
296 int stencil_mask = spe_allocate_available_register(f);
297 int stencil_pass = spe_allocate_available_register(f);
298 int face_stencil = spe_allocate_available_register(f);
299 int stencil_src = stencil;
300 const unsigned ref = (sr->ref_value[face]
301 & dsa->stencil[face].valuemask);
302 boolean complement = FALSE;
303 int stored;
304 int tmp = spe_allocate_available_register(f);
307 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
308 && (dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
309 && (dsa->stencil[face].valuemask != 0x0ff)) {
310 stored = spe_allocate_available_register(f);
311 spe_andi(f, stored, stencil, dsa->stencil[face].valuemask);
312 } else {
313 stored = stencil;
317 switch (dsa->stencil[face].func) {
318 case PIPE_FUNC_NEVER:
319 spe_il(f, stencil_mask, 0); /* stencil_mask[0..3] = [0,0,0,0] */
320 break;
322 case PIPE_FUNC_NOTEQUAL:
323 complement = TRUE;
324 /* FALLTHROUGH */
325 case PIPE_FUNC_EQUAL:
326 /* stencil_mask[i] = (stored[i] == ref) */
327 spe_ceqi(f, stencil_mask, stored, ref);
328 break;
330 case PIPE_FUNC_LEQUAL:
331 complement = TRUE;
332 /* FALLTHROUGH */
333 case PIPE_FUNC_GREATER:
334 complement = TRUE;
335 /* stencil_mask[i] = (stored[i] > ref) */
336 spe_clgti(f, stencil_mask, stored, ref);
337 break;
339 case PIPE_FUNC_LESS:
340 complement = TRUE;
341 /* FALLTHROUGH */
342 case PIPE_FUNC_GEQUAL:
343 /* stencil_mask[i] = (stored[i] > ref) */
344 spe_clgti(f, stencil_mask, stored, ref);
345 /* tmp[i] = (stored[i] == ref) */
346 spe_ceqi(f, tmp, stored, ref);
347 /* stencil_mask[i] = stencil_mask[i] | tmp[i] */
348 spe_or(f, stencil_mask, stencil_mask, tmp);
349 break;
351 case PIPE_FUNC_ALWAYS:
352 /* See comment below. */
353 break;
355 default:
356 assert(0);
357 break;
360 if (stored != stencil) {
361 spe_release_register(f, stored);
363 spe_release_register(f, tmp);
366 /* ALWAYS is a very common stencil-test, so some effort is applied to
367 * optimize that case. The stencil-pass mask is the same as the input
368 * fragment mask. This makes the stencil-test (above) a no-op, and the
369 * input fragment mask can be "renamed" the stencil-pass mask.
371 if (dsa->stencil[face].func == PIPE_FUNC_ALWAYS) {
372 spe_release_register(f, stencil_pass);
373 stencil_pass = mask;
374 } else {
375 if (complement) {
376 spe_andc(f, stencil_pass, mask, stencil_mask);
377 } else {
378 spe_and(f, stencil_pass, mask, stencil_mask);
382 if (depth_complement) {
383 spe_andc(f, depth_pass, stencil_pass, depth_mask);
384 } else {
385 spe_and(f, depth_pass, stencil_pass, depth_mask);
389 /* Conditionally emit code to update the stencil value under various
390 * condititons. Note that there is no need to generate code under the
391 * following circumstances:
393 * - Stencil write mask is zero.
394 * - For stencil-fail if the stencil test is ALWAYS
395 * - For depth-fail if the stencil test is NEVER
396 * - For depth-pass if the stencil test is NEVER
397 * - Any of the 3 conditions if the operation is KEEP
399 if (dsa->stencil[face].writemask != 0) {
400 if ((dsa->stencil[face].func != PIPE_FUNC_ALWAYS)
401 && (dsa->stencil[face].fail_op != PIPE_STENCIL_OP_KEEP)) {
402 if (complement) {
403 spe_and(f, stencil_fail, mask, stencil_mask);
404 } else {
405 spe_andc(f, stencil_fail, mask, stencil_mask);
408 emit_stencil_op(f, face_stencil, stencil_src, stencil_fail,
409 dsa->stencil[face].fail_op,
410 sr->ref_value[face]);
412 stencil_src = face_stencil;
415 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
416 && (dsa->stencil[face].zfail_op != PIPE_STENCIL_OP_KEEP)) {
417 if (depth_complement) {
418 spe_and(f, depth_fail, stencil_pass, depth_mask);
419 } else {
420 spe_andc(f, depth_fail, stencil_pass, depth_mask);
423 emit_stencil_op(f, face_stencil, stencil_src, depth_fail,
424 dsa->stencil[face].zfail_op,
425 sr->ref_value[face]);
426 stencil_src = face_stencil;
429 if ((dsa->stencil[face].func != PIPE_FUNC_NEVER)
430 && (dsa->stencil[face].zpass_op != PIPE_STENCIL_OP_KEEP)) {
431 emit_stencil_op(f, face_stencil, stencil_src, depth_pass,
432 dsa->stencil[face].zpass_op,
433 sr->ref_value[face]);
434 stencil_src = face_stencil;
438 spe_release_register(f, stencil_fail);
439 spe_release_register(f, depth_fail);
440 spe_release_register(f, stencil_mask);
441 if (stencil_pass != mask) {
442 spe_release_register(f, stencil_pass);
445 /* If all of the stencil operations were KEEP or the stencil write mask was
446 * zero, "stencil_src" will still be set to "stencil". In this case
447 * release the "face_stencil" register. Otherwise apply the stencil write
448 * mask to select bits from the calculated stencil value and the previous
449 * stencil value.
451 if (stencil_src == stencil) {
452 spe_release_register(f, face_stencil);
453 } else if (dsa->stencil[face].writemask != 0x0ff) {
454 int tmp = spe_allocate_available_register(f);
456 spe_il(f, tmp, dsa->stencil[face].writemask);
457 spe_selb(f, stencil_src, stencil, stencil_src, tmp);
459 spe_release_register(f, tmp);
462 return stencil_src;
466 void
467 cell_generate_depth_stencil_test(struct cell_depth_stencil_alpha_state *cdsa,
468 struct pipe_stencil_ref *sr)
470 struct pipe_depth_stencil_alpha_state *const dsa = &cdsa->base;
471 struct spe_function *const f = &cdsa->code;
473 /* This code generates a maximum of 6 (alpha test) + 3 (depth test)
474 * + 25 (front stencil) + 25 (back stencil) + 4 = 63 instructions. Round
475 * up to 64 to make it a happy power-of-two.
477 spe_init_func(f, SPE_INST_SIZE * 64);
480 /* Allocate registers for the function's input parameters. Cleverly (and
481 * clever code is usually dangerous, but I couldn't resist) the generated
482 * function returns a structure. Returned structures start with register
483 * 3, and the structure fields are ordered to match up exactly with the
484 * input parameters.
486 int mask = spe_allocate_register(f, 3);
487 int depth = spe_allocate_register(f, 4);
488 int stencil = spe_allocate_register(f, 5);
489 int zvals = spe_allocate_register(f, 6);
490 int frag_a = spe_allocate_register(f, 7);
491 int facing = spe_allocate_register(f, 8);
493 int depth_mask = spe_allocate_available_register(f);
495 boolean depth_complement;
498 emit_alpha_test(dsa, f, mask, frag_a);
500 depth_complement = emit_depth_test(dsa, f, depth_mask, depth, zvals);
502 if (dsa->stencil[0].enabled) {
503 const int front_depth_pass = spe_allocate_available_register(f);
504 int front_stencil = emit_stencil_test(dsa, sr, 0, f, mask,
505 depth_mask, depth_complement,
506 stencil, front_depth_pass);
508 if (dsa->stencil[1].enabled) {
509 const int back_depth_pass = spe_allocate_available_register(f);
510 int back_stencil = emit_stencil_test(dsa, sr, 1, f, mask,
511 depth_mask, depth_complement,
512 stencil, back_depth_pass);
514 /* If the front facing stencil value and the back facing stencil
515 * value are stored in the same register, there is no need to select
516 * a value based on the facing. This can happen if the stencil value
517 * was not modified due to the write masks being zero, the stencil
518 * operations being KEEP, etc.
520 if (front_stencil != back_stencil) {
521 spe_selb(f, stencil, back_stencil, front_stencil, facing);
524 if (back_stencil != stencil) {
525 spe_release_register(f, back_stencil);
528 if (front_stencil != stencil) {
529 spe_release_register(f, front_stencil);
532 spe_selb(f, mask, back_depth_pass, front_depth_pass, facing);
534 spe_release_register(f, back_depth_pass);
535 } else {
536 if (front_stencil != stencil) {
537 spe_or(f, stencil, front_stencil, front_stencil);
538 spe_release_register(f, front_stencil);
540 spe_or(f, mask, front_depth_pass, front_depth_pass);
543 spe_release_register(f, front_depth_pass);
544 } else if (dsa->depth.enabled) {
545 if (depth_complement) {
546 spe_andc(f, mask, mask, depth_mask);
547 } else {
548 spe_and(f, mask, mask, depth_mask);
552 if (dsa->depth.writemask) {
553 spe_selb(f, depth, depth, zvals, mask);
556 spe_bi(f, 0, 0, 0); /* return from function call */
559 #if 0
561 const uint32_t *p = f->store;
562 unsigned i;
564 printf("# alpha (%sabled)\n",
565 (dsa->alpha.enabled) ? "en" : "dis");
566 printf("# func: %u\n", dsa->alpha.func);
567 printf("# ref: %.2f\n", dsa->alpha.ref);
569 printf("# depth (%sabled)\n",
570 (dsa->depth.enabled) ? "en" : "dis");
571 printf("# func: %u\n", dsa->depth.func);
573 for (i = 0; i < 2; i++) {
574 printf("# %s stencil (%sabled)\n",
575 (i == 0) ? "front" : "back",
576 (dsa->stencil[i].enabled) ? "en" : "dis");
578 printf("# func: %u\n", dsa->stencil[i].func);
579 printf("# op (sf, zf, zp): %u %u %u\n",
580 dsa->stencil[i].fail_op,
581 dsa->stencil[i].zfail_op,
582 dsa->stencil[i].zpass_op);
583 printf("# ref value / value mask / write mask: %02x %02x %02x\n",
584 sr->ref_value[i],
585 dsa->stencil[i].valuemask,
586 dsa->stencil[i].writemask);
589 printf("\t.text\n");
590 for (/* empty */; p < f->csr; p++) {
591 printf("\t.long\t0x%04x\n", *p);
593 fflush(stdout);
595 #endif
600 * \note Emits a maximum of 3 instructions
602 static int
603 emit_alpha_factor_calculation(struct spe_function *f,
604 unsigned factor,
605 int src_alpha, int dst_alpha, int const_alpha)
607 int factor_reg;
608 int tmp;
611 switch (factor) {
612 case PIPE_BLENDFACTOR_ONE:
613 factor_reg = -1;
614 break;
616 case PIPE_BLENDFACTOR_SRC_ALPHA:
617 factor_reg = spe_allocate_available_register(f);
619 spe_or(f, factor_reg, src_alpha, src_alpha);
620 break;
622 case PIPE_BLENDFACTOR_DST_ALPHA:
623 factor_reg = dst_alpha;
624 break;
626 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
627 factor_reg = -1;
628 break;
630 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
631 factor_reg = spe_allocate_available_register(f);
633 tmp = spe_allocate_available_register(f);
634 spe_il(f, tmp, 1);
635 spe_cuflt(f, tmp, tmp, 0);
636 spe_fs(f, factor_reg, tmp, const_alpha);
637 spe_release_register(f, tmp);
638 break;
640 case PIPE_BLENDFACTOR_CONST_ALPHA:
641 factor_reg = const_alpha;
642 break;
644 case PIPE_BLENDFACTOR_ZERO:
645 factor_reg = -1;
646 break;
648 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
649 tmp = spe_allocate_available_register(f);
650 factor_reg = spe_allocate_available_register(f);
652 spe_il(f, tmp, 1);
653 spe_cuflt(f, tmp, tmp, 0);
654 spe_fs(f, factor_reg, tmp, src_alpha);
656 spe_release_register(f, tmp);
657 break;
659 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
660 tmp = spe_allocate_available_register(f);
661 factor_reg = spe_allocate_available_register(f);
663 spe_il(f, tmp, 1);
664 spe_cuflt(f, tmp, tmp, 0);
665 spe_fs(f, factor_reg, tmp, dst_alpha);
667 spe_release_register(f, tmp);
668 break;
670 case PIPE_BLENDFACTOR_SRC1_ALPHA:
671 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
672 default:
673 assert(0);
674 factor_reg = -1;
675 break;
678 return factor_reg;
683 * \note Emits a maximum of 6 instructions
685 static void
686 emit_color_factor_calculation(struct spe_function *f,
687 unsigned sF, unsigned mask,
688 const int *src,
689 const int *dst,
690 const int *const_color,
691 int *factor)
693 int tmp;
694 unsigned i;
697 factor[0] = -1;
698 factor[1] = -1;
699 factor[2] = -1;
700 factor[3] = -1;
702 switch (sF) {
703 case PIPE_BLENDFACTOR_ONE:
704 break;
706 case PIPE_BLENDFACTOR_SRC_COLOR:
707 for (i = 0; i < 3; ++i) {
708 if ((mask & (1U << i)) != 0) {
709 factor[i] = spe_allocate_available_register(f);
710 spe_or(f, factor[i], src[i], src[i]);
713 break;
715 case PIPE_BLENDFACTOR_SRC_ALPHA:
716 factor[0] = spe_allocate_available_register(f);
717 factor[1] = factor[0];
718 factor[2] = factor[0];
720 spe_or(f, factor[0], src[3], src[3]);
721 break;
723 case PIPE_BLENDFACTOR_DST_ALPHA:
724 factor[0] = dst[3];
725 factor[1] = dst[3];
726 factor[2] = dst[3];
727 break;
729 case PIPE_BLENDFACTOR_DST_COLOR:
730 factor[0] = dst[0];
731 factor[1] = dst[1];
732 factor[2] = dst[2];
733 break;
735 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
736 tmp = spe_allocate_available_register(f);
737 factor[0] = spe_allocate_available_register(f);
738 factor[1] = factor[0];
739 factor[2] = factor[0];
741 /* Alpha saturate means min(As, 1-Ad).
743 spe_il(f, tmp, 1);
744 spe_cuflt(f, tmp, tmp, 0);
745 spe_fs(f, tmp, tmp, dst[3]);
746 spe_fcgt(f, factor[0], tmp, src[3]);
747 spe_selb(f, factor[0], src[3], tmp, factor[0]);
749 spe_release_register(f, tmp);
750 break;
752 case PIPE_BLENDFACTOR_INV_CONST_COLOR:
753 tmp = spe_allocate_available_register(f);
754 spe_il(f, tmp, 1);
755 spe_cuflt(f, tmp, tmp, 0);
757 for (i = 0; i < 3; i++) {
758 factor[i] = spe_allocate_available_register(f);
760 spe_fs(f, factor[i], tmp, const_color[i]);
762 spe_release_register(f, tmp);
763 break;
765 case PIPE_BLENDFACTOR_CONST_COLOR:
766 for (i = 0; i < 3; i++) {
767 factor[i] = const_color[i];
769 break;
771 case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
772 factor[0] = spe_allocate_available_register(f);
773 factor[1] = factor[0];
774 factor[2] = factor[0];
776 tmp = spe_allocate_available_register(f);
777 spe_il(f, tmp, 1);
778 spe_cuflt(f, tmp, tmp, 0);
779 spe_fs(f, factor[0], tmp, const_color[3]);
780 spe_release_register(f, tmp);
781 break;
783 case PIPE_BLENDFACTOR_CONST_ALPHA:
784 factor[0] = const_color[3];
785 factor[1] = factor[0];
786 factor[2] = factor[0];
787 break;
789 case PIPE_BLENDFACTOR_ZERO:
790 break;
792 case PIPE_BLENDFACTOR_INV_SRC_COLOR:
793 tmp = spe_allocate_available_register(f);
795 spe_il(f, tmp, 1);
796 spe_cuflt(f, tmp, tmp, 0);
798 for (i = 0; i < 3; ++i) {
799 if ((mask & (1U << i)) != 0) {
800 factor[i] = spe_allocate_available_register(f);
801 spe_fs(f, factor[i], tmp, src[i]);
805 spe_release_register(f, tmp);
806 break;
808 case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
809 tmp = spe_allocate_available_register(f);
810 factor[0] = spe_allocate_available_register(f);
811 factor[1] = factor[0];
812 factor[2] = factor[0];
814 spe_il(f, tmp, 1);
815 spe_cuflt(f, tmp, tmp, 0);
816 spe_fs(f, factor[0], tmp, src[3]);
818 spe_release_register(f, tmp);
819 break;
821 case PIPE_BLENDFACTOR_INV_DST_ALPHA:
822 tmp = spe_allocate_available_register(f);
823 factor[0] = spe_allocate_available_register(f);
824 factor[1] = factor[0];
825 factor[2] = factor[0];
827 spe_il(f, tmp, 1);
828 spe_cuflt(f, tmp, tmp, 0);
829 spe_fs(f, factor[0], tmp, dst[3]);
831 spe_release_register(f, tmp);
832 break;
834 case PIPE_BLENDFACTOR_INV_DST_COLOR:
835 tmp = spe_allocate_available_register(f);
837 spe_il(f, tmp, 1);
838 spe_cuflt(f, tmp, tmp, 0);
840 for (i = 0; i < 3; ++i) {
841 if ((mask & (1U << i)) != 0) {
842 factor[i] = spe_allocate_available_register(f);
843 spe_fs(f, factor[i], tmp, dst[i]);
847 spe_release_register(f, tmp);
848 break;
850 case PIPE_BLENDFACTOR_SRC1_COLOR:
851 case PIPE_BLENDFACTOR_SRC1_ALPHA:
852 case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
853 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
854 default:
855 assert(0);
860 static void
861 emit_blend_calculation(struct spe_function *f,
862 unsigned func, unsigned sF, unsigned dF,
863 int src, int src_factor, int dst, int dst_factor)
865 int tmp = spe_allocate_available_register(f);
867 switch (func) {
868 case PIPE_BLEND_ADD:
869 if (sF == PIPE_BLENDFACTOR_ONE) {
870 if (dF == PIPE_BLENDFACTOR_ZERO) {
871 /* Do nothing. */
872 } else if (dF == PIPE_BLENDFACTOR_ONE) {
873 spe_fa(f, src, src, dst);
875 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
876 if (dF == PIPE_BLENDFACTOR_ZERO) {
877 spe_il(f, src, 0);
878 } else if (dF == PIPE_BLENDFACTOR_ONE) {
879 spe_or(f, src, dst, dst);
880 } else {
881 spe_fm(f, src, dst, dst_factor);
883 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
884 spe_fm(f, src, src, src_factor);
885 } else {
886 spe_fm(f, tmp, dst, dst_factor);
887 spe_fma(f, src, src, src_factor, tmp);
889 break;
891 case PIPE_BLEND_SUBTRACT:
892 if (sF == PIPE_BLENDFACTOR_ONE) {
893 if (dF == PIPE_BLENDFACTOR_ZERO) {
894 /* Do nothing. */
895 } else if (dF == PIPE_BLENDFACTOR_ONE) {
896 spe_fs(f, src, src, dst);
898 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
899 if (dF == PIPE_BLENDFACTOR_ZERO) {
900 spe_il(f, src, 0);
901 } else if (dF == PIPE_BLENDFACTOR_ONE) {
902 spe_il(f, tmp, 0);
903 spe_fs(f, src, tmp, dst);
904 } else {
905 spe_fm(f, src, dst, dst_factor);
907 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
908 spe_fm(f, src, src, src_factor);
909 } else {
910 spe_fm(f, tmp, dst, dst_factor);
911 spe_fms(f, src, src, src_factor, tmp);
913 break;
915 case PIPE_BLEND_REVERSE_SUBTRACT:
916 if (sF == PIPE_BLENDFACTOR_ONE) {
917 if (dF == PIPE_BLENDFACTOR_ZERO) {
918 spe_il(f, tmp, 0);
919 spe_fs(f, src, tmp, src);
920 } else if (dF == PIPE_BLENDFACTOR_ONE) {
921 spe_fs(f, src, dst, src);
923 } else if (sF == PIPE_BLENDFACTOR_ZERO) {
924 if (dF == PIPE_BLENDFACTOR_ZERO) {
925 spe_il(f, src, 0);
926 } else if (dF == PIPE_BLENDFACTOR_ONE) {
927 spe_or(f, src, dst, dst);
928 } else {
929 spe_fm(f, src, dst, dst_factor);
931 } else if (dF == PIPE_BLENDFACTOR_ZERO) {
932 spe_fm(f, src, src, src_factor);
933 } else {
934 spe_fm(f, tmp, src, src_factor);
935 spe_fms(f, src, src, dst_factor, tmp);
937 break;
939 case PIPE_BLEND_MIN:
940 spe_cgt(f, tmp, src, dst);
941 spe_selb(f, src, src, dst, tmp);
942 break;
944 case PIPE_BLEND_MAX:
945 spe_cgt(f, tmp, src, dst);
946 spe_selb(f, src, dst, src, tmp);
947 break;
949 default:
950 assert(0);
953 spe_release_register(f, tmp);
958 * Generate code to perform alpha blending on the SPE
960 void
961 cell_generate_alpha_blend(struct cell_blend_state *cb)
963 struct pipe_blend_state *const b = &cb->base;
964 struct spe_function *const f = &cb->code;
966 /* This code generates a maximum of 3 (source alpha factor)
967 * + 3 (destination alpha factor) + (3 * 6) (source color factor)
968 * + (3 * 6) (destination color factor) + (4 * 2) (blend equation)
969 * + 4 (fragment mask) + 1 (return) = 55 instlructions. Round up to 64 to
970 * make it a happy power-of-two.
972 spe_init_func(f, SPE_INST_SIZE * 64);
975 const int frag[4] = {
976 spe_allocate_register(f, 3),
977 spe_allocate_register(f, 4),
978 spe_allocate_register(f, 5),
979 spe_allocate_register(f, 6),
981 const int pixel[4] = {
982 spe_allocate_register(f, 7),
983 spe_allocate_register(f, 8),
984 spe_allocate_register(f, 9),
985 spe_allocate_register(f, 10),
987 const int const_color[4] = {
988 spe_allocate_register(f, 11),
989 spe_allocate_register(f, 12),
990 spe_allocate_register(f, 13),
991 spe_allocate_register(f, 14),
993 unsigned func[4];
994 unsigned sF[4];
995 unsigned dF[4];
996 unsigned i;
997 int src_factor[4];
998 int dst_factor[4];
1001 /* Does the selected blend mode make use of the source / destination
1002 * color (RGB) blend factors?
1004 boolean need_color_factor = b->rt[0].blend_enable
1005 && (b->rt[0].rgb_func != PIPE_BLEND_MIN)
1006 && (b->rt[0].rgb_func != PIPE_BLEND_MAX);
1008 /* Does the selected blend mode make use of the source / destination
1009 * alpha blend factors?
1011 boolean need_alpha_factor = b->rt[0].blend_enable
1012 && (b->rt[0].alpha_func != PIPE_BLEND_MIN)
1013 && (b->rt[0].alpha_func != PIPE_BLEND_MAX);
1016 if (b->rt[0].blend_enable) {
1017 sF[0] = b->rt[0].rgb_src_factor;
1018 sF[1] = sF[0];
1019 sF[2] = sF[0];
1020 switch (b->rt[0].alpha_src_factor & 0x0f) {
1021 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
1022 sF[3] = PIPE_BLENDFACTOR_ONE;
1023 break;
1024 case PIPE_BLENDFACTOR_SRC_COLOR:
1025 case PIPE_BLENDFACTOR_DST_COLOR:
1026 case PIPE_BLENDFACTOR_CONST_COLOR:
1027 case PIPE_BLENDFACTOR_SRC1_COLOR:
1028 sF[3] = b->rt[0].alpha_src_factor + 1;
1029 break;
1030 default:
1031 sF[3] = b->rt[0].alpha_src_factor;
1034 dF[0] = b->rt[0].rgb_dst_factor;
1035 dF[1] = dF[0];
1036 dF[2] = dF[0];
1037 switch (b->rt[0].alpha_dst_factor & 0x0f) {
1038 case PIPE_BLENDFACTOR_SRC_COLOR:
1039 case PIPE_BLENDFACTOR_DST_COLOR:
1040 case PIPE_BLENDFACTOR_CONST_COLOR:
1041 case PIPE_BLENDFACTOR_SRC1_COLOR:
1042 dF[3] = b->rt[0].alpha_dst_factor + 1;
1043 break;
1044 default:
1045 dF[3] = b->rt[0].alpha_dst_factor;
1048 func[0] = b->rt[0].rgb_func;
1049 func[1] = func[0];
1050 func[2] = func[0];
1051 func[3] = b->rt[0].alpha_func;
1052 } else {
1053 sF[0] = PIPE_BLENDFACTOR_ONE;
1054 sF[1] = PIPE_BLENDFACTOR_ONE;
1055 sF[2] = PIPE_BLENDFACTOR_ONE;
1056 sF[3] = PIPE_BLENDFACTOR_ONE;
1057 dF[0] = PIPE_BLENDFACTOR_ZERO;
1058 dF[1] = PIPE_BLENDFACTOR_ZERO;
1059 dF[2] = PIPE_BLENDFACTOR_ZERO;
1060 dF[3] = PIPE_BLENDFACTOR_ZERO;
1062 func[0] = PIPE_BLEND_ADD;
1063 func[1] = PIPE_BLEND_ADD;
1064 func[2] = PIPE_BLEND_ADD;
1065 func[3] = PIPE_BLEND_ADD;
1069 /* If alpha writing is enabled and the alpha blend mode requires use of
1070 * the alpha factor, calculate the alpha factor.
1072 if (((b->rt[0].colormask & 8) != 0) && need_alpha_factor) {
1073 src_factor[3] = emit_alpha_factor_calculation(f, sF[3], const_color[3],
1074 frag[3], pixel[3]);
1076 /* If the alpha destination blend factor is the same as the alpha source
1077 * blend factor, re-use the previously calculated value.
1079 dst_factor[3] = (dF[3] == sF[3])
1080 ? src_factor[3]
1081 : emit_alpha_factor_calculation(f, dF[3], const_color[3],
1082 frag[3], pixel[3]);
1086 if (sF[0] == sF[3]) {
1087 src_factor[0] = src_factor[3];
1088 src_factor[1] = src_factor[3];
1089 src_factor[2] = src_factor[3];
1090 } else if (sF[0] == dF[3]) {
1091 src_factor[0] = dst_factor[3];
1092 src_factor[1] = dst_factor[3];
1093 src_factor[2] = dst_factor[3];
1094 } else if (need_color_factor) {
1095 emit_color_factor_calculation(f,
1096 b->rt[0].rgb_src_factor,
1097 b->rt[0].colormask,
1098 frag, pixel, const_color, src_factor);
1102 if (dF[0] == sF[3]) {
1103 dst_factor[0] = src_factor[3];
1104 dst_factor[1] = src_factor[3];
1105 dst_factor[2] = src_factor[3];
1106 } else if (dF[0] == dF[3]) {
1107 dst_factor[0] = dst_factor[3];
1108 dst_factor[1] = dst_factor[3];
1109 dst_factor[2] = dst_factor[3];
1110 } else if (dF[0] == sF[0]) {
1111 dst_factor[0] = src_factor[0];
1112 dst_factor[1] = src_factor[1];
1113 dst_factor[2] = src_factor[2];
1114 } else if (need_color_factor) {
1115 emit_color_factor_calculation(f,
1116 b->rt[0].rgb_dst_factor,
1117 b->rt[0].colormask,
1118 frag, pixel, const_color, dst_factor);
1123 for (i = 0; i < 4; ++i) {
1124 if ((b->rt[0].colormask & (1U << i)) != 0) {
1125 emit_blend_calculation(f,
1126 func[i], sF[i], dF[i],
1127 frag[i], src_factor[i],
1128 pixel[i], dst_factor[i]);
1132 spe_bi(f, 0, 0, 0);
1134 #if 0
1136 const uint32_t *p = f->store;
1138 printf("# %u instructions\n", f->csr - f->store);
1139 printf("# blend (%sabled)\n",
1140 (cb->base.blend_enable) ? "en" : "dis");
1141 printf("# RGB func / sf / df: %u %u %u\n",
1142 cb->base.rgb_func,
1143 cb->base.rgb_src_factor,
1144 cb->base.rgb_dst_factor);
1145 printf("# ALP func / sf / df: %u %u %u\n",
1146 cb->base.alpha_func,
1147 cb->base.alpha_src_factor,
1148 cb->base.alpha_dst_factor);
1150 printf("\t.text\n");
1151 for (/* empty */; p < f->csr; p++) {
1152 printf("\t.long\t0x%04x\n", *p);
1154 fflush(stdout);
1156 #endif
1160 static int
1161 PC_OFFSET(const struct spe_function *f, const void *d)
1163 const intptr_t pc = (intptr_t) &f->store[f->num_inst];
1164 const intptr_t ea = ~0x0f & (intptr_t) d;
1166 return (ea - pc) >> 2;
1171 * Generate code to perform color conversion and logic op
1173 * \bug
1174 * The code generated by this function should also perform dithering.
1176 * \bug
1177 * The code generated by this function should also perform color-write
1178 * masking.
1180 * \bug
1181 * Only two framebuffer formats are supported at this time.
1183 void
1184 cell_generate_logic_op(struct spe_function *f,
1185 const struct pipe_blend_state *blend,
1186 struct pipe_surface *surf)
1188 const unsigned logic_op = (blend->logicop_enable)
1189 ? blend->logicop_func : PIPE_LOGICOP_COPY;
1191 /* This code generates a maximum of 37 instructions. An additional 32
1192 * bytes (equiv. to 8 instructions) are needed for data storage. Round up
1193 * to 64 to make it a happy power-of-two.
1195 spe_init_func(f, SPE_INST_SIZE * 64);
1198 /* Pixel colors in framebuffer format in AoS layout.
1200 const int pixel[4] = {
1201 spe_allocate_register(f, 3),
1202 spe_allocate_register(f, 4),
1203 spe_allocate_register(f, 5),
1204 spe_allocate_register(f, 6),
1207 /* Fragment colors stored as floats in SoA layout.
1209 const int frag[4] = {
1210 spe_allocate_register(f, 7),
1211 spe_allocate_register(f, 8),
1212 spe_allocate_register(f, 9),
1213 spe_allocate_register(f, 10),
1216 const int mask = spe_allocate_register(f, 11);
1219 /* Short-circuit the noop and invert cases.
1221 if ((logic_op == PIPE_LOGICOP_NOOP) || (blend->rt[0].colormask == 0)) {
1222 spe_bi(f, 0, 0, 0);
1223 return;
1224 } else if (logic_op == PIPE_LOGICOP_INVERT) {
1225 spe_nor(f, pixel[0], pixel[0], pixel[0]);
1226 spe_nor(f, pixel[1], pixel[1], pixel[1]);
1227 spe_nor(f, pixel[2], pixel[2], pixel[2]);
1228 spe_nor(f, pixel[3], pixel[3], pixel[3]);
1229 spe_bi(f, 0, 0, 0);
1230 return;
1234 const int tmp[4] = {
1235 spe_allocate_available_register(f),
1236 spe_allocate_available_register(f),
1237 spe_allocate_available_register(f),
1238 spe_allocate_available_register(f),
1241 const int shuf_xpose_hi = spe_allocate_available_register(f);
1242 const int shuf_xpose_lo = spe_allocate_available_register(f);
1243 const int shuf_color = spe_allocate_available_register(f);
1246 /* Pointer to the begining of the function's private data area.
1248 uint32_t *const data = ((uint32_t *) f->store) + (64 - 8);
1251 /* Convert fragment colors to framebuffer format in AoS layout.
1253 switch (surf->format) {
1254 case PIPE_FORMAT_B8G8R8A8_UNORM:
1255 data[0] = 0x00010203;
1256 data[1] = 0x10111213;
1257 data[2] = 0x04050607;
1258 data[3] = 0x14151617;
1259 data[4] = 0x0c000408;
1260 data[5] = 0x80808080;
1261 data[6] = 0x80808080;
1262 data[7] = 0x80808080;
1263 break;
1264 case PIPE_FORMAT_A8R8G8B8_UNORM:
1265 data[0] = 0x03020100;
1266 data[1] = 0x13121110;
1267 data[2] = 0x07060504;
1268 data[3] = 0x17161514;
1269 data[4] = 0x0804000c;
1270 data[5] = 0x80808080;
1271 data[6] = 0x80808080;
1272 data[7] = 0x80808080;
1273 break;
1274 default:
1275 fprintf(stderr, "CELL: Bad pixel format in cell_generate_logic_op()");
1276 ASSERT(0);
1279 spe_ilh(f, tmp[0], 0x0808);
1280 spe_lqr(f, shuf_xpose_hi, PC_OFFSET(f, data+0));
1281 spe_lqr(f, shuf_color, PC_OFFSET(f, data+4));
1282 spe_a(f, shuf_xpose_lo, shuf_xpose_hi, tmp[0]);
1284 spe_shufb(f, tmp[0], frag[0], frag[2], shuf_xpose_hi);
1285 spe_shufb(f, tmp[1], frag[0], frag[2], shuf_xpose_lo);
1286 spe_shufb(f, tmp[2], frag[1], frag[3], shuf_xpose_hi);
1287 spe_shufb(f, tmp[3], frag[1], frag[3], shuf_xpose_lo);
1289 spe_shufb(f, frag[0], tmp[0], tmp[2], shuf_xpose_hi);
1290 spe_shufb(f, frag[1], tmp[0], tmp[2], shuf_xpose_lo);
1291 spe_shufb(f, frag[2], tmp[1], tmp[3], shuf_xpose_hi);
1292 spe_shufb(f, frag[3], tmp[1], tmp[3], shuf_xpose_lo);
1294 spe_cfltu(f, frag[0], frag[0], 32);
1295 spe_cfltu(f, frag[1], frag[1], 32);
1296 spe_cfltu(f, frag[2], frag[2], 32);
1297 spe_cfltu(f, frag[3], frag[3], 32);
1299 spe_shufb(f, frag[0], frag[0], pixel[0], shuf_color);
1300 spe_shufb(f, frag[1], frag[1], pixel[1], shuf_color);
1301 spe_shufb(f, frag[2], frag[2], pixel[2], shuf_color);
1302 spe_shufb(f, frag[3], frag[3], pixel[3], shuf_color);
1305 /* If logic op is enabled, perform the requested logical operation on the
1306 * converted fragment colors and the pixel colors.
1308 switch (logic_op) {
1309 case PIPE_LOGICOP_CLEAR:
1310 spe_il(f, frag[0], 0);
1311 spe_il(f, frag[1], 0);
1312 spe_il(f, frag[2], 0);
1313 spe_il(f, frag[3], 0);
1314 break;
1315 case PIPE_LOGICOP_NOR:
1316 spe_nor(f, frag[0], frag[0], pixel[0]);
1317 spe_nor(f, frag[1], frag[1], pixel[1]);
1318 spe_nor(f, frag[2], frag[2], pixel[2]);
1319 spe_nor(f, frag[3], frag[3], pixel[3]);
1320 break;
1321 case PIPE_LOGICOP_AND_INVERTED:
1322 spe_andc(f, frag[0], pixel[0], frag[0]);
1323 spe_andc(f, frag[1], pixel[1], frag[1]);
1324 spe_andc(f, frag[2], pixel[2], frag[2]);
1325 spe_andc(f, frag[3], pixel[3], frag[3]);
1326 break;
1327 case PIPE_LOGICOP_COPY_INVERTED:
1328 spe_nor(f, frag[0], frag[0], frag[0]);
1329 spe_nor(f, frag[1], frag[1], frag[1]);
1330 spe_nor(f, frag[2], frag[2], frag[2]);
1331 spe_nor(f, frag[3], frag[3], frag[3]);
1332 break;
1333 case PIPE_LOGICOP_AND_REVERSE:
1334 spe_andc(f, frag[0], frag[0], pixel[0]);
1335 spe_andc(f, frag[1], frag[1], pixel[1]);
1336 spe_andc(f, frag[2], frag[2], pixel[2]);
1337 spe_andc(f, frag[3], frag[3], pixel[3]);
1338 break;
1339 case PIPE_LOGICOP_XOR:
1340 spe_xor(f, frag[0], frag[0], pixel[0]);
1341 spe_xor(f, frag[1], frag[1], pixel[1]);
1342 spe_xor(f, frag[2], frag[2], pixel[2]);
1343 spe_xor(f, frag[3], frag[3], pixel[3]);
1344 break;
1345 case PIPE_LOGICOP_NAND:
1346 spe_nand(f, frag[0], frag[0], pixel[0]);
1347 spe_nand(f, frag[1], frag[1], pixel[1]);
1348 spe_nand(f, frag[2], frag[2], pixel[2]);
1349 spe_nand(f, frag[3], frag[3], pixel[3]);
1350 break;
1351 case PIPE_LOGICOP_AND:
1352 spe_and(f, frag[0], frag[0], pixel[0]);
1353 spe_and(f, frag[1], frag[1], pixel[1]);
1354 spe_and(f, frag[2], frag[2], pixel[2]);
1355 spe_and(f, frag[3], frag[3], pixel[3]);
1356 break;
1357 case PIPE_LOGICOP_EQUIV:
1358 spe_eqv(f, frag[0], frag[0], pixel[0]);
1359 spe_eqv(f, frag[1], frag[1], pixel[1]);
1360 spe_eqv(f, frag[2], frag[2], pixel[2]);
1361 spe_eqv(f, frag[3], frag[3], pixel[3]);
1362 break;
1363 case PIPE_LOGICOP_OR_INVERTED:
1364 spe_orc(f, frag[0], pixel[0], frag[0]);
1365 spe_orc(f, frag[1], pixel[1], frag[1]);
1366 spe_orc(f, frag[2], pixel[2], frag[2]);
1367 spe_orc(f, frag[3], pixel[3], frag[3]);
1368 break;
1369 case PIPE_LOGICOP_COPY:
1370 break;
1371 case PIPE_LOGICOP_OR_REVERSE:
1372 spe_orc(f, frag[0], frag[0], pixel[0]);
1373 spe_orc(f, frag[1], frag[1], pixel[1]);
1374 spe_orc(f, frag[2], frag[2], pixel[2]);
1375 spe_orc(f, frag[3], frag[3], pixel[3]);
1376 break;
1377 case PIPE_LOGICOP_OR:
1378 spe_or(f, frag[0], frag[0], pixel[0]);
1379 spe_or(f, frag[1], frag[1], pixel[1]);
1380 spe_or(f, frag[2], frag[2], pixel[2]);
1381 spe_or(f, frag[3], frag[3], pixel[3]);
1382 break;
1383 case PIPE_LOGICOP_SET:
1384 spe_il(f, frag[0], ~0);
1385 spe_il(f, frag[1], ~0);
1386 spe_il(f, frag[2], ~0);
1387 spe_il(f, frag[3], ~0);
1388 break;
1390 /* These two cases are short-circuited above.
1392 case PIPE_LOGICOP_INVERT:
1393 case PIPE_LOGICOP_NOOP:
1394 default:
1395 assert(0);
1399 /* Apply fragment mask.
1401 spe_ilh(f, tmp[0], 0x0000);
1402 spe_ilh(f, tmp[1], 0x0404);
1403 spe_ilh(f, tmp[2], 0x0808);
1404 spe_ilh(f, tmp[3], 0x0c0c);
1406 spe_shufb(f, tmp[0], mask, mask, tmp[0]);
1407 spe_shufb(f, tmp[1], mask, mask, tmp[1]);
1408 spe_shufb(f, tmp[2], mask, mask, tmp[2]);
1409 spe_shufb(f, tmp[3], mask, mask, tmp[3]);
1411 spe_selb(f, pixel[0], pixel[0], frag[0], tmp[0]);
1412 spe_selb(f, pixel[1], pixel[1], frag[1], tmp[1]);
1413 spe_selb(f, pixel[2], pixel[2], frag[2], tmp[2]);
1414 spe_selb(f, pixel[3], pixel[3], frag[3], tmp[3]);
1416 spe_bi(f, 0, 0, 0);
1418 #if 0
1420 const uint32_t *p = f->store;
1421 unsigned i;
1423 printf("# %u instructions\n", f->csr - f->store);
1425 printf("\t.text\n");
1426 for (i = 0; i < 64; i++) {
1427 printf("\t.long\t0x%04x\n", p[i]);
1429 fflush(stdout);
1431 #endif