1 /* libs/pixelflinger/codeflinger/blending.cpp
3 ** Copyright 2006, The Android Open Source Project
5 ** Licensed under the Apache License, Version 2.0 (the "License");
6 ** you may not use this file except in compliance with the License.
7 ** You may obtain a copy of the License at
9 ** http://www.apache.org/licenses/LICENSE-2.0
11 ** Unless required by applicable law or agreed to in writing, software
12 ** distributed under the License is distributed on an "AS IS" BASIS,
13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 ** See the License for the specific language governing permissions and
15 ** limitations under the License.
22 #include <sys/types.h>
24 #include <cutils/log.h>
26 #include "GGLAssembler.h" //codeflinger/
// Emits the fog stage for one component. When fog is enabled for
// `component` (mInfo[component].fog), the incoming fragment in `temp` is
// blended with the fog color loaded from the context, weighted by the fog
// factor loaded from generated_vars.f, using build_blendFOneMinusF().
// `temp` is both the input fragment and the output.
// NOTE(review): `component` and `regs` are used below but their declarations
// are not part of the lines shown here -- confirm against the full signature.
31 void GGLAssembler::build_fog(
32 component_t
& temp
, // incoming fragment / output
36 if (mInfo
[component
].fog
) {
37 Scratch
scratches(registerFile());
// snapshot the incoming fragment (register, high bit, flags) before `temp`
// is possibly redirected to a new register below
40 integer_t
fragment(temp
.reg
, temp
.h
, temp
.flags
);
// `temp` receives the result: if its current register must not be
// overwritten, take a fresh one from `regs` and mark it CORRUPTIBLE
41 if (!(temp
.flags
& CORRUPTIBLE
)) {
42 temp
.reg
= regs
.obtain();
43 temp
.flags
|= CORRUPTIBLE
;
// fog color for this component: a byte load from the context (8-bit value)
46 integer_t
fogColor(scratches
.obtain(), 8, CORRUPTIBLE
);
47 LDRB(AL
, fogColor
.reg
, mBuilderContext
.Rctx
,
48 immed12_pre(GGL_OFFSETOF(state
.fog
.color
[component
])));
// fog factor, declared as a 16-bit quantity
50 integer_t
factor(scratches
.obtain(), 16, CORRUPTIBLE
);
51 CONTEXT_LOAD(factor
.reg
, generated_vars
.f
);
53 // clamp fog factor (TODO: see if there is a way to guarantee
54 // we won't overflow, when setting the iterators)
// BIC with (factor ASR 31) clears every bit when factor is negative, giving 0;
// the CMP / MOV(HS) pair then caps the value at 0x10000.
55 BIC(AL
, 0, factor
.reg
, factor
.reg
, reg_imm(factor
.reg
, ASR
, 31));
56 CMP(AL
, factor
.reg
, imm( 0x10000 ));
57 MOV(HS
, 0, factor
.reg
, imm( 0x10000 ));
// temp = fragment*factor + fogColor*(1-factor)
59 build_blendFOneMinusF(temp
, factor
, fragment
, fogColor
);
// Emits the blending stage for one component.
//   temp  : incoming fragment on entry, blended result on exit
//   pixel : the framebuffer pixel the destination component is extracted from
// The source/destination blend factors are mBlendSrc/mBlendDst, or
// mBlendSrcA/mBlendDstA when `component` is GGLFormat::ALPHA. When the two
// factors form an f / (1-f) pair, a single-multiply form is emitted through
// build_blendFOneMinusF() or build_blendOneMinusFF(); otherwise the general
// R = S*fs + D*fd path is built from build_blend_factor(), mul_factor(),
// mul_factor_add() and component_add().
// NOTE(review): `component`, `regs`, `fb` and `factor` are used below but
// declared outside the lines shown here; the statements guarded by the first
// `if` tests are not visible either -- confirm against the full source.
63 void GGLAssembler::build_blending(
64 component_t
& temp
, // incoming fragment / output
65 const pixel_t
& pixel
, // framebuffer
69 if (!mInfo
[component
].blend
)
// the alpha component has its own pair of blend factors
72 int fs
= component
==GGLFormat::ALPHA
? mBlendSrcA
: mBlendSrc
;
73 int fd
= component
==GGLFormat::ALPHA
? mBlendDstA
: mBlendDst
;
74 if (fs
==GGL_SRC_ALPHA_SATURATE
&& component
==GGLFormat::ALPHA
)
// bitmask of what the blend needs from source and destination
76 const int blending
= blending_codes(fs
, fd
);
78 // here, blending will produce something which doesn't depend on
79 // that component (eg: GL_ZERO:GL_*), so the register has not been
80 // allocated yet. Will never be used as a source.
81 temp
= component_t(regs
.obtain(), CORRUPTIBLE
);
84 // we are doing real blending...
86 // fragment: extracted src
87 // temp: component_t(fragment) and result
89 // scoped register allocator
90 Scratch
scratches(registerFile());
93 // we can optimize these cases a bit...
94 // (1) saturation is not needed
95 // (2) we can use only one multiply instead of 2
96 // (3) we can reduce the register pressure
97 // R = S*f + D*(1-f) = (S-D)*f + D
98 // R = S*(1-f) + D*f = (D-S)*f + S
// opt1: source factor is f and destination factor is (1-f)
100 const bool same_factor_opt1
=
101 (fs
==GGL_DST_COLOR
&& fd
==GGL_ONE_MINUS_DST_COLOR
) ||
102 (fs
==GGL_SRC_COLOR
&& fd
==GGL_ONE_MINUS_SRC_COLOR
) ||
103 (fs
==GGL_DST_ALPHA
&& fd
==GGL_ONE_MINUS_DST_ALPHA
) ||
104 (fs
==GGL_SRC_ALPHA
&& fd
==GGL_ONE_MINUS_SRC_ALPHA
);
// opt2: source factor is (1-f) and destination factor is f
106 const bool same_factor_opt2
=
107 (fs
==GGL_ONE_MINUS_DST_COLOR
&& fd
==GGL_DST_COLOR
) ||
108 (fs
==GGL_ONE_MINUS_SRC_COLOR
&& fd
==GGL_SRC_COLOR
) ||
109 (fs
==GGL_ONE_MINUS_DST_ALPHA
&& fd
==GGL_DST_ALPHA
) ||
110 (fs
==GGL_ONE_MINUS_SRC_ALPHA
&& fd
==GGL_SRC_ALPHA
);
113 // XXX: we could also optimize these cases:
114 // R = S*f + D*f = (S+D)*f
115 // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
116 // R = S*D + D*S = 2*S*D
119 // see if we need to extract 'component' from the destination (fb)
121 if (blending
& (BLEND_DST
|FACTOR_DST
)) {
122 fb
.setTo(scratches
.obtain(), 32);
123 extract(fb
, pixel
, component
);
125 // XXX: maybe what we should do instead, is simply
126 // expand fb -or- fragment to the larger of the two
127 if (fb
.size() < temp
.size()) {
128 // for now we expand 'fb' to min(fragment, 8)
129 int new_size
= temp
.size() < 8 ? temp
.size() : 8;
130 expand(fb
, fb
, new_size
);
136 // convert input fragment to integer_t
// when temp may be overwritten, shift it down in place so its low bit is bit 0
137 if (temp
.l
&& (temp
.flags
& CORRUPTIBLE
)) {
138 MOV(AL
, 0, temp
.reg
, reg_imm(temp
.reg
, LSR
, temp
.l
));
142 integer_t
fragment(temp
.reg
, temp
.size(), temp
.flags
);
144 // if not done yet, convert input fragment to integer_t
146 // here we know temp is not CORRUPTIBLE
147 fragment
.reg
= scratches
.obtain();
148 MOV(AL
, 0, fragment
.reg
, reg_imm(temp
.reg
, LSR
, temp
.l
));
149 fragment
.flags
|= CORRUPTIBLE
;
152 if (!(temp
.flags
& CORRUPTIBLE
)) {
153 // temp is not corruptible, but since it's the destination it
154 // will be modified, so we need to allocate a new register.
155 temp
.reg
= regs
.obtain();
// NOTE(review): CORRUPTIBLE is already clear in temp.flags inside this
// branch, so the next statement leaves temp.flags unchanged; temp is marked
// CORRUPTIBLE by the last statement of this function.
156 temp
.flags
&= ~CORRUPTIBLE
;
157 fragment
.flags
&= ~CORRUPTIBLE
;
160 if ((blending
& BLEND_SRC
) && !same_factor_opt1
) {
161 // source (fragment) is needed for the blending stage
162 // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
163 fragment
.flags
&= ~CORRUPTIBLE
;
167 if (same_factor_opt1
) {
168 // R = S*f + D*(1-f) = (S-D)*f + D
170 build_blend_factor(factor
, fs
,
171 component
, pixel
, fragment
, fb
, scratches
);
172 // fb is always corruptible from this point
173 fb
.flags
|= CORRUPTIBLE
;
174 build_blendFOneMinusF(temp
, factor
, fragment
, fb
);
175 } else if (same_factor_opt2
) {
176 // R = S*(1-f) + D*f = (D-S)*f + S
178 // fb is always corruptible here
179 fb
.flags
|= CORRUPTIBLE
;
180 build_blend_factor(factor
, fd
,
181 component
, pixel
, fragment
, fb
, scratches
);
182 build_blendOneMinusFF(temp
, factor
, fragment
, fb
);
// general case: separate source and destination factors
184 integer_t src_factor
;
185 integer_t dst_factor
;
187 // if destination (fb) is not needed for the blending stage,
188 // then it can be marked as CORRUPTIBLE
189 if (!(blending
& BLEND_DST
)) {
190 fb
.flags
|= CORRUPTIBLE
;
193 // XXX: try to mark some registers as CORRUPTIBLE
194 // in most cases we could make those corruptible
195 // when we're processing the last component
196 // but not always, for instance
197 // when fragment is constant and not reloaded
198 // when fb is needed for logic-ops or masking
199 // when a register is aliased (for instance with mAlphaSource)
203 if (fd
==GGL_ZERO
) { // R = 0
204 // already taken care of
205 } else if (fd
==GGL_ONE
) { // R = D
206 // already taken care of
209 build_blend_factor(dst_factor
, fd
,
210 component
, pixel
, fragment
, fb
, scratches
);
211 mul_factor(temp
, fb
, dst_factor
);
213 } else if (fs
==GGL_ONE
) {
214 if (fd
==GGL_ZERO
) { // R = S
215 // NOP, taken care of
216 } else if (fd
==GGL_ONE
) { // R = S + D
217 component_add(temp
, fb
, fragment
); // args order matters
219 } else { // R = S + D*fd
221 build_blend_factor(dst_factor
, fd
,
222 component
, pixel
, fragment
, fb
, scratches
);
223 mul_factor_add(temp
, fb
, dst_factor
, component_t(fragment
));
228 build_blend_factor(src_factor
, fs
,
229 component
, pixel
, fragment
, fb
, scratches
);
230 if (fd
==GGL_ZERO
) { // R = S*fs
231 mul_factor(temp
, fragment
, src_factor
);
232 } else if (fd
==GGL_ONE
) { // R = S*fs + D
233 mul_factor_add(temp
, fragment
, src_factor
, component_t(fb
));
235 } else { // R = S*fs + D*fd
236 mul_factor(temp
, fragment
, src_factor
);
// the source factor is no longer needed once S*fs is in temp: hand its
// scratch register back before building the destination factor
237 if (scratches
.isUsed(src_factor
.reg
))
238 scratches
.recycle(src_factor
.reg
);
240 build_blend_factor(dst_factor
, fd
,
241 component
, pixel
, fragment
, fb
, scratches
);
242 mul_factor_add(temp
, fb
, dst_factor
, temp
);
243 if (!same_factor_opt1
&& !same_factor_opt2
) {
250 // now we can be corrupted (it's the dest)
251 temp
.flags
|= CORRUPTIBLE
;
// Sets up `factor` with the blend factor selected by `f` for `component`.
//   factor    : output; marked CORRUPTIBLE unless it aliases mAlphaSource
//   f         : GGL blend-factor code
//   dst_pixel : framebuffer pixel, used to extract destination alpha
// For the source-alpha factors the result may alias mAlphaSource, and
// mBlendFactorCached records which variant that register currently holds.
// The ONE_MINUS_* factors are formed by subtracting the plain factor from
// (1 << factor.s) with RSB. Finally the factor is shifted right by
// (factor.s - 8), as the comment near the end explains.
// NOTE(review): `fragment`, `fb` and `scratches` are used below but their
// parameter declarations, the switch statements and several case labels
// are not part of the lines shown here.
254 void GGLAssembler::build_blend_factor(
255 integer_t
& factor
, int f
, int component
,
256 const pixel_t
& dst_pixel
,
261 integer_t
src_alpha(fragment
);
263 // src_factor/dst_factor won't be used after blending,
264 // so it's fine to mark them as CORRUPTIBLE (if not aliased)
265 factor
.flags
|= CORRUPTIBLE
;
268 case GGL_ONE_MINUS_SRC_ALPHA
:
270 if (component
==GGLFormat::ALPHA
&& !isAlphaSourceNeeded()) {
271 // we're processing alpha, so we already have
272 // src-alpha in fragment, and we need src-alpha just this time.
274 // alpha-src will be needed for other components
275 if (!mBlendFactorCached
|| mBlendFactorCached
==f
) {
// factor aliases mAlphaSource here, so it must not be treated as CORRUPTIBLE
276 src_alpha
= mAlphaSource
;
277 factor
= mAlphaSource
;
278 factor
.flags
&= ~CORRUPTIBLE
;
279 // we already computed the blend factor before, nothing to do.
280 if (mBlendFactorCached
)
282 // this is the first time, make sure to compute the blend
284 mBlendFactorCached
= f
;
287 // we have a cached alpha blend factor, but we want another one,
288 // this should really not happen because by construction,
289 // we cannot have BOTH source and destination
290 // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
291 // the blending stage uses the f/(1-f) optimization
293 // for completeness, we handle this case though. Since there
294 // are only 2 choices, this means we want "the other one"
296 factor
= mAlphaSource
;
297 factor
.flags
&= ~CORRUPTIBLE
;
// flip the cached value in place: factor = (1 << factor.s) - factor
298 RSB(AL
, 0, factor
.reg
, factor
.reg
, imm((1<<factor
.s
)));
299 mBlendFactorCached
= f
;
304 case GGL_ONE_MINUS_DST_COLOR
:
306 case GGL_ONE_MINUS_SRC_COLOR
:
308 case GGL_ONE_MINUS_DST_ALPHA
:
310 case GGL_SRC_ALPHA_SATURATE
:
311 // help us find out what register we can use for the blend-factor
312 // CORRUPTIBLE registers are chosen first, or a new one is allocated.
// whichever register is taken over for the factor loses its CORRUPTIBLE flag
313 if (fragment
.flags
& CORRUPTIBLE
) {
314 factor
.setTo(fragment
.reg
, 32, CORRUPTIBLE
);
315 fragment
.flags
&= ~CORRUPTIBLE
;
316 } else if (fb
.flags
& CORRUPTIBLE
) {
317 factor
.setTo(fb
.reg
, 32, CORRUPTIBLE
);
318 fb
.flags
&= ~CORRUPTIBLE
;
320 factor
.setTo(scratches
.obtain(), 32, CORRUPTIBLE
);
325 // XXX: doesn't work if size==1
// Each ADD below computes x + (x >> (s-1)), i.e. adds the value's top bit to
// the value itself, so an all-ones s-bit input becomes exactly (1 << s).
328 case GGL_ONE_MINUS_DST_COLOR
:
331 ADD(AL
, 0, factor
.reg
, fb
.reg
, reg_imm(fb
.reg
, LSR
, fb
.s
-1));
333 case GGL_ONE_MINUS_SRC_COLOR
:
335 factor
.s
= fragment
.s
;
336 ADD(AL
, 0, factor
.reg
, fragment
.reg
,
337 reg_imm(fragment
.reg
, LSR
, fragment
.s
-1));
339 case GGL_ONE_MINUS_SRC_ALPHA
:
341 factor
.s
= src_alpha
.s
;
342 ADD(AL
, 0, factor
.reg
, src_alpha
.reg
,
343 reg_imm(src_alpha
.reg
, LSR
, src_alpha
.s
-1));
345 case GGL_ONE_MINUS_DST_ALPHA
:
347 // XXX: should be precomputed
348 extract(factor
, dst_pixel
, GGLFormat::ALPHA
);
349 ADD(AL
, 0, factor
.reg
, factor
.reg
,
350 reg_imm(factor
.reg
, LSR
, factor
.s
-1));
352 case GGL_SRC_ALPHA_SATURATE
:
353 // XXX: should be precomputed
354 // XXX: f = min(As, 1-Ad)
355 // btw, we're guaranteed that Ad's size is <= 8, because
356 // it's extracted from the framebuffer
// the ONE_MINUS_* variants: factor = (1 << factor.s) - factor
361 case GGL_ONE_MINUS_DST_COLOR
:
362 case GGL_ONE_MINUS_SRC_COLOR
:
363 case GGL_ONE_MINUS_DST_ALPHA
:
364 case GGL_ONE_MINUS_SRC_ALPHA
:
365 RSB(AL
, 0, factor
.reg
, factor
.reg
, imm((1<<factor
.s
)));
368 // don't need more than 8-bits for the blend factor
369 // and this will prevent overflows in the multiplies later
371 MOV(AL
, 0, factor
.reg
, reg_imm(factor
.reg
, LSR
, factor
.s
-8));
// Builds a bitmask describing what the blend with source factor `fs` and
// destination factor `fd` needs:
//   BLEND_SRC / BLEND_DST   : set in the branches for the source factor and
//                             the destination factor respectively
//   FACTOR_SRC / FACTOR_DST : the blend factor itself is derived from the
//                             source / destination component
// build_blending() tests the result against these bits.
// NOTE(review): the `switch` headers, several case labels and the `return`
// are not part of the lines shown here; presumably the accumulated
// `blending` value is returned -- confirm against the full source.
376 int GGLAssembler::blending_codes(int fs
, int fd
)
381 blending
|= BLEND_SRC
;
384 case GGL_ONE_MINUS_DST_COLOR
:
386 blending
|= FACTOR_DST
|BLEND_SRC
;
388 case GGL_ONE_MINUS_DST_ALPHA
:
390 // no need to extract 'component' from the destination
391 // for the blend factor, because we need ALPHA only.
392 blending
|= BLEND_SRC
;
395 case GGL_ONE_MINUS_SRC_COLOR
:
397 blending
|= FACTOR_SRC
|BLEND_SRC
;
399 case GGL_ONE_MINUS_SRC_ALPHA
:
401 case GGL_SRC_ALPHA_SATURATE
:
402 blending
|= FACTOR_SRC
|BLEND_SRC
;
407 blending
|= BLEND_DST
;
410 case GGL_ONE_MINUS_DST_COLOR
:
412 blending
|= FACTOR_DST
|BLEND_DST
;
414 case GGL_ONE_MINUS_DST_ALPHA
:
416 blending
|= FACTOR_DST
|BLEND_DST
;
419 case GGL_ONE_MINUS_SRC_COLOR
:
421 blending
|= FACTOR_SRC
|BLEND_DST
;
423 case GGL_ONE_MINUS_SRC_ALPHA
:
425 // no need to extract 'component' from the source
426 // for the blend factor, because we need ALPHA only.
427 blending
|= BLEND_DST
;
433 // ---------------------------------------------------------------------------
// Emits temp = fragment*factor + fb*(1-factor), rewritten with a single
// multiply as (fragment - fb)*factor + fb.
//   factor   : blend factor f
//   fragment : source value S
// NOTE(review): `temp` and `fb` (destination value D) are used below but
// their parameter declarations are not part of the lines shown here.
435 void GGLAssembler::build_blendFOneMinusF(
437 const integer_t
& factor
,
438 const integer_t
& fragment
,
441 // R = S*f + D*(1-f) = (S-D)*f + D
442 Scratch
scratches(registerFile());
// reuse the fragment register for the difference when it may be overwritten,
// otherwise take a scratch register
444 integer_t
diff(fragment
.flags
& CORRUPTIBLE
?
445 fragment
.reg
: scratches
.obtain(), fb
.size(), CORRUPTIBLE
);
// RSB computes (second operand - first operand), so diff = fragment - fb,
// with fragment shifted to fb's size first when the two sizes differ
446 const int shift
= fragment
.size() - fb
.size();
447 if (shift
>0) RSB(AL
, 0, diff
.reg
, fb
.reg
, reg_imm(fragment
.reg
, LSR
, shift
));
448 else if (shift
<0) RSB(AL
, 0, diff
.reg
, fb
.reg
, reg_imm(fragment
.reg
, LSL
,-shift
));
449 else RSB(AL
, 0, diff
.reg
, fb
.reg
, fragment
.reg
);
// temp = diff*factor + fb
450 mul_factor_add(temp
, diff
, factor
, component_t(fb
));
// Emits temp = fragment*(1-factor) + fb*factor, rewritten with a single
// multiply as (fb - fragment)*factor + fragment.
//   factor   : blend factor f
//   fragment : source value S
// NOTE(review): `temp` and `fb` (destination value D) are used below but
// their parameter declarations are not part of the lines shown here.
453 void GGLAssembler::build_blendOneMinusFF(
455 const integer_t
& factor
,
456 const integer_t
& fragment
,
459 // R = S*(1-f) + D*f = (D-S)*f + S
460 Scratch
scratches(registerFile());
// reuse the fb register for the difference when it may be overwritten,
// otherwise take a scratch register
462 integer_t
diff(fb
.flags
& CORRUPTIBLE
?
463 fb
.reg
: scratches
.obtain(), fb
.size(), CORRUPTIBLE
);
// diff = fb - fragment, with fragment shifted to fb's size first when the
// two sizes differ
464 const int shift
= fragment
.size() - fb
.size();
465 if (shift
>0) SUB(AL
, 0, diff
.reg
, fb
.reg
, reg_imm(fragment
.reg
, LSR
, shift
));
466 else if (shift
<0) SUB(AL
, 0, diff
.reg
, fb
.reg
, reg_imm(fragment
.reg
, LSL
,-shift
));
467 else SUB(AL
, 0, diff
.reg
, fb
.reg
, fragment
.reg
);
// temp = diff*factor + fragment
468 mul_factor_add(temp
, diff
, factor
, component_t(fragment
));
471 // ---------------------------------------------------------------------------
// Emits a multiply of a value by a blend factor into `d`, choosing between
// SMULW and SMUL (with the `xy` operand selector) according to the operand
// sizes `vs` and `fs`; the branches below test `fs` and `vs` against the
// 24..31 range, and the value register may be shifted right first.
// NOTE(review): most of the size-selection logic, the remaining parameters
// and the declarations of vs, fs, ms, vreg, freg, vshift, fshift, smulw and
// xy are not part of the lines shown here -- confirm against the full source.
473 void GGLAssembler::mul_factor( component_t
& d
,
481 // XXX: we could have special cases for 1 bit mul
483 // all this code below to use the best multiply instruction
484 // wrt the parameters size. We take advantage of the fact
485 // that the 16-bits multiplies allow a 16-bit shift
486 // The trick is that we just make sure that we have at least 8-bits
487 // per component (which is enough for a 8 bits display).
497 } else if (GGL_BETWEEN(fs
, 24, 31)) {
501 // eg: 15 * 18 -> 15 * 15
506 } else if (GGL_BETWEEN(vs
, 24, 31)) {
510 } else if (GGL_BETWEEN(fs
, 24, 31)) {
514 // eg: 24 * 18 -> 8 * 18
521 // eg: 18 * 15 -> 15 * 15
525 } else if (GGL_BETWEEN(fs
, 24, 31)) {
526 // eg: 18 * 24 -> 15 * 8
531 // eg: 18 * 18 -> (15 * 18)>>16
534 xy
= yB
; //XXX SMULWB
// logs an error when the product size `ms` reaches 32 bits or more
539 LOGE_IF(ms
>=32, "mul_factor overflow vs=%d, fs=%d", vs
, fs
);
544 MOV(AL
, 0, d
.reg
, reg_imm(vreg
, LSR
, vshift
));
548 MOV(AL
, 0, d
.reg
, reg_imm(vreg
, LSR
, fshift
));
551 if (smulw
) SMULW(AL
, xy
, d
.reg
, vreg
, freg
);
552 else SMUL(AL
, xy
, d
.reg
, vreg
, freg
);
// Emits d = v*f + a.
//   d : destination component
//   a : addend component
// A 16x16 multiply (SMLABB / SMULBB) is used when both vs and fs are below
// 16, otherwise MLA / MUL. When the multiply and the add are emitted
// separately, the addend is shifted by the difference between `ms` and `as`
// in the ADD. With dithering enabled and a.size() < ms, `a` is first
// expanded to `ms` bits.
// NOTE(review): the `v` and `f` parameters, the declarations of vs, fs, as,
// ms and temp, and the conditions selecting between the paths below are not
// part of the lines shown here -- confirm against the full source.
564 void GGLAssembler::mul_factor_add( component_t
& d
,
567 const component_t
& a
)
569 // XXX: we could have special cases for 1 bit mul
570 Scratch
scratches(registerFile());
// logs an error when the product size `ms` reaches 32 bits or more
577 LOGE_IF(ms
>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs
, fs
, as
);
579 integer_t
add(a
.reg
, a
.h
, a
.flags
);
581 // 'a' is a component_t but it is guaranteed to have
582 // its high bits set to 0. However in the dithering case,
583 // we can't get away with truncating the potentially bad bits
584 // so extraction is needed.
586 if ((mDithering
) && (a
.size() < ms
)) {
587 // we need to expand a
588 if (!(a
.flags
& CORRUPTIBLE
)) {
589 // ... but it's not corruptible, so we need to pick a
590 // temporary register.
591 // Try to use the destination register first (it's likely
592 // to be usable, unless it aliases an input).
593 if (d
.reg
!=a
.reg
&& d
.reg
!=v
.reg
&& d
.reg
!=f
.reg
) {
596 add
.reg
= scratches
.obtain();
599 expand(add
, a
, ms
); // extracts and expands
// single multiply-accumulate: d = v*f + add
604 if (vs
<16 && fs
<16) SMLABB(AL
, d
.reg
, v
.reg
, f
.reg
, add
.reg
);
605 else MLA(AL
, 0, d
.reg
, v
.reg
, f
.reg
, add
.reg
);
608 if (temp
== add
.reg
) {
609 // the mul will modify add.reg, we need an intermediary reg
610 if (v
.flags
& CORRUPTIBLE
) temp
= v
.reg
;
611 else if (f
.flags
& CORRUPTIBLE
) temp
= f
.reg
;
612 else temp
= scratches
.obtain();
// separate multiply into `temp`, followed by an ADD of the shifted addend
615 if (vs
<16 && fs
<16) SMULBB(AL
, temp
, v
.reg
, f
.reg
);
616 else MUL(AL
, 0, temp
, v
.reg
, f
.reg
);
619 ADD(AL
, 0, d
.reg
, temp
, reg_imm(add
.reg
, LSL
, ms
-as
));
621 // not sure if we should expand the mul instead?
622 ADD(AL
, 0, d
.reg
, temp
, reg_imm(add
.reg
, LSR
, as
-ms
));
// low bit of the result: the larger of fs and a.l
630 d
.l
= fs
>a
.l
? fs
: a
.l
;
// Emits d = src + dst.
//   d   : destination component (its register receives the sum)
//   dst : framebuffer value
//   src : fragment value
// `shift` is src.size() - dst.size(); the second ADD shifts `dst` left by
// that amount so both operands have the same size before they are added.
// NOTE(review): the condition choosing between the two ADD forms and any
// code after them are not part of the lines shown here.
635 void GGLAssembler::component_add(component_t
& d
,
636 const integer_t
& dst
, const integer_t
& src
)
638 // here we're guaranteed that fragment.size() >= fb.size()
639 const int shift
= src
.size() - dst
.size();
641 ADD(AL
, 0, d
.reg
, src
.reg
, dst
.reg
);
643 ADD(AL
, 0, d
.reg
, src
.reg
, reg_imm(dst
.reg
, LSL
, shift
));
// Emits a saturation of component `v`: compares v.reg with (1 << v.h) and,
// when it is higher or the same (HS), replaces it with the component's
// maximum value `one` = ((1 << v.size()) - 1) << v.l.
// The replacement is emitted in one of three ways, depending on which
// immediate can be encoded:
//   - MOV with `one`
//   - MVN with ~one
//   - MOV (1 << v.h) followed by SUB (1 << v.l)
// NOTE(review): the last form equals `one` only if v.size() == v.h - v.l --
// confirm against component_t.
655 void GGLAssembler::component_sat(const component_t
& v
)
657 const int one
= ((1<<v
.size())-1)<<v
.l
;
658 CMP(AL
, v
.reg
, imm( 1<<v
.h
));
659 if (isValidImmediate(one
)) {
660 MOV(HS
, 0, v
.reg
, imm( one
));
661 } else if (isValidImmediate(~one
)) {
662 MVN(HS
, 0, v
.reg
, imm( ~one
));
// neither `one` nor ~one is encodable: build the value in two conditional steps
664 MOV(HS
, 0, v
.reg
, imm( 1<<v
.h
));
665 SUB(HS
, 0, v
.reg
, v
.reg
, imm( 1<<v
.l
));
669 // ----------------------------------------------------------------------------
671 }; // namespace android