u-tools/apps/android_port/libpixelflinger/codeflinger/blending.cpp

   1 /* libs/pixelflinger/codeflinger/blending.cpp
   2 **
   3 ** Copyright 2006, The Android Open Source Project
   4 **
   5 ** Licensed under the Apache License, Version 2.0 (the "License");
   6 ** you may not use this file except in compliance with the License.
   7 ** You may obtain a copy of the License at
   8 **
   9 **     http://www.apache.org/licenses/LICENSE-2.0
  10 **
  11 ** Unless required by applicable law or agreed to in writing, software
  12 ** distributed under the License is distributed on an "AS IS" BASIS,
  13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 ** See the License for the specific language governing permissions and
  15 ** limitations under the License.
  16 */
  17
  18 #include <assert.h>
  19 #include <stdint.h>
  20 #include <stdlib.h>
  21 #include <stdio.h>
  22 #include <sys/types.h>
  23
  24 #include <cutils/log.h>
  25
  26 #include "GGLAssembler.h"   //codeflinger/
  27
  28
  29 namespace android {
  30
  31 void GGLAssembler::build_fog(
  32                         component_t& temp,      // incomming fragment / output
  33                         int component,
  34                         Scratch& regs)
  35 {
  36    if (mInfo[component].fog) {
  37         Scratch scratches(registerFile());
  38         comment("fog");
  39
  40         integer_t fragment(temp.reg, temp.h, temp.flags);
  41         if (!(temp.flags & CORRUPTIBLE)) {
  42             temp.reg = regs.obtain();
  43             temp.flags |= CORRUPTIBLE;
  44         }
  45
  46         integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
  47         LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
  48                 immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
  49
  50         integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
  51         CONTEXT_LOAD(factor.reg, generated_vars.f);
  52
  53         // clamp fog factor (TODO: see if there is a way to guarantee
  54         // we won't overflow, when setting the iterators)
  55         BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
  56         CMP(AL, factor.reg, imm( 0x10000 ));
  57         MOV(HS, 0, factor.reg, imm( 0x10000 ));
  58
  59         build_blendFOneMinusF(temp, factor, fragment, fogColor);
  60     }
  61 }
  62
// Emits the code that blends one component of the incoming fragment with
// the matching component of the framebuffer pixel.
//
//  temp       incoming fragment on entry; holds the blended result on exit
//             and is always flagged CORRUPTIBLE when the function returns
//  pixel      framebuffer pixel the destination component is extracted from
//  component  index of the component being processed (GGLFormat::ALPHA
//             selects the alpha-specific blend factors)
//  regs       allocator used for the output register when one is needed
//
// Nothing is emitted when blending is disabled for this component.
void GGLAssembler::build_blending(
                        component_t& temp,      // incoming fragment / output
                        const pixel_t& pixel,   // framebuffer
                        int component,
                        Scratch& regs)
{
   if (!mInfo[component].blend)
        return;

    // the alpha component has its own pair of blend factors
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    // for the alpha component, SRC_ALPHA_SATURATE is handled as ONE
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
        fs = GGL_ONE;
    // bit mask telling which of source/destination are needed, either as
    // a blend operand (BLEND_*) or to derive a factor (FACTOR_*)
    const int blending = blending_codes(fs, fd);
    if (!temp.size()) {
        // here, blending will produce something which doesn't depend on
        // that component (eg: GL_ZERO:GL_*), so the register has not been
        // allocated yet. Will never be used as a source.
        temp = component_t(regs.obtain(), CORRUPTIBLE);
    }

    // we are doing real blending...
    // fb:          extracted dst
    // fragment:    extracted src
    // temp:        component_t(fragment) and result

    // scoped register allocator
    Scratch scratches(registerFile());
    comment("blending");

    // we can optimize these cases a bit...
    // (1) saturation is not needed
    // (2) we can use only one multiply instead of 2
    // (3) we can reduce the register pressure
    //      R = S*f + D*(1-f) = (S-D)*f + D
    //      R = S*(1-f) + D*f = (D-S)*f + S

    // source uses f, destination uses (1-f)
    const bool same_factor_opt1 =
        (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
        (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
        (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
        (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);

    // source uses (1-f), destination uses f
    const bool same_factor_opt2 =
        (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
        (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
        (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
        (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);


    // XXX: we could also optimize these cases:
    // R = S*f + D*f = (S+D)*f
    // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
    // R = S*D + D*S = 2*S*D


    // see if we need to extract 'component' from the destination (fb)
    integer_t fb;
    if (blending & (BLEND_DST|FACTOR_DST)) {
        fb.setTo(scratches.obtain(), 32);
        extract(fb, pixel, component);
        if (mDithering) {
            // XXX: maybe what we should do instead, is simply
            // expand fb -or- fragment to the larger of the two
            if (fb.size() < temp.size()) {
                // for now we expand 'fb' to min(fragment, 8)
                int new_size = temp.size() < 8 ? temp.size() : 8;
                expand(fb, fb, new_size);
            }
        }
    }


    // convert input fragment to integer_t
    // (when temp may be overwritten, shift its low bits out in place)
    if (temp.l && (temp.flags & CORRUPTIBLE)) {
        MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
        temp.h -= temp.l;
        temp.l = 0;
    }
    integer_t fragment(temp.reg, temp.size(), temp.flags);

    // if not done yet, convert input fragment to integer_t
    if (temp.l) {
        // here we know temp is not CORRUPTIBLE
        // so the shifted copy goes into a scratch register instead
        fragment.reg = scratches.obtain();
        MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
        fragment.flags |= CORRUPTIBLE;
    }

    if (!(temp.flags & CORRUPTIBLE)) {
        // temp is not corruptible, but since it's the destination it
        // will be modified, so we need to allocate a new register.
        temp.reg = regs.obtain();
        // NOTE(review): CORRUPTIBLE is already clear on temp in this
        // branch, so the next statement has no effect; temp is flagged
        // CORRUPTIBLE again at the end of the function.
        temp.flags &= ~CORRUPTIBLE;
        fragment.flags &= ~CORRUPTIBLE;
    }

    if ((blending & BLEND_SRC) && !same_factor_opt1) {
        // source (fragment) is needed for the blending stage
        // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
        fragment.flags &= ~CORRUPTIBLE;
    }


    if (same_factor_opt1) {
        //  R = S*f + D*(1-f) = (S-D)*f + D
        integer_t factor;
        build_blend_factor(factor, fs,
                component, pixel, fragment, fb, scratches);
        // fb is always corruptible from this point
        fb.flags |= CORRUPTIBLE;
        build_blendFOneMinusF(temp, factor, fragment, fb);
    } else if (same_factor_opt2) {
        //  R = S*(1-f) + D*f = (D-S)*f + S
        integer_t factor;
        // fb is always corruptible here
        fb.flags |= CORRUPTIBLE;
        build_blend_factor(factor, fd,
                component, pixel, fragment, fb, scratches);
        build_blendOneMinusFF(temp, factor, fragment, fb);
    } else {
        // general case: source and destination factors are independent
        integer_t src_factor;
        integer_t dst_factor;

        // if destination (fb) is not needed for the blending stage,
        // then it can be marked as CORRUPTIBLE
        if (!(blending & BLEND_DST)) {
            fb.flags |= CORRUPTIBLE;
        }

        // XXX: try to mark some registers as CORRUPTIBLE
        // in most case we could make those corruptible
        // when we're processing the last component
        // but not always, for instance
        //    when fragment is constant and not reloaded
        //    when fb is needed for logic-ops or masking
        //    when a register is aliased (for instance with mAlphaSource)

        // blend away...
        if (fs==GGL_ZERO) {
            if (fd==GGL_ZERO) {         // R = 0
                // already taken care of
            } else if (fd==GGL_ONE) {   // R = D
                // already taken care of
            } else {                    // R = D*fd
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                mul_factor(temp, fb, dst_factor);
            }
        } else if (fs==GGL_ONE) {
            if (fd==GGL_ZERO) {         // R = S
                // NOP, taken care of
            } else if (fd==GGL_ONE) {   // R = S + D
                component_add(temp, fb, fragment); // args order matters
                component_sat(temp);
            } else {                    // R = S + D*fd
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                mul_factor_add(temp, fb, dst_factor, component_t(fragment));
                component_sat(temp);
            }
        } else {
            // compute fs
            build_blend_factor(src_factor, fs,
                    component, pixel, fragment, fb, scratches);
            if (fd==GGL_ZERO) {         // R = S*fs
                mul_factor(temp, fragment, src_factor);
            } else if (fd==GGL_ONE) {   // R = S*fs + D
                mul_factor_add(temp, fragment, src_factor, component_t(fb));
                component_sat(temp);
            } else {                    // R = S*fs + D*fd
                mul_factor(temp, fragment, src_factor);
                // the source factor is no longer needed: give its register
                // back before the destination factor is computed
                if (scratches.isUsed(src_factor.reg))
                    scratches.recycle(src_factor.reg);
                // compute fd
                build_blend_factor(dst_factor, fd,
                        component, pixel, fragment, fb, scratches);
                mul_factor_add(temp, fb, dst_factor, temp);
                // NOTE(review): both flags are necessarily false in this
                // branch, so the result is always saturated here.
                if (!same_factor_opt1 && !same_factor_opt2) {
                    component_sat(temp);
                }
            }
        }
    }

    // now we can be corrupted (it's the dest)
    temp.flags |= CORRUPTIBLE;
}
 253
// Emits the code that computes the blend factor 'f' into 'factor'.
//
//  factor      output; describes the register and bit size of the factor
//  f           blend factor selector (one of the GGL_* values handled below;
//              any other value emits nothing)
//  component   index of the component currently being blended
//  dst_pixel   framebuffer pixel, used to extract destination alpha
//  fragment    source component; its register may be taken over for the
//              factor, in which case its CORRUPTIBLE flag is cleared
//  fb          destination component; same register-reuse rule as fragment
//  scratches   allocator used when neither fragment nor fb can be reused
//
// The work is done in three switch stages: pick a register for the factor,
// compute the factor, then turn it into (1-f) for the ONE_MINUS variants.
// Finally the factor is reduced to at most 8 bits.
void GGLAssembler::build_blend_factor(
        integer_t& factor, int f, int component,
        const pixel_t& dst_pixel,
        integer_t& fragment,
        integer_t& fb,
        Scratch& scratches)
{
    // by default source alpha is read from the fragment itself
    integer_t src_alpha(fragment);

    // src_factor/dst_factor won't be used after blending,
    // so it's fine to mark them as CORRUPTIBLE (if not aliased)
    factor.flags |= CORRUPTIBLE;

    // stage 1: choose the register that will hold the factor
    switch(f) {
    case GGL_ONE_MINUS_SRC_ALPHA:
    case GGL_SRC_ALPHA:
        if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
            // we're processing alpha, so we already have
            // src-alpha in fragment, and we need src-alpha just this time.
        } else {
           // alpha-src will be needed for other components
            if (!mBlendFactorCached || mBlendFactorCached==f) {
                // the factor lives in mAlphaSource's register and is
                // shared, so it must not be marked CORRUPTIBLE
                src_alpha = mAlphaSource;
                factor = mAlphaSource;
                factor.flags &= ~CORRUPTIBLE;
                // we already computed the blend factor before, nothing to do.
                if (mBlendFactorCached)
                    return;
                // this is the first time, make sure to compute the blend
                // factor properly.
                mBlendFactorCached = f;
                break;
            } else {
                // we have a cached alpha blend factor, but we want another one,
                // this should really not happen because by construction,
                // we cannot have BOTH source and destination
                // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
                // the blending stage uses the f/(1-f) optimization

                // for completeness, we handle this case though. Since there
                // are only 2 choices, this means we want "the other one"
                // (1-factor)
                factor = mAlphaSource;
                factor.flags &= ~CORRUPTIBLE;
                RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
                mBlendFactorCached = f;
                return;
            }
        }
        // fall-through...
    case GGL_ONE_MINUS_DST_COLOR:
    case GGL_DST_COLOR:
    case GGL_ONE_MINUS_SRC_COLOR:
    case GGL_SRC_COLOR:
    case GGL_ONE_MINUS_DST_ALPHA:
    case GGL_DST_ALPHA:
    case GGL_SRC_ALPHA_SATURATE:
        // help us find out what register we can use for the blend-factor
        // CORRUPTIBLE registers are chosen first, or a new one is allocated.
        // (a reused register loses its CORRUPTIBLE flag on the donor side)
        if (fragment.flags & CORRUPTIBLE) {
            factor.setTo(fragment.reg, 32, CORRUPTIBLE);
            fragment.flags &= ~CORRUPTIBLE;
        } else if (fb.flags & CORRUPTIBLE) {
            factor.setTo(fb.reg, 32, CORRUPTIBLE);
            fb.flags &= ~CORRUPTIBLE;
        } else {
            factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
        }
        break;
    }

    // XXX: doesn't work if size==1

    // stage 2: compute the factor as x + (x >> (s-1)), i.e. the s-bit
    // value plus its own top bit
    switch(f) {
    case GGL_ONE_MINUS_DST_COLOR:
    case GGL_DST_COLOR:
        factor.s = fb.s;
        ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
        break;
    case GGL_ONE_MINUS_SRC_COLOR:
    case GGL_SRC_COLOR:
        factor.s = fragment.s;
        ADD(AL, 0, factor.reg, fragment.reg,
            reg_imm(fragment.reg, LSR, fragment.s-1));
        break;
    case GGL_ONE_MINUS_SRC_ALPHA:
    case GGL_SRC_ALPHA:
        factor.s = src_alpha.s;
        ADD(AL, 0, factor.reg, src_alpha.reg,
                reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
        break;
    case GGL_ONE_MINUS_DST_ALPHA:
    case GGL_DST_ALPHA:
        // XXX: should be precomputed
        extract(factor, dst_pixel, GGLFormat::ALPHA);
        ADD(AL, 0, factor.reg, factor.reg,
                reg_imm(factor.reg, LSR, factor.s-1));
        break;
    case GGL_SRC_ALPHA_SATURATE:
        // XXX: should be precomputed
        // XXX: f = min(As, 1-Ad)
        // btw, we're guaranteed that Ad's size is <= 8, because
        // it's extracted from the framebuffer
        // NOTE(review): no instruction is emitted for this factor; the
        // register chosen in stage 1 is left unwritten by this function.
        break;
    }

    // stage 3: the ONE_MINUS variants use (1<<s) - factor
    switch(f) {
    case GGL_ONE_MINUS_DST_COLOR:
    case GGL_ONE_MINUS_SRC_COLOR:
    case GGL_ONE_MINUS_DST_ALPHA:
    case GGL_ONE_MINUS_SRC_ALPHA:
        RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
    }

    // don't need more than 8-bits for the blend factor
    // and this will prevent overflows in the multiplies later
    if (factor.s > 8) {
        MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
        factor.s = 8;
    }
}
 375
 376 int GGLAssembler::blending_codes(int fs, int fd)
 377 {
 378     int blending = 0;
 379     switch(fs) {
 380     case GGL_ONE:
 381         blending |= BLEND_SRC;
 382         break;
 383
 384     case GGL_ONE_MINUS_DST_COLOR:
 385     case GGL_DST_COLOR:
 386         blending |= FACTOR_DST|BLEND_SRC;
 387         break;
 388     case GGL_ONE_MINUS_DST_ALPHA:
 389     case GGL_DST_ALPHA:
 390         // no need to extract 'component' from the destination
 391         // for the blend factor, because we need ALPHA only.
 392         blending |= BLEND_SRC;
 393         break;
 394
 395     case GGL_ONE_MINUS_SRC_COLOR:
 396     case GGL_SRC_COLOR:
 397         blending |= FACTOR_SRC|BLEND_SRC;
 398         break;
 399     case GGL_ONE_MINUS_SRC_ALPHA:
 400     case GGL_SRC_ALPHA:
 401     case GGL_SRC_ALPHA_SATURATE:
 402         blending |= FACTOR_SRC|BLEND_SRC;
 403         break;
 404     }
 405     switch(fd) {
 406     case GGL_ONE:
 407         blending |= BLEND_DST;
 408         break;
 409
 410     case GGL_ONE_MINUS_DST_COLOR:
 411     case GGL_DST_COLOR:
 412         blending |= FACTOR_DST|BLEND_DST;
 413         break;
 414     case GGL_ONE_MINUS_DST_ALPHA:
 415     case GGL_DST_ALPHA:
 416         blending |= FACTOR_DST|BLEND_DST;
 417         break;
 418
 419     case GGL_ONE_MINUS_SRC_COLOR:
 420     case GGL_SRC_COLOR:
 421         blending |= FACTOR_SRC|BLEND_DST;
 422         break;
 423     case GGL_ONE_MINUS_SRC_ALPHA:
 424     case GGL_SRC_ALPHA:
 425         // no need to extract 'component' from the source
 426         // for the blend factor, because we need ALPHA only.
 427         blending |= BLEND_DST;
 428         break;
 429     }
 430     return blending;
 431 }
 432
 433 // ---------------------------------------------------------------------------
 434
 435 void GGLAssembler::build_blendFOneMinusF(
 436         component_t& temp,
 437         const integer_t& factor,
 438         const integer_t& fragment,
 439         const integer_t& fb)
 440 {
 441     //  R = S*f + D*(1-f) = (S-D)*f + D
 442     Scratch scratches(registerFile());
 443     // compute S-D
 444     integer_t diff(fragment.flags & CORRUPTIBLE ?
 445             fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
 446     const int shift = fragment.size() - fb.size();
 447     if (shift>0)        RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
 448     else if (shift<0)   RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
 449     else                RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
 450     mul_factor_add(temp, diff, factor, component_t(fb));
 451 }
 452
 453 void GGLAssembler::build_blendOneMinusFF(
 454         component_t& temp,
 455         const integer_t& factor,
 456         const integer_t& fragment,
 457         const integer_t& fb)
 458 {
 459     //  R = S*f + D*(1-f) = (S-D)*f + D
 460     Scratch scratches(registerFile());
 461     // compute D-S
 462     integer_t diff(fb.flags & CORRUPTIBLE ?
 463             fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
 464     const int shift = fragment.size() - fb.size();
 465     if (shift>0)        SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
 466     else if (shift<0)   SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
 467     else                SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
 468     mul_factor_add(temp, diff, factor, component_t(fragment));
 469 }
 470
 471 // ---------------------------------------------------------------------------
 472
 473 void GGLAssembler::mul_factor(  component_t& d,
 474                                 const integer_t& v,
 475                                 const integer_t& f)
 476 {
 477     int vs = v.size();
 478     int fs = f.size();
 479     int ms = vs+fs;
 480
 481     // XXX: we could have special cases for 1 bit mul
 482
 483     // all this code below to use the best multiply instruction
 484     // wrt the parameters size. We take advantage of the fact
 485     // that the 16-bits multiplies allow a 16-bit shift
 486     // The trick is that we just make sure that we have at least 8-bits
 487     // per component (which is enough for a 8 bits display).
 488
 489     int xy;
 490     int vshift = 0;
 491     int fshift = 0;
 492     int smulw = 0;
 493
 494     if (vs<16) {
 495         if (fs<16) {
 496             xy = xyBB;
 497         } else if (GGL_BETWEEN(fs, 24, 31)) {
 498             ms -= 16;
 499             xy = xyTB;
 500         } else {
 501             // eg: 15 * 18  ->  15 * 15
 502             fshift = fs - 15;
 503             ms -= fshift;
 504             xy = xyBB;
 505         }
 506     } else if (GGL_BETWEEN(vs, 24, 31)) {
 507         if (fs<16) {
 508             ms -= 16;
 509             xy = xyTB;
 510         } else if (GGL_BETWEEN(fs, 24, 31)) {
 511             ms -= 32;
 512             xy = xyTT;
 513         } else {
 514             // eg: 24 * 18  ->  8 * 18
 515             fshift = fs - 15;
 516             ms -= 16 + fshift;
 517             xy = xyTB;
 518         }
 519     } else {
 520         if (fs<16) {
 521             // eg: 18 * 15  ->  15 * 15
 522             vshift = vs - 15;
 523             ms -= vshift;
 524             xy = xyBB;
 525         } else if (GGL_BETWEEN(fs, 24, 31)) {
 526             // eg: 18 * 24  ->  15 * 8
 527             vshift = vs - 15;
 528             ms -= 16 + vshift;
 529             xy = xyBT;
 530         } else {
 531             // eg: 18 * 18  ->  (15 * 18)>>16
 532             fshift = fs - 15;
 533             ms -= 16 + fshift;
 534             xy = yB;    //XXX SMULWB
 535             smulw = 1;
 536         }
 537     }
 538
 539     LOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
 540
 541     int vreg = v.reg;
 542     int freg = f.reg;
 543     if (vshift) {
 544         MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
 545         vreg = d.reg;
 546     }
 547     if (fshift) {
 548         MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
 549         freg = d.reg;
 550     }
 551     if (smulw)  SMULW(AL, xy, d.reg, vreg, freg);
 552     else        SMUL(AL, xy, d.reg, vreg, freg);
 553
 554
 555     d.h = ms;
 556     if (mDithering) {
 557         d.l = 0;
 558     } else {
 559         d.l = fs;
 560         d.flags |= CLEAR_LO;
 561     }
 562 }
 563
// Emits d = v * f + a.
//
//  d  destination; d.reg receives the result, d.h/d.l (and CLEAR_LO when
//     not dithering) are updated to describe where the result sits
//  v  value to scale
//  f  factor
//  a  addend; aligned to the product's size before being added
//
// When the addend already has the product's size a single multiply-
// accumulate is emitted; otherwise a multiply followed by an ADD with a
// shifted addend.
void GGLAssembler::mul_factor_add(  component_t& d,
                                    const integer_t& v,
                                    const integer_t& f,
                                    const component_t& a)
{
    // XXX: we could have special cases for 1 bit mul
    Scratch scratches(registerFile());

    int vs = v.size();
    int fs = f.size();
    int as = a.h;       // position of the addend's top bit
    int ms = vs+fs;     // size of the product

    LOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);

    // integer view of the addend; may be redirected to another register
    // below when 'a' has to be expanded
    integer_t add(a.reg, a.h, a.flags);

    // 'a' is a component_t but it is guaranteed to have
    // its high bits set to 0. However in the dithering case,
    // we can't get away with truncating the potentially bad bits
    // so extraction is needed.

   if ((mDithering) && (a.size() < ms)) {
        // we need to expand a
        if (!(a.flags & CORRUPTIBLE)) {
            // ... but it's not corruptible, so we need to pick a
            // temporary register.
            // Try to use the destination register first (it's likely
            // to be usable, unless it aliases an input).
            if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
                add.reg = d.reg;
            } else {
                add.reg = scratches.obtain();
            }
        }
        expand(add, a, ms); // extracts and expands
        as = ms;
    }

    if (ms == as) {
        // addend and product line up: one multiply-accumulate does it all
        if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
        else                MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
    } else {
        // sizes differ: multiply first, then add the shifted addend
        int temp = d.reg;
        if (temp == add.reg) {
            // the mul will modify add.reg, we need an intermediary reg
            if (v.flags & CORRUPTIBLE)      temp = v.reg;
            else if (f.flags & CORRUPTIBLE) temp = f.reg;
            else                            temp = scratches.obtain();
        }

        if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
        else                MUL(AL, 0, temp, v.reg, f.reg);

        if (ms>as) {
            // product is wider: scale the addend up
            ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
        } else if (ms<as) {
            // not sure if we should expand the mul instead?
            ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
        }
    }

    d.h = ms;
    if (mDithering) {
        d.l = a.l;
    } else {
        // without dithering the result starts at the larger of fs and a.l
        d.l = fs>a.l ? fs : a.l;
        d.flags |= CLEAR_LO;
    }
}
 634
 635 void GGLAssembler::component_add(component_t& d,
 636         const integer_t& dst, const integer_t& src)
 637 {
 638     // here we're guaranteed that fragment.size() >= fb.size()
 639     const int shift = src.size() - dst.size();
 640     if (!shift) {
 641         ADD(AL, 0, d.reg, src.reg, dst.reg);
 642     } else {
 643         ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
 644     }
 645
 646     d.h = src.size();
 647     if (mDithering) {
 648         d.l = 0;
 649     } else {
 650         d.l = shift;
 651         d.flags |= CLEAR_LO;
 652     }
 653 }
 654
 655 void GGLAssembler::component_sat(const component_t& v)
 656 {
 657     const int one = ((1<<v.size())-1)<<v.l;
 658     CMP(AL, v.reg, imm( 1<<v.h ));
 659     if (isValidImmediate(one)) {
 660         MOV(HS, 0, v.reg, imm( one ));
 661     } else if (isValidImmediate(~one)) {
 662         MVN(HS, 0, v.reg, imm( ~one ));
 663     } else {
 664         MOV(HS, 0, v.reg, imm( 1<<v.h ));
 665         SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
 666     }
 667 }
 668
 669 // ----------------------------------------------------------------------------
 670
 671 }; // namespace android
 672