src/lib/alac/codec/ALACEncoder.cpp

   1 /*
   2  * Copyright (c) 2011 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_APACHE_LICENSE_HEADER_START@
   5  *
   6  * Licensed under the Apache License, Version 2.0 (the "License");
   7  * you may not use this file except in compliance with the License.
   8  * You may obtain a copy of the License at
   9  *
  10  *     http://www.apache.org/licenses/LICENSE-2.0
  11  *
  12  * Unless required by applicable law or agreed to in writing, software
  13  * distributed under the License is distributed on an "AS IS" BASIS,
  14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  15  * See the License for the specific language governing permissions and
  16  * limitations under the License.
  17  *
  18  * @APPLE_APACHE_LICENSE_HEADER_END@
  19  */
  20
  21 /*
  22         File:           ALACEncoder.cpp
  23 */
  24
  25 // build stuff
  26 #define VERBOSE_DEBUG           0
  27
  28 // headers
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32
  33 #include "ALACEncoder.h"
  34
  35 #include "aglib.h"
  36 #include "dplib.h"
  37 #include "matrixlib.h"
  38
  39 #include "ALACBitUtilities.h"
  40 #include "ALACAudioTypes.h"
  41 #include "EndianPortable.h"
  42
  43 // Note: in C you can't typecast to a 2-dimensional array pointer but that's what we need when
  44 // picking which coefs to use so we declare this typedef b/c we *can* typecast to this type
  45 typedef int16_t (*SearchCoefs)[kALACMaxCoefs];
  46
  47 // defines/constants
  48 const uint32_t kALACEncoderMagic        = 'dpge';
  49 const uint32_t kMaxSampleSize           = 32;                   // max allowed bit width is 32
  50 const uint32_t kDefaultMixBits  = 2;
  51 const uint32_t kDefaultMixRes           = 0;
  52 const uint32_t kMaxRes                  = 4;
  53 const uint32_t kDefaultNumUV            = 8;
  54 const uint32_t kMinUV                           = 4;
  55 const uint32_t kMaxUV                           = 8;
  56
  57 // static functions
  58 #if VERBOSE_DEBUG
  59 static void AddFiller( BitBuffer * bits, int32_t numBytes );
  60 #endif
  61
  62
  63 /*
  64         Map Format: 3-bit field per channel which is the same as the "element tag" that should be placed
  65                                 at the beginning of the frame for that channel.  Indicates whether SCE, CPE, or LFE.
  66                                 Each particular field is accessed via the current channel index.  Note that the channel
  67                                 index increments by two for channel pairs.
  68
  69         For example:
  70
  71                         C L R 3-channel input           = (ID_CPE << 3) | (ID_SCE)
  72                                 index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
  73                                 index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
  74
  75                         C L R Ls Rs LFE 5.1-channel input = (ID_LFE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
  76                                 index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
  77                                 index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
  78                                 index 3 value = (map & (0x7ul << (3 * 3))) >> (3 * 3)
  79                                 index 5 value = (map & (0x7ul << (5 * 3))) >> (5 * 3)
  80                                 index 7 value = (map & (0x7ul << (7 * 3))) >> (7 * 3)
  81 */
  82 static const uint32_t   sChannelMaps[kALACMaxChannels] =
  83 {
  84         ID_SCE,
  85         ID_CPE,
  86         (ID_CPE << 3) | (ID_SCE),
  87         (ID_SCE << 9) | (ID_CPE << 3) | (ID_SCE),
  88         (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
  89         (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
  90         (ID_SCE << 18) | (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
  91         (ID_SCE << 21) | (ID_CPE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
  92 };
  93
  94 static const uint32_t sSupportediPodSampleRates[] =
  95 {
  96         8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000
  97 };
  98
  99 /*
 100         Constructor
 101 */
 102 ALACEncoder::ALACEncoder() :
 103         mBitDepth( 0 ),
 104     mFastMode( 0 ),
 105         mMixBufferU( nil ),
 106         mMixBufferV( nil ),
 107         mPredictorU( nil ),
 108         mPredictorV( nil ),
 109         mShiftBufferUV( nil ),
 110         mWorkBuffer( nil ),
 111
 112
 113         mTotalBytesGenerated( 0 ),
 114         mAvgBitRate( 0 ),
 115         mMaxFrameBytes( 0 )
 116 {
 117         // overrides
 118         mFrameSize = kALACDefaultFrameSize;
 119 }
 120
 121 /*
 122         Destructor
 123 */
 124 ALACEncoder::~ALACEncoder()
 125 {
 126         // delete the matrix mixing buffers
 127         if ( mMixBufferU )
 128     {
 129                 free(mMixBufferU);
 130         mMixBufferU = NULL;
 131     }
 132         if ( mMixBufferV )
 133     {
 134                 free(mMixBufferV);
 135         mMixBufferV = NULL;
 136     }
 137
 138         // delete the dynamic predictor's "corrector" buffers
 139         if ( mPredictorU )
 140     {
 141                 free(mPredictorU);
 142         mPredictorU = NULL;
 143     }
 144         if ( mPredictorV )
 145     {
 146                 free(mPredictorV);
 147         mPredictorV = NULL;
 148     }
 149
 150         // delete the unused byte shift buffer
 151         if ( mShiftBufferUV )
 152     {
 153                 free(mShiftBufferUV);
 154         mShiftBufferUV = NULL;
 155     }
 156
 157         // delete the work buffer
 158         if ( mWorkBuffer )
 159     {
 160                 free(mWorkBuffer);
 161         mWorkBuffer = NULL;
 162     }
 163 }
 164
 165 #if PRAGMA_MARK
 166 #pragma mark -
 167 #endif
 168
 169 /*
 170         HEADER SPECIFICATION
 171
 172         For every segment we adopt the following header:
 173
 174                         1 byte reserved                 (always 0)
 175                         1 byte flags                    (see below)
 176                         [4 byte frame length]   (optional, see below)
 177                              ---Next, the per-segment ALAC parameters---
 178                         1 byte mixBits                  (middle-side parameter)
 179                         1 byte mixRes                   (middle-side parameter, interpreted as signed char)
 180
 181                         1 byte shiftU                   (4 bits modeU, 4 bits denShiftU)
 182                         1 byte filterU                  (3 bits pbFactorU, 5 bits numU)
  183                         (numU) shorts                   (signed DP coefficients for U channel)
 184                              ---Next, 2nd-channel ALAC parameters in case of stereo mode---
 185                         1 byte shiftV                   (4 bits modeV, 4 bits denShiftV)
 186                         1 byte filterV                  (3 bits pbFactorV, 5 bits numV)
 187                         (numV) shorts                   (signed DP coefficients for V channel)
 188                              ---After this come the shift-off bytes for (>= 24)-bit data (n-byte shift) if indicated---
 189                              ---Then comes the AG-compressor bitstream---
 190
 191
 192         FLAGS
 193         -----
 194
 195                 The presence of certain flag bits changes the header format such that the parameters might
 196                 not even be sent.  The currently defined flags format is:
 197
 198                         0000psse
 199
 200                         where           0       = reserved, must be 0
 201                                                 p       = 1-bit field "partial frame" flag indicating 32-bit frame length follows this byte
 202                                                 ss      = 2-bit field indicating "number of shift-off bytes ignored by compression"
 203                                                 e       = 1-bit field indicating "escape"
 204
 205                 The "partial frame" flag means that the following segment is not equal to the frame length specified
 206                 in the out-of-band decoder configuration.  This allows the decoder to deal with end-of-file partial
 207                 segments without incurring the 32-bit overhead for each segment.
 208
 209                 The "shift-off" field indicates the number of bytes at the bottom of the word that were passed through
 210                 uncompressed.  The reason for this is that the entropy inherent in the LS bytes of >= 24-bit words
 211                 quite often means that the frame would have to be "escaped" b/c the compressed size would be >= the
 212                 uncompressed size.  However, by shifting the input values down and running the remaining bits through
 213                 the normal compression algorithm, a net win can be achieved.  If this field is non-zero, it means that
 214                 the shifted-off bytes follow after the parameter section of the header and before the compressed
 215                 bitstream.  Note that doing this also allows us to use matrixing on 32-bit inputs after one or more
 216                 bytes are shifted off the bottom which helps the eventual compression ratio.  For stereo channels,
 217                 the shifted off bytes are interleaved.
 218
 219         The "escape" flag means that this segment was not compressed b/c the compressed size would be
 220         >= uncompressed size.  In that case, the audio data was passed through uncompressed after the header.
 221         The other header parameter bytes will not be sent.
 222
 223
 224                 PARAMETERS
 225                 ----------
 226
 227                 If the segment is not a partial or escape segment, the total header size (in bytes) is given exactly by:
 228
 229                         4 + (2 + 2 * numU)                   (mono mode)
  230                         4 + (2 + 2 * numU) + (2 + 2 * numV)  (stereo mode)
 231
 232         where the ALAC filter-lengths numU, numV are bounded by a
 233         constant (in the current source, numU, numV <= NUMCOEPAIRS), and
 234         this forces an absolute upper bound on header size.
 235
 236         Each segment-decode process loads up these bytes from the front of the
 237         local stream, in the above order, then follows with the entropy-encoded
 238         bits for the given segment.
 239
 240         To generalize middle-side, there are various mixing modes including middle-side, each lossless,
 241         as embodied in the mix() and unmix() functions.  These functions exploit a generalized middle-side
 242         transformation:
 243
 244         u := [(rL + (m-r)R)/m];
 245         v := L - R;
 246
 247         where [ ] denotes integer floor.  The (lossless) inverse is
 248
  249         L = u + v - [rv/m];
 250         R = L - v;
 251
 252         In the segment header, m and r are encoded in mixBits and mixRes.
 253         Classical "middle-side" is obtained with m = 2, r = 1, but now
 254         we have more generalized mixes.
 255
 256         NOTES
 257         -----
 258         The relevance of the ALAC coefficients is explained in detail
 259         in patent documents.
 260 */
 261
 262 /*
 263         EncodeStereo()
 264         - encode a channel pair
 265 */
 266 int32_t ALACEncoder::EncodeStereo( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
 267 {
 268         BitBuffer               workBits;
 269         BitBuffer               startBits = *bitstream;                 // squirrel away copy of current state in case we need to go back and do an escape packet
 270         AGParamRec              agParams;
 271         uint32_t          bits1, bits2;
 272         uint32_t                        dilate;
 273         int32_t                 mixBits, mixRes, maxRes;
 274         uint32_t                        minBits, minBits1, minBits2;
 275         uint32_t                        numU, numV;
 276         uint32_t                        mode;
 277         uint32_t                        pbFactor;
 278         uint32_t                        chanBits;
 279         uint32_t                        denShift;
 280         uint8_t                 bytesShifted;
 281         SearchCoefs             coefsU;
 282         SearchCoefs             coefsV;
 283         uint32_t                        index;
 284         uint8_t                 partialFrame;
 285         uint32_t                        escapeBits;
 286         bool                    doEscape;
 287         int32_t         status = ALAC_noErr;
 288
 289         // make sure we handle this bit-depth before we get going
 290         RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
 291
 292         // reload coefs pointers for this channel pair
 293         // - note that, while you might think they should be re-initialized per block, retaining state across blocks
 294         //       actually results in better overall compression
 295         // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
 296         //       different coefs for the different passes of "mixRes" results in even better compression
 297         coefsU = (SearchCoefs) mCoefsU[channelIndex];
 298         coefsV = (SearchCoefs) mCoefsV[channelIndex];
 299
 300         // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
 301         // so enable 16-bit "shift off" and encode in 17-bit mode
 302         // - in addition, 24-bit mode really improves with one byte shifted off
 303         if ( mBitDepth == 32 )
 304                 bytesShifted = 2;
 305         else if ( mBitDepth >= 24 )
 306                 bytesShifted = 1;
 307         else
 308                 bytesShifted = 0;
 309
 310         chanBits = mBitDepth - (bytesShifted * 8) + 1;
 311
 312         // flag whether or not this is a partial frame
 313         partialFrame = (numSamples == mFrameSize) ? 0 : 1;
 314
 315         // brute-force encode optimization loop
 316         // - run over variations of the encoding params to find the best choice
 317         mixBits         = kDefaultMixBits;
 318         maxRes          = kMaxRes;
 319         numU = numV = kDefaultNumUV;
 320         denShift        = DENSHIFT_DEFAULT;
 321         mode            = 0;
 322         pbFactor        = 4;
 323         dilate          = 8;
 324
 325         minBits = minBits1 = minBits2 = 1ul << 31;
 326
 327     int32_t             bestRes = mLastMixRes[channelIndex];
 328
 329     for ( mixRes = 0; mixRes <= maxRes; mixRes++ )
 330     {
 331         // mix the stereo inputs
 332         switch ( mBitDepth )
 333         {
 334             case 16:
 335                 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
 336                 break;
 337             case 20:
 338                 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
 339                 break;
 340             case 24:
 341                 // includes extraction of shifted-off bytes
 342                 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
 343                         mixBits, mixRes, mShiftBufferUV, bytesShifted );
 344                 break;
 345             case 32:
 346                 // includes extraction of shifted-off bytes
 347                 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
 348                         mixBits, mixRes, mShiftBufferUV, bytesShifted );
 349                 break;
 350         }
 351
 352         BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
 353
 354         // run the dynamic predictors
 355         pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
 356         pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
 357
 358         // run the lossless compressor on each channel
 359         set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
 360         status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
 361         RequireNoErr( status, goto Exit; );
 362
 363         set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
 364         status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
 365         RequireNoErr( status, goto Exit; );
 366
 367         // look for best match
 368         if ( (bits1 + bits2) < minBits1 )
 369         {
 370             minBits1 = bits1 + bits2;
 371             bestRes = mixRes;
 372         }
 373     }
 374
 375     mLastMixRes[channelIndex] = (int16_t)bestRes;
 376
 377         // mix the stereo inputs with the current best mixRes
 378         mixRes = mLastMixRes[channelIndex];
 379         switch ( mBitDepth )
 380         {
 381                 case 16:
 382                         mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
 383                         break;
 384                 case 20:
 385                         mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
 386                         break;
 387                 case 24:
 388                         // also extracts the shifted off bytes into the shift buffers
 389                         mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
 390                                         mixBits, mixRes, mShiftBufferUV, bytesShifted );
 391                         break;
 392                 case 32:
 393                         // also extracts the shifted off bytes into the shift buffers
 394                         mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
 395                                         mixBits, mixRes, mShiftBufferUV, bytesShifted );
 396                         break;
 397         }
 398
 399         // now it's time for the predictor coefficient search loop
 400         numU = numV = kMinUV;
 401         minBits1 = minBits2 = 1ul << 31;
 402
 403         for ( uint32_t numUV = kMinUV; numUV <= kMaxUV; numUV += 4 )
 404         {
 405                 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
 406
 407                 dilate = 32;
 408
 409                 // run the predictor over the same data multiple times to help it converge
 410                 for ( uint32_t converge = 0; converge < 8; converge++ )
 411                 {
 412                     pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
 413                     pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
 414                 }
 415
 416                 dilate = 8;
 417
 418                 set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
 419                 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
 420
 421                 if ( (bits1 * dilate + 16 * numUV) < minBits1 )
 422                 {
 423                         minBits1 = bits1 * dilate + 16 * numUV;
 424                         numU = numUV;
 425                 }
 426
 427                 set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
 428                 status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
 429
 430                 if ( (bits2 * dilate + 16 * numUV) < minBits2 )
 431                 {
 432                         minBits2 = bits2 * dilate + 16 * numUV;
 433                         numV = numUV;
 434                 }
 435         }
 436
 437         // test for escape hatch if best calculated compressed size turns out to be more than the input size
 438         minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
 439         if ( bytesShifted != 0 )
 440                 minBits += (numSamples * (bytesShifted * 8) * 2);
 441
 442         escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8);        /* 2 common header bytes */
 443
 444         doEscape = (minBits >= escapeBits) ? true : false;
 445
 446         if ( doEscape == false )
 447         {
 448                 // write bitstream header and coefs
 449                 BitBufferWrite( bitstream, 0, 12 );
 450                 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
 451                 if ( partialFrame )
 452                         BitBufferWrite( bitstream, numSamples, 32 );
 453                 BitBufferWrite( bitstream, mixBits, 8 );
 454                 BitBufferWrite( bitstream, mixRes, 8 );
 455
 456                 //Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
 457                 //Assert( (pbFactor < 8) && (numU < 32) );
 458                 //Assert( (pbFactor < 8) && (numV < 32) );
 459
 460                 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
 461                 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
 462                 for ( index = 0; index < numU; index++ )
 463                         BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
 464
 465                 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
 466                 BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
 467                 for ( index = 0; index < numV; index++ )
 468                         BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
 469
 470                 // if shift active, write the interleaved shift buffers
 471                 if ( bytesShifted != 0 )
 472                 {
 473                         uint32_t                bitShift = bytesShifted * 8;
 474
 475                         //Assert( bitShift <= 16 );
 476
 477                         for ( index = 0; index < (numSamples * 2); index += 2 )
 478                         {
 479                                 uint32_t                        shiftedVal;
 480
 481                                 shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
 482                                 BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
 483                         }
 484                 }
 485
 486                 // run the dynamic predictor and lossless compression for the "left" channel
 487                 // - note: to avoid allocating more buffers, we're mixing and matching between the available buffers instead
 488                 //                 of only using "U" buffers for the U-channel and "V" buffers for the V-channel
 489                 if ( mode == 0 )
 490                 {
 491                         pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
 492                 }
 493                 else
 494                 {
 495                         pc_block( mMixBufferU, mPredictorV, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
 496                         pc_block( mPredictorV, mPredictorU, numSamples, nil, 31, chanBits, 0 );
 497                 }
 498
 499                 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
 500                 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
 501                 RequireNoErr( status, goto Exit; );
 502
 503                 // run the dynamic predictor and lossless compression for the "right" channel
 504                 if ( mode == 0 )
 505                 {
 506                         pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
 507                 }
 508                 else
 509                 {
 510                         pc_block( mMixBufferV, mPredictorU, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
 511                         pc_block( mPredictorU, mPredictorV, numSamples, nil, 31, chanBits, 0 );
 512                 }
 513
 514                 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
 515                 status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
 516                 RequireNoErr( status, goto Exit; );
 517
 518                 /*      if we happened to create a compressed packet that was actually bigger than an escape packet would be,
 519                         chuck it and do an escape packet
 520                 */
 521                 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
 522                 if ( minBits >= escapeBits )
 523                 {
 524                         *bitstream = startBits;         // reset bitstream state
 525                         doEscape = true;
 526                         printf( "compressed frame too big: %u vs. %u \n", minBits, escapeBits );
 527                 }
 528         }
 529
 530         if ( doEscape == true )
 531         {
 532                 /* escape */
 533                 status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
 534
 535 #if VERBOSE_DEBUG
 536                 DebugMsg( "escape!: %lu vs %lu", minBits, escapeBits );
 537 #endif
 538         }
 539
 540 Exit:
 541         return status;
 542 }
 543
 544 /*
 545         EncodeStereoFast()
 546         - encode a channel pair without the search loop for maximum possible speed
 547 */
 548 int32_t ALACEncoder::EncodeStereoFast( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
 549 {
 550         BitBuffer               startBits = *bitstream;                 // squirrel away current bit position in case we decide to use escape hatch
 551         AGParamRec              agParams;
 552         uint32_t        bits1, bits2;
 553         int32_t                 mixBits, mixRes;
 554         uint32_t                        minBits, minBits1, minBits2;
 555         uint32_t                        numU, numV;
 556         uint32_t                        mode;
 557         uint32_t                        pbFactor;
 558         uint32_t                        chanBits;
 559         uint32_t                        denShift;
 560         uint8_t                 bytesShifted;
 561         SearchCoefs             coefsU;
 562         SearchCoefs             coefsV;
 563         uint32_t                        index;
 564         uint8_t                 partialFrame;
 565         uint32_t                        escapeBits;
 566         bool                    doEscape;
 567         int32_t         status;
 568
 569         // make sure we handle this bit-depth before we get going
 570         RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
 571
 572         // reload coefs pointers for this channel pair
 573         // - note that, while you might think they should be re-initialized per block, retaining state across blocks
 574         //       actually results in better overall compression
 575         // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
 576         //       different coefs for the different passes of "mixRes" results in even better compression
 577         coefsU = (SearchCoefs) mCoefsU[channelIndex];
 578         coefsV = (SearchCoefs) mCoefsV[channelIndex];
 579
 580         // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
 581         // so enable 16-bit "shift off" and encode in 17-bit mode
 582         // - in addition, 24-bit mode really improves with one byte shifted off
 583         if ( mBitDepth == 32 )
 584                 bytesShifted = 2;
 585         else if ( mBitDepth >= 24 )
 586                 bytesShifted = 1;
 587         else
 588                 bytesShifted = 0;
 589
 590         chanBits = mBitDepth - (bytesShifted * 8) + 1;
 591
 592         // flag whether or not this is a partial frame
 593         partialFrame = (numSamples == mFrameSize) ? 0 : 1;
 594
 595         // set up default encoding parameters for "fast" mode
 596         mixBits         = kDefaultMixBits;
 597         mixRes          = kDefaultMixRes;
 598         numU = numV = kDefaultNumUV;
 599         denShift        = DENSHIFT_DEFAULT;
 600         mode            = 0;
 601         pbFactor        = 4;
 602
 603         minBits = minBits1 = minBits2 = 1ul << 31;
 604
 605         // mix the stereo inputs with default mixBits/mixRes
 606         switch ( mBitDepth )
 607         {
 608                 case 16:
 609                         mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
 610                         break;
 611                 case 20:
 612                         mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
 613                         break;
 614                 case 24:
 615                         // also extracts the shifted off bytes into the shift buffers
 616                         mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
 617                                         mixBits, mixRes, mShiftBufferUV, bytesShifted );
 618                         break;
 619                 case 32:
 620                         // also extracts the shifted off bytes into the shift buffers
 621                         mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
 622                                         mixBits, mixRes, mShiftBufferUV, bytesShifted );
 623                         break;
 624         }
 625
 626         /* speculatively write the bitstream assuming the compressed version will be smaller */
 627
 628         // write bitstream header and coefs
 629         BitBufferWrite( bitstream, 0, 12 );
 630         BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
 631         if ( partialFrame )
 632                 BitBufferWrite( bitstream, numSamples, 32 );
 633         BitBufferWrite( bitstream, mixBits, 8 );
 634         BitBufferWrite( bitstream, mixRes, 8 );
 635
 636         //Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
 637         //Assert( (pbFactor < 8) && (numU < 32) );
 638         //Assert( (pbFactor < 8) && (numV < 32) );
 639
 640         BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
 641         BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
 642         for ( index = 0; index < numU; index++ )
 643                 BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
 644
 645         BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
 646         BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
 647         for ( index = 0; index < numV; index++ )
 648                 BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
 649
 650         // if shift active, write the interleaved shift buffers
 651         if ( bytesShifted != 0 )
 652         {
 653                 uint32_t                bitShift = bytesShifted * 8;
 654
 655                 //Assert( bitShift <= 16 );
 656
 657                 for ( index = 0; index < (numSamples * 2); index += 2 )
 658                 {
 659                         uint32_t                        shiftedVal;
 660
 661                         shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
 662                         BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
 663                 }
 664         }
 665
 666         // run the dynamic predictor and lossless compression for the "left" channel
 667         // - note: we always use mode 0 in the "fast" path so we don't need the code for mode != 0
 668         pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
 669
 670         set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
 671         status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
 672         RequireNoErr( status, goto Exit; );
 673
 674         // run the dynamic predictor and lossless compression for the "right" channel
 675         pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
 676
 677         set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
 678         status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
 679         RequireNoErr( status, goto Exit; );
 680
 681         // do bit requirement calculations
 682         minBits1 = bits1 + (numU * sizeof(int16_t) * 8);
 683         minBits2 = bits2 + (numV * sizeof(int16_t) * 8);
 684
 685         // test for escape hatch if best calculated compressed size turns out to be more than the input size
 686         minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
 687         if ( bytesShifted != 0 )
 688                 minBits += (numSamples * (bytesShifted * 8) * 2);
 689
 690         escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8);        /* 2 common header bytes */
 691
 692         doEscape = (minBits >= escapeBits) ? true : false;
 693
 694         if ( doEscape == false )
 695         {
 696                 /*      if we happened to create a compressed packet that was actually bigger than an escape packet would be,
 697                         chuck it and do an escape packet
 698                 */
 699                 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
 700                 if ( minBits >= escapeBits )
 701                 {
 702                         doEscape = true;
 703                         printf( "compressed frame too big: %u vs. %u\n", minBits, escapeBits );
 704                 }
 705
 706         }
 707
 708         if ( doEscape == true )
 709         {
 710                 /* escape */
 711
 712                 // reset bitstream position since we speculatively wrote the compressed version
 713                 *bitstream = startBits;
 714
 715                 // write escape frame
 716                 status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
 717
 718 #if VERBOSE_DEBUG
 719                 DebugMsg( "escape!: %u vs %u", minBits, (numSamples * mBitDepth * 2) );
 720 #endif
 721         }
 722
 723 Exit:
 724         return status;
 725 }
 726
 727 /*
 728         EncodeStereoEscape()
 729         - encode stereo escape frame
 730 */
 731 int32_t ALACEncoder::EncodeStereoEscape( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t numSamples )
 732 {
 733         int16_t *               input16;
 734         int32_t *               input32;
 735         uint8_t                 partialFrame;
 736         uint32_t                        index;
 737
 738         // flag whether or not this is a partial frame
 739         partialFrame = (numSamples == mFrameSize) ? 0 : 1;
 740
 741         // write bitstream header
 742         BitBufferWrite( bitstream, 0, 12 );
 743         BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 );        // LSB = 1 means "frame not compressed"
 744         if ( partialFrame )
 745                 BitBufferWrite( bitstream, numSamples, 32 );
 746
 747         // just copy the input data to the output buffer
 748         switch ( mBitDepth )
 749         {
 750                 case 16:
 751                         input16 = (int16_t *) inputBuffer;
 752
 753                         for ( index = 0; index < (numSamples * stride); index += stride )
 754                         {
 755                                 BitBufferWrite( bitstream, input16[index + 0], 16 );
 756                                 BitBufferWrite( bitstream, input16[index + 1], 16 );
 757                         }
 758                         break;
 759                 case 20:
 760                         // mix20() with mixres param = 0 means de-interleave so use it to simplify things
 761                         mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0 );
 762                         for ( index = 0; index < numSamples; index++ )
 763                         {
 764                                 BitBufferWrite( bitstream, mMixBufferU[index], 20 );
 765                                 BitBufferWrite( bitstream, mMixBufferV[index], 20 );
 766                         }
 767                         break;
 768                 case 24:
 769                         // mix24() with mixres param = 0 means de-interleave so use it to simplify things
 770                         mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0, mShiftBufferUV, 0 );
 771                         for ( index = 0; index < numSamples; index++ )
 772                         {
 773                                 BitBufferWrite( bitstream, mMixBufferU[index], 24 );
 774                                 BitBufferWrite( bitstream, mMixBufferV[index], 24 );
 775                         }
 776                         break;
 777                 case 32:
 778                         input32 = (int32_t *) inputBuffer;
 779
 780                         for ( index = 0; index < (numSamples * stride); index += stride )
 781                         {
 782                                 BitBufferWrite( bitstream, input32[index + 0], 32 );
 783                                 BitBufferWrite( bitstream, input32[index + 1], 32 );
 784                         }
 785                         break;
 786         }
 787
 788         return ALAC_noErr;
 789 }
 790
 791 /*
 792         EncodeMono()
 793         - encode a mono input buffer
 794 */
 795 int32_t ALACEncoder::EncodeMono( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
 796 {
 797         BitBuffer               startBits = *bitstream;                 // squirrel away copy of current state in case we need to go back and do an escape packet
 798         AGParamRec              agParams;
 799         uint32_t        bits1;
 800         uint32_t                        numU;
 801         SearchCoefs             coefsU;
 802         uint32_t                        dilate;
 803         uint32_t                        minBits, bestU;
 804         uint32_t                        minU, maxU;
 805         uint32_t                        index, index2;
 806         uint8_t                 bytesShifted;
 807         uint32_t                        shift;
 808         uint32_t                        mask;
 809         uint32_t                        chanBits;
 810         uint8_t                 pbFactor;
 811         uint8_t                 partialFrame;
 812         int16_t *               input16;
 813         int32_t *               input32;
 814         uint32_t                        escapeBits;
 815         bool                    doEscape;
 816         int32_t         status;
 817
 818         // make sure we handle this bit-depth before we get going
 819         RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
 820
 821         status = ALAC_noErr;
 822
 823         // reload coefs array from previous frame
 824         coefsU = (SearchCoefs) mCoefsU[channelIndex];
 825
 826         // pick bit depth for actual encoding
 827         // - we lop off the lower byte(s) for 24-/32-bit encodings
 828         if ( mBitDepth == 32 )
 829                 bytesShifted = 2;
 830         else if ( mBitDepth >= 24 )
 831                 bytesShifted = 1;
 832         else
 833                 bytesShifted = 0;
 834
 835         shift = bytesShifted * 8;
 836         mask = (1ul << shift) - 1;
 837         chanBits = mBitDepth - (bytesShifted * 8);
 838
 839         // flag whether or not this is a partial frame
 840         partialFrame = (numSamples == mFrameSize) ? 0 : 1;
 841
 842         // convert N-bit data to 32-bit for predictor
 843         switch ( mBitDepth )
 844         {
 845                 case 16:
 846                 {
 847                         // convert 16-bit data to 32-bit for predictor
 848                         input16 = (int16_t *) inputBuffer;
 849                         for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
 850                                 mMixBufferU[index] = (int32_t) input16[index2];
 851                         break;
 852                 }
 853                 case 20:
 854                         // convert 20-bit data to 32-bit for predictor
 855                         copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
 856                         break;
 857                 case 24:
 858                         // convert 24-bit data to 32-bit for the predictor and extract the shifted off byte(s)
 859                         copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
 860                         for ( index = 0; index < numSamples; index++ )
 861                         {
 862                                 mShiftBufferUV[index] = (uint16_t)(mMixBufferU[index] & mask);
 863                                 mMixBufferU[index] >>= shift;
 864                         }
 865                         break;
 866                 case 32:
 867                 {
 868                         // just copy the 32-bit input data for the predictor and extract the shifted off byte(s)
 869                         input32 = (int32_t *) inputBuffer;
 870
 871                         for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
 872                         {
 873                                 int32_t                 val = input32[index2];
 874
 875                                 mShiftBufferUV[index] = (uint16_t)(val & mask);
 876                                 mMixBufferU[index] = val >> shift;
 877                         }
 878                         break;
 879                 }
 880         }
 881
 882         // brute-force encode optimization loop (implied "encode depth" of 0 if comparing to cmd line tool)
 883         // - run over variations of the encoding params to find the best choice
 884         minU            = 4;
 885         maxU            = 8;
 886         minBits         = 1ul << 31;
 887         pbFactor        = 4;
 888
 889         minBits = 1ul << 31;
 890         bestU   = minU;
 891
 892         for ( numU = minU; numU <= maxU; numU += 4 )
 893         {
 894                 BitBuffer               workBits;
 895                 uint32_t                        numBits;
 896
 897                 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
 898
 899                 dilate = 32;
 900                 for ( uint32_t converge = 0; converge < 7; converge++ )
 901                         pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
 902
 903                 dilate = 8;
 904                 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
 905
 906                 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
 907                 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
 908                 RequireNoErr( status, goto Exit; );
 909
 910                 numBits = (dilate * bits1) + (16 * numU);
 911                 if ( numBits < minBits )
 912                 {
 913                         bestU   = numU;
 914                         minBits = numBits;
 915                 }
 916         }
 917
 918         // test for escape hatch if best calculated compressed size turns out to be more than the input size
 919         // - first, add bits for the header bytes mixRes/maxRes/shiftU/filterU
 920         minBits += (4 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
 921         if ( bytesShifted != 0 )
 922                 minBits += (numSamples * (bytesShifted * 8));
 923
 924         escapeBits = (numSamples * mBitDepth) + ((partialFrame == true) ? 32 : 0) + (2 * 8);    /* 2 common header bytes */
 925
 926         doEscape = (minBits >= escapeBits) ? true : false;
 927
 928         if ( doEscape == false )
 929         {
 930                 // write bitstream header
 931                 BitBufferWrite( bitstream, 0, 12 );
 932                 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
 933                 if ( partialFrame )
 934                         BitBufferWrite( bitstream, numSamples, 32 );
 935                 BitBufferWrite( bitstream, 0, 16 );                                                             // mixBits = mixRes = 0
 936
 937                 // write the params and predictor coefs
 938                 numU = bestU;
 939                 BitBufferWrite( bitstream, (0 << 4) | DENSHIFT_DEFAULT, 8 );    // modeU = 0
 940                 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
 941                 for ( index = 0; index < numU; index++ )
 942                         BitBufferWrite( bitstream, coefsU[numU-1][index], 16 );
 943
 944                 // if shift active, write the interleaved shift buffers
 945                 if ( bytesShifted != 0 )
 946                 {
 947                         for ( index = 0; index < numSamples; index++ )
 948                                 BitBufferWrite( bitstream, mShiftBufferUV[index], shift );
 949                 }
 950
 951                 // run the dynamic predictor with the best result
 952                 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
 953
 954                 // do lossless compression
 955                 set_standard_ag_params( &agParams, numSamples, numSamples );
 956                 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
 957                 //AssertNoErr( status );
 958
 959
 960                 /*      if we happened to create a compressed packet that was actually bigger than an escape packet would be,
 961                         chuck it and do an escape packet
 962                 */
 963                 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
 964                 if ( minBits >= escapeBits )
 965                 {
 966                         *bitstream = startBits;         // reset bitstream state
 967                         doEscape = true;
 968                         printf( "compressed frame too big: %u vs. %u\n", minBits, escapeBits );
 969                 }
 970         }
 971
 972         if ( doEscape == true )
 973         {
 974                 // write bitstream header and coefs
 975                 BitBufferWrite( bitstream, 0, 12 );
 976                 BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 );        // LSB = 1 means "frame not compressed"
 977                 if ( partialFrame )
 978                         BitBufferWrite( bitstream, numSamples, 32 );
 979
 980                 // just copy the input data to the output buffer
 981                 switch ( mBitDepth )
 982                 {
 983                         case 16:
 984                                 input16 = (int16_t *) inputBuffer;
 985                                 for ( index = 0; index < (numSamples * stride); index += stride )
 986                                         BitBufferWrite( bitstream, input16[index], 16 );
 987                                 break;
 988                         case 20:
 989                                 // convert 20-bit data to 32-bit for simplicity
 990                                 copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
 991                                 for ( index = 0; index < numSamples; index++ )
 992                                         BitBufferWrite( bitstream, mMixBufferU[index], 20 );
 993                                 break;
 994                         case 24:
 995                                 // convert 24-bit data to 32-bit for simplicity
 996                                 copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
 997                                 for ( index = 0; index < numSamples; index++ )
 998                                         BitBufferWrite( bitstream, mMixBufferU[index], 24 );
 999                                 break;
1000                         case 32:
1001                                 input32 = (int32_t *) inputBuffer;
1002                                 for ( index = 0; index < (numSamples * stride); index += stride )
1003                                         BitBufferWrite( bitstream, input32[index], 32 );
1004                                 break;
1005                 }
1006 #if VERBOSE_DEBUG
1007                 DebugMsg( "escape!: %lu vs %lu", minBits, (numSamples * mBitDepth) );
1008 #endif
1009         }
1010
1011 Exit:
1012         return status;
1013 }
1014
1015 #if PRAGMA_MARK
1016 #pragma mark -
1017 #endif
1018
1019 /*
1020         Encode()
1021         - encode the next block of samples
1022 */
1023 int32_t ALACEncoder::Encode(AudioFormatDescription theInputFormat, AudioFormatDescription theOutputFormat,
1024                              unsigned char * theReadBuffer, unsigned char * theWriteBuffer, int32_t * ioNumBytes)
1025 {
1026         uint32_t                                numFrames;
1027         uint32_t                                outputSize;
1028         BitBuffer                       bitstream;
1029         int32_t                 status;
1030
1031         numFrames = *ioNumBytes/theInputFormat.mBytesPerPacket;
1032
1033         // create a bit buffer structure pointing to our output buffer
1034         BitBufferInit( &bitstream, theWriteBuffer, mMaxOutputBytes );
1035
1036         if ( theInputFormat.mChannelsPerFrame == 2 )
1037         {
1038                 // add 3-bit frame start tag ID_CPE = channel pair & 4-bit element instance tag = 0
1039                 BitBufferWrite( &bitstream, ID_CPE, 3 );
1040                 BitBufferWrite( &bitstream, 0, 4 );
1041
1042                 // encode stereo input buffer
1043                 if ( mFastMode == false )
1044                         status = this->EncodeStereo( &bitstream, theReadBuffer, 2, 0, numFrames );
1045                 else
1046                         status = this->EncodeStereoFast( &bitstream, theReadBuffer, 2, 0, numFrames );
1047                 RequireNoErr( status, goto Exit; );
1048         }
1049         else if ( theInputFormat.mChannelsPerFrame == 1 )
1050         {
1051                 // add 3-bit frame start tag ID_SCE = mono channel & 4-bit element instance tag = 0
1052                 BitBufferWrite( &bitstream, ID_SCE, 3 );
1053                 BitBufferWrite( &bitstream, 0, 4 );
1054
1055                 // encode mono input buffer
1056                 status = this->EncodeMono( &bitstream, theReadBuffer, 1, 0, numFrames );
1057                 RequireNoErr( status, goto Exit; );
1058         }
1059         else
1060         {
1061                 char *                                  inputBuffer;
1062                 uint32_t                                tag;
1063                 uint32_t                                channelIndex;
1064                 uint32_t                                inputIncrement;
1065                 uint8_t                         stereoElementTag;
1066                 uint8_t                         monoElementTag;
1067                 uint8_t                         lfeElementTag;
1068
1069                 inputBuffer             = (char *) theReadBuffer;
1070                 inputIncrement  = ((mBitDepth + 7) / 8);
1071
1072                 stereoElementTag        = 0;
1073                 monoElementTag          = 0;
1074                 lfeElementTag           = 0;
1075
1076                 for ( channelIndex = 0; channelIndex < theInputFormat.mChannelsPerFrame; )
1077                 {
1078                         tag = (sChannelMaps[theInputFormat.mChannelsPerFrame - 1] & (0x7ul << (channelIndex * 3))) >> (channelIndex * 3);
1079
1080                         BitBufferWrite( &bitstream, tag, 3 );
1081                         switch ( tag )
1082                         {
1083                                 case ID_SCE:
1084                                         // mono
1085                                         BitBufferWrite( &bitstream, monoElementTag, 4 );
1086
1087                                         status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1088
1089                                         inputBuffer += inputIncrement;
1090                                         channelIndex++;
1091                                         monoElementTag++;
1092                                         break;
1093
1094                                 case ID_CPE:
1095                                         // stereo
1096                                         BitBufferWrite( &bitstream, stereoElementTag, 4 );
1097
1098                                         status = this->EncodeStereo( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1099
1100                                         inputBuffer += (inputIncrement * 2);
1101                                         channelIndex += 2;
1102                                         stereoElementTag++;
1103                                         break;
1104
1105                                 case ID_LFE:
1106                                         // LFE channel (subwoofer)
1107                                         BitBufferWrite( &bitstream, lfeElementTag, 4 );
1108
1109                                         status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1110
1111                                         inputBuffer += inputIncrement;
1112                                         channelIndex++;
1113                                         lfeElementTag++;
1114                                         break;
1115
1116                                 default:
1117                                         printf( "That ain't right! (%u)\n", tag );
1118                                         status = kALAC_ParamError;
1119                                         goto Exit;
1120                         }
1121
1122                         RequireNoErr( status, goto Exit; );
1123                 }
1124         }
1125
1126 #if VERBOSE_DEBUG
1127 {
1128         // if there is room left in the output buffer, add some random fill data to test decoder
1129         int32_t                 bitsLeft;
1130         int32_t                 bytesLeft;
1131
1132         bitsLeft = BitBufferGetPosition( &bitstream ) - 3;      // - 3 for ID_END tag
1133         bytesLeft = bitstream.byteSize - ((bitsLeft + 7) / 8);
1134
1135         if ( (bytesLeft > 20) && ((bytesLeft & 0x4u) != 0) )
1136                 AddFiller( &bitstream, bytesLeft );
1137 }
1138 #endif
1139
1140         // add 3-bit frame end tag: ID_END
1141         BitBufferWrite( &bitstream, ID_END, 3 );
1142
1143         // byte-align the output data
1144         BitBufferByteAlign( &bitstream, true );
1145
1146         outputSize = BitBufferGetPosition( &bitstream ) / 8;
1147         //Assert( outputSize <= mMaxOutputBytes );
1148
1149
1150         // all good, let iTunes know what happened and remember the total number of input sample frames
1151         *ioNumBytes = outputSize;
1152         //mEncodedFrames                           += encodeMsg->numInputSamples;
1153
1154         // gather encoding stats
1155         mTotalBytesGenerated += outputSize;
1156         mMaxFrameBytes = MAX( mMaxFrameBytes, outputSize );
1157
1158         status = ALAC_noErr;
1159
1160 Exit:
1161         return status;
1162 }
1163
1164 /*
1165         Finish()
1166         - drain out any leftover samples
1167 */
1168
1169 int32_t ALACEncoder::Finish()
1170 {
1171 /*      // finalize bit rate statistics
1172         if ( mSampleSize.numEntries != 0 )
1173                 mAvgBitRate = (uint32_t)( (((float)mTotalBytesGenerated * 8.0f) / (float)mSampleSize.numEntries) * ((float)mSampleRate / (float)mFrameSize) );
1174         else
1175                 mAvgBitRate = 0;
1176 */
1177         return ALAC_noErr;
1178 }
1179
1180 #if PRAGMA_MARK
1181 #pragma mark -
1182 #endif
1183
1184 /*
1185         GetConfig()
1186 */
1187 void ALACEncoder::GetConfig( ALACSpecificConfig & config )
1188 {
1189         config.frameLength                      = Swap32NtoB(mFrameSize);
1190         config.compatibleVersion        = (uint8_t) kALACCompatibleVersion;
1191         config.bitDepth                         = (uint8_t) mBitDepth;
1192         config.pb                                       = (uint8_t) PB0;
1193         config.kb                                       = (uint8_t) KB0;
1194         config.mb                                       = (uint8_t) MB0;
1195         config.numChannels                      = (uint8_t) mNumChannels;
1196         config.maxRun                           = Swap16NtoB((uint16_t) MAX_RUN_DEFAULT);
1197         config.maxFrameBytes            = Swap32NtoB(mMaxFrameBytes);
1198         config.avgBitRate                       = Swap32NtoB(mAvgBitRate);
1199         config.sampleRate                       = Swap32NtoB(mOutputSampleRate);
1200 }
1201
1202 uint32_t ALACEncoder::GetMagicCookieSize(uint32_t inNumChannels)
1203 {
1204     if (inNumChannels > 2)
1205     {
1206         return sizeof(ALACSpecificConfig) + kChannelAtomSize + sizeof(ALACAudioChannelLayout);
1207     }
1208     else
1209     {
1210         return sizeof(ALACSpecificConfig);
1211     }
1212 }
1213
1214 void ALACEncoder::GetMagicCookie(void * outCookie, uint32_t * ioSize)
1215 {
1216     ALACSpecificConfig theConfig = {0};
1217     ALACAudioChannelLayout theChannelLayout = {0};
1218     uint8_t theChannelAtom[kChannelAtomSize] = {0, 0, 0, 0, 'c', 'h', 'a', 'n', 0, 0, 0, 0};
1219     uint32_t theCookieSize = sizeof(ALACSpecificConfig);
1220     uint8_t * theCookiePointer = (uint8_t *)outCookie;
1221
1222     GetConfig(theConfig);
1223     if (theConfig.numChannels > 2)
1224     {
1225         theChannelLayout.mChannelLayoutTag = ALACChannelLayoutTags[theConfig.numChannels - 1];
1226         theCookieSize += (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
1227     }
1228      if (*ioSize >= theCookieSize)
1229     {
1230         memcpy(theCookiePointer, &theConfig, sizeof(ALACSpecificConfig));
1231         theChannelAtom[3] = (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
1232         if (theConfig.numChannels > 2)
1233         {
1234             theCookiePointer += sizeof(ALACSpecificConfig);
1235             memcpy(theCookiePointer, theChannelAtom, kChannelAtomSize);
1236             theCookiePointer += kChannelAtomSize;
1237             memcpy(theCookiePointer, &theChannelLayout, sizeof(ALACAudioChannelLayout));
1238         }
1239         *ioSize = theCookieSize;
1240     }
1241     else
1242     {
1243         *ioSize = 0; // no incomplete cookies
1244     }
1245 }
1246
1247 /*
1248         InitializeEncoder()
1249         - initialize the encoder component with the current config
1250 */
1251 int32_t ALACEncoder::InitializeEncoder(AudioFormatDescription theOutputFormat)
1252 {
1253         int32_t                 status;
1254
1255     mOutputSampleRate = theOutputFormat.mSampleRate;
1256     mNumChannels = theOutputFormat.mChannelsPerFrame;
1257     switch(theOutputFormat.mFormatFlags)
1258     {
1259         case 1:
1260             mBitDepth = 16;
1261             break;
1262         case 2:
1263             mBitDepth = 20;
1264             break;
1265         case 3:
1266             mBitDepth = 24;
1267             break;
1268         case 4:
1269             mBitDepth = 32;
1270             break;
1271         default:
1272             break;
1273     }
1274
1275         // set up default encoding parameters and state
1276         // - note: mFrameSize is set in the constructor or via SetFrameSize() which must be called before this routine
1277         for ( uint32_t index = 0; index < kALACMaxChannels; index++ )
1278                 mLastMixRes[index] = kDefaultMixRes;
1279
1280         // the maximum output frame size can be no bigger than (samplesPerBlock * numChannels * ((10 + sampleSize)/8) + 1)
1281         // but note that this can be bigger than the input size!
1282         // - since we don't yet know what our input format will be, use our max allowed sample size in the calculation
1283         mMaxOutputBytes = mFrameSize * mNumChannels * ((10 + kMaxSampleSize) / 8)  + 1;
1284
1285         // allocate mix buffers
1286         mMixBufferU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1287         mMixBufferV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1288
1289         // allocate dynamic predictor buffers
1290         mPredictorU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1291         mPredictorV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1292
1293         // allocate combined shift buffer
1294         mShiftBufferUV = (uint16_t *) calloc( mFrameSize * 2 * sizeof(uint16_t),1 );
1295
1296         // allocate work buffer for search loop
1297         mWorkBuffer = (uint8_t *) calloc( mMaxOutputBytes, 1 );
1298
1299         RequireAction( (mMixBufferU != nil) && (mMixBufferV != nil) &&
1300                                         (mPredictorU != nil) && (mPredictorV != nil) &&
1301                                         (mShiftBufferUV != nil) && (mWorkBuffer != nil ),
1302                                         status = kALAC_MemFullError; goto Exit; );
1303
1304         status = ALAC_noErr;
1305
1306
1307         // initialize coefs arrays once b/c retaining state across blocks actually improves the encode ratio
1308         for ( int32_t channel = 0; channel < (int32_t)mNumChannels; channel++ )
1309         {
1310                 for ( int32_t search = 0; search < kALACMaxSearches; search++ )
1311                 {
1312                         init_coefs( mCoefsU[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1313                         init_coefs( mCoefsV[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1314                 }
1315         }
1316
1317 Exit:
1318         return status;
1319 }
1320
1321 /*
1322         GetSourceFormat()
1323         - given the input format, return one of our supported formats
1324 */
1325 void ALACEncoder::GetSourceFormat( const AudioFormatDescription * source, AudioFormatDescription * output )
1326 {
1327         // default is 16-bit native endian
1328         // - note: for float input we assume that's coming from one of our decoders (mp3, aac) so it only makes sense
1329         //                 to encode to 16-bit since the source was lossy in the first place
1330         // - note: if not a supported bit depth, find the closest supported bit depth to the input one
1331         if ( (source->mFormatID != kALACFormatLinearPCM) || ((source->mFormatFlags & kALACFormatFlagIsFloat) != 0) ||
1332                 ( source->mBitsPerChannel <= 16 ) )
1333                 mBitDepth = 16;
1334         else if ( source->mBitsPerChannel <= 20 )
1335                 mBitDepth = 20;
1336         else if ( source->mBitsPerChannel <= 24 )
1337                 mBitDepth = 24;
1338         else
1339                 mBitDepth = 32;
1340
1341         // we support 16/20/24/32-bit integer data at any sample rate and our target number of channels
1342         // and sample rate were specified when we were configured
1343         /*
1344     MakeUncompressedAudioFormat( mNumChannels, (float) mOutputSampleRate, mBitDepth, kAudioFormatFlagsNativeIntegerPacked, output );
1345      */
1346 }
1347
1348
1349
1350 #if VERBOSE_DEBUG
1351
1352 #if PRAGMA_MARK
1353 #pragma mark -
1354 #endif
1355
1356 /*
1357         AddFiller()
1358         - add fill and data stream elements to the bitstream to test the decoder
1359 */
1360 static void AddFiller( BitBuffer * bits, int32_t numBytes )
1361 {
1362         uint8_t         tag;
1363         uint32_t                index;
1364
1365         // out of lameness, subtract 6 bytes to deal with header + alignment as required for fill/data elements
1366         numBytes -= 6;
1367         if ( numBytes <= 0 )
1368                 return;
1369
1370         // randomly pick Fill or Data Stream Element based on numBytes requested
1371         tag = (numBytes & 0x8) ? ID_FIL : ID_DSE;
1372
1373         BitBufferWrite( bits, tag, 3 );
1374         if ( tag == ID_FIL )
1375         {
1376                 // can't write more than 269 bytes in a fill element
1377                 numBytes = (numBytes > 269) ? 269 : numBytes;
1378
1379                 // fill element = 4-bit size unless >= 15 then 4-bit size + 8-bit extension size
1380                 if ( numBytes >= 15 )
1381                 {
1382                         uint16_t                        extensionSize;
1383
1384                         BitBufferWrite( bits, 15, 4 );
1385
1386                         // 8-bit extension count field is "extra + 1" which is weird but I didn't define the syntax
1387                         // - otherwise, there's no way to represent 15
1388                         // - for example, to really mean 15 bytes you must encode extensionSize = 1
1389                         // - why it's not like data stream elements I have no idea
1390                         extensionSize = (numBytes - 15) + 1;
1391                         Assert( extensionSize <= 255 );
1392                         BitBufferWrite( bits, extensionSize, 8 );
1393                 }
1394                 else
1395                         BitBufferWrite( bits, numBytes, 4 );
1396
1397                 BitBufferWrite( bits, 0x10, 8 );                // extension_type = FILL_DATA = b0001 or'ed with fill_nibble = b0000
1398                 for ( index = 0; index < (numBytes - 1); index++ )
1399                         BitBufferWrite( bits, 0xa5, 8 );        // fill_byte = b10100101 = 0xa5
1400         }
1401         else
1402         {
1403                 // can't write more than 510 bytes in a data stream element
1404                 numBytes = (numBytes > 510) ? 510 : numBytes;
1405
1406                 BitBufferWrite( bits, 0, 4 );                   // element instance tag
1407                 BitBufferWrite( bits, 1, 1 );                   // byte-align flag = true
1408
1409                 // data stream element = 8-bit size unless >= 255 then 8-bit size + 8-bit size
1410                 if ( numBytes >= 255 )
1411                 {
1412                         BitBufferWrite( bits, 255, 8 );
1413                         BitBufferWrite( bits, numBytes - 255, 8 );
1414                 }
1415                 else
1416                         BitBufferWrite( bits, numBytes, 8 );
1417
1418                 BitBufferByteAlign( bits, true );               // byte-align with zeros
1419
1420                 for ( index = 0; index < numBytes; index++ )
1421                         BitBufferWrite( bits, 0x5a, 8 );
1422         }
1423 }
1424
1425 #endif  /* VERBOSE_DEBUG */