2 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 * @APPLE_APACHE_LICENSE_HEADER_START@
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 * @APPLE_APACHE_LICENSE_HEADER_END@
26 #define VERBOSE_DEBUG 0
33 #include "ALACEncoder.h"
37 #include "matrixlib.h"
39 #include "ALACBitUtilities.h"
40 #include "ALACAudioTypes.h"
41 #include "EndianPortable.h"
43 // Note: in C you can't typecast to a 2-dimensional array pointer but that's what we need when
44 // picking which coefs to use so we declare this typedef b/c we *can* typecast to this type
// SearchCoefs is a pointer to rows of kALACMaxCoefs int16_t values; the encode functions index it
// as coefs[order - 1][i], i.e. one coefficient row per predictor order.
45 typedef int16_t (*SearchCoefs)[kALACMaxCoefs];
48 const uint32_t kALACEncoderMagic = 'dpge'; // multi-character literal; usage is not visible in this chunk
49 const uint32_t kMaxSampleSize = 32; // max allowed bit width is 32
50 const uint32_t kDefaultMixBits = 2; // mixBits used by both EncodeStereo and EncodeStereoFast
51 const uint32_t kDefaultMixRes = 0; // mixRes used by EncodeStereoFast (no mixRes search in the fast path)
52 const uint32_t kMaxRes = 4; // presumably the upper bound of the mixRes search in EncodeStereo -- the assignment is not visible here, confirm
53 const uint32_t kDefaultNumUV = 8; // default predictor order for both the U and V channels
54 const uint32_t kMinUV = 4; // first predictor order tried by the EncodeStereo coefficient search
55 const uint32_t kMaxUV = 8; // last predictor order tried by the EncodeStereo coefficient search (step is 4)
59 static void AddFiller( BitBuffer * bits, int32_t numBytes );
64 Map Format: 3-bit field per channel which is the same as the "element tag" that should be placed
65 at the beginning of the frame for that channel. Indicates whether SCE, CPE, or LFE.
66 Each particular field is accessed via the current channel index. Note that the channel
67 index increments by two for channel pairs.
71 C L R 3-channel input = (ID_CPE << 3) | (ID_SCE)
72 index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
73 index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
75 C L R Ls Rs LFE 5.1-channel input = (ID_LFE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
76 index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
77 index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
78 index 3 value = (map & (0x7ul << (3 * 3))) >> (3 * 3)
79 index 5 value = (map & (0x7ul << (5 * 3))) >> (5 * 3)
80 index 7 value = (map & (0x7ul << (7 * 3))) >> (7 * 3)
// Element-tag maps, one entry per supported channel count. Encode() selects the entry with
// sChannelMaps[mChannelsPerFrame - 1] and extracts the 3-bit tag for the current channel index
// with (map & (0x7ul << (channelIndex * 3))) >> (channelIndex * 3).
82 static const uint32_t sChannelMaps[kALACMaxChannels] =
86 (ID_CPE << 3) | (ID_SCE),
87 (ID_SCE << 9) | (ID_CPE << 3) | (ID_SCE),
88 (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
// NOTE(review): the map-format description above shows ID_LFE at bit 15 for 5.1 input, but the
// entry below places ID_SCE there -- confirm which one is intended.
89 (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
90 (ID_SCE << 18) | (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
91 (ID_SCE << 21) | (ID_CPE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
// Table of sample-rate values (presumably in Hz, and presumably the rates accepted for iPod
// playback judging by the name -- the code that consults this table is not visible here, confirm).
94 static const uint32_t sSupportediPodSampleRates[] =
96 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000
// Constructor. Of the member initializers, only the ones below are visible in this chunk: the
// shift buffer pointer starts out as nil and the generated-bytes counter as 0. The body then
// sets the frame size to kALACDefaultFrameSize.
102 ALACEncoder::ALACEncoder() :
109 mShiftBufferUV( nil ),
113 mTotalBytesGenerated( 0 ),
118 mFrameSize = kALACDefaultFrameSize;
// Destructor. Releases the encoder's buffers section by section; only the release of the shift
// buffer is fully visible in this chunk.
124 ALACEncoder::~ALACEncoder()
126 // delete the matrix mixing buffers
138 // delete the dynamic predictor's "corrector" buffers
150 // delete the unused byte shift buffer
151 if ( mShiftBufferUV )
// released with free(), so the buffer is expected to come from the malloc family (allocation not visible here)
153 free(mShiftBufferUV);
154 mShiftBufferUV = NULL;
157 // delete the work buffer
172 For every segment we adopt the following header:
174 1 byte reserved (always 0)
175 1 byte flags (see below)
176 [4 byte frame length] (optional, see below)
177 ---Next, the per-segment ALAC parameters---
178 1 byte mixBits (middle-side parameter)
179 1 byte mixRes (middle-side parameter, interpreted as signed char)
181 1 byte shiftU (4 bits modeU, 4 bits denShiftU)
182 1 byte filterU (3 bits pbFactorU, 5 bits numU)
183 (numU) shorts (signed DP coefficients for U channel)
184 ---Next, 2nd-channel ALAC parameters in case of stereo mode---
185 1 byte shiftV (4 bits modeV, 4 bits denShiftV)
186 1 byte filterV (3 bits pbFactorV, 5 bits numV)
187 (numV) shorts (signed DP coefficients for V channel)
188 ---After this come the shift-off bytes for (>= 24)-bit data (n-byte shift) if indicated---
189 ---Then comes the AG-compressor bitstream---
195 The presence of certain flag bits changes the header format such that the parameters might
196 not even be sent. The currently defined flags format is: 0000psse
200 where 0 = reserved, must be 0
201 p = 1-bit field "partial frame" flag indicating 32-bit frame length follows this byte
202 ss = 2-bit field indicating "number of shift-off bytes ignored by compression"
203 e = 1-bit field indicating "escape"
205 The "partial frame" flag means that the following segment is not equal to the frame length specified
206 in the out-of-band decoder configuration. This allows the decoder to deal with end-of-file partial
207 segments without incurring the 32-bit overhead for each segment.
209 The "shift-off" field indicates the number of bytes at the bottom of the word that were passed through
210 uncompressed. The reason for this is that the entropy inherent in the LS bytes of >= 24-bit words
211 quite often means that the frame would have to be "escaped" b/c the compressed size would be >= the
212 uncompressed size. However, by shifting the input values down and running the remaining bits through
213 the normal compression algorithm, a net win can be achieved. If this field is non-zero, it means that
214 the shifted-off bytes follow after the parameter section of the header and before the compressed
215 bitstream. Note that doing this also allows us to use matrixing on 32-bit inputs after one or more
216 bytes are shifted off the bottom which helps the eventual compression ratio. For stereo channels,
217 the shifted off bytes are interleaved.
219 The "escape" flag means that this segment was not compressed b/c the compressed size would be
220 >= uncompressed size. In that case, the audio data was passed through uncompressed after the header.
221 The other header parameter bytes will not be sent.
227 If the segment is not a partial or escape segment, the total header size (in bytes) is given exactly by:
229 4 + (2 + 2 * numU) (mono mode)
230 4 + (2 + 2 * numU) + (2 + 2 * numV) (stereo mode)
232 where the ALAC filter-lengths numU, numV are bounded by a
233 constant (in the current source, numU, numV <= NUMCOEPAIRS), and
234 this forces an absolute upper bound on header size.
236 Each segment-decode process loads up these bytes from the front of the
237 local stream, in the above order, then follows with the entropy-encoded
238 bits for the given segment.
240 To generalize middle-side, there are various mixing modes including middle-side, each lossless,
241 as embodied in the mix() and unmix() functions. These functions exploit a generalized middle-side
244 u := [(rL + (m-r)R)/m]; v := L - R;
247 where [ ] denotes integer floor. The (lossless) inverse is L = u + v - [rv/m]; R = L - v;
252 In the segment header, m and r are encoded in mixBits and mixRes.
253 Classical "middle-side" is obtained with m = 2, r = 1, but now
254 we have more generalized mixes.
258 The relevance of the ALAC coefficients is explained in detail
264 - encode a channel pair
// EncodeStereo: encode one channel pair into `bitstream`, searching for good encoding parameters.
//
// Outline of the visible steps:
//   1. reject bit depths other than 16/20/24/32 with kALAC_ParamError
//   2. for each mixRes in [0, maxRes], mix, predict and entropy-code a reduced sample count
//      (numSamples/dilate) into the scratch work buffer and track the smallest total bit count
//   3. re-mix the full frame with the mixRes stored in mLastMixRes[channelIndex]
//   4. try predictor orders kMinUV..kMaxUV (step 4) and track the cheapest per channel
//   5. estimate the compressed size; if it is not smaller than the raw ("escape") size, or the
//      packet actually written turns out not to be smaller, emit an escape packet instead
//
// Parameters:
//   bitstream    - output bit buffer; a copy of its state is taken on entry so it can be rewound
//   inputBuffer  - input samples, reinterpreted according to mBitDepth
//   stride       - passed through to the mixNN() helpers and to EncodeStereoEscape
//   channelIndex - selects this pair's entries in mCoefsU, mCoefsV and mLastMixRes
//   numSamples   - samples per channel in this frame; a value != mFrameSize marks a partial frame
// Returns ALAC_noErr on success, kALAC_ParamError for an unsupported bit depth, or the error
// status propagated from dyn_comp / EncodeStereoEscape.
266 int32_t ALACEncoder::EncodeStereo( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
269 BitBuffer startBits = *bitstream; // squirrel away copy of current state in case we need to go back and do an escape packet
271 uint32_t bits1, bits2;
273 int32_t mixBits, mixRes, maxRes;
274 uint32_t minBits, minBits1, minBits2;
280 uint8_t bytesShifted;
284 uint8_t partialFrame;
287 int32_t status = ALAC_noErr;
289 // make sure we handle this bit-depth before we get going
290 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
292 // reload coefs pointers for this channel pair
293 // - note that, while you might think they should be re-initialized per block, retaining state across blocks
294 // actually results in better overall compression
295 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
296 // different coefs for the different passes of "mixRes" results in even better compression
297 coefsU = (SearchCoefs) mCoefsU[channelIndex];
298 coefsV = (SearchCoefs) mCoefsV[channelIndex];
300 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
301 // so enable 16-bit "shift off" and encode in 17-bit mode
302 // - in addition, 24-bit mode really improves with one byte shifted off
303 if ( mBitDepth == 32 )
305 else if ( mBitDepth >= 24 )
// bits per sample handed to the predictor/compressor: input depth minus the shifted-off bytes, plus the extra matrixing bit
310 chanBits = mBitDepth - (bytesShifted * 8) + 1;
312 // flag whether or not this is a partial frame
313 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
315 // brute-force encode optimization loop
316 // - run over variations of the encoding params to find the best choice
317 mixBits = kDefaultMixBits;
319 numU = numV = kDefaultNumUV;
320 denShift = DENSHIFT_DEFAULT;
// 1ul << 31 serves as a "larger than any real result" starting value for the minimum searches below
325 minBits = minBits1 = minBits2 = 1ul << 31;
// start from the mixRes remembered for this channel pair by the previous call
327 int32_t bestRes = mLastMixRes[channelIndex];
329 for ( mixRes = 0; mixRes <= maxRes; mixRes++ )
331 // mix the stereo inputs
335 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
338 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
341 // includes extraction of shifted-off bytes
342 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
343 mixBits, mixRes, mShiftBufferUV, bytesShifted );
346 // includes extraction of shifted-off bytes
347 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
348 mixBits, mixRes, mShiftBufferUV, bytesShifted );
// trial output goes to the scratch work buffer, not to the caller's bitstream
352 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
354 // run the dynamic predictors
355 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
356 pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
358 // run the lossless compressor on each channel
359 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
360 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
361 RequireNoErr( status, goto Exit; );
363 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
364 status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
365 RequireNoErr( status, goto Exit; );
367 // look for best match
368 if ( (bits1 + bits2) < minBits1 )
370 minBits1 = bits1 + bits2;
// remember the chosen mixRes for this channel pair; it also seeds bestRes on the next call
375 mLastMixRes[channelIndex] = (int16_t)bestRes;
377 // mix the stereo inputs with the current best mixRes
378 mixRes = mLastMixRes[channelIndex];
382 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
385 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
388 // also extracts the shifted off bytes into the shift buffers
389 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
390 mixBits, mixRes, mShiftBufferUV, bytesShifted );
393 // also extracts the shifted off bytes into the shift buffers
394 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
395 mixBits, mixRes, mShiftBufferUV, bytesShifted );
399 // now it's time for the predictor coefficient search loop
400 numU = numV = kMinUV;
401 minBits1 = minBits2 = 1ul << 31;
403 for ( uint32_t numUV = kMinUV; numUV <= kMaxUV; numUV += 4 )
405 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
409 // run the predictor over the same data multiple times to help it converge
410 for ( uint32_t converge = 0; converge < 8; converge++ )
412 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
413 pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
// NOTE(review): unlike the other dyn_comp calls in this function, no RequireNoErr check on the
// status of the two calls below is visible here -- confirm whether that is intentional.
418 set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
419 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
// estimated cost for this order: trial bit count scaled back up by dilate, plus 16 bits per coefficient
421 if ( (bits1 * dilate + 16 * numUV) < minBits1 )
423 minBits1 = bits1 * dilate + 16 * numUV;
427 set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
428 status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
430 if ( (bits2 * dilate + 16 * numUV) < minBits2 )
432 minBits2 = bits2 * dilate + 16 * numUV;
437 // test for escape hatch if best calculated compressed size turns out to be more than the input size
// estimate = both channels + 8 header bytes + optional 32-bit frame length + the raw shifted-off bytes of both channels
438 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
439 if ( bytesShifted != 0 )
440 minBits += (numSamples * (bytesShifted * 8) * 2);
// size of the uncompressed alternative: raw samples for two channels + optional frame length + 2 header bytes
442 escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
444 doEscape = (minBits >= escapeBits) ? true : false;
446 if ( doEscape == false )
448 // write bitstream header and coefs
// 12 zero bits, then a 4-bit field: partial-frame flag in bit 3, shift-off byte count in bits 2..1, escape bit 0 left clear
449 BitBufferWrite( bitstream, 0, 12 );
450 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
452 BitBufferWrite( bitstream, numSamples, 32 );
453 BitBufferWrite( bitstream, mixBits, 8 );
454 BitBufferWrite( bitstream, mixRes, 8 );
456 //Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
457 //Assert( (pbFactor < 8) && (numU < 32) );
458 //Assert( (pbFactor < 8) && (numV < 32) );
460 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
461 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
462 for ( index = 0; index < numU; index++ )
463 BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
465 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
466 BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
467 for ( index = 0; index < numV; index++ )
468 BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
470 // if shift active, write the interleaved shift buffers
471 if ( bytesShifted != 0 )
473 uint32_t bitShift = bytesShifted * 8;
475 //Assert( bitShift <= 16 );
477 for ( index = 0; index < (numSamples * 2); index += 2 )
// pack the two consecutive shift-buffer entries of one sample frame into a single write of 2 * bitShift bits
481 shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
482 BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
486 // run the dynamic predictor and lossless compression for the "left" channel
487 // - note: to avoid allocating more buffers, we're mixing and matching between the available buffers instead
488 // of only using "U" buffers for the U-channel and "V" buffers for the V-channel
491 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
// presumably the alternative (mode != 0) path: a second pc_block pass with nil coefs over the first pass's output
// -- the selecting condition is not visible here, confirm
495 pc_block( mMixBufferU, mPredictorV, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
496 pc_block( mPredictorV, mPredictorU, numSamples, nil, 31, chanBits, 0 );
499 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
500 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
501 RequireNoErr( status, goto Exit; );
503 // run the dynamic predictor and lossless compression for the "right" channel
506 pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
510 pc_block( mMixBufferV, mPredictorU, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
511 pc_block( mPredictorU, mPredictorV, numSamples, nil, 31, chanBits, 0 );
514 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
515 status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
516 RequireNoErr( status, goto Exit; );
518 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
519 chuck it and do an escape packet
// actual size of what was just written, measured against the state saved on entry
521 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
522 if ( minBits >= escapeBits )
524 *bitstream = startBits; // reset bitstream state
526 printf( "compressed frame too big: %u vs. %u \n", minBits, escapeBits );
530 if ( doEscape == true )
533 status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
// NOTE(review): minBits is uint32_t but is formatted with %lu here, while EncodeStereoFast uses %u
// for the same value -- confirm against what DebugMsg expects.
536 DebugMsg( "escape!: %lu vs %lu", minBits, escapeBits );
546 - encode a channel pair without the search loop for maximum possible speed
// EncodeStereoFast: encode one channel pair with fixed default parameters (kDefaultMixBits,
// kDefaultMixRes, kDefaultNumUV) and no parameter search.
//
// The compressed packet is written to `bitstream` speculatively. Afterwards its size is compared
// with the size of an uncompressed ("escape") packet; if compression did not win, the bitstream
// is rewound to the state saved on entry and EncodeStereoEscape is used instead.
//
// Parameters:
//   bitstream    - output bit buffer; a copy of its state is taken on entry so it can be rewound
//   inputBuffer  - input samples, reinterpreted according to mBitDepth
//   stride       - passed through to the mixNN() helpers and to EncodeStereoEscape
//   channelIndex - selects this pair's entries in mCoefsU and mCoefsV
//   numSamples   - samples per channel in this frame; a value != mFrameSize marks a partial frame
// Returns kALAC_ParamError for an unsupported bit depth, otherwise the status resulting from
// dyn_comp / EncodeStereoEscape.
548 int32_t ALACEncoder::EncodeStereoFast( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
550 BitBuffer startBits = *bitstream; // squirrel away current bit position in case we decide to use escape hatch
552 uint32_t bits1, bits2;
553 int32_t mixBits, mixRes;
554 uint32_t minBits, minBits1, minBits2;
560 uint8_t bytesShifted;
564 uint8_t partialFrame;
569 // make sure we handle this bit-depth before we get going
570 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
572 // reload coefs pointers for this channel pair
573 // - note that, while you might think they should be re-initialized per block, retaining state across blocks
574 // actually results in better overall compression
575 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
576 // different coefs for the different passes of "mixRes" results in even better compression
577 coefsU = (SearchCoefs) mCoefsU[channelIndex];
578 coefsV = (SearchCoefs) mCoefsV[channelIndex];
580 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
581 // so enable 16-bit "shift off" and encode in 17-bit mode
582 // - in addition, 24-bit mode really improves with one byte shifted off
583 if ( mBitDepth == 32 )
585 else if ( mBitDepth >= 24 )
// bits per sample handed to the predictor/compressor: input depth minus the shifted-off bytes, plus the extra matrixing bit
590 chanBits = mBitDepth - (bytesShifted * 8) + 1;
592 // flag whether or not this is a partial frame
593 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
595 // set up default encoding parameters for "fast" mode
596 mixBits = kDefaultMixBits;
597 mixRes = kDefaultMixRes;
598 numU = numV = kDefaultNumUV;
599 denShift = DENSHIFT_DEFAULT;
603 minBits = minBits1 = minBits2 = 1ul << 31;
605 // mix the stereo inputs with default mixBits/mixRes
609 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
612 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
615 // also extracts the shifted off bytes into the shift buffers
616 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
617 mixBits, mixRes, mShiftBufferUV, bytesShifted );
620 // also extracts the shifted off bytes into the shift buffers
621 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
622 mixBits, mixRes, mShiftBufferUV, bytesShifted );
626 /* speculatively write the bitstream assuming the compressed version will be smaller */
628 // write bitstream header and coefs
// 12 zero bits, then a 4-bit field: partial-frame flag in bit 3, shift-off byte count in bits 2..1, escape bit 0 left clear
629 BitBufferWrite( bitstream, 0, 12 );
630 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
632 BitBufferWrite( bitstream, numSamples, 32 );
633 BitBufferWrite( bitstream, mixBits, 8 );
634 BitBufferWrite( bitstream, mixRes, 8 );
636 //Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
637 //Assert( (pbFactor < 8) && (numU < 32) );
638 //Assert( (pbFactor < 8) && (numV < 32) );
640 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
641 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
642 for ( index = 0; index < numU; index++ )
643 BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
645 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
646 BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
647 for ( index = 0; index < numV; index++ )
648 BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
650 // if shift active, write the interleaved shift buffers
651 if ( bytesShifted != 0 )
653 uint32_t bitShift = bytesShifted * 8;
655 //Assert( bitShift <= 16 );
657 for ( index = 0; index < (numSamples * 2); index += 2 )
// pack the two consecutive shift-buffer entries of one sample frame into a single write of 2 * bitShift bits
661 shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
662 BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
666 // run the dynamic predictor and lossless compression for the "left" channel
667 // - note: we always use mode 0 in the "fast" path so we don't need the code for mode != 0
668 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
670 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
671 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
672 RequireNoErr( status, goto Exit; );
674 // run the dynamic predictor and lossless compression for the "right" channel
675 pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
677 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
678 status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
679 RequireNoErr( status, goto Exit; );
681 // do bit requirement calculations
// per channel: the compressed bits reported by dyn_comp plus 16 bits for each predictor coefficient
682 minBits1 = bits1 + (numU * sizeof(int16_t) * 8);
683 minBits2 = bits2 + (numV * sizeof(int16_t) * 8);
685 // test for escape hatch if best calculated compressed size turns out to be more than the input size
686 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
687 if ( bytesShifted != 0 )
688 minBits += (numSamples * (bytesShifted * 8) * 2);
// size of the uncompressed alternative: raw samples for two channels + optional frame length + 2 header bytes
690 escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
692 doEscape = (minBits >= escapeBits) ? true : false;
694 if ( doEscape == false )
696 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
697 chuck it and do an escape packet
// actual size of what was written, measured against the state saved on entry
699 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
700 if ( minBits >= escapeBits )
703 printf( "compressed frame too big: %u vs. %u\n", minBits, escapeBits );
708 if ( doEscape == true )
712 // reset bitstream position since we speculatively wrote the compressed version
713 *bitstream = startBits;
715 // write escape frame
716 status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
// note: the second value logged here is the raw sample bit count, not escapeBits (which also includes header bits)
719 DebugMsg( "escape!: %u vs %u", minBits, (numSamples * mBitDepth * 2) );
729 - encode stereo escape frame
// EncodeStereoEscape: write an uncompressed ("escape") packet for one channel pair.
//
// Emits the common header with the escape bit set, the 32-bit sample count, and then the raw
// samples of both channels, alternating first-channel / second-channel values per sample frame
// at the native bit depth (16, 20, 24 or 32 bits per value).
//
// Parameters:
//   bitstream   - output bit buffer to append to
//   inputBuffer - input samples, reinterpreted according to the bit depth
//   stride      - distance, in samples, between consecutive sample frames in inputBuffer
//   numSamples  - samples per channel in this frame; a value != mFrameSize marks a partial frame
731 int32_t ALACEncoder::EncodeStereoEscape( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t numSamples )
735 uint8_t partialFrame;
738 // flag whether or not this is a partial frame
739 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
741 // write bitstream header
742 BitBufferWrite( bitstream, 0, 12 );
743 BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 ); // LSB = 1 means "frame not compressed"
745 BitBufferWrite( bitstream, numSamples, 32 );
747 // just copy the input data to the output buffer
// 16-bit input: read the two channel values of each frame directly from the input
751 input16 = (int16_t *) inputBuffer;
753 for ( index = 0; index < (numSamples * stride); index += stride )
755 BitBufferWrite( bitstream, input16[index + 0], 16 );
756 BitBufferWrite( bitstream, input16[index + 1], 16 );
760 // mix20() with mixres param = 0 means de-interleave so use it to simplify things
761 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0 );
762 for ( index = 0; index < numSamples; index++ )
764 BitBufferWrite( bitstream, mMixBufferU[index], 20 );
765 BitBufferWrite( bitstream, mMixBufferV[index], 20 );
769 // mix24() with mixres param = 0 means de-interleave so use it to simplify things
// bytesShifted is passed as 0 here, so the full 24-bit values are written below
770 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0, mShiftBufferUV, 0 );
771 for ( index = 0; index < numSamples; index++ )
773 BitBufferWrite( bitstream, mMixBufferU[index], 24 );
774 BitBufferWrite( bitstream, mMixBufferV[index], 24 );
// 32-bit input: read the two channel values of each frame directly from the input
778 input32 = (int32_t *) inputBuffer;
780 for ( index = 0; index < (numSamples * stride); index += stride )
782 BitBufferWrite( bitstream, input32[index + 0], 32 );
783 BitBufferWrite( bitstream, input32[index + 1], 32 );
793 - encode a mono input buffer
// EncodeMono: encode a single channel into `bitstream`.
//
// Outline of the visible steps:
//   1. reject bit depths other than 16/20/24/32 with kALAC_ParamError
//   2. widen the input to 32-bit values in mMixBufferU; for 24- and 32-bit input the low
//      `shift` bits of every sample are split off into mShiftBufferUV
//   3. try predictor orders minU..maxU (step 4) on a reduced sample count (numSamples/dilate),
//      writing trial output to the scratch work buffer, and compare the estimated bit counts
//   4. if the estimate is smaller than the raw size, write header, coefficients, shifted-off
//      bits and the compressed data; if the estimate or the packet actually written is not
//      smaller, write an uncompressed escape packet instead
//
// Parameters:
//   bitstream    - output bit buffer; a copy of its state is taken on entry so it can be rewound
//   inputBuffer  - input samples, reinterpreted according to mBitDepth
//   stride       - distance, in samples, between consecutive samples of this channel
//   channelIndex - selects this channel's entry in mCoefsU
//   numSamples   - number of samples in this frame; a value != mFrameSize marks a partial frame
// Returns kALAC_ParamError for an unsupported bit depth, otherwise the status resulting from dyn_comp.
795 int32_t ALACEncoder::EncodeMono( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
797 BitBuffer startBits = *bitstream; // squirrel away copy of current state in case we need to go back and do an escape packet
803 uint32_t minBits, bestU;
805 uint32_t index, index2;
806 uint8_t bytesShifted;
811 uint8_t partialFrame;
818 // make sure we handle this bit-depth before we get going
819 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
823 // reload coefs array from previous frame
824 coefsU = (SearchCoefs) mCoefsU[channelIndex];
826 // pick bit depth for actual encoding
827 // - we lop off the lower byte(s) for 24-/32-bit encodings
828 if ( mBitDepth == 32 )
830 else if ( mBitDepth >= 24 )
// shift = number of low bits split off per sample; mask selects exactly those bits
835 shift = bytesShifted * 8;
836 mask = (1ul << shift) - 1;
// no extra matrixing bit here, unlike the stereo encoders
837 chanBits = mBitDepth - (bytesShifted * 8);
839 // flag whether or not this is a partial frame
840 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
842 // convert N-bit data to 32-bit for predictor
847 // convert 16-bit data to 32-bit for predictor
848 input16 = (int16_t *) inputBuffer;
849 for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
850 mMixBufferU[index] = (int32_t) input16[index2];
854 // convert 20-bit data to 32-bit for predictor
855 copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
858 // convert 24-bit data to 32-bit for the predictor and extract the shifted off byte(s)
859 copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
860 for ( index = 0; index < numSamples; index++ )
862 mShiftBufferUV[index] = (uint16_t)(mMixBufferU[index] & mask);
863 mMixBufferU[index] >>= shift;
868 // just copy the 32-bit input data for the predictor and extract the shifted off byte(s)
869 input32 = (int32_t *) inputBuffer;
871 for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
873 int32_t val = input32[index2];
875 mShiftBufferUV[index] = (uint16_t)(val & mask);
876 mMixBufferU[index] = val >> shift;
882 // brute-force encode optimization loop (implied "encode depth" of 0 if comparing to cmd line tool)
883 // - run over variations of the encoding params to find the best choice
892 for ( numU = minU; numU <= maxU; numU += 4 )
// trial output goes to the scratch work buffer, not to the caller's bitstream
897 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
// seven predictor passes over the same data, followed by one more pc_block call before measuring
900 for ( uint32_t converge = 0; converge < 7; converge++ )
901 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
904 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
906 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
907 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
908 RequireNoErr( status, goto Exit; );
// estimated cost for this order: trial bit count scaled back up by dilate, plus 16 bits per coefficient
910 numBits = (dilate * bits1) + (16 * numU);
911 if ( numBits < minBits )
918 // test for escape hatch if best calculated compressed size turns out to be more than the input size
919 // - first, add bits for the header bytes mixBits/mixRes/shiftU/filterU
920 minBits += (4 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
921 if ( bytesShifted != 0 )
922 minBits += (numSamples * (bytesShifted * 8));
// size of the uncompressed alternative: raw samples + optional frame length + 2 header bytes
924 escapeBits = (numSamples * mBitDepth) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
926 doEscape = (minBits >= escapeBits) ? true : false;
928 if ( doEscape == false )
930 // write bitstream header
// 12 zero bits, then a 4-bit field: partial-frame flag in bit 3, shift-off byte count in bits 2..1, escape bit 0 left clear
931 BitBufferWrite( bitstream, 0, 12 );
932 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
934 BitBufferWrite( bitstream, numSamples, 32 );
935 BitBufferWrite( bitstream, 0, 16 ); // mixBits = mixRes = 0
937 // write the params and predictor coefs
939 BitBufferWrite( bitstream, (0 << 4) | DENSHIFT_DEFAULT, 8 ); // modeU = 0
940 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
941 for ( index = 0; index < numU; index++ )
942 BitBufferWrite( bitstream, coefsU[numU-1][index], 16 );
944 // if shift active, write the shift buffer (mono: one value per sample, nothing to interleave)
945 if ( bytesShifted != 0 )
947 for ( index = 0; index < numSamples; index++ )
948 BitBufferWrite( bitstream, mShiftBufferUV[index], shift );
951 // run the dynamic predictor with the best result
952 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
954 // do lossless compression
// NOTE(review): the search loop above configures agParams with set_ag_params(...), while this final
// pass uses set_standard_ag_params -- presumably equivalent defaults, confirm.
955 set_standard_ag_params( &agParams, numSamples, numSamples );
956 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
957 //AssertNoErr( status );
960 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
961 chuck it and do an escape packet
// actual size of what was just written, measured against the state saved on entry
963 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
964 if ( minBits >= escapeBits )
966 *bitstream = startBits; // reset bitstream state
968 printf( "compressed frame too big: %u vs. %u\n", minBits, escapeBits );
972 if ( doEscape == true )
974 // write bitstream header and coefs
975 BitBufferWrite( bitstream, 0, 12 );
976 BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 ); // LSB = 1 means "frame not compressed"
978 BitBufferWrite( bitstream, numSamples, 32 );
980 // just copy the input data to the output buffer
984 input16 = (int16_t *) inputBuffer;
985 for ( index = 0; index < (numSamples * stride); index += stride )
986 BitBufferWrite( bitstream, input16[index], 16 );
989 // convert 20-bit data to 32-bit for simplicity
990 copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
991 for ( index = 0; index < numSamples; index++ )
992 BitBufferWrite( bitstream, mMixBufferU[index], 20 );
995 // convert 24-bit data to 32-bit for simplicity
996 copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
997 for ( index = 0; index < numSamples; index++ )
998 BitBufferWrite( bitstream, mMixBufferU[index], 24 );
1001 input32 = (int32_t *) inputBuffer;
1002 for ( index = 0; index < (numSamples * stride); index += stride )
1003 BitBufferWrite( bitstream, input32[index], 32 );
// NOTE(review): minBits is uint32_t but is formatted with %lu here, while EncodeStereoFast uses %u
// for the same kind of value -- confirm against what DebugMsg expects.
1007 DebugMsg( "escape!: %lu vs %lu", minBits, (numSamples * mBitDepth) );
1021 - encode the next block of samples
// Encode: encodes one block of input samples from theReadBuffer into theWriteBuffer.
// - theInputFormat: supplies mBytesPerPacket (used to derive the frame count) and mChannelsPerFrame
//   (selects the stereo, mono, or multi-element path below)
// - theOutputFormat: not referenced in the lines shown here
// - theReadBuffer / theWriteBuffer: input samples / destination for the packed bitstream
// - ioNumBytes: on entry the number of input bytes; on the success path it is overwritten with the
//   number of output bytes produced
// NOTE(review): short lines (braces, else/case labels, some declarations, the exit path) are not
// present in this listing, so the branch nesting described below is inferred from the visible
// statements -- confirm against the complete file. Presumably the function returns `status`.
1023 int32_t ALACEncoder::Encode(AudioFormatDescription theInputFormat, AudioFormatDescription theOutputFormat,
1024 unsigned char * theReadBuffer, unsigned char * theWriteBuffer, int32_t * ioNumBytes)
1027 uint32_t outputSize;
1028 BitBuffer bitstream;
// frame count = input byte count / bytes per packet
// NOTE(review): no guard against mBytesPerPacket == 0 is visible here
1031 numFrames = *ioNumBytes/theInputFormat.mBytesPerPacket;
1033 // create a bit buffer structure pointing to our output buffer
// the bit buffer is sized with mMaxOutputBytes, not with a caller-supplied buffer length
1034 BitBufferInit( &bitstream, theWriteBuffer, mMaxOutputBytes );
// --- exactly two channels: a single channel-pair element ---
1036 if ( theInputFormat.mChannelsPerFrame == 2 )
1038 // add 3-bit frame start tag ID_CPE = channel pair & 4-bit element instance tag = 0
1039 BitBufferWrite( &bitstream, ID_CPE, 3 );
1040 BitBufferWrite( &bitstream, 0, 4 );
1042 // encode stereo input buffer
// mFastMode chooses between the two stereo encoders (stride 2, channel index 0)
1043 if ( mFastMode == false )
1044 status = this->EncodeStereo( &bitstream, theReadBuffer, 2, 0, numFrames );
1046 status = this->EncodeStereoFast( &bitstream, theReadBuffer, 2, 0, numFrames );
1047 RequireNoErr( status, goto Exit; );
// --- exactly one channel: a single mono element ---
1049 else if ( theInputFormat.mChannelsPerFrame == 1 )
1051 // add 3-bit frame start tag ID_SCE = mono channel & 4-bit element instance tag = 0
1052 BitBufferWrite( &bitstream, ID_SCE, 3 );
1053 BitBufferWrite( &bitstream, 0, 4 );
1055 // encode mono input buffer
1056 status = this->EncodeMono( &bitstream, theReadBuffer, 1, 0, numFrames );
1057 RequireNoErr( status, goto Exit; );
// --- remaining channel counts: one element per entry in the channel map ---
1063 uint32_t channelIndex;
1064 uint32_t inputIncrement;
1065 uint8_t stereoElementTag;
1066 uint8_t monoElementTag;
1067 uint8_t lfeElementTag;
1069 inputBuffer = (char *) theReadBuffer;
// bytes per sample: mBitDepth rounded up to a whole number of bytes
1070 inputIncrement = ((mBitDepth + 7) / 8);
1072 stereoElementTag = 0;
// the loop header has no increment clause; channelIndex is presumably advanced inside the
// per-element branches (those lines are not shown in this listing)
1076 for ( channelIndex = 0; channelIndex < theInputFormat.mChannelsPerFrame; )
// extract the 3-bit element tag for this channel index from the map for this channel count
// NOTE(review): the table index is mChannelsPerFrame - 1 with no range check visible here
1078 tag = (sChannelMaps[theInputFormat.mChannelsPerFrame - 1] & (0x7ul << (channelIndex * 3))) >> (channelIndex * 3);
1080 BitBufferWrite( &bitstream, tag, 3 );
// mono element: 4-bit instance tag, encode one channel, step the input pointer one sample width
1085 BitBufferWrite( &bitstream, monoElementTag, 4 );
1087 status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1089 inputBuffer += inputIncrement;
// stereo element: 4-bit instance tag, encode a channel pair, step the input pointer two sample widths
1096 BitBufferWrite( &bitstream, stereoElementTag, 4 );
1098 status = this->EncodeStereo( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1100 inputBuffer += (inputIncrement * 2);
1106 // LFE channel (subwoofer)
// encoded with the mono encoder, using its own instance-tag counter
1107 BitBufferWrite( &bitstream, lfeElementTag, 4 );
1109 status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1111 inputBuffer += inputIncrement;
// unrecognized tag value: report it and fail with a parameter error
1117 printf( "That ain't right! (%u)\n", tag );
1118 status = kALAC_ParamError;
1122 RequireNoErr( status, goto Exit; );
1128 // if there is room left in the output buffer, add some random fill data to test decoder
// NOTE(review): AddFiller's definition in this file is followed by `#endif /* VERBOSE_DEBUG */`,
// so this section is presumably compiled only in verbose-debug builds -- confirm
1132 bitsLeft = BitBufferGetPosition( &bitstream ) - 3; // - 3 for ID_END tag
1133 bytesLeft = bitstream.byteSize - ((bitsLeft + 7) / 8);
// filler is only added when more than 20 bytes remain and bit 2 (0x4) of the remaining count is set
1135 if ( (bytesLeft > 20) && ((bytesLeft & 0x4u) != 0) )
1136 AddFiller( &bitstream, bytesLeft );
1140 // add 3-bit frame end tag: ID_END
1141 BitBufferWrite( &bitstream, ID_END, 3 );
1143 // byte-align the output data
1144 BitBufferByteAlign( &bitstream, true );
// output size in bytes = bit position after alignment / 8
1146 outputSize = BitBufferGetPosition( &bitstream ) / 8;
1147 //Assert( outputSize <= mMaxOutputBytes );
1150 // all good, let iTunes know what happened and remember the total number of input sample frames
1151 *ioNumBytes = outputSize;
1152 //mEncodedFrames += encodeMsg->numInputSamples;
1154 // gather encoding stats
// running byte total, and the largest frame seen so far (reported by GetConfig as maxFrameBytes)
1155 mTotalBytesGenerated += outputSize;
1156 mMaxFrameBytes = MAX( mMaxFrameBytes, outputSize );
1158 status = ALAC_noErr;
1166 - drain out any leftover samples
// Finish: takes no arguments and returns an int32_t.
// NOTE(review): the only body lines shown in this listing are the start of a commented-out
// average-bit-rate computation (total bytes generated * 8 / entry count, scaled by
// sample rate / frame size); the end of that comment and the return statement are not shown.
1169 int32_t ALACEncoder::Finish()
1171 /* // finalize bit rate statistics
1172 if ( mSampleSize.numEntries != 0 )
1173 mAvgBitRate = (uint32_t)( (((float)mTotalBytesGenerated * 8.0f) / (float)mSampleSize.numEntries) * ((float)mSampleRate / (float)mFrameSize) );
// GetConfig: fills `config` (passed by reference) from the encoder's current state.
// - 32-bit and 16-bit fields go through Swap32NtoB / Swap16NtoB (presumably native-to-big-endian,
//   judging by the names -- confirm in EndianPortable.h)
// - single-byte fields are stored with a plain uint8_t cast
1187 void ALACEncoder::GetConfig( ALACSpecificConfig & config )
1189 config.frameLength = Swap32NtoB(mFrameSize);
1190 config.compatibleVersion = (uint8_t) kALACCompatibleVersion;
1191 config.bitDepth = (uint8_t) mBitDepth;
// pb / kb / mb / maxRun are written from the compile-time constants PB0, KB0, MB0, MAX_RUN_DEFAULT
1192 config.pb = (uint8_t) PB0;
1193 config.kb = (uint8_t) KB0;
1194 config.mb = (uint8_t) MB0;
1195 config.numChannels = (uint8_t) mNumChannels;
1196 config.maxRun = Swap16NtoB((uint16_t) MAX_RUN_DEFAULT);
// mMaxFrameBytes is updated per frame in Encode(), so this value reflects frames encoded so far
1197 config.maxFrameBytes = Swap32NtoB(mMaxFrameBytes);
1198 config.avgBitRate = Swap32NtoB(mAvgBitRate);
1199 config.sampleRate = Swap32NtoB(mOutputSampleRate);
// GetMagicCookieSize: returns the magic cookie size in bytes for inNumChannels channels.
// - more than 2 channels: ALACSpecificConfig + channel atom header + ALACAudioChannelLayout
// - otherwise: ALACSpecificConfig only
// (the `else` between the two returns is not present in this listing; the sizes match what
// GetMagicCookie() writes for the same channel counts)
1202 uint32_t ALACEncoder::GetMagicCookieSize(uint32_t inNumChannels)
1204 if (inNumChannels > 2)
1206 return sizeof(ALACSpecificConfig) + kChannelAtomSize + sizeof(ALACAudioChannelLayout);
1210 return sizeof(ALACSpecificConfig);
// GetMagicCookie: serializes the encoder configuration into outCookie.
// - outCookie: destination buffer
// - ioSize: on entry, the capacity of outCookie in bytes; on exit, the number of bytes written
// Layout written: ALACSpecificConfig, then (only for more than 2 channels) the 'chan' atom header
// followed by an ALACAudioChannelLayout.
// NOTE(review): the `else` before the final `*ioSize = 0` is not present in this listing; from the
// trailing comment, that assignment is presumably the "buffer too small" path -- confirm.
1214 void ALACEncoder::GetMagicCookie(void * outCookie, uint32_t * ioSize)
1216 ALACSpecificConfig theConfig = {0};
1217 ALACAudioChannelLayout theChannelLayout = {0};
// atom header template: 4 zero bytes, the four characters 'c','h','a','n', then 4 zero bytes
1218 uint8_t theChannelAtom[kChannelAtomSize] = {0, 0, 0, 0, 'c', 'h', 'a', 'n', 0, 0, 0, 0};
1219 uint32_t theCookieSize = sizeof(ALACSpecificConfig);
1220 uint8_t * theCookiePointer = (uint8_t *)outCookie;
1222 GetConfig(theConfig);
// more than 2 channels: look up the layout tag for this channel count and grow the cookie size
1223 if (theConfig.numChannels > 2)
1225 theChannelLayout.mChannelLayoutTag = ALACChannelLayoutTags[theConfig.numChannels - 1];
1226 theCookieSize += (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
// only write when the caller's buffer can hold the whole cookie
1228 if (*ioSize >= theCookieSize)
1230 memcpy(theCookiePointer, &theConfig, sizeof(ALACSpecificConfig));
// byte 3 of the atom header receives the combined size of atom header + layout
// (presumably the low byte of a big-endian 32-bit atom size -- confirm)
1231 theChannelAtom[3] = (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
1232 if (theConfig.numChannels > 2)
// append the atom header and then the channel layout directly after the config struct
1234 theCookiePointer += sizeof(ALACSpecificConfig);
1235 memcpy(theCookiePointer, theChannelAtom, kChannelAtomSize);
1236 theCookiePointer += kChannelAtomSize;
1237 memcpy(theCookiePointer, &theChannelLayout, sizeof(ALACAudioChannelLayout));
1239 *ioSize = theCookieSize;
1243 *ioSize = 0; // no incomplete cookies
1249 - initialize the encoder component with the current config
// InitializeEncoder: configures the encoder from theOutputFormat and allocates its work buffers.
// - reads mSampleRate, mChannelsPerFrame and mFormatFlags from theOutputFormat
// - sets status to kALAC_MemFullError if any allocation fails, ALAC_noErr otherwise
// NOTE(review): the switch case bodies, the declaration of `status`, and the exit path are not
// present in this listing; presumably the function returns `status` -- confirm.
1251 int32_t ALACEncoder::InitializeEncoder(AudioFormatDescription theOutputFormat)
1255 mOutputSampleRate = theOutputFormat.mSampleRate;
1256 mNumChannels = theOutputFormat.mChannelsPerFrame;
// dispatch on the output format flags; what each case assigns is not shown here
1257 switch(theOutputFormat.mFormatFlags)
1275 // set up default encoding parameters and state
1276 // - note: mFrameSize is set in the constructor or via SetFrameSize() which must be called before this routine
1277 for ( uint32_t index = 0; index < kALACMaxChannels; index++ )
1278 mLastMixRes[index] = kDefaultMixRes;
1280 // the maximum output frame size can be no bigger than (samplesPerBlock * numChannels * ((10 + sampleSize)/8) + 1)
1281 // but note that this can be bigger than the input size!
1282 // - since we don't yet know what our input format will be, use our max allowed sample size in the calculation
// with kMaxSampleSize = 32 the integer division (10 + 32) / 8 yields 5 bytes per sample
1283 mMaxOutputBytes = mFrameSize * mNumChannels * ((10 + kMaxSampleSize) / 8) + 1;
1285 // allocate mix buffers
// each calloc below passes the total byte count as the first argument and 1 as the second;
// every buffer holds mFrameSize int32_t values and starts zero-filled
1286 mMixBufferU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1287 mMixBufferV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1289 // allocate dynamic predictor buffers
1290 mPredictorU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1291 mPredictorV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1293 // allocate combined shift buffer
// holds 2 * mFrameSize uint16_t entries
1294 mShiftBufferUV = (uint16_t *) calloc( mFrameSize * 2 * sizeof(uint16_t),1 );
1296 // allocate work buffer for search loop
1297 mWorkBuffer = (uint8_t *) calloc( mMaxOutputBytes, 1 );
// all six allocations must have succeeded; otherwise fail with kALAC_MemFullError
// NOTE(review): no release of buffers from an earlier call is visible in these lines
1299 RequireAction( (mMixBufferU != nil) && (mMixBufferV != nil) &&
1300 (mPredictorU != nil) && (mPredictorV != nil) &&
1301 (mShiftBufferUV != nil) && (mWorkBuffer != nil ),
1302 status = kALAC_MemFullError; goto Exit; );
1304 status = ALAC_noErr;
1307 // initialize coefs arrays once b/c retaining state across blocks actually improves the encode ratio
// one U and one V coefficient set per (channel, search slot) pair, for mNumChannels channels
1308 for ( int32_t channel = 0; channel < (int32_t)mNumChannels; channel++ )
1310 for ( int32_t search = 0; search < kALACMaxSearches; search++ )
1312 init_coefs( mCoefsU[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1313 init_coefs( mCoefsV[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1323 - given the input format, return one of our supported formats
// GetSourceFormat: given a candidate source format, fills `output` with the uncompressed integer
// format this encoder will accept.
// - source: inspected for mFormatID, mFormatFlags (float flag) and mBitsPerChannel
// - output: written by MakeUncompressedAudioFormat using mNumChannels, mOutputSampleRate and mBitDepth
// NOTE(review): the statement executed on each branch of the if/else-if chain is not present in
// this listing; presumably each one sets mBitDepth (16 / 20 / 24 / 32) -- confirm.
1325 void ALACEncoder::GetSourceFormat( const AudioFormatDescription * source, AudioFormatDescription * output )
1327 // default is 16-bit native endian
1328 // - note: for float input we assume that's coming from one of our decoders (mp3, aac) so it only makes sense
1329 // to encode to 16-bit since the source was lossy in the first place
1330 // - note: if not a supported bit depth, find the closest supported bit depth to the input one
// first branch: non-linear-PCM input, float input, or 16 bits per channel or fewer
1331 if ( (source->mFormatID != kALACFormatLinearPCM) || ((source->mFormatFlags & kALACFormatFlagIsFloat) != 0) ||
1332 ( source->mBitsPerChannel <= 16 ) )
// second branch: more than 16 and up to 20 bits per channel
1334 else if ( source->mBitsPerChannel <= 20 )
// third branch: more than 20 and up to 24 bits per channel
1336 else if ( source->mBitsPerChannel <= 24 )
1341 // we support 16/20/24/32-bit integer data at any sample rate and our target number of channels
1342 // and sample rate were specified when we were configured
// channel count and sample rate come from the encoder's own configuration, not from `source`
1344 MakeUncompressedAudioFormat( mNumChannels, (float) mOutputSampleRate, mBitDepth, kAudioFormatFlagsNativeIntegerPacked, output );
1358 - add fill and data stream elements to the bitstream to test the decoder
// AddFiller: appends one padding element to `bits`, either a fill element (ID_FIL) or a data
// stream element (ID_DSE), carrying up to numBytes of fixed pattern bytes.
// - bits: destination bit buffer
// - numBytes: requested padding size in bytes; clamped per element type below
// NOTE(review): local declarations, the adjustment described by the "subtract 6 bytes" comment,
// the early return after the `numBytes <= 0` test, and the `else` lines are not present in this
// listing -- confirm against the complete file.
1360 static void AddFiller( BitBuffer * bits, int32_t numBytes )
1365 // out of lameness, subtract 6 bytes to deal with header + alignment as required for fill/data elements
1367 if ( numBytes <= 0 )
1370 // randomly pick Fill or Data Stream Element based on numBytes requested
// the choice is deterministic: bit 3 (0x8) of numBytes set selects ID_FIL, clear selects ID_DSE
1371 tag = (numBytes & 0x8) ? ID_FIL : ID_DSE;
1373 BitBufferWrite( bits, tag, 3 );
1374 if ( tag == ID_FIL )
1376 // can't write more than 269 bytes in a fill element
1377 numBytes = (numBytes > 269) ? 269 : numBytes;
1379 // fill element = 4-bit size unless >= 15 then 4-bit size + 8-bit extension size
1380 if ( numBytes >= 15 )
1382 uint16_t extensionSize;
1384 BitBufferWrite( bits, 15, 4 );
1386 // 8-bit extension count field is "extra + 1" which is weird but I didn't define the syntax
1387 // - otherwise, there's no way to represent 15
1388 // - for example, to really mean 15 bytes you must encode extensionSize = 1
1389 // - why it's not like data stream elements I have no idea
// with numBytes capped at 269 above, extensionSize is at most 255 and fits the 8-bit field
1390 extensionSize = (numBytes - 15) + 1;
1391 Assert( extensionSize <= 255 );
1392 BitBufferWrite( bits, extensionSize, 8 );
// short form (taken when numBytes < 15, presumably via an `else` not shown): size in the 4-bit field
1395 BitBufferWrite( bits, numBytes, 4 );
// payload: one extension-type byte followed by numBytes - 1 fill bytes, numBytes bytes in total
1397 BitBufferWrite( bits, 0x10, 8 ); // extension_type = FILL_DATA = b0001 or'ed with fill_nibble = b0000
1398 for ( index = 0; index < (numBytes - 1); index++ )
1399 BitBufferWrite( bits, 0xa5, 8 ); // fill_byte = b10100101 = 0xa5
// --- data stream element path (tag == ID_DSE) ---
1403 // can't write more than 510 bytes in a data stream element
1404 numBytes = (numBytes > 510) ? 510 : numBytes;
1406 BitBufferWrite( bits, 0, 4 ); // element instance tag
1407 BitBufferWrite( bits, 1, 1 ); // byte-align flag = true
1409 // data stream element = 8-bit size unless >= 255 then 8-bit size + 8-bit size
1410 if ( numBytes >= 255 )
// long form: 255 in the first field plus the remainder; with the 510 cap the remainder is at most 255
1412 BitBufferWrite( bits, 255, 8 );
1413 BitBufferWrite( bits, numBytes - 255, 8 );
// short form (numBytes < 255, presumably via an `else` not shown): single 8-bit count
1416 BitBufferWrite( bits, numBytes, 8 );
1418 BitBufferByteAlign( bits, true ); // byte-align with zeros
// payload: numBytes bytes of the constant 0x5a
1420 for ( index = 0; index < numBytes; index++ )
1421 BitBufferWrite( bits, 0x5a, 8 );
1425 #endif /* VERBOSE_DEBUG */