Initial commit.
[libsalac.git] / src / lib / alac / codec / ALACEncoder.cpp
blob1b71c2958fdb52cd34401155ba297a11177de447
1 /*
2 * Copyright (c) 2011 Apple Inc. All rights reserved.
4 * @APPLE_APACHE_LICENSE_HEADER_START@
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
18 * @APPLE_APACHE_LICENSE_HEADER_END@
22 File: ALACEncoder.cpp
25 // build stuff
26 #define VERBOSE_DEBUG 0
28 // headers
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
33 #include "ALACEncoder.h"
35 #include "aglib.h"
36 #include "dplib.h"
37 #include "matrixlib.h"
39 #include "ALACBitUtilities.h"
40 #include "ALACAudioTypes.h"
41 #include "EndianPortable.h"
43 // Note: in C you can't typecast to a 2-dimensional array pointer but that's what we need when
44 // picking which coefs to use so we declare this typedef b/c we *can* typecast to this type
45 typedef int16_t (*SearchCoefs)[kALACMaxCoefs];
// defines/constants

// Four-character code 'dpge' written out as an explicit 32-bit value ('d' 'p' 'g' 'e', most
// significant byte first). The value of a multi-character literal is implementation-defined and
// the literal form triggers -Wmultichar, so the bytes are spelled out instead.
const uint32_t	kALACEncoderMagic	= 0x64706765u;
const uint32_t	kMaxSampleSize		= 32;	// max allowed bit width is 32

// mixing parameters: default mixBits/mixRes and the upper bound of the mixRes search
const uint32_t	kDefaultMixBits		= 2;
const uint32_t	kDefaultMixRes		= 0;
const uint32_t	kMaxRes				= 4;

// predictor coefficient counts: default and the bounds of the coefficient-count search
const uint32_t	kDefaultNumUV		= 8;
const uint32_t	kMinUV				= 4;
const uint32_t	kMaxUV				= 8;
// static functions
#if VERBOSE_DEBUG
// forward declaration; only compiled when VERBOSE_DEBUG is non-zero
static void		AddFiller( BitBuffer * bits, int32_t numBytes );
#endif
64 Map Format: 3-bit field per channel which is the same as the "element tag" that should be placed
65 at the beginning of the frame for that channel. Indicates whether SCE, CPE, or LFE.
66 Each particular field is accessed via the current channel index. Note that the channel
67 index increments by two for channel pairs.
69 For example:
71 C L R 3-channel input = (ID_CPE << 3) | (ID_SCE)
72 index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
73 index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
75 C L R Ls Rs LFE 5.1-channel input = (ID_LFE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
76 index 0 value = (map & (0x7ul << (0 * 3))) >> (0 * 3)
77 index 1 value = (map & (0x7ul << (1 * 3))) >> (1 * 3)
78 index 3 value = (map & (0x7ul << (3 * 3))) >> (3 * 3)
79 index 5 value = (map & (0x7ul << (5 * 3))) >> (5 * 3)
80 index 7 value = (map & (0x7ul << (7 * 3))) >> (7 * 3)
82 static const uint32_t sChannelMaps[kALACMaxChannels] =
84 ID_SCE,
85 ID_CPE,
86 (ID_CPE << 3) | (ID_SCE),
87 (ID_SCE << 9) | (ID_CPE << 3) | (ID_SCE),
88 (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
89 (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
90 (ID_SCE << 18) | (ID_SCE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE),
91 (ID_SCE << 21) | (ID_CPE << 15) | (ID_CPE << 9) | (ID_CPE << 3) | (ID_SCE)
// Sample rates in Hz, ascending. Judging by the name this is the set of rates accepted for
// iPod playback -- TODO confirm against the code that consumes the table.
static const uint32_t	sSupportediPodSampleRates[] =
{
	8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000
};
100 Constructor
102 ALACEncoder::ALACEncoder() :
103 mBitDepth( 0 ),
104 mFastMode( 0 ),
105 mMixBufferU( nil ),
106 mMixBufferV( nil ),
107 mPredictorU( nil ),
108 mPredictorV( nil ),
109 mShiftBufferUV( nil ),
110 mWorkBuffer( nil ),
113 mTotalBytesGenerated( 0 ),
114 mAvgBitRate( 0 ),
115 mMaxFrameBytes( 0 )
117 // overrides
118 mFrameSize = kALACDefaultFrameSize;
122 Destructor
124 ALACEncoder::~ALACEncoder()
126 // delete the matrix mixing buffers
127 if ( mMixBufferU )
129 free(mMixBufferU);
130 mMixBufferU = NULL;
132 if ( mMixBufferV )
134 free(mMixBufferV);
135 mMixBufferV = NULL;
138 // delete the dynamic predictor's "corrector" buffers
139 if ( mPredictorU )
141 free(mPredictorU);
142 mPredictorU = NULL;
144 if ( mPredictorV )
146 free(mPredictorV);
147 mPredictorV = NULL;
150 // delete the unused byte shift buffer
151 if ( mShiftBufferUV )
153 free(mShiftBufferUV);
154 mShiftBufferUV = NULL;
157 // delete the work buffer
158 if ( mWorkBuffer )
160 free(mWorkBuffer);
161 mWorkBuffer = NULL;
165 #if PRAGMA_MARK
166 #pragma mark -
167 #endif
170 HEADER SPECIFICATION
172 For every segment we adopt the following header:
174 1 byte reserved (always 0)
175 1 byte flags (see below)
176 [4 byte frame length] (optional, see below)
177 ---Next, the per-segment ALAC parameters---
178 1 byte mixBits (middle-side parameter)
179 1 byte mixRes (middle-side parameter, interpreted as signed char)
181 1 byte shiftU (4 bits modeU, 4 bits denShiftU)
182 1 byte filterU (3 bits pbFactorU, 5 bits numU)
183 (numU) shorts (signed DP coefficients for U channel)
184 ---Next, 2nd-channel ALAC parameters in case of stereo mode---
185 1 byte shiftV (4 bits modeV, 4 bits denShiftV)
186 1 byte filterV (3 bits pbFactorV, 5 bits numV)
187 (numV) shorts (signed DP coefficients for V channel)
188 ---After this come the shift-off bytes for (>= 24)-bit data (n-byte shift) if indicated---
189 ---Then comes the AG-compressor bitstream---
192 FLAGS
193 -----
195 The presence of certain flag bits changes the header format such that the parameters might
196 not even be sent. The currently defined flags format is:
198 0000psse
200 where 0 = reserved, must be 0
201 p = 1-bit field "partial frame" flag indicating 32-bit frame length follows this byte
202 ss = 2-bit field indicating "number of shift-off bytes ignored by compression"
203 e = 1-bit field indicating "escape"
205 The "partial frame" flag means that the following segment is not equal to the frame length specified
206 in the out-of-band decoder configuration. This allows the decoder to deal with end-of-file partial
207 segments without incurring the 32-bit overhead for each segment.
209 The "shift-off" field indicates the number of bytes at the bottom of the word that were passed through
210 uncompressed. The reason for this is that the entropy inherent in the LS bytes of >= 24-bit words
211 quite often means that the frame would have to be "escaped" b/c the compressed size would be >= the
212 uncompressed size. However, by shifting the input values down and running the remaining bits through
213 the normal compression algorithm, a net win can be achieved. If this field is non-zero, it means that
214 the shifted-off bytes follow after the parameter section of the header and before the compressed
215 bitstream. Note that doing this also allows us to use matrixing on 32-bit inputs after one or more
216 bytes are shifted off the bottom which helps the eventual compression ratio. For stereo channels,
217 the shifted off bytes are interleaved.
219 The "escape" flag means that this segment was not compressed b/c the compressed size would be
220 >= uncompressed size. In that case, the audio data was passed through uncompressed after the header.
221 The other header parameter bytes will not be sent.
224 PARAMETERS
225 ----------
227 If the segment is not a partial or escape segment, the total header size (in bytes) is given exactly by:
229 4 + (2 + 2 * numU) (mono mode)
230 4 + (2 + 2 * numU) + (2 + 2 * numV) (stereo mode)
232 where the ALAC filter-lengths numU, numV are bounded by a
233 constant (in the current source, numU, numV <= NUMCOEPAIRS), and
234 this forces an absolute upper bound on header size.
236 Each segment-decode process loads up these bytes from the front of the
237 local stream, in the above order, then follows with the entropy-encoded
238 bits for the given segment.
240 To generalize middle-side, there are various mixing modes including middle-side, each lossless,
241 as embodied in the mix() and unmix() functions. These functions exploit a generalized middle-side
242 transformation:
244 u := [(rL + (m-r)R)/m];
245 v := L - R;
247 where [ ] denotes integer floor. The (lossless) inverse is
249 L = u + v - [rv/m];
250 R = L - v;
252 In the segment header, m and r are encoded in mixBits and mixRes.
253 Classical "middle-side" is obtained with m = 2, r = 1, but now
254 we have more generalized mixes.
256 NOTES
257 -----
258 The relevance of the ALAC coefficients is explained in detail
259 in patent documents.
263 EncodeStereo()
264 - encode a channel pair
266 int32_t ALACEncoder::EncodeStereo( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
268 BitBuffer workBits;
269 BitBuffer startBits = *bitstream; // squirrel away copy of current state in case we need to go back and do an escape packet
270 AGParamRec agParams;
271 uint32_t bits1, bits2;
272 uint32_t dilate;
273 int32_t mixBits, mixRes, maxRes;
274 uint32_t minBits, minBits1, minBits2;
275 uint32_t numU, numV;
276 uint32_t mode;
277 uint32_t pbFactor;
278 uint32_t chanBits;
279 uint32_t denShift;
280 uint8_t bytesShifted;
281 SearchCoefs coefsU;
282 SearchCoefs coefsV;
283 uint32_t index;
284 uint8_t partialFrame;
285 uint32_t escapeBits;
286 bool doEscape;
287 int32_t status = ALAC_noErr;
289 // make sure we handle this bit-depth before we get going
290 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
292 // reload coefs pointers for this channel pair
293 // - note that, while you might think they should be re-initialized per block, retaining state across blocks
294 // actually results in better overall compression
295 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
296 // different coefs for the different passes of "mixRes" results in even better compression
297 coefsU = (SearchCoefs) mCoefsU[channelIndex];
298 coefsV = (SearchCoefs) mCoefsV[channelIndex];
300 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
301 // so enable 16-bit "shift off" and encode in 17-bit mode
302 // - in addition, 24-bit mode really improves with one byte shifted off
303 if ( mBitDepth == 32 )
304 bytesShifted = 2;
305 else if ( mBitDepth >= 24 )
306 bytesShifted = 1;
307 else
308 bytesShifted = 0;
310 chanBits = mBitDepth - (bytesShifted * 8) + 1;
312 // flag whether or not this is a partial frame
313 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
315 // brute-force encode optimization loop
316 // - run over variations of the encoding params to find the best choice
317 mixBits = kDefaultMixBits;
318 maxRes = kMaxRes;
319 numU = numV = kDefaultNumUV;
320 denShift = DENSHIFT_DEFAULT;
321 mode = 0;
322 pbFactor = 4;
323 dilate = 8;
325 minBits = minBits1 = minBits2 = 1ul << 31;
327 int32_t bestRes = mLastMixRes[channelIndex];
329 for ( mixRes = 0; mixRes <= maxRes; mixRes++ )
331 // mix the stereo inputs
332 switch ( mBitDepth )
334 case 16:
335 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
336 break;
337 case 20:
338 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate, mixBits, mixRes );
339 break;
340 case 24:
341 // includes extraction of shifted-off bytes
342 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
343 mixBits, mixRes, mShiftBufferUV, bytesShifted );
344 break;
345 case 32:
346 // includes extraction of shifted-off bytes
347 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples/dilate,
348 mixBits, mixRes, mShiftBufferUV, bytesShifted );
349 break;
352 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
354 // run the dynamic predictors
355 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
356 pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
358 // run the lossless compressor on each channel
359 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
360 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
361 RequireNoErr( status, goto Exit; );
363 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
364 status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
365 RequireNoErr( status, goto Exit; );
367 // look for best match
368 if ( (bits1 + bits2) < minBits1 )
370 minBits1 = bits1 + bits2;
371 bestRes = mixRes;
375 mLastMixRes[channelIndex] = (int16_t)bestRes;
377 // mix the stereo inputs with the current best mixRes
378 mixRes = mLastMixRes[channelIndex];
379 switch ( mBitDepth )
381 case 16:
382 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
383 break;
384 case 20:
385 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
386 break;
387 case 24:
388 // also extracts the shifted off bytes into the shift buffers
389 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
390 mixBits, mixRes, mShiftBufferUV, bytesShifted );
391 break;
392 case 32:
393 // also extracts the shifted off bytes into the shift buffers
394 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
395 mixBits, mixRes, mShiftBufferUV, bytesShifted );
396 break;
399 // now it's time for the predictor coefficient search loop
400 numU = numV = kMinUV;
401 minBits1 = minBits2 = 1ul << 31;
403 for ( uint32_t numUV = kMinUV; numUV <= kMaxUV; numUV += 4 )
405 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
407 dilate = 32;
409 // run the predictor over the same data multiple times to help it converge
410 for ( uint32_t converge = 0; converge < 8; converge++ )
412 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
413 pc_block( mMixBufferV, mPredictorV, numSamples/dilate, coefsV[numUV-1], numUV, chanBits, DENSHIFT_DEFAULT );
416 dilate = 8;
418 set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
419 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
421 if ( (bits1 * dilate + 16 * numUV) < minBits1 )
423 minBits1 = bits1 * dilate + 16 * numUV;
424 numU = numUV;
427 set_ag_params( &agParams, MB0, (pbFactor * PB0)/4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
428 status = dyn_comp( &agParams, mPredictorV, &workBits, numSamples/dilate, chanBits, &bits2 );
430 if ( (bits2 * dilate + 16 * numUV) < minBits2 )
432 minBits2 = bits2 * dilate + 16 * numUV;
433 numV = numUV;
437 // test for escape hatch if best calculated compressed size turns out to be more than the input size
438 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
439 if ( bytesShifted != 0 )
440 minBits += (numSamples * (bytesShifted * 8) * 2);
442 escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
444 doEscape = (minBits >= escapeBits) ? true : false;
446 if ( doEscape == false )
448 // write bitstream header and coefs
449 BitBufferWrite( bitstream, 0, 12 );
450 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
451 if ( partialFrame )
452 BitBufferWrite( bitstream, numSamples, 32 );
453 BitBufferWrite( bitstream, mixBits, 8 );
454 BitBufferWrite( bitstream, mixRes, 8 );
456 //Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
457 //Assert( (pbFactor < 8) && (numU < 32) );
458 //Assert( (pbFactor < 8) && (numV < 32) );
460 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
461 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
462 for ( index = 0; index < numU; index++ )
463 BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
465 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
466 BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
467 for ( index = 0; index < numV; index++ )
468 BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
470 // if shift active, write the interleaved shift buffers
471 if ( bytesShifted != 0 )
473 uint32_t bitShift = bytesShifted * 8;
475 //Assert( bitShift <= 16 );
477 for ( index = 0; index < (numSamples * 2); index += 2 )
479 uint32_t shiftedVal;
481 shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
482 BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
486 // run the dynamic predictor and lossless compression for the "left" channel
487 // - note: to avoid allocating more buffers, we're mixing and matching between the available buffers instead
488 // of only using "U" buffers for the U-channel and "V" buffers for the V-channel
489 if ( mode == 0 )
491 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
493 else
495 pc_block( mMixBufferU, mPredictorV, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
496 pc_block( mPredictorV, mPredictorU, numSamples, nil, 31, chanBits, 0 );
499 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
500 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
501 RequireNoErr( status, goto Exit; );
503 // run the dynamic predictor and lossless compression for the "right" channel
504 if ( mode == 0 )
506 pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
508 else
510 pc_block( mMixBufferV, mPredictorU, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
511 pc_block( mPredictorU, mPredictorV, numSamples, nil, 31, chanBits, 0 );
514 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
515 status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
516 RequireNoErr( status, goto Exit; );
518 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
519 chuck it and do an escape packet
521 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
522 if ( minBits >= escapeBits )
524 *bitstream = startBits; // reset bitstream state
525 doEscape = true;
526 printf( "compressed frame too big: %u vs. %u \n", minBits, escapeBits );
530 if ( doEscape == true )
532 /* escape */
533 status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
535 #if VERBOSE_DEBUG
536 DebugMsg( "escape!: %lu vs %lu", minBits, escapeBits );
537 #endif
540 Exit:
541 return status;
545 EncodeStereoFast()
546 - encode a channel pair without the search loop for maximum possible speed
548 int32_t ALACEncoder::EncodeStereoFast( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
550 BitBuffer startBits = *bitstream; // squirrel away current bit position in case we decide to use escape hatch
551 AGParamRec agParams;
552 uint32_t bits1, bits2;
553 int32_t mixBits, mixRes;
554 uint32_t minBits, minBits1, minBits2;
555 uint32_t numU, numV;
556 uint32_t mode;
557 uint32_t pbFactor;
558 uint32_t chanBits;
559 uint32_t denShift;
560 uint8_t bytesShifted;
561 SearchCoefs coefsU;
562 SearchCoefs coefsV;
563 uint32_t index;
564 uint8_t partialFrame;
565 uint32_t escapeBits;
566 bool doEscape;
567 int32_t status;
569 // make sure we handle this bit-depth before we get going
570 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
572 // reload coefs pointers for this channel pair
573 // - note that, while you might think they should be re-initialized per block, retaining state across blocks
574 // actually results in better overall compression
575 // - strangely, re-using the same coefs for the different passes of the "mixRes" search loop instead of using
576 // different coefs for the different passes of "mixRes" results in even better compression
577 coefsU = (SearchCoefs) mCoefsU[channelIndex];
578 coefsV = (SearchCoefs) mCoefsV[channelIndex];
580 // matrix encoding adds an extra bit but 32-bit inputs cannot be matrixed b/c 33 is too many
581 // so enable 16-bit "shift off" and encode in 17-bit mode
582 // - in addition, 24-bit mode really improves with one byte shifted off
583 if ( mBitDepth == 32 )
584 bytesShifted = 2;
585 else if ( mBitDepth >= 24 )
586 bytesShifted = 1;
587 else
588 bytesShifted = 0;
590 chanBits = mBitDepth - (bytesShifted * 8) + 1;
592 // flag whether or not this is a partial frame
593 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
595 // set up default encoding parameters for "fast" mode
596 mixBits = kDefaultMixBits;
597 mixRes = kDefaultMixRes;
598 numU = numV = kDefaultNumUV;
599 denShift = DENSHIFT_DEFAULT;
600 mode = 0;
601 pbFactor = 4;
603 minBits = minBits1 = minBits2 = 1ul << 31;
605 // mix the stereo inputs with default mixBits/mixRes
606 switch ( mBitDepth )
608 case 16:
609 mix16( (int16_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
610 break;
611 case 20:
612 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, mixBits, mixRes );
613 break;
614 case 24:
615 // also extracts the shifted off bytes into the shift buffers
616 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
617 mixBits, mixRes, mShiftBufferUV, bytesShifted );
618 break;
619 case 32:
620 // also extracts the shifted off bytes into the shift buffers
621 mix32( (int32_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples,
622 mixBits, mixRes, mShiftBufferUV, bytesShifted );
623 break;
626 /* speculatively write the bitstream assuming the compressed version will be smaller */
628 // write bitstream header and coefs
629 BitBufferWrite( bitstream, 0, 12 );
630 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
631 if ( partialFrame )
632 BitBufferWrite( bitstream, numSamples, 32 );
633 BitBufferWrite( bitstream, mixBits, 8 );
634 BitBufferWrite( bitstream, mixRes, 8 );
636 //Assert( (mode < 16) && (DENSHIFT_DEFAULT < 16) );
637 //Assert( (pbFactor < 8) && (numU < 32) );
638 //Assert( (pbFactor < 8) && (numV < 32) );
640 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
641 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
642 for ( index = 0; index < numU; index++ )
643 BitBufferWrite( bitstream, coefsU[numU - 1][index], 16 );
645 BitBufferWrite( bitstream, (mode << 4) | DENSHIFT_DEFAULT, 8 );
646 BitBufferWrite( bitstream, (pbFactor << 5) | numV, 8 );
647 for ( index = 0; index < numV; index++ )
648 BitBufferWrite( bitstream, coefsV[numV - 1][index], 16 );
650 // if shift active, write the interleaved shift buffers
651 if ( bytesShifted != 0 )
653 uint32_t bitShift = bytesShifted * 8;
655 //Assert( bitShift <= 16 );
657 for ( index = 0; index < (numSamples * 2); index += 2 )
659 uint32_t shiftedVal;
661 shiftedVal = ((uint32_t)mShiftBufferUV[index + 0] << bitShift) | (uint32_t)mShiftBufferUV[index + 1];
662 BitBufferWrite( bitstream, shiftedVal, bitShift * 2 );
666 // run the dynamic predictor and lossless compression for the "left" channel
667 // - note: we always use mode 0 in the "fast" path so we don't need the code for mode != 0
668 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU - 1], numU, chanBits, DENSHIFT_DEFAULT );
670 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
671 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
672 RequireNoErr( status, goto Exit; );
674 // run the dynamic predictor and lossless compression for the "right" channel
675 pc_block( mMixBufferV, mPredictorV, numSamples, coefsV[numV - 1], numV, chanBits, DENSHIFT_DEFAULT );
677 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples, numSamples, MAX_RUN_DEFAULT );
678 status = dyn_comp( &agParams, mPredictorV, bitstream, numSamples, chanBits, &bits2 );
679 RequireNoErr( status, goto Exit; );
681 // do bit requirement calculations
682 minBits1 = bits1 + (numU * sizeof(int16_t) * 8);
683 minBits2 = bits2 + (numV * sizeof(int16_t) * 8);
685 // test for escape hatch if best calculated compressed size turns out to be more than the input size
686 minBits = minBits1 + minBits2 + (8 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
687 if ( bytesShifted != 0 )
688 minBits += (numSamples * (bytesShifted * 8) * 2);
690 escapeBits = (numSamples * mBitDepth * 2) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
692 doEscape = (minBits >= escapeBits) ? true : false;
694 if ( doEscape == false )
696 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
697 chuck it and do an escape packet
699 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
700 if ( minBits >= escapeBits )
702 doEscape = true;
703 printf( "compressed frame too big: %u vs. %u\n", minBits, escapeBits );
708 if ( doEscape == true )
710 /* escape */
712 // reset bitstream position since we speculatively wrote the compressed version
713 *bitstream = startBits;
715 // write escape frame
716 status = this->EncodeStereoEscape( bitstream, inputBuffer, stride, numSamples );
718 #if VERBOSE_DEBUG
719 DebugMsg( "escape!: %u vs %u", minBits, (numSamples * mBitDepth * 2) );
720 #endif
723 Exit:
724 return status;
728 EncodeStereoEscape()
729 - encode stereo escape frame
731 int32_t ALACEncoder::EncodeStereoEscape( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t numSamples )
733 int16_t * input16;
734 int32_t * input32;
735 uint8_t partialFrame;
736 uint32_t index;
738 // flag whether or not this is a partial frame
739 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
741 // write bitstream header
742 BitBufferWrite( bitstream, 0, 12 );
743 BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 ); // LSB = 1 means "frame not compressed"
744 if ( partialFrame )
745 BitBufferWrite( bitstream, numSamples, 32 );
747 // just copy the input data to the output buffer
748 switch ( mBitDepth )
750 case 16:
751 input16 = (int16_t *) inputBuffer;
753 for ( index = 0; index < (numSamples * stride); index += stride )
755 BitBufferWrite( bitstream, input16[index + 0], 16 );
756 BitBufferWrite( bitstream, input16[index + 1], 16 );
758 break;
759 case 20:
760 // mix20() with mixres param = 0 means de-interleave so use it to simplify things
761 mix20( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0 );
762 for ( index = 0; index < numSamples; index++ )
764 BitBufferWrite( bitstream, mMixBufferU[index], 20 );
765 BitBufferWrite( bitstream, mMixBufferV[index], 20 );
767 break;
768 case 24:
769 // mix24() with mixres param = 0 means de-interleave so use it to simplify things
770 mix24( (uint8_t *) inputBuffer, stride, mMixBufferU, mMixBufferV, numSamples, 0, 0, mShiftBufferUV, 0 );
771 for ( index = 0; index < numSamples; index++ )
773 BitBufferWrite( bitstream, mMixBufferU[index], 24 );
774 BitBufferWrite( bitstream, mMixBufferV[index], 24 );
776 break;
777 case 32:
778 input32 = (int32_t *) inputBuffer;
780 for ( index = 0; index < (numSamples * stride); index += stride )
782 BitBufferWrite( bitstream, input32[index + 0], 32 );
783 BitBufferWrite( bitstream, input32[index + 1], 32 );
785 break;
788 return ALAC_noErr;
792 EncodeMono()
793 - encode a mono input buffer
795 int32_t ALACEncoder::EncodeMono( BitBuffer * bitstream, void * inputBuffer, uint32_t stride, uint32_t channelIndex, uint32_t numSamples )
797 BitBuffer startBits = *bitstream; // squirrel away copy of current state in case we need to go back and do an escape packet
798 AGParamRec agParams;
799 uint32_t bits1;
800 uint32_t numU;
801 SearchCoefs coefsU;
802 uint32_t dilate;
803 uint32_t minBits, bestU;
804 uint32_t minU, maxU;
805 uint32_t index, index2;
806 uint8_t bytesShifted;
807 uint32_t shift;
808 uint32_t mask;
809 uint32_t chanBits;
810 uint8_t pbFactor;
811 uint8_t partialFrame;
812 int16_t * input16;
813 int32_t * input32;
814 uint32_t escapeBits;
815 bool doEscape;
816 int32_t status;
818 // make sure we handle this bit-depth before we get going
819 RequireAction( (mBitDepth == 16) || (mBitDepth == 20) || (mBitDepth == 24) || (mBitDepth == 32), return kALAC_ParamError; );
821 status = ALAC_noErr;
823 // reload coefs array from previous frame
824 coefsU = (SearchCoefs) mCoefsU[channelIndex];
826 // pick bit depth for actual encoding
827 // - we lop off the lower byte(s) for 24-/32-bit encodings
828 if ( mBitDepth == 32 )
829 bytesShifted = 2;
830 else if ( mBitDepth >= 24 )
831 bytesShifted = 1;
832 else
833 bytesShifted = 0;
835 shift = bytesShifted * 8;
836 mask = (1ul << shift) - 1;
837 chanBits = mBitDepth - (bytesShifted * 8);
839 // flag whether or not this is a partial frame
840 partialFrame = (numSamples == mFrameSize) ? 0 : 1;
842 // convert N-bit data to 32-bit for predictor
843 switch ( mBitDepth )
845 case 16:
847 // convert 16-bit data to 32-bit for predictor
848 input16 = (int16_t *) inputBuffer;
849 for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
850 mMixBufferU[index] = (int32_t) input16[index2];
851 break;
853 case 20:
854 // convert 20-bit data to 32-bit for predictor
855 copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
856 break;
857 case 24:
858 // convert 24-bit data to 32-bit for the predictor and extract the shifted off byte(s)
859 copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
860 for ( index = 0; index < numSamples; index++ )
862 mShiftBufferUV[index] = (uint16_t)(mMixBufferU[index] & mask);
863 mMixBufferU[index] >>= shift;
865 break;
866 case 32:
868 // just copy the 32-bit input data for the predictor and extract the shifted off byte(s)
869 input32 = (int32_t *) inputBuffer;
871 for ( index = 0, index2 = 0; index < numSamples; index++, index2 += stride )
873 int32_t val = input32[index2];
875 mShiftBufferUV[index] = (uint16_t)(val & mask);
876 mMixBufferU[index] = val >> shift;
878 break;
882 // brute-force encode optimization loop (implied "encode depth" of 0 if comparing to cmd line tool)
883 // - run over variations of the encoding params to find the best choice
884 minU = 4;
885 maxU = 8;
886 minBits = 1ul << 31;
887 pbFactor = 4;
889 minBits = 1ul << 31;
890 bestU = minU;
892 for ( numU = minU; numU <= maxU; numU += 4 )
894 BitBuffer workBits;
895 uint32_t numBits;
897 BitBufferInit( &workBits, mWorkBuffer, mMaxOutputBytes );
899 dilate = 32;
900 for ( uint32_t converge = 0; converge < 7; converge++ )
901 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
903 dilate = 8;
904 pc_block( mMixBufferU, mPredictorU, numSamples/dilate, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
906 set_ag_params( &agParams, MB0, (pbFactor * PB0) / 4, KB0, numSamples/dilate, numSamples/dilate, MAX_RUN_DEFAULT );
907 status = dyn_comp( &agParams, mPredictorU, &workBits, numSamples/dilate, chanBits, &bits1 );
908 RequireNoErr( status, goto Exit; );
910 numBits = (dilate * bits1) + (16 * numU);
911 if ( numBits < minBits )
913 bestU = numU;
914 minBits = numBits;
918 // test for escape hatch if best calculated compressed size turns out to be more than the input size
919 // - first, add bits for the header bytes mixRes/maxRes/shiftU/filterU
920 minBits += (4 /* mixRes/maxRes/etc. */ * 8) + ((partialFrame == true) ? 32 : 0);
921 if ( bytesShifted != 0 )
922 minBits += (numSamples * (bytesShifted * 8));
924 escapeBits = (numSamples * mBitDepth) + ((partialFrame == true) ? 32 : 0) + (2 * 8); /* 2 common header bytes */
926 doEscape = (minBits >= escapeBits) ? true : false;
928 if ( doEscape == false )
930 // write bitstream header
931 BitBufferWrite( bitstream, 0, 12 );
932 BitBufferWrite( bitstream, (partialFrame << 3) | (bytesShifted << 1), 4 );
933 if ( partialFrame )
934 BitBufferWrite( bitstream, numSamples, 32 );
935 BitBufferWrite( bitstream, 0, 16 ); // mixBits = mixRes = 0
937 // write the params and predictor coefs
938 numU = bestU;
939 BitBufferWrite( bitstream, (0 << 4) | DENSHIFT_DEFAULT, 8 ); // modeU = 0
940 BitBufferWrite( bitstream, (pbFactor << 5) | numU, 8 );
941 for ( index = 0; index < numU; index++ )
942 BitBufferWrite( bitstream, coefsU[numU-1][index], 16 );
944 // if shift active, write the interleaved shift buffers
945 if ( bytesShifted != 0 )
947 for ( index = 0; index < numSamples; index++ )
948 BitBufferWrite( bitstream, mShiftBufferUV[index], shift );
951 // run the dynamic predictor with the best result
952 pc_block( mMixBufferU, mPredictorU, numSamples, coefsU[numU-1], numU, chanBits, DENSHIFT_DEFAULT );
954 // do lossless compression
955 set_standard_ag_params( &agParams, numSamples, numSamples );
956 status = dyn_comp( &agParams, mPredictorU, bitstream, numSamples, chanBits, &bits1 );
957 //AssertNoErr( status );
960 /* if we happened to create a compressed packet that was actually bigger than an escape packet would be,
961 chuck it and do an escape packet
963 minBits = BitBufferGetPosition( bitstream ) - BitBufferGetPosition( &startBits );
964 if ( minBits >= escapeBits )
966 *bitstream = startBits; // reset bitstream state
967 doEscape = true;
968 printf( "compressed frame too big: %u vs. %u\n", minBits, escapeBits );
972 if ( doEscape == true )
974 // write bitstream header and coefs
975 BitBufferWrite( bitstream, 0, 12 );
976 BitBufferWrite( bitstream, (partialFrame << 3) | 1, 4 ); // LSB = 1 means "frame not compressed"
977 if ( partialFrame )
978 BitBufferWrite( bitstream, numSamples, 32 );
980 // just copy the input data to the output buffer
981 switch ( mBitDepth )
983 case 16:
984 input16 = (int16_t *) inputBuffer;
985 for ( index = 0; index < (numSamples * stride); index += stride )
986 BitBufferWrite( bitstream, input16[index], 16 );
987 break;
988 case 20:
989 // convert 20-bit data to 32-bit for simplicity
990 copy20ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
991 for ( index = 0; index < numSamples; index++ )
992 BitBufferWrite( bitstream, mMixBufferU[index], 20 );
993 break;
994 case 24:
995 // convert 24-bit data to 32-bit for simplicity
996 copy24ToPredictor( (uint8_t *) inputBuffer, stride, mMixBufferU, numSamples );
997 for ( index = 0; index < numSamples; index++ )
998 BitBufferWrite( bitstream, mMixBufferU[index], 24 );
999 break;
1000 case 32:
1001 input32 = (int32_t *) inputBuffer;
1002 for ( index = 0; index < (numSamples * stride); index += stride )
1003 BitBufferWrite( bitstream, input32[index], 32 );
1004 break;
1006 #if VERBOSE_DEBUG
1007 DebugMsg( "escape!: %lu vs %lu", minBits, (numSamples * mBitDepth) );
1008 #endif
1011 Exit:
1012 return status;
1015 #if PRAGMA_MARK
1016 #pragma mark -
1017 #endif
1020 Encode()
1021 - encode the next block of samples
1023 int32_t ALACEncoder::Encode(AudioFormatDescription theInputFormat, AudioFormatDescription theOutputFormat,
1024 unsigned char * theReadBuffer, unsigned char * theWriteBuffer, int32_t * ioNumBytes)
1026 uint32_t numFrames;
1027 uint32_t outputSize;
1028 BitBuffer bitstream;
1029 int32_t status;
1031 numFrames = *ioNumBytes/theInputFormat.mBytesPerPacket;
1033 // create a bit buffer structure pointing to our output buffer
1034 BitBufferInit( &bitstream, theWriteBuffer, mMaxOutputBytes );
1036 if ( theInputFormat.mChannelsPerFrame == 2 )
1038 // add 3-bit frame start tag ID_CPE = channel pair & 4-bit element instance tag = 0
1039 BitBufferWrite( &bitstream, ID_CPE, 3 );
1040 BitBufferWrite( &bitstream, 0, 4 );
1042 // encode stereo input buffer
1043 if ( mFastMode == false )
1044 status = this->EncodeStereo( &bitstream, theReadBuffer, 2, 0, numFrames );
1045 else
1046 status = this->EncodeStereoFast( &bitstream, theReadBuffer, 2, 0, numFrames );
1047 RequireNoErr( status, goto Exit; );
1049 else if ( theInputFormat.mChannelsPerFrame == 1 )
1051 // add 3-bit frame start tag ID_SCE = mono channel & 4-bit element instance tag = 0
1052 BitBufferWrite( &bitstream, ID_SCE, 3 );
1053 BitBufferWrite( &bitstream, 0, 4 );
1055 // encode mono input buffer
1056 status = this->EncodeMono( &bitstream, theReadBuffer, 1, 0, numFrames );
1057 RequireNoErr( status, goto Exit; );
1059 else
1061 char * inputBuffer;
1062 uint32_t tag;
1063 uint32_t channelIndex;
1064 uint32_t inputIncrement;
1065 uint8_t stereoElementTag;
1066 uint8_t monoElementTag;
1067 uint8_t lfeElementTag;
1069 inputBuffer = (char *) theReadBuffer;
1070 inputIncrement = ((mBitDepth + 7) / 8);
1072 stereoElementTag = 0;
1073 monoElementTag = 0;
1074 lfeElementTag = 0;
1076 for ( channelIndex = 0; channelIndex < theInputFormat.mChannelsPerFrame; )
1078 tag = (sChannelMaps[theInputFormat.mChannelsPerFrame - 1] & (0x7ul << (channelIndex * 3))) >> (channelIndex * 3);
1080 BitBufferWrite( &bitstream, tag, 3 );
1081 switch ( tag )
1083 case ID_SCE:
1084 // mono
1085 BitBufferWrite( &bitstream, monoElementTag, 4 );
1087 status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1089 inputBuffer += inputIncrement;
1090 channelIndex++;
1091 monoElementTag++;
1092 break;
1094 case ID_CPE:
1095 // stereo
1096 BitBufferWrite( &bitstream, stereoElementTag, 4 );
1098 status = this->EncodeStereo( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1100 inputBuffer += (inputIncrement * 2);
1101 channelIndex += 2;
1102 stereoElementTag++;
1103 break;
1105 case ID_LFE:
1106 // LFE channel (subwoofer)
1107 BitBufferWrite( &bitstream, lfeElementTag, 4 );
1109 status = this->EncodeMono( &bitstream, inputBuffer, theInputFormat.mChannelsPerFrame, channelIndex, numFrames );
1111 inputBuffer += inputIncrement;
1112 channelIndex++;
1113 lfeElementTag++;
1114 break;
1116 default:
1117 printf( "That ain't right! (%u)\n", tag );
1118 status = kALAC_ParamError;
1119 goto Exit;
1122 RequireNoErr( status, goto Exit; );
1126 #if VERBOSE_DEBUG
1128 // if there is room left in the output buffer, add some random fill data to test decoder
1129 int32_t bitsLeft;
1130 int32_t bytesLeft;
1132 bitsLeft = BitBufferGetPosition( &bitstream ) - 3; // - 3 for ID_END tag
1133 bytesLeft = bitstream.byteSize - ((bitsLeft + 7) / 8);
1135 if ( (bytesLeft > 20) && ((bytesLeft & 0x4u) != 0) )
1136 AddFiller( &bitstream, bytesLeft );
1138 #endif
1140 // add 3-bit frame end tag: ID_END
1141 BitBufferWrite( &bitstream, ID_END, 3 );
1143 // byte-align the output data
1144 BitBufferByteAlign( &bitstream, true );
1146 outputSize = BitBufferGetPosition( &bitstream ) / 8;
1147 //Assert( outputSize <= mMaxOutputBytes );
1150 // all good, let iTunes know what happened and remember the total number of input sample frames
1151 *ioNumBytes = outputSize;
1152 //mEncodedFrames += encodeMsg->numInputSamples;
1154 // gather encoding stats
1155 mTotalBytesGenerated += outputSize;
1156 mMaxFrameBytes = MAX( mMaxFrameBytes, outputSize );
1158 status = ALAC_noErr;
1160 Exit:
1161 return status;
1165 Finish()
1166 - drain out any leftover samples
1169 int32_t ALACEncoder::Finish()
1171 /* // finalize bit rate statistics
1172 if ( mSampleSize.numEntries != 0 )
1173 mAvgBitRate = (uint32_t)( (((float)mTotalBytesGenerated * 8.0f) / (float)mSampleSize.numEntries) * ((float)mSampleRate / (float)mFrameSize) );
1174 else
1175 mAvgBitRate = 0;
1177 return ALAC_noErr;
1180 #if PRAGMA_MARK
1181 #pragma mark -
1182 #endif
1185 GetConfig()
1187 void ALACEncoder::GetConfig( ALACSpecificConfig & config )
1189 config.frameLength = Swap32NtoB(mFrameSize);
1190 config.compatibleVersion = (uint8_t) kALACCompatibleVersion;
1191 config.bitDepth = (uint8_t) mBitDepth;
1192 config.pb = (uint8_t) PB0;
1193 config.kb = (uint8_t) KB0;
1194 config.mb = (uint8_t) MB0;
1195 config.numChannels = (uint8_t) mNumChannels;
1196 config.maxRun = Swap16NtoB((uint16_t) MAX_RUN_DEFAULT);
1197 config.maxFrameBytes = Swap32NtoB(mMaxFrameBytes);
1198 config.avgBitRate = Swap32NtoB(mAvgBitRate);
1199 config.sampleRate = Swap32NtoB(mOutputSampleRate);
1202 uint32_t ALACEncoder::GetMagicCookieSize(uint32_t inNumChannels)
1204 if (inNumChannels > 2)
1206 return sizeof(ALACSpecificConfig) + kChannelAtomSize + sizeof(ALACAudioChannelLayout);
1208 else
1210 return sizeof(ALACSpecificConfig);
1214 void ALACEncoder::GetMagicCookie(void * outCookie, uint32_t * ioSize)
1216 ALACSpecificConfig theConfig = {0};
1217 ALACAudioChannelLayout theChannelLayout = {0};
1218 uint8_t theChannelAtom[kChannelAtomSize] = {0, 0, 0, 0, 'c', 'h', 'a', 'n', 0, 0, 0, 0};
1219 uint32_t theCookieSize = sizeof(ALACSpecificConfig);
1220 uint8_t * theCookiePointer = (uint8_t *)outCookie;
1222 GetConfig(theConfig);
1223 if (theConfig.numChannels > 2)
1225 theChannelLayout.mChannelLayoutTag = ALACChannelLayoutTags[theConfig.numChannels - 1];
1226 theCookieSize += (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
1228 if (*ioSize >= theCookieSize)
1230 memcpy(theCookiePointer, &theConfig, sizeof(ALACSpecificConfig));
1231 theChannelAtom[3] = (sizeof(ALACAudioChannelLayout) + kChannelAtomSize);
1232 if (theConfig.numChannels > 2)
1234 theCookiePointer += sizeof(ALACSpecificConfig);
1235 memcpy(theCookiePointer, theChannelAtom, kChannelAtomSize);
1236 theCookiePointer += kChannelAtomSize;
1237 memcpy(theCookiePointer, &theChannelLayout, sizeof(ALACAudioChannelLayout));
1239 *ioSize = theCookieSize;
1241 else
1243 *ioSize = 0; // no incomplete cookies
1248 InitializeEncoder()
1249 - initialize the encoder component with the current config
1251 int32_t ALACEncoder::InitializeEncoder(AudioFormatDescription theOutputFormat)
1253 int32_t status;
1255 mOutputSampleRate = theOutputFormat.mSampleRate;
1256 mNumChannels = theOutputFormat.mChannelsPerFrame;
1257 switch(theOutputFormat.mFormatFlags)
1259 case 1:
1260 mBitDepth = 16;
1261 break;
1262 case 2:
1263 mBitDepth = 20;
1264 break;
1265 case 3:
1266 mBitDepth = 24;
1267 break;
1268 case 4:
1269 mBitDepth = 32;
1270 break;
1271 default:
1272 break;
1275 // set up default encoding parameters and state
1276 // - note: mFrameSize is set in the constructor or via SetFrameSize() which must be called before this routine
1277 for ( uint32_t index = 0; index < kALACMaxChannels; index++ )
1278 mLastMixRes[index] = kDefaultMixRes;
1280 // the maximum output frame size can be no bigger than (samplesPerBlock * numChannels * ((10 + sampleSize)/8) + 1)
1281 // but note that this can be bigger than the input size!
1282 // - since we don't yet know what our input format will be, use our max allowed sample size in the calculation
1283 mMaxOutputBytes = mFrameSize * mNumChannels * ((10 + kMaxSampleSize) / 8) + 1;
1285 // allocate mix buffers
1286 mMixBufferU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1287 mMixBufferV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1289 // allocate dynamic predictor buffers
1290 mPredictorU = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1291 mPredictorV = (int32_t *) calloc( mFrameSize * sizeof(int32_t), 1 );
1293 // allocate combined shift buffer
1294 mShiftBufferUV = (uint16_t *) calloc( mFrameSize * 2 * sizeof(uint16_t),1 );
1296 // allocate work buffer for search loop
1297 mWorkBuffer = (uint8_t *) calloc( mMaxOutputBytes, 1 );
1299 RequireAction( (mMixBufferU != nil) && (mMixBufferV != nil) &&
1300 (mPredictorU != nil) && (mPredictorV != nil) &&
1301 (mShiftBufferUV != nil) && (mWorkBuffer != nil ),
1302 status = kALAC_MemFullError; goto Exit; );
1304 status = ALAC_noErr;
1307 // initialize coefs arrays once b/c retaining state across blocks actually improves the encode ratio
1308 for ( int32_t channel = 0; channel < (int32_t)mNumChannels; channel++ )
1310 for ( int32_t search = 0; search < kALACMaxSearches; search++ )
1312 init_coefs( mCoefsU[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1313 init_coefs( mCoefsV[channel][search], DENSHIFT_DEFAULT, kALACMaxCoefs );
1317 Exit:
1318 return status;
1322 GetSourceFormat()
1323 - given the input format, return one of our supported formats
1325 void ALACEncoder::GetSourceFormat( const AudioFormatDescription * source, AudioFormatDescription * output )
1327 // default is 16-bit native endian
1328 // - note: for float input we assume that's coming from one of our decoders (mp3, aac) so it only makes sense
1329 // to encode to 16-bit since the source was lossy in the first place
1330 // - note: if not a supported bit depth, find the closest supported bit depth to the input one
1331 if ( (source->mFormatID != kALACFormatLinearPCM) || ((source->mFormatFlags & kALACFormatFlagIsFloat) != 0) ||
1332 ( source->mBitsPerChannel <= 16 ) )
1333 mBitDepth = 16;
1334 else if ( source->mBitsPerChannel <= 20 )
1335 mBitDepth = 20;
1336 else if ( source->mBitsPerChannel <= 24 )
1337 mBitDepth = 24;
1338 else
1339 mBitDepth = 32;
1341 // we support 16/20/24/32-bit integer data at any sample rate and our target number of channels
1342 // and sample rate were specified when we were configured
1344 MakeUncompressedAudioFormat( mNumChannels, (float) mOutputSampleRate, mBitDepth, kAudioFormatFlagsNativeIntegerPacked, output );
1350 #if VERBOSE_DEBUG
1352 #if PRAGMA_MARK
1353 #pragma mark -
1354 #endif
1357 AddFiller()
1358 - add fill and data stream elements to the bitstream to test the decoder
1360 static void AddFiller( BitBuffer * bits, int32_t numBytes )
1362 uint8_t tag;
1363 uint32_t index;
1365 // out of lameness, subtract 6 bytes to deal with header + alignment as required for fill/data elements
1366 numBytes -= 6;
1367 if ( numBytes <= 0 )
1368 return;
1370 // randomly pick Fill or Data Stream Element based on numBytes requested
1371 tag = (numBytes & 0x8) ? ID_FIL : ID_DSE;
1373 BitBufferWrite( bits, tag, 3 );
1374 if ( tag == ID_FIL )
1376 // can't write more than 269 bytes in a fill element
1377 numBytes = (numBytes > 269) ? 269 : numBytes;
1379 // fill element = 4-bit size unless >= 15 then 4-bit size + 8-bit extension size
1380 if ( numBytes >= 15 )
1382 uint16_t extensionSize;
1384 BitBufferWrite( bits, 15, 4 );
1386 // 8-bit extension count field is "extra + 1" which is weird but I didn't define the syntax
1387 // - otherwise, there's no way to represent 15
1388 // - for example, to really mean 15 bytes you must encode extensionSize = 1
1389 // - why it's not like data stream elements I have no idea
1390 extensionSize = (numBytes - 15) + 1;
1391 Assert( extensionSize <= 255 );
1392 BitBufferWrite( bits, extensionSize, 8 );
1394 else
1395 BitBufferWrite( bits, numBytes, 4 );
1397 BitBufferWrite( bits, 0x10, 8 ); // extension_type = FILL_DATA = b0001 or'ed with fill_nibble = b0000
1398 for ( index = 0; index < (numBytes - 1); index++ )
1399 BitBufferWrite( bits, 0xa5, 8 ); // fill_byte = b10100101 = 0xa5
1401 else
1403 // can't write more than 510 bytes in a data stream element
1404 numBytes = (numBytes > 510) ? 510 : numBytes;
1406 BitBufferWrite( bits, 0, 4 ); // element instance tag
1407 BitBufferWrite( bits, 1, 1 ); // byte-align flag = true
1409 // data stream element = 8-bit size unless >= 255 then 8-bit size + 8-bit size
1410 if ( numBytes >= 255 )
1412 BitBufferWrite( bits, 255, 8 );
1413 BitBufferWrite( bits, numBytes - 255, 8 );
1415 else
1416 BitBufferWrite( bits, numBytes, 8 );
1418 BitBufferByteAlign( bits, true ); // byte-align with zeros
1420 for ( index = 0; index < numBytes; index++ )
1421 BitBufferWrite( bits, 0x5a, 8 );
1425 #endif /* VERBOSE_DEBUG */