2 * AAC definitions and structures
3 * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
4 * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
6 * This file is part of Libav.
8 * Libav is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * Libav is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with Libav; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 * AAC definitions and structures
26 * @author Oded Shimon ( ods15 ods15 dyndns org )
27 * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
33 #include "libavutil/float_dsp.h"
37 #include "mpeg4audio.h"
/* Array-size limits shared by the structures declared in this header. */
#define MAX_CHANNELS     64 ///< length of the per-channel arrays (exclude_mask, output_element)
#define MAX_ELEM_ID      16 ///< second dimension of che / tag_che_map; layout_map holds MAX_ELEM_ID * 4 rows
#define TNS_MAX_ORDER    20 ///< innermost dimension of TemporalNoiseShaping.coef
#define MAX_LTP_LONG_SFB 40 ///< length of LongTermPrediction.used
48 enum RawDataBlockType
{
59 enum ExtensionPayloadID
{
63 EXT_DYNAMIC_RANGE
= 0xb,
65 EXT_SBR_DATA_CRC
= 0xe,
76 ZERO_BT
= 0, ///< Scalefactors and spectral data are all zero.
77 FIRST_PAIR_BT
= 5, ///< This and later band types encode two values (rather than four) with one code word.
78 ESC_BT
= 11, ///< Spectral data are coded with an escape sequence.
79 NOISE_BT
= 13, ///< Spectral data are scaled white noise not coded in the bitstream.
80 INTENSITY_BT2
= 14, ///< Scalefactor data are intensity stereo positions.
81 INTENSITY_BT
= 15, ///< Scalefactor data are intensity stereo positions.
/**
 * Evaluates to nonzero when ((x) - 1) has bit 1 or bit 3 set, zero otherwise.
 * For x in 1..11 that is exactly x = 3, 4 and 7..11.
 *
 * The argument is parenthesized so that an expression argument whose
 * operator binds more loosely than '-' (e.g. `cb & 15`) is evaluated as a
 * whole before the subtraction.
 */
#define IS_CODEBOOK_UNSIGNED(x) (((x) - 1) & 10)
86 enum ChannelPosition
{
88 AAC_CHANNEL_FRONT
= 1,
96 * The point during decoding at which channel coupling is applied.
100 BETWEEN_TNS_AND_IMDCT
,
105 * Output configuration status
108 OC_NONE
, ///< Output unconfigured
109 OC_TRIAL_PCE
, ///< Output configuration under trial specified by an inband PCE
110 OC_TRIAL_FRAME
, ///< Output configuration under trial specified by a frame header
111 OC_GLOBAL_HDR
, ///< Output configuration set in a global header but not yet locked
112 OC_LOCKED
, ///< Output configuration locked in place
115 typedef struct OutputConfiguration
{
116 MPEG4AudioConfig m4ac
;
117 uint8_t layout_map
[MAX_ELEM_ID
*4][3];
120 uint64_t channel_layout
;
121 enum OCStatus status
;
122 } OutputConfiguration
;
127 typedef struct PredictorState
{
#define MAX_PREDICTORS 672  ///< length of SingleChannelElement.predictor_state

#define SCALE_DIV_512    36 ///< scalefactor difference that corresponds to scale difference in 512 times
#define SCALE_ONE_POS   140 ///< scalefactor index that corresponds to scale=1.0
#define SCALE_MAX_POS   255 ///< scalefactor index maximum value
#define SCALE_MAX_DIFF   60 ///< maximum scalefactor difference allowed by standard
#define SCALE_DIFF_ZERO  60 ///< codebook index corresponding to zero scalefactor indices difference
145 * Long Term Prediction
147 typedef struct LongTermPrediction
{
151 int8_t used
[MAX_LTP_LONG_SFB
];
152 } LongTermPrediction
;
155 * Individual Channel Stream
157 typedef struct IndividualChannelStream
{
158 uint8_t max_sfb
; ///< number of scalefactor bands per group
159 enum WindowSequence window_sequence
[2];
160 uint8_t use_kb_window
[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sine window.
161 int num_window_groups
;
162 uint8_t group_len
[8];
163 LongTermPrediction ltp
;
164 const uint16_t *swb_offset
; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
165 const uint8_t *swb_sizes
; ///< table of scalefactor band sizes for a particular window
166 int num_swb
; ///< number of scalefactor window bands
169 int predictor_present
;
170 int predictor_initialized
;
171 int predictor_reset_group
;
172 uint8_t prediction_used
[41];
173 } IndividualChannelStream
;
176 * Temporal Noise Shaping
178 typedef struct TemporalNoiseShaping
{
184 float coef
[8][4][TNS_MAX_ORDER
];
185 } TemporalNoiseShaping
;
188 * Dynamic Range Control - decoded from the bitstream but not processed further.
190 typedef struct DynamicRangeControl
{
191 int pce_instance_tag
; ///< Indicates with which program the DRC info is associated.
192 int dyn_rng_sgn
[17]; ///< DRC sign information; 0 - positive, 1 - negative
193 int dyn_rng_ctl
[17]; ///< DRC magnitude information
194 int exclude_mask
[MAX_CHANNELS
]; ///< Channels to be excluded from DRC processing.
195 int band_incr
; ///< Number of DRC bands greater than 1 having DRC info.
196 int interpolation_scheme
; ///< Indicates the interpolation scheme used in the SBR QMF domain.
197 int band_top
[17]; ///< Indicates the top of the i-th DRC band in units of 4 spectral lines.
198 int prog_ref_level
; /**< A reference level for the long-term program audio level for all
201 } DynamicRangeControl
;
203 typedef struct Pulse
{
211 * coupling parameters
213 typedef struct ChannelCoupling
{
214 enum CouplingPoint coupling_point
; ///< The point during decoding at which coupling is applied.
215 int num_coupled
; ///< number of target elements
216 enum RawDataBlockType type
[8]; ///< Type of channel element to be coupled - SCE or CPE.
217 int id_select
[8]; ///< element id
218 int ch_select
[8]; /**< [0] shared list of gains; [1] list of gains for right channel;
219 * [2] list of gains for left channel; [3] lists of gains for both channels
225 * Single Channel Element - used for both SCE and LFE elements.
227 typedef struct SingleChannelElement
{
228 IndividualChannelStream ics
;
229 TemporalNoiseShaping tns
;
231 enum BandType band_type
[128]; ///< band types
232 int band_type_run_end
[120]; ///< band type run end points
233 float sf
[120]; ///< scalefactors
234 int sf_idx
[128]; ///< scalefactor indices (used by encoder)
235 uint8_t zeroes
[128]; ///< band is not coded (used by encoder)
236 DECLARE_ALIGNED(32, float, coeffs
)[1024]; ///< coefficients for IMDCT
237 DECLARE_ALIGNED(32, float, saved
)[1536]; ///< overlap
238 DECLARE_ALIGNED(32, float, ret_buf
)[2048]; ///< PCM output buffer
239 DECLARE_ALIGNED(16, float, ltp_state
)[3072]; ///< time signal for LTP
240 PredictorState predictor_state
[MAX_PREDICTORS
];
241 float *ret
; ///< PCM output
242 } SingleChannelElement
;
245 * channel element - generic struct for SCE/CPE/CCE/LFE
247 typedef struct ChannelElement
{
249 int common_window
; ///< Set if channels share a common 'IndividualChannelStream' in bitstream.
250 int ms_mode
; ///< Signals mid/side stereo flags coding mode (used by encoder)
251 uint8_t ms_mask
[128]; ///< Set if mid/side stereo is used for each scalefactor window band
253 SingleChannelElement ch
[2];
255 ChannelCoupling coup
;
256 SpectralBandReplication sbr
;
262 typedef struct AACContext
{
263 AVCodecContext
*avctx
;
266 int is_saved
; ///< Set if elements have stored overlap from previous frame.
267 DynamicRangeControl che_drc
;
270 * @name Channel element related data
273 ChannelElement
*che
[4][MAX_ELEM_ID
];
274 ChannelElement
*tag_che_map
[4][MAX_ELEM_ID
];
279 * @name temporary aligned temporary buffers
280 * (We do not want to have these on the stack.)
283 DECLARE_ALIGNED(32, float, buf_mdct
)[1024];
287 * @name Computed / set up during initialization
291 FFTContext mdct_small
;
294 IMDCT15Context
*mdct480
;
295 AVFloatDSPContext fdsp
;
300 * @name Members used for output
303 SingleChannelElement
*output_element
[MAX_CHANNELS
]; ///< Points to each SingleChannelElement
306 DECLARE_ALIGNED(32, float, temp
)[128];
308 OutputConfiguration oc
[2];
311 #endif /* AVCODEC_AAC_H */