avformat/mpeg: demux ivtv captions
[ffmpeg.git] / libavcodec / aacenc.h
blobae15f91e069f36ef32d6a4da92fa247527aa6618
/*
 * AAC encoder
 * Copyright (C) 2008 Konstantin Shishkov
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #ifndef AVCODEC_AACENC_H
23 #define AVCODEC_AACENC_H
25 #include <stdint.h>
27 #include "libavutil/channel_layout.h"
28 #include "libavutil/float_dsp.h"
29 #include "libavutil/mem_internal.h"
30 #include "libavutil/tx.h"
32 #include "avcodec.h"
33 #include "put_bits.h"
35 #include "aac.h"
36 #include "aacencdsp.h"
37 #include "audio_frame_queue.h"
38 #include "psymodel.h"
40 #include "lpc.h"
/**
 * Constant factor related to clip avoidance.
 * NOTE(review): its use is not visible in this header; presumably it is the
 * peak level targeted when attenuating windows that are near clipping
 * (see IndividualChannelStream.clip_avoidance_factor) - confirm in the encoder.
 */
#define CLIP_AVOIDANCE_FACTOR 0.95f
/**
 * Identifiers of the coefficient coders.
 *
 * The values are consecutive and start at 0, so AAC_CODER_NB evaluates to 3.
 * NOTE(review): presumably these index ff_aac_coders[] and AAC_CODER_NB is
 * the number of coders - confirm against the table definition.
 */
typedef enum AACCoder {
    AAC_CODER_ANMR = 0,
    AAC_CODER_TWOLOOP,
    AAC_CODER_FAST,

    AAC_CODER_NB,
} AACCoder;
/**
 * Predictor State
 *
 * Eight float state variables kept per predictor.
 * NOTE(review): the meaning of the individual fields is not documented in
 * this header; the names suggest correlation/variance accumulators, previous
 * values, a coefficient and an estimate - confirm against the prediction code.
 */
typedef struct PredictorState {
    float cor0;
    float cor1;
    float var0;
    float var1;
    float r0;
    float r1;
    float k1;
    float x_est;
} PredictorState;
/**
 * Encoder options, stored as plain ints.
 *
 * NOTE(review): value ranges and defaults are not visible in this header.
 * Presumably `coder` holds an AACCoder value and the remaining fields are
 * per-tool switches - confirm against the option table of the encoder.
 */
typedef struct AACEncOptions {
    int coder;
    int pns;
    int tns;
    int ltp;
    int pce;
    int pred;
    int mid_side;
    int intensity_stereo;
} AACEncOptions;
77 /**
78 * Long Term Prediction
80 typedef struct LongTermPrediction {
81 int8_t present;
82 int16_t lag;
83 int coef_idx;
84 float coef;
85 int8_t used[MAX_LTP_LONG_SFB];
86 } LongTermPrediction;
88 /**
89 * Individual Channel Stream
91 typedef struct IndividualChannelStream {
92 uint8_t max_sfb; ///< number of scalefactor bands per group
93 enum WindowSequence window_sequence[2];
94 uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sine window.
95 uint8_t group_len[8];
96 LongTermPrediction ltp;
97 const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window
98 const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window
99 int num_swb; ///< number of scalefactor window bands
100 int num_windows;
101 int tns_max_bands;
102 int predictor_present;
103 int predictor_initialized;
104 int predictor_reset_group;
105 int predictor_reset_count[31]; ///< used to count prediction resets
106 uint8_t prediction_used[41];
107 uint8_t window_clipping[8]; ///< set if a certain window is near clipping
108 float clip_avoidance_factor; ///< set if any window is near clipping to the necessary atennuation factor to avoid it
109 } IndividualChannelStream;
112 * Temporal Noise Shaping
114 typedef struct TemporalNoiseShaping {
115 int present;
116 int n_filt[8];
117 int length[8][4];
118 int direction[8][4];
119 int order[8][4];
120 int coef_idx[8][4][TNS_MAX_ORDER];
121 float coef[8][4][TNS_MAX_ORDER];
122 } TemporalNoiseShaping;
125 * Single Channel Element - used for both SCE and LFE elements.
127 typedef struct SingleChannelElement {
128 IndividualChannelStream ics;
129 TemporalNoiseShaping tns;
130 Pulse pulse;
131 enum BandType band_type[128]; ///< band types
132 enum BandType band_alt[128]; ///< alternative band type
133 int sf_idx[128]; ///< scalefactor indices
134 uint8_t zeroes[128]; ///< band is not coded
135 uint8_t can_pns[128]; ///< band is allowed to PNS (informative)
136 float is_ener[128]; ///< Intensity stereo pos
137 float pns_ener[128]; ///< Noise energy values
138 DECLARE_ALIGNED(32, float, pcoeffs)[1024]; ///< coefficients for IMDCT, pristine
139 DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT, maybe processed
140 DECLARE_ALIGNED(32, float, ret_buf)[2048]; ///< PCM output buffer
141 DECLARE_ALIGNED(16, float, ltp_state)[3072]; ///< time signal for LTP
142 DECLARE_ALIGNED(32, float, lcoeffs)[1024]; ///< MDCT of LTP coefficients
143 DECLARE_ALIGNED(32, float, prcoeffs)[1024]; ///< Main prediction coefs
144 PredictorState predictor_state[MAX_PREDICTORS];
145 } SingleChannelElement;
148 * channel element - generic struct for SCE/CPE/CCE/LFE
150 typedef struct ChannelElement {
151 // CPE specific
152 int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream.
153 int ms_mode; ///< Signals mid/side stereo flags coding mode
154 uint8_t is_mode; ///< Set if any bands have been encoded using intensity stereo
155 uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band
156 uint8_t is_mask[128]; ///< Set if intensity stereo is used
157 // shared
158 SingleChannelElement ch[2];
159 } ChannelElement;
161 struct AACEncContext;
163 typedef struct AACCoefficientsEncoder {
164 void (*search_for_quantizers)(AVCodecContext *avctx, struct AACEncContext *s,
165 SingleChannelElement *sce, const float lambda);
166 void (*encode_window_bands_info)(struct AACEncContext *s, SingleChannelElement *sce,
167 int win, int group_len, const float lambda);
168 void (*quantize_and_encode_band)(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size,
169 int scale_idx, int cb, const float lambda, int rtz);
170 void (*encode_tns_info)(struct AACEncContext *s, SingleChannelElement *sce);
171 void (*encode_ltp_info)(struct AACEncContext *s, SingleChannelElement *sce, int common_window);
172 void (*encode_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
173 void (*adjust_common_pred)(struct AACEncContext *s, ChannelElement *cpe);
174 void (*adjust_common_ltp)(struct AACEncContext *s, ChannelElement *cpe);
175 void (*apply_main_pred)(struct AACEncContext *s, SingleChannelElement *sce);
176 void (*apply_tns_filt)(struct AACEncContext *s, SingleChannelElement *sce);
177 void (*update_ltp)(struct AACEncContext *s, SingleChannelElement *sce);
178 void (*ltp_insert_new_frame)(struct AACEncContext *s);
179 void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce);
180 void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
181 void (*mark_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce);
182 void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce);
183 void (*search_for_ltp)(struct AACEncContext *s, SingleChannelElement *sce, int common_window);
184 void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe);
185 void (*search_for_is)(struct AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe);
186 void (*search_for_pred)(struct AACEncContext *s, SingleChannelElement *sce);
187 } AACCoefficientsEncoder;
189 extern const AACCoefficientsEncoder ff_aac_coders[];
/**
 * One entry of the quantize_band_cost memoization table
 * (AACEncContext.quantize_band_cost_cache).
 *
 * NOTE(review): field semantics are not documented in this header; presumably
 * rd/energy/bits are the cached results, cb/rtz the parameters they were
 * computed for, and generation is compared with
 * AACEncContext.quantize_band_cost_cache_generation to detect stale entries
 * - confirm against the cache code.
 */
typedef struct AACQuantizeBandCostCacheEntry {
    float rd;
    float energy;
    int bits;
    char cb;
    char rtz;
    uint16_t generation;
} AACQuantizeBandCostCacheEntry;
200 typedef struct AACPCEInfo {
201 AVChannelLayout layout;
202 int num_ele[4]; ///< front, side, back, lfe
203 int pairing[3][8]; ///< front, side, back
204 int index[4][8]; ///< front, side, back, lfe
205 uint8_t config_map[16]; ///< configs the encoder's channel specific settings
206 uint8_t reorder_map[16]; ///< maps channels from lavc to aac order
207 } AACPCEInfo;
210 * AAC encoder context
212 typedef struct AACEncContext {
213 AVClass *av_class;
214 AACEncOptions options; ///< encoding options
215 PutBitContext pb;
216 AVTXContext *mdct1024; ///< long (1024 samples) frame transform context
217 av_tx_fn mdct1024_fn;
218 AVTXContext *mdct128; ///< short (128 samples) frame transform context
219 av_tx_fn mdct128_fn;
220 AVFloatDSPContext *fdsp;
221 AACPCEInfo pce; ///< PCE data, if needed
222 float *planar_samples[16]; ///< saved preprocessed input
224 int profile; ///< copied from avctx
225 int needs_pce; ///< flag for non-standard layout
226 LPCContext lpc; ///< used by TNS
227 int samplerate_index; ///< MPEG-4 samplerate index
228 int channels; ///< channel count
229 const uint8_t *reorder_map; ///< lavc to aac reorder map
230 const uint8_t *chan_map; ///< channel configuration map
232 ChannelElement *cpe; ///< channel elements
233 FFPsyContext psy;
234 struct FFPsyPreprocessContext* psypp;
235 const AACCoefficientsEncoder *coder;
236 int cur_channel; ///< current channel for coder context
237 int random_state;
238 float lambda;
239 int last_frame_pb_count; ///< number of bits for the previous frame
240 float lambda_sum; ///< sum(lambda), for Qvg reporting
241 int lambda_count; ///< count(lambda), for Qvg reporting
242 enum RawDataBlockType cur_type; ///< channel group type cur_channel belongs to
244 AudioFrameQueue afq;
245 DECLARE_ALIGNED(32, int, qcoefs)[96]; ///< quantized coefficients
246 DECLARE_ALIGNED(32, float, scoefs)[1024]; ///< scaled coefficients
248 uint16_t quantize_band_cost_cache_generation;
249 AACQuantizeBandCostCacheEntry quantize_band_cost_cache[256][128]; ///< memoization area for quantize_band_cost
251 AACEncDSPContext aacdsp;
253 struct {
254 float *samples;
255 } buffer;
256 } AACEncContext;
/**
 * Initialize the quantize_band_cost cache of an encoder context.
 *
 * NOTE(review): the implementation is not visible in this header; presumably
 * it resets AACEncContext.quantize_band_cost_cache and its generation
 * counter - confirm at the definition.
 *
 * @param s encoder context whose cache is initialized
 */
void ff_quantize_band_cost_cache_init(struct AACEncContext *s);
261 #endif /* AVCODEC_AACENC_H */