3 * Copyright (C) 2008 Konstantin Shishkov
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 /***********************************
29 * add sane pulse detection
30 ***********************************/
33 #include "libavutil/channel_layout.h"
34 #include "libavutil/libm.h"
35 #include "libavutil/float_dsp.h"
36 #include "libavutil/mem.h"
37 #include "libavutil/opt.h"
39 #include "codec_internal.h"
42 #include "mpeg4audio.h"
50 #include "aacenctab.h"
51 #include "aacenc_utils.h"
56 * List of PCE (Program Configuration Element) for the channel layouts listed
59 * For those wishing in the future to add other layouts:
61 * - num_ele: number of elements in each group of front, side, back, lfe channels
62 * (an element is of type SCE (single channel), CPE (channel pair) for
63 * the first 3 groups; and is LFE for LFE group).
65 * - pairing: 0 for an SCE element or 1 for a CPE; does not apply to LFE group
67 * - index: there are three independent indices for SCE, CPE and LFE;
68 * they are incremented irrespective of the group to which the element belongs;
69 * they are not reset when going from one group to another
71 * Example: for 7.0 channel layout,
72 * .pairing = { { 1, 0 }, { 1 }, { 1 }, }, (3 CPE and 1 SCE in front group)
73 * .index = { { 0, 0 }, { 1 }, { 2 }, },
74 * (index is 0 for the single SCE but goes from 0 to 2 for the CPEs)
76 * The index order impacts the channel ordering. But is otherwise arbitrary
77 * (the sequence could have been 2, 0, 1 instead of 0, 1, 2).
79 * Spec allows for discontinuous indices, e.g. if one has a total of two SCE,
80 * SCE.0 SCE.15 is OK per spec; BUT it won't be decoded by our AAC decoder
81 * which at this time requires that indices fully cover some range starting
82 * from 0 (SCE.1 SCE.0 is OK but not SCE.0 SCE.15).
84 * - config_map: total number of elements and their types. Beware, the way the
85 * types are ordered impacts the final channel ordering.
87 * - reorder_map: reorders the channels.
90 static const AACPCEInfo aac_pce_configs
[] = {
92 .layout
= AV_CHANNEL_LAYOUT_MONO
,
93 .num_ele
= { 1, 0, 0, 0 },
94 .pairing
= { { 0 }, },
96 .config_map
= { 1, TYPE_SCE
, },
100 .layout
= AV_CHANNEL_LAYOUT_STEREO
,
101 .num_ele
= { 1, 0, 0, 0 },
102 .pairing
= { { 1 }, },
104 .config_map
= { 1, TYPE_CPE
, },
105 .reorder_map
= { 0, 1 },
108 .layout
= AV_CHANNEL_LAYOUT_2POINT1
,
109 .num_ele
= { 1, 0, 0, 1 },
110 .pairing
= { { 1 }, },
111 .index
= { { 0 },{ 0 },{ 0 },{ 0 } },
112 .config_map
= { 2, TYPE_CPE
, TYPE_LFE
},
113 .reorder_map
= { 0, 1, 2 },
116 .layout
= AV_CHANNEL_LAYOUT_2_1
,
117 .num_ele
= { 1, 0, 1, 0 },
118 .pairing
= { { 1 },{ 0 },{ 0 } },
119 .index
= { { 0 },{ 0 },{ 0 }, },
120 .config_map
= { 2, TYPE_CPE
, TYPE_SCE
},
121 .reorder_map
= { 0, 1, 2 },
124 .layout
= AV_CHANNEL_LAYOUT_SURROUND
,
125 .num_ele
= { 2, 0, 0, 0 },
126 .pairing
= { { 1, 0 }, },
127 .index
= { { 0, 0 }, },
128 .config_map
= { 2, TYPE_CPE
, TYPE_SCE
, },
129 .reorder_map
= { 0, 1, 2 },
132 .layout
= AV_CHANNEL_LAYOUT_3POINT1
,
133 .num_ele
= { 2, 0, 0, 1 },
134 .pairing
= { { 1, 0 }, },
135 .index
= { { 0, 0 }, { 0 }, { 0 }, { 0 }, },
136 .config_map
= { 3, TYPE_CPE
, TYPE_SCE
, TYPE_LFE
},
137 .reorder_map
= { 0, 1, 2, 3 },
140 .layout
= AV_CHANNEL_LAYOUT_4POINT0
,
141 .num_ele
= { 2, 0, 1, 0 },
142 .pairing
= { { 1, 0 }, { 0 }, { 0 }, },
143 .index
= { { 0, 0 }, { 0 }, { 1 } },
144 .config_map
= { 3, TYPE_CPE
, TYPE_SCE
, TYPE_SCE
},
145 .reorder_map
= { 0, 1, 2, 3 },
148 .layout
= AV_CHANNEL_LAYOUT_4POINT1
,
149 .num_ele
= { 2, 1, 1, 0 },
150 .pairing
= { { 1, 0 }, { 0 }, { 0 }, },
151 .index
= { { 0, 0 }, { 1 }, { 2 }, { 0 } },
152 .config_map
= { 4, TYPE_CPE
, TYPE_SCE
, TYPE_SCE
, TYPE_SCE
},
153 .reorder_map
= { 0, 1, 2, 3, 4 },
156 .layout
= AV_CHANNEL_LAYOUT_2_2
,
157 .num_ele
= { 1, 1, 0, 0 },
158 .pairing
= { { 1 }, { 1 }, },
159 .index
= { { 0 }, { 1 }, },
160 .config_map
= { 2, TYPE_CPE
, TYPE_CPE
},
161 .reorder_map
= { 0, 1, 2, 3 },
164 .layout
= AV_CHANNEL_LAYOUT_QUAD
,
165 .num_ele
= { 1, 0, 1, 0 },
166 .pairing
= { { 1 }, { 0 }, { 1 }, },
167 .index
= { { 0 }, { 0 }, { 1 } },
168 .config_map
= { 2, TYPE_CPE
, TYPE_CPE
},
169 .reorder_map
= { 0, 1, 2, 3 },
172 .layout
= AV_CHANNEL_LAYOUT_5POINT0
,
173 .num_ele
= { 2, 1, 0, 0 },
174 .pairing
= { { 1, 0 }, { 1 }, },
175 .index
= { { 0, 0 }, { 1 } },
176 .config_map
= { 3, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
},
177 .reorder_map
= { 0, 1, 2, 3, 4 },
180 .layout
= AV_CHANNEL_LAYOUT_5POINT1
,
181 .num_ele
= { 2, 1, 1, 0 },
182 .pairing
= { { 1, 0 }, { 0 }, { 1 }, },
183 .index
= { { 0, 0 }, { 1 }, { 1 } },
184 .config_map
= { 4, TYPE_CPE
, TYPE_SCE
, TYPE_SCE
, TYPE_CPE
},
185 .reorder_map
= { 0, 1, 2, 3, 4, 5 },
188 .layout
= AV_CHANNEL_LAYOUT_5POINT0_BACK
,
189 .num_ele
= { 2, 0, 1, 0 },
190 .pairing
= { { 1, 0 }, { 0 }, { 1 } },
191 .index
= { { 0, 0 }, { 0 }, { 1 } },
192 .config_map
= { 3, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
},
193 .reorder_map
= { 0, 1, 2, 3, 4 },
196 .layout
= AV_CHANNEL_LAYOUT_5POINT1_BACK
,
197 .num_ele
= { 2, 1, 1, 0 },
198 .pairing
= { { 1, 0 }, { 0 }, { 1 }, },
199 .index
= { { 0, 0 }, { 1 }, { 1 } },
200 .config_map
= { 4, TYPE_CPE
, TYPE_SCE
, TYPE_SCE
, TYPE_CPE
},
201 .reorder_map
= { 0, 1, 2, 3, 4, 5 },
204 .layout
= AV_CHANNEL_LAYOUT_6POINT0
,
205 .num_ele
= { 2, 1, 1, 0 },
206 .pairing
= { { 1, 0 }, { 1 }, { 0 }, },
207 .index
= { { 0, 0 }, { 1 }, { 1 } },
208 .config_map
= { 4, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
},
209 .reorder_map
= { 0, 1, 2, 3, 4, 5 },
212 .layout
= AV_CHANNEL_LAYOUT_6POINT0_FRONT
,
213 .num_ele
= { 2, 1, 0, 0 },
214 .pairing
= { { 1, 1 }, { 1 } },
215 .index
= { { 1, 0 }, { 2 }, },
216 .config_map
= { 3, TYPE_CPE
, TYPE_CPE
, TYPE_CPE
, },
217 .reorder_map
= { 0, 1, 2, 3, 4, 5 },
220 .layout
= AV_CHANNEL_LAYOUT_HEXAGONAL
,
221 .num_ele
= { 2, 0, 2, 0 },
222 .pairing
= { { 1, 0 },{ 0 },{ 1, 0 }, },
223 .index
= { { 0, 0 },{ 0 },{ 1, 1 } },
224 .config_map
= { 4, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
, },
225 .reorder_map
= { 0, 1, 2, 3, 4, 5 },
228 .layout
= AV_CHANNEL_LAYOUT_6POINT1
,
229 .num_ele
= { 2, 1, 2, 0 },
230 .pairing
= { { 1, 0 },{ 0 },{ 1, 0 }, },
231 .index
= { { 0, 0 },{ 1 },{ 1, 2 } },
232 .config_map
= { 5, TYPE_CPE
, TYPE_SCE
, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
},
233 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6 },
236 .layout
= AV_CHANNEL_LAYOUT_6POINT1_BACK
,
237 .num_ele
= { 2, 1, 2, 0 },
238 .pairing
= { { 1, 0 }, { 0 }, { 1, 0 }, },
239 .index
= { { 0, 0 }, { 1 }, { 1, 2 } },
240 .config_map
= { 5, TYPE_CPE
, TYPE_SCE
, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
},
241 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6 },
244 .layout
= AV_CHANNEL_LAYOUT_6POINT1_FRONT
,
245 .num_ele
= { 2, 1, 2, 0 },
246 .pairing
= { { 1, 0 }, { 0 }, { 1, 0 }, },
247 .index
= { { 0, 0 }, { 1 }, { 1, 2 } },
248 .config_map
= { 5, TYPE_CPE
, TYPE_SCE
, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
},
249 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6 },
252 .layout
= AV_CHANNEL_LAYOUT_7POINT0
,
253 .num_ele
= { 2, 1, 1, 0 },
254 .pairing
= { { 1, 0 }, { 1 }, { 1 }, },
255 .index
= { { 0, 0 }, { 1 }, { 2 }, },
256 .config_map
= { 4, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
, TYPE_CPE
},
257 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6 },
260 .layout
= AV_CHANNEL_LAYOUT_7POINT0_FRONT
,
261 .num_ele
= { 2, 1, 1, 0 },
262 .pairing
= { { 1, 0 }, { 1 }, { 1 }, },
263 .index
= { { 0, 0 }, { 1 }, { 2 }, },
264 .config_map
= { 4, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
, TYPE_CPE
},
265 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6 },
268 .layout
= AV_CHANNEL_LAYOUT_7POINT1
,
269 .num_ele
= { 2, 1, 2, 0 },
270 .pairing
= { { 1, 0 }, { 0 }, { 1, 1 }, },
271 .index
= { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
272 .config_map
= { 5, TYPE_CPE
, TYPE_SCE
, TYPE_SCE
, TYPE_CPE
, TYPE_CPE
},
273 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6, 7 },
276 .layout
= AV_CHANNEL_LAYOUT_7POINT1_WIDE
,
277 .num_ele
= { 2, 1, 2, 0 },
278 .pairing
= { { 1, 0 }, { 0 },{ 1, 1 }, },
279 .index
= { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
280 .config_map
= { 5, TYPE_CPE
, TYPE_SCE
, TYPE_SCE
, TYPE_CPE
, TYPE_CPE
},
281 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6, 7 },
284 .layout
= AV_CHANNEL_LAYOUT_7POINT1_WIDE_BACK
,
285 .num_ele
= { 2, 1, 2, 0 },
286 .pairing
= { { 1, 0 }, { 0 }, { 1, 1 }, },
287 .index
= { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
288 .config_map
= { 5, TYPE_CPE
, TYPE_SCE
, TYPE_SCE
, TYPE_CPE
, TYPE_CPE
},
289 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6, 7 },
292 .layout
= AV_CHANNEL_LAYOUT_OCTAGONAL
,
293 .num_ele
= { 2, 1, 2, 0 },
294 .pairing
= { { 1, 0 }, { 1 }, { 1, 0 }, },
295 .index
= { { 0, 0 }, { 1 }, { 2, 1 } },
296 .config_map
= { 5, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
, TYPE_CPE
, TYPE_SCE
},
297 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6, 7 },
299 { /* Meant for order 2/mixed ambisonics */
300 .layout
= { .order
= AV_CHANNEL_ORDER_NATIVE
, .nb_channels
= 9,
301 .u
.mask
= AV_CH_LAYOUT_OCTAGONAL
| AV_CH_TOP_CENTER
},
302 .num_ele
= { 2, 2, 2, 0 },
303 .pairing
= { { 1, 0 }, { 1, 0 }, { 1, 0 }, },
304 .index
= { { 0, 0 }, { 1, 1 }, { 2, 2 } },
305 .config_map
= { 6, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
},
306 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6, 7, 8 },
308 { /* Meant for order 2/mixed ambisonics */
309 .layout
= { .order
= AV_CHANNEL_ORDER_NATIVE
, .nb_channels
= 10,
310 .u
.mask
= AV_CH_LAYOUT_6POINT0_FRONT
| AV_CH_BACK_CENTER
|
311 AV_CH_BACK_LEFT
| AV_CH_BACK_RIGHT
| AV_CH_TOP_CENTER
},
312 .num_ele
= { 2, 2, 2, 0 },
313 .pairing
= { { 1, 1 }, { 1, 0 }, { 1, 0 }, },
314 .index
= { { 0, 1 }, { 2, 0 }, { 3, 1 } },
315 .config_map
= { 6, TYPE_CPE
, TYPE_CPE
, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
},
316 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
319 .layout
= AV_CHANNEL_LAYOUT_HEXADECAGONAL
,
320 .num_ele
= { 4, 2, 4, 0 },
321 .pairing
= { { 1, 0, 1, 0 }, { 1, 1 }, { 1, 0, 1, 0 }, },
322 .index
= { { 0, 0, 1, 1 }, { 2, 3 }, { 4, 2, 5, 3 } },
323 .config_map
= { 10, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
, TYPE_CPE
, TYPE_CPE
, TYPE_SCE
, TYPE_CPE
, TYPE_SCE
},
324 .reorder_map
= { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
328 static void put_pce(PutBitContext
*pb
, AVCodecContext
*avctx
)
331 AACEncContext
*s
= avctx
->priv_data
;
332 AACPCEInfo
*pce
= &s
->pce
;
333 const int bitexact
= avctx
->flags
& AV_CODEC_FLAG_BITEXACT
;
334 const char *aux_data
= bitexact
? "Lavc" : LIBAVCODEC_IDENT
;
338 put_bits(pb
, 2, avctx
->profile
);
339 put_bits(pb
, 4, s
->samplerate_index
);
341 put_bits(pb
, 4, pce
->num_ele
[0]); /* Front */
342 put_bits(pb
, 4, pce
->num_ele
[1]); /* Side */
343 put_bits(pb
, 4, pce
->num_ele
[2]); /* Back */
344 put_bits(pb
, 2, pce
->num_ele
[3]); /* LFE */
345 put_bits(pb
, 3, 0); /* Assoc data */
346 put_bits(pb
, 4, 0); /* CCs */
348 put_bits(pb
, 1, 0); /* Stereo mixdown */
349 put_bits(pb
, 1, 0); /* Mono mixdown */
350 put_bits(pb
, 1, 0); /* Something else */
352 for (i
= 0; i
< 4; i
++) {
353 for (j
= 0; j
< pce
->num_ele
[i
]; j
++) {
355 put_bits(pb
, 1, pce
->pairing
[i
][j
]);
356 put_bits(pb
, 4, pce
->index
[i
][j
]);
361 put_bits(pb
, 8, strlen(aux_data
));
362 ff_put_string(pb
, aux_data
, 0);
366 * Make AAC audio config object.
367 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
369 static int put_audio_specific_config(AVCodecContext
*avctx
)
372 AACEncContext
*s
= avctx
->priv_data
;
373 int channels
= (!s
->needs_pce
)*(s
->channels
- (s
->channels
== 8 ? 1 : 0));
374 const int max_size
= 32;
376 avctx
->extradata
= av_mallocz(max_size
);
377 if (!avctx
->extradata
)
378 return AVERROR(ENOMEM
);
380 init_put_bits(&pb
, avctx
->extradata
, max_size
);
381 put_bits(&pb
, 5, s
->profile
+1); //profile
382 put_bits(&pb
, 4, s
->samplerate_index
); //sample rate index
383 put_bits(&pb
, 4, channels
);
385 put_bits(&pb
, 1, 0); //frame length - 1024 samples
386 put_bits(&pb
, 1, 0); //does not depend on core coder
387 put_bits(&pb
, 1, 0); //is not extension
391 //Explicitly Mark SBR absent
392 put_bits(&pb
, 11, 0x2b7); //sync extension
393 put_bits(&pb
, 5, AOT_SBR
);
396 avctx
->extradata_size
= put_bytes_output(&pb
);
401 void ff_quantize_band_cost_cache_init(struct AACEncContext
*s
)
403 ++s
->quantize_band_cost_cache_generation
;
404 if (s
->quantize_band_cost_cache_generation
== 0) {
405 memset(s
->quantize_band_cost_cache
, 0, sizeof(s
->quantize_band_cost_cache
));
406 s
->quantize_band_cost_cache_generation
= 1;
/* Declares the shared signature of the apply_<type>_window() helpers; the
 * parameter list matches the function pointer type of apply_window[]. */
#define WINDOW_FUNC(type) \
static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
                                    SingleChannelElement *sce, \
                                    const float *audio)
415 WINDOW_FUNC(only_long
)
417 const float *lwindow
= sce
->ics
.use_kb_window
[0] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
418 const float *pwindow
= sce
->ics
.use_kb_window
[1] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
419 float *out
= sce
->ret_buf
;
421 fdsp
->vector_fmul (out
, audio
, lwindow
, 1024);
422 fdsp
->vector_fmul_reverse(out
+ 1024, audio
+ 1024, pwindow
, 1024);
425 WINDOW_FUNC(long_start
)
427 const float *lwindow
= sce
->ics
.use_kb_window
[1] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
428 const float *swindow
= sce
->ics
.use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
429 float *out
= sce
->ret_buf
;
431 fdsp
->vector_fmul(out
, audio
, lwindow
, 1024);
432 memcpy(out
+ 1024, audio
+ 1024, sizeof(out
[0]) * 448);
433 fdsp
->vector_fmul_reverse(out
+ 1024 + 448, audio
+ 1024 + 448, swindow
, 128);
434 memset(out
+ 1024 + 576, 0, sizeof(out
[0]) * 448);
437 WINDOW_FUNC(long_stop
)
439 const float *lwindow
= sce
->ics
.use_kb_window
[0] ? ff_aac_kbd_long_1024
: ff_sine_1024
;
440 const float *swindow
= sce
->ics
.use_kb_window
[1] ? ff_aac_kbd_short_128
: ff_sine_128
;
441 float *out
= sce
->ret_buf
;
443 memset(out
, 0, sizeof(out
[0]) * 448);
444 fdsp
->vector_fmul(out
+ 448, audio
+ 448, swindow
, 128);
445 memcpy(out
+ 576, audio
+ 576, sizeof(out
[0]) * 448);
446 fdsp
->vector_fmul_reverse(out
+ 1024, audio
+ 1024, lwindow
, 1024);
449 WINDOW_FUNC(eight_short
)
451 const float *swindow
= sce
->ics
.use_kb_window
[0] ? ff_aac_kbd_short_128
: ff_sine_128
;
452 const float *pwindow
= sce
->ics
.use_kb_window
[1] ? ff_aac_kbd_short_128
: ff_sine_128
;
453 const float *in
= audio
+ 448;
454 float *out
= sce
->ret_buf
;
457 for (w
= 0; w
< 8; w
++) {
458 fdsp
->vector_fmul (out
, in
, w
? pwindow
: swindow
, 128);
461 fdsp
->vector_fmul_reverse(out
, in
, swindow
, 128);
466 static void (*const apply_window
[4])(AVFloatDSPContext
*fdsp
,
467 SingleChannelElement
*sce
,
468 const float *audio
) = {
469 [ONLY_LONG_SEQUENCE
] = apply_only_long_window
,
470 [LONG_START_SEQUENCE
] = apply_long_start_window
,
471 [EIGHT_SHORT_SEQUENCE
] = apply_eight_short_window
,
472 [LONG_STOP_SEQUENCE
] = apply_long_stop_window
475 static void apply_window_and_mdct(AACEncContext
*s
, SingleChannelElement
*sce
,
479 float *output
= sce
->ret_buf
;
481 apply_window
[sce
->ics
.window_sequence
[0]](s
->fdsp
, sce
, audio
);
483 if (sce
->ics
.window_sequence
[0] != EIGHT_SHORT_SEQUENCE
)
484 s
->mdct1024_fn(s
->mdct1024
, sce
->coeffs
, output
, sizeof(float));
486 for (i
= 0; i
< 1024; i
+= 128)
487 s
->mdct128_fn(s
->mdct128
, &sce
->coeffs
[i
], output
+ i
*2, sizeof(float));
488 memcpy(audio
, audio
+ 1024, sizeof(audio
[0]) * 1024);
489 memcpy(sce
->pcoeffs
, sce
->coeffs
, sizeof(sce
->pcoeffs
));
493 * Encode ics_info element.
494 * @see Table 4.6 (syntax of ics_info)
496 static void put_ics_info(AACEncContext
*s
, IndividualChannelStream
*info
)
500 put_bits(&s
->pb
, 1, 0); // ics_reserved bit
501 put_bits(&s
->pb
, 2, info
->window_sequence
[0]);
502 put_bits(&s
->pb
, 1, info
->use_kb_window
[0]);
503 if (info
->window_sequence
[0] != EIGHT_SHORT_SEQUENCE
) {
504 put_bits(&s
->pb
, 6, info
->max_sfb
);
505 put_bits(&s
->pb
, 1, !!info
->predictor_present
);
507 put_bits(&s
->pb
, 4, info
->max_sfb
);
508 for (w
= 1; w
< 8; w
++)
509 put_bits(&s
->pb
, 1, !info
->group_len
[w
]);
515 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
517 static void encode_ms_info(PutBitContext
*pb
, ChannelElement
*cpe
)
521 put_bits(pb
, 2, cpe
->ms_mode
);
522 if (cpe
->ms_mode
== 1)
523 for (w
= 0; w
< cpe
->ch
[0].ics
.num_windows
; w
+= cpe
->ch
[0].ics
.group_len
[w
])
524 for (i
= 0; i
< cpe
->ch
[0].ics
.max_sfb
; i
++)
525 put_bits(pb
, 1, cpe
->ms_mask
[w
*16 + i
]);
529 * Produce integer coefficients from scalefactors provided by the model.
531 static void adjust_frame_information(ChannelElement
*cpe
, int chans
)
536 for (ch
= 0; ch
< chans
; ch
++) {
537 IndividualChannelStream
*ics
= &cpe
->ch
[ch
].ics
;
539 cpe
->ch
[ch
].pulse
.num_pulse
= 0;
540 for (w
= 0; w
< ics
->num_windows
; w
+= ics
->group_len
[w
]) {
541 for (cmaxsfb
= ics
->num_swb
; cmaxsfb
> 0 && cpe
->ch
[ch
].zeroes
[w
*16+cmaxsfb
-1]; cmaxsfb
--)
543 maxsfb
= FFMAX(maxsfb
, cmaxsfb
);
545 ics
->max_sfb
= maxsfb
;
547 //adjust zero bands for window groups
548 for (w
= 0; w
< ics
->num_windows
; w
+= ics
->group_len
[w
]) {
549 for (g
= 0; g
< ics
->max_sfb
; g
++) {
551 for (w2
= w
; w2
< w
+ ics
->group_len
[w
]; w2
++) {
552 if (!cpe
->ch
[ch
].zeroes
[w2
*16 + g
]) {
557 cpe
->ch
[ch
].zeroes
[w
*16 + g
] = i
;
562 if (chans
> 1 && cpe
->common_window
) {
563 IndividualChannelStream
*ics0
= &cpe
->ch
[0].ics
;
564 IndividualChannelStream
*ics1
= &cpe
->ch
[1].ics
;
566 ics0
->max_sfb
= FFMAX(ics0
->max_sfb
, ics1
->max_sfb
);
567 ics1
->max_sfb
= ics0
->max_sfb
;
568 for (w
= 0; w
< ics0
->num_windows
*16; w
+= 16)
569 for (i
= 0; i
< ics0
->max_sfb
; i
++)
570 if (cpe
->ms_mask
[w
+i
])
572 if (msc
== 0 || ics0
->max_sfb
== 0)
575 cpe
->ms_mode
= msc
< ics0
->max_sfb
* ics0
->num_windows
? 1 : 2;
579 static void apply_intensity_stereo(ChannelElement
*cpe
)
582 IndividualChannelStream
*ics
= &cpe
->ch
[0].ics
;
583 if (!cpe
->common_window
)
585 for (w
= 0; w
< ics
->num_windows
; w
+= ics
->group_len
[w
]) {
586 for (w2
= 0; w2
< ics
->group_len
[w
]; w2
++) {
587 int start
= (w
+w2
) * 128;
588 for (g
= 0; g
< ics
->num_swb
; g
++) {
589 int p
= -1 + 2 * (cpe
->ch
[1].band_type
[w
*16+g
] - 14);
590 float scale
= cpe
->ch
[0].is_ener
[w
*16+g
];
591 if (!cpe
->is_mask
[w
*16 + g
]) {
592 start
+= ics
->swb_sizes
[g
];
595 if (cpe
->ms_mask
[w
*16 + g
])
597 for (i
= 0; i
< ics
->swb_sizes
[g
]; i
++) {
598 float sum
= (cpe
->ch
[0].coeffs
[start
+i
] + p
*cpe
->ch
[1].coeffs
[start
+i
])*scale
;
599 cpe
->ch
[0].coeffs
[start
+i
] = sum
;
600 cpe
->ch
[1].coeffs
[start
+i
] = 0.0f
;
602 start
+= ics
->swb_sizes
[g
];
608 static void apply_mid_side_stereo(ChannelElement
*cpe
)
611 IndividualChannelStream
*ics
= &cpe
->ch
[0].ics
;
612 if (!cpe
->common_window
)
614 for (w
= 0; w
< ics
->num_windows
; w
+= ics
->group_len
[w
]) {
615 for (w2
= 0; w2
< ics
->group_len
[w
]; w2
++) {
616 int start
= (w
+w2
) * 128;
617 for (g
= 0; g
< ics
->num_swb
; g
++) {
618 /* ms_mask can be used for other purposes in PNS and I/S,
619 * so must not apply M/S if any band uses either, even if
622 if (!cpe
->ms_mask
[w
*16 + g
] || cpe
->is_mask
[w
*16 + g
]
623 || cpe
->ch
[0].band_type
[w
*16 + g
] >= NOISE_BT
624 || cpe
->ch
[1].band_type
[w
*16 + g
] >= NOISE_BT
) {
625 start
+= ics
->swb_sizes
[g
];
628 for (i
= 0; i
< ics
->swb_sizes
[g
]; i
++) {
629 float L
= (cpe
->ch
[0].coeffs
[start
+i
] + cpe
->ch
[1].coeffs
[start
+i
]) * 0.5f
;
630 float R
= L
- cpe
->ch
[1].coeffs
[start
+i
];
631 cpe
->ch
[0].coeffs
[start
+i
] = L
;
632 cpe
->ch
[1].coeffs
[start
+i
] = R
;
634 start
+= ics
->swb_sizes
[g
];
641 * Encode scalefactor band coding type.
643 static void encode_band_info(AACEncContext
*s
, SingleChannelElement
*sce
)
647 if (s
->coder
->set_special_band_scalefactors
)
648 s
->coder
->set_special_band_scalefactors(s
, sce
);
650 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
])
651 s
->coder
->encode_window_bands_info(s
, sce
, w
, sce
->ics
.group_len
[w
], s
->lambda
);
655 * Encode scalefactors.
657 static void encode_scale_factors(AVCodecContext
*avctx
, AACEncContext
*s
,
658 SingleChannelElement
*sce
)
660 int diff
, off_sf
= sce
->sf_idx
[0], off_pns
= sce
->sf_idx
[0] - NOISE_OFFSET
;
661 int off_is
= 0, noise_flag
= 1;
664 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
]) {
665 for (i
= 0; i
< sce
->ics
.max_sfb
; i
++) {
666 if (!sce
->zeroes
[w
*16 + i
]) {
667 if (sce
->band_type
[w
*16 + i
] == NOISE_BT
) {
668 diff
= sce
->sf_idx
[w
*16 + i
] - off_pns
;
669 off_pns
= sce
->sf_idx
[w
*16 + i
];
670 if (noise_flag
-- > 0) {
671 put_bits(&s
->pb
, NOISE_PRE_BITS
, diff
+ NOISE_PRE
);
674 } else if (sce
->band_type
[w
*16 + i
] == INTENSITY_BT
||
675 sce
->band_type
[w
*16 + i
] == INTENSITY_BT2
) {
676 diff
= sce
->sf_idx
[w
*16 + i
] - off_is
;
677 off_is
= sce
->sf_idx
[w
*16 + i
];
679 diff
= sce
->sf_idx
[w
*16 + i
] - off_sf
;
680 off_sf
= sce
->sf_idx
[w
*16 + i
];
682 diff
+= SCALE_DIFF_ZERO
;
683 av_assert0(diff
>= 0 && diff
<= 120);
684 put_bits(&s
->pb
, ff_aac_scalefactor_bits
[diff
], ff_aac_scalefactor_code
[diff
]);
693 static void encode_pulses(AACEncContext
*s
, Pulse
*pulse
)
697 put_bits(&s
->pb
, 1, !!pulse
->num_pulse
);
698 if (!pulse
->num_pulse
)
701 put_bits(&s
->pb
, 2, pulse
->num_pulse
- 1);
702 put_bits(&s
->pb
, 6, pulse
->start
);
703 for (i
= 0; i
< pulse
->num_pulse
; i
++) {
704 put_bits(&s
->pb
, 5, pulse
->pos
[i
]);
705 put_bits(&s
->pb
, 4, pulse
->amp
[i
]);
710 * Encode spectral coefficients processed by psychoacoustic model.
712 static void encode_spectral_coeffs(AACEncContext
*s
, SingleChannelElement
*sce
)
716 for (w
= 0; w
< sce
->ics
.num_windows
; w
+= sce
->ics
.group_len
[w
]) {
718 for (i
= 0; i
< sce
->ics
.max_sfb
; i
++) {
719 if (sce
->zeroes
[w
*16 + i
]) {
720 start
+= sce
->ics
.swb_sizes
[i
];
723 for (w2
= w
; w2
< w
+ sce
->ics
.group_len
[w
]; w2
++) {
724 s
->coder
->quantize_and_encode_band(s
, &s
->pb
,
725 &sce
->coeffs
[start
+ w2
*128],
726 NULL
, sce
->ics
.swb_sizes
[i
],
727 sce
->sf_idx
[w
*16 + i
],
728 sce
->band_type
[w
*16 + i
],
730 sce
->ics
.window_clipping
[w
]);
732 start
+= sce
->ics
.swb_sizes
[i
];
738 * Downscale spectral coefficients for near-clipping windows to avoid artifacts
740 static void avoid_clipping(AACEncContext
*s
, SingleChannelElement
*sce
)
744 if (sce
->ics
.clip_avoidance_factor
< 1.0f
) {
745 for (w
= 0; w
< sce
->ics
.num_windows
; w
++) {
747 for (i
= 0; i
< sce
->ics
.max_sfb
; i
++) {
748 float *swb_coeffs
= &sce
->coeffs
[start
+ w
*128];
749 for (j
= 0; j
< sce
->ics
.swb_sizes
[i
]; j
++)
750 swb_coeffs
[j
] *= sce
->ics
.clip_avoidance_factor
;
751 start
+= sce
->ics
.swb_sizes
[i
];
758 * Encode one channel of audio data.
760 static int encode_individual_channel(AVCodecContext
*avctx
, AACEncContext
*s
,
761 SingleChannelElement
*sce
,
764 put_bits(&s
->pb
, 8, sce
->sf_idx
[0]);
765 if (!common_window
) {
766 put_ics_info(s
, &sce
->ics
);
767 if (s
->coder
->encode_main_pred
)
768 s
->coder
->encode_main_pred(s
, sce
);
769 if (s
->coder
->encode_ltp_info
)
770 s
->coder
->encode_ltp_info(s
, sce
, 0);
772 encode_band_info(s
, sce
);
773 encode_scale_factors(avctx
, s
, sce
);
774 encode_pulses(s
, &sce
->pulse
);
775 put_bits(&s
->pb
, 1, !!sce
->tns
.present
);
776 if (s
->coder
->encode_tns_info
)
777 s
->coder
->encode_tns_info(s
, sce
);
778 put_bits(&s
->pb
, 1, 0); //ssr
779 encode_spectral_coeffs(s
, sce
);
784 * Write some auxiliary information about the created AAC file.
786 static void put_bitstream_info(AACEncContext
*s
, const char *name
)
788 int i
, namelen
, padbits
;
790 namelen
= strlen(name
) + 2;
791 put_bits(&s
->pb
, 3, TYPE_FIL
);
792 put_bits(&s
->pb
, 4, FFMIN(namelen
, 15));
794 put_bits(&s
->pb
, 8, namelen
- 14);
795 put_bits(&s
->pb
, 4, 0); //extension type - filler
796 padbits
= -put_bits_count(&s
->pb
) & 7;
797 align_put_bits(&s
->pb
);
798 for (i
= 0; i
< namelen
- 2; i
++)
799 put_bits(&s
->pb
, 8, name
[i
]);
800 put_bits(&s
->pb
, 12 - padbits
, 0);
804 * Copy input samples.
805 * Channels are reordered from libavcodec's default order to AAC order.
807 static void copy_input_samples(AACEncContext
*s
, const AVFrame
*frame
)
810 int end
= 2048 + (frame
? frame
->nb_samples
: 0);
811 const uint8_t *channel_map
= s
->reorder_map
;
813 /* copy and remap input samples */
814 for (ch
= 0; ch
< s
->channels
; ch
++) {
815 /* copy last 1024 samples of previous frame to the start of the current frame */
816 memcpy(&s
->planar_samples
[ch
][1024], &s
->planar_samples
[ch
][2048], 1024 * sizeof(s
->planar_samples
[0][0]));
818 /* copy new samples and zero any remaining samples */
820 memcpy(&s
->planar_samples
[ch
][2048],
821 frame
->extended_data
[channel_map
[ch
]],
822 frame
->nb_samples
* sizeof(s
->planar_samples
[0][0]));
824 memset(&s
->planar_samples
[ch
][end
], 0,
825 (3072 - end
) * sizeof(s
->planar_samples
[0][0]));
829 static int aac_encode_frame(AVCodecContext
*avctx
, AVPacket
*avpkt
,
830 const AVFrame
*frame
, int *got_packet_ptr
)
832 AACEncContext
*s
= avctx
->priv_data
;
833 float **samples
= s
->planar_samples
, *samples2
, *la
, *overlap
;
835 SingleChannelElement
*sce
;
836 IndividualChannelStream
*ics
;
837 int i
, its
, ch
, w
, chans
, tag
, start_ch
, ret
, frame_bits
;
838 int target_bits
, rate_bits
, too_many_bits
, too_few_bits
;
839 int ms_mode
= 0, is_mode
= 0, tns_mode
= 0, pred_mode
= 0;
840 int chan_el_counter
[4];
841 FFPsyWindowInfo windows
[AAC_MAX_CHANNELS
];
843 /* add current frame to queue */
845 if ((ret
= ff_af_queue_add(&s
->afq
, frame
)) < 0)
848 if (!s
->afq
.remaining_samples
|| (!s
->afq
.frame_alloc
&& !s
->afq
.frame_count
))
852 copy_input_samples(s
, frame
);
854 ff_psy_preprocess(s
->psypp
, s
->planar_samples
, s
->channels
);
856 if (!avctx
->frame_num
)
860 for (i
= 0; i
< s
->chan_map
[0]; i
++) {
861 FFPsyWindowInfo
* wi
= windows
+ start_ch
;
862 tag
= s
->chan_map
[i
+1];
863 chans
= tag
== TYPE_CPE
? 2 : 1;
865 for (ch
= 0; ch
< chans
; ch
++) {
867 float clip_avoidance_factor
;
870 s
->cur_channel
= start_ch
+ ch
;
871 overlap
= &samples
[s
->cur_channel
][0];
872 samples2
= overlap
+ 1024;
873 la
= samples2
+ (448+64);
876 if (tag
== TYPE_LFE
) {
877 wi
[ch
].window_type
[0] = wi
[ch
].window_type
[1] = ONLY_LONG_SEQUENCE
;
878 wi
[ch
].window_shape
= 0;
879 wi
[ch
].num_windows
= 1;
880 wi
[ch
].grouping
[0] = 1;
881 wi
[ch
].clipping
[0] = 0;
883 /* Only the lowest 12 coefficients are used in a LFE channel.
884 * The expression below results in only the bottom 8 coefficients
885 * being used for 11.025kHz to 16kHz sample rates.
887 ics
->num_swb
= s
->samplerate_index
>= 8 ? 1 : 3;
889 wi
[ch
] = s
->psy
.model
->window(&s
->psy
, samples2
, la
, s
->cur_channel
,
890 ics
->window_sequence
[0]);
892 ics
->window_sequence
[1] = ics
->window_sequence
[0];
893 ics
->window_sequence
[0] = wi
[ch
].window_type
[0];
894 ics
->use_kb_window
[1] = ics
->use_kb_window
[0];
895 ics
->use_kb_window
[0] = wi
[ch
].window_shape
;
896 ics
->num_windows
= wi
[ch
].num_windows
;
897 ics
->swb_sizes
= s
->psy
.bands
[ics
->num_windows
== 8];
898 ics
->num_swb
= tag
== TYPE_LFE
? ics
->num_swb
: s
->psy
.num_bands
[ics
->num_windows
== 8];
899 ics
->max_sfb
= FFMIN(ics
->max_sfb
, ics
->num_swb
);
900 ics
->swb_offset
= wi
[ch
].window_type
[0] == EIGHT_SHORT_SEQUENCE
?
901 ff_swb_offset_128
[s
->samplerate_index
]:
902 ff_swb_offset_1024
[s
->samplerate_index
];
903 ics
->tns_max_bands
= wi
[ch
].window_type
[0] == EIGHT_SHORT_SEQUENCE
?
904 ff_tns_max_bands_128
[s
->samplerate_index
]:
905 ff_tns_max_bands_1024
[s
->samplerate_index
];
907 for (w
= 0; w
< ics
->num_windows
; w
++)
908 ics
->group_len
[w
] = wi
[ch
].grouping
[w
];
910 /* Calculate input sample maximums and evaluate clipping risk */
911 clip_avoidance_factor
= 0.0f
;
912 for (w
= 0; w
< ics
->num_windows
; w
++) {
913 const float *wbuf
= overlap
+ w
* 128;
914 const int wlen
= 2048 / ics
->num_windows
;
917 /* mdct input is 2 * output */
918 for (j
= 0; j
< wlen
; j
++)
919 max
= FFMAX(max
, fabsf(wbuf
[j
]));
920 wi
[ch
].clipping
[w
] = max
;
922 for (w
= 0; w
< ics
->num_windows
; w
++) {
923 if (wi
[ch
].clipping
[w
] > CLIP_AVOIDANCE_FACTOR
) {
924 ics
->window_clipping
[w
] = 1;
925 clip_avoidance_factor
= FFMAX(clip_avoidance_factor
, wi
[ch
].clipping
[w
]);
927 ics
->window_clipping
[w
] = 0;
930 if (clip_avoidance_factor
> CLIP_AVOIDANCE_FACTOR
) {
931 ics
->clip_avoidance_factor
= CLIP_AVOIDANCE_FACTOR
/ clip_avoidance_factor
;
933 ics
->clip_avoidance_factor
= 1.0f
;
936 apply_window_and_mdct(s
, sce
, overlap
);
938 if (s
->options
.ltp
&& s
->coder
->update_ltp
) {
939 s
->coder
->update_ltp(s
, sce
);
940 apply_window
[sce
->ics
.window_sequence
[0]](s
->fdsp
, sce
, &sce
->ltp_state
[0]);
941 s
->mdct1024_fn(s
->mdct1024
, sce
->lcoeffs
, sce
->ret_buf
, sizeof(float));
944 for (k
= 0; k
< 1024; k
++) {
945 if (!(fabs(cpe
->ch
[ch
].coeffs
[k
]) < 1E16
)) { // Ensure headroom for energy calculation
946 av_log(avctx
, AV_LOG_ERROR
, "Input contains (near) NaN/+-Inf\n");
947 return AVERROR(EINVAL
);
950 avoid_clipping(s
, sce
);
954 if ((ret
= ff_alloc_packet(avctx
, avpkt
, 8192 * s
->channels
)) < 0)
956 frame_bits
= its
= 0;
958 init_put_bits(&s
->pb
, avpkt
->data
, avpkt
->size
);
960 if ((avctx
->frame_num
& 0xFF)==1 && !(avctx
->flags
& AV_CODEC_FLAG_BITEXACT
))
961 put_bitstream_info(s
, LIBAVCODEC_IDENT
);
964 memset(chan_el_counter
, 0, sizeof(chan_el_counter
));
965 for (i
= 0; i
< s
->chan_map
[0]; i
++) {
966 FFPsyWindowInfo
* wi
= windows
+ start_ch
;
967 const float *coeffs
[2];
968 tag
= s
->chan_map
[i
+1];
969 chans
= tag
== TYPE_CPE
? 2 : 1;
971 cpe
->common_window
= 0;
972 memset(cpe
->is_mask
, 0, sizeof(cpe
->is_mask
));
973 memset(cpe
->ms_mask
, 0, sizeof(cpe
->ms_mask
));
974 put_bits(&s
->pb
, 3, tag
);
975 put_bits(&s
->pb
, 4, chan_el_counter
[tag
]++);
976 for (ch
= 0; ch
< chans
; ch
++) {
978 coeffs
[ch
] = sce
->coeffs
;
979 sce
->ics
.predictor_present
= 0;
980 sce
->ics
.ltp
.present
= 0;
981 memset(sce
->ics
.ltp
.used
, 0, sizeof(sce
->ics
.ltp
.used
));
982 memset(sce
->ics
.prediction_used
, 0, sizeof(sce
->ics
.prediction_used
));
983 memset(&sce
->tns
, 0, sizeof(TemporalNoiseShaping
));
984 for (w
= 0; w
< 128; w
++)
985 if (sce
->band_type
[w
] > RESERVED_BT
)
986 sce
->band_type
[w
] = 0;
988 s
->psy
.bitres
.alloc
= -1;
989 s
->psy
.bitres
.bits
= s
->last_frame_pb_count
/ s
->channels
;
990 s
->psy
.model
->analyze(&s
->psy
, start_ch
, coeffs
, wi
);
991 if (s
->psy
.bitres
.alloc
> 0) {
992 /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
993 target_bits
+= s
->psy
.bitres
.alloc
994 * (s
->lambda
/ (avctx
->global_quality
? avctx
->global_quality
: 120));
995 s
->psy
.bitres
.alloc
/= chans
;
998 for (ch
= 0; ch
< chans
; ch
++) {
999 s
->cur_channel
= start_ch
+ ch
;
1000 if (s
->options
.pns
&& s
->coder
->mark_pns
)
1001 s
->coder
->mark_pns(s
, avctx
, &cpe
->ch
[ch
]);
1002 s
->coder
->search_for_quantizers(avctx
, s
, &cpe
->ch
[ch
], s
->lambda
);
1005 && wi
[0].window_type
[0] == wi
[1].window_type
[0]
1006 && wi
[0].window_shape
== wi
[1].window_shape
) {
1008 cpe
->common_window
= 1;
1009 for (w
= 0; w
< wi
[0].num_windows
; w
++) {
1010 if (wi
[0].grouping
[w
] != wi
[1].grouping
[w
]) {
1011 cpe
->common_window
= 0;
1016 for (ch
= 0; ch
< chans
; ch
++) { /* TNS and PNS */
1018 s
->cur_channel
= start_ch
+ ch
;
1019 if (s
->options
.tns
&& s
->coder
->search_for_tns
)
1020 s
->coder
->search_for_tns(s
, sce
);
1021 if (s
->options
.tns
&& s
->coder
->apply_tns_filt
)
1022 s
->coder
->apply_tns_filt(s
, sce
);
1023 if (sce
->tns
.present
)
1025 if (s
->options
.pns
&& s
->coder
->search_for_pns
)
1026 s
->coder
->search_for_pns(s
, avctx
, sce
);
1028 s
->cur_channel
= start_ch
;
1029 if (s
->options
.intensity_stereo
) { /* Intensity Stereo */
1030 if (s
->coder
->search_for_is
)
1031 s
->coder
->search_for_is(s
, avctx
, cpe
);
1032 if (cpe
->is_mode
) is_mode
= 1;
1033 apply_intensity_stereo(cpe
);
1035 if (s
->options
.pred
) { /* Prediction */
1036 for (ch
= 0; ch
< chans
; ch
++) {
1038 s
->cur_channel
= start_ch
+ ch
;
1039 if (s
->options
.pred
&& s
->coder
->search_for_pred
)
1040 s
->coder
->search_for_pred(s
, sce
);
1041 if (cpe
->ch
[ch
].ics
.predictor_present
) pred_mode
= 1;
1043 if (s
->coder
->adjust_common_pred
)
1044 s
->coder
->adjust_common_pred(s
, cpe
);
1045 for (ch
= 0; ch
< chans
; ch
++) {
1047 s
->cur_channel
= start_ch
+ ch
;
1048 if (s
->options
.pred
&& s
->coder
->apply_main_pred
)
1049 s
->coder
->apply_main_pred(s
, sce
);
1051 s
->cur_channel
= start_ch
;
1053 if (s
->options
.mid_side
) { /* Mid/Side stereo */
1054 if (s
->options
.mid_side
== -1 && s
->coder
->search_for_ms
)
1055 s
->coder
->search_for_ms(s
, cpe
);
1056 else if (cpe
->common_window
)
1057 memset(cpe
->ms_mask
, 1, sizeof(cpe
->ms_mask
));
1058 apply_mid_side_stereo(cpe
);
1060 adjust_frame_information(cpe
, chans
);
1061 if (s
->options
.ltp
) { /* LTP */
1062 for (ch
= 0; ch
< chans
; ch
++) {
1064 s
->cur_channel
= start_ch
+ ch
;
1065 if (s
->coder
->search_for_ltp
)
1066 s
->coder
->search_for_ltp(s
, sce
, cpe
->common_window
);
1067 if (sce
->ics
.ltp
.present
) pred_mode
= 1;
1069 s
->cur_channel
= start_ch
;
1070 if (s
->coder
->adjust_common_ltp
)
1071 s
->coder
->adjust_common_ltp(s
, cpe
);
1074 put_bits(&s
->pb
, 1, cpe
->common_window
);
1075 if (cpe
->common_window
) {
1076 put_ics_info(s
, &cpe
->ch
[0].ics
);
1077 if (s
->coder
->encode_main_pred
)
1078 s
->coder
->encode_main_pred(s
, &cpe
->ch
[0]);
1079 if (s
->coder
->encode_ltp_info
)
1080 s
->coder
->encode_ltp_info(s
, &cpe
->ch
[0], 1);
1081 encode_ms_info(&s
->pb
, cpe
);
1082 if (cpe
->ms_mode
) ms_mode
= 1;
1085 for (ch
= 0; ch
< chans
; ch
++) {
1086 s
->cur_channel
= start_ch
+ ch
;
1087 encode_individual_channel(avctx
, s
, &cpe
->ch
[ch
], cpe
->common_window
);
1092 if (avctx
->flags
& AV_CODEC_FLAG_QSCALE
) {
1093 /* When using a constant Q-scale, don't mess with lambda */
1097 /* rate control stuff
1098 * allow between the nominal bitrate, and what psy's bit reservoir says to target
1099 * but drift towards the nominal bitrate always
1101 frame_bits
= put_bits_count(&s
->pb
);
1102 rate_bits
= avctx
->bit_rate
* 1024 / avctx
->sample_rate
;
1103 rate_bits
= FFMIN(rate_bits
, 6144 * s
->channels
- 3);
1104 too_many_bits
= FFMAX(target_bits
, rate_bits
);
1105 too_many_bits
= FFMIN(too_many_bits
, 6144 * s
->channels
- 3);
1106 too_few_bits
= FFMIN(FFMAX(rate_bits
- rate_bits
/4, target_bits
), too_many_bits
);
1108 /* When strict bit-rate control is demanded */
1109 if (avctx
->bit_rate_tolerance
== 0) {
1110 if (rate_bits
< frame_bits
) {
1111 float ratio
= ((float)rate_bits
) / frame_bits
;
1112 s
->lambda
*= FFMIN(0.9f
, ratio
);
1115 /* reset lambda when solution is found */
1116 s
->lambda
= avctx
->global_quality
> 0 ? avctx
->global_quality
: 120;
1120 /* When using ABR, be strict (but only for increasing) */
1121 too_few_bits
= too_few_bits
- too_few_bits
/8;
1122 too_many_bits
= too_many_bits
+ too_many_bits
/2;
1124 if ( its
== 0 /* for steady-state Q-scale tracking */
1125 || (its
< 5 && (frame_bits
< too_few_bits
|| frame_bits
> too_many_bits
))
1126 || frame_bits
>= 6144 * s
->channels
- 3 )
1128 float ratio
= ((float)rate_bits
) / frame_bits
;
1130 if (frame_bits
>= too_few_bits
&& frame_bits
<= too_many_bits
) {
1132 * This path is for steady-state Q-scale tracking
1133 * When frame bits fall within the stable range, we still need to adjust
1134 * lambda to maintain it like so in a stable fashion (large jumps in lambda
1135 * create artifacts and should be avoided), but slowly
1137 ratio
= sqrtf(sqrtf(ratio
));
1138 ratio
= av_clipf(ratio
, 0.9f
, 1.1f
);
1140 /* Not so fast though */
1141 ratio
= sqrtf(ratio
);
1143 s
->lambda
= av_clipf(s
->lambda
* ratio
, FLT_EPSILON
, 65536.f
);
1145 /* Keep iterating if we must reduce and lambda is in the sky */
1146 if (ratio
> 0.9f
&& ratio
< 1.1f
) {
1149 if (is_mode
|| ms_mode
|| tns_mode
|| pred_mode
) {
1150 for (i
= 0; i
< s
->chan_map
[0]; i
++) {
1151 // Must restore coeffs
1152 chans
= tag
== TYPE_CPE
? 2 : 1;
1154 for (ch
= 0; ch
< chans
; ch
++)
1155 memcpy(cpe
->ch
[ch
].coeffs
, cpe
->ch
[ch
].pcoeffs
, sizeof(cpe
->ch
[ch
].coeffs
));
1165 if (s
->options
.ltp
&& s
->coder
->ltp_insert_new_frame
)
1166 s
->coder
->ltp_insert_new_frame(s
);
1168 put_bits(&s
->pb
, 3, TYPE_END
);
1169 flush_put_bits(&s
->pb
);
1171 s
->last_frame_pb_count
= put_bits_count(&s
->pb
);
1172 avpkt
->size
= put_bytes_output(&s
->pb
);
1174 s
->lambda_sum
+= s
->lambda
;
1177 ff_af_queue_remove(&s
->afq
, avctx
->frame_size
, &avpkt
->pts
,
1180 avpkt
->flags
|= AV_PKT_FLAG_KEY
;
1182 *got_packet_ptr
= 1;
/**
 * Encoder teardown; installed as the .close callback of ff_aac_encoder.
 *
 * Logs the average lambda over the stream as "Qavg" (lambda_sum divided by
 * lambda_count, or NAN when lambda_count is zero), then tears down the state
 * referenced from the private context: both MDCT contexts, the psy model,
 * the LPC context, the psy preprocessing context, the sample buffer and the
 * audio frame queue.
 *
 * NOTE(review): this listing skips original lines 1196 and 1199-1200, so
 * there may be additional guards or frees that are not visible here, and the
 * return statement is not shown either.
 *
 * @param avctx codec context whose priv_data is the AACEncContext
 */
1186 static av_cold
int aac_encode_end(AVCodecContext
*avctx
)
1188 AACEncContext
*s
= avctx
->priv_data
;
/* Guard against a zero frame count: report NAN instead of dividing by 0. */
1190 av_log(avctx
, AV_LOG_INFO
, "Qavg: %.3f\n", s
->lambda_count
? s
->lambda_sum
/ s
->lambda_count
: NAN
);
/* Release the long (1024) and short (128) window transform contexts. */
1192 av_tx_uninit(&s
->mdct1024
);
1193 av_tx_uninit(&s
->mdct128
);
1194 ff_psy_end(&s
->psy
);
1195 ff_lpc_end(&s
->lpc
);
1197 ff_psy_preprocess_end(s
->psypp
);
/* av_freep() takes the address of the pointer being freed. */
1198 av_freep(&s
->buffer
.samples
);
1201 ff_af_queue_close(&s
->afq
);
/**
 * Set up the DSP helpers used by the encoder: the float DSP context and the
 * two float MDCT transforms (length arguments 1024 and 128).
 *
 * NOTE(review): the condition guarding the AVERROR(ENOMEM) return (original
 * line 1211), the declaration of ret, and the statements executed when
 * av_tx_init() fails are not visible in this listing; presumably the
 * allocation result is checked and ret is propagated - confirm against the
 * full file.
 *
 * @param avctx codec context; only its flags are read here
 * @param s     encoder private context receiving fdsp, mdct1024 and mdct128
 */
1205 static av_cold
int dsp_init(AVCodecContext
*avctx
, AACEncContext
*s
)
/* Scale factor handed to both av_tx_init() calls below. */
1208 float scale
= 32768.0f
;
/* Pass through whether the caller requested bit-exact operation. */
1210 s
->fdsp
= avpriv_float_dsp_alloc(avctx
->flags
& AV_CODEC_FLAG_BITEXACT
);
1212 return AVERROR(ENOMEM
);
/* Forward (inv = 0) float MDCT, len = 1024, no extra flags. */
1214 if ((ret
= av_tx_init(&s
->mdct1024
, &s
->mdct1024_fn
, AV_TX_FLOAT_MDCT
, 0,
1215 1024, &scale
, 0)) < 0)
/* Same setup with len = 128. */
1217 if ((ret
= av_tx_init(&s
->mdct128
, &s
->mdct128_fn
, AV_TX_FLOAT_MDCT
, 0,
1218 128, &scale
, 0)) < 0)
/**
 * Allocate the per-stream working buffers.
 *
 * Allocates s->buffer.samples with s->channels * 3 * 1024 elements and
 * s->cpe with s->chan_map[0] elements (one per channel element), returning
 * AVERROR(ENOMEM) if either allocation fails. Each planar_samples[ch] is
 * then pointed at its own 3 * 1024 element slice of the sample buffer.
 *
 * s->channels and s->chan_map must already be set by the caller.
 *
 * NOTE(review): the declaration of ch and the final return are not visible
 * in this listing.
 *
 * @param avctx codec context (not referenced in the visible lines)
 * @param s     encoder private context that owns the buffers
 */
1224 static av_cold
int alloc_buffers(AVCodecContext
*avctx
, AACEncContext
*s
)
/* Short-circuit: the second allocation is skipped if the first one fails. */
1227 if (!FF_ALLOCZ_TYPED_ARRAY(s
->buffer
.samples
, s
->channels
* 3 * 1024) ||
1228 !FF_ALLOCZ_TYPED_ARRAY(s
->cpe
, s
->chan_map
[0]))
1229 return AVERROR(ENOMEM
);
/* Carve the single allocation into one contiguous region per channel. */
1231 for(ch
= 0; ch
< s
->channels
; ch
++)
1232 s
->planar_samples
[ch
] = s
->buffer
.samples
+ 3 * 1024 * ch
;
/**
 * Encoder initialisation; installed as the .init callback of ff_aac_encoder.
 *
 * Visible steps, in order:
 *  - reset last_frame_pb_count, fix frame_size and initial_padding at 1024,
 *    and seed lambda from global_quality (120 when that is not positive);
 *  - take the channel count from ch_layout and choose the channel map:
 *    either a PCE configuration matched against aac_pce_configs (EINVAL with
 *    "Unsupported channel layout" when none matches) or the default tables
 *    indexed by channels - 1;
 *  - when no bitrate was given, accumulate a per-element guess
 *    (128000 for a CPE, 16000 for an LFE);
 *  - look up the sample-rate index and reject unsupported rates;
 *  - warn about and clamp bitrates above 6144 bits per channel per frame;
 *  - reconcile the requested profile with the pred / ltp / pns options;
 *  - select the coder and apply its restrictions (ANMR and LTP both require
 *    experimental compliance; ANMR turns intensity stereo off);
 *  - disable mid/side coding when there are more than 3 channels;
 *  - initialise the static tables, DSP, buffers, AudioSpecificConfig, psy
 *    model, psy preprocessing, LPC, random state, AAC DSP and frame queue.
 *
 * NOTE(review): many original lines are missing from this listing
 * (declarations of i, ret, lengths and buf, several if/else/break lines,
 * the error returns after the init calls, and the final return), so the
 * exact branching cannot be confirmed from here. ERROR_IF and WARN_IF are
 * macros defined outside this chunk; presumably ERROR_IF logs and fails
 * initialisation while WARN_IF only logs - confirm.
 *
 * NOTE(review): three user-visible messages contain typos: "Chainging"
 * (twice, should read "Changing") and "The LPT profile" (should read "LTP").
 * They are left untouched here because they are runtime strings.
 *
 * @param avctx codec context whose priv_data is the AACEncContext
 */
1237 static av_cold
int aac_encode_init(AVCodecContext
*avctx
)
1239 AACEncContext
*s
= avctx
->priv_data
;
1241 const uint8_t *sizes
[2];
1242 uint8_t grouping
[AAC_MAX_CHANNELS
];
1246 s
->last_frame_pb_count
= 0;
1247 avctx
->frame_size
= 1024;
1248 avctx
->initial_padding
= 1024;
1249 s
->lambda
= avctx
->global_quality
> 0 ? avctx
->global_quality
: 120;
1251 /* Channel map and unspecified bitrate guessing */
1252 s
->channels
= avctx
->ch_layout
.nb_channels
;
/* av_channel_layout_compare() returns 0 for equal layouts, hence the '!'. */
1255 for (i
= 0; i
< FF_ARRAY_ELEMS(aac_normal_chan_layouts
); i
++) {
1256 if (!av_channel_layout_compare(&avctx
->ch_layout
, &aac_normal_chan_layouts
[i
])) {
1257 s
->needs_pce
= s
->options
.pce
;
/* Search the PCE table; i == table size afterwards means no match. */
1264 for (i
= 0; i
< FF_ARRAY_ELEMS(aac_pce_configs
); i
++)
1265 if (!av_channel_layout_compare(&avctx
->ch_layout
, &aac_pce_configs
[i
].layout
))
1267 av_channel_layout_describe(&avctx
->ch_layout
, buf
, sizeof(buf
));
1268 if (i
== FF_ARRAY_ELEMS(aac_pce_configs
)) {
1269 av_log(avctx
, AV_LOG_ERROR
, "Unsupported channel layout \"%s\"\n", buf
);
1270 return AVERROR(EINVAL
);
1272 av_log(avctx
, AV_LOG_INFO
, "Using a PCE to encode channel layout \"%s\"\n", buf
);
/* Keep a private copy of the matched PCE and use its maps. */
1273 s
->pce
= aac_pce_configs
[i
];
1274 s
->reorder_map
= s
->pce
.reorder_map
;
1275 s
->chan_map
= s
->pce
.config_map
;
/* Default tables, indexed by channel count minus one. */
1277 s
->reorder_map
= aac_chan_maps
[s
->channels
- 1];
1278 s
->chan_map
= aac_chan_configs
[s
->channels
- 1];
/* chan_map[0] holds the element count; element tags start at index 1. */
1281 if (!avctx
->bit_rate
) {
1282 for (i
= 1; i
<= s
->chan_map
[0]; i
++) {
1283 avctx
->bit_rate
+= s
->chan_map
[i
] == TYPE_CPE
? 128000 : /* Pair */
1284 s
->chan_map
[i
] == TYPE_LFE
? 16000 : /* LFE */
/* Find the index of the requested rate among the 16 table entries. */
1290 for (i
= 0; i
< 16; i
++)
1291 if (avctx
->sample_rate
== ff_mpeg4audio_sample_rates
[i
])
1293 s
->samplerate_index
= i
;
/* Also reject indices that have no scalefactor band size table. */
1294 ERROR_IF(s
->samplerate_index
== 16 ||
1295 s
->samplerate_index
>= ff_aac_swb_size_1024_len
||
1296 s
->samplerate_index
>= ff_aac_swb_size_128_len
,
1297 "Unsupported sample rate %d\n", avctx
->sample_rate
);
1299 /* Bitrate limiting */
1300 WARN_IF(1024.0 * avctx
->bit_rate
/ avctx
->sample_rate
> 6144 * s
->channels
,
1301 "Too many bits %f > %d per frame requested, clamping to max\n",
1302 1024.0 * avctx
->bit_rate
/ avctx
->sample_rate
,
1303 6144 * s
->channels
);
1304 avctx
->bit_rate
= (int64_t)FFMIN(6144 * s
->channels
/ 1024.0 * avctx
->sample_rate
,
1307 /* Profile and option setting */
1308 avctx
->profile
= avctx
->profile
== AV_PROFILE_UNKNOWN
? AV_PROFILE_AAC_LOW
:
1310 for (i
= 0; i
< FF_ARRAY_ELEMS(aacenc_profiles
); i
++)
1311 if (avctx
->profile
== aacenc_profiles
[i
])
/* The MPEG-2 low-complexity profile is stored as AAC_LOW from here on. */
1313 if (avctx
->profile
== AV_PROFILE_MPEG2_AAC_LOW
) {
1314 avctx
->profile
= AV_PROFILE_AAC_LOW
;
1315 ERROR_IF(s
->options
.pred
,
1316 "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1317 ERROR_IF(s
->options
.ltp
,
1318 "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1319 WARN_IF(s
->options
.pns
,
1320 "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
1322 } else if (avctx
->profile
== AV_PROFILE_AAC_LTP
) {
1324 ERROR_IF(s
->options
.pred
,
1325 "Main prediction unavailable in the \"aac_ltp\" profile\n");
1326 } else if (avctx
->profile
== AV_PROFILE_AAC_MAIN
) {
1327 s
->options
.pred
= 1;
1328 ERROR_IF(s
->options
.ltp
,
1329 "LTP prediction unavailable in the \"aac_main\" profile\n");
/* No explicit LTP/Main profile: derive the profile from the tool options. */
1330 } else if (s
->options
.ltp
) {
1331 avctx
->profile
= AV_PROFILE_AAC_LTP
;
1333 "Chainging profile to \"aac_ltp\"\n");
1334 ERROR_IF(s
->options
.pred
,
1335 "Main prediction unavailable in the \"aac_ltp\" profile\n");
1336 } else if (s
->options
.pred
) {
1337 avctx
->profile
= AV_PROFILE_AAC_MAIN
;
1339 "Chainging profile to \"aac_main\"\n");
1340 ERROR_IF(s
->options
.ltp
,
1341 "LTP prediction unavailable in the \"aac_main\" profile\n");
1343 s
->profile
= avctx
->profile
;
1345 /* Coder limitations */
1346 s
->coder
= &ff_aac_coders
[s
->options
.coder
];
1347 if (s
->options
.coder
== AAC_CODER_ANMR
) {
1348 ERROR_IF(avctx
->strict_std_compliance
> FF_COMPLIANCE_EXPERIMENTAL
,
1349 "The ANMR coder is considered experimental, add -strict -2 to enable!\n");
1350 s
->options
.intensity_stereo
= 0;
1353 ERROR_IF(s
->options
.ltp
&& avctx
->strict_std_compliance
> FF_COMPLIANCE_EXPERIMENTAL
,
1354 "The LPT profile requires experimental compliance, add -strict -2 to enable!\n");
1356 /* M/S introduces horrible artifacts with multichannel files, this is temporary */
1357 if (s
->channels
> 3)
1358 s
->options
.mid_side
= 0;
1360 // Initialize static tables
1361 ff_aac_float_common_init();
1363 if ((ret
= dsp_init(avctx
, s
)) < 0)
1366 if ((ret
= alloc_buffers(avctx
, s
)) < 0)
1369 if ((ret
= put_audio_specific_config(avctx
)))
/* Index 0 uses the 1024-sample tables, index 1 the 128-sample tables. */
1372 sizes
[0] = ff_aac_swb_size_1024
[s
->samplerate_index
];
1373 sizes
[1] = ff_aac_swb_size_128
[s
->samplerate_index
];
1374 lengths
[0] = ff_aac_num_swb_1024
[s
->samplerate_index
];
1375 lengths
[1] = ff_aac_num_swb_128
[s
->samplerate_index
];
/* grouping[i] is 1 when element i is a channel pair, 0 otherwise. */
1376 for (i
= 0; i
< s
->chan_map
[0]; i
++)
1377 grouping
[i
] = s
->chan_map
[i
+ 1] == TYPE_CPE
;
1378 if ((ret
= ff_psy_init(&s
->psy
, avctx
, 2, sizes
, lengths
,
1379 s
->chan_map
[0], grouping
)) < 0)
1381 s
->psypp
= ff_psy_preprocess_init(avctx
);
1382 ff_lpc_init(&s
->lpc
, 2*avctx
->frame_size
, TNS_MAX_ORDER
, FF_LPC_TYPE_LEVINSON
);
/* Fixed seed, so the random state starts identically on every run. */
1383 s
->random_state
= 0x1f2e3d4c;
1385 ff_aacenc_dsp_init(&s
->aacdsp
);
1387 ff_af_queue_init(avctx
, &s
->afq
);
/* Flags shared by every entry below: encoding-side, audio parameters. */
1392 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
/**
 * Private options of the AAC encoder, stored in AACEncContext.options.
 *
 * - "aac_coder" is an integer in [0, AAC_CODER_NB - 1] defaulting to
 *   AAC_CODER_TWOLOOP; "anmr", "twoloop" and "fast" are named constants in
 *   the same "coder" unit.
 * - The remaining entries are AV_OPT_TYPE_BOOL with range [-1, 1].
 *   "aac_ms" defaults to -1; the frame encoder runs the coder's
 *   search_for_ms only when mid_side == -1, and otherwise marks every band
 *   as M/S when the pair shares a common window.
 *   "aac_is", "aac_pns" and "aac_tns" default to 1 (enabled);
 *   "aac_ltp", "aac_pred" and "aac_pce" default to 0 (disabled).
 *
 * NOTE(review): the end of the table (original lines 1405-1407) is not
 * visible in this listing.
 */
1393 static const AVOption aacenc_options
[] = {
1394 {"aac_coder", "Coding algorithm", offsetof(AACEncContext
, options
.coder
), AV_OPT_TYPE_INT
, {.i64
= AAC_CODER_TWOLOOP
}, 0, AAC_CODER_NB
-1, AACENC_FLAGS
, .unit
= "coder"},
1395 {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST
, {.i64
= AAC_CODER_ANMR
}, INT_MIN
, INT_MAX
, AACENC_FLAGS
, .unit
= "coder"},
1396 {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST
, {.i64
= AAC_CODER_TWOLOOP
}, INT_MIN
, INT_MAX
, AACENC_FLAGS
, .unit
= "coder"},
1397 {"fast", "Fast search", 0, AV_OPT_TYPE_CONST
, {.i64
= AAC_CODER_FAST
}, INT_MIN
, INT_MAX
, AACENC_FLAGS
, .unit
= "coder"},
1398 {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext
, options
.mid_side
), AV_OPT_TYPE_BOOL
, {.i64
= -1}, -1, 1, AACENC_FLAGS
},
1399 {"aac_is", "Intensity stereo coding", offsetof(AACEncContext
, options
.intensity_stereo
), AV_OPT_TYPE_BOOL
, {.i64
= 1}, -1, 1, AACENC_FLAGS
},
1400 {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext
, options
.pns
), AV_OPT_TYPE_BOOL
, {.i64
= 1}, -1, 1, AACENC_FLAGS
},
1401 {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext
, options
.tns
), AV_OPT_TYPE_BOOL
, {.i64
= 1}, -1, 1, AACENC_FLAGS
},
1402 {"aac_ltp", "Long term prediction", offsetof(AACEncContext
, options
.ltp
), AV_OPT_TYPE_BOOL
, {.i64
= 0}, -1, 1, AACENC_FLAGS
},
1403 {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext
, options
.pred
), AV_OPT_TYPE_BOOL
, {.i64
= 0}, -1, 1, AACENC_FLAGS
},
1404 {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext
, options
.pce
), AV_OPT_TYPE_BOOL
, {.i64
= 0}, -1, 1, AACENC_FLAGS
},
/**
 * AVClass for the encoder's private context: names the class "AAC encoder"
 * and exposes aacenc_options as its option table. Referenced by
 * ff_aac_encoder through .p.priv_class.
 */
1409 static const AVClass aacenc_class
= {
1410 .class_name
= "AAC encoder",
1411 .item_name
= av_default_item_name
,
1412 .option
= aacenc_options
,
1413 .version
= LIBAVUTIL_VERSION_INT
,
/**
 * Codec-level default values, referenced by ff_aac_encoder through .defaults.
 *
 * NOTE(review): the table entries (original lines 1417-1419) are not visible
 * in this listing, so the actual defaults cannot be stated from here.
 */
1416 static const FFCodecDefault aac_encode_defaults
[] = {
1421 const FFCodec ff_aac_encoder
= {
1423 CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
1424 .p
.type
= AVMEDIA_TYPE_AUDIO
,
1425 .p
.id
= AV_CODEC_ID_AAC
,
1426 .p
.capabilities
= AV_CODEC_CAP_DR1
| AV_CODEC_CAP_DELAY
|
1427 AV_CODEC_CAP_SMALL_LAST_FRAME
,
1428 .priv_data_size
= sizeof(AACEncContext
),
1429 .init
= aac_encode_init
,
1430 FF_CODEC_ENCODE_CB(aac_encode_frame
),
1431 .close
= aac_encode_end
,
1432 .defaults
= aac_encode_defaults
,
1433 .p
.supported_samplerates
= ff_mpeg4audio_sample_rates
,
1434 .caps_internal
= FF_CODEC_CAP_INIT_CLEANUP
,
1435 .p
.sample_fmts
= (const enum AVSampleFormat
[]){ AV_SAMPLE_FMT_FLTP
,
1436 AV_SAMPLE_FMT_NONE
},
1437 .p
.priv_class
= &aacenc_class
,