libavcodec/aacenc.c

   1 /*
   2  * AAC encoder
   3  * Copyright (C) 2008 Konstantin Shishkov
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * AAC encoder
  25  */
  26
  27 /***********************************
  28  *              TODOs:
  29  * add sane pulse detection
  30  ***********************************/
  31 #include <float.h>
  32
  33 #include "libavutil/channel_layout.h"
  34 #include "libavutil/libm.h"
  35 #include "libavutil/float_dsp.h"
  36 #include "libavutil/mem.h"
  37 #include "libavutil/opt.h"
  38 #include "avcodec.h"
  39 #include "codec_internal.h"
  40 #include "encode.h"
  41 #include "put_bits.h"
  42 #include "mpeg4audio.h"
  43 #include "sinewin.h"
  44 #include "profiles.h"
  45 #include "version.h"
  46
  47 #include "aac.h"
  48 #include "aactab.h"
  49 #include "aacenc.h"
  50 #include "aacenctab.h"
  51 #include "aacenc_utils.h"
  52
  53 #include "psymodel.h"
  54
/**
 * List of PCE (Program Configuration Element) for the channel layouts listed
 * in channel_layout.h
 *
 * For those wishing in the future to add other layouts:
 *
 * - num_ele: number of elements in each group of front, side, back, lfe channels
 *            (an element is of type SCE (single channel), CPE (channel pair) for
 *            the first 3 groups; and is LFE for LFE group).
 *
 * - pairing: 0 for an SCE element or 1 for a CPE; does not apply to LFE group
 *
 * - index: there are three independent indices for SCE, CPE and LFE;
 *     they are incremented irrespective of the group to which the element belongs;
 *     they are not reset when going from one group to another
 *
 *     Example: for 7.0 channel layout,
 *        .num_ele = { 2, 1, 1, 0 },
 *        .pairing = { { 1, 0 }, { 1 }, { 1 }, },
 *               (3 CPE and 1 SCE in total: one CPE and one SCE in the front
 *                group, one CPE in the side group, one CPE in the back group)
 *        .index = { { 0, 0 }, { 1 }, { 2 }, },
 *               (index is 0 for the single SCE but goes from 0 to 2 for the CPEs)
 *
 *     The index order impacts the channel ordering. But is otherwise arbitrary
 *     (the sequence could have been 2, 0, 1 instead of 0, 1, 2).
 *
 *     Spec allows for discontinuous indices, e.g. if one has a total of two SCE,
 *     SCE.0 SCE.15 is OK per spec; BUT it won't be decoded by our AAC decoder
 *     which at this time requires that indices fully cover some range starting
 *     from 0 (SCE.1 SCE.0 is OK but not SCE.0 SCE.15).
 *
 * - config_map: total number of elements and their types. Beware, the way the
 *               types are ordered impacts the final channel ordering.
 *
 * - reorder_map: reorders the channels.
 *
 * Note: put_pce() serializes, for each group g, only the first num_ele[g]
 * entries of pairing[g] and index[g], and never writes pairing for the LFE
 * group. Initializers beyond num_ele[g] are therefore not written by it.
 */
static const AACPCEInfo aac_pce_configs[] = {
    {
        .layout = AV_CHANNEL_LAYOUT_MONO,
        .num_ele = { 1, 0, 0, 0 },          /* front, side, back, LFE */
        .pairing = { { 0 }, },              /* one SCE in the front group */
        .index = { { 0 }, },
        .config_map = { 1, TYPE_SCE, },     /* element count, then element types */
        .reorder_map = { 0 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_STEREO,
        .num_ele = { 1, 0, 0, 0 },
        .pairing = { { 1 }, },
        .index = { { 0 }, },
        .config_map = { 1, TYPE_CPE, },
        .reorder_map = { 0, 1 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_2POINT1,
        .num_ele = { 1, 0, 0, 1 },
        .pairing = { { 1 }, },
        .index = { { 0 },{ 0 },{ 0 },{ 0 } },
        .config_map = { 2, TYPE_CPE, TYPE_LFE },
        .reorder_map = { 0, 1, 2 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_2_1,
        .num_ele = { 1, 0, 1, 0 },
        .pairing = { { 1 },{ 0 },{ 0 } },
        .index = { { 0 },{ 0 },{ 0 }, },
        .config_map = { 2, TYPE_CPE, TYPE_SCE },
        .reorder_map = { 0, 1, 2 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_SURROUND,
        .num_ele = { 2, 0, 0, 0 },
        .pairing = { { 1, 0 }, },
        .index = { { 0, 0 }, },
        .config_map = { 2, TYPE_CPE, TYPE_SCE, },
        .reorder_map = { 0, 1, 2 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_3POINT1,
        .num_ele = { 2, 0, 0, 1 },
        .pairing = { { 1, 0 }, },
        .index = { { 0, 0 }, { 0 }, { 0 }, { 0 }, },
        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_LFE },
        .reorder_map = { 0, 1, 2, 3 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_4POINT0,
        .num_ele = { 2, 0, 1, 0 },
        .pairing = { { 1, 0 }, { 0 }, { 0 }, },
        .index = { { 0, 0 }, { 0 }, { 1 } },
        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_SCE },
        .reorder_map = {  0, 1, 2, 3 },
    },
    {
        /* NOTE(review): unlike the 2.1 and 3.1 entries above, this entry and
         * the remaining x.1 entries below have num_ele[3] == 0 and no TYPE_LFE
         * in config_map; presumably the LFE channel is carried in an SCE on
         * purpose -- confirm before changing. */
        .layout = AV_CHANNEL_LAYOUT_4POINT1,
        .num_ele = { 2, 1, 1, 0 },
        .pairing = { { 1, 0 }, { 0 }, { 0 }, },
        .index = { { 0, 0 }, { 1 }, { 2 }, { 0 } },
        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_SCE },
        .reorder_map = { 0, 1, 2, 3, 4 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_2_2,
        .num_ele = { 1, 1, 0, 0 },
        .pairing = { { 1 }, { 1 }, },
        .index = { { 0 }, { 1 }, },
        .config_map = { 2, TYPE_CPE, TYPE_CPE },
        .reorder_map = { 0, 1, 2, 3 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_QUAD,
        .num_ele = { 1, 0, 1, 0 },
        .pairing = { { 1 }, { 0 }, { 1 }, },
        .index = { { 0 }, { 0 }, { 1 } },
        .config_map = { 2, TYPE_CPE, TYPE_CPE },
        .reorder_map = { 0, 1, 2, 3 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_5POINT0,
        .num_ele = { 2, 1, 0, 0 },
        .pairing = { { 1, 0 }, { 1 }, },
        .index = { { 0, 0 }, { 1 } },
        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_CPE },
        .reorder_map = { 0, 1, 2, 3, 4 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_5POINT1,
        .num_ele = { 2, 1, 1, 0 },
        .pairing = { { 1, 0 }, { 0 }, { 1 }, },
        .index = { { 0, 0 }, { 1 }, { 1 } },
        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE },
        .reorder_map = { 0, 1, 2, 3, 4, 5 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_5POINT0_BACK,
        .num_ele = { 2, 0, 1, 0 },
        .pairing = { { 1, 0 }, { 0 }, { 1 } },
        .index = { { 0, 0 }, { 0 }, { 1 } },
        .config_map = { 3, TYPE_CPE, TYPE_SCE, TYPE_CPE },
        .reorder_map = { 0, 1, 2, 3, 4 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_5POINT1_BACK,
        .num_ele = { 2, 1, 1, 0 },
        .pairing = { { 1, 0 }, { 0 }, { 1 }, },
        .index = { { 0, 0 }, { 1 }, { 1 } },
        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE },
        .reorder_map = { 0, 1, 2, 3, 4, 5 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_6POINT0,
        .num_ele = { 2, 1, 1, 0 },
        .pairing = { { 1, 0 }, { 1 }, { 0 }, },
        .index = { { 0, 0 }, { 1 }, { 1 } },
        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
        .reorder_map = { 0, 1, 2, 3, 4, 5 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_6POINT0_FRONT,
        .num_ele = { 2, 1, 0, 0 },
        .pairing = { { 1, 1 }, { 1 } },
        .index = { { 1, 0 }, { 2 }, },
        .config_map = { 3, TYPE_CPE, TYPE_CPE, TYPE_CPE, },
        .reorder_map = { 0, 1, 2, 3, 4, 5 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_HEXAGONAL,
        .num_ele = { 2, 0, 2, 0 },
        .pairing = { { 1, 0 },{ 0 },{ 1, 0 }, },
        .index = { { 0, 0 },{ 0 },{ 1, 1 } },
        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, },
        .reorder_map = { 0, 1, 2, 3, 4, 5 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_6POINT1,
        .num_ele = { 2, 1, 2, 0 },
        .pairing = { { 1, 0 },{ 0 },{ 1, 0 }, },
        .index = { { 0, 0 },{ 1 },{ 1, 2 } },
        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_6POINT1_BACK,
        .num_ele = { 2, 1, 2, 0 },
        .pairing = { { 1, 0 }, { 0 }, { 1, 0 }, },
        .index = { { 0, 0 }, { 1 }, { 1, 2 } },
        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_6POINT1_FRONT,
        .num_ele = { 2, 1, 2, 0 },
        .pairing = { { 1, 0 }, { 0 }, { 1, 0 }, },
        .index = { { 0, 0 }, { 1 }, { 1, 2 } },
        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_7POINT0,
        .num_ele = { 2, 1, 1, 0 },
        .pairing = { { 1, 0 }, { 1 }, { 1 }, },
        .index = { { 0, 0 }, { 1 }, { 2 }, },
        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_7POINT0_FRONT,
        .num_ele = { 2, 1, 1, 0 },
        .pairing = { { 1, 0 }, { 1 }, { 1 }, },
        .index = { { 0, 0 }, { 1 }, { 2 }, },
        .config_map = { 4, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_7POINT1,
        .num_ele = { 2, 1, 2, 0 },
        .pairing = { { 1, 0 }, { 0 }, { 1, 1 }, },
        .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
        .config_map = { 5, TYPE_CPE, TYPE_SCE,  TYPE_SCE, TYPE_CPE, TYPE_CPE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_7POINT1_WIDE,
        .num_ele = { 2, 1, 2, 0 },
        .pairing = { { 1, 0 }, { 0 },{  1, 1 }, },
        .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_7POINT1_WIDE_BACK,
        .num_ele = { 2, 1, 2, 0 },
        .pairing = { { 1, 0 }, { 0 }, { 1, 1 }, },
        .index = { { 0, 0 }, { 1 }, { 1, 2 }, { 0 } },
        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_SCE, TYPE_CPE, TYPE_CPE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_OCTAGONAL,
        .num_ele = { 2, 1, 2, 0 },
        .pairing = { { 1, 0 }, { 1 }, { 1, 0 }, },
        .index = { { 0, 0 }, { 1 }, { 2, 1 } },
        .config_map = { 5, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_SCE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7 },
    },
    {   /* Meant for order 2/mixed ambisonics */
        .layout = { .order = AV_CHANNEL_ORDER_NATIVE, .nb_channels = 9,
                    .u.mask = AV_CH_LAYOUT_OCTAGONAL | AV_CH_TOP_CENTER },
        .num_ele = { 2, 2, 2, 0 },
        .pairing = { { 1, 0 }, { 1, 0 }, { 1, 0 }, },
        .index = { { 0, 0 }, { 1, 1 }, { 2, 2 } },
        .config_map = { 6, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8 },
    },
    {   /* Meant for order 2/mixed ambisonics */
        .layout = { .order = AV_CHANNEL_ORDER_NATIVE, .nb_channels = 10,
                    .u.mask = AV_CH_LAYOUT_6POINT0_FRONT | AV_CH_BACK_CENTER |
                              AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT | AV_CH_TOP_CENTER },
        .num_ele = { 2, 2, 2, 0 },
        .pairing = { { 1, 1 }, { 1, 0 }, { 1, 0 }, },
        .index = { { 0, 1 }, { 2, 0 }, { 3, 1 } },
        .config_map = { 6, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
    },
    {
        .layout = AV_CHANNEL_LAYOUT_HEXADECAGONAL,
        .num_ele = { 4, 2, 4, 0 },
        .pairing = { { 1, 0, 1, 0 }, { 1, 1 }, { 1, 0, 1, 0 }, },
        .index = { { 0, 0, 1, 1 }, { 2, 3 }, { 4, 2, 5, 3 } },
        .config_map = { 10, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_CPE, TYPE_SCE, TYPE_CPE, TYPE_SCE },
        .reorder_map = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
    },
};
 327
 328 static void put_pce(PutBitContext *pb, AVCodecContext *avctx)
 329 {
 330     int i, j;
 331     AACEncContext *s = avctx->priv_data;
 332     AACPCEInfo *pce = &s->pce;
 333     const int bitexact = avctx->flags & AV_CODEC_FLAG_BITEXACT;
 334     const char *aux_data = bitexact ? "Lavc" : LIBAVCODEC_IDENT;
 335
 336     put_bits(pb, 4, 0);
 337
 338     put_bits(pb, 2, avctx->profile);
 339     put_bits(pb, 4, s->samplerate_index);
 340
 341     put_bits(pb, 4, pce->num_ele[0]); /* Front */
 342     put_bits(pb, 4, pce->num_ele[1]); /* Side */
 343     put_bits(pb, 4, pce->num_ele[2]); /* Back */
 344     put_bits(pb, 2, pce->num_ele[3]); /* LFE */
 345     put_bits(pb, 3, 0); /* Assoc data */
 346     put_bits(pb, 4, 0); /* CCs */
 347
 348     put_bits(pb, 1, 0); /* Stereo mixdown */
 349     put_bits(pb, 1, 0); /* Mono mixdown */
 350     put_bits(pb, 1, 0); /* Something else */
 351
 352     for (i = 0; i < 4; i++) {
 353         for (j = 0; j < pce->num_ele[i]; j++) {
 354             if (i < 3)
 355                 put_bits(pb, 1, pce->pairing[i][j]);
 356             put_bits(pb, 4, pce->index[i][j]);
 357         }
 358     }
 359
 360     align_put_bits(pb);
 361     put_bits(pb, 8, strlen(aux_data));
 362     ff_put_string(pb, aux_data, 0);
 363 }
 364
 365 /**
 366  * Make AAC audio config object.
 367  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 368  */
 369 static int put_audio_specific_config(AVCodecContext *avctx)
 370 {
 371     PutBitContext pb;
 372     AACEncContext *s = avctx->priv_data;
 373     int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0));
 374     const int max_size = 32;
 375
 376     avctx->extradata = av_mallocz(max_size);
 377     if (!avctx->extradata)
 378         return AVERROR(ENOMEM);
 379
 380     init_put_bits(&pb, avctx->extradata, max_size);
 381     put_bits(&pb, 5, s->profile+1); //profile
 382     put_bits(&pb, 4, s->samplerate_index); //sample rate index
 383     put_bits(&pb, 4, channels);
 384     //GASpecificConfig
 385     put_bits(&pb, 1, 0); //frame length - 1024 samples
 386     put_bits(&pb, 1, 0); //does not depend on core coder
 387     put_bits(&pb, 1, 0); //is not extension
 388     if (s->needs_pce)
 389         put_pce(&pb, avctx);
 390
 391     //Explicitly Mark SBR absent
 392     put_bits(&pb, 11, 0x2b7); //sync extension
 393     put_bits(&pb, 5,  AOT_SBR);
 394     put_bits(&pb, 1,  0);
 395     flush_put_bits(&pb);
 396     avctx->extradata_size = put_bytes_output(&pb);
 397
 398     return 0;
 399 }
 400
 401 void ff_quantize_band_cost_cache_init(struct AACEncContext *s)
 402 {
 403     ++s->quantize_band_cost_cache_generation;
 404     if (s->quantize_band_cost_cache_generation == 0) {
 405         memset(s->quantize_band_cost_cache, 0, sizeof(s->quantize_band_cost_cache));
 406         s->quantize_band_cost_cache_generation = 1;
 407     }
 408 }
 409
/*
 * Emit the signature of a static windowing function named
 * apply_<type>_window. The function body is expected to follow the macro
 * invocation directly and may use the parameters fdsp, sce and audio
 * declared here.
 */
#define WINDOW_FUNC(type) \
static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
                                    SingleChannelElement *sce, \
                                    const float *audio)
 414
 415 WINDOW_FUNC(only_long)
 416 {
 417     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 418     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 419     float *out = sce->ret_buf;
 420
 421     fdsp->vector_fmul        (out,        audio,        lwindow, 1024);
 422     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
 423 }
 424
 425 WINDOW_FUNC(long_start)
 426 {
 427     const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 428     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 429     float *out = sce->ret_buf;
 430
 431     fdsp->vector_fmul(out, audio, lwindow, 1024);
 432     memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
 433     fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
 434     memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
 435 }
 436
 437 WINDOW_FUNC(long_stop)
 438 {
 439     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 440     const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 441     float *out = sce->ret_buf;
 442
 443     memset(out, 0, sizeof(out[0]) * 448);
 444     fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
 445     memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
 446     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
 447 }
 448
 449 WINDOW_FUNC(eight_short)
 450 {
 451     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 452     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 453     const float *in = audio + 448;
 454     float *out = sce->ret_buf;
 455     int w;
 456
 457     for (w = 0; w < 8; w++) {
 458         fdsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
 459         out += 128;
 460         in  += 128;
 461         fdsp->vector_fmul_reverse(out, in, swindow, 128);
 462         out += 128;
 463     }
 464 }
 465
 466 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
 467                                      SingleChannelElement *sce,
 468                                      const float *audio) = {
 469     [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
 470     [LONG_START_SEQUENCE]  = apply_long_start_window,
 471     [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
 472     [LONG_STOP_SEQUENCE]   = apply_long_stop_window
 473 };
 474
 475 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
 476                                   float *audio)
 477 {
 478     int i;
 479     float *output = sce->ret_buf;
 480
 481     apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
 482
 483     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
 484         s->mdct1024_fn(s->mdct1024, sce->coeffs, output, sizeof(float));
 485     else
 486         for (i = 0; i < 1024; i += 128)
 487             s->mdct128_fn(s->mdct128, &sce->coeffs[i], output + i*2, sizeof(float));
 488     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
 489     memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
 490 }
 491
 492 /**
 493  * Encode ics_info element.
 494  * @see Table 4.6 (syntax of ics_info)
 495  */
 496 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
 497 {
 498     int w;
 499
 500     put_bits(&s->pb, 1, 0);                // ics_reserved bit
 501     put_bits(&s->pb, 2, info->window_sequence[0]);
 502     put_bits(&s->pb, 1, info->use_kb_window[0]);
 503     if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
 504         put_bits(&s->pb, 6, info->max_sfb);
 505         put_bits(&s->pb, 1, !!info->predictor_present);
 506     } else {
 507         put_bits(&s->pb, 4, info->max_sfb);
 508         for (w = 1; w < 8; w++)
 509             put_bits(&s->pb, 1, !info->group_len[w]);
 510     }
 511 }
 512
 513 /**
 514  * Encode MS data.
 515  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
 516  */
 517 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
 518 {
 519     int i, w;
 520
 521     put_bits(pb, 2, cpe->ms_mode);
 522     if (cpe->ms_mode == 1)
 523         for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
 524             for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
 525                 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
 526 }
 527
 528 /**
 529  * Produce integer coefficients from scalefactors provided by the model.
 530  */
 531 static void adjust_frame_information(ChannelElement *cpe, int chans)
 532 {
 533     int i, w, w2, g, ch;
 534     int maxsfb, cmaxsfb;
 535
 536     for (ch = 0; ch < chans; ch++) {
 537         IndividualChannelStream *ics = &cpe->ch[ch].ics;
 538         maxsfb = 0;
 539         cpe->ch[ch].pulse.num_pulse = 0;
 540         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 541             for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
 542                 ;
 543             maxsfb = FFMAX(maxsfb, cmaxsfb);
 544         }
 545         ics->max_sfb = maxsfb;
 546
 547         //adjust zero bands for window groups
 548         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 549             for (g = 0; g < ics->max_sfb; g++) {
 550                 i = 1;
 551                 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
 552                     if (!cpe->ch[ch].zeroes[w2*16 + g]) {
 553                         i = 0;
 554                         break;
 555                     }
 556                 }
 557                 cpe->ch[ch].zeroes[w*16 + g] = i;
 558             }
 559         }
 560     }
 561
 562     if (chans > 1 && cpe->common_window) {
 563         IndividualChannelStream *ics0 = &cpe->ch[0].ics;
 564         IndividualChannelStream *ics1 = &cpe->ch[1].ics;
 565         int msc = 0;
 566         ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
 567         ics1->max_sfb = ics0->max_sfb;
 568         for (w = 0; w < ics0->num_windows*16; w += 16)
 569             for (i = 0; i < ics0->max_sfb; i++)
 570                 if (cpe->ms_mask[w+i])
 571                     msc++;
 572         if (msc == 0 || ics0->max_sfb == 0)
 573             cpe->ms_mode = 0;
 574         else
 575             cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
 576     }
 577 }
 578
 579 static void apply_intensity_stereo(ChannelElement *cpe)
 580 {
 581     int w, w2, g, i;
 582     IndividualChannelStream *ics = &cpe->ch[0].ics;
 583     if (!cpe->common_window)
 584         return;
 585     for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 586         for (w2 =  0; w2 < ics->group_len[w]; w2++) {
 587             int start = (w+w2) * 128;
 588             for (g = 0; g < ics->num_swb; g++) {
 589                 int p  = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
 590                 float scale = cpe->ch[0].is_ener[w*16+g];
 591                 if (!cpe->is_mask[w*16 + g]) {
 592                     start += ics->swb_sizes[g];
 593                     continue;
 594                 }
 595                 if (cpe->ms_mask[w*16 + g])
 596                     p *= -1;
 597                 for (i = 0; i < ics->swb_sizes[g]; i++) {
 598                     float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale;
 599                     cpe->ch[0].coeffs[start+i] = sum;
 600                     cpe->ch[1].coeffs[start+i] = 0.0f;
 601                 }
 602                 start += ics->swb_sizes[g];
 603             }
 604         }
 605     }
 606 }
 607
 608 static void apply_mid_side_stereo(ChannelElement *cpe)
 609 {
 610     int w, w2, g, i;
 611     IndividualChannelStream *ics = &cpe->ch[0].ics;
 612     if (!cpe->common_window)
 613         return;
 614     for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 615         for (w2 =  0; w2 < ics->group_len[w]; w2++) {
 616             int start = (w+w2) * 128;
 617             for (g = 0; g < ics->num_swb; g++) {
 618                 /* ms_mask can be used for other purposes in PNS and I/S,
 619                  * so must not apply M/S if any band uses either, even if
 620                  * ms_mask is set.
 621                  */
 622                 if (!cpe->ms_mask[w*16 + g] || cpe->is_mask[w*16 + g]
 623                     || cpe->ch[0].band_type[w*16 + g] >= NOISE_BT
 624                     || cpe->ch[1].band_type[w*16 + g] >= NOISE_BT) {
 625                     start += ics->swb_sizes[g];
 626                     continue;
 627                 }
 628                 for (i = 0; i < ics->swb_sizes[g]; i++) {
 629                     float L = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) * 0.5f;
 630                     float R = L - cpe->ch[1].coeffs[start+i];
 631                     cpe->ch[0].coeffs[start+i] = L;
 632                     cpe->ch[1].coeffs[start+i] = R;
 633                 }
 634                 start += ics->swb_sizes[g];
 635             }
 636         }
 637     }
 638 }
 639
 640 /**
 641  * Encode scalefactor band coding type.
 642  */
 643 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
 644 {
 645     int w;
 646
 647     if (s->coder->set_special_band_scalefactors)
 648         s->coder->set_special_band_scalefactors(s, sce);
 649
 650     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
 651         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
 652 }
 653
 654 /**
 655  * Encode scalefactors.
 656  */
 657 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
 658                                  SingleChannelElement *sce)
 659 {
 660     int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
 661     int off_is = 0, noise_flag = 1;
 662     int i, w;
 663
 664     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 665         for (i = 0; i < sce->ics.max_sfb; i++) {
 666             if (!sce->zeroes[w*16 + i]) {
 667                 if (sce->band_type[w*16 + i] == NOISE_BT) {
 668                     diff = sce->sf_idx[w*16 + i] - off_pns;
 669                     off_pns = sce->sf_idx[w*16 + i];
 670                     if (noise_flag-- > 0) {
 671                         put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
 672                         continue;
 673                     }
 674                 } else if (sce->band_type[w*16 + i] == INTENSITY_BT  ||
 675                            sce->band_type[w*16 + i] == INTENSITY_BT2) {
 676                     diff = sce->sf_idx[w*16 + i] - off_is;
 677                     off_is = sce->sf_idx[w*16 + i];
 678                 } else {
 679                     diff = sce->sf_idx[w*16 + i] - off_sf;
 680                     off_sf = sce->sf_idx[w*16 + i];
 681                 }
 682                 diff += SCALE_DIFF_ZERO;
 683                 av_assert0(diff >= 0 && diff <= 120);
 684                 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
 685             }
 686         }
 687     }
 688 }
 689
 690 /**
 691  * Encode pulse data.
 692  */
 693 static void encode_pulses(AACEncContext *s, Pulse *pulse)
 694 {
 695     int i;
 696
 697     put_bits(&s->pb, 1, !!pulse->num_pulse);
 698     if (!pulse->num_pulse)
 699         return;
 700
 701     put_bits(&s->pb, 2, pulse->num_pulse - 1);
 702     put_bits(&s->pb, 6, pulse->start);
 703     for (i = 0; i < pulse->num_pulse; i++) {
 704         put_bits(&s->pb, 5, pulse->pos[i]);
 705         put_bits(&s->pb, 4, pulse->amp[i]);
 706     }
 707 }
 708
 709 /**
 710  * Encode spectral coefficients processed by psychoacoustic model.
 711  */
 712 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
 713 {
 714     int start, i, w, w2;
 715
 716     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 717         start = 0;
 718         for (i = 0; i < sce->ics.max_sfb; i++) {
 719             if (sce->zeroes[w*16 + i]) {
 720                 start += sce->ics.swb_sizes[i];
 721                 continue;
 722             }
 723             for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++) {
 724                 s->coder->quantize_and_encode_band(s, &s->pb,
 725                                                    &sce->coeffs[start + w2*128],
 726                                                    NULL, sce->ics.swb_sizes[i],
 727                                                    sce->sf_idx[w*16 + i],
 728                                                    sce->band_type[w*16 + i],
 729                                                    s->lambda,
 730                                                    sce->ics.window_clipping[w]);
 731             }
 732             start += sce->ics.swb_sizes[i];
 733         }
 734     }
 735 }
 736
 737 /**
 738  * Downscale spectral coefficients for near-clipping windows to avoid artifacts
 739  */
 740 static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
 741 {
 742     int start, i, j, w;
 743
 744     if (sce->ics.clip_avoidance_factor < 1.0f) {
 745         for (w = 0; w < sce->ics.num_windows; w++) {
 746             start = 0;
 747             for (i = 0; i < sce->ics.max_sfb; i++) {
 748                 float *swb_coeffs = &sce->coeffs[start + w*128];
 749                 for (j = 0; j < sce->ics.swb_sizes[i]; j++)
 750                     swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
 751                 start += sce->ics.swb_sizes[i];
 752             }
 753         }
 754     }
 755 }
 756
 757 /**
 758  * Encode one channel of audio data.
 759  */
 760 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
 761                                      SingleChannelElement *sce,
 762                                      int common_window)
 763 {
 764     put_bits(&s->pb, 8, sce->sf_idx[0]);
 765     if (!common_window) {
 766         put_ics_info(s, &sce->ics);
 767         if (s->coder->encode_main_pred)
 768             s->coder->encode_main_pred(s, sce);
 769         if (s->coder->encode_ltp_info)
 770             s->coder->encode_ltp_info(s, sce, 0);
 771     }
 772     encode_band_info(s, sce);
 773     encode_scale_factors(avctx, s, sce);
 774     encode_pulses(s, &sce->pulse);
 775     put_bits(&s->pb, 1, !!sce->tns.present);
 776     if (s->coder->encode_tns_info)
 777         s->coder->encode_tns_info(s, sce);
 778     put_bits(&s->pb, 1, 0); //ssr
 779     encode_spectral_coeffs(s, sce);
 780     return 0;
 781 }
 782
 783 /**
 784  * Write some auxiliary information about the created AAC file.
 785  */
 786 static void put_bitstream_info(AACEncContext *s, const char *name)
 787 {
 788     int i, namelen, padbits;
 789
 790     namelen = strlen(name) + 2;
 791     put_bits(&s->pb, 3, TYPE_FIL);
 792     put_bits(&s->pb, 4, FFMIN(namelen, 15));
 793     if (namelen >= 15)
 794         put_bits(&s->pb, 8, namelen - 14);
 795     put_bits(&s->pb, 4, 0); //extension type - filler
 796     padbits = -put_bits_count(&s->pb) & 7;
 797     align_put_bits(&s->pb);
 798     for (i = 0; i < namelen - 2; i++)
 799         put_bits(&s->pb, 8, name[i]);
 800     put_bits(&s->pb, 12 - padbits, 0);
 801 }
 802
 803 /*
 804  * Copy input samples.
 805  * Channels are reordered from libavcodec's default order to AAC order.
 806  */
 807 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
 808 {
 809     int ch;
 810     int end = 2048 + (frame ? frame->nb_samples : 0);
 811     const uint8_t *channel_map = s->reorder_map;
 812
 813     /* copy and remap input samples */
 814     for (ch = 0; ch < s->channels; ch++) {
 815         /* copy last 1024 samples of previous frame to the start of the current frame */
 816         memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
 817
 818         /* copy new samples and zero any remaining samples */
 819         if (frame) {
 820             memcpy(&s->planar_samples[ch][2048],
 821                    frame->extended_data[channel_map[ch]],
 822                    frame->nb_samples * sizeof(s->planar_samples[0][0]));
 823         }
 824         memset(&s->planar_samples[ch][end], 0,
 825                (3072 - end) * sizeof(s->planar_samples[0][0]));
 826     }
 827 }
 828
 829 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 830                             const AVFrame *frame, int *got_packet_ptr)
 831 {
 832     AACEncContext *s = avctx->priv_data;
 833     float **samples = s->planar_samples, *samples2, *la, *overlap;
 834     ChannelElement *cpe;
 835     SingleChannelElement *sce;
 836     IndividualChannelStream *ics;
 837     int i, its, ch, w, chans, tag, start_ch, ret, frame_bits;
 838     int target_bits, rate_bits, too_many_bits, too_few_bits;
 839     int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0;
 840     int chan_el_counter[4];
 841     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
 842
 843     /* add current frame to queue */
 844     if (frame) {
 845         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
 846             return ret;
 847     } else {
 848         if (!s->afq.remaining_samples || (!s->afq.frame_alloc && !s->afq.frame_count))
 849             return 0;
 850     }
 851
 852     copy_input_samples(s, frame);
 853     if (s->psypp)
 854         ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
 855
 856     if (!avctx->frame_num)
 857         return 0;
 858
 859     start_ch = 0;
 860     for (i = 0; i < s->chan_map[0]; i++) {
 861         FFPsyWindowInfo* wi = windows + start_ch;
 862         tag      = s->chan_map[i+1];
 863         chans    = tag == TYPE_CPE ? 2 : 1;
 864         cpe      = &s->cpe[i];
 865         for (ch = 0; ch < chans; ch++) {
 866             int k;
 867             float clip_avoidance_factor;
 868             sce = &cpe->ch[ch];
 869             ics = &sce->ics;
 870             s->cur_channel = start_ch + ch;
 871             overlap  = &samples[s->cur_channel][0];
 872             samples2 = overlap + 1024;
 873             la       = samples2 + (448+64);
 874             if (!frame)
 875                 la = NULL;
 876             if (tag == TYPE_LFE) {
 877                 wi[ch].window_type[0] = wi[ch].window_type[1] = ONLY_LONG_SEQUENCE;
 878                 wi[ch].window_shape   = 0;
 879                 wi[ch].num_windows    = 1;
 880                 wi[ch].grouping[0]    = 1;
 881                 wi[ch].clipping[0]    = 0;
 882
 883                 /* Only the lowest 12 coefficients are used in a LFE channel.
 884                  * The expression below results in only the bottom 8 coefficients
 885                  * being used for 11.025kHz to 16kHz sample rates.
 886                  */
 887                 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
 888             } else {
 889                 wi[ch] = s->psy.model->window(&s->psy, samples2, la, s->cur_channel,
 890                                               ics->window_sequence[0]);
 891             }
 892             ics->window_sequence[1] = ics->window_sequence[0];
 893             ics->window_sequence[0] = wi[ch].window_type[0];
 894             ics->use_kb_window[1]   = ics->use_kb_window[0];
 895             ics->use_kb_window[0]   = wi[ch].window_shape;
 896             ics->num_windows        = wi[ch].num_windows;
 897             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
 898             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
 899             ics->max_sfb            = FFMIN(ics->max_sfb, ics->num_swb);
 900             ics->swb_offset         = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
 901                                         ff_swb_offset_128 [s->samplerate_index]:
 902                                         ff_swb_offset_1024[s->samplerate_index];
 903             ics->tns_max_bands      = wi[ch].window_type[0] == EIGHT_SHORT_SEQUENCE ?
 904                                         ff_tns_max_bands_128 [s->samplerate_index]:
 905                                         ff_tns_max_bands_1024[s->samplerate_index];
 906
 907             for (w = 0; w < ics->num_windows; w++)
 908                 ics->group_len[w] = wi[ch].grouping[w];
 909
 910             /* Calculate input sample maximums and evaluate clipping risk */
 911             clip_avoidance_factor = 0.0f;
 912             for (w = 0; w < ics->num_windows; w++) {
 913                 const float *wbuf = overlap + w * 128;
 914                 const int wlen = 2048 / ics->num_windows;
 915                 float max = 0;
 916                 int j;
 917                 /* mdct input is 2 * output */
 918                 for (j = 0; j < wlen; j++)
 919                     max = FFMAX(max, fabsf(wbuf[j]));
 920                 wi[ch].clipping[w] = max;
 921             }
 922             for (w = 0; w < ics->num_windows; w++) {
 923                 if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
 924                     ics->window_clipping[w] = 1;
 925                     clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
 926                 } else {
 927                     ics->window_clipping[w] = 0;
 928                 }
 929             }
 930             if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
 931                 ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
 932             } else {
 933                 ics->clip_avoidance_factor = 1.0f;
 934             }
 935
 936             apply_window_and_mdct(s, sce, overlap);
 937
 938             if (s->options.ltp && s->coder->update_ltp) {
 939                 s->coder->update_ltp(s, sce);
 940                 apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, &sce->ltp_state[0]);
 941                 s->mdct1024_fn(s->mdct1024, sce->lcoeffs, sce->ret_buf, sizeof(float));
 942             }
 943
 944             for (k = 0; k < 1024; k++) {
 945                 if (!(fabs(cpe->ch[ch].coeffs[k]) < 1E16)) { // Ensure headroom for energy calculation
 946                     av_log(avctx, AV_LOG_ERROR, "Input contains (near) NaN/+-Inf\n");
 947                     return AVERROR(EINVAL);
 948                 }
 949             }
 950             avoid_clipping(s, sce);
 951         }
 952         start_ch += chans;
 953     }
 954     if ((ret = ff_alloc_packet(avctx, avpkt, 8192 * s->channels)) < 0)
 955         return ret;
 956     frame_bits = its = 0;
 957     do {
 958         init_put_bits(&s->pb, avpkt->data, avpkt->size);
 959
 960         if ((avctx->frame_num & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
 961             put_bitstream_info(s, LIBAVCODEC_IDENT);
 962         start_ch = 0;
 963         target_bits = 0;
 964         memset(chan_el_counter, 0, sizeof(chan_el_counter));
 965         for (i = 0; i < s->chan_map[0]; i++) {
 966             FFPsyWindowInfo* wi = windows + start_ch;
 967             const float *coeffs[2];
 968             tag      = s->chan_map[i+1];
 969             chans    = tag == TYPE_CPE ? 2 : 1;
 970             cpe      = &s->cpe[i];
 971             cpe->common_window = 0;
 972             memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
 973             memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
 974             put_bits(&s->pb, 3, tag);
 975             put_bits(&s->pb, 4, chan_el_counter[tag]++);
 976             for (ch = 0; ch < chans; ch++) {
 977                 sce = &cpe->ch[ch];
 978                 coeffs[ch] = sce->coeffs;
 979                 sce->ics.predictor_present = 0;
 980                 sce->ics.ltp.present = 0;
 981                 memset(sce->ics.ltp.used, 0, sizeof(sce->ics.ltp.used));
 982                 memset(sce->ics.prediction_used, 0, sizeof(sce->ics.prediction_used));
 983                 memset(&sce->tns, 0, sizeof(TemporalNoiseShaping));
 984                 for (w = 0; w < 128; w++)
 985                     if (sce->band_type[w] > RESERVED_BT)
 986                         sce->band_type[w] = 0;
 987             }
 988             s->psy.bitres.alloc = -1;
 989             s->psy.bitres.bits = s->last_frame_pb_count / s->channels;
 990             s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
 991             if (s->psy.bitres.alloc > 0) {
 992                 /* Lambda unused here on purpose, we need to take psy's unscaled allocation */
 993                 target_bits += s->psy.bitres.alloc
 994                     * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120));
 995                 s->psy.bitres.alloc /= chans;
 996             }
 997             s->cur_type = tag;
 998             for (ch = 0; ch < chans; ch++) {
 999                 s->cur_channel = start_ch + ch;
1000                 if (s->options.pns && s->coder->mark_pns)
1001                     s->coder->mark_pns(s, avctx, &cpe->ch[ch]);
1002                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
1003             }
1004             if (chans > 1
1005                 && wi[0].window_type[0] == wi[1].window_type[0]
1006                 && wi[0].window_shape   == wi[1].window_shape) {
1007
1008                 cpe->common_window = 1;
1009                 for (w = 0; w < wi[0].num_windows; w++) {
1010                     if (wi[0].grouping[w] != wi[1].grouping[w]) {
1011                         cpe->common_window = 0;
1012                         break;
1013                     }
1014                 }
1015             }
1016             for (ch = 0; ch < chans; ch++) { /* TNS and PNS */
1017                 sce = &cpe->ch[ch];
1018                 s->cur_channel = start_ch + ch;
1019                 if (s->options.tns && s->coder->search_for_tns)
1020                     s->coder->search_for_tns(s, sce);
1021                 if (s->options.tns && s->coder->apply_tns_filt)
1022                     s->coder->apply_tns_filt(s, sce);
1023                 if (sce->tns.present)
1024                     tns_mode = 1;
1025                 if (s->options.pns && s->coder->search_for_pns)
1026                     s->coder->search_for_pns(s, avctx, sce);
1027             }
1028             s->cur_channel = start_ch;
1029             if (s->options.intensity_stereo) { /* Intensity Stereo */
1030                 if (s->coder->search_for_is)
1031                     s->coder->search_for_is(s, avctx, cpe);
1032                 if (cpe->is_mode) is_mode = 1;
1033                 apply_intensity_stereo(cpe);
1034             }
1035             if (s->options.pred) { /* Prediction */
1036                 for (ch = 0; ch < chans; ch++) {
1037                     sce = &cpe->ch[ch];
1038                     s->cur_channel = start_ch + ch;
1039                     if (s->options.pred && s->coder->search_for_pred)
1040                         s->coder->search_for_pred(s, sce);
1041                     if (cpe->ch[ch].ics.predictor_present) pred_mode = 1;
1042                 }
1043                 if (s->coder->adjust_common_pred)
1044                     s->coder->adjust_common_pred(s, cpe);
1045                 for (ch = 0; ch < chans; ch++) {
1046                     sce = &cpe->ch[ch];
1047                     s->cur_channel = start_ch + ch;
1048                     if (s->options.pred && s->coder->apply_main_pred)
1049                         s->coder->apply_main_pred(s, sce);
1050                 }
1051                 s->cur_channel = start_ch;
1052             }
1053             if (s->options.mid_side) { /* Mid/Side stereo */
1054                 if (s->options.mid_side == -1 && s->coder->search_for_ms)
1055                     s->coder->search_for_ms(s, cpe);
1056                 else if (cpe->common_window)
1057                     memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask));
1058                 apply_mid_side_stereo(cpe);
1059             }
1060             adjust_frame_information(cpe, chans);
1061             if (s->options.ltp) { /* LTP */
1062                 for (ch = 0; ch < chans; ch++) {
1063                     sce = &cpe->ch[ch];
1064                     s->cur_channel = start_ch + ch;
1065                     if (s->coder->search_for_ltp)
1066                         s->coder->search_for_ltp(s, sce, cpe->common_window);
1067                     if (sce->ics.ltp.present) pred_mode = 1;
1068                 }
1069                 s->cur_channel = start_ch;
1070                 if (s->coder->adjust_common_ltp)
1071                     s->coder->adjust_common_ltp(s, cpe);
1072             }
1073             if (chans == 2) {
1074                 put_bits(&s->pb, 1, cpe->common_window);
1075                 if (cpe->common_window) {
1076                     put_ics_info(s, &cpe->ch[0].ics);
1077                     if (s->coder->encode_main_pred)
1078                         s->coder->encode_main_pred(s, &cpe->ch[0]);
1079                     if (s->coder->encode_ltp_info)
1080                         s->coder->encode_ltp_info(s, &cpe->ch[0], 1);
1081                     encode_ms_info(&s->pb, cpe);
1082                     if (cpe->ms_mode) ms_mode = 1;
1083                 }
1084             }
1085             for (ch = 0; ch < chans; ch++) {
1086                 s->cur_channel = start_ch + ch;
1087                 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
1088             }
1089             start_ch += chans;
1090         }
1091
1092         if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
1093             /* When using a constant Q-scale, don't mess with lambda */
1094             break;
1095         }
1096
1097         /* rate control stuff
1098          * allow between the nominal bitrate, and what psy's bit reservoir says to target
1099          * but drift towards the nominal bitrate always
1100          */
1101         frame_bits = put_bits_count(&s->pb);
1102         rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate;
1103         rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3);
1104         too_many_bits = FFMAX(target_bits, rate_bits);
1105         too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3);
1106         too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits);
1107
1108         /* When strict bit-rate control is demanded */
1109         if (avctx->bit_rate_tolerance == 0) {
1110             if (rate_bits < frame_bits) {
1111                 float ratio = ((float)rate_bits) / frame_bits;
1112                 s->lambda *= FFMIN(0.9f, ratio);
1113                 continue;
1114             }
1115             /* reset lambda when solution is found */
1116             s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
1117             break;
1118         }
1119
1120         /* When using ABR, be strict (but only for increasing) */
1121         too_few_bits = too_few_bits - too_few_bits/8;
1122         too_many_bits = too_many_bits + too_many_bits/2;
1123
1124         if (   its == 0 /* for steady-state Q-scale tracking */
1125             || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits))
1126             || frame_bits >= 6144 * s->channels - 3  )
1127         {
1128             float ratio = ((float)rate_bits) / frame_bits;
1129
1130             if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) {
1131                 /*
1132                  * This path is for steady-state Q-scale tracking
1133                  * When frame bits fall within the stable range, we still need to adjust
1134                  * lambda to maintain it like so in a stable fashion (large jumps in lambda
1135                  * create artifacts and should be avoided), but slowly
1136                  */
1137                 ratio = sqrtf(sqrtf(ratio));
1138                 ratio = av_clipf(ratio, 0.9f, 1.1f);
1139             } else {
1140                 /* Not so fast though */
1141                 ratio = sqrtf(ratio);
1142             }
1143             s->lambda = av_clipf(s->lambda * ratio, FLT_EPSILON, 65536.f);
1144
1145             /* Keep iterating if we must reduce and lambda is in the sky */
1146             if (ratio > 0.9f && ratio < 1.1f) {
1147                 break;
1148             } else {
1149                 if (is_mode || ms_mode || tns_mode || pred_mode) {
1150                     for (i = 0; i < s->chan_map[0]; i++) {
1151                         // Must restore coeffs
1152                         chans = tag == TYPE_CPE ? 2 : 1;
1153                         cpe = &s->cpe[i];
1154                         for (ch = 0; ch < chans; ch++)
1155                             memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
1156                     }
1157                 }
1158                 its++;
1159             }
1160         } else {
1161             break;
1162         }
1163     } while (1);
1164
1165     if (s->options.ltp && s->coder->ltp_insert_new_frame)
1166         s->coder->ltp_insert_new_frame(s);
1167
1168     put_bits(&s->pb, 3, TYPE_END);
1169     flush_put_bits(&s->pb);
1170
1171     s->last_frame_pb_count = put_bits_count(&s->pb);
1172     avpkt->size            = put_bytes_output(&s->pb);
1173
1174     s->lambda_sum += s->lambda;
1175     s->lambda_count++;
1176
1177     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
1178                        &avpkt->duration);
1179
1180     avpkt->flags |= AV_PKT_FLAG_KEY;
1181
1182     *got_packet_ptr = 1;
1183     return 0;
1184 }
1185
1186 static av_cold int aac_encode_end(AVCodecContext *avctx)
1187 {
1188     AACEncContext *s = avctx->priv_data;
1189
1190     av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_count ? s->lambda_sum / s->lambda_count : NAN);
1191
1192     av_tx_uninit(&s->mdct1024);
1193     av_tx_uninit(&s->mdct128);
1194     ff_psy_end(&s->psy);
1195     ff_lpc_end(&s->lpc);
1196     if (s->psypp)
1197         ff_psy_preprocess_end(s->psypp);
1198     av_freep(&s->buffer.samples);
1199     av_freep(&s->cpe);
1200     av_freep(&s->fdsp);
1201     ff_af_queue_close(&s->afq);
1202     return 0;
1203 }
1204
1205 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
1206 {
1207     int ret = 0;
1208     float scale = 32768.0f;
1209
1210     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
1211     if (!s->fdsp)
1212         return AVERROR(ENOMEM);
1213
1214     if ((ret = av_tx_init(&s->mdct1024, &s->mdct1024_fn, AV_TX_FLOAT_MDCT, 0,
1215                           1024, &scale, 0)) < 0)
1216         return ret;
1217     if ((ret = av_tx_init(&s->mdct128, &s->mdct128_fn,   AV_TX_FLOAT_MDCT, 0,
1218                           128, &scale, 0)) < 0)
1219         return ret;
1220
1221     return 0;
1222 }
1223
1224 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
1225 {
1226     int ch;
1227     if (!FF_ALLOCZ_TYPED_ARRAY(s->buffer.samples, s->channels * 3 * 1024) ||
1228         !FF_ALLOCZ_TYPED_ARRAY(s->cpe,            s->chan_map[0]))
1229         return AVERROR(ENOMEM);
1230
1231     for(ch = 0; ch < s->channels; ch++)
1232         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
1233
1234     return 0;
1235 }
1236
1237 static av_cold int aac_encode_init(AVCodecContext *avctx)
1238 {
1239     AACEncContext *s = avctx->priv_data;
1240     int i, ret = 0;
1241     const uint8_t *sizes[2];
1242     uint8_t grouping[AAC_MAX_CHANNELS];
1243     int lengths[2];
1244
1245     /* Constants */
1246     s->last_frame_pb_count = 0;
1247     avctx->frame_size = 1024;
1248     avctx->initial_padding = 1024;
1249     s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
1250
1251     /* Channel map and unspecified bitrate guessing */
1252     s->channels = avctx->ch_layout.nb_channels;
1253
1254     s->needs_pce = 1;
1255     for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) {
1256         if (!av_channel_layout_compare(&avctx->ch_layout, &aac_normal_chan_layouts[i])) {
1257             s->needs_pce = s->options.pce;
1258             break;
1259         }
1260     }
1261
1262     if (s->needs_pce) {
1263         char buf[64];
1264         for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
1265             if (!av_channel_layout_compare(&avctx->ch_layout, &aac_pce_configs[i].layout))
1266                 break;
1267         av_channel_layout_describe(&avctx->ch_layout, buf, sizeof(buf));
1268         if (i == FF_ARRAY_ELEMS(aac_pce_configs)) {
1269             av_log(avctx, AV_LOG_ERROR, "Unsupported channel layout \"%s\"\n", buf);
1270             return AVERROR(EINVAL);
1271         }
1272         av_log(avctx, AV_LOG_INFO, "Using a PCE to encode channel layout \"%s\"\n", buf);
1273         s->pce = aac_pce_configs[i];
1274         s->reorder_map = s->pce.reorder_map;
1275         s->chan_map = s->pce.config_map;
1276     } else {
1277         s->reorder_map = aac_chan_maps[s->channels - 1];
1278         s->chan_map = aac_chan_configs[s->channels - 1];
1279     }
1280
1281     if (!avctx->bit_rate) {
1282         for (i = 1; i <= s->chan_map[0]; i++) {
1283             avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
1284                                s->chan_map[i] == TYPE_LFE ? 16000  : /* LFE  */
1285                                                             69000  ; /* SCE  */
1286         }
1287     }
1288
1289     /* Samplerate */
1290     for (i = 0; i < 16; i++)
1291         if (avctx->sample_rate == ff_mpeg4audio_sample_rates[i])
1292             break;
1293     s->samplerate_index = i;
1294     ERROR_IF(s->samplerate_index == 16 ||
1295              s->samplerate_index >= ff_aac_swb_size_1024_len ||
1296              s->samplerate_index >= ff_aac_swb_size_128_len,
1297              "Unsupported sample rate %d\n", avctx->sample_rate);
1298
1299     /* Bitrate limiting */
1300     WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
1301              "Too many bits %f > %d per frame requested, clamping to max\n",
1302              1024.0 * avctx->bit_rate / avctx->sample_rate,
1303              6144 * s->channels);
1304     avctx->bit_rate = (int64_t)FFMIN(6144 * s->channels / 1024.0 * avctx->sample_rate,
1305                                      avctx->bit_rate);
1306
1307     /* Profile and option setting */
1308     avctx->profile = avctx->profile == AV_PROFILE_UNKNOWN ? AV_PROFILE_AAC_LOW :
1309                      avctx->profile;
1310     for (i = 0; i < FF_ARRAY_ELEMS(aacenc_profiles); i++)
1311         if (avctx->profile == aacenc_profiles[i])
1312             break;
1313     if (avctx->profile == AV_PROFILE_MPEG2_AAC_LOW) {
1314         avctx->profile = AV_PROFILE_AAC_LOW;
1315         ERROR_IF(s->options.pred,
1316                  "Main prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1317         ERROR_IF(s->options.ltp,
1318                  "LTP prediction unavailable in the \"mpeg2_aac_low\" profile\n");
1319         WARN_IF(s->options.pns,
1320                 "PNS unavailable in the \"mpeg2_aac_low\" profile, turning off\n");
1321         s->options.pns = 0;
1322     } else if (avctx->profile == AV_PROFILE_AAC_LTP) {
1323         s->options.ltp = 1;
1324         ERROR_IF(s->options.pred,
1325                  "Main prediction unavailable in the \"aac_ltp\" profile\n");
1326     } else if (avctx->profile == AV_PROFILE_AAC_MAIN) {
1327         s->options.pred = 1;
1328         ERROR_IF(s->options.ltp,
1329                  "LTP prediction unavailable in the \"aac_main\" profile\n");
1330     } else if (s->options.ltp) {
1331         avctx->profile = AV_PROFILE_AAC_LTP;
1332         WARN_IF(1,
1333                 "Chainging profile to \"aac_ltp\"\n");
1334         ERROR_IF(s->options.pred,
1335                  "Main prediction unavailable in the \"aac_ltp\" profile\n");
1336     } else if (s->options.pred) {
1337         avctx->profile = AV_PROFILE_AAC_MAIN;
1338         WARN_IF(1,
1339                 "Chainging profile to \"aac_main\"\n");
1340         ERROR_IF(s->options.ltp,
1341                  "LTP prediction unavailable in the \"aac_main\" profile\n");
1342     }
1343     s->profile = avctx->profile;
1344
1345     /* Coder limitations */
1346     s->coder = &ff_aac_coders[s->options.coder];
1347     if (s->options.coder == AAC_CODER_ANMR) {
1348         ERROR_IF(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
1349                  "The ANMR coder is considered experimental, add -strict -2 to enable!\n");
1350         s->options.intensity_stereo = 0;
1351         s->options.pns = 0;
1352     }
1353     ERROR_IF(s->options.ltp && avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL,
1354              "The LPT profile requires experimental compliance, add -strict -2 to enable!\n");
1355
1356     /* M/S introduces horrible artifacts with multichannel files, this is temporary */
1357     if (s->channels > 3)
1358         s->options.mid_side = 0;
1359
1360     // Initialize static tables
1361     ff_aac_float_common_init();
1362
1363     if ((ret = dsp_init(avctx, s)) < 0)
1364         return ret;
1365
1366     if ((ret = alloc_buffers(avctx, s)) < 0)
1367         return ret;
1368
1369     if ((ret = put_audio_specific_config(avctx)))
1370         return ret;
1371
1372     sizes[0]   = ff_aac_swb_size_1024[s->samplerate_index];
1373     sizes[1]   = ff_aac_swb_size_128[s->samplerate_index];
1374     lengths[0] = ff_aac_num_swb_1024[s->samplerate_index];
1375     lengths[1] = ff_aac_num_swb_128[s->samplerate_index];
1376     for (i = 0; i < s->chan_map[0]; i++)
1377         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
1378     if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
1379                            s->chan_map[0], grouping)) < 0)
1380         return ret;
1381     s->psypp = ff_psy_preprocess_init(avctx);
1382     ff_lpc_init(&s->lpc, 2*avctx->frame_size, TNS_MAX_ORDER, FF_LPC_TYPE_LEVINSON);
1383     s->random_state = 0x1f2e3d4c;
1384
1385     ff_aacenc_dsp_init(&s->aacdsp);
1386
1387     ff_af_queue_init(avctx, &s->afq);
1388
1389     return 0;
1390 }
1391
1392 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
1393 static const AVOption aacenc_options[] = {
1394     {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, .unit = "coder"},
1395         {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, .unit = "coder"},
1396         {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, .unit = "coder"},
1397         {"fast",     "Fast search",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, .unit = "coder"},
1398     {"aac_ms", "Force M/S stereo coding", offsetof(AACEncContext, options.mid_side), AV_OPT_TYPE_BOOL, {.i64 = -1}, -1, 1, AACENC_FLAGS},
1399     {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1400     {"aac_pns", "Perceptual noise substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1401     {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
1402     {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1403     {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1404     {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
1405     FF_AAC_PROFILE_OPTS
1406     {NULL}
1407 };
1408
/* AVClass of the encoder's private context; it publishes the private
 * option table declared above (aacenc_options). */
static const AVClass aacenc_class = {
    .class_name = "AAC encoder",
    .item_name  = av_default_item_name,
    .option     = aacenc_options,
    .version    = LIBAVUTIL_VERSION_INT,
};
1415
/* Codec-specific overrides of generic option defaults.
 * "b" is set to "0"; presumably this is the bitrate option, so that
 * aac_encode_init() sees a zero bit_rate and derives one from the channel
 * elements - confirm against the generic option table. */
static const FFCodecDefault aac_encode_defaults[] = {
    { "b", "0" },
    { NULL }
};
1420
/* Codec registration entry for the native AAC encoder. It ties together
 * the init, per-frame encode and close callbacks defined in this file with
 * the private option class and the default overrides. */
const FFCodec ff_aac_encoder = {
    .p.name         = "aac",
    CODEC_LONG_NAME("AAC (Advanced Audio Coding)"),
    .p.type         = AVMEDIA_TYPE_AUDIO,
    .p.id           = AV_CODEC_ID_AAC,
    /* aac_encode_frame() may return without a packet and accepts a NULL
     * frame to drain what is still queued. */
    .p.capabilities = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
                      AV_CODEC_CAP_SMALL_LAST_FRAME,
    .priv_data_size = sizeof(AACEncContext),
    .init           = aac_encode_init,
    FF_CODEC_ENCODE_CB(aac_encode_frame),
    .close          = aac_encode_end,
    .defaults       = aac_encode_defaults,
    .p.supported_samplerates = ff_mpeg4audio_sample_rates,
    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
    /* planar float is the only accepted input sample format */
    .p.sample_fmts  = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                     AV_SAMPLE_FMT_NONE },
    .p.priv_class   = &aacenc_class,
};