libavcodec/wmaenc.c

   1 /*
   2  * WMA compatible encoder
   3  * Copyright (c) 2007 Michael Niedermayer
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "libavutil/attributes.h"
  23
  24 #include "avcodec.h"
  25 #include "internal.h"
  26 #include "wma.h"
  27
  28 #undef NDEBUG
  29 #include <assert.h>
  30
  31
  32 static av_cold int encode_init(AVCodecContext *avctx)
  33 {
  34     WMACodecContext *s = avctx->priv_data;
  35     int i, flags1, flags2, block_align;
  36     uint8_t *extradata;
  37
  38     s->avctx = avctx;
  39
  40     if (avctx->channels > MAX_CHANNELS) {
  41         av_log(avctx, AV_LOG_ERROR,
  42                "too many channels: got %i, need %i or fewer",
  43                avctx->channels, MAX_CHANNELS);
  44         return AVERROR(EINVAL);
  45     }
  46
  47     if (avctx->sample_rate > 48000) {
  48         av_log(avctx, AV_LOG_ERROR, "sample rate is too high: %d > 48kHz",
  49                avctx->sample_rate);
  50         return AVERROR(EINVAL);
  51     }
  52
  53     if (avctx->bit_rate < 24 * 1000) {
  54         av_log(avctx, AV_LOG_ERROR,
  55                "bitrate too low: got %i, need 24000 or higher\n",
  56                avctx->bit_rate);
  57         return AVERROR(EINVAL);
  58     }
  59
  60     /* extract flag info */
  61     flags1 = 0;
  62     flags2 = 1;
  63     if (avctx->codec->id == AV_CODEC_ID_WMAV1) {
  64         extradata             = av_malloc(4);
  65         if (!extradata)
  66             return AVERROR(ENOMEM);
  67         avctx->extradata_size = 4;
  68         AV_WL16(extradata, flags1);
  69         AV_WL16(extradata + 2, flags2);
  70     } else if (avctx->codec->id == AV_CODEC_ID_WMAV2) {
  71         extradata             = av_mallocz(10);
  72         if (!extradata)
  73             return AVERROR(ENOMEM);
  74         avctx->extradata_size = 10;
  75         AV_WL32(extradata, flags1);
  76         AV_WL16(extradata + 4, flags2);
  77     } else {
  78         assert(0);
  79     }
  80     avctx->extradata          = extradata;
  81     s->use_exp_vlc            = flags2 & 0x0001;
  82     s->use_bit_reservoir      = flags2 & 0x0002;
  83     s->use_variable_block_len = flags2 & 0x0004;
  84     if (avctx->channels == 2)
  85         s->ms_stereo = 1;
  86
  87     ff_wma_init(avctx, flags2);
  88
  89     /* init MDCT */
  90     for (i = 0; i < s->nb_block_sizes; i++)
  91         ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 0, 1.0);
  92
  93     block_align        = avctx->bit_rate * (int64_t) s->frame_len /
  94                          (avctx->sample_rate * 8);
  95     block_align        = FFMIN(block_align, MAX_CODED_SUPERFRAME_SIZE);
  96     avctx->block_align = block_align;
  97     avctx->bit_rate    = avctx->block_align * 8LL * avctx->sample_rate /
  98                          s->frame_len;
  99     avctx->frame_size = avctx->initial_padding = s->frame_len;
 100
 101     return 0;
 102 }
 103
 104 static void apply_window_and_mdct(AVCodecContext *avctx, const AVFrame *frame)
 105 {
 106     WMACodecContext *s = avctx->priv_data;
 107     float **audio      = (float **) frame->extended_data;
 108     int len            = frame->nb_samples;
 109     int window_index   = s->frame_len_bits - s->block_len_bits;
 110     FFTContext *mdct   = &s->mdct_ctx[window_index];
 111     int ch;
 112     const float *win   = s->windows[window_index];
 113     int window_len     = 1 << s->block_len_bits;
 114     float n            = 2.0 * 32768.0 / window_len;
 115
 116     for (ch = 0; ch < avctx->channels; ch++) {
 117         memcpy(s->output, s->frame_out[ch], window_len * sizeof(*s->output));
 118         s->fdsp.vector_fmul_scalar(s->frame_out[ch], audio[ch], n, len);
 119         s->fdsp.vector_fmul_reverse(&s->output[window_len], s->frame_out[ch],
 120                                     win, len);
 121         s->fdsp.vector_fmul(s->frame_out[ch], s->frame_out[ch], win, len);
 122         mdct->mdct_calc(mdct, s->coefs[ch], s->output);
 123     }
 124 }
 125
 126 // FIXME use for decoding too
 127 static void init_exp(WMACodecContext *s, int ch, const int *exp_param)
 128 {
 129     int n;
 130     const uint16_t *ptr;
 131     float v, *q, max_scale, *q_end;
 132
 133     ptr       = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
 134     q         = s->exponents[ch];
 135     q_end     = q + s->block_len;
 136     max_scale = 0;
 137     while (q < q_end) {
 138         /* XXX: use a table */
 139         v         = pow(10, *exp_param++ *(1.0 / 16.0));
 140         max_scale = FFMAX(max_scale, v);
 141         n         = *ptr++;
 142         do {
 143             *q++ = v;
 144         } while (--n);
 145     }
 146     s->max_exponent[ch] = max_scale;
 147 }
 148
 149 static void encode_exp_vlc(WMACodecContext *s, int ch, const int *exp_param)
 150 {
 151     int last_exp;
 152     const uint16_t *ptr;
 153     float *q, *q_end;
 154
 155     ptr   = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
 156     q     = s->exponents[ch];
 157     q_end = q + s->block_len;
 158     if (s->version == 1) {
 159         last_exp = *exp_param++;
 160         assert(last_exp - 10 >= 0 && last_exp - 10 < 32);
 161         put_bits(&s->pb, 5, last_exp - 10);
 162         q += *ptr++;
 163     } else
 164         last_exp = 36;
 165     while (q < q_end) {
 166         int exp  = *exp_param++;
 167         int code = exp - last_exp + 60;
 168         assert(code >= 0 && code < 120);
 169         put_bits(&s->pb, ff_aac_scalefactor_bits[code],
 170                  ff_aac_scalefactor_code[code]);
 171         /* XXX: use a table */
 172         q       += *ptr++;
 173         last_exp = exp;
 174     }
 175 }
 176
 177 static int encode_block(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
 178                         int total_gain)
 179 {
 180     int v, bsize, ch, coef_nb_bits, parse_exponents;
 181     float mdct_norm;
 182     int nb_coefs[MAX_CHANNELS];
 183     static const int fixed_exp[25] = {
 184         20, 20, 20, 20, 20,
 185         20, 20, 20, 20, 20,
 186         20, 20, 20, 20, 20,
 187         20, 20, 20, 20, 20,
 188         20, 20, 20, 20, 20
 189     };
 190
 191     // FIXME remove duplication relative to decoder
 192     if (s->use_variable_block_len) {
 193         assert(0); // FIXME not implemented
 194     } else {
 195         /* fixed block len */
 196         s->next_block_len_bits = s->frame_len_bits;
 197         s->prev_block_len_bits = s->frame_len_bits;
 198         s->block_len_bits      = s->frame_len_bits;
 199     }
 200
 201     s->block_len = 1 << s->block_len_bits;
 202 //     assert((s->block_pos + s->block_len) <= s->frame_len);
 203     bsize = s->frame_len_bits - s->block_len_bits;
 204
 205     // FIXME factor
 206     v = s->coefs_end[bsize] - s->coefs_start;
 207     for (ch = 0; ch < s->avctx->channels; ch++)
 208         nb_coefs[ch] = v;
 209     {
 210         int n4 = s->block_len / 2;
 211         mdct_norm = 1.0 / (float) n4;
 212         if (s->version == 1)
 213             mdct_norm *= sqrt(n4);
 214     }
 215
 216     if (s->avctx->channels == 2)
 217         put_bits(&s->pb, 1, !!s->ms_stereo);
 218
 219     for (ch = 0; ch < s->avctx->channels; ch++) {
 220         // FIXME only set channel_coded when needed, instead of always
 221         s->channel_coded[ch] = 1;
 222         if (s->channel_coded[ch])
 223             init_exp(s, ch, fixed_exp);
 224     }
 225
 226     for (ch = 0; ch < s->avctx->channels; ch++) {
 227         if (s->channel_coded[ch]) {
 228             WMACoef *coefs1;
 229             float *coefs, *exponents, mult;
 230             int i, n;
 231
 232             coefs1    = s->coefs1[ch];
 233             exponents = s->exponents[ch];
 234             mult      = pow(10, total_gain * 0.05) / s->max_exponent[ch];
 235             mult     *= mdct_norm;
 236             coefs     = src_coefs[ch];
 237             if (s->use_noise_coding && 0) {
 238                 assert(0); // FIXME not implemented
 239             } else {
 240                 coefs += s->coefs_start;
 241                 n      = nb_coefs[ch];
 242                 for (i = 0; i < n; i++) {
 243                     double t = *coefs++ / (exponents[i] * mult);
 244                     if (t < -32768 || t > 32767)
 245                         return -1;
 246
 247                     coefs1[i] = lrint(t);
 248                 }
 249             }
 250         }
 251     }
 252
 253     v = 0;
 254     for (ch = 0; ch < s->avctx->channels; ch++) {
 255         int a = s->channel_coded[ch];
 256         put_bits(&s->pb, 1, a);
 257         v |= a;
 258     }
 259
 260     if (!v)
 261         return 1;
 262
 263     for (v = total_gain - 1; v >= 127; v -= 127)
 264         put_bits(&s->pb, 7, 127);
 265     put_bits(&s->pb, 7, v);
 266
 267     coef_nb_bits = ff_wma_total_gain_to_bits(total_gain);
 268
 269     if (s->use_noise_coding) {
 270         for (ch = 0; ch < s->avctx->channels; ch++) {
 271             if (s->channel_coded[ch]) {
 272                 int i, n;
 273                 n = s->exponent_high_sizes[bsize];
 274                 for (i = 0; i < n; i++) {
 275                     put_bits(&s->pb, 1, s->high_band_coded[ch][i] = 0);
 276                     if (0)
 277                         nb_coefs[ch] -= s->exponent_high_bands[bsize][i];
 278                 }
 279             }
 280         }
 281     }
 282
 283     parse_exponents = 1;
 284     if (s->block_len_bits != s->frame_len_bits)
 285         put_bits(&s->pb, 1, parse_exponents);
 286
 287     if (parse_exponents) {
 288         for (ch = 0; ch < s->avctx->channels; ch++) {
 289             if (s->channel_coded[ch]) {
 290                 if (s->use_exp_vlc) {
 291                     encode_exp_vlc(s, ch, fixed_exp);
 292                 } else {
 293                     assert(0); // FIXME not implemented
 294 //                    encode_exp_lsp(s, ch);
 295                 }
 296             }
 297         }
 298     } else
 299         assert(0); // FIXME not implemented
 300
 301     for (ch = 0; ch < s->avctx->channels; ch++) {
 302         if (s->channel_coded[ch]) {
 303             int run, tindex;
 304             WMACoef *ptr, *eptr;
 305             tindex = (ch == 1 && s->ms_stereo);
 306             ptr    = &s->coefs1[ch][0];
 307             eptr   = ptr + nb_coefs[ch];
 308
 309             run = 0;
 310             for (; ptr < eptr; ptr++) {
 311                 if (*ptr) {
 312                     int level     = *ptr;
 313                     int abs_level = FFABS(level);
 314                     int code      = 0;
 315                     if (abs_level <= s->coef_vlcs[tindex]->max_level)
 316                         if (run < s->coef_vlcs[tindex]->levels[abs_level - 1])
 317                             code = run + s->int_table[tindex][abs_level - 1];
 318
 319                     assert(code < s->coef_vlcs[tindex]->n);
 320                     put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[code],
 321                              s->coef_vlcs[tindex]->huffcodes[code]);
 322
 323                     if (code == 0) {
 324                         if (1 << coef_nb_bits <= abs_level)
 325                             return -1;
 326
 327                         put_bits(&s->pb, coef_nb_bits, abs_level);
 328                         put_bits(&s->pb, s->frame_len_bits, run);
 329                     }
 330                     // FIXME the sign is flipped somewhere
 331                     put_bits(&s->pb, 1, level < 0);
 332                     run = 0;
 333                 } else
 334                     run++;
 335             }
 336             if (run)
 337                 put_bits(&s->pb, s->coef_vlcs[tindex]->huffbits[1],
 338                          s->coef_vlcs[tindex]->huffcodes[1]);
 339         }
 340         if (s->version == 1 && s->avctx->channels >= 2)
 341             avpriv_align_put_bits(&s->pb);
 342     }
 343     return 0;
 344 }
 345
 346 static int encode_frame(WMACodecContext *s, float (*src_coefs)[BLOCK_MAX_SIZE],
 347                         uint8_t *buf, int buf_size, int total_gain)
 348 {
 349     init_put_bits(&s->pb, buf, buf_size);
 350
 351     if (s->use_bit_reservoir)
 352         assert(0); // FIXME not implemented
 353     else if (encode_block(s, src_coefs, total_gain) < 0)
 354         return INT_MAX;
 355
 356     avpriv_align_put_bits(&s->pb);
 357
 358     return put_bits_count(&s->pb) / 8 - s->avctx->block_align;
 359 }
 360
 361 static int encode_superframe(AVCodecContext *avctx, AVPacket *avpkt,
 362                              const AVFrame *frame, int *got_packet_ptr)
 363 {
 364     WMACodecContext *s = avctx->priv_data;
 365     int i, total_gain, ret;
 366
 367     s->block_len_bits = s->frame_len_bits; // required by non variable block len
 368     s->block_len      = 1 << s->block_len_bits;
 369
 370     apply_window_and_mdct(avctx, frame);
 371
 372     if (s->ms_stereo) {
 373         float a, b;
 374         int i;
 375
 376         for (i = 0; i < s->block_len; i++) {
 377             a              = s->coefs[0][i] * 0.5;
 378             b              = s->coefs[1][i] * 0.5;
 379             s->coefs[0][i] = a + b;
 380             s->coefs[1][i] = a - b;
 381         }
 382     }
 383
 384     if ((ret = ff_alloc_packet(avpkt, 2 * MAX_CODED_SUPERFRAME_SIZE))) {
 385         av_log(avctx, AV_LOG_ERROR, "Error getting output packet\n");
 386         return ret;
 387     }
 388
 389     total_gain = 128;
 390     for (i = 64; i; i >>= 1) {
 391         int error = encode_frame(s, s->coefs, avpkt->data, avpkt->size,
 392                                  total_gain - i);
 393         if (error < 0)
 394             total_gain -= i;
 395     }
 396
 397     if ((i = encode_frame(s, s->coefs, avpkt->data, avpkt->size, total_gain)) >= 0) {
 398         av_log(avctx, AV_LOG_ERROR, "required frame size too large. please "
 399                                     "use a higher bit rate.\n");
 400         return AVERROR(EINVAL);
 401     }
 402     assert((put_bits_count(&s->pb) & 7) == 0);
 403     while (i++)
 404         put_bits(&s->pb, 8, 'N');
 405
 406     flush_put_bits(&s->pb);
 407
 408     if (frame->pts != AV_NOPTS_VALUE)
 409         avpkt->pts = frame->pts - ff_samples_to_time_base(avctx, avctx->initial_padding);
 410
 411     avpkt->size     = avctx->block_align;
 412     *got_packet_ptr = 1;
 413     return 0;
 414 }
 415
 416 AVCodec ff_wmav1_encoder = {
 417     .name           = "wmav1",
 418     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 1"),
 419     .type           = AVMEDIA_TYPE_AUDIO,
 420     .id             = AV_CODEC_ID_WMAV1,
 421     .priv_data_size = sizeof(WMACodecContext),
 422     .init           = encode_init,
 423     .encode2        = encode_superframe,
 424     .close          = ff_wma_end,
 425     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
 426                                                       AV_SAMPLE_FMT_NONE },
 427 };
 428
 429 AVCodec ff_wmav2_encoder = {
 430     .name           = "wmav2",
 431     .long_name      = NULL_IF_CONFIG_SMALL("Windows Media Audio 2"),
 432     .type           = AVMEDIA_TYPE_AUDIO,
 433     .id             = AV_CODEC_ID_WMAV2,
 434     .priv_data_size = sizeof(WMACodecContext),
 435     .init           = encode_init,
 436     .encode2        = encode_superframe,
 437     .close          = ff_wma_end,
 438     .sample_fmts    = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLTP,
 439                                                       AV_SAMPLE_FMT_NONE },
 440 };