libavformat/mp3enc.c

   1 /*
   2  * MP3 muxer
   3  * Copyright (c) 2003 Fabrice Bellard
   4  *
   5  * This file is part of Libav.
   6  *
   7  * Libav is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * Libav is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with Libav; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "avformat.h"
  23 #include "avio_internal.h"
  24 #include "id3v1.h"
  25 #include "id3v2.h"
  26 #include "rawenc.h"
  27 #include "libavutil/avstring.h"
  28 #include "libavcodec/mpegaudio.h"
  29 #include "libavcodec/mpegaudiodata.h"
  30 #include "libavcodec/mpegaudiodecheader.h"
  31 #include "libavutil/intreadwrite.h"
  32 #include "libavutil/opt.h"
  33 #include "libavutil/dict.h"
  34 #include "libavutil/avassert.h"
  35
  36 static int id3v1_set_string(AVFormatContext *s, const char *key,
  37                             uint8_t *buf, int buf_size)
  38 {
  39     AVDictionaryEntry *tag;
  40     if ((tag = av_dict_get(s->metadata, key, NULL, 0)))
  41         av_strlcpy(buf, tag->value, buf_size);
  42     return !!tag;
  43 }
  44
  45 static int id3v1_create_tag(AVFormatContext *s, uint8_t *buf)
  46 {
  47     AVDictionaryEntry *tag;
  48     int i, count = 0;
  49
  50     memset(buf, 0, ID3v1_TAG_SIZE); /* fail safe */
  51     buf[0] = 'T';
  52     buf[1] = 'A';
  53     buf[2] = 'G';
  54     count += id3v1_set_string(s, "TIT2",    buf +  3, 30);       //title
  55     count += id3v1_set_string(s, "TPE1",    buf + 33, 30);       //author|artist
  56     count += id3v1_set_string(s, "TALB",    buf + 63, 30);       //album
  57     count += id3v1_set_string(s, "TDRL",    buf + 93,  4);       //date
  58     count += id3v1_set_string(s, "comment", buf + 97, 30);
  59     if ((tag = av_dict_get(s->metadata, "TRCK", NULL, 0))) { //track
  60         buf[125] = 0;
  61         buf[126] = atoi(tag->value);
  62         count++;
  63     }
  64     buf[127] = 0xFF; /* default to unknown genre */
  65     if ((tag = av_dict_get(s->metadata, "TCON", NULL, 0))) { //genre
  66         for(i = 0; i <= ID3v1_GENRE_MAX; i++) {
  67             if (!av_strcasecmp(tag->value, ff_id3v1_genre_str[i])) {
  68                 buf[127] = i;
  69                 count++;
  70                 break;
  71             }
  72         }
  73     }
  74     return count;
  75 }
  76
  77 #define XING_NUM_BAGS 400
  78 #define XING_TOC_SIZE 100
  79 // maximum size of the xing frame: offset/Xing/flags/frames/size/TOC
  80 #define XING_MAX_SIZE (32 + 4 + 4 + 4 + 4 + XING_TOC_SIZE)
  81
  82 typedef struct MP3Context {
  83     const AVClass *class;
  84     ID3v2EncContext id3;
  85     int id3v2_version;
  86     int write_id3v1;
  87
  88     /* xing header */
  89     int64_t xing_offset;
  90     int32_t frames;
  91     int32_t size;
  92     uint32_t want;
  93     uint32_t seen;
  94     uint32_t pos;
  95     uint64_t bag[XING_NUM_BAGS];
  96     int initial_bitrate;
  97     int has_variable_bitrate;
  98
  99     /* index of the audio stream */
 100     int audio_stream_idx;
 101     /* number of attached pictures we still need to write */
 102     int pics_to_write;
 103
 104     /* audio packets are queued here until we get all the attached pictures */
 105     AVPacketList *queue, *queue_end;
 106 } MP3Context;
 107
 108 static const uint8_t xing_offtbl[2][2] = {{32, 17}, {17, 9}};
 109
 110 /*
 111  * Write an empty XING header and initialize respective data.
 112  */
 113 static void mp3_write_xing(AVFormatContext *s)
 114 {
 115     MP3Context       *mp3 = s->priv_data;
 116     AVCodecContext *codec = s->streams[mp3->audio_stream_idx]->codec;
 117     int32_t        header;
 118     MPADecodeHeader  mpah;
 119     int srate_idx, i, channels;
 120     int bitrate_idx;
 121     int xing_offset;
 122     int ver = 0;
 123
 124     if (!s->pb->seekable)
 125         return;
 126
 127     for (i = 0; i < FF_ARRAY_ELEMS(avpriv_mpa_freq_tab); i++) {
 128         const uint16_t base_freq = avpriv_mpa_freq_tab[i];
 129
 130         if      (codec->sample_rate == base_freq)     ver = 0x3; // MPEG 1
 131         else if (codec->sample_rate == base_freq / 2) ver = 0x2; // MPEG 2
 132         else if (codec->sample_rate == base_freq / 4) ver = 0x0; // MPEG 2.5
 133         else continue;
 134
 135         srate_idx = i;
 136         break;
 137     }
 138     if (i == FF_ARRAY_ELEMS(avpriv_mpa_freq_tab)) {
 139         av_log(s, AV_LOG_WARNING, "Unsupported sample rate, not writing Xing "
 140                "header.\n");
 141         return;
 142     }
 143
 144     switch (codec->channels) {
 145     case 1:  channels = MPA_MONO;                                          break;
 146     case 2:  channels = MPA_STEREO;                                        break;
 147     default: av_log(s, AV_LOG_WARNING, "Unsupported number of channels, "
 148                     "not writing Xing header.\n");
 149              return;
 150     }
 151
 152     /* 64 kbps frame, should be large enough */
 153     bitrate_idx = (ver == 3) ? 5 : 8;
 154
 155     /* dummy MPEG audio header */
 156     header  =  0xff                                  << 24; // sync
 157     header |= (0x7 << 5 | ver << 3 | 0x1 << 1 | 0x1) << 16; // sync/audio-version/layer 3/no crc*/
 158     header |= (bitrate_idx << 4 | srate_idx << 2)    <<  8;
 159     header |= channels << 6;
 160     avio_wb32(s->pb, header);
 161
 162     avpriv_mpegaudio_decode_header(&mpah, header);
 163
 164     av_assert0(mpah.frame_size >= XING_MAX_SIZE);
 165
 166     xing_offset = xing_offtbl[ver != 3][codec->channels == 1];
 167     ffio_fill(s->pb, 0, xing_offset);
 168     mp3->xing_offset = avio_tell(s->pb);
 169     ffio_wfourcc(s->pb, "Xing");
 170     avio_wb32(s->pb, 0x01 | 0x02 | 0x04);  // frames / size / TOC
 171
 172     mp3->size = mpah.frame_size;
 173     mp3->want = 1;
 174
 175     avio_wb32(s->pb, 0);  // frames
 176     avio_wb32(s->pb, 0);  // size
 177
 178     // TOC
 179     for (i = 0; i < XING_TOC_SIZE; i++)
 180         avio_w8(s->pb, 255 * i / XING_TOC_SIZE);
 181
 182     mpah.frame_size -= 4 + xing_offset + 4 + 4 + 4 + 4 + XING_TOC_SIZE;
 183     ffio_fill(s->pb, 0, mpah.frame_size);
 184 }
 185
 186 /*
 187  * Add a frame to XING data.
 188  * Following lame's "VbrTag.c".
 189  */
 190 static void mp3_xing_add_frame(MP3Context *mp3, AVPacket *pkt)
 191 {
 192     int i;
 193
 194     mp3->frames++;
 195     mp3->seen++;
 196     mp3->size += pkt->size;
 197
 198     if (mp3->want == mp3->seen) {
 199         mp3->bag[mp3->pos] = mp3->size;
 200
 201         if (XING_NUM_BAGS == ++mp3->pos) {
 202             /* shrink table to half size by throwing away each second bag. */
 203             for (i = 1; i < XING_NUM_BAGS; i += 2)
 204                 mp3->bag[i / 2] = mp3->bag[i];
 205
 206             /* double wanted amount per bag. */
 207             mp3->want *= 2;
 208             /* adjust current position to half of table size. */
 209             mp3->pos = XING_NUM_BAGS / 2;
 210         }
 211
 212         mp3->seen = 0;
 213     }
 214 }
 215
 216 static int mp3_write_audio_packet(AVFormatContext *s, AVPacket *pkt)
 217 {
 218     MP3Context  *mp3 = s->priv_data;
 219
 220     if (mp3->xing_offset && pkt->size >= 4) {
 221         MPADecodeHeader c;
 222
 223         avpriv_mpegaudio_decode_header(&c, AV_RB32(pkt->data));
 224
 225         if (!mp3->initial_bitrate)
 226             mp3->initial_bitrate = c.bit_rate;
 227         if ((c.bit_rate == 0) || (mp3->initial_bitrate != c.bit_rate))
 228             mp3->has_variable_bitrate = 1;
 229
 230         mp3_xing_add_frame(mp3, pkt);
 231     }
 232
 233     return ff_raw_write_packet(s, pkt);
 234 }
 235
 236 static int mp3_queue_flush(AVFormatContext *s)
 237 {
 238     MP3Context *mp3 = s->priv_data;
 239     AVPacketList *pktl;
 240     int ret = 0, write = 1;
 241
 242     ff_id3v2_finish(&mp3->id3, s->pb);
 243     mp3_write_xing(s);
 244
 245     while ((pktl = mp3->queue)) {
 246         if (write && (ret = mp3_write_audio_packet(s, &pktl->pkt)) < 0)
 247             write = 0;
 248         av_free_packet(&pktl->pkt);
 249         mp3->queue = pktl->next;
 250         av_freep(&pktl);
 251     }
 252     mp3->queue_end = NULL;
 253     return ret;
 254 }
 255
 256 static void mp3_update_xing(AVFormatContext *s)
 257 {
 258     MP3Context  *mp3 = s->priv_data;
 259     int i;
 260
 261     /* replace "Xing" identification string with "Info" for CBR files. */
 262     if (!mp3->has_variable_bitrate) {
 263         avio_seek(s->pb, mp3->xing_offset, SEEK_SET);
 264         ffio_wfourcc(s->pb, "Info");
 265     }
 266
 267     avio_seek(s->pb, mp3->xing_offset + 8, SEEK_SET);
 268     avio_wb32(s->pb, mp3->frames);
 269     avio_wb32(s->pb, mp3->size);
 270
 271     avio_w8(s->pb, 0);  // first toc entry has to be zero.
 272
 273     for (i = 1; i < XING_TOC_SIZE; ++i) {
 274         int j = i * mp3->pos / XING_TOC_SIZE;
 275         int seek_point = 256LL * mp3->bag[j] / mp3->size;
 276         avio_w8(s->pb, FFMIN(seek_point, 255));
 277     }
 278
 279     avio_seek(s->pb, 0, SEEK_END);
 280 }
 281
 282 static int mp3_write_trailer(struct AVFormatContext *s)
 283 {
 284     uint8_t buf[ID3v1_TAG_SIZE];
 285     MP3Context *mp3 = s->priv_data;
 286
 287     if (mp3->pics_to_write) {
 288         av_log(s, AV_LOG_WARNING, "No packets were sent for some of the "
 289                "attached pictures.\n");
 290         mp3_queue_flush(s);
 291     }
 292
 293     /* write the id3v1 tag */
 294     if (mp3->write_id3v1 && id3v1_create_tag(s, buf) > 0) {
 295         avio_write(s->pb, buf, ID3v1_TAG_SIZE);
 296     }
 297
 298     if (mp3->xing_offset)
 299         mp3_update_xing(s);
 300
 301     return 0;
 302 }
 303
 304 #if CONFIG_MP2_MUXER
 305 AVOutputFormat ff_mp2_muxer = {
 306     .name              = "mp2",
 307     .long_name         = NULL_IF_CONFIG_SMALL("MP2 (MPEG audio layer 2)"),
 308     .mime_type         = "audio/x-mpeg",
 309     .extensions        = "mp2,m2a",
 310     .audio_codec       = AV_CODEC_ID_MP2,
 311     .video_codec       = AV_CODEC_ID_NONE,
 312     .write_packet      = ff_raw_write_packet,
 313     .flags             = AVFMT_NOTIMESTAMPS,
 314 };
 315 #endif
 316
 317 #if CONFIG_MP3_MUXER
 318
 319 static const AVOption options[] = {
 320     { "id3v2_version", "Select ID3v2 version to write. Currently 3 and 4 are supported.",
 321       offsetof(MP3Context, id3v2_version), AV_OPT_TYPE_INT, {.i64 = 4}, 3, 4, AV_OPT_FLAG_ENCODING_PARAM},
 322     { "write_id3v1", "Enable ID3v1 writing. ID3v1 tags are written in UTF-8 which may not be supported by most software.",
 323       offsetof(MP3Context, write_id3v1), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AV_OPT_FLAG_ENCODING_PARAM},
 324     { NULL },
 325 };
 326
 327 static const AVClass mp3_muxer_class = {
 328     .class_name     = "MP3 muxer",
 329     .item_name      = av_default_item_name,
 330     .option         = options,
 331     .version        = LIBAVUTIL_VERSION_INT,
 332 };
 333
 334 static int mp3_write_packet(AVFormatContext *s, AVPacket *pkt)
 335 {
 336     MP3Context *mp3 = s->priv_data;
 337
 338     if (pkt->stream_index == mp3->audio_stream_idx) {
 339         if (mp3->pics_to_write) {
 340             /* buffer audio packets until we get all the pictures */
 341             AVPacketList *pktl = av_mallocz(sizeof(*pktl));
 342             if (!pktl)
 343                 return AVERROR(ENOMEM);
 344
 345             pktl->pkt     = *pkt;
 346             pktl->pkt.buf = av_buffer_ref(pkt->buf);
 347             if (!pktl->pkt.buf) {
 348                 av_freep(&pktl);
 349                 return AVERROR(ENOMEM);
 350             }
 351
 352             if (mp3->queue_end)
 353                 mp3->queue_end->next = pktl;
 354             else
 355                 mp3->queue = pktl;
 356             mp3->queue_end = pktl;
 357         } else
 358             return mp3_write_audio_packet(s, pkt);
 359     } else {
 360         int ret;
 361
 362         /* warn only once for each stream */
 363         if (s->streams[pkt->stream_index]->nb_frames == 1) {
 364             av_log(s, AV_LOG_WARNING, "Got more than one picture in stream %d,"
 365                    " ignoring.\n", pkt->stream_index);
 366         }
 367         if (!mp3->pics_to_write || s->streams[pkt->stream_index]->nb_frames >= 1)
 368             return 0;
 369
 370         if ((ret = ff_id3v2_write_apic(s, &mp3->id3, pkt)) < 0)
 371             return ret;
 372         mp3->pics_to_write--;
 373
 374         /* flush the buffered audio packets */
 375         if (!mp3->pics_to_write &&
 376             (ret = mp3_queue_flush(s)) < 0)
 377             return ret;
 378     }
 379
 380     return 0;
 381 }
 382
 383 /**
 384  * Write an ID3v2 header at beginning of stream
 385  */
 386
 387 static int mp3_write_header(struct AVFormatContext *s)
 388 {
 389     MP3Context  *mp3 = s->priv_data;
 390     int ret, i;
 391
 392     /* check the streams -- we want exactly one audio and arbitrary number of
 393      * video (attached pictures) */
 394     mp3->audio_stream_idx = -1;
 395     for (i = 0; i < s->nb_streams; i++) {
 396         AVStream *st = s->streams[i];
 397         if (st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
 398             if (mp3->audio_stream_idx >= 0 || st->codec->codec_id != AV_CODEC_ID_MP3) {
 399                 av_log(s, AV_LOG_ERROR, "Invalid audio stream. Exactly one MP3 "
 400                        "audio stream is required.\n");
 401                 return AVERROR(EINVAL);
 402             }
 403             mp3->audio_stream_idx = i;
 404         } else if (st->codec->codec_type != AVMEDIA_TYPE_VIDEO) {
 405             av_log(s, AV_LOG_ERROR, "Only audio streams and pictures are allowed in MP3.\n");
 406             return AVERROR(EINVAL);
 407         }
 408     }
 409     if (mp3->audio_stream_idx < 0) {
 410         av_log(s, AV_LOG_ERROR, "No audio stream present.\n");
 411         return AVERROR(EINVAL);
 412     }
 413     mp3->pics_to_write = s->nb_streams - 1;
 414
 415     ff_id3v2_start(&mp3->id3, s->pb, mp3->id3v2_version, ID3v2_DEFAULT_MAGIC);
 416     ret = ff_id3v2_write_metadata(s, &mp3->id3);
 417     if (ret < 0)
 418         return ret;
 419
 420     if (!mp3->pics_to_write) {
 421         ff_id3v2_finish(&mp3->id3, s->pb);
 422         mp3_write_xing(s);
 423     }
 424
 425     return 0;
 426 }
 427
 428 AVOutputFormat ff_mp3_muxer = {
 429     .name              = "mp3",
 430     .long_name         = NULL_IF_CONFIG_SMALL("MP3 (MPEG audio layer 3)"),
 431     .mime_type         = "audio/x-mpeg",
 432     .extensions        = "mp3",
 433     .priv_data_size    = sizeof(MP3Context),
 434     .audio_codec       = AV_CODEC_ID_MP3,
 435     .video_codec       = AV_CODEC_ID_PNG,
 436     .write_header      = mp3_write_header,
 437     .write_packet      = mp3_write_packet,
 438     .write_trailer     = mp3_write_trailer,
 439     .flags             = AVFMT_NOTIMESTAMPS,
 440     .priv_class        = &mp3_muxer_class,
 441 };
 442 #endif