2 * 3GPP TS 26.245 Timed Text decoder
3 * Copyright (c) 2012 Philip Langdale <philipl@overt.org>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "libavutil/opt.h"
25 #include "libavutil/avstring.h"
26 #include "libavutil/common.h"
27 #include "libavutil/bprint.h"
28 #include "libavutil/intreadwrite.h"
29 #include "libavutil/mem.h"
30 #include "bytestream.h"
31 #include "codec_internal.h"
33 #define STYLE_FLAG_BOLD (1<<0)
34 #define STYLE_FLAG_ITALIC (1<<1)
35 #define STYLE_FLAG_UNDERLINE (1<<2)
37 #define BOX_SIZE_INITIAL 40
39 #define STYL_BOX (1<<0)
40 #define HLIT_BOX (1<<1)
41 #define HCLR_BOX (1<<2)
42 #define TWRP_BOX (1<<3)
45 #define BOTTOM_CENTER 2
46 #define BOTTOM_RIGHT 3
48 #define MIDDLE_CENTER 5
49 #define MIDDLE_RIGHT 6
/* Swap the lowest and the third byte of a 24-bit colour value
 * (0xRRGGBB <-> 0xBBGGRR). The middle byte stays in place and any bits
 * above bit 23 are dropped. The argument is evaluated three times. */
54 #define RGB_TO_BGR(c) (((c) & 0xff) << 16 | ((c) & 0xff00) | (((c) >> 16) & 0xff))
88 uint8_t hlit_color
[4];
104 uint16_t style_entries
, ftab_entries
;
113 int (*decode
)(const uint8_t *tsmb
, MovTextContext
*m
, uint64_t size
);
/*
 * Clear the parsed style state: when STYL_BOX is set in m->box_flags the
 * style entry count is reset to zero.
 * NOTE(review): source line 119 is not visible in this view; presumably it
 * releases the style array m->s. Confirm against the full file.
 */
116 static void mov_text_cleanup(MovTextContext
*m
)
118 if (m
->box_flags
& STYL_BOX
) {
120 m
->style_entries
= 0;
/*
 * Release the font name string of each of the m->ftab_entries entries in
 * m->ftab, using av_freep() on every font pointer.
 * NOTE(review): source lines 128-129 are not visible in this view;
 * presumably they free the table itself and reset the entry count.
 * Confirm against the full file.
 */
124 static void mov_text_cleanup_ftab(MovTextContext
*m
)
126 for (unsigned i
= 0; i
< m
->ftab_entries
; i
++)
127 av_freep(&m
->ftab
[i
].font
);
132 static void mov_text_parse_style_record(StyleBox
*style
, const uint8_t **ptr
)
135 style
->font_id
= bytestream_get_be16(ptr
);
137 style
->flags
= bytestream_get_byte(ptr
);
138 style
->bold
= !!(style
->flags
& STYLE_FLAG_BOLD
);
139 style
->italic
= !!(style
->flags
& STYLE_FLAG_ITALIC
);
140 style
->underline
= !!(style
->flags
& STYLE_FLAG_UNDERLINE
);
142 style
->fontsize
= bytestream_get_byte(ptr
);
144 style
->color
= bytestream_get_be24(ptr
);
145 style
->color
= RGB_TO_BGR(style
->color
);
146 style
->alpha
= bytestream_get_byte(ptr
);
/*
 * Parse the sample description held in avctx->extradata into the default
 * settings m->d and the font table m->ftab.
 *
 * Visible steps, in order: two alignment bytes, a 24-bit background colour
 * plus alpha byte, the default style record, then the font table (16-bit
 * entry count followed by one id/length/name triple per entry).
 *
 * Visible failure returns are AVERROR_INVALIDDATA and AVERROR(ENOMEM); the
 * font table is released with mov_text_cleanup_ftab() before the ENOMEM
 * return inside the loop.
 *
 * NOTE(review): many source lines of this function are not visible in this
 * view (the conditions that pick the alignment constant, the bounds checks
 * on remaining, and whatever guards the final use of j, which starts at -1).
 * Confirm those against the full file before relying on this summary.
 */
149 static int mov_text_tx3g(AVCodecContext
*avctx
, MovTextContext
*m
)
151 const uint8_t *tx3g_ptr
= avctx
->extradata
;
152 int i
, j
= -1, font_length
, remaining
= avctx
->extradata_size
- BOX_SIZE_INITIAL
;
153 int8_t v_align
, h_align
;
154 unsigned ftab_entries
;
/* Horizontal alignment byte first, vertical alignment byte second. */
163 h_align
= bytestream_get_byte(&tx3g_ptr
);
164 v_align
= bytestream_get_byte(&tx3g_ptr
);
/* One of nine alignment constants is stored in m->d.alignment; the
 * selecting conditions are on lines not visible here. */
167 m
->d
.alignment
= TOP_LEFT
;
169 m
->d
.alignment
= MIDDLE_LEFT
;
171 m
->d
.alignment
= BOTTOM_LEFT
;
175 m
->d
.alignment
= TOP_CENTER
;
177 m
->d
.alignment
= MIDDLE_CENTER
;
179 m
->d
.alignment
= BOTTOM_CENTER
;
183 m
->d
.alignment
= TOP_RIGHT
;
185 m
->d
.alignment
= MIDDLE_RIGHT
;
187 m
->d
.alignment
= BOTTOM_RIGHT
;
/* Background colour: 24-bit value byte-swapped with RGB_TO_BGR, followed
 * by one alpha byte. */
190 m
->d
.back_color
= bytestream_get_be24(&tx3g_ptr
);
191 m
->d
.back_color
= RGB_TO_BGR(m
->d
.back_color
);
192 m
->d
.back_alpha
= bytestream_get_byte(&tx3g_ptr
);
/* Default style record. */
197 mov_text_parse_style_record(&m
->d
.style
, &tx3g_ptr
);
204 // In case of broken header, init default font
205 m
->d
.font
= ASS_DEFAULT_FONT
;
/* Font table: 16-bit entry count; three bytes (id + name length) are
 * budgeted per entry before the names themselves are read. */
207 ftab_entries
= bytestream_get_be16(&tx3g_ptr
);
210 remaining
-= 3 * ftab_entries
;
212 return AVERROR_INVALIDDATA
;
213 m
->ftab
= av_calloc(ftab_entries
, sizeof(*m
->ftab
));
215 return AVERROR(ENOMEM
);
216 m
->ftab_entries
= ftab_entries
;
218 for (i
= 0; i
< m
->ftab_entries
; i
++) {
219 m
->ftab
[i
].font_id
= bytestream_get_be16(&tx3g_ptr
);
220 if (m
->ftab
[i
].font_id
== m
->d
.style
.font_id
)
222 font_length
= bytestream_get_byte(&tx3g_ptr
);
224 remaining
-= font_length
;
226 mov_text_cleanup_ftab(m
);
/* Font name is copied into a fresh buffer with a terminating NUL. */
229 m
->ftab
[i
].font
= av_malloc(font_length
+ 1);
230 if (!m
->ftab
[i
].font
) {
231 mov_text_cleanup_ftab(m
);
232 return AVERROR(ENOMEM
);
234 bytestream_get_buffer(&tx3g_ptr
, m
->ftab
[i
].font
, font_length
);
235 m
->ftab
[i
].font
[font_length
] = '\0';
238 m
->d
.font
= m
->ftab
[j
].font
;
/*
 * Box handler registered for the twrp tag in box_types: sets TWRP_BOX in
 * m->box_flags and stores the first payload byte as m->w.wrap_flag.
 * size is not referenced in the visible lines.
 * NOTE(review): the return statement is not visible in this view.
 */
242 static int decode_twrp(const uint8_t *tsmb
, MovTextContext
*m
, uint64_t size
)
244 m
->box_flags
|= TWRP_BOX
;
245 m
->w
.wrap_flag
= bytestream_get_byte(&tsmb
);
/*
 * Box handler registered for the hlit tag in box_types: sets HLIT_BOX in
 * m->box_flags and reads two big-endian 16-bit values from the payload
 * into m->h.hlit_start and m->h.hlit_end, in that order.
 * size is not referenced in the visible lines.
 * NOTE(review): the return statement is not visible in this view.
 */
249 static int decode_hlit(const uint8_t *tsmb
, MovTextContext
*m
, uint64_t size
)
251 m
->box_flags
|= HLIT_BOX
;
252 m
->h
.hlit_start
= bytestream_get_be16(&tsmb
);
253 m
->h
.hlit_end
= bytestream_get_be16(&tsmb
);
/*
 * Box handler registered for the hclr tag in box_types: sets HCLR_BOX in
 * m->box_flags and copies four payload bytes into m->c.hlit_color.
 * size is not referenced in the visible lines.
 * NOTE(review): the return statement is not visible in this view.
 */
257 static int decode_hclr(const uint8_t *tsmb
, MovTextContext
*m
, uint64_t size
)
259 m
->box_flags
|= HCLR_BOX
;
260 bytestream_get_buffer(&tsmb
, m
->c
.hlit_color
, 4);
264 static int styles_equivalent(const StyleBox
*a
, const StyleBox
*b
)
266 #define CMP(field) ((a)->field == (b)->field)
267 return CMP(bold
) && CMP(italic
) && CMP(underline
) && CMP(color
) &&
268 CMP(alpha
) && CMP(fontsize
) && CMP(font_id
);
/*
 * Box handler registered for the styl tag in box_types. Reads a big-endian
 * 16-bit entry count and then, per entry, a 16-bit start, a 16-bit end and
 * the fields handled by mov_text_parse_style_record() into the array m->s
 * (resized with av_realloc_array()). STYL_BOX is set in m->box_flags.
 *
 * Visible validation: 2 + 12 * count is compared against size, and each
 * record must satisfy end >= start and start >= the previous record's end,
 * otherwise AVERROR_INVALIDDATA is returned. AVERROR(ENOMEM) is returned
 * after the reallocation, presumably when it fails.
 *
 * Records that cover no characters, that equal the default style, or that
 * continue an equivalent adjacent record are skipped or merged, as the
 * inline comments state.
 *
 * NOTE(review): several source lines are not visible in this view, among
 * them the bookkeeping performed when a record is skipped or merged and the
 * final return. Confirm against the full file.
 */
272 static int decode_styl(const uint8_t *tsmb
, MovTextContext
*m
, uint64_t size
)
275 int style_entries
= bytestream_get_be16(&tsmb
);
278 // A single style record is of length 12 bytes.
279 if (2 + style_entries
* 12 > size
)
282 tmp
= av_realloc_array(m
->s
, style_entries
, sizeof(*m
->s
));
284 return AVERROR(ENOMEM
);
286 m
->style_entries
= style_entries
;
288 m
->box_flags
|= STYL_BOX
;
289 for(i
= 0; i
< m
->style_entries
; i
++) {
290 StyleBox
*style
= &m
->s
[i
];
292 style
->start
= bytestream_get_be16(&tsmb
);
293 style
->end
= bytestream_get_be16(&tsmb
);
/* Ranges must be well-formed and must not start before the previous
 * record ends. */
294 if (style
->end
< style
->start
||
295 (i
&& style
->start
< m
->s
[i
- 1].end
)) {
297 return AVERROR_INVALIDDATA
;
299 if (style
->start
== style
->end
) {
300 /* Skip this style as it applies to no character */
307 mov_text_parse_style_record(style
, &tsmb
);
308 if (styles_equivalent(style
, &m
->d
.style
)) {
309 /* Skip this style as it is equivalent to the default style */
313 } else if (i
&& style
->start
== style
[-1].end
&&
314 styles_equivalent(style
, &style
[-1])) {
315 /* Merge the two adjacent styles */
316 style
[-1].end
= style
->end
;
325 static const Box box_types
[] = {
326 { MKBETAG('s','t','y','l'), 2, decode_styl
},
327 { MKBETAG('h','l','i','t'), 4, decode_hlit
},
328 { MKBETAG('h','c','l','r'), 4, decode_hclr
},
329 { MKBETAG('t','w','r','p'), 1, decode_twrp
}
332 const static size_t box_count
= FF_ARRAY_ELEMS(box_types
);
334 // Return byte length of the UTF-8 sequence starting at text[0]. 0 on error.
// Input bytes are fetched only while text < text_end; once the input is
// exhausted err is set to 1 and 0 is handed to GET_UTF8 instead of a byte.
// The error action passed to GET_UTF8 is a jump to the error label.
// NOTE(review): the declarations of c and err, the return statements and
// the error label are on lines not visible in this view.
335 static int get_utf8_length_at(const char *text
, const char *text_end
)
337 const char *start
= text
;
340 GET_UTF8(c
, text
< text_end
? (uint8_t)*text
++ : (err
= 1, 0), goto error
;);
/*
 * Append the text in [text, text_end) to buf, inserting ASS override tags
 * derived from the boxes recorded in the decoder context.
 *
 * Visible behaviour:
 *  - with TWRP_BOX set and non-empty text, a {\q1} or {\q2} tag is written
 *    first, depending on whether wrap_flag equals 1;
 *  - with STYL_BOX set, {\r} is written where text_pos reaches the end of
 *    the current style entry, and at the start of an entry one tag is
 *    written per attribute that differs from the default style (bold,
 *    italic, underline, font size, font name looked up in m->ftab, primary
 *    colour, primary alpha written as 255 - alpha);
 *  - with HLIT_BOX set, highlight colour tags are written at hlit_start
 *    and restored at hlit_end, using m->c.hlit_color when HCLR_BOX is set;
 *  - each character length comes from get_utf8_length_at(); an error is
 *    logged for an invalid byte; \N is emitted on a path whose condition is
 *    not visible; other characters are appended with
 *    av_bprint_append_data().
 *
 * NOTE(review): declarations, the advance of text_pos and entry, the
 * handling after an invalid byte and the return value are on lines not
 * visible in this view. Confirm against the full file.
 */
348 static int text_to_ass(AVBPrint
*buf
, const char *text
, const char *text_end
,
349 AVCodecContext
*avctx
)
351 MovTextContext
*m
= avctx
->priv_data
;
352 const StyleBox
*const default_style
= &m
->d
.style
;
356 int color
= default_style
->color
;
358 if (text
< text_end
&& m
->box_flags
& TWRP_BOX
) {
359 if (m
->w
.wrap_flag
== 1) {
360 av_bprintf(buf
, "{\\q1}"); /* End of line wrap */
362 av_bprintf(buf
, "{\\q2}"); /* No wrap */
366 while (text
< text_end
) {
369 if ((m
->box_flags
& STYL_BOX
) && entry
< m
->style_entries
) {
370 const StyleBox
*style
= &m
->s
[entry
];
371 if (text_pos
== style
->end
) {
372 av_bprintf(buf
, "{\\r}");
373 color
= default_style
->color
;
377 if (entry
< m
->style_entries
&& text_pos
== style
->start
) {
378 if (style
->bold
^ default_style
->bold
)
379 av_bprintf(buf
, "{\\b%d}", style
->bold
);
380 if (style
->italic
^ default_style
->italic
)
381 av_bprintf(buf
, "{\\i%d}", style
->italic
);
382 if (style
->underline
^ default_style
->underline
)
383 av_bprintf(buf
, "{\\u%d}", style
->underline
);
384 if (style
->fontsize
!= default_style
->fontsize
)
385 av_bprintf(buf
, "{\\fs%d}", style
->fontsize
);
386 if (style
->font_id
!= default_style
->font_id
)
387 for (i
= 0; i
< m
->ftab_entries
; i
++) {
388 if (style
->font_id
== m
->ftab
[i
].font_id
)
389 av_bprintf(buf
, "{\\fn%s}", m
->ftab
[i
].font
);
391 if (default_style
->color
!= style
->color
) {
392 color
= style
->color
;
393 av_bprintf(buf
, "{\\1c&H%X&}", color
);
395 if (default_style
->alpha
!= style
->alpha
)
396 av_bprintf(buf
, "{\\1a&H%02X&}", 255 - style
->alpha
);
399 if (m
->box_flags
& HLIT_BOX
) {
400 if (text_pos
== m
->h
.hlit_start
) {
401 /* If hclr box is present, set the secondary color to the color
402 * specified. Otherwise, set primary color to white and secondary
403 * color to black. These colors will come from TextSampleModifier
404 * boxes in future and inverse video technique for highlight will
407 if (m
->box_flags
& HCLR_BOX
) {
408 av_bprintf(buf
, "{\\2c&H%02x%02x%02x&}", m
->c
.hlit_color
[2],
409 m
->c
.hlit_color
[1], m
->c
.hlit_color
[0]);
411 av_bprintf(buf
, "{\\1c&H000000&}{\\2c&HFFFFFF&}");
414 if (text_pos
== m
->h
.hlit_end
) {
415 if (m
->box_flags
& HCLR_BOX
) {
416 av_bprintf(buf
, "{\\2c&H%X&}", default_style
->color
);
418 av_bprintf(buf
, "{\\1c&H%X&}{\\2c&H%X&}",
419 color
, default_style
->color
);
424 len
= get_utf8_length_at(text
, text_end
);
426 av_log(avctx
, AV_LOG_ERROR
, "invalid UTF-8 byte in subtitle\n");
433 av_bprintf(buf
, "\\N");
436 av_bprint_append_data(buf
, text
, len
);
/*
 * Decoder init callback. Parses the sample description through
 * mov_text_tx3g() and then installs an ASS subtitle header.
 *
 * On the visible full-header path, frame_width and frame_height fall back
 * to ASS_DEFAULT_PLAYRESX and ASS_DEFAULT_PLAYRESY when either is zero, and
 * ff_ass_subtitle_header_full() receives the default font, font size,
 * colours packed as (255 - alpha) << 24 | color, the bold, italic and
 * underline flags, and the alignment from m->d. The other visible return
 * uses ff_ass_subtitle_header_default().
 *
 * NOTE(review): the condition on ret that chooses between the two returns
 * is on a line not visible in this view; presumably the default header is
 * the fallback when mov_text_tx3g() fails. Confirm against the full file.
 */
446 static int mov_text_init(AVCodecContext
*avctx
) {
448 * TODO: Handle the default text style.
449 * NB: Most players ignore styles completely, with the result that
450 * it's very common to find files where the default style is broken
451 * and respecting it results in a worse experience than ignoring it.
454 MovTextContext
*m
= avctx
->priv_data
;
455 ret
= mov_text_tx3g(avctx
, m
);
457 const StyleBox
*const default_style
= &m
->d
.style
;
458 if (!m
->frame_width
|| !m
->frame_height
) {
459 m
->frame_width
= ASS_DEFAULT_PLAYRESX
;
460 m
->frame_height
= ASS_DEFAULT_PLAYRESY
;
462 return ff_ass_subtitle_header_full(avctx
,
463 m
->frame_width
, m
->frame_height
,
464 m
->d
.font
, default_style
->fontsize
,
465 (255U - default_style
->alpha
) << 24 | default_style
->color
,
466 (255U - default_style
->alpha
) << 24 | default_style
->color
,
467 (255U - m
->d
.back_alpha
) << 24 | m
->d
.back_color
,
468 (255U - m
->d
.back_alpha
) << 24 | m
->d
.back_color
,
469 default_style
->bold
, default_style
->italic
, default_style
->underline
,
470 ASS_DEFAULT_BORDERSTYLE
, m
->d
.alignment
);
472 return ff_ass_subtitle_header_default(avctx
);
/*
 * Decode one packet into an ASS rectangle.
 *
 * Visible flow:
 *  - a NULL data pointer or a packet shorter than 2 bytes is rejected with
 *    AVERROR_INVALIDDATA;
 *  - a 2-byte packet returns 0 when its 16-bit value is zero and
 *    AVERROR_INVALIDDATA otherwise;
 *  - the leading big-endian 16-bit value is the text length, and end is
 *    clamped to the packet size;
 *  - when bytes follow the text they are walked as boxes: 32-bit size,
 *    32-bit type, and a 64-bit size when the 32-bit size equals 1; a size
 *    smaller than the box header is logged and rejected; a matching
 *    box_types entry is dispatched to its decode callback;
 *  - text_to_ass() renders the text, ff_ass_add_rect() adds it with
 *    m->readorder post-incremented, the AVBPrint is finalized and
 *    *got_sub_ptr is set when sub holds at least one rectangle.
 *
 * NOTE(review): many source lines are not visible in this view, including
 * local declarations, how size_var is assigned, the actions taken when a
 * box is truncated or too small, the handling of ret_tsmb, cleanup calls
 * and the final return. Confirm against the full file.
 */
475 static int mov_text_decode_frame(AVCodecContext
*avctx
, AVSubtitle
*sub
,
476 int *got_sub_ptr
, const AVPacket
*avpkt
)
478 MovTextContext
*m
= avctx
->priv_data
;
481 const char *ptr
= avpkt
->data
, *end
;
485 if (!ptr
|| avpkt
->size
< 2)
486 return AVERROR_INVALIDDATA
;
489 * A packet of size two with value zero is an empty subtitle
490 * used to mark the end of the previous non-empty subtitle.
491 * We can just drop them here as we have duration information
492 * already. If the value is non-zero, then it's technically a
495 if (avpkt
->size
== 2)
496 return AV_RB16(ptr
) == 0 ? 0 : AVERROR_INVALIDDATA
;
499 * The first two bytes of the packet are the length of the text string
500 * In complex cases, there are style descriptors appended to the string
501 * so we can't just assume the packet size is the string size.
503 text_length
= AV_RB16(ptr
);
504 end
= ptr
+ FFMIN(2 + text_length
, avpkt
->size
);
509 m
->style_entries
= 0;
511 // Note that the spec recommends lines be no longer than 2048 characters.
512 av_bprint_init(&buf
, 0, AV_BPRINT_SIZE_UNLIMITED
);
513 if (text_length
+ 2 < avpkt
->size
) {
514 const uint8_t *tsmb
= end
;
515 const uint8_t *const tsmb_end
= avpkt
->data
+ avpkt
->size
;
516 // A box is a minimum of 8 bytes.
517 while (tsmb_end
- tsmb
>= 8) {
518 uint64_t tsmb_size
= bytestream_get_be32(&tsmb
);
519 uint32_t tsmb_type
= bytestream_get_be32(&tsmb
);
520 int size_var
, ret_tsmb
;
522 if (tsmb_size
== 1) {
523 if (tsmb_end
- tsmb
< 8)
525 tsmb_size
= bytestream_get_be64(&tsmb
);
529 //size_var is equal to 8 or 16 depending on the size of box
531 if (tsmb_size
< size_var
) {
532 av_log(avctx
, AV_LOG_ERROR
, "tsmb_size invalid\n");
533 return AVERROR_INVALIDDATA
;
535 tsmb_size
-= size_var
;
537 if (tsmb_end
- tsmb
< tsmb_size
)
540 for (i
= 0; i
< box_count
; i
++) {
541 if (tsmb_type
== box_types
[i
].type
) {
542 if (tsmb_size
< box_types
[i
].base_size
)
544 ret_tsmb
= box_types
[i
].decode(tsmb
, m
, tsmb_size
);
551 text_to_ass(&buf
, ptr
, end
, avctx
);
554 text_to_ass(&buf
, ptr
, end
, avctx
);
556 ret
= ff_ass_add_rect(sub
, buf
.str
, m
->readorder
++, 0, NULL
, NULL
);
557 av_bprint_finalize(&buf
, NULL
);
560 *got_sub_ptr
= sub
->num_rects
> 0;
/*
 * Decoder close callback: releases the font table through
 * mov_text_cleanup_ftab().
 * NOTE(review): source lines 568-570 are not visible in this view; any
 * further cleanup and the return value must be confirmed against the full
 * file.
 */
564 static int mov_text_decode_close(AVCodecContext
*avctx
)
566 MovTextContext
*m
= avctx
->priv_data
;
567 mov_text_cleanup_ftab(m
);
/*
 * Decoder flush callback. The guarded statement runs only when
 * AV_CODEC_FLAG2_RO_FLUSH_NOOP is not set in avctx->flags2.
 * NOTE(review): the guarded statement is on a line not visible in this
 * view; presumably it resets m->readorder. Confirm against the full file.
 */
572 static void mov_text_flush(AVCodecContext
*avctx
)
574 MovTextContext
*m
= avctx
->priv_data
;
575 if (!(avctx
->flags2
& AV_CODEC_FLAG2_RO_FLUSH_NOOP
))
579 #define OFFSET(x) offsetof(MovTextContext, x)
/* Flag set applied to every entry of the options table. The expansion is
 * parenthesised so it stays a single operand next to operators that bind
 * tighter than |, such as & and ^. */
#define FLAGS (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_SUBTITLE_PARAM)
/*
 * User-settable decoder options. width and height are integers stored in
 * frame_width and frame_height of MovTextContext; both default to 0 and
 * accept values from 0 to INT_MAX.
 * NOTE(review): the end of the table is on lines not visible in this view.
 */
581 static const AVOption options
[] = {
582 { "width", "Frame width, usually video width", OFFSET(frame_width
), AV_OPT_TYPE_INT
, {.i64
=0}, 0, INT_MAX
, FLAGS
},
583 { "height", "Frame height, usually video height", OFFSET(frame_height
), AV_OPT_TYPE_INT
, {.i64
=0}, 0, INT_MAX
, FLAGS
},
/*
 * AVClass describing the decoder private context; it is referenced as
 * priv_class by ff_movtext_decoder.
 * NOTE(review): source line 590 is not visible in this view; presumably it
 * attaches the options table. Confirm against the full file.
 */
587 static const AVClass mov_text_decoder_class
= {
588 .class_name
= "MOV text decoder",
589 .item_name
= av_default_item_name
,
591 .version
= LIBAVUTIL_VERSION_INT
,
/*
 * Codec definition for the mov_text subtitle decoder: names the codec,
 * sizes the private context as MovTextContext and wires the init, decode,
 * close and flush callbacks defined in this file.
 */
594 const FFCodec ff_movtext_decoder
= {
595 .p
.name
= "mov_text",
596 CODEC_LONG_NAME("3GPP Timed Text subtitle"),
597 .p
.type
= AVMEDIA_TYPE_SUBTITLE
,
598 .p
.id
= AV_CODEC_ID_MOV_TEXT
,
599 .priv_data_size
= sizeof(MovTextContext
),
600 .p
.priv_class
= &mov_text_decoder_class
,
601 .init
= mov_text_init
,
602 FF_CODEC_DECODE_SUB_CB(mov_text_decode_frame
),
603 .close
= mov_text_decode_close
,
604 .flush
= mov_text_flush
,