/*
 * Lagarith lossless decoder
 * Copyright (c) 2009 Nathan Caldwell <saintdev (at) gmail.com>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Lagarith lossless decoder
 * @author Nathan Caldwell
 */
30 #include "libavutil/thread.h"
33 #include "codec_internal.h"
36 #include "lagarithrac.h"
37 #include "lossless_videodsp.h"
/**
 * Frame type identifiers used by the Lagarith decoder.
 * The numeric values are fixed; lag_decode_frame() switches on them.
 */
enum LagarithFrameType {
    FRAME_RAW           = 1,    /**< uncompressed */
    FRAME_U_RGB24       = 2,    /**< unaligned RGB24 */
    FRAME_ARITH_YUY2    = 3,    /**< arithmetic coded YUY2 */
    FRAME_ARITH_RGB24   = 4,    /**< arithmetic coded RGB24 */
    FRAME_SOLID_GRAY    = 5,    /**< solid grayscale color frame */
    FRAME_SOLID_COLOR   = 6,    /**< solid non-grayscale color frame */
    FRAME_OLD_ARITH_RGB = 7,    /**< obsolete arithmetic coded RGB (no longer encoded by upstream since version 1.1.0) */
    FRAME_ARITH_RGBA    = 8,    /**< arithmetic coded RGBA */
    FRAME_SOLID_RGBA    = 9,    /**< solid RGBA color frame */
    FRAME_ARITH_YV12    = 10,   /**< arithmetic coded YV12 */
    FRAME_REDUCED_RES   = 11,   /**< reduced resolution YV12 frame */
};
56 typedef struct LagarithContext
{
57 AVCodecContext
*avctx
;
58 LLVidDSPContext llviddsp
;
59 int zeros
; /**< number of consecutive zero bytes encountered */
60 int zeros_rem
; /**< number of zero bytes remaining to output */
63 static VLCElem lag_tab
[1 << VLC_BITS
];
/* First of the three parallel 53-entry tables handed to
 * VLC_INIT_STATIC_SPARSE_TABLE() when building lag_tab; entry i here pairs
 * with lag_codes[i] and lag_symbols[i].
 * NOTE(review): presumably the code length in bits of each entry - confirm
 * against the VLC API. */
static const uint8_t lag_bits[] = {
    7, 7, 2, 7, 3, 4, 5, 6, 7, 7, 7, 7, 7, 6, 7, 4, 5, 7, 7, 7, 7,
    5, 6, 7, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
};
/* Second of the three parallel 53-entry tables handed to
 * VLC_INIT_STATIC_SPARSE_TABLE() when building lag_tab; entry i here pairs
 * with lag_bits[i] and lag_symbols[i].
 * NOTE(review): presumably the code word of each entry - confirm against
 * the VLC API. */
static const uint8_t lag_codes[] = {
    0x01, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04, 0x05,
    0x08, 0x09, 0x0A, 0x0B, 0x0B, 0x0B, 0x0B, 0x10, 0x11, 0x12, 0x13,
    0x13, 0x13, 0x14, 0x15, 0x20, 0x21, 0x22, 0x23, 0x23, 0x24, 0x25,
    0x28, 0x29, 0x2A, 0x2B, 0x2B, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45,
    0x48, 0x49, 0x4A, 0x4B, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55,
};
/* Third of the three parallel 53-entry tables handed to
 * VLC_INIT_STATIC_SPARSE_TABLE() when building lag_tab; entry i here pairs
 * with lag_bits[i] and lag_codes[i].
 * lag_decode_prob() uses the value returned by get_vlc2() as the number of
 * bits to read next with get_bits_long().
 * NOTE(review): presumably these are the values get_vlc2() returns -
 * confirm against the VLC API. */
static const uint8_t lag_symbols[] = {
    20, 12,  0, 12,  1,  2,  4,  7,  7, 28,  4, 25, 17,
    10, 17,  3,  6,  2, 23, 15, 15,  5,  9, 10, 31,  1, 22,
    14, 14,  8,  9, 30,  6, 27, 19, 11, 19,  0, 21, 13, 13,
     8, 29,  5, 26, 18, 18,  3, 24, 16, 16, 11, 32,
};
86 static av_cold
void lag_init_static_data(void)
88 VLC_INIT_STATIC_SPARSE_TABLE(lag_tab
, VLC_BITS
, FF_ARRAY_ELEMS(lag_bits
),
89 lag_bits
, 1, 1, lag_codes
, 1, 1, lag_symbols
, 1, 1, 0);
/**
 * Compute the 52-bit mantissa of 1/(double)denom.
 * This crazy format uses floats in an entropy coder and we have to match x86
 * rounding exactly, thus ordinary floats aren't portable enough.
 * @param denom denominator
 * @return 52-bit mantissa
 */
100 static uint64_t softfloat_reciprocal(uint32_t denom
)
102 int shift
= av_log2(denom
- 1) + 1;
103 uint64_t ret
= (1ULL << 52) / denom
;
104 uint64_t err
= (1ULL << 52) - ret
* denom
;
108 return ret
+ err
/ denom
;
/**
 * Compute (uint32_t)(x*f), where f has the given mantissa and exponent 0.
 * Used in combination with softfloat_reciprocal() this computes x/(double)denom.
 * @param x 32-bit integer factor
 * @param mantissa mantissa of f with exponent 0
 * @return 32-bit integer value (x*f)
 * @see softfloat_reciprocal
 */
119 static uint32_t softfloat_mul(uint32_t x
, uint64_t mantissa
)
121 uint64_t l
= x
* (mantissa
& 0xffffffff);
122 uint64_t h
= x
* (mantissa
>> 32);
125 l
+= 1LL << av_log2(h
>> 21);
/**
 * Convert a signed byte into an unsigned zero-run length.
 *
 * Non-negative x maps to 2*x and negative x maps to -2*x - 1, so the
 * values 0, -1, 1, -2, ... become 0, 1, 2, 3, ...
 *
 * @param x signed byte read from the stream
 * @return run length in the range 0..255
 */
static uint8_t lag_calc_zero_run(int8_t x)
{
    /* (x >> 7) is all-ones for negative x and zero otherwise; this relies
     * on the right shift of a negative value being arithmetic, which is
     * implementation-defined in C. */
    return (x * 2) ^ (x >> 7);
}
135 static int lag_decode_prob(GetBitContext
*gb
, uint32_t *value
)
139 bits
= get_vlc2(gb
, lag_tab
, VLC_BITS
, 1);
142 return AVERROR_INVALIDDATA
;
143 } else if (bits
== 0) {
148 val
= get_bits_long(gb
, bits
);
156 static int lag_read_prob_header(lag_rac
*rac
, GetBitContext
*gb
)
158 int i
, j
, scale_factor
;
159 unsigned prob
, cumulative_target
;
160 unsigned cumul_prob
= 0;
161 unsigned scaled_cumul_prob
= 0;
165 rac
->prob
[257] = UINT_MAX
;
166 /* Read probabilities from bitstream */
167 for (i
= 1; i
< 257; i
++) {
168 if (lag_decode_prob(gb
, &rac
->prob
[i
]) < 0) {
169 av_log(rac
->logctx
, AV_LOG_ERROR
, "Invalid probability encountered.\n");
170 return AVERROR_INVALIDDATA
;
172 if ((uint64_t)cumul_prob
+ rac
->prob
[i
] > UINT_MAX
) {
173 av_log(rac
->logctx
, AV_LOG_ERROR
, "Integer overflow encountered in cumulative probability calculation.\n");
174 return AVERROR_INVALIDDATA
;
176 cumul_prob
+= rac
->prob
[i
];
178 if (lag_decode_prob(gb
, &prob
)) {
179 av_log(rac
->logctx
, AV_LOG_ERROR
, "Invalid probability run encountered.\n");
180 return AVERROR_INVALIDDATA
;
184 for (j
= 0; j
< prob
; j
++)
192 av_log(rac
->logctx
, AV_LOG_ERROR
, "All probabilities are 0!\n");
193 return AVERROR_INVALIDDATA
;
196 if (nnz
== 1 && (show_bits_long(gb
, 32) & 0xFFFFFF)) {
197 return AVERROR_INVALIDDATA
;
200 /* Scale probabilities so cumulative probability is an even power of 2. */
201 scale_factor
= av_log2(cumul_prob
);
203 if (cumul_prob
& (cumul_prob
- 1)) {
204 uint64_t mul
= softfloat_reciprocal(cumul_prob
);
205 for (i
= 1; i
<= 128; i
++) {
206 rac
->prob
[i
] = softfloat_mul(rac
->prob
[i
], mul
);
207 scaled_cumul_prob
+= rac
->prob
[i
];
209 if (scaled_cumul_prob
<= 0) {
210 av_log(rac
->logctx
, AV_LOG_ERROR
, "Scaled probabilities invalid\n");
211 return AVERROR_INVALIDDATA
;
213 for (; i
< 257; i
++) {
214 rac
->prob
[i
] = softfloat_mul(rac
->prob
[i
], mul
);
215 scaled_cumul_prob
+= rac
->prob
[i
];
219 if (scale_factor
>= 32U)
220 return AVERROR_INVALIDDATA
;
221 cumulative_target
= 1U << scale_factor
;
223 if (scaled_cumul_prob
> cumulative_target
) {
224 av_log(rac
->logctx
, AV_LOG_ERROR
,
225 "Scaled probabilities are larger than target!\n");
226 return AVERROR_INVALIDDATA
;
229 scaled_cumul_prob
= cumulative_target
- scaled_cumul_prob
;
231 for (i
= 1; scaled_cumul_prob
; i
= (i
& 0x7f) + 1) {
236 /* Comment from reference source:
237 * if (b & 0x80 == 0) { // order of operations is 'wrong'; it has been left this way
238 * // since the compression change is negligible and fixing it
239 * // breaks backwards compatibility
240 * b =- (signed int)b;
250 if (scale_factor
> 23)
251 return AVERROR_INVALIDDATA
;
253 rac
->scale
= scale_factor
;
255 /* Fill probability array with cumulative probability for each symbol. */
256 for (i
= 1; i
< 257; i
++)
257 rac
->prob
[i
] += rac
->prob
[i
- 1];
262 static void add_lag_median_prediction(uint8_t *dst
, uint8_t *src1
,
263 uint8_t *diff
, int w
, int *left
,
266 /* This is almost identical to add_hfyu_median_pred in huffyuvdsp.h.
267 * However the &0xFF on the gradient predictor yields incorrect output
276 for (i
= 0; i
< w
; i
++) {
277 l
= mid_pred(l
, src1
[i
], l
+ src1
[i
] - lt
) + diff
[i
];
286 static void lag_pred_line(LagarithContext
*l
, uint8_t *buf
,
287 int width
, int stride
, int line
)
292 /* Left prediction only for first line */
293 L
= l
->llviddsp
.add_left_pred(buf
, buf
, width
, 0);
295 /* Left pixel is actually prev_row[width] */
296 L
= buf
[width
- stride
- 1];
299 /* Second line, left predict first pixel, the rest of the line is median predicted
300 * NOTE: In the case of RGB this pixel is top predicted */
301 TL
= l
->avctx
->pix_fmt
== AV_PIX_FMT_YUV420P
? buf
[-stride
] : L
;
303 /* Top left is 2 rows back, last pixel */
304 TL
= buf
[width
- (2 * stride
) - 1];
307 add_lag_median_prediction(buf
, buf
- stride
, buf
,
312 static void lag_pred_line_yuy2(LagarithContext
*l
, uint8_t *buf
,
313 int width
, int stride
, int line
,
322 l
->llviddsp
.add_left_pred(buf
, buf
, width
, 0);
328 const int HEAD
= is_luma
? 4 : 2;
331 L
= buf
[width
- stride
- 1];
332 TL
= buf
[HEAD
- stride
- 1];
333 for (i
= 0; i
< HEAD
; i
++) {
337 for (; i
< width
; i
++) {
338 L
= mid_pred(L
& 0xFF, buf
[i
- stride
], (L
+ buf
[i
- stride
] - TL
) & 0xFF) + buf
[i
];
339 TL
= buf
[i
- stride
];
343 TL
= buf
[width
- (2 * stride
) - 1];
344 L
= buf
[width
- stride
- 1];
345 l
->llviddsp
.add_median_pred(buf
, buf
- stride
, buf
, width
, &L
, &TL
);
349 static int lag_decode_line(LagarithContext
*l
, lag_rac
*rac
,
350 uint8_t *dst
, int width
, int stride
,
359 /* Output any zeros remaining from the previous run */
362 int count
= FFMIN(l
->zeros_rem
, width
- i
);
363 memset(dst
+ i
, 0, count
);
365 l
->zeros_rem
-= count
;
369 dst
[i
] = lag_get_rac(rac
);
378 if (l
->zeros
== esc_count
) {
379 int index
= lag_get_rac(rac
);
384 l
->zeros_rem
= lag_calc_zero_run(index
);
391 static int lag_decode_zero_run_line(LagarithContext
*l
, uint8_t *dst
,
392 const uint8_t *src
, const uint8_t *src_end
,
393 int width
, int esc_count
)
397 uint8_t zero_run
= 0;
398 const uint8_t *src_start
= src
;
399 uint8_t mask1
= -(esc_count
< 2);
400 uint8_t mask2
= -(esc_count
< 3);
401 uint8_t *end
= dst
+ (width
- 2);
403 avpriv_request_sample(l
->avctx
, "zero_run_line");
405 memset(dst
, 0, width
);
409 count
= FFMIN(l
->zeros_rem
, width
- i
);
410 if (end
- dst
< count
) {
411 av_log(l
->avctx
, AV_LOG_ERROR
, "Too many zeros remaining.\n");
412 return AVERROR_INVALIDDATA
;
415 memset(dst
, 0, count
);
416 l
->zeros_rem
-= count
;
422 while (!zero_run
&& dst
+ i
< end
) {
424 if (i
+2 >= src_end
- src
)
425 return AVERROR_INVALIDDATA
;
427 !(src
[i
] | (src
[i
+ 1] & mask1
) | (src
[i
+ 2] & mask2
));
434 return AVERROR_INVALIDDATA
;
437 l
->zeros_rem
= lag_calc_zero_run(src
[i
]);
447 return src
- src_start
;
452 static int lag_decode_arith_plane(LagarithContext
*l
, uint8_t *dst
,
453 int width
, int height
, int stride
,
454 const uint8_t *src
, int src_size
)
463 const uint8_t *src_end
= src
+ src_size
;
466 rac
.logctx
= l
->avctx
;
470 return AVERROR_INVALIDDATA
;
474 length
= width
* height
;
476 return AVERROR_INVALIDDATA
;
477 if (esc_count
&& AV_RL32(src
+ 1) < length
) {
478 length
= AV_RL32(src
+ 1);
482 if ((ret
= init_get_bits8(&gb
, src
+ offset
, src_size
- offset
)) < 0)
485 if ((ret
= lag_read_prob_header(&rac
, &gb
)) < 0)
488 ff_lag_rac_init(&rac
, &gb
, length
- stride
);
489 for (i
= 0; i
< height
; i
++) {
490 if (rac
.overread
> MAX_OVERREAD
)
491 return AVERROR_INVALIDDATA
;
492 read
+= lag_decode_line(l
, &rac
, dst
+ (i
* stride
), width
,
497 av_log(l
->avctx
, AV_LOG_WARNING
,
498 "Output more bytes than length (%d of %"PRIu32
")\n", read
,
500 } else if (esc_count
< 8) {
505 /* Zero run coding only, no range coding. */
506 for (i
= 0; i
< height
; i
++) {
507 int res
= lag_decode_zero_run_line(l
, dst
+ (i
* stride
), src
,
508 src_end
, width
, esc_count
);
514 if (src_size
< width
* height
)
515 return AVERROR_INVALIDDATA
; // buffer not big enough
516 /* Plane is stored uncompressed */
517 for (i
= 0; i
< height
; i
++) {
518 memcpy(dst
+ (i
* stride
), src
, width
);
522 } else if (esc_count
== 0xff) {
523 /* Plane is a solid run of given value */
524 for (i
= 0; i
< height
; i
++)
525 memset(dst
+ i
* stride
, src
[1], width
);
526 /* Do not apply prediction.
527 Note: memset to 0 above, setting first value to src[1]
528 and applying prediction gives the same result. */
531 av_log(l
->avctx
, AV_LOG_ERROR
,
532 "Invalid zero run escape code! (%#x)\n", esc_count
);
533 return AVERROR_INVALIDDATA
;
536 if (l
->avctx
->pix_fmt
!= AV_PIX_FMT_YUV422P
) {
537 for (i
= 0; i
< height
; i
++) {
538 lag_pred_line(l
, dst
, width
, stride
, i
);
542 for (i
= 0; i
< height
; i
++) {
543 lag_pred_line_yuy2(l
, dst
, width
, stride
, i
,
544 width
== l
->avctx
->width
);
/**
 * Decode a frame.
 * @param avctx codec context
 * @param p output AVFrame
 * @param got_frame presumably set non-zero when a picture is returned (the assignment is not visible in this excerpt; confirm)
 * @param avpkt input packet
 * @return number of consumed bytes on success or negative if decode fails
 */
560 static int lag_decode_frame(AVCodecContext
*avctx
, AVFrame
*p
,
561 int *got_frame
, AVPacket
*avpkt
)
563 const uint8_t *buf
= avpkt
->data
;
564 unsigned int buf_size
= avpkt
->size
;
565 LagarithContext
*l
= avctx
->priv_data
;
567 uint32_t offset_gu
= 0, offset_bv
= 0, offset_ry
= 9;
570 int i
, j
, planes
= 3;
575 offset_gu
= AV_RL32(buf
+ 1);
576 offset_bv
= AV_RL32(buf
+ 5);
579 case FRAME_SOLID_RGBA
:
580 avctx
->pix_fmt
= AV_PIX_FMT_GBRAP
;
581 case FRAME_SOLID_GRAY
:
582 if (frametype
== FRAME_SOLID_GRAY
)
583 if (avctx
->bits_per_coded_sample
== 24) {
584 avctx
->pix_fmt
= AV_PIX_FMT_GBRP
;
586 avctx
->pix_fmt
= AV_PIX_FMT_GBRAP
;
590 if ((ret
= ff_thread_get_buffer(avctx
, p
, 0)) < 0)
593 if (frametype
== FRAME_SOLID_RGBA
) {
594 for (i
= 0; i
< avctx
->height
; i
++) {
595 memset(p
->data
[0] + i
* p
->linesize
[0], buf
[2], avctx
->width
);
596 memset(p
->data
[1] + i
* p
->linesize
[1], buf
[1], avctx
->width
);
597 memset(p
->data
[2] + i
* p
->linesize
[2], buf
[3], avctx
->width
);
598 memset(p
->data
[3] + i
* p
->linesize
[3], buf
[4], avctx
->width
);
601 for (i
= 0; i
< avctx
->height
; i
++) {
602 for (j
= 0; j
< planes
; j
++)
603 memset(p
->data
[j
] + i
* p
->linesize
[j
], buf
[1], avctx
->width
);
607 case FRAME_SOLID_COLOR
:
608 if (avctx
->bits_per_coded_sample
== 24) {
609 avctx
->pix_fmt
= AV_PIX_FMT_GBRP
;
611 avctx
->pix_fmt
= AV_PIX_FMT_GBRAP
;
614 if ((ret
= ff_thread_get_buffer(avctx
, p
,0)) < 0)
617 for (i
= 0; i
< avctx
->height
; i
++) {
618 memset(p
->data
[0] + i
* p
->linesize
[0], buf
[2], avctx
->width
);
619 memset(p
->data
[1] + i
* p
->linesize
[1], buf
[1], avctx
->width
);
620 memset(p
->data
[2] + i
* p
->linesize
[2], buf
[3], avctx
->width
);
621 if (avctx
->pix_fmt
== AV_PIX_FMT_GBRAP
)
622 memset(p
->data
[3] + i
* p
->linesize
[3], 0xFFu
, avctx
->width
);
625 case FRAME_ARITH_RGBA
:
626 avctx
->pix_fmt
= AV_PIX_FMT_GBRAP
;
629 offs
[3] = AV_RL32(buf
+ 9);
630 case FRAME_ARITH_RGB24
:
632 if (frametype
== FRAME_ARITH_RGB24
|| frametype
== FRAME_U_RGB24
)
633 avctx
->pix_fmt
= AV_PIX_FMT_GBRP
;
635 if ((ret
= ff_thread_get_buffer(avctx
, p
, 0)) < 0)
642 for (i
= 0; i
< planes
; i
++)
643 srcs
[i
] = p
->data
[i
] + (avctx
->height
- 1) * p
->linesize
[i
];
644 for (i
= 0; i
< planes
; i
++)
645 if (buf_size
<= offs
[i
]) {
646 av_log(avctx
, AV_LOG_ERROR
,
647 "Invalid frame offsets\n");
648 return AVERROR_INVALIDDATA
;
651 for (i
= 0; i
< planes
; i
++) {
652 ret
= lag_decode_arith_plane(l
, srcs
[i
],
653 avctx
->width
, avctx
->height
,
654 -p
->linesize
[i
], buf
+ offs
[i
],
659 for (i
= 0; i
< avctx
->height
; i
++) {
660 l
->llviddsp
.add_bytes(p
->data
[0] + i
* p
->linesize
[0], p
->data
[1] + i
* p
->linesize
[1], avctx
->width
);
661 l
->llviddsp
.add_bytes(p
->data
[2] + i
* p
->linesize
[2], p
->data
[1] + i
* p
->linesize
[1], avctx
->width
);
663 FFSWAP(uint8_t*, p
->data
[0], p
->data
[1]);
664 FFSWAP(int, p
->linesize
[0], p
->linesize
[1]);
665 FFSWAP(uint8_t*, p
->data
[2], p
->data
[1]);
666 FFSWAP(int, p
->linesize
[2], p
->linesize
[1]);
668 case FRAME_ARITH_YUY2
:
669 avctx
->pix_fmt
= AV_PIX_FMT_YUV422P
;
671 if ((ret
= ff_thread_get_buffer(avctx
, p
, 0)) < 0)
674 if (offset_ry
>= buf_size
||
675 offset_gu
>= buf_size
||
676 offset_bv
>= buf_size
) {
677 av_log(avctx
, AV_LOG_ERROR
,
678 "Invalid frame offsets\n");
679 return AVERROR_INVALIDDATA
;
682 ret
= lag_decode_arith_plane(l
, p
->data
[0], avctx
->width
, avctx
->height
,
683 p
->linesize
[0], buf
+ offset_ry
,
684 buf_size
- offset_ry
);
687 ret
= lag_decode_arith_plane(l
, p
->data
[1], (avctx
->width
+ 1) / 2,
688 avctx
->height
, p
->linesize
[1],
689 buf
+ offset_gu
, buf_size
- offset_gu
);
692 ret
= lag_decode_arith_plane(l
, p
->data
[2], (avctx
->width
+ 1) / 2,
693 avctx
->height
, p
->linesize
[2],
694 buf
+ offset_bv
, buf_size
- offset_bv
);
696 case FRAME_ARITH_YV12
:
697 avctx
->pix_fmt
= AV_PIX_FMT_YUV420P
;
699 if ((ret
= ff_thread_get_buffer(avctx
, p
, 0)) < 0)
702 if (offset_ry
>= buf_size
||
703 offset_gu
>= buf_size
||
704 offset_bv
>= buf_size
) {
705 av_log(avctx
, AV_LOG_ERROR
,
706 "Invalid frame offsets\n");
707 return AVERROR_INVALIDDATA
;
710 ret
= lag_decode_arith_plane(l
, p
->data
[0], avctx
->width
, avctx
->height
,
711 p
->linesize
[0], buf
+ offset_ry
,
712 buf_size
- offset_ry
);
715 ret
= lag_decode_arith_plane(l
, p
->data
[2], (avctx
->width
+ 1) / 2,
716 (avctx
->height
+ 1) / 2, p
->linesize
[2],
717 buf
+ offset_gu
, buf_size
- offset_gu
);
720 ret
= lag_decode_arith_plane(l
, p
->data
[1], (avctx
->width
+ 1) / 2,
721 (avctx
->height
+ 1) / 2, p
->linesize
[1],
722 buf
+ offset_bv
, buf_size
- offset_bv
);
725 av_log(avctx
, AV_LOG_ERROR
,
726 "Unsupported Lagarith frame type: %#"PRIx8
"\n", frametype
);
727 return AVERROR_PATCHWELCOME
;
738 static av_cold
int lag_decode_init(AVCodecContext
*avctx
)
740 static AVOnce init_static_once
= AV_ONCE_INIT
;
741 LagarithContext
*l
= avctx
->priv_data
;
744 ff_llviddsp_init(&l
->llviddsp
);
745 ff_thread_once(&init_static_once
, lag_init_static_data
);
750 const FFCodec ff_lagarith_decoder
= {
751 .p
.name
= "lagarith",
752 CODEC_LONG_NAME("Lagarith lossless"),
753 .p
.type
= AVMEDIA_TYPE_VIDEO
,
754 .p
.id
= AV_CODEC_ID_LAGARITH
,
755 .priv_data_size
= sizeof(LagarithContext
),
756 .init
= lag_decode_init
,
757 FF_CODEC_DECODE_CB(lag_decode_frame
),
758 .p
.capabilities
= AV_CODEC_CAP_DR1
| AV_CODEC_CAP_FRAME_THREADS
,