2 * VP9 compatible video decoder
4 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
5 * Copyright (C) 2013 Clément Bœsch <u pkh me>
7 * This file is part of Libav.
9 * Libav is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * Libav is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with Libav; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "libavutil/avassert.h"
34 #define VP9_SYNCCODE 0x498342
/**
 * Release all dynamic state held by one VP9Frame: the (possibly
 * thread-shared) picture buffer plus the per-frame segmentation-map and
 * motion-vector side buffers.
 */
static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
{
    ff_thread_release_buffer(avctx, &f->tf);
    av_buffer_unref(&f->segmentation_map_buf);
    av_buffer_unref(&f->mv_buf);
    /* segmentation_map aliased segmentation_map_buf->data; clear it so no
     * dangling pointer survives the buffer release */
    f->segmentation_map = NULL;
}
/**
 * Allocate the picture buffer and the per-frame side data (segmentation
 * map and motion-vector reference pairs) for one VP9Frame.
 *
 * If segmentation is enabled but this frame does not update the map (and
 * the frame is neither key/intra-only nor error-resilient), the map is
 * inherited from the LAST_FRAME entry; otherwise it starts out zeroed
 * (av_buffer_allocz).
 *
 * @return 0 on success, a negative AVERROR code on failure.
 */
static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
{
    VP9Context *s = avctx->priv_data;
    int ret, sz;

    ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
    if (ret < 0)
        return ret;

    /* one entry per 8x8 block: an 8x8 grid of them per 64x64 superblock */
    sz = 64 * s->sb_cols * s->sb_rows;
    f->segmentation_map_buf = av_buffer_allocz(sz * sizeof(*f->segmentation_map));
    f->mv_buf               = av_buffer_allocz(sz * sizeof(*f->mv));
    if (!f->segmentation_map_buf || !f->mv_buf) {
        vp9_frame_unref(avctx, f);
        return AVERROR(ENOMEM);
    }

    f->segmentation_map = f->segmentation_map_buf->data;
    f->mv               = (VP9MVRefPair *)f->mv_buf->data;

    if (s->segmentation.enabled && !s->segmentation.update_map &&
        !s->keyframe && !s->intraonly && !s->errorres)
        memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);

    return 0;
}
/**
 * Make dst a new reference to src: ref-count the side-data buffers and the
 * underlying frame, then copy the derived pointers. On any failure every
 * partially-created reference is dropped again, leaving dst unreferenced.
 *
 * @return 0 on success, a negative AVERROR code on failure.
 */
static int vp9_frame_ref(VP9Frame *dst, VP9Frame *src)
{
    int ret;

    dst->segmentation_map_buf = av_buffer_ref(src->segmentation_map_buf);
    dst->mv_buf               = av_buffer_ref(src->mv_buf);
    if (!dst->segmentation_map_buf || !dst->mv_buf) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (ret < 0)
        goto fail;

    /* plain pointers into the shared buffers — safe to copy once the
     * buffer references above succeeded */
    dst->segmentation_map = src->segmentation_map;
    dst->mv               = src->mv;

    return 0;
fail:
    av_buffer_unref(&dst->segmentation_map_buf);
    av_buffer_unref(&dst->mv_buf);
    return ret;
}
/**
 * Flush all decoder picture state (e.g. on seek): drop every internal
 * frame and every reference picture. Also stop using the previous frame's
 * motion vectors, since that frame no longer exists.
 */
static void vp9_decode_flush(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp9_frame_unref(avctx, &s->frames[i]);

    for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
        ff_thread_release_buffer(avctx, &s->refs[i]);

    s->use_last_frame_mvs = 0;
}
/**
 * (Re)allocate all dimension-dependent decoder state for a coded size of
 * w x h. A no-op when the buffers already exist for this exact size;
 * otherwise all held pictures are flushed first, since their geometry no
 * longer matches.
 *
 * @return 0 on success, a negative AVERROR code on failure.
 */
static int update_size(AVCodecContext *avctx, int w, int h)
{
    VP9Context *s = avctx->priv_data;
    uint8_t *p;
    int nb_blocks, nb_superblocks;

    if (s->above_partition_ctx && w == s->alloc_width && h == s->alloc_height)
        return 0;

    vp9_decode_flush(avctx);

    if (w <= 0 || h <= 0)
        return AVERROR_INVALIDDATA;

    avctx->width  = w;
    avctx->height = h;
    s->sb_cols    = (w + 63) >> 6; /* 64x64 superblock grid */
    s->sb_rows    = (h + 63) >> 6;
    s->cols       = (w + 7) >> 3;  /* 8x8 block grid */
    s->rows       = (h + 7) >> 3;

    /* Carve one flat allocation into the per-superblock-column "above"
     * context arrays; the 240 constant is the sum of the uint8_t entry
     * counts assigned below. */
#define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
    av_free(s->above_partition_ctx);
    p = av_malloc(s->sb_cols *
                  (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->above_partition_ctx, uint8_t *,      8);
    assign(s->above_skip_ctx,      uint8_t *,      8);
    assign(s->above_txfm_ctx,      uint8_t *,      8);
    assign(s->above_mode_ctx,      uint8_t *,     16);
    assign(s->above_y_nnz_ctx,     uint8_t *,     16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *,      8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *,      8);
    assign(s->intra_pred_data[0],  uint8_t *,     64);
    assign(s->intra_pred_data[1],  uint8_t *,     32);
    assign(s->intra_pred_data[2],  uint8_t *,     32);
    assign(s->above_segpred_ctx,   uint8_t *,      8);
    assign(s->above_intra_ctx,     uint8_t *,      8);
    assign(s->above_comp_ctx,      uint8_t *,      8);
    assign(s->above_ref_ctx,       uint8_t *,      8);
    assign(s->above_filter_ctx,    uint8_t *,      8);
    assign(s->lflvl,               VP9Filter *,    1);
    assign(s->above_mv_ctx,        VP56mv(*)[2],  16);
#undef assign

    av_freep(&s->b_base);
    av_freep(&s->block_base);

    if (avctx->active_thread_type & FF_THREAD_FRAME) {
        /* frame threading: every block/superblock gets its own storage */
        nb_blocks      = s->cols * s->rows;
        nb_superblocks = s->sb_cols * s->sb_rows;
    } else {
        /* single-buffer mode: storage is reused block by block */
        nb_blocks = nb_superblocks = 1;
    }

    s->b_base = av_malloc_array(nb_blocks, sizeof(*s->b_base));
    /* (64*64 + 128) * 3 bytes per superblock: Y + 2x chroma coefficient
     * storage plus the EOB arrays laid out behind it (carved up below) */
    s->block_base = av_mallocz_array(nb_superblocks, (64 * 64 + 128) * 3);
    if (!s->b_base || !s->block_base)
        return AVERROR(ENOMEM);
    s->uvblock_base[0] = s->block_base + nb_superblocks * 64 * 64;
    s->uvblock_base[1] = s->uvblock_base[0] + nb_superblocks * 32 * 32;
    s->eob_base        = (uint8_t *)(s->uvblock_base[1] + nb_superblocks * 32 * 32);
    s->uveob_base[0]   = s->eob_base + nb_superblocks * 256;
    s->uveob_base[1]   = s->uveob_base[0] + nb_superblocks * 64;

    s->alloc_width  = w;
    s->alloc_height = h;

    return 0;
}
// The sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
{
    /* read an n-bit magnitude, then one trailing sign bit (1 = negative) */
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}
/* Undo the "recentering" of a differentially-coded non-negative value v
 * around the predictor m: small codes map to values near m (alternating
 * below/above), large codes pass through unchanged. */
static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    if (v & 1)
        return m - ((v + 1) >> 1);
    return m + (v >> 1);
}
// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    /* maps the decoded VLC index back to the absolute difference; the
     * first 20 entries are the coarse (multiple-of-13) steps, the rest
     * the fine steps in ascending order */
    static const int inv_map_table[MAX_PROB - 1] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */

    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65)
            d = (d << 1) - 65 + vp8_rac_get(c);
        d = av_clip(d, 0, MAX_PROB - 65 - 1);
        d += 64;
    }

    /* fold the absolute difference back around the current probability,
     * mirroring when p is in the upper half so the result stays in range */
    return p <= MAX_PROB / 2
           ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
           : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}
264 static int decode_frame_header(AVCodecContext
*avctx
,
265 const uint8_t *data
, int size
, int *ref
)
267 VP9Context
*s
= avctx
->priv_data
;
268 int c
, i
, j
, k
, l
, m
, n
, w
, h
, max
, size2
, ret
, sharp
;
270 const uint8_t *data2
;
273 if ((ret
= init_get_bits8(&s
->gb
, data
, size
)) < 0) {
274 av_log(avctx
, AV_LOG_ERROR
, "Failed to initialize bitstream reader\n");
277 if (get_bits(&s
->gb
, 2) != 0x2) { // frame marker
278 av_log(avctx
, AV_LOG_ERROR
, "Invalid frame marker\n");
279 return AVERROR_INVALIDDATA
;
281 s
->profile
= get_bits1(&s
->gb
);
282 if (get_bits1(&s
->gb
)) { // reserved bit
283 av_log(avctx
, AV_LOG_ERROR
, "Reserved bit should be zero\n");
284 return AVERROR_INVALIDDATA
;
286 if (get_bits1(&s
->gb
)) {
287 *ref
= get_bits(&s
->gb
, 3);
291 s
->last_keyframe
= s
->keyframe
;
292 s
->keyframe
= !get_bits1(&s
->gb
);
294 last_invisible
= s
->invisible
;
295 s
->invisible
= !get_bits1(&s
->gb
);
296 s
->errorres
= get_bits1(&s
->gb
);
297 s
->use_last_frame_mvs
= !s
->errorres
&& !last_invisible
;
300 if (get_bits_long(&s
->gb
, 24) != VP9_SYNCCODE
) { // synccode
301 av_log(avctx
, AV_LOG_ERROR
, "Invalid sync code\n");
302 return AVERROR_INVALIDDATA
;
304 s
->colorspace
= get_bits(&s
->gb
, 3);
305 if (s
->colorspace
== 7) { // RGB = profile 1
306 av_log(avctx
, AV_LOG_ERROR
, "RGB not supported in profile 0\n");
307 return AVERROR_INVALIDDATA
;
309 s
->fullrange
= get_bits1(&s
->gb
);
312 if (s
->profile
== 1 || s
->profile
== 3) {
313 s
->sub_x
= get_bits1(&s
->gb
);
314 s
->sub_y
= get_bits1(&s
->gb
);
315 if (s
->sub_x
&& s
->sub_y
) {
316 av_log(avctx
, AV_LOG_ERROR
,
317 "4:2:0 color not supported in profile 1 or 3\n");
318 return AVERROR_INVALIDDATA
;
320 if (get_bits1(&s
->gb
)) { // reserved bit
321 av_log(avctx
, AV_LOG_ERROR
, "Reserved bit should be zero\n");
322 return AVERROR_INVALIDDATA
;
325 s
->sub_x
= s
->sub_y
= 1;
327 if (!s
->sub_x
|| !s
->sub_y
) {
328 avpriv_report_missing_feature(avctx
, "Subsampling %d:%d",
330 return AVERROR_PATCHWELCOME
;
333 s
->refreshrefmask
= 0xff;
334 w
= get_bits(&s
->gb
, 16) + 1;
335 h
= get_bits(&s
->gb
, 16) + 1;
336 if (get_bits1(&s
->gb
)) // display size
337 skip_bits(&s
->gb
, 32);
339 s
->intraonly
= s
->invisible
? get_bits1(&s
->gb
) : 0;
340 s
->resetctx
= s
->errorres
? 0 : get_bits(&s
->gb
, 2);
342 if (get_bits_long(&s
->gb
, 24) != VP9_SYNCCODE
) { // synccode
343 av_log(avctx
, AV_LOG_ERROR
, "Invalid sync code\n");
344 return AVERROR_INVALIDDATA
;
346 s
->refreshrefmask
= get_bits(&s
->gb
, 8);
347 w
= get_bits(&s
->gb
, 16) + 1;
348 h
= get_bits(&s
->gb
, 16) + 1;
349 if (get_bits1(&s
->gb
)) // display size
350 skip_bits(&s
->gb
, 32);
352 s
->refreshrefmask
= get_bits(&s
->gb
, 8);
353 s
->refidx
[0] = get_bits(&s
->gb
, 3);
354 s
->signbias
[0] = get_bits1(&s
->gb
);
355 s
->refidx
[1] = get_bits(&s
->gb
, 3);
356 s
->signbias
[1] = get_bits1(&s
->gb
);
357 s
->refidx
[2] = get_bits(&s
->gb
, 3);
358 s
->signbias
[2] = get_bits1(&s
->gb
);
359 if (!s
->refs
[s
->refidx
[0]].f
->buf
[0] ||
360 !s
->refs
[s
->refidx
[1]].f
->buf
[0] ||
361 !s
->refs
[s
->refidx
[2]].f
->buf
[0]) {
362 av_log(avctx
, AV_LOG_ERROR
,
363 "Not all references are available\n");
364 return AVERROR_INVALIDDATA
;
366 if (get_bits1(&s
->gb
)) {
367 w
= s
->refs
[s
->refidx
[0]].f
->width
;
368 h
= s
->refs
[s
->refidx
[0]].f
->height
;
369 } else if (get_bits1(&s
->gb
)) {
370 w
= s
->refs
[s
->refidx
[1]].f
->width
;
371 h
= s
->refs
[s
->refidx
[1]].f
->height
;
372 } else if (get_bits1(&s
->gb
)) {
373 w
= s
->refs
[s
->refidx
[2]].f
->width
;
374 h
= s
->refs
[s
->refidx
[2]].f
->height
;
376 w
= get_bits(&s
->gb
, 16) + 1;
377 h
= get_bits(&s
->gb
, 16) + 1;
379 if (get_bits1(&s
->gb
)) // display size
380 skip_bits(&s
->gb
, 32);
381 s
->highprecisionmvs
= get_bits1(&s
->gb
);
382 s
->filtermode
= get_bits1(&s
->gb
) ? FILTER_SWITCHABLE
:
384 s
->allowcompinter
= s
->signbias
[0] != s
->signbias
[1] ||
385 s
->signbias
[0] != s
->signbias
[2];
386 if (s
->allowcompinter
) {
387 if (s
->signbias
[0] == s
->signbias
[1]) {
389 s
->varcompref
[0] = 0;
390 s
->varcompref
[1] = 1;
391 } else if (s
->signbias
[0] == s
->signbias
[2]) {
393 s
->varcompref
[0] = 0;
394 s
->varcompref
[1] = 2;
397 s
->varcompref
[0] = 1;
398 s
->varcompref
[1] = 2;
404 s
->refreshctx
= s
->errorres
? 0 : get_bits1(&s
->gb
);
405 s
->parallelmode
= s
->errorres
? 1 : get_bits1(&s
->gb
);
406 s
->framectxid
= c
= get_bits(&s
->gb
, 2);
408 /* loopfilter header data */
409 s
->filter
.level
= get_bits(&s
->gb
, 6);
410 sharp
= get_bits(&s
->gb
, 3);
411 /* If sharpness changed, reinit lim/mblim LUTs. if it didn't change,
412 * keep the old cache values since they are still valid. */
413 if (s
->filter
.sharpness
!= sharp
)
414 memset(s
->filter
.lim_lut
, 0, sizeof(s
->filter
.lim_lut
));
415 s
->filter
.sharpness
= sharp
;
416 if ((s
->lf_delta
.enabled
= get_bits1(&s
->gb
))) {
417 if (get_bits1(&s
->gb
)) {
418 for (i
= 0; i
< 4; i
++)
419 if (get_bits1(&s
->gb
))
420 s
->lf_delta
.ref
[i
] = get_bits_with_sign(&s
->gb
, 6);
421 for (i
= 0; i
< 2; i
++)
422 if (get_bits1(&s
->gb
))
423 s
->lf_delta
.mode
[i
] = get_bits_with_sign(&s
->gb
, 6);
426 memset(&s
->lf_delta
, 0, sizeof(s
->lf_delta
));
429 /* quantization header data */
430 s
->yac_qi
= get_bits(&s
->gb
, 8);
431 s
->ydc_qdelta
= get_bits1(&s
->gb
) ? get_bits_with_sign(&s
->gb
, 4) : 0;
432 s
->uvdc_qdelta
= get_bits1(&s
->gb
) ? get_bits_with_sign(&s
->gb
, 4) : 0;
433 s
->uvac_qdelta
= get_bits1(&s
->gb
) ? get_bits_with_sign(&s
->gb
, 4) : 0;
434 s
->lossless
= s
->yac_qi
== 0 && s
->ydc_qdelta
== 0 &&
435 s
->uvdc_qdelta
== 0 && s
->uvac_qdelta
== 0;
437 /* segmentation header info */
438 if ((s
->segmentation
.enabled
= get_bits1(&s
->gb
))) {
439 if ((s
->segmentation
.update_map
= get_bits1(&s
->gb
))) {
440 for (i
= 0; i
< 7; i
++)
441 s
->prob
.seg
[i
] = get_bits1(&s
->gb
) ?
442 get_bits(&s
->gb
, 8) : 255;
443 if ((s
->segmentation
.temporal
= get_bits1(&s
->gb
)))
444 for (i
= 0; i
< 3; i
++)
445 s
->prob
.segpred
[i
] = get_bits1(&s
->gb
) ?
446 get_bits(&s
->gb
, 8) : 255;
449 if (get_bits1(&s
->gb
)) {
450 s
->segmentation
.absolute_vals
= get_bits1(&s
->gb
);
451 for (i
= 0; i
< 8; i
++) {
452 if ((s
->segmentation
.feat
[i
].q_enabled
= get_bits1(&s
->gb
)))
453 s
->segmentation
.feat
[i
].q_val
= get_bits_with_sign(&s
->gb
, 8);
454 if ((s
->segmentation
.feat
[i
].lf_enabled
= get_bits1(&s
->gb
)))
455 s
->segmentation
.feat
[i
].lf_val
= get_bits_with_sign(&s
->gb
, 6);
456 if ((s
->segmentation
.feat
[i
].ref_enabled
= get_bits1(&s
->gb
)))
457 s
->segmentation
.feat
[i
].ref_val
= get_bits(&s
->gb
, 2);
458 s
->segmentation
.feat
[i
].skip_enabled
= get_bits1(&s
->gb
);
462 s
->segmentation
.feat
[0].q_enabled
= 0;
463 s
->segmentation
.feat
[0].lf_enabled
= 0;
464 s
->segmentation
.feat
[0].skip_enabled
= 0;
465 s
->segmentation
.feat
[0].ref_enabled
= 0;
468 // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
469 for (i
= 0; i
< (s
->segmentation
.enabled
? 8 : 1); i
++) {
470 int qyac
, qydc
, quvac
, quvdc
, lflvl
, sh
;
472 if (s
->segmentation
.feat
[i
].q_enabled
) {
473 if (s
->segmentation
.absolute_vals
)
474 qyac
= s
->segmentation
.feat
[i
].q_val
;
476 qyac
= s
->yac_qi
+ s
->segmentation
.feat
[i
].q_val
;
480 qydc
= av_clip_uintp2(qyac
+ s
->ydc_qdelta
, 8);
481 quvdc
= av_clip_uintp2(qyac
+ s
->uvdc_qdelta
, 8);
482 quvac
= av_clip_uintp2(qyac
+ s
->uvac_qdelta
, 8);
483 qyac
= av_clip_uintp2(qyac
, 8);
485 s
->segmentation
.feat
[i
].qmul
[0][0] = ff_vp9_dc_qlookup
[qydc
];
486 s
->segmentation
.feat
[i
].qmul
[0][1] = ff_vp9_ac_qlookup
[qyac
];
487 s
->segmentation
.feat
[i
].qmul
[1][0] = ff_vp9_dc_qlookup
[quvdc
];
488 s
->segmentation
.feat
[i
].qmul
[1][1] = ff_vp9_ac_qlookup
[quvac
];
490 sh
= s
->filter
.level
>= 32;
491 if (s
->segmentation
.feat
[i
].lf_enabled
) {
492 if (s
->segmentation
.absolute_vals
)
493 lflvl
= s
->segmentation
.feat
[i
].lf_val
;
495 lflvl
= s
->filter
.level
+ s
->segmentation
.feat
[i
].lf_val
;
497 lflvl
= s
->filter
.level
;
499 s
->segmentation
.feat
[i
].lflvl
[0][0] =
500 s
->segmentation
.feat
[i
].lflvl
[0][1] =
501 av_clip_uintp2(lflvl
+ (s
->lf_delta
.ref
[0] << sh
), 6);
502 for (j
= 1; j
< 4; j
++) {
503 s
->segmentation
.feat
[i
].lflvl
[j
][0] =
504 av_clip_uintp2(lflvl
+ ((s
->lf_delta
.ref
[j
] +
505 s
->lf_delta
.mode
[0]) << sh
), 6);
506 s
->segmentation
.feat
[i
].lflvl
[j
][1] =
507 av_clip_uintp2(lflvl
+ ((s
->lf_delta
.ref
[j
] +
508 s
->lf_delta
.mode
[1]) << sh
), 6);
513 if ((ret
= update_size(avctx
, w
, h
)) < 0) {
514 av_log(avctx
, AV_LOG_ERROR
,
515 "Failed to initialize decoder for %dx%d\n", w
, h
);
518 for (s
->tiling
.log2_tile_cols
= 0;
519 (s
->sb_cols
>> s
->tiling
.log2_tile_cols
) > 64;
520 s
->tiling
.log2_tile_cols
++) ;
521 for (max
= 0; (s
->sb_cols
>> max
) >= 4; max
++) ;
522 max
= FFMAX(0, max
- 1);
523 while (max
> s
->tiling
.log2_tile_cols
) {
524 if (get_bits1(&s
->gb
))
525 s
->tiling
.log2_tile_cols
++;
529 s
->tiling
.log2_tile_rows
= decode012(&s
->gb
);
530 s
->tiling
.tile_rows
= 1 << s
->tiling
.log2_tile_rows
;
531 if (s
->tiling
.tile_cols
!= (1 << s
->tiling
.log2_tile_cols
)) {
532 s
->tiling
.tile_cols
= 1 << s
->tiling
.log2_tile_cols
;
533 s
->c_b
= av_fast_realloc(s
->c_b
, &s
->c_b_size
,
534 sizeof(VP56RangeCoder
) *
535 s
->tiling
.tile_cols
);
537 av_log(avctx
, AV_LOG_ERROR
,
538 "Ran out of memory during range coder init\n");
539 return AVERROR(ENOMEM
);
543 if (s
->keyframe
|| s
->errorres
|| s
->intraonly
) {
547 s
->prob_ctx
[3].p
= ff_vp9_default_probs
;
548 memcpy(s
->prob_ctx
[0].coef
, ff_vp9_default_coef_probs
,
549 sizeof(ff_vp9_default_coef_probs
));
550 memcpy(s
->prob_ctx
[1].coef
, ff_vp9_default_coef_probs
,
551 sizeof(ff_vp9_default_coef_probs
));
552 memcpy(s
->prob_ctx
[2].coef
, ff_vp9_default_coef_probs
,
553 sizeof(ff_vp9_default_coef_probs
));
554 memcpy(s
->prob_ctx
[3].coef
, ff_vp9_default_coef_probs
,
555 sizeof(ff_vp9_default_coef_probs
));
558 // next 16 bits is size of the rest of the header (arith-coded)
559 size2
= get_bits(&s
->gb
, 16);
560 data2
= align_get_bits(&s
->gb
);
561 if (size2
> size
- (data2
- data
)) {
562 av_log(avctx
, AV_LOG_ERROR
, "Invalid compressed header size\n");
563 return AVERROR_INVALIDDATA
;
565 ff_vp56_init_range_decoder(&s
->c
, data2
, size2
);
566 if (vp56_rac_get_prob_branchy(&s
->c
, 128)) { // marker bit
567 av_log(avctx
, AV_LOG_ERROR
, "Marker bit was set\n");
568 return AVERROR_INVALIDDATA
;
571 if (s
->keyframe
|| s
->intraonly
)
572 memset(s
->counts
.coef
, 0,
573 sizeof(s
->counts
.coef
) + sizeof(s
->counts
.eob
));
575 memset(&s
->counts
, 0, sizeof(s
->counts
));
577 /* FIXME is it faster to not copy here, but do it down in the fw updates
578 * as explicit copies if the fw update is missing (and skip the copy upon
580 s
->prob
.p
= s
->prob_ctx
[c
].p
;
584 s
->txfmmode
= TX_4X4
;
586 s
->txfmmode
= vp8_rac_get_uint(&s
->c
, 2);
587 if (s
->txfmmode
== 3)
588 s
->txfmmode
+= vp8_rac_get(&s
->c
);
590 if (s
->txfmmode
== TX_SWITCHABLE
) {
591 for (i
= 0; i
< 2; i
++)
592 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
593 s
->prob
.p
.tx8p
[i
] = update_prob(&s
->c
, s
->prob
.p
.tx8p
[i
]);
594 for (i
= 0; i
< 2; i
++)
595 for (j
= 0; j
< 2; j
++)
596 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
597 s
->prob
.p
.tx16p
[i
][j
] =
598 update_prob(&s
->c
, s
->prob
.p
.tx16p
[i
][j
]);
599 for (i
= 0; i
< 2; i
++)
600 for (j
= 0; j
< 3; j
++)
601 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
602 s
->prob
.p
.tx32p
[i
][j
] =
603 update_prob(&s
->c
, s
->prob
.p
.tx32p
[i
][j
]);
608 for (i
= 0; i
< 4; i
++) {
609 uint8_t (*ref
)[2][6][6][3] = s
->prob_ctx
[c
].coef
[i
];
610 if (vp8_rac_get(&s
->c
)) {
611 for (j
= 0; j
< 2; j
++)
612 for (k
= 0; k
< 2; k
++)
613 for (l
= 0; l
< 6; l
++)
614 for (m
= 0; m
< 6; m
++) {
615 uint8_t *p
= s
->prob
.coef
[i
][j
][k
][l
][m
];
616 uint8_t *r
= ref
[j
][k
][l
][m
];
617 if (m
>= 3 && l
== 0) // dc only has 3 pt
619 for (n
= 0; n
< 3; n
++) {
620 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
621 p
[n
] = update_prob(&s
->c
, r
[n
]);
628 for (j
= 0; j
< 2; j
++)
629 for (k
= 0; k
< 2; k
++)
630 for (l
= 0; l
< 6; l
++)
631 for (m
= 0; m
< 6; m
++) {
632 uint8_t *p
= s
->prob
.coef
[i
][j
][k
][l
][m
];
633 uint8_t *r
= ref
[j
][k
][l
][m
];
634 if (m
> 3 && l
== 0) // dc only has 3 pt
640 if (s
->txfmmode
== i
)
645 for (i
= 0; i
< 3; i
++)
646 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
647 s
->prob
.p
.skip
[i
] = update_prob(&s
->c
, s
->prob
.p
.skip
[i
]);
648 if (!s
->keyframe
&& !s
->intraonly
) {
649 for (i
= 0; i
< 7; i
++)
650 for (j
= 0; j
< 3; j
++)
651 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
652 s
->prob
.p
.mv_mode
[i
][j
] =
653 update_prob(&s
->c
, s
->prob
.p
.mv_mode
[i
][j
]);
655 if (s
->filtermode
== FILTER_SWITCHABLE
)
656 for (i
= 0; i
< 4; i
++)
657 for (j
= 0; j
< 2; j
++)
658 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
659 s
->prob
.p
.filter
[i
][j
] =
660 update_prob(&s
->c
, s
->prob
.p
.filter
[i
][j
]);
662 for (i
= 0; i
< 4; i
++)
663 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
664 s
->prob
.p
.intra
[i
] = update_prob(&s
->c
, s
->prob
.p
.intra
[i
]);
666 if (s
->allowcompinter
) {
667 s
->comppredmode
= vp8_rac_get(&s
->c
);
669 s
->comppredmode
+= vp8_rac_get(&s
->c
);
670 if (s
->comppredmode
== PRED_SWITCHABLE
)
671 for (i
= 0; i
< 5; i
++)
672 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
674 update_prob(&s
->c
, s
->prob
.p
.comp
[i
]);
676 s
->comppredmode
= PRED_SINGLEREF
;
679 if (s
->comppredmode
!= PRED_COMPREF
) {
680 for (i
= 0; i
< 5; i
++) {
681 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
682 s
->prob
.p
.single_ref
[i
][0] =
683 update_prob(&s
->c
, s
->prob
.p
.single_ref
[i
][0]);
684 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
685 s
->prob
.p
.single_ref
[i
][1] =
686 update_prob(&s
->c
, s
->prob
.p
.single_ref
[i
][1]);
690 if (s
->comppredmode
!= PRED_SINGLEREF
) {
691 for (i
= 0; i
< 5; i
++)
692 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
693 s
->prob
.p
.comp_ref
[i
] =
694 update_prob(&s
->c
, s
->prob
.p
.comp_ref
[i
]);
697 for (i
= 0; i
< 4; i
++)
698 for (j
= 0; j
< 9; j
++)
699 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
700 s
->prob
.p
.y_mode
[i
][j
] =
701 update_prob(&s
->c
, s
->prob
.p
.y_mode
[i
][j
]);
703 for (i
= 0; i
< 4; i
++)
704 for (j
= 0; j
< 4; j
++)
705 for (k
= 0; k
< 3; k
++)
706 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
707 s
->prob
.p
.partition
[3 - i
][j
][k
] =
709 s
->prob
.p
.partition
[3 - i
][j
][k
]);
711 // mv fields don't use the update_prob subexp model for some reason
712 for (i
= 0; i
< 3; i
++)
713 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
714 s
->prob
.p
.mv_joint
[i
] = (vp8_rac_get_uint(&s
->c
, 7) << 1) | 1;
716 for (i
= 0; i
< 2; i
++) {
717 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
718 s
->prob
.p
.mv_comp
[i
].sign
=
719 (vp8_rac_get_uint(&s
->c
, 7) << 1) | 1;
721 for (j
= 0; j
< 10; j
++)
722 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
723 s
->prob
.p
.mv_comp
[i
].classes
[j
] =
724 (vp8_rac_get_uint(&s
->c
, 7) << 1) | 1;
726 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
727 s
->prob
.p
.mv_comp
[i
].class0
=
728 (vp8_rac_get_uint(&s
->c
, 7) << 1) | 1;
730 for (j
= 0; j
< 10; j
++)
731 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
732 s
->prob
.p
.mv_comp
[i
].bits
[j
] =
733 (vp8_rac_get_uint(&s
->c
, 7) << 1) | 1;
736 for (i
= 0; i
< 2; i
++) {
737 for (j
= 0; j
< 2; j
++)
738 for (k
= 0; k
< 3; k
++)
739 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
740 s
->prob
.p
.mv_comp
[i
].class0_fp
[j
][k
] =
741 (vp8_rac_get_uint(&s
->c
, 7) << 1) | 1;
743 for (j
= 0; j
< 3; j
++)
744 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
745 s
->prob
.p
.mv_comp
[i
].fp
[j
] =
746 (vp8_rac_get_uint(&s
->c
, 7) << 1) | 1;
749 if (s
->highprecisionmvs
) {
750 for (i
= 0; i
< 2; i
++) {
751 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
752 s
->prob
.p
.mv_comp
[i
].class0_hp
=
753 (vp8_rac_get_uint(&s
->c
, 7) << 1) | 1;
755 if (vp56_rac_get_prob_branchy(&s
->c
, 252))
756 s
->prob
.p
.mv_comp
[i
].hp
=
757 (vp8_rac_get_uint(&s
->c
, 7) << 1) | 1;
762 return (data2
- data
) + size2
;
765 static int decode_subblock(AVCodecContext
*avctx
, int row
, int col
,
767 ptrdiff_t yoff
, ptrdiff_t uvoff
, enum BlockLevel bl
)
769 VP9Context
*s
= avctx
->priv_data
;
770 AVFrame
*f
= s
->frames
[CUR_FRAME
].tf
.f
;
771 int c
= ((s
->above_partition_ctx
[col
] >> (3 - bl
)) & 1) |
772 (((s
->left_partition_ctx
[row
& 0x7] >> (3 - bl
)) & 1) << 1);
774 const uint8_t *p
= s
->keyframe
? ff_vp9_default_kf_partition_probs
[bl
][c
]
775 : s
->prob
.p
.partition
[bl
][c
];
776 enum BlockPartition bp
;
777 ptrdiff_t hbs
= 4 >> bl
;
780 bp
= vp8_rac_get_tree(&s
->c
, ff_vp9_partition_tree
, p
);
781 ret
= ff_vp9_decode_block(avctx
, row
, col
, lflvl
, yoff
, uvoff
, bl
, bp
);
782 } else if (col
+ hbs
< s
->cols
) {
783 if (row
+ hbs
< s
->rows
) {
784 bp
= vp8_rac_get_tree(&s
->c
, ff_vp9_partition_tree
, p
);
787 ret
= ff_vp9_decode_block(avctx
, row
, col
, lflvl
, yoff
, uvoff
,
791 ret
= ff_vp9_decode_block(avctx
, row
, col
, lflvl
, yoff
, uvoff
,
794 yoff
+= hbs
* 8 * f
->linesize
[0];
795 uvoff
+= hbs
* 4 * f
->linesize
[1];
796 ret
= ff_vp9_decode_block(avctx
, row
+ hbs
, col
, lflvl
,
797 yoff
, uvoff
, bl
, bp
);
801 ret
= ff_vp9_decode_block(avctx
, row
, col
, lflvl
, yoff
, uvoff
,
806 ret
= ff_vp9_decode_block(avctx
, row
, col
+ hbs
, lflvl
,
807 yoff
, uvoff
, bl
, bp
);
810 case PARTITION_SPLIT
:
811 ret
= decode_subblock(avctx
, row
, col
, lflvl
,
812 yoff
, uvoff
, bl
+ 1);
814 ret
= decode_subblock(avctx
, row
, col
+ hbs
, lflvl
,
815 yoff
+ 8 * hbs
, uvoff
+ 4 * hbs
,
818 yoff
+= hbs
* 8 * f
->linesize
[0];
819 uvoff
+= hbs
* 4 * f
->linesize
[1];
820 ret
= decode_subblock(avctx
, row
+ hbs
, col
, lflvl
,
821 yoff
, uvoff
, bl
+ 1);
823 ret
= decode_subblock(avctx
, row
+ hbs
, col
+ hbs
,
824 lflvl
, yoff
+ 8 * hbs
,
825 uvoff
+ 4 * hbs
, bl
+ 1);
831 av_log(avctx
, AV_LOG_ERROR
, "Unexpected partition %d.", bp
);
832 return AVERROR_INVALIDDATA
;
834 } else if (vp56_rac_get_prob_branchy(&s
->c
, p
[1])) {
835 bp
= PARTITION_SPLIT
;
836 ret
= decode_subblock(avctx
, row
, col
, lflvl
, yoff
, uvoff
, bl
+ 1);
838 ret
= decode_subblock(avctx
, row
, col
+ hbs
, lflvl
,
839 yoff
+ 8 * hbs
, uvoff
+ 4 * hbs
, bl
+ 1);
842 ret
= ff_vp9_decode_block(avctx
, row
, col
, lflvl
, yoff
, uvoff
,
845 } else if (row
+ hbs
< s
->rows
) {
846 if (vp56_rac_get_prob_branchy(&s
->c
, p
[2])) {
847 bp
= PARTITION_SPLIT
;
848 ret
= decode_subblock(avctx
, row
, col
, lflvl
, yoff
, uvoff
, bl
+ 1);
850 yoff
+= hbs
* 8 * f
->linesize
[0];
851 uvoff
+= hbs
* 4 * f
->linesize
[1];
852 ret
= decode_subblock(avctx
, row
+ hbs
, col
, lflvl
,
853 yoff
, uvoff
, bl
+ 1);
857 ret
= ff_vp9_decode_block(avctx
, row
, col
, lflvl
, yoff
, uvoff
,
861 bp
= PARTITION_SPLIT
;
862 ret
= decode_subblock(avctx
, row
, col
, lflvl
, yoff
, uvoff
, bl
+ 1);
864 s
->counts
.partition
[bl
][c
][bp
]++;
869 static int decode_superblock_mem(AVCodecContext
*avctx
, int row
, int col
, struct VP9Filter
*lflvl
,
870 ptrdiff_t yoff
, ptrdiff_t uvoff
, enum BlockLevel bl
)
872 VP9Context
*s
= avctx
->priv_data
;
874 ptrdiff_t hbs
= 4 >> bl
;
875 AVFrame
*f
= s
->frames
[CUR_FRAME
].tf
.f
;
876 ptrdiff_t y_stride
= f
->linesize
[0], uv_stride
= f
->linesize
[1];
880 av_assert2(b
->bl
== BL_8X8
);
881 res
= ff_vp9_decode_block(avctx
, row
, col
, lflvl
, yoff
, uvoff
, b
->bl
, b
->bp
);
882 } else if (s
->b
->bl
== bl
) {
883 if ((res
= ff_vp9_decode_block(avctx
, row
, col
, lflvl
, yoff
, uvoff
, b
->bl
, b
->bp
)) < 0)
885 if (b
->bp
== PARTITION_H
&& row
+ hbs
< s
->rows
) {
886 yoff
+= hbs
* 8 * y_stride
;
887 uvoff
+= hbs
* 4 * uv_stride
;
888 res
= ff_vp9_decode_block(avctx
, row
+ hbs
, col
, lflvl
, yoff
, uvoff
, b
->bl
, b
->bp
);
889 } else if (b
->bp
== PARTITION_V
&& col
+ hbs
< s
->cols
) {
892 res
= ff_vp9_decode_block(avctx
, row
, col
+ hbs
, lflvl
, yoff
, uvoff
, b
->bl
, b
->bp
);
895 if ((res
= decode_superblock_mem(avctx
, row
, col
, lflvl
, yoff
, uvoff
, bl
+ 1)) < 0)
897 if (col
+ hbs
< s
->cols
) { // FIXME why not <=?
898 if (row
+ hbs
< s
->rows
) {
899 if ((res
= decode_superblock_mem(avctx
, row
, col
+ hbs
, lflvl
, yoff
+ 8 * hbs
,
900 uvoff
+ 4 * hbs
, bl
+ 1)) < 0)
902 yoff
+= hbs
* 8 * y_stride
;
903 uvoff
+= hbs
* 4 * uv_stride
;
904 if ((res
= decode_superblock_mem(avctx
, row
+ hbs
, col
, lflvl
, yoff
,
907 res
= decode_superblock_mem(avctx
, row
+ hbs
, col
+ hbs
, lflvl
,
908 yoff
+ 8 * hbs
, uvoff
+ 4 * hbs
, bl
+ 1);
912 res
= decode_superblock_mem(avctx
, row
, col
+ hbs
, lflvl
, yoff
, uvoff
, bl
+ 1);
914 } else if (row
+ hbs
< s
->rows
) {
915 yoff
+= hbs
* 8 * y_stride
;
916 uvoff
+= hbs
* 4 * uv_stride
;
917 res
= decode_superblock_mem(avctx
, row
+ hbs
, col
, lflvl
, yoff
, uvoff
, bl
+ 1);
924 static void loopfilter_subblock(AVCodecContext
*avctx
, VP9Filter
*lflvl
,
926 ptrdiff_t yoff
, ptrdiff_t uvoff
)
928 VP9Context
*s
= avctx
->priv_data
;
929 AVFrame
*f
= s
->frames
[CUR_FRAME
].tf
.f
;
930 uint8_t *dst
= f
->data
[0] + yoff
;
931 ptrdiff_t ls_y
= f
->linesize
[0], ls_uv
= f
->linesize
[1];
932 uint8_t *lvl
= lflvl
->level
;
935 /* FIXME: In how far can we interleave the v/h loopfilter calls? E.g.
936 * if you think of them as acting on a 8x8 block max, we can interleave
937 * each v/h within the single x loop, but that only works if we work on
938 * 8 pixel blocks, and we won't always do that (we want at least 16px
939 * to use SSE2 optimizations, perhaps 32 for AVX2). */
941 // filter edges between columns, Y plane (e.g. block1 | block2)
942 for (y
= 0; y
< 8; y
+= 2, dst
+= 16 * ls_y
, lvl
+= 16) {
943 uint8_t *ptr
= dst
, *l
= lvl
, *hmask1
= lflvl
->mask
[0][0][y
];
944 uint8_t *hmask2
= lflvl
->mask
[0][0][y
+ 1];
945 unsigned hm1
= hmask1
[0] | hmask1
[1] | hmask1
[2], hm13
= hmask1
[3];
946 unsigned hm2
= hmask2
[1] | hmask2
[2], hm23
= hmask2
[3];
947 unsigned hm
= hm1
| hm2
| hm13
| hm23
;
949 for (x
= 1; hm
& ~(x
- 1); x
<<= 1, ptr
+= 8, l
++) {
951 int L
= *l
, H
= L
>> 4;
952 int E
= s
->filter
.mblim_lut
[L
], I
= s
->filter
.lim_lut
[L
];
957 av_assert2(l
[8] == L
);
958 s
->dsp
.loop_filter_16
[0](ptr
, ls_y
, E
, I
, H
);
960 s
->dsp
.loop_filter_8
[2][0](ptr
, ls_y
, E
, I
, H
);
962 } else if (hm2
& x
) {
965 E
|= s
->filter
.mblim_lut
[L
] << 8;
966 I
|= s
->filter
.lim_lut
[L
] << 8;
967 s
->dsp
.loop_filter_mix2
[!!(hmask1
[1] & x
)]
969 [0](ptr
, ls_y
, E
, I
, H
);
971 s
->dsp
.loop_filter_8
[!!(hmask1
[1] & x
)]
972 [0](ptr
, ls_y
, E
, I
, H
);
975 } else if (hm2
& x
) {
976 int L
= l
[8], H
= L
>> 4;
977 int E
= s
->filter
.mblim_lut
[L
], I
= s
->filter
.lim_lut
[L
];
980 s
->dsp
.loop_filter_8
[!!(hmask2
[1] & x
)]
981 [0](ptr
+ 8 * ls_y
, ls_y
, E
, I
, H
);
985 int L
= *l
, H
= L
>> 4;
986 int E
= s
->filter
.mblim_lut
[L
], I
= s
->filter
.lim_lut
[L
];
991 E
|= s
->filter
.mblim_lut
[L
] << 8;
992 I
|= s
->filter
.lim_lut
[L
] << 8;
993 s
->dsp
.loop_filter_mix2
[0][0][0](ptr
+ 4, ls_y
, E
, I
, H
);
995 s
->dsp
.loop_filter_8
[0][0](ptr
+ 4, ls_y
, E
, I
, H
);
997 } else if (hm23
& x
) {
998 int L
= l
[8], H
= L
>> 4;
999 int E
= s
->filter
.mblim_lut
[L
], I
= s
->filter
.lim_lut
[L
];
1001 s
->dsp
.loop_filter_8
[0][0](ptr
+ 8 * ls_y
+ 4, ls_y
, E
, I
, H
);
1007 // filter edges between rows, Y plane (e.g. ------)
1009 dst
= f
->data
[0] + yoff
;
1011 for (y
= 0; y
< 8; y
++, dst
+= 8 * ls_y
, lvl
+= 8) {
1012 uint8_t *ptr
= dst
, *l
= lvl
, *vmask
= lflvl
->mask
[0][1][y
];
1013 unsigned vm
= vmask
[0] | vmask
[1] | vmask
[2], vm3
= vmask
[3];
1015 for (x
= 1; vm
& ~(x
- 1); x
<<= 2, ptr
+= 16, l
+= 2) {
1018 int L
= *l
, H
= L
>> 4;
1019 int E
= s
->filter
.mblim_lut
[L
], I
= s
->filter
.lim_lut
[L
];
1022 if (vmask
[0] & (x
<< 1)) {
1023 av_assert2(l
[1] == L
);
1024 s
->dsp
.loop_filter_16
[1](ptr
, ls_y
, E
, I
, H
);
1026 s
->dsp
.loop_filter_8
[2][1](ptr
, ls_y
, E
, I
, H
);
1028 } else if (vm
& (x
<< 1)) {
1031 E
|= s
->filter
.mblim_lut
[L
] << 8;
1032 I
|= s
->filter
.lim_lut
[L
] << 8;
1033 s
->dsp
.loop_filter_mix2
[!!(vmask
[1] & x
)]
1034 [!!(vmask
[1] & (x
<< 1))]
1035 [1](ptr
, ls_y
, E
, I
, H
);
1037 s
->dsp
.loop_filter_8
[!!(vmask
[1] & x
)]
1038 [1](ptr
, ls_y
, E
, I
, H
);
1040 } else if (vm
& (x
<< 1)) {
1041 int L
= l
[1], H
= L
>> 4;
1042 int E
= s
->filter
.mblim_lut
[L
], I
= s
->filter
.lim_lut
[L
];
1044 s
->dsp
.loop_filter_8
[!!(vmask
[1] & (x
<< 1))]
1045 [1](ptr
+ 8, ls_y
, E
, I
, H
);
1049 int L
= *l
, H
= L
>> 4;
1050 int E
= s
->filter
.mblim_lut
[L
], I
= s
->filter
.lim_lut
[L
];
1052 if (vm3
& (x
<< 1)) {
1055 E
|= s
->filter
.mblim_lut
[L
] << 8;
1056 I
|= s
->filter
.lim_lut
[L
] << 8;
1057 s
->dsp
.loop_filter_mix2
[0][0][1](ptr
+ ls_y
* 4, ls_y
, E
, I
, H
);
1059 s
->dsp
.loop_filter_8
[0][1](ptr
+ ls_y
* 4, ls_y
, E
, I
, H
);
1061 } else if (vm3
& (x
<< 1)) {
1062 int L
= l
[1], H
= L
>> 4;
1063 int E
= s
->filter
.mblim_lut
[L
], I
= s
->filter
.lim_lut
[L
];
1065 s
->dsp
.loop_filter_8
[0][1](ptr
+ ls_y
* 4 + 8, ls_y
, E
, I
, H
);
1070 // same principle but for U/V planes
1071 for (p
= 0; p
< 2; p
++) {
1073 dst
= f
->data
[1 + p
] + uvoff
;
1074 for (y
= 0; y
< 8; y
+= 4, dst
+= 16 * ls_uv
, lvl
+= 32) {
1075 uint8_t *ptr
= dst
, *l
= lvl
, *hmask1
= lflvl
->mask
[1][0][y
];
1076 uint8_t *hmask2
= lflvl
->mask
[1][0][y
+ 2];
1077 unsigned hm1
= hmask1
[0] | hmask1
[1] | hmask1
[2];
1078 unsigned hm2
= hmask2
[1] | hmask2
[2], hm
= hm1
| hm2
;
1080 for (x
= 1; hm
& ~(x
- 1); x
<<= 1, ptr
+= 4) {
1083 int L
= *l
, H
= L
>> 4;
1084 int E
= s
->filter
.mblim_lut
[L
];
1085 int I
= s
->filter
.lim_lut
[L
];
1087 if (hmask1
[0] & x
) {
1088 if (hmask2
[0] & x
) {
1089 av_assert2(l
[16] == L
);
1090 s
->dsp
.loop_filter_16
[0](ptr
, ls_uv
, E
, I
, H
);
1092 s
->dsp
.loop_filter_8
[2][0](ptr
, ls_uv
, E
, I
, H
);
1094 } else if (hm2
& x
) {
1097 E
|= s
->filter
.mblim_lut
[L
] << 8;
1098 I
|= s
->filter
.lim_lut
[L
] << 8;
1099 s
->dsp
.loop_filter_mix2
[!!(hmask1
[1] & x
)]
1101 [0](ptr
, ls_uv
, E
, I
, H
);
1103 s
->dsp
.loop_filter_8
[!!(hmask1
[1] & x
)]
1104 [0](ptr
, ls_uv
, E
, I
, H
);
1106 } else if (hm2
& x
) {
1107 int L
= l
[16], H
= L
>> 4;
1108 int E
= s
->filter
.mblim_lut
[L
];
1109 int I
= s
->filter
.lim_lut
[L
];
1111 s
->dsp
.loop_filter_8
[!!(hmask2
[1] & x
)]
1112 [0](ptr
+ 8 * ls_uv
, ls_uv
, E
, I
, H
);
1120 dst
= f
->data
[1 + p
] + uvoff
;
1121 for (y
= 0; y
< 8; y
++, dst
+= 4 * ls_uv
) {
1122 uint8_t *ptr
= dst
, *l
= lvl
, *vmask
= lflvl
->mask
[1][1][y
];
1123 unsigned vm
= vmask
[0] | vmask
[1] | vmask
[2];
1125 for (x
= 1; vm
& ~(x
- 1); x
<<= 4, ptr
+= 16, l
+= 4) {
1128 int L
= *l
, H
= L
>> 4;
1129 int E
= s
->filter
.mblim_lut
[L
];
1130 int I
= s
->filter
.lim_lut
[L
];
1133 if (vmask
[0] & (x
<< 2)) {
1134 av_assert2(l
[2] == L
);
1135 s
->dsp
.loop_filter_16
[1](ptr
, ls_uv
, E
, I
, H
);
1137 s
->dsp
.loop_filter_8
[2][1](ptr
, ls_uv
, E
, I
, H
);
1139 } else if (vm
& (x
<< 2)) {
1142 E
|= s
->filter
.mblim_lut
[L
] << 8;
1143 I
|= s
->filter
.lim_lut
[L
] << 8;
1144 s
->dsp
.loop_filter_mix2
[!!(vmask
[1] & x
)]
1145 [!!(vmask
[1] & (x
<< 2))]
1146 [1](ptr
, ls_uv
, E
, I
, H
);
1148 s
->dsp
.loop_filter_8
[!!(vmask
[1] & x
)]
1149 [1](ptr
, ls_uv
, E
, I
, H
);
1151 } else if (vm
& (x
<< 2)) {
1152 int L
= l
[2], H
= L
>> 4;
1153 int E
= s
->filter
.mblim_lut
[L
];
1154 int I
= s
->filter
.lim_lut
[L
];
1156 s
->dsp
.loop_filter_8
[!!(vmask
[1] & (x
<< 2))]
1157 [1](ptr
+ 8, ls_uv
, E
, I
, H
);
/**
 * Compute the pixel span [*start, *end) covered by one tile along a
 * dimension that holds @n 64x64 superblocks split into 2^@log2_n tiles.
 *
 * @param start   receives the first 8-pixel-block offset of tile @idx
 * @param end     receives one past the last 8-pixel-block offset
 * @param idx     tile index along this dimension
 * @param log2_n  log2 of the number of tiles along this dimension
 * @param n       number of superblocks along this dimension
 */
static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_first = (idx * n) >> log2_n;       /* first superblock of the tile */
    int sb_last  = ((idx + 1) * n) >> log2_n; /* one past its last superblock */

    /* clamp to the picture so the last tile does not run past the edge */
    if (sb_first > n)
        sb_first = n;
    if (sb_last > n)
        sb_last = n;

    /* one superblock is 8 blocks of 8 pixels, hence the << 3 */
    *start = sb_first << 3;
    *end   = sb_last << 3;
}
1175 static int update_refs(AVCodecContext
*avctx
)
1177 VP9Context
*s
= avctx
->priv_data
;
1180 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->refs
); i
++)
1181 if (s
->refreshrefmask
& (1 << i
)) {
1182 ff_thread_release_buffer(avctx
, &s
->refs
[i
]);
1183 ret
= ff_thread_ref_frame(&s
->refs
[i
], &s
->frames
[CUR_FRAME
].tf
);
1191 static int vp9_decode_frame(AVCodecContext
*avctx
, void *output
,
1192 int *got_frame
, AVPacket
*pkt
)
1194 VP9Context
*s
= avctx
->priv_data
;
1195 AVFrame
*frame
= output
;
1196 const uint8_t *data
= pkt
->data
;
1197 int size
= pkt
->size
;
1199 int ret
, tile_row
, tile_col
, i
, ref
= -1, row
, col
;
1201 s
->setup_finished
= 0;
1203 ret
= decode_frame_header(avctx
, data
, size
, &ref
);
1207 if (!s
->refs
[ref
].f
->buf
[0]) {
1208 av_log(avctx
, AV_LOG_ERROR
,
1209 "Requested reference %d not available\n", ref
);
1210 return AVERROR_INVALIDDATA
;
1213 ret
= av_frame_ref(frame
, s
->refs
[ref
].f
);
1222 vp9_frame_unref(avctx
, &s
->frames
[LAST_FRAME
]);
1223 if (!s
->keyframe
&& s
->frames
[CUR_FRAME
].tf
.f
->buf
[0]) {
1224 ret
= vp9_frame_ref(&s
->frames
[LAST_FRAME
], &s
->frames
[CUR_FRAME
]);
1229 vp9_frame_unref(avctx
, &s
->frames
[CUR_FRAME
]);
1230 ret
= vp9_frame_alloc(avctx
, &s
->frames
[CUR_FRAME
]);
1234 f
= s
->frames
[CUR_FRAME
].tf
.f
;
1235 f
->key_frame
= s
->keyframe
;
1236 f
->pict_type
= s
->keyframe
? AV_PICTURE_TYPE_I
: AV_PICTURE_TYPE_P
;
1239 avctx
->color_range
= AVCOL_RANGE_JPEG
;
1241 avctx
->color_range
= AVCOL_RANGE_MPEG
;
1243 switch (s
->colorspace
) {
1244 case 1: avctx
->colorspace
= AVCOL_SPC_BT470BG
; break;
1245 case 2: avctx
->colorspace
= AVCOL_SPC_BT709
; break;
1246 case 3: avctx
->colorspace
= AVCOL_SPC_SMPTE170M
; break;
1247 case 4: avctx
->colorspace
= AVCOL_SPC_SMPTE240M
; break;
1250 s
->pass
= s
->uses_2pass
=
1251 avctx
->active_thread_type
& FF_THREAD_FRAME
&& s
->refreshctx
&& !s
->parallelmode
;
1253 if (s
->refreshctx
&& s
->parallelmode
) {
1255 for (i
= 0; i
< 4; i
++) {
1256 for (j
= 0; j
< 2; j
++)
1257 for (k
= 0; k
< 2; k
++)
1258 for (l
= 0; l
< 6; l
++)
1259 for (m
= 0; m
< 6; m
++)
1260 memcpy(s
->prob_ctx
[s
->framectxid
].coef
[i
][j
][k
][l
][m
],
1261 s
->prob
.coef
[i
][j
][k
][l
][m
], 3);
1262 if (s
->txfmmode
== i
)
1265 s
->prob_ctx
[s
->framectxid
].p
= s
->prob
.p
;
1267 if ((s
->parallelmode
|| !s
->refreshctx
) &&
1268 avctx
->active_thread_type
& FF_THREAD_FRAME
) {
1269 ff_thread_finish_setup(avctx
);
1270 s
->setup_finished
= 1;
1273 // main tile decode loop
1274 memset(s
->above_partition_ctx
, 0, s
->cols
);
1275 memset(s
->above_skip_ctx
, 0, s
->cols
);
1276 if (s
->keyframe
|| s
->intraonly
)
1277 memset(s
->above_mode_ctx
, DC_PRED
, s
->cols
* 2);
1279 memset(s
->above_mode_ctx
, NEARESTMV
, s
->cols
);
1280 memset(s
->above_y_nnz_ctx
, 0, s
->sb_cols
* 16);
1281 memset(s
->above_uv_nnz_ctx
[0], 0, s
->sb_cols
* 8);
1282 memset(s
->above_uv_nnz_ctx
[1], 0, s
->sb_cols
* 8);
1283 memset(s
->above_segpred_ctx
, 0, s
->cols
);
1286 ptrdiff_t yoff
= 0, uvoff
= 0;
1288 s
->block
= s
->block_base
;
1289 s
->uvblock
[0] = s
->uvblock_base
[0];
1290 s
->uvblock
[1] = s
->uvblock_base
[1];
1291 s
->eob
= s
->eob_base
;
1292 s
->uveob
[0] = s
->uveob_base
[0];
1293 s
->uveob
[1] = s
->uveob_base
[1];
1295 for (tile_row
= 0; tile_row
< s
->tiling
.tile_rows
; tile_row
++) {
1296 set_tile_offset(&s
->tiling
.tile_row_start
, &s
->tiling
.tile_row_end
,
1297 tile_row
, s
->tiling
.log2_tile_rows
, s
->sb_rows
);
1300 for (tile_col
= 0; tile_col
< s
->tiling
.tile_cols
; tile_col
++) {
1303 if (tile_col
== s
->tiling
.tile_cols
- 1 &&
1304 tile_row
== s
->tiling
.tile_rows
- 1) {
1307 tile_size
= AV_RB32(data
);
1311 if (tile_size
> size
) {
1312 ret
= AVERROR_INVALIDDATA
;
1315 ff_vp56_init_range_decoder(&s
->c_b
[tile_col
], data
, tile_size
);
1316 if (vp56_rac_get_prob_branchy(&s
->c_b
[tile_col
], 128)) { // marker bit
1317 ret
= AVERROR_INVALIDDATA
;
1325 for (row
= s
->tiling
.tile_row_start
;
1326 row
< s
->tiling
.tile_row_end
;
1327 row
+= 8, yoff
+= f
->linesize
[0] * 64,
1328 uvoff
+= f
->linesize
[1] * 32) {
1329 VP9Filter
*lflvl
= s
->lflvl
;
1330 ptrdiff_t yoff2
= yoff
, uvoff2
= uvoff
;
1332 for (tile_col
= 0; tile_col
< s
->tiling
.tile_cols
; tile_col
++) {
1333 set_tile_offset(&s
->tiling
.tile_col_start
,
1334 &s
->tiling
.tile_col_end
,
1335 tile_col
, s
->tiling
.log2_tile_cols
, s
->sb_cols
);
1337 memset(s
->left_partition_ctx
, 0, 8);
1338 memset(s
->left_skip_ctx
, 0, 8);
1339 if (s
->keyframe
|| s
->intraonly
)
1340 memset(s
->left_mode_ctx
, DC_PRED
, 16);
1342 memset(s
->left_mode_ctx
, NEARESTMV
, 8);
1343 memset(s
->left_y_nnz_ctx
, 0, 16);
1344 memset(s
->left_uv_nnz_ctx
, 0, 16);
1345 memset(s
->left_segpred_ctx
, 0, 8);
1347 memcpy(&s
->c
, &s
->c_b
[tile_col
], sizeof(s
->c
));
1348 for (col
= s
->tiling
.tile_col_start
;
1349 col
< s
->tiling
.tile_col_end
;
1350 col
+= 8, yoff2
+= 64, uvoff2
+= 32, lflvl
++) {
1351 // FIXME integrate with lf code (i.e. zero after each
1352 // use, similar to invtxfm coefficients, or similar)
1354 memset(lflvl
->mask
, 0, sizeof(lflvl
->mask
));
1357 ret
= decode_superblock_mem(avctx
, row
, col
, lflvl
,
1358 yoff2
, uvoff2
, BL_64X64
);
1360 ret
= decode_subblock(avctx
, row
, col
, lflvl
,
1361 yoff2
, uvoff2
, BL_64X64
);
1367 memcpy(&s
->c_b
[tile_col
], &s
->c
, sizeof(s
->c
));
1373 // backup pre-loopfilter reconstruction data for intra
1374 // prediction of next row of sb64s
1375 if (row
+ 8 < s
->rows
) {
1376 memcpy(s
->intra_pred_data
[0],
1378 63 * f
->linesize
[0],
1380 memcpy(s
->intra_pred_data
[1],
1381 f
->data
[1] + uvoff
+
1382 31 * f
->linesize
[1],
1384 memcpy(s
->intra_pred_data
[2],
1385 f
->data
[2] + uvoff
+
1386 31 * f
->linesize
[2],
1390 // loopfilter one row
1391 if (s
->filter
.level
) {
1395 for (col
= 0; col
< s
->cols
;
1396 col
+= 8, yoff2
+= 64, uvoff2
+= 32, lflvl
++)
1397 loopfilter_subblock(avctx
, lflvl
, row
, col
, yoff2
, uvoff2
);
1400 // FIXME maybe we can make this more finegrained by running the
1401 // loopfilter per-block instead of after each sbrow
1402 // In fact that would also make intra pred left preparation easier?
1403 ff_thread_report_progress(&s
->frames
[CUR_FRAME
].tf
, row
>> 3, 0);
1407 if (s
->pass
< 2 && s
->refreshctx
&& !s
->parallelmode
) {
1408 ff_vp9_adapt_probs(s
);
1409 if (avctx
->active_thread_type
& FF_THREAD_FRAME
) {
1410 ff_thread_finish_setup(avctx
);
1411 s
->setup_finished
= 1;
1414 } while (s
->pass
++ == 1);
1416 ff_thread_report_progress(&s
->frames
[CUR_FRAME
].tf
, INT_MAX
, 0);
1421 if (!s
->setup_finished
) {
1422 ret
= update_refs(avctx
);
1427 if (!s
->invisible
) {
1428 av_frame_unref(frame
);
1429 ret
= av_frame_ref(frame
, s
->frames
[CUR_FRAME
].tf
.f
);
1438 static av_cold
int vp9_decode_free(AVCodecContext
*avctx
)
1440 VP9Context
*s
= avctx
->priv_data
;
1443 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->frames
); i
++) {
1444 vp9_frame_unref(avctx
, &s
->frames
[i
]);
1445 av_frame_free(&s
->frames
[i
].tf
.f
);
1448 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->refs
); i
++) {
1449 ff_thread_release_buffer(avctx
, &s
->refs
[i
]);
1450 av_frame_free(&s
->refs
[i
].f
);
1454 av_freep(&s
->above_partition_ctx
);
1455 av_freep(&s
->b_base
);
1456 av_freep(&s
->block_base
);
1461 static av_cold
int vp9_decode_init(AVCodecContext
*avctx
)
1463 VP9Context
*s
= avctx
->priv_data
;
1466 memset(s
, 0, sizeof(*s
));
1468 avctx
->internal
->allocate_progress
= 1;
1470 avctx
->pix_fmt
= AV_PIX_FMT_YUV420P
;
1472 ff_vp9dsp_init(&s
->dsp
);
1473 ff_videodsp_init(&s
->vdsp
, 8);
1475 s
->frames
[0].tf
.f
= av_frame_alloc();
1476 s
->frames
[1].tf
.f
= av_frame_alloc();
1477 if (!s
->frames
[0].tf
.f
|| !s
->frames
[1].tf
.f
)
1480 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->refs
); i
++) {
1481 s
->refs
[i
].f
= av_frame_alloc();
1486 s
->filter
.sharpness
= -1;
1490 vp9_decode_free(avctx
);
1491 return AVERROR(ENOMEM
);
1494 static int vp9_decode_update_thread_context(AVCodecContext
*dst
, const AVCodecContext
*src
)
1496 VP9Context
*s
= dst
->priv_data
, *ssrc
= src
->priv_data
;
1499 ret
= update_size(dst
, ssrc
->alloc_width
, ssrc
->alloc_height
);
1503 for (i
= 0; i
< 2; i
++) {
1504 if (s
->frames
[i
].tf
.f
->data
[0])
1505 vp9_frame_unref(dst
, &s
->frames
[i
]);
1506 if (ssrc
->frames
[i
].tf
.f
->data
[0]) {
1507 if ((ret
= vp9_frame_ref(&s
->frames
[i
], &ssrc
->frames
[i
])) < 0)
1511 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->refs
); i
++) {
1512 ff_thread_release_buffer(dst
, &s
->refs
[i
]);
1513 if (ssrc
->refs
[i
].f
->buf
[0]) {
1514 ret
= ff_thread_ref_frame(&s
->refs
[i
], &ssrc
->refs
[i
]);
1520 s
->refreshrefmask
= ssrc
->refreshrefmask
;
1521 ret
= update_refs(dst
);
1525 s
->invisible
= ssrc
->invisible
;
1526 s
->keyframe
= ssrc
->keyframe
;
1527 s
->last_uses_2pass
= ssrc
->uses_2pass
;
1529 memcpy(&s
->prob_ctx
, &ssrc
->prob_ctx
, sizeof(s
->prob_ctx
));
1530 memcpy(&s
->lf_delta
, &ssrc
->lf_delta
, sizeof(s
->lf_delta
));
1531 memcpy(&s
->segmentation
.feat
, &ssrc
->segmentation
.feat
,
1532 sizeof(s
->segmentation
.feat
));
1537 AVCodec ff_vp9_decoder
= {
1539 .long_name
= NULL_IF_CONFIG_SMALL("Google VP9"),
1540 .type
= AVMEDIA_TYPE_VIDEO
,
1541 .id
= AV_CODEC_ID_VP9
,
1542 .priv_data_size
= sizeof(VP9Context
),
1543 .init
= vp9_decode_init
,
1544 .decode
= vp9_decode_frame
,
1545 .flush
= vp9_decode_flush
,
1546 .close
= vp9_decode_free
,
1547 .capabilities
= AV_CODEC_CAP_DR1
| AV_CODEC_CAP_FRAME_THREADS
,
1548 .init_thread_copy
= vp9_decode_init
,
1549 .update_thread_context
= vp9_decode_update_thread_context
,
1550 .bsfs
= "vp9_superframe_split",