/*
 * VP9 compatible video decoder
 *
 * Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
 * Copyright (C) 2013 Clément Bœsch <u pkh me>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/avassert.h"

#include "avcodec.h"
#include "get_bits.h"
#include "internal.h"
#include "videodsp.h"
#include "vp56.h"
#include "vp9.h"
#include "vp9data.h"

#define VP9_SYNCCODE 0x498342
#define MAX_PROB 255

static void vp9_frame_unref(AVCodecContext *avctx, VP9Frame *f)
{
    ff_thread_release_buffer(avctx, &f->tf);
    av_buffer_unref(&f->segmentation_map_buf);
    av_buffer_unref(&f->mv_buf);
    f->segmentation_map = NULL;
    f->mv = NULL;
}

static int vp9_frame_alloc(AVCodecContext *avctx, VP9Frame *f)
{
    VP9Context *s = avctx->priv_data;
    int ret, sz;

    ret = ff_thread_get_buffer(avctx, &f->tf, AV_GET_BUFFER_FLAG_REF);
    if (ret < 0)
        return ret;

    sz = 64 * s->sb_cols * s->sb_rows;
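    /* one segmentation-map byte and one VP9MVRefPair per 8x8 block;
     * each 64x64 superblock covers 64 such blocks */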
    f->segmentation_map_buf = av_buffer_allocz(sz * sizeof(*f->segmentation_map));
    f->mv_buf = av_buffer_allocz(sz * sizeof(*f->mv));
    if (!f->segmentation_map_buf || !f->mv_buf) {
        vp9_frame_unref(avctx, f);
        return AVERROR(ENOMEM);
    }

    f->segmentation_map = f->segmentation_map_buf->data;
    f->mv = (VP9MVRefPair*)f->mv_buf->data;

    if (s->segmentation.enabled && !s->segmentation.update_map &&
        !s->keyframe && !s->intraonly && !s->errorres)
        memcpy(f->segmentation_map, s->frames[LAST_FRAME].segmentation_map, sz);

    return 0;
}

static int vp9_frame_ref(VP9Frame *dst, VP9Frame *src)
{
    int ret;

    dst->segmentation_map_buf = av_buffer_ref(src->segmentation_map_buf);
    dst->mv_buf = av_buffer_ref(src->mv_buf);
    if (!dst->segmentation_map_buf || !dst->mv_buf) {
        ret = AVERROR(ENOMEM);
        goto fail;
    }

    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (ret < 0)
        goto fail;

    dst->segmentation_map = src->segmentation_map;
    dst->mv = src->mv;

    return 0;
fail:
    av_buffer_unref(&dst->segmentation_map_buf);
    av_buffer_unref(&dst->mv_buf);
    return ret;
}

static void vp9_decode_flush(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp9_frame_unref(avctx, &s->frames[i]);

    for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
        ff_thread_release_buffer(avctx, &s->refs[i]);

    s->use_last_frame_mvs = 0;

    s->alloc_width = 0;
    s->alloc_height = 0;
}

static int update_size(AVCodecContext *avctx, int w, int h)
{
    VP9Context *s = avctx->priv_data;
    uint8_t *p;
    int nb_blocks, nb_superblocks;

    if (s->above_partition_ctx && w == s->alloc_width && h == s->alloc_height)
        return 0;

    vp9_decode_flush(avctx);

    if (w <= 0 || h <= 0)
        return AVERROR_INVALIDDATA;

    avctx->width = w;
    avctx->height = h;
    s->sb_cols = (w + 63) >> 6;
    s->sb_rows = (h + 63) >> 6;
    s->cols = (w + 7) >> 3;
    s->rows = (h + 7) >> 3;

#define assign(var, type, n) var = (type)p; p += s->sb_cols * n * sizeof(*var)
    av_free(s->above_partition_ctx);
    p = av_malloc(s->sb_cols *
                  (240 + sizeof(*s->lflvl) + 16 * sizeof(*s->above_mv_ctx)));
    if (!p)
        return AVERROR(ENOMEM);
    assign(s->above_partition_ctx, uint8_t *, 8);
    assign(s->above_skip_ctx, uint8_t *, 8);
    assign(s->above_txfm_ctx, uint8_t *, 8);
    assign(s->above_mode_ctx, uint8_t *, 16);
    assign(s->above_y_nnz_ctx, uint8_t *, 16);
    assign(s->above_uv_nnz_ctx[0], uint8_t *, 8);
    assign(s->above_uv_nnz_ctx[1], uint8_t *, 8);
    assign(s->intra_pred_data[0], uint8_t *, 64);
    assign(s->intra_pred_data[1], uint8_t *, 32);
    assign(s->intra_pred_data[2], uint8_t *, 32);
    assign(s->above_segpred_ctx, uint8_t *, 8);
    assign(s->above_intra_ctx, uint8_t *, 8);
    assign(s->above_comp_ctx, uint8_t *, 8);
    assign(s->above_ref_ctx, uint8_t *, 8);
    assign(s->above_filter_ctx, uint8_t *, 8);
    assign(s->lflvl, VP9Filter *, 1);
    assign(s->above_mv_ctx, VP56mv(*)[2], 16);
#undef assign
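
    /* The single av_malloc() above provides, per superblock column, the 240
     * bytes of uint8_t context consumed by the assign() calls above
     * (8 + 8 + 8 + 16 + 16 + 8 + 8 + 64 + 32 + 32 + 8 + 8 + 8 + 8 + 8),
     * plus one VP9Filter and 16 above_mv_ctx entries. */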

    av_freep(&s->b_base);
    av_freep(&s->block_base);

    if (avctx->active_thread_type & FF_THREAD_FRAME) {
        nb_blocks = s->cols * s->rows;
        nb_superblocks = s->sb_cols * s->sb_rows;
    } else {
        nb_blocks = nb_superblocks = 1;
    }

    s->b_base = av_malloc_array(nb_blocks, sizeof(*s->b_base));
    s->block_base = av_mallocz_array(nb_superblocks, (64 * 64 + 128) * 3);
    if (!s->b_base || !s->block_base)
        return AVERROR(ENOMEM);
    s->uvblock_base[0] = s->block_base + nb_superblocks * 64 * 64;
    s->uvblock_base[1] = s->uvblock_base[0] + nb_superblocks * 32 * 32;
    s->eob_base = (uint8_t *)(s->uvblock_base[1] + nb_superblocks * 32 * 32);
    s->uveob_base[0] = s->eob_base + nb_superblocks * 256;
    s->uveob_base[1] = s->uveob_base[0] + nb_superblocks * 64;
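
    /* Per superblock, block_base packs 64 * 64 Y and 2 * 32 * 32 UV
     * coefficient slots (2 bytes each) followed by 256 Y and 2 * 64 UV
     * eob bytes: 12288 + 384 == (64 * 64 + 128) * 3 bytes, matching the
     * av_mallocz_array() size above. */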

    s->alloc_width = w;
    s->alloc_height = h;

    return 0;
}

// The sign bit is at the end, not the start, of a bit sequence
static av_always_inline int get_bits_with_sign(GetBitContext *gb, int n)
{
    int v = get_bits(gb, n);
    return get_bits1(gb) ? -v : v;
}
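
/* Example: if the next bits are 0101 followed by a set sign bit,
 * get_bits_with_sign(gb, 4) reads the magnitude 5 and returns -5. */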

static av_always_inline int inv_recenter_nonneg(int v, int m)
{
    if (v > 2 * m)
        return v;
    if (v & 1)
        return m - ((v + 1) >> 1);
    return m + (v >> 1);
}
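
/* Example: inv_recenter_nonneg(7, 127) == 127 - 4 == 123 (odd v lands below
 * m), while inv_recenter_nonneg(6, 127) == 127 + 3 == 130 (even v lands above). */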

// differential forward probability updates
static int update_prob(VP56RangeCoder *c, int p)
{
    static const int inv_map_table[MAX_PROB - 1] = {
          7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176,
        189, 202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,
         10,  11,  12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,
         25,  26,  27,  28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,
         40,  41,  42,  43,  44,  45,  47,  48,  49,  50,  51,  52,  53,  54,
         55,  56,  57,  58,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
         70,  71,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,
         86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,  99, 100,
        101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115,
        116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130,
        131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145,
        146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160,
        161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
        177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191,
        192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
        207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221,
        222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236,
        237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251,
        252, 253,
    };
    int d;

    /* This code is trying to do a differential probability update. For a
     * current probability A in the range [1, 255], the difference to a new
     * probability of any value can be expressed differentially as 1-A, 255-A
     * where some part of this (absolute range) exists both in positive as
     * well as the negative part, whereas another part only exists in one
     * half. We're trying to code this shared part differentially, i.e.
     * times two where the value of the lowest bit specifies the sign, and
     * the single part is then coded on top of this. This absolute difference
     * then again has a value of [0, 254], but a bigger value in this range
     * indicates that we're further away from the original value A, so we
     * can code this as a VLC code, since higher values are increasingly
     * unlikely. The first 20 values in inv_map_table[] allow 'cheap, rough'
     * updates vs. the 'fine, exact' updates further down the range, which
     * adds one extra dimension to this differential update model. */
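
    /* Concretely, the VLC index d lands in one of four ranges:
     * [0, 15] (prefix 0 + 4 bits), [16, 31] (prefix 10 + 4 bits),
     * [32, 63] (prefix 110 + 5 bits) or [64, 253] (prefix 111 + 7(+1) bits).
     * E.g. for p == 128 and d == 0, inv_map_table[0] == 7, so the updated
     * probability is 1 + inv_recenter_nonneg(7, 127) == 124. */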

    if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 0;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 4) + 16;
    } else if (!vp8_rac_get(c)) {
        d = vp8_rac_get_uint(c, 5) + 32;
    } else {
        d = vp8_rac_get_uint(c, 7);
        if (d >= 65) {
            d = (d << 1) - 65 + vp8_rac_get(c);
            d = av_clip(d, 0, MAX_PROB - 65 - 1);
        }
        d += 64;
    }

    return p <= 128
           ? 1 + inv_recenter_nonneg(inv_map_table[d], p - 1)
           : 255 - inv_recenter_nonneg(inv_map_table[d], 255 - p);
}

static int decode_frame_header(AVCodecContext *avctx,
                               const uint8_t *data, int size, int *ref)
{
    VP9Context *s = avctx->priv_data;
    int c, i, j, k, l, m, n, w, h, max, size2, ret, sharp;
    int last_invisible;
    const uint8_t *data2;

    /* general header */
    if ((ret = init_get_bits8(&s->gb, data, size)) < 0) {
        av_log(avctx, AV_LOG_ERROR, "Failed to initialize bitstream reader\n");
        return ret;
    }
    if (get_bits(&s->gb, 2) != 0x2) { // frame marker
        av_log(avctx, AV_LOG_ERROR, "Invalid frame marker\n");
        return AVERROR_INVALIDDATA;
    }
    s->profile = get_bits1(&s->gb);
    if (get_bits1(&s->gb)) { // reserved bit
        av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
        return AVERROR_INVALIDDATA;
    }
    if (get_bits1(&s->gb)) {
        *ref = get_bits(&s->gb, 3);
        return 0;
    }

    s->last_keyframe = s->keyframe;
    s->keyframe = !get_bits1(&s->gb);

    last_invisible = s->invisible;
    s->invisible = !get_bits1(&s->gb);
    s->errorres = get_bits1(&s->gb);
    s->use_last_frame_mvs = !s->errorres && !last_invisible;

    if (s->keyframe) {
        if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
            av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
            return AVERROR_INVALIDDATA;
        }
        s->colorspace = get_bits(&s->gb, 3);
        if (s->colorspace == 7) { // RGB = profile 1
            av_log(avctx, AV_LOG_ERROR, "RGB not supported in profile 0\n");
            return AVERROR_INVALIDDATA;
        }
        s->fullrange = get_bits1(&s->gb);

        // subsampling bits
        if (s->profile == 1 || s->profile == 3) {
            s->sub_x = get_bits1(&s->gb);
            s->sub_y = get_bits1(&s->gb);
            if (s->sub_x && s->sub_y) {
                av_log(avctx, AV_LOG_ERROR,
                       "4:2:0 color not supported in profile 1 or 3\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) { // reserved bit
                av_log(avctx, AV_LOG_ERROR, "Reserved bit should be zero\n");
                return AVERROR_INVALIDDATA;
            }
        } else {
            s->sub_x = s->sub_y = 1;
        }
        if (!s->sub_x || !s->sub_y) {
            avpriv_report_missing_feature(avctx, "Subsampling %d:%d",
                                          s->sub_x, s->sub_y);
            return AVERROR_PATCHWELCOME;
        }

        s->refreshrefmask = 0xff;
        w = get_bits(&s->gb, 16) + 1;
        h = get_bits(&s->gb, 16) + 1;
        if (get_bits1(&s->gb)) // display size
            skip_bits(&s->gb, 32);
    } else {
        s->intraonly = s->invisible ? get_bits1(&s->gb) : 0;
        s->resetctx = s->errorres ? 0 : get_bits(&s->gb, 2);
        if (s->intraonly) {
            if (get_bits_long(&s->gb, 24) != VP9_SYNCCODE) { // synccode
                av_log(avctx, AV_LOG_ERROR, "Invalid sync code\n");
                return AVERROR_INVALIDDATA;
            }
            s->refreshrefmask = get_bits(&s->gb, 8);
            w = get_bits(&s->gb, 16) + 1;
            h = get_bits(&s->gb, 16) + 1;
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
        } else {
            s->refreshrefmask = get_bits(&s->gb, 8);
            s->refidx[0] = get_bits(&s->gb, 3);
            s->signbias[0] = get_bits1(&s->gb);
            s->refidx[1] = get_bits(&s->gb, 3);
            s->signbias[1] = get_bits1(&s->gb);
            s->refidx[2] = get_bits(&s->gb, 3);
            s->signbias[2] = get_bits1(&s->gb);
            if (!s->refs[s->refidx[0]].f->buf[0] ||
                !s->refs[s->refidx[1]].f->buf[0] ||
                !s->refs[s->refidx[2]].f->buf[0]) {
                av_log(avctx, AV_LOG_ERROR,
                       "Not all references are available\n");
                return AVERROR_INVALIDDATA;
            }
            if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[0]].f->width;
                h = s->refs[s->refidx[0]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[1]].f->width;
                h = s->refs[s->refidx[1]].f->height;
            } else if (get_bits1(&s->gb)) {
                w = s->refs[s->refidx[2]].f->width;
                h = s->refs[s->refidx[2]].f->height;
            } else {
                w = get_bits(&s->gb, 16) + 1;
                h = get_bits(&s->gb, 16) + 1;
            }
            if (get_bits1(&s->gb)) // display size
                skip_bits(&s->gb, 32);
            s->highprecisionmvs = get_bits1(&s->gb);
            s->filtermode = get_bits1(&s->gb) ? FILTER_SWITCHABLE :
                                                get_bits(&s->gb, 2);
            s->allowcompinter = s->signbias[0] != s->signbias[1] ||
                                s->signbias[0] != s->signbias[2];
            if (s->allowcompinter) {
                if (s->signbias[0] == s->signbias[1]) {
                    s->fixcompref = 2;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 1;
                } else if (s->signbias[0] == s->signbias[2]) {
                    s->fixcompref = 1;
                    s->varcompref[0] = 0;
                    s->varcompref[1] = 2;
                } else {
                    s->fixcompref = 0;
                    s->varcompref[0] = 1;
                    s->varcompref[1] = 2;
                }
            }
        }
    }

    s->refreshctx = s->errorres ? 0 : get_bits1(&s->gb);
    s->parallelmode = s->errorres ? 1 : get_bits1(&s->gb);
    s->framectxid = c = get_bits(&s->gb, 2);

    /* loopfilter header data */
    s->filter.level = get_bits(&s->gb, 6);
    sharp = get_bits(&s->gb, 3);
    /* If sharpness changed, reinit lim/mblim LUTs. If it didn't change,
     * keep the old cache values since they are still valid. */
    if (s->filter.sharpness != sharp)
        memset(s->filter.lim_lut, 0, sizeof(s->filter.lim_lut));
    s->filter.sharpness = sharp;
    if ((s->lf_delta.enabled = get_bits1(&s->gb))) {
        if (get_bits1(&s->gb)) {
            for (i = 0; i < 4; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.ref[i] = get_bits_with_sign(&s->gb, 6);
            for (i = 0; i < 2; i++)
                if (get_bits1(&s->gb))
                    s->lf_delta.mode[i] = get_bits_with_sign(&s->gb, 6);
        }
    } else {
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    /* quantization header data */
    s->yac_qi = get_bits(&s->gb, 8);
    s->ydc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvdc_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->uvac_qdelta = get_bits1(&s->gb) ? get_bits_with_sign(&s->gb, 4) : 0;
    s->lossless = s->yac_qi == 0 && s->ydc_qdelta == 0 &&
                  s->uvdc_qdelta == 0 && s->uvac_qdelta == 0;

    /* segmentation header info */
    if ((s->segmentation.enabled = get_bits1(&s->gb))) {
        if ((s->segmentation.update_map = get_bits1(&s->gb))) {
            for (i = 0; i < 7; i++)
                s->prob.seg[i] = get_bits1(&s->gb) ?
                                 get_bits(&s->gb, 8) : 255;
            if ((s->segmentation.temporal = get_bits1(&s->gb)))
                for (i = 0; i < 3; i++)
                    s->prob.segpred[i] = get_bits1(&s->gb) ?
                                         get_bits(&s->gb, 8) : 255;
        }

        if (get_bits1(&s->gb)) {
            s->segmentation.absolute_vals = get_bits1(&s->gb);
            for (i = 0; i < 8; i++) {
                if ((s->segmentation.feat[i].q_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].q_val = get_bits_with_sign(&s->gb, 8);
                if ((s->segmentation.feat[i].lf_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].lf_val = get_bits_with_sign(&s->gb, 6);
                if ((s->segmentation.feat[i].ref_enabled = get_bits1(&s->gb)))
                    s->segmentation.feat[i].ref_val = get_bits(&s->gb, 2);
                s->segmentation.feat[i].skip_enabled = get_bits1(&s->gb);
            }
        }
    } else {
        s->segmentation.feat[0].q_enabled = 0;
        s->segmentation.feat[0].lf_enabled = 0;
        s->segmentation.feat[0].skip_enabled = 0;
        s->segmentation.feat[0].ref_enabled = 0;
    }

    // set qmul[] based on Y/UV, AC/DC and segmentation Q idx deltas
    for (i = 0; i < (s->segmentation.enabled ? 8 : 1); i++) {
        int qyac, qydc, quvac, quvdc, lflvl, sh;

        if (s->segmentation.feat[i].q_enabled) {
            if (s->segmentation.absolute_vals)
                qyac = s->segmentation.feat[i].q_val;
            else
                qyac = s->yac_qi + s->segmentation.feat[i].q_val;
        } else {
            qyac = s->yac_qi;
        }
        qydc = av_clip_uintp2(qyac + s->ydc_qdelta, 8);
        quvdc = av_clip_uintp2(qyac + s->uvdc_qdelta, 8);
        quvac = av_clip_uintp2(qyac + s->uvac_qdelta, 8);
        qyac = av_clip_uintp2(qyac, 8);

        s->segmentation.feat[i].qmul[0][0] = ff_vp9_dc_qlookup[qydc];
        s->segmentation.feat[i].qmul[0][1] = ff_vp9_ac_qlookup[qyac];
        s->segmentation.feat[i].qmul[1][0] = ff_vp9_dc_qlookup[quvdc];
        s->segmentation.feat[i].qmul[1][1] = ff_vp9_ac_qlookup[quvac];

        sh = s->filter.level >= 32;
        if (s->segmentation.feat[i].lf_enabled) {
            if (s->segmentation.absolute_vals)
                lflvl = s->segmentation.feat[i].lf_val;
            else
                lflvl = s->filter.level + s->segmentation.feat[i].lf_val;
        } else {
            lflvl = s->filter.level;
        }
        s->segmentation.feat[i].lflvl[0][0] =
        s->segmentation.feat[i].lflvl[0][1] =
            av_clip_uintp2(lflvl + (s->lf_delta.ref[0] << sh), 6);
        for (j = 1; j < 4; j++) {
            s->segmentation.feat[i].lflvl[j][0] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[0]) << sh), 6);
            s->segmentation.feat[i].lflvl[j][1] =
                av_clip_uintp2(lflvl + ((s->lf_delta.ref[j] +
                                         s->lf_delta.mode[1]) << sh), 6);
        }
    }

    /* tiling info */
    if ((ret = update_size(avctx, w, h)) < 0) {
        av_log(avctx, AV_LOG_ERROR,
               "Failed to initialize decoder for %dx%d\n", w, h);
        return ret;
    }
    for (s->tiling.log2_tile_cols = 0;
         (s->sb_cols >> s->tiling.log2_tile_cols) > 64;
         s->tiling.log2_tile_cols++) ;
    for (max = 0; (s->sb_cols >> max) >= 4; max++) ;
    max = FFMAX(0, max - 1);
    while (max > s->tiling.log2_tile_cols) {
        if (get_bits1(&s->gb))
            s->tiling.log2_tile_cols++;
        else
            break;
    }
    s->tiling.log2_tile_rows = decode012(&s->gb);
    s->tiling.tile_rows = 1 << s->tiling.log2_tile_rows;
    if (s->tiling.tile_cols != (1 << s->tiling.log2_tile_cols)) {
        s->tiling.tile_cols = 1 << s->tiling.log2_tile_cols;
        s->c_b = av_fast_realloc(s->c_b, &s->c_b_size,
                                 sizeof(VP56RangeCoder) *
                                 s->tiling.tile_cols);
        if (!s->c_b) {
            av_log(avctx, AV_LOG_ERROR,
                   "Ran out of memory during range coder init\n");
            return AVERROR(ENOMEM);
        }
    }

    if (s->keyframe || s->errorres || s->intraonly) {
        s->prob_ctx[0].p =
        s->prob_ctx[1].p =
        s->prob_ctx[2].p =
        s->prob_ctx[3].p = ff_vp9_default_probs;
        memcpy(s->prob_ctx[0].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[1].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[2].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
        memcpy(s->prob_ctx[3].coef, ff_vp9_default_coef_probs,
               sizeof(ff_vp9_default_coef_probs));
    }

    // the next 16 bits are the size of the rest of the header (arith-coded)
    size2 = get_bits(&s->gb, 16);
    data2 = align_get_bits(&s->gb);
    if (size2 > size - (data2 - data)) {
        av_log(avctx, AV_LOG_ERROR, "Invalid compressed header size\n");
        return AVERROR_INVALIDDATA;
    }
    ff_vp56_init_range_decoder(&s->c, data2, size2);
    if (vp56_rac_get_prob_branchy(&s->c, 128)) { // marker bit
        av_log(avctx, AV_LOG_ERROR, "Marker bit was set\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe || s->intraonly)
        memset(s->counts.coef, 0,
               sizeof(s->counts.coef) + sizeof(s->counts.eob));
    else
        memset(&s->counts, 0, sizeof(s->counts));

    /* FIXME is it faster to not copy here, but do it down in the fw updates
     * as explicit copies if the fw update is missing (and skip the copy upon
     * fw update)? */
    s->prob.p = s->prob_ctx[c].p;

    // txfm updates
    if (s->lossless) {
        s->txfmmode = TX_4X4;
    } else {
        s->txfmmode = vp8_rac_get_uint(&s->c, 2);
        if (s->txfmmode == 3)
            s->txfmmode += vp8_rac_get(&s->c);

        if (s->txfmmode == TX_SWITCHABLE) {
            for (i = 0; i < 2; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.tx8p[i] = update_prob(&s->c, s->prob.p.tx8p[i]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx16p[i][j] =
                            update_prob(&s->c, s->prob.p.tx16p[i][j]);
            for (i = 0; i < 2; i++)
                for (j = 0; j < 3; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.tx32p[i][j] =
                            update_prob(&s->c, s->prob.p.tx32p[i][j]);
        }
    }

    // coef updates
    for (i = 0; i < 4; i++) {
        uint8_t (*ref)[2][6][6][3] = s->prob_ctx[c].coef[i];
        if (vp8_rac_get(&s->c)) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m >= 3 && l == 0) // dc only has 3 pt
                                break;
                            for (n = 0; n < 3; n++) {
                                if (vp56_rac_get_prob_branchy(&s->c, 252))
                                    p[n] = update_prob(&s->c, r[n]);
                                else
                                    p[n] = r[n];
                            }
                            p[3] = 0;
                        }
        } else {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++) {
                            uint8_t *p = s->prob.coef[i][j][k][l][m];
                            uint8_t *r = ref[j][k][l][m];
                            if (m > 3 && l == 0) // dc only has 3 pt
                                break;
                            memcpy(p, r, 3);
                            p[3] = 0;
                        }
        }
        if (s->txfmmode == i)
            break;
    }

    // mode updates
    for (i = 0; i < 3; i++)
        if (vp56_rac_get_prob_branchy(&s->c, 252))
            s->prob.p.skip[i] = update_prob(&s->c, s->prob.p.skip[i]);
    if (!s->keyframe && !s->intraonly) {
        for (i = 0; i < 7; i++)
            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_mode[i][j] =
                        update_prob(&s->c, s->prob.p.mv_mode[i][j]);

        if (s->filtermode == FILTER_SWITCHABLE)
            for (i = 0; i < 4; i++)
                for (j = 0; j < 2; j++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.filter[i][j] =
                            update_prob(&s->c, s->prob.p.filter[i][j]);

        for (i = 0; i < 4; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.intra[i] = update_prob(&s->c, s->prob.p.intra[i]);

        if (s->allowcompinter) {
            s->comppredmode = vp8_rac_get(&s->c);
            if (s->comppredmode)
                s->comppredmode += vp8_rac_get(&s->c);
            if (s->comppredmode == PRED_SWITCHABLE)
                for (i = 0; i < 5; i++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.comp[i] =
                            update_prob(&s->c, s->prob.p.comp[i]);
        } else {
            s->comppredmode = PRED_SINGLEREF;
        }

        if (s->comppredmode != PRED_COMPREF) {
            for (i = 0; i < 5; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][0] =
                        update_prob(&s->c, s->prob.p.single_ref[i][0]);
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.single_ref[i][1] =
                        update_prob(&s->c, s->prob.p.single_ref[i][1]);
            }
        }

        if (s->comppredmode != PRED_SINGLEREF) {
            for (i = 0; i < 5; i++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.comp_ref[i] =
                        update_prob(&s->c, s->prob.p.comp_ref[i]);
        }

        for (i = 0; i < 4; i++)
            for (j = 0; j < 9; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.y_mode[i][j] =
                        update_prob(&s->c, s->prob.p.y_mode[i][j]);

        for (i = 0; i < 4; i++)
            for (j = 0; j < 4; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.partition[3 - i][j][k] =
                            update_prob(&s->c,
                                        s->prob.p.partition[3 - i][j][k]);

        // mv fields don't use the update_prob subexp model for some reason
        for (i = 0; i < 3; i++)
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_joint[i] = (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

        for (i = 0; i < 2; i++) {
            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].sign =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].classes[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            if (vp56_rac_get_prob_branchy(&s->c, 252))
                s->prob.p.mv_comp[i].class0 =
                    (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 10; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].bits[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        for (i = 0; i < 2; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 3; k++)
                    if (vp56_rac_get_prob_branchy(&s->c, 252))
                        s->prob.p.mv_comp[i].class0_fp[j][k] =
                            (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

            for (j = 0; j < 3; j++)
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].fp[j] =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
        }

        if (s->highprecisionmvs) {
            for (i = 0; i < 2; i++) {
                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].class0_hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;

                if (vp56_rac_get_prob_branchy(&s->c, 252))
                    s->prob.p.mv_comp[i].hp =
                        (vp8_rac_get_uint(&s->c, 7) << 1) | 1;
            }
        }
    }

    return (data2 - data) + size2;
}

static int decode_subblock(AVCodecContext *avctx, int row, int col,
                           VP9Filter *lflvl,
                           ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = avctx->priv_data;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    int c = ((s->above_partition_ctx[col] >> (3 - bl)) & 1) |
            (((s->left_partition_ctx[row & 0x7] >> (3 - bl)) & 1) << 1);
    int ret;
    const uint8_t *p = s->keyframe ? ff_vp9_default_kf_partition_probs[bl][c]
                                   : s->prob.p.partition[bl][c];
    enum BlockPartition bp;
    ptrdiff_t hbs = 4 >> bl;
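    /* hbs is half the block size in 8x8-block units: 4 at BL_64X64,
     * 2 at BL_32X32, 1 at BL_16X16 and 0 at BL_8X8 */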

    if (bl == BL_8X8) {
        bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
        ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, bl, bp);
    } else if (col + hbs < s->cols) {
        if (row + hbs < s->rows) {
            bp = vp8_rac_get_tree(&s->c, ff_vp9_partition_tree, p);
            switch (bp) {
            case PARTITION_NONE:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                break;
            case PARTITION_H:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    yoff += hbs * 8 * f->linesize[0];
                    uvoff += hbs * 4 * f->linesize[1];
                    ret = ff_vp9_decode_block(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_V:
                ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                          bl, bp);
                if (!ret) {
                    yoff += hbs * 8;
                    uvoff += hbs * 4;
                    ret = ff_vp9_decode_block(avctx, row, col + hbs, lflvl,
                                              yoff, uvoff, bl, bp);
                }
                break;
            case PARTITION_SPLIT:
                ret = decode_subblock(avctx, row, col, lflvl,
                                      yoff, uvoff, bl + 1);
                if (!ret) {
                    ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                          yoff + 8 * hbs, uvoff + 4 * hbs,
                                          bl + 1);
                    if (!ret) {
                        yoff += hbs * 8 * f->linesize[0];
                        uvoff += hbs * 4 * f->linesize[1];
                        ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                              yoff, uvoff, bl + 1);
                        if (!ret) {
                            ret = decode_subblock(avctx, row + hbs, col + hbs,
                                                  lflvl, yoff + 8 * hbs,
                                                  uvoff + 4 * hbs, bl + 1);
                        }
                    }
                }
                break;
            default:
                av_log(avctx, AV_LOG_ERROR, "Unexpected partition %d.", bp);
                return AVERROR_INVALIDDATA;
            }
        } else if (vp56_rac_get_prob_branchy(&s->c, p[1])) {
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret)
                ret = decode_subblock(avctx, row, col + hbs, lflvl,
                                      yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
        } else {
            bp = PARTITION_H;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else if (row + hbs < s->rows) {
        if (vp56_rac_get_prob_branchy(&s->c, p[2])) {
            bp = PARTITION_SPLIT;
            ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
            if (!ret) {
                yoff += hbs * 8 * f->linesize[0];
                uvoff += hbs * 4 * f->linesize[1];
                ret = decode_subblock(avctx, row + hbs, col, lflvl,
                                      yoff, uvoff, bl + 1);
            }
        } else {
            bp = PARTITION_V;
            ret = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff,
                                      bl, bp);
        }
    } else {
        bp = PARTITION_SPLIT;
        ret = decode_subblock(avctx, row, col, lflvl, yoff, uvoff, bl + 1);
    }
    s->counts.partition[bl][c][bp]++;

    return ret;
}

static int decode_superblock_mem(AVCodecContext *avctx, int row, int col, struct VP9Filter *lflvl,
                                 ptrdiff_t yoff, ptrdiff_t uvoff, enum BlockLevel bl)
{
    VP9Context *s = avctx->priv_data;
    VP9Block *b = s->b;
    ptrdiff_t hbs = 4 >> bl;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    ptrdiff_t y_stride = f->linesize[0], uv_stride = f->linesize[1];
    int res;

    if (bl == BL_8X8) {
        av_assert2(b->bl == BL_8X8);
        res = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp);
    } else if (s->b->bl == bl) {
        if ((res = ff_vp9_decode_block(avctx, row, col, lflvl, yoff, uvoff, b->bl, b->bp)) < 0)
            return res;
        if (b->bp == PARTITION_H && row + hbs < s->rows) {
            yoff += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            res = ff_vp9_decode_block(avctx, row + hbs, col, lflvl, yoff, uvoff, b->bl, b->bp);
        } else if (b->bp == PARTITION_V && col + hbs < s->cols) {
            yoff += hbs * 8;
            uvoff += hbs * 4;
            res = ff_vp9_decode_block(avctx, row, col + hbs, lflvl, yoff, uvoff, b->bl, b->bp);
        }
    } else {
        if ((res = decode_superblock_mem(avctx, row, col, lflvl, yoff, uvoff, bl + 1)) < 0)
            return res;
        if (col + hbs < s->cols) { // FIXME why not <=?
            if (row + hbs < s->rows) {
                if ((res = decode_superblock_mem(avctx, row, col + hbs, lflvl, yoff + 8 * hbs,
                                                 uvoff + 4 * hbs, bl + 1)) < 0)
                    return res;
                yoff += hbs * 8 * y_stride;
                uvoff += hbs * 4 * uv_stride;
                if ((res = decode_superblock_mem(avctx, row + hbs, col, lflvl, yoff,
                                                 uvoff, bl + 1)) < 0)
                    return res;
                res = decode_superblock_mem(avctx, row + hbs, col + hbs, lflvl,
                                            yoff + 8 * hbs, uvoff + 4 * hbs, bl + 1);
            } else {
                yoff += hbs * 8;
                uvoff += hbs * 4;
                res = decode_superblock_mem(avctx, row, col + hbs, lflvl, yoff, uvoff, bl + 1);
            }
        } else if (row + hbs < s->rows) {
            yoff += hbs * 8 * y_stride;
            uvoff += hbs * 4 * uv_stride;
            res = decode_superblock_mem(avctx, row + hbs, col, lflvl, yoff, uvoff, bl + 1);
        }
    }

    return res;
}

static void loopfilter_subblock(AVCodecContext *avctx, VP9Filter *lflvl,
                                int row, int col,
                                ptrdiff_t yoff, ptrdiff_t uvoff)
{
    VP9Context *s = avctx->priv_data;
    AVFrame *f = s->frames[CUR_FRAME].tf.f;
    uint8_t *dst = f->data[0] + yoff;
    ptrdiff_t ls_y = f->linesize[0], ls_uv = f->linesize[1];
    uint8_t *lvl = lflvl->level;
    int y, x, p;

    /* FIXME: How far can we interleave the v/h loopfilter calls? E.g.
     * if you think of them as acting on a 8x8 block max, we can interleave
     * each v/h within the single x loop, but that only works if we work on
     * 8 pixel blocks, and we won't always do that (we want at least 16px
     * to use SSE2 optimizations, perhaps 32 for AVX2). */

    // filter edges between columns, Y plane (e.g. block1 | block2)
    for (y = 0; y < 8; y += 2, dst += 16 * ls_y, lvl += 16) {
        uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[0][0][y];
        uint8_t *hmask2 = lflvl->mask[0][0][y + 1];
        unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2], hm13 = hmask1[3];
        unsigned hm2 = hmask2[1] | hmask2[2], hm23 = hmask2[3];
        unsigned hm = hm1 | hm2 | hm13 | hm23;

        for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 8, l++) {
            if (hm1 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) {
                    if (hmask1[0] & x) {
                        if (hmask2[0] & x) {
                            av_assert2(l[8] == L);
                            s->dsp.loop_filter_16[0](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][0](ptr, ls_y, E, I, H);
                        }
                    } else if (hm2 & x) {
                        L = l[8];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                               [!!(hmask2[1] & x)]
                                               [0](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                            [0](ptr, ls_y, E, I, H);
                    }
                }
            } else if (hm2 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (col || x > 1) {
                    s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                        [0](ptr + 8 * ls_y, ls_y, E, I, H);
                }
            }
            if (hm13 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (hm23 & x) {
                    L = l[8];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][0](ptr + 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][0](ptr + 4, ls_y, E, I, H);
                }
            } else if (hm23 & x) {
                int L = l[8], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][0](ptr + 8 * ls_y + 4, ls_y, E, I, H);
            }
        }
    }

    //                                          block1
    // filter edges between rows, Y plane (e.g. ------)
    //                                          block2
    dst = f->data[0] + yoff;
    lvl = lflvl->level;
    for (y = 0; y < 8; y++, dst += 8 * ls_y, lvl += 8) {
        uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[0][1][y];
        unsigned vm = vmask[0] | vmask[1] | vmask[2], vm3 = vmask[3];

        for (x = 1; vm & ~(x - 1); x <<= 2, ptr += 16, l += 2) {
            if (row || y) {
                if (vm & x) {
                    int L = *l, H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    if (vmask[0] & x) {
                        if (vmask[0] & (x << 1)) {
                            av_assert2(l[1] == L);
                            s->dsp.loop_filter_16[1](ptr, ls_y, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[2][1](ptr, ls_y, E, I, H);
                        }
                    } else if (vm & (x << 1)) {
                        L = l[1];
                        H |= (L >> 4) << 8;
                        E |= s->filter.mblim_lut[L] << 8;
                        I |= s->filter.lim_lut[L] << 8;
                        s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                               [!!(vmask[1] & (x << 1))]
                                               [1](ptr, ls_y, E, I, H);
                    } else {
                        s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                            [1](ptr, ls_y, E, I, H);
                    }
                } else if (vm & (x << 1)) {
                    int L = l[1], H = L >> 4;
                    int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                    s->dsp.loop_filter_8[!!(vmask[1] & (x << 1))]
                                        [1](ptr + 8, ls_y, E, I, H);
                }
            }
            if (vm3 & x) {
                int L = *l, H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                if (vm3 & (x << 1)) {
                    L = l[1];
                    H |= (L >> 4) << 8;
                    E |= s->filter.mblim_lut[L] << 8;
                    I |= s->filter.lim_lut[L] << 8;
                    s->dsp.loop_filter_mix2[0][0][1](ptr + ls_y * 4, ls_y, E, I, H);
                } else {
                    s->dsp.loop_filter_8[0][1](ptr + ls_y * 4, ls_y, E, I, H);
                }
            } else if (vm3 & (x << 1)) {
                int L = l[1], H = L >> 4;
                int E = s->filter.mblim_lut[L], I = s->filter.lim_lut[L];

                s->dsp.loop_filter_8[0][1](ptr + ls_y * 4 + 8, ls_y, E, I, H);
            }
        }
    }

    // same principle but for U/V planes
    for (p = 0; p < 2; p++) {
        lvl = lflvl->level;
        dst = f->data[1 + p] + uvoff;
        for (y = 0; y < 8; y += 4, dst += 16 * ls_uv, lvl += 32) {
            uint8_t *ptr = dst, *l = lvl, *hmask1 = lflvl->mask[1][0][y];
            uint8_t *hmask2 = lflvl->mask[1][0][y + 2];
            unsigned hm1 = hmask1[0] | hmask1[1] | hmask1[2];
            unsigned hm2 = hmask2[1] | hmask2[2], hm = hm1 | hm2;

            for (x = 1; hm & ~(x - 1); x <<= 1, ptr += 4) {
                if (col || x > 1) {
                    if (hm1 & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (hmask1[0] & x) {
                            if (hmask2[0] & x) {
                                av_assert2(l[16] == L);
                                s->dsp.loop_filter_16[0](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][0](ptr, ls_uv, E, I, H);
                            }
                        } else if (hm2 & x) {
                            L = l[16];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(hmask1[1] & x)]
                                                   [!!(hmask2[1] & x)]
                                                   [0](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(hmask1[1] & x)]
                                                [0](ptr, ls_uv, E, I, H);
                        }
                    } else if (hm2 & x) {
                        int L = l[16], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(hmask2[1] & x)]
                                            [0](ptr + 8 * ls_uv, ls_uv, E, I, H);
                    }
                }
                if (x & 0xAA)
                    l += 2;
            }
        }
        lvl = lflvl->level;
        dst = f->data[1 + p] + uvoff;
        for (y = 0; y < 8; y++, dst += 4 * ls_uv) {
            uint8_t *ptr = dst, *l = lvl, *vmask = lflvl->mask[1][1][y];
            unsigned vm = vmask[0] | vmask[1] | vmask[2];

            for (x = 1; vm & ~(x - 1); x <<= 4, ptr += 16, l += 4) {
                if (row || y) {
                    if (vm & x) {
                        int L = *l, H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        if (vmask[0] & x) {
                            if (vmask[0] & (x << 2)) {
                                av_assert2(l[2] == L);
                                s->dsp.loop_filter_16[1](ptr, ls_uv, E, I, H);
                            } else {
                                s->dsp.loop_filter_8[2][1](ptr, ls_uv, E, I, H);
                            }
                        } else if (vm & (x << 2)) {
                            L = l[2];
                            H |= (L >> 4) << 8;
                            E |= s->filter.mblim_lut[L] << 8;
                            I |= s->filter.lim_lut[L] << 8;
                            s->dsp.loop_filter_mix2[!!(vmask[1] & x)]
                                                   [!!(vmask[1] & (x << 2))]
                                                   [1](ptr, ls_uv, E, I, H);
                        } else {
                            s->dsp.loop_filter_8[!!(vmask[1] & x)]
                                                [1](ptr, ls_uv, E, I, H);
                        }
                    } else if (vm & (x << 2)) {
                        int L = l[2], H = L >> 4;
                        int E = s->filter.mblim_lut[L];
                        int I = s->filter.lim_lut[L];

                        s->dsp.loop_filter_8[!!(vmask[1] & (x << 2))]
                                            [1](ptr + 8, ls_uv, E, I, H);
                    }
                }
            }
            if (y & 1)
                lvl += 16;
        }
    }
}

static void set_tile_offset(int *start, int *end, int idx, int log2_n, int n)
{
    int sb_start = (idx * n) >> log2_n;
    int sb_end = ((idx + 1) * n) >> log2_n;
    *start = FFMIN(sb_start, n) << 3;
    *end = FFMIN(sb_end, n) << 3;
}
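
/* E.g. with n == 10 superblock columns and log2_n == 1 (two tile columns),
 * tile 0 covers 8x8-block columns [0, 40) and tile 1 covers [40, 80);
 * the << 3 converts 64-pixel superblock units into 8-pixel block units. */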

static int update_refs(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i, ret;

    for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++)
        if (s->refreshrefmask & (1 << i)) {
            ff_thread_release_buffer(avctx, &s->refs[i]);
            ret = ff_thread_ref_frame(&s->refs[i], &s->frames[CUR_FRAME].tf);
            if (ret < 0)
                return ret;
        }

    return 0;
}

static int vp9_decode_frame(AVCodecContext *avctx, void *output,
                            int *got_frame, AVPacket *pkt)
{
    VP9Context *s = avctx->priv_data;
    AVFrame *frame = output;
    const uint8_t *data = pkt->data;
    int size = pkt->size;
    AVFrame *f;
    int ret, tile_row, tile_col, i, ref = -1, row, col;

    s->setup_finished = 0;

    ret = decode_frame_header(avctx, data, size, &ref);
    if (ret < 0) {
        return ret;
    } else if (!ret) {
        if (!s->refs[ref].f->buf[0]) {
            av_log(avctx, AV_LOG_ERROR,
                   "Requested reference %d not available\n", ref);
            return AVERROR_INVALIDDATA;
        }

        ret = av_frame_ref(frame, s->refs[ref].f);
        if (ret < 0)
            return ret;
        *got_frame = 1;
        return pkt->size;
    }
    data += ret;
    size -= ret;

    vp9_frame_unref(avctx, &s->frames[LAST_FRAME]);
    if (!s->keyframe && s->frames[CUR_FRAME].tf.f->buf[0]) {
        ret = vp9_frame_ref(&s->frames[LAST_FRAME], &s->frames[CUR_FRAME]);
        if (ret < 0)
            return ret;
    }

    vp9_frame_unref(avctx, &s->frames[CUR_FRAME]);
    ret = vp9_frame_alloc(avctx, &s->frames[CUR_FRAME]);
    if (ret < 0)
        return ret;

    f = s->frames[CUR_FRAME].tf.f;
    f->key_frame = s->keyframe;
    f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;

    if (s->fullrange)
        avctx->color_range = AVCOL_RANGE_JPEG;
    else
        avctx->color_range = AVCOL_RANGE_MPEG;

    switch (s->colorspace) {
    case 1: avctx->colorspace = AVCOL_SPC_BT470BG;   break;
    case 2: avctx->colorspace = AVCOL_SPC_BT709;     break;
    case 3: avctx->colorspace = AVCOL_SPC_SMPTE170M; break;
    case 4: avctx->colorspace = AVCOL_SPC_SMPTE240M; break;
    }

    s->pass = s->uses_2pass =
        avctx->active_thread_type & FF_THREAD_FRAME && s->refreshctx && !s->parallelmode;
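    /* Two-pass decoding (frame threads + backward probability updates):
     * pass 1 parses the bitstream into b_base/block_base and adapts the
     * probabilities so ff_thread_finish_setup() can run early; pass 2
     * reconstructs the pixels via decode_superblock_mem(). */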

    if (s->refreshctx && s->parallelmode) {
        int j, k, l, m;
        for (i = 0; i < 4; i++) {
            for (j = 0; j < 2; j++)
                for (k = 0; k < 2; k++)
                    for (l = 0; l < 6; l++)
                        for (m = 0; m < 6; m++)
                            memcpy(s->prob_ctx[s->framectxid].coef[i][j][k][l][m],
                                   s->prob.coef[i][j][k][l][m], 3);
            if (s->txfmmode == i)
                break;
        }
        s->prob_ctx[s->framectxid].p = s->prob.p;
    }
    if ((s->parallelmode || !s->refreshctx) &&
        avctx->active_thread_type & FF_THREAD_FRAME) {
        ff_thread_finish_setup(avctx);
        s->setup_finished = 1;
    }

    // main tile decode loop
    memset(s->above_partition_ctx, 0, s->cols);
    memset(s->above_skip_ctx, 0, s->cols);
    if (s->keyframe || s->intraonly)
        memset(s->above_mode_ctx, DC_PRED, s->cols * 2);
    else
        memset(s->above_mode_ctx, NEARESTMV, s->cols);
    memset(s->above_y_nnz_ctx, 0, s->sb_cols * 16);
    memset(s->above_uv_nnz_ctx[0], 0, s->sb_cols * 8);
    memset(s->above_uv_nnz_ctx[1], 0, s->sb_cols * 8);
    memset(s->above_segpred_ctx, 0, s->cols);

    do {
        ptrdiff_t yoff = 0, uvoff = 0;
        s->b = s->b_base;
        s->block = s->block_base;
        s->uvblock[0] = s->uvblock_base[0];
        s->uvblock[1] = s->uvblock_base[1];
        s->eob = s->eob_base;
        s->uveob[0] = s->uveob_base[0];
        s->uveob[1] = s->uveob_base[1];

        for (tile_row = 0; tile_row < s->tiling.tile_rows; tile_row++) {
            set_tile_offset(&s->tiling.tile_row_start, &s->tiling.tile_row_end,
                            tile_row, s->tiling.log2_tile_rows, s->sb_rows);

            if (s->pass != 2) {
                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                    int64_t tile_size;

                    if (tile_col == s->tiling.tile_cols - 1 &&
                        tile_row == s->tiling.tile_rows - 1) {
                        tile_size = size;
                    } else {
                        tile_size = AV_RB32(data);
                        data += 4;
                        size -= 4;
                    }
                    if (tile_size > size) {
                        ret = AVERROR_INVALIDDATA;
                        goto fail;
                    }
                    ff_vp56_init_range_decoder(&s->c_b[tile_col], data, tile_size);
                    if (vp56_rac_get_prob_branchy(&s->c_b[tile_col], 128)) { // marker bit
                        ret = AVERROR_INVALIDDATA;
                        goto fail;
                    }
                    data += tile_size;
                    size -= tile_size;
                }
            }

            for (row = s->tiling.tile_row_start;
                 row < s->tiling.tile_row_end;
                 row += 8, yoff += f->linesize[0] * 64,
                 uvoff += f->linesize[1] * 32) {
                VP9Filter *lflvl = s->lflvl;
                ptrdiff_t yoff2 = yoff, uvoff2 = uvoff;

                for (tile_col = 0; tile_col < s->tiling.tile_cols; tile_col++) {
                    set_tile_offset(&s->tiling.tile_col_start,
                                    &s->tiling.tile_col_end,
                                    tile_col, s->tiling.log2_tile_cols, s->sb_cols);

                    memset(s->left_partition_ctx, 0, 8);
                    memset(s->left_skip_ctx, 0, 8);
                    if (s->keyframe || s->intraonly)
                        memset(s->left_mode_ctx, DC_PRED, 16);
                    else
                        memset(s->left_mode_ctx, NEARESTMV, 8);
                    memset(s->left_y_nnz_ctx, 0, 16);
                    memset(s->left_uv_nnz_ctx, 0, 16);
                    memset(s->left_segpred_ctx, 0, 8);

                    memcpy(&s->c, &s->c_b[tile_col], sizeof(s->c));
                    for (col = s->tiling.tile_col_start;
                         col < s->tiling.tile_col_end;
                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl++) {
                        // FIXME integrate with lf code (i.e. zero after each
                        // use, similar to invtxfm coefficients, or similar)
                        if (s->pass != 1)
                            memset(lflvl->mask, 0, sizeof(lflvl->mask));

                        if (s->pass == 2) {
                            ret = decode_superblock_mem(avctx, row, col, lflvl,
                                                        yoff2, uvoff2, BL_64X64);
                        } else {
                            ret = decode_subblock(avctx, row, col, lflvl,
                                                  yoff2, uvoff2, BL_64X64);
                        }
                        if (ret < 0)
                            goto fail;
                    }
                    if (s->pass != 2)
                        memcpy(&s->c_b[tile_col], &s->c, sizeof(s->c));
                }

                if (s->pass == 1)
                    continue;

                // backup pre-loopfilter reconstruction data for intra
                // prediction of next row of sb64s
                if (row + 8 < s->rows) {
                    memcpy(s->intra_pred_data[0],
                           f->data[0] + yoff +
                           63 * f->linesize[0],
                           8 * s->cols);
                    memcpy(s->intra_pred_data[1],
                           f->data[1] + uvoff +
                           31 * f->linesize[1],
                           4 * s->cols);
                    memcpy(s->intra_pred_data[2],
                           f->data[2] + uvoff +
                           31 * f->linesize[2],
                           4 * s->cols);
                }

                // loopfilter one row
                if (s->filter.level) {
                    yoff2 = yoff;
                    uvoff2 = uvoff;
                    lflvl = s->lflvl;
                    for (col = 0; col < s->cols;
                         col += 8, yoff2 += 64, uvoff2 += 32, lflvl++)
                        loopfilter_subblock(avctx, lflvl, row, col, yoff2, uvoff2);
                }

                // FIXME maybe we can make this more fine-grained by running the
                // loopfilter per-block instead of after each sbrow
                // In fact that would also make intra pred left preparation easier?
                ff_thread_report_progress(&s->frames[CUR_FRAME].tf, row >> 3, 0);
            }
        }

        if (s->pass < 2 && s->refreshctx && !s->parallelmode) {
            ff_vp9_adapt_probs(s);
            if (avctx->active_thread_type & FF_THREAD_FRAME) {
                ff_thread_finish_setup(avctx);
                s->setup_finished = 1;
            }
        }
    } while (s->pass++ == 1);
fail:
    ff_thread_report_progress(&s->frames[CUR_FRAME].tf, INT_MAX, 0);
    if (ret < 0)
        return ret;

    // ref frame setup
    if (!s->setup_finished) {
        ret = update_refs(avctx);
        if (ret < 0)
            return ret;
    }

    if (!s->invisible) {
        av_frame_unref(frame);
        ret = av_frame_ref(frame, s->frames[CUR_FRAME].tf.f);
        if (ret < 0)
            return ret;
        *got_frame = 1;
    }

    return pkt->size;
}

static av_cold int vp9_decode_free(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        vp9_frame_unref(avctx, &s->frames[i]);
        av_frame_free(&s->frames[i].tf.f);
    }

    for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
        ff_thread_release_buffer(avctx, &s->refs[i]);
        av_frame_free(&s->refs[i].f);
    }

    av_freep(&s->c_b);
    av_freep(&s->above_partition_ctx);
    av_freep(&s->b_base);
    av_freep(&s->block_base);

    return 0;
}

static av_cold int vp9_decode_init(AVCodecContext *avctx)
{
    VP9Context *s = avctx->priv_data;
    int i;

    memset(s, 0, sizeof(*s));

    avctx->internal->allocate_progress = 1;

    avctx->pix_fmt = AV_PIX_FMT_YUV420P;

    ff_vp9dsp_init(&s->dsp);
    ff_videodsp_init(&s->vdsp, 8);

    s->frames[0].tf.f = av_frame_alloc();
    s->frames[1].tf.f = av_frame_alloc();
    if (!s->frames[0].tf.f || !s->frames[1].tf.f)
        goto fail;

    for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
        s->refs[i].f = av_frame_alloc();
        if (!s->refs[i].f)
            goto fail;
    }

    s->filter.sharpness = -1;

    return 0;
fail:
    vp9_decode_free(avctx);
    return AVERROR(ENOMEM);
}

static int vp9_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP9Context *s = dst->priv_data, *ssrc = src->priv_data;
    int i, ret;

    ret = update_size(dst, ssrc->alloc_width, ssrc->alloc_height);
    if (ret < 0)
        return ret;

    for (i = 0; i < 2; i++) {
        if (s->frames[i].tf.f->data[0])
            vp9_frame_unref(dst, &s->frames[i]);
        if (ssrc->frames[i].tf.f->data[0]) {
            if ((ret = vp9_frame_ref(&s->frames[i], &ssrc->frames[i])) < 0)
                return ret;
        }
    }
    for (i = 0; i < FF_ARRAY_ELEMS(s->refs); i++) {
        ff_thread_release_buffer(dst, &s->refs[i]);
        if (ssrc->refs[i].f->buf[0]) {
            ret = ff_thread_ref_frame(&s->refs[i], &ssrc->refs[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->refreshrefmask = ssrc->refreshrefmask;
    ret = update_refs(dst);
    if (ret < 0)
        return ret;

    s->invisible = ssrc->invisible;
    s->keyframe = ssrc->keyframe;
    s->last_uses_2pass = ssrc->uses_2pass;

    memcpy(&s->prob_ctx, &ssrc->prob_ctx, sizeof(s->prob_ctx));
    memcpy(&s->lf_delta, &ssrc->lf_delta, sizeof(s->lf_delta));
    memcpy(&s->segmentation.feat, &ssrc->segmentation.feat,
           sizeof(s->segmentation.feat));

    return 0;
}

AVCodec ff_vp9_decoder = {
    .name                  = "vp9",
    .long_name             = NULL_IF_CONFIG_SMALL("Google VP9"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP9,
    .priv_data_size        = sizeof(VP9Context),
    .init                  = vp9_decode_init,
    .decode                = vp9_decode_frame,
    .flush                 = vp9_decode_flush,
    .close                 = vp9_decode_free,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
    .init_thread_copy      = vp9_decode_init,
    .update_thread_context = vp9_decode_update_thread_context,
    .bsfs                  = "vp9_superframe_split",
};