/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "internal.h"
#include "vp8.h"
#include "vp8data.h"
#include "rectangle.h"
#include "thread.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

static void free_buffers(VP8Context *s)
{
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
            av_freep(&s->thread_data[i].filter_strength);
            av_freep(&s->thread_data[i].edge_emu_buffer);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

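/* Allocate a (possibly referenced) frame buffer through the thread-aware
 * getter and attach a refcounted segmentation map to it; the map is
 * released together with the frame. */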
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}

static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}

static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}

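/* Release every frame and clear the frame pointers; with free_mem set, the
 * per-context buffers are freed as well (used when the dimensions change). */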
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i;

    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        if (av_image_check_size(width, height, 0, s->avctx))
            return AVERROR_INVALIDDATA;

        vp8_decode_flush_impl(s->avctx, 1);

        avcodec_set_dimensions(s->avctx, width, height);
    }

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
    }
    else // Sliced threading
        s->macroblocks_base       = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
    s->top_nnz     = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border  = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->thread_data = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout))
        return AVERROR(ENOMEM);

    s->macroblocks = s->macroblocks_base + 1;

    return 0;
}

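/* Parse the segmentation header: whether the segment map is updated this
 * frame, per-segment quantizer and loop-filter adjustments (absolute or
 * delta), and the tree probabilities used to code the segment map. */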
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (i = 0; i < 4; i++)
            s->segmentation.base_quant[i]   = vp8_rac_get_sint(c, 7);

        for (i = 0; i < 4; i++)
            s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6);
    }
    if (s->segmentation.update_map)
        for (i = 0; i < 3; i++)
            s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255;
}

static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    buf      += 3*(s->num_coeff_partitions-1);
    buf_size -= 3*(s->num_coeff_partitions-1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions-1; i++) {
        int size = AV_RL24(sizes + 3*i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

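/* Read the quantizer indices: one base index for luma AC plus signed deltas
 * for the other plane/coefficient types, then expand them into per-segment
 * dequantization factors via the DC/AC lookup tables. */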
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
        s->qmat[i].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
        s->qmat[i].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
        s->qmat[i].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffers golden and altref should be updated with after this frame.
 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
 *
 * Intra frames update all 3 references
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
 * If the update (golden|altref) flag is set, it's updated with the current frame
 *      if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
 * If the flag is not set, the number read means:
 *      0: no update
 *      1: VP56_FRAME_PREVIOUS
 *      2: update golden with altref, or update altref with golden
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;

    if (update)
        return VP56_FRAME_CURRENT;

    switch (vp8_rac_get_uint(c, 2)) {
    case 1:
        return VP56_FRAME_PREVIOUS;
    case 2:
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int update_golden = vp8_rac_get(c);
    int update_altref = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2);
}

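/* Parse the frame tag, the keyframe-only start code and dimensions, and the
 * first ("header") partition: segmentation, loop filter, quantizers,
 * reference updates and probability updates. */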
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}

static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}

/**
 * Motion vector coding, 17.1.
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

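/* Choose the sub-MV mode probabilities from the left and above sub-MVs:
 * identical neighbours, a zero above-MV and the general case each select a
 * different entry in vp8_submv_prob. */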
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
{
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
        return vp8_submv_prob[2];
    return vp8_submv_prob[1-!!left];
}

/**
 * Split motion vector prediction, 16.4.
 * @returns the number of motion vectors parsed (2, 4 or 16)
 */
static av_always_inline
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
{
    int part_idx;
    int n, num;
    VP8Macroblock *top_mb;
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
                  *mbsplits_top,
                  *mbsplits_cur, *firstidx;
    VP56mv *top_mv;
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;

    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num          = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx];
    firstidx     = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

    for (n = 0; n < num; n++) {
        int k = firstidx[n];
        uint32_t left, above;
        const uint8_t *submv_prob;

        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);

        submv_prob = get_submv_prob(left, above);

        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
            AV_WN32A(&mb->bmv[n], left);
        }
    }

    return num;
}

static av_always_inline
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
{
    VP8Macroblock *mb_edge[3] = { 0      /* top */,
                                  mb - 1 /* left */,
                                  0      /* top-left */ };
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
    int8_t *sign_bias = s->sign_bias;
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
    AV_ZERO32(&near_mv[2]);

    /* Process MB on top, left and top-left */
#define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
                } else {
                    mb->mv.y  += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x  += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
                mb->bmv[0] = mb->mv;
            }
        } else {
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

static av_always_inline
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
                           int mb_x, int keyframe, int layout)
{
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
    if (keyframe) {
        int x, y;
        uint8_t* top;
        uint8_t* const left = s->intra4x4_pred_mode_left;
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
            }
        }
    } else {
        int i;
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
    }
}

static av_always_inline
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
    else if (s->segmentation.enabled)
        *segment = ref ? *ref : *segment;
    mb->segment = *segment;

    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
        }

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
        mb->ref_frame = VP56_FRAME_CURRENT;
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
        // inter MB, 16.2
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
        s->ref_count[mb->ref_frame-1]++;

        // motion vectors, 16.3
        decode_mvs(s, mb, mb_x, mb_y, layout);
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

        if (mb->mode == MODE_I4x4)
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);

        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
        mb->ref_frame        = VP56_FRAME_CURRENT;
        mb->partitioning     = VP8_SPLITMVMODE_NONE;
        AV_ZERO32(&mb->bmv[0]);
    }
}

#ifndef decode_block_coeffs_internal
/**
 * @param r arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                                        int i, uint8_t *token_prob, int16_t qmul[2])
{
    VP56RangeCoder c = *r;
    goto skip_eob;
    do {
        int coeff;
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;

skip_eob:
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
            if (++i == 16)
                break; // invalid input; blocks should end with EOB
            token_prob = probs[i][0];
            goto skip_eob;
        }

        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
            coeff = 1;
            token_prob = probs[i+1][1];
        } else {
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
                if (coeff)
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
                coeff += 2;
            } else {
                // DCT_CAT*
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
                    } else {                                             // DCT_CAT2
                        coeff  = 7;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
                    }
                } else {    // DCT_CAT3 and up
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
                }
            }
            token_prob = probs[i+1][2];
        }
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
    } while (++i < 16);

    *r = c;
    return i;
}
#endif

/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}

static av_always_inline
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
    int segment = mb->segment;
    int block_dc = 0;

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
            else
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
        }
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
            nnz_pred = l_nnz[y] + t_nnz[x];
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
                                      nnz_pred, s->qmat[segment].luma_qmul);
            // nnz+block_dc may be one more than the actual last index, but we don't care
            td->non_zero_count_cache[y][x] = nnz + block_dc;
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
                                          nnz_pred, s->qmat[segment].chroma_qmul);
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

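/* Save the bottom line of the macroblock (and of the chroma blocks for the
 * normal filter) into top_border, so the row below can still use it for
 * intra prediction after deblocking has modified these pixels. */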
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15*linesize);
    if (!simple) {
        AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
        AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
    }
}

static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {          \
        if (xchg) AV_SWAP64(b,a);    \
        else      AV_COPY64(b,a);    \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16, src_cb, 1);
        XCHG(top_border+24, src_cr, 1);
    }
}

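/* The check_*_pred*_mode helpers below substitute intra prediction modes that
 * would read pixels outside the picture with edge-safe equivalents
 * (DC_127/DC_128/DC_129 variants or single-direction prediction) on the
 * frame borders. */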
static av_always_inline
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    } else {
        return mb_y ? mode : LEFT_DC_PRED8x8;
    }
}

static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
    } else {
        return mb_y ? mode : HOR_PRED8x8;
    }
}

static av_always_inline
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mode == DC_PRED8x8) {
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    } else {
        return mode;
    }
}

static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    switch (mode) {
    case DC_PRED8x8:
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    case VERT_PRED8x8:
        return !mb_y ? DC_127_PRED8x8 : mode;
    case HOR_PRED8x8:
        return !mb_x ? DC_129_PRED8x8 : mode;
    case PLANE_PRED8x8 /*TM*/:
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
    }
    return mode;
}

static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? VERT_VP8_PRED : DC_129_PRED;
    } else {
        return mb_y ? mode : HOR_VP8_PRED;
    }
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
    }
    return mode;
}

static av_always_inline
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    AVCodecContext *avctx = s->avctx;
    int x, y, mode, nnz;
    uint32_t tr;

    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

    if (mb->mode < MODE_I4x4) {
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
        uint8_t tr_top[4] = { 127, 127, 127, 127 };

        // all blocks on the right edge of the macroblock use the bottom edge of
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
            tr = tr_right[-1]*0x01010101u;
            tr_right = (uint8_t *)&tr;
        }

        if (mb->skip)
            AV_ZERO128(td->non_zero_count_cache);

        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
                    topright = tr_right;

                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                        } else {
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
                }

                nnz = td->non_zero_count_cache[y][x];
                if (nnz) {
                    if (nnz == 1)
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
                    else
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
                }
                topright += 4;
            }

            ptr      += 4*s->linesize;
            intra4x4 += 4;
        }
    }

    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
    } else {
        mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
    }
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);

    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
}

static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};

/**
 * luma MC function
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 ThreadFrame *ref, const VP56mv *mv,
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
{
    uint8_t *src = ref->f->data[0];

    if (AV_RN32A(mv)) {

        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;

        // edge emulation
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
        src += y_off * linesize + x_off;
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
static av_always_inline
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
                   ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];

    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
            src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

static av_always_inline
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                 ThreadFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
{
    VP56mv uvmv = *mv;

    /* Y */
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
                ref_frame, mv, x_off + bx_off, y_off + by_off,
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
}

/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
{
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
        int x_off = mb_x << 4, y_off = mb_y << 4;
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
        uint8_t **src = s->framep[ref]->tf.f->data;
        int off = mx + (my + (mb_x&3)*4)*s->linesize + 64;
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder.  Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
        s->vdsp.prefetch(src[0]+off, s->linesize, 4);
        off = (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
}

/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
static av_always_inline
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
    VP56mv *bmv = mb->bmv;

    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 16, width, height, &mb->mv);
        break;
    case VP8_SPLITMVMODE_4x4: {
        int x, y;
        VP56mv uvmv;

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
                            ref, &bmv[4*y + x],
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
            }
        }
        break;
    }
    case VP8_SPLITMVMODE_16x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 16, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 16, 8, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x16:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 16, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 16, width, height, &bmv[1]);
        break;
    case VP8_SPLITMVMODE_8x8:
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 0, 8, 8, width, height, &bmv[0]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 0, 8, 8, width, height, &bmv[1]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    0, 8, 8, 8, width, height, &bmv[2]);
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
                    8, 8, 8, 8, width, height, &bmv[3]);
        break;
    }
}

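/* Add the inverse-transformed residual to the prediction. nnz4 packs four
 * per-block non-zero counts into one 32-bit load: zero means the whole group
 * of blocks is skipped, and when every count is at most one (DC-only) the
 * dc_add4 fast path handles all four blocks at once. */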
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}

static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f)
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit  = interior_limit;
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}

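/* Apply the normal loop filter to one macroblock: the outer macroblock edges
 * use the stronger mbedge limit, inner 4x4 edges the weaker bedge limit, and
 * the high-edge-variance threshold comes from a per-filter-level lookup. */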
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;
    int uvlinesize   = s->uvlinesize;
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}

static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
    }
}

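/* With the sliced-threading macroblock layout (layout == 1), modes and motion
 * vectors for the whole frame are decoded in a single up-front pass here,
 * using the same per-row MARGIN-clamped MV ranges as the row decoders. */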
#define MARGIN (16 << 2)
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

#if HAVE_THREADS
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);\
        if (otd->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&otd->lock);\
            td->wait_mb_pos = tmp;\
            do {\
                if (otd->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&otd->cond, &otd->lock);\
            } while (1);\
            td->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&otd->lock);\
        }\
    } while(0);

#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                                (next_td != td && pos >= next_td->wait_mb_pos) ||\
                                (prev_td != td && pos >= prev_td->wait_mb_pos);\
        td->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&td->lock);\
            pthread_cond_broadcast(&td->cond);\
            pthread_mutex_unlock(&td->lock);\
        }\
    } while(0);
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif

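/* Decode one macroblock row without loop filtering. Under sliced threading,
 * rows are interleaved across threads, so check_thread_pos blocks until the
 * thread decoding the row above has advanced far enough to the right. */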
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int i, y, mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16*mb_y*s->linesize,
        curframe->tf.f->data[1] +  8*mb_y*s->uvlinesize,
        curframe->tf.f->data[2] +  8*mb_y*s->uvlinesize
    };
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));
    // left edge of 129 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16>>!!i; y++)
                dst[i][y*curframe->tf.f->linesize[i]-1] = 129;
        if (mb_y == 1) {
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}

1766 static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
1767 int jobnr, int threadnr)
1769 VP8Context *s = avctx->priv_data;
1770 VP8ThreadData *td = &s->thread_data[threadnr];
1771 int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
1772 AVFrame *curframe = s->curframe->tf.f;
1773 VP8Macroblock *mb;
1774 VP8ThreadData *prev_td, *next_td;
1775 uint8_t *dst[3] = {
1776 curframe->data[0] + 16*mb_y*s->linesize,
1777 curframe->data[1] + 8*mb_y*s->uvlinesize,
1778 curframe->data[2] + 8*mb_y*s->uvlinesize

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;

    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
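
    /* Filtering row mb_y modifies border pixels that row mb_y+1 still reads
     * for prediction, and reads pixels that row mb_y-1's filter pass
     * modifies, so each iteration below waits until the neighbouring rows'
     * threads have progressed far enough. */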
    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        if (next_td != td)
            if (next_td != &s->thread_data[0]) {
                check_thread_pos(td, next_td, mb_x+1, mb_y+1);
            }

        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}
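
/* Entry point for one slice-threading job: job jobnr decodes (and, when the
 * loop filter is enabled, filters) macroblock rows jobnr, jobnr + num_jobs,
 * jobnr + 2*num_jobs, ... */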
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        td->thread_mb_pos = mb_y<<16;
        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);
    }

    return 0;
}

static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];
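
    /* A frame is "referenced" if later frames may use it as their last,
     * golden or altref reference; only unreferenced frames can be dropped
     * at the AVDISCARD_NONREF level below. */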
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].tf.f->data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
            vp8_release_frame(s, &s->frames[i]);

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->tf.f->data[0])
        vp8_release_frame(s, curframe);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        goto err;
    }

    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT]      = curframe;
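
    /* Reference bookkeeping for this frame is complete; with frame
     * threading, signalling setup completion here lets the next frame's
     * decoding thread start on the following packet. */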

    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

    if (!s->thread_data[0].edge_emu_buffer)
        for (i = 0; i < MAX_THREADS; i++)
            s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    // top edge of 127 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
        s->top_border[0][31] = 127;
        memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));

    // Make sure the previous frame has read its segmentation map,
    // if we re-use the same map.
    if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
        ff_thread_await_progress(&prev_frame->tf, 1, 0);
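
    /* With slice threading (mb_layout == 1), macroblock modes and motion
     * vectors for the whole frame are parsed up front in a single pass, so
     * the per-row jobs only have to decode coefficients. */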
    if (s->mb_layout == 1)
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);

    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos   = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
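
    /* Mark the frame as fully decoded for any frame threads waiting on it. */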
    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int i;

    vp8_decode_flush_impl(avctx, 1);
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        av_frame_free(&s->frames[i].tf.f);

    return 0;
}

static av_cold int vp8_init_frames(VP8Context *s)
{
    int i;
    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++) {
        s->frames[i].tf.f = av_frame_alloc();
        if (!s->frames[i].tf.f)
            return AVERROR(ENOMEM);
    }
    return 0;
}

static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    if ((ret = vp8_init_frames(s)) < 0) {
        vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}

static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    if ((ret = vp8_init_frames(s)) < 0) {
        vp8_decode_free(avctx);
        return ret;
    }

    return 0;
}
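
/* Translate a frame pointer from the source thread's context into the
 * corresponding slot of this thread's frames[] array. */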
#define REBASE(pic) \
    pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL

static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }
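
    /* Carry over the entropy-coder state: prob[!update_probabilities] is
     * the probability set that persists into the next frame. */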
    s->prob[0] = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));
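
    /* Take new references on the source thread's frames instead of copying
     * pixel data; with refcounted frames both contexts can safely share the
     * same buffers. */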
    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}

AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp8_decode_init,
    .close                 = vp8_decode_free,
    .decode                = vp8_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};