2 * VP7/VP8 compatible video decoder
4 * Copyright (C) 2010 David Conrad
5 * Copyright (C) 2010 Ronald S. Bultje
6 * Copyright (C) 2010 Fiona Glaser
7 * Copyright (C) 2012 Daniel Kang
8 * Copyright (C) 2014 Peter Ross
10 * This file is part of Libav.
12 * Libav is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation; either
15 * version 2.1 of the License, or (at your option) any later version.
17 * Libav is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with Libav; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/imgutils.h"
33 #include "rectangle.h"
42 static void free_buffers(VP8Context
*s
)
46 for (i
= 0; i
< MAX_THREADS
; i
++) {
48 pthread_cond_destroy(&s
->thread_data
[i
].cond
);
49 pthread_mutex_destroy(&s
->thread_data
[i
].lock
);
51 av_freep(&s
->thread_data
[i
].filter_strength
);
53 av_freep(&s
->thread_data
);
54 av_freep(&s
->macroblocks_base
);
55 av_freep(&s
->intra4x4_pred_mode_top
);
56 av_freep(&s
->top_nnz
);
57 av_freep(&s
->top_border
);
59 s
->macroblocks
= NULL
;
62 static int vp8_alloc_frame(VP8Context
*s
, VP8Frame
*f
, int ref
)
65 if ((ret
= ff_thread_get_buffer(s
->avctx
, &f
->tf
,
66 ref
? AV_GET_BUFFER_FLAG_REF
: 0)) < 0)
68 if (!(f
->seg_map
= av_buffer_allocz(s
->mb_width
* s
->mb_height
)))
70 if (s
->avctx
->hwaccel
) {
71 const AVHWAccel
*hwaccel
= s
->avctx
->hwaccel
;
72 if (hwaccel
->frame_priv_data_size
) {
73 f
->hwaccel_priv_buf
= av_buffer_allocz(hwaccel
->frame_priv_data_size
);
74 if (!f
->hwaccel_priv_buf
)
76 f
->hwaccel_picture_private
= f
->hwaccel_priv_buf
->data
;
82 av_buffer_unref(&f
->seg_map
);
83 ff_thread_release_buffer(s
->avctx
, &f
->tf
);
84 return AVERROR(ENOMEM
);
87 static void vp8_release_frame(VP8Context
*s
, VP8Frame
*f
)
89 av_buffer_unref(&f
->seg_map
);
90 av_buffer_unref(&f
->hwaccel_priv_buf
);
91 f
->hwaccel_picture_private
= NULL
;
92 ff_thread_release_buffer(s
->avctx
, &f
->tf
);
#if CONFIG_VP8_DECODER
/**
 * Make dst a new reference to src (used by frame threading).
 * dst is released first; on failure dst is left released.
 *
 * @return 0 on success, negative AVERROR on failure
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }
    if (src->hwaccel_picture_private) {
        dst->hwaccel_priv_buf = av_buffer_ref(src->hwaccel_priv_buf);
        if (!dst->hwaccel_priv_buf)
            return AVERROR(ENOMEM);
        dst->hwaccel_picture_private = dst->hwaccel_priv_buf->data;
    }

    return 0;
}
#endif /* CONFIG_VP8_DECODER */
120 static void vp8_decode_flush_impl(AVCodecContext
*avctx
, int free_mem
)
122 VP8Context
*s
= avctx
->priv_data
;
125 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->frames
); i
++)
126 vp8_release_frame(s
, &s
->frames
[i
]);
127 memset(s
->framep
, 0, sizeof(s
->framep
));
133 static void vp8_decode_flush(AVCodecContext
*avctx
)
135 vp8_decode_flush_impl(avctx
, 0);
138 static VP8Frame
*vp8_find_free_buffer(VP8Context
*s
)
140 VP8Frame
*frame
= NULL
;
143 // find a free buffer
144 for (i
= 0; i
< 5; i
++)
145 if (&s
->frames
[i
] != s
->framep
[VP56_FRAME_CURRENT
] &&
146 &s
->frames
[i
] != s
->framep
[VP56_FRAME_PREVIOUS
] &&
147 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN
] &&
148 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN2
]) {
149 frame
= &s
->frames
[i
];
153 av_log(s
->avctx
, AV_LOG_FATAL
, "Ran out of free frames!\n");
156 if (frame
->tf
.f
->buf
[0])
157 vp8_release_frame(s
, frame
);
162 static av_always_inline
163 int update_dimensions(VP8Context
*s
, int width
, int height
, int is_vp7
)
165 AVCodecContext
*avctx
= s
->avctx
;
168 if (width
!= s
->avctx
->width
||
169 height
!= s
->avctx
->height
) {
170 vp8_decode_flush_impl(s
->avctx
, 1);
172 ret
= ff_set_dimensions(s
->avctx
, width
, height
);
177 s
->mb_width
= (s
->avctx
->coded_width
+ 15) / 16;
178 s
->mb_height
= (s
->avctx
->coded_height
+ 15) / 16;
180 s
->mb_layout
= is_vp7
|| avctx
->active_thread_type
== FF_THREAD_SLICE
&&
181 FFMIN(s
->num_coeff_partitions
, avctx
->thread_count
) > 1;
182 if (!s
->mb_layout
) { // Frame threading and one thread
183 s
->macroblocks_base
= av_mallocz((s
->mb_width
+ s
->mb_height
* 2 + 1) *
184 sizeof(*s
->macroblocks
));
185 s
->intra4x4_pred_mode_top
= av_mallocz(s
->mb_width
* 4);
186 } else // Sliced threading
187 s
->macroblocks_base
= av_mallocz((s
->mb_width
+ 2) * (s
->mb_height
+ 2) *
188 sizeof(*s
->macroblocks
));
189 s
->top_nnz
= av_mallocz(s
->mb_width
* sizeof(*s
->top_nnz
));
190 s
->top_border
= av_mallocz((s
->mb_width
+ 1) * sizeof(*s
->top_border
));
191 s
->thread_data
= av_mallocz(MAX_THREADS
* sizeof(VP8ThreadData
));
193 if (!s
->macroblocks_base
|| !s
->top_nnz
|| !s
->top_border
||
194 !s
->thread_data
|| (!s
->intra4x4_pred_mode_top
&& !s
->mb_layout
)) {
196 return AVERROR(ENOMEM
);
199 for (i
= 0; i
< MAX_THREADS
; i
++) {
200 s
->thread_data
[i
].filter_strength
=
201 av_mallocz(s
->mb_width
* sizeof(*s
->thread_data
[0].filter_strength
));
202 if (!s
->thread_data
[i
].filter_strength
) {
204 return AVERROR(ENOMEM
);
207 pthread_mutex_init(&s
->thread_data
[i
].lock
, NULL
);
208 pthread_cond_init(&s
->thread_data
[i
].cond
, NULL
);
212 s
->macroblocks
= s
->macroblocks_base
+ 1;
217 static int vp7_update_dimensions(VP8Context
*s
, int width
, int height
)
219 return update_dimensions(s
, width
, height
, IS_VP7
);
222 static int vp8_update_dimensions(VP8Context
*s
, int width
, int height
)
224 return update_dimensions(s
, width
, height
, IS_VP8
);
227 static void parse_segment_info(VP8Context
*s
)
229 VP56RangeCoder
*c
= &s
->c
;
232 s
->segmentation
.update_map
= vp8_rac_get(c
);
233 s
->segmentation
.update_feature_data
= vp8_rac_get(c
);
235 if (s
->segmentation
.update_feature_data
) {
236 s
->segmentation
.absolute_vals
= vp8_rac_get(c
);
238 for (i
= 0; i
< 4; i
++)
239 s
->segmentation
.base_quant
[i
] = vp8_rac_get_sint(c
, 7);
241 for (i
= 0; i
< 4; i
++)
242 s
->segmentation
.filter_level
[i
] = vp8_rac_get_sint(c
, 6);
244 if (s
->segmentation
.update_map
)
245 for (i
= 0; i
< 3; i
++)
246 s
->prob
->segmentid
[i
] = vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 8) : 255;
249 static void update_lf_deltas(VP8Context
*s
)
251 VP56RangeCoder
*c
= &s
->c
;
254 for (i
= 0; i
< 4; i
++) {
255 if (vp8_rac_get(c
)) {
256 s
->lf_delta
.ref
[i
] = vp8_rac_get_uint(c
, 6);
259 s
->lf_delta
.ref
[i
] = -s
->lf_delta
.ref
[i
];
263 for (i
= MODE_I4x4
; i
<= VP8_MVMODE_SPLIT
; i
++) {
264 if (vp8_rac_get(c
)) {
265 s
->lf_delta
.mode
[i
] = vp8_rac_get_uint(c
, 6);
268 s
->lf_delta
.mode
[i
] = -s
->lf_delta
.mode
[i
];
273 static int setup_partitions(VP8Context
*s
, const uint8_t *buf
, int buf_size
)
275 const uint8_t *sizes
= buf
;
278 s
->num_coeff_partitions
= 1 << vp8_rac_get_uint(&s
->c
, 2);
280 buf
+= 3 * (s
->num_coeff_partitions
- 1);
281 buf_size
-= 3 * (s
->num_coeff_partitions
- 1);
285 for (i
= 0; i
< s
->num_coeff_partitions
- 1; i
++) {
286 int size
= AV_RL24(sizes
+ 3 * i
);
287 if (buf_size
- size
< 0)
289 s
->coeff_partition_size
[i
] = size
;
291 ff_vp56_init_range_decoder(&s
->coeff_partition
[i
], buf
, size
);
296 s
->coeff_partition_size
[i
] = buf_size
;
297 ff_vp56_init_range_decoder(&s
->coeff_partition
[i
], buf
, buf_size
);
302 static void vp7_get_quants(VP8Context
*s
)
304 VP56RangeCoder
*c
= &s
->c
;
306 int yac_qi
= vp8_rac_get_uint(c
, 7);
307 int ydc_qi
= vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 7) : yac_qi
;
308 int y2dc_qi
= vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 7) : yac_qi
;
309 int y2ac_qi
= vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 7) : yac_qi
;
310 int uvdc_qi
= vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 7) : yac_qi
;
311 int uvac_qi
= vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 7) : yac_qi
;
313 s
->qmat
[0].luma_qmul
[0] = vp7_ydc_qlookup
[ydc_qi
];
314 s
->qmat
[0].luma_qmul
[1] = vp7_yac_qlookup
[yac_qi
];
315 s
->qmat
[0].luma_dc_qmul
[0] = vp7_y2dc_qlookup
[y2dc_qi
];
316 s
->qmat
[0].luma_dc_qmul
[1] = vp7_y2ac_qlookup
[y2ac_qi
];
317 s
->qmat
[0].chroma_qmul
[0] = FFMIN(vp7_ydc_qlookup
[uvdc_qi
], 132);
318 s
->qmat
[0].chroma_qmul
[1] = vp7_yac_qlookup
[uvac_qi
];
321 static void get_quants(VP8Context
*s
)
323 VP56RangeCoder
*c
= &s
->c
;
326 s
->quant
.yac_qi
= vp8_rac_get_uint(c
, 7);
327 s
->quant
.ydc_delta
= vp8_rac_get_sint(c
, 4);
328 s
->quant
.y2dc_delta
= vp8_rac_get_sint(c
, 4);
329 s
->quant
.y2ac_delta
= vp8_rac_get_sint(c
, 4);
330 s
->quant
.uvdc_delta
= vp8_rac_get_sint(c
, 4);
331 s
->quant
.uvac_delta
= vp8_rac_get_sint(c
, 4);
333 for (i
= 0; i
< 4; i
++) {
334 if (s
->segmentation
.enabled
) {
335 base_qi
= s
->segmentation
.base_quant
[i
];
336 if (!s
->segmentation
.absolute_vals
)
337 base_qi
+= s
->quant
.yac_qi
;
339 base_qi
= s
->quant
.yac_qi
;
341 s
->qmat
[i
].luma_qmul
[0] = vp8_dc_qlookup
[av_clip_uintp2(base_qi
+ s
->quant
.ydc_delta
, 7)];
342 s
->qmat
[i
].luma_qmul
[1] = vp8_ac_qlookup
[av_clip_uintp2(base_qi
, 7)];
343 s
->qmat
[i
].luma_dc_qmul
[0] = vp8_dc_qlookup
[av_clip_uintp2(base_qi
+ s
->quant
.y2dc_delta
, 7)] * 2;
344 /* 101581>>16 is equivalent to 155/100 */
345 s
->qmat
[i
].luma_dc_qmul
[1] = vp8_ac_qlookup
[av_clip_uintp2(base_qi
+ s
->quant
.y2ac_delta
, 7)] * 101581 >> 16;
346 s
->qmat
[i
].chroma_qmul
[0] = vp8_dc_qlookup
[av_clip_uintp2(base_qi
+ s
->quant
.uvdc_delta
, 7)];
347 s
->qmat
[i
].chroma_qmul
[1] = vp8_ac_qlookup
[av_clip_uintp2(base_qi
+ s
->quant
.uvac_delta
, 7)];
349 s
->qmat
[i
].luma_dc_qmul
[1] = FFMAX(s
->qmat
[i
].luma_dc_qmul
[1], 8);
350 s
->qmat
[i
].chroma_qmul
[0] = FFMIN(s
->qmat
[i
].chroma_qmul
[0], 132);
355 * Determine which buffers golden and altref should be updated with after this frame.
356 * The spec isn't clear here, so I'm going by my understanding of what libvpx does
358 * Intra frames update all 3 references
359 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set
360 * If the update (golden|altref) flag is set, it's updated with the current frame
361 * if update_last is set, and VP56_FRAME_PREVIOUS otherwise.
362 * If the flag is not set, the number read means:
364 * 1: VP56_FRAME_PREVIOUS
365 * 2: update golden with altref, or update altref with golden
367 static VP56Frame
ref_to_update(VP8Context
*s
, int update
, VP56Frame ref
)
369 VP56RangeCoder
*c
= &s
->c
;
372 return VP56_FRAME_CURRENT
;
374 switch (vp8_rac_get_uint(c
, 2)) {
376 return VP56_FRAME_PREVIOUS
;
378 return (ref
== VP56_FRAME_GOLDEN
) ? VP56_FRAME_GOLDEN2
: VP56_FRAME_GOLDEN
;
380 return VP56_FRAME_NONE
;
383 static void vp78_reset_probability_tables(VP8Context
*s
)
386 for (i
= 0; i
< 4; i
++)
387 for (j
= 0; j
< 16; j
++)
388 memcpy(s
->prob
->token
[i
][j
], vp8_token_default_probs
[i
][vp8_coeff_band
[j
]],
389 sizeof(s
->prob
->token
[i
][j
]));
392 static void vp78_update_probability_tables(VP8Context
*s
)
394 VP56RangeCoder
*c
= &s
->c
;
397 for (i
= 0; i
< 4; i
++)
398 for (j
= 0; j
< 8; j
++)
399 for (k
= 0; k
< 3; k
++)
400 for (l
= 0; l
< NUM_DCT_TOKENS
-1; l
++)
401 if (vp56_rac_get_prob_branchy(c
, vp8_token_update_probs
[i
][j
][k
][l
])) {
402 int prob
= vp8_rac_get_uint(c
, 8);
403 for (m
= 0; vp8_coeff_band_indexes
[j
][m
] >= 0; m
++)
404 s
->prob
->token
[i
][vp8_coeff_band_indexes
[j
][m
]][k
][l
] = prob
;
408 #define VP7_MVC_SIZE 17
409 #define VP8_MVC_SIZE 19
411 static void vp78_update_pred16x16_pred8x8_mvc_probabilities(VP8Context
*s
,
414 VP56RangeCoder
*c
= &s
->c
;
418 for (i
= 0; i
< 4; i
++)
419 s
->prob
->pred16x16
[i
] = vp8_rac_get_uint(c
, 8);
421 for (i
= 0; i
< 3; i
++)
422 s
->prob
->pred8x8c
[i
] = vp8_rac_get_uint(c
, 8);
424 // 17.2 MV probability update
425 for (i
= 0; i
< 2; i
++)
426 for (j
= 0; j
< mvc_size
; j
++)
427 if (vp56_rac_get_prob_branchy(c
, vp8_mv_update_prob
[i
][j
]))
428 s
->prob
->mvc
[i
][j
] = vp8_rac_get_nn(c
);
431 static void update_refs(VP8Context
*s
)
433 VP56RangeCoder
*c
= &s
->c
;
435 int update_golden
= vp8_rac_get(c
);
436 int update_altref
= vp8_rac_get(c
);
438 s
->update_golden
= ref_to_update(s
, update_golden
, VP56_FRAME_GOLDEN
);
439 s
->update_altref
= ref_to_update(s
, update_altref
, VP56_FRAME_GOLDEN2
);
442 static void copy_luma(AVFrame
*dst
, AVFrame
*src
, int width
, int height
)
446 for (j
= 1; j
< 3; j
++) {
447 for (i
= 0; i
< height
/ 2; i
++)
448 memcpy(dst
->data
[j
] + i
* dst
->linesize
[j
],
449 src
->data
[j
] + i
* src
->linesize
[j
], width
/ 2);
/* Apply the VP7 fade: out = clip(y + y*beta/256 + alpha) per pixel.
 * dst and src share the same linesize; operates in place when dst == src. */
static void fade(uint8_t *dst, uint8_t *src,
                 int width, int height, ptrdiff_t linesize,
                 int alpha, int beta)
{
    int i, j;
    for (j = 0; j < height; j++) {
        for (i = 0; i < width; i++) {
            uint8_t y = src[j * linesize + i];
            dst[j * linesize + i] = av_clip_uint8(y + ((y * beta) >> 8) + alpha);
        }
    }
}
467 static int vp7_fade_frame(VP8Context
*s
, VP56RangeCoder
*c
)
469 int alpha
= (int8_t) vp8_rac_get_uint(c
, 8);
470 int beta
= (int8_t) vp8_rac_get_uint(c
, 8);
473 if (!s
->keyframe
&& (alpha
|| beta
)) {
474 int width
= s
->mb_width
* 16;
475 int height
= s
->mb_height
* 16;
478 if (!s
->framep
[VP56_FRAME_PREVIOUS
])
479 return AVERROR_INVALIDDATA
;
482 src
= s
->framep
[VP56_FRAME_PREVIOUS
]->tf
.f
;
484 /* preserve the golden frame, write a new previous frame */
485 if (s
->framep
[VP56_FRAME_GOLDEN
] == s
->framep
[VP56_FRAME_PREVIOUS
]) {
486 s
->framep
[VP56_FRAME_PREVIOUS
] = vp8_find_free_buffer(s
);
487 if ((ret
= vp8_alloc_frame(s
, s
->framep
[VP56_FRAME_PREVIOUS
], 1)) < 0)
490 dst
= s
->framep
[VP56_FRAME_PREVIOUS
]->tf
.f
;
492 copy_luma(dst
, src
, width
, height
);
495 fade(dst
->data
[0], src
->data
[0],
496 width
, height
, dst
->linesize
[0], alpha
, beta
);
502 static int vp7_decode_frame_header(VP8Context
*s
, const uint8_t *buf
, int buf_size
)
504 VP56RangeCoder
*c
= &s
->c
;
505 int part1_size
, hscale
, vscale
, i
, j
, ret
;
506 int width
= s
->avctx
->width
;
507 int height
= s
->avctx
->height
;
510 return AVERROR_INVALIDDATA
;
513 s
->profile
= (buf
[0] >> 1) & 7;
514 if (s
->profile
> 1) {
515 avpriv_request_sample(s
->avctx
, "Unknown profile %d", s
->profile
);
516 return AVERROR_INVALIDDATA
;
519 s
->keyframe
= !(buf
[0] & 1);
521 part1_size
= AV_RL24(buf
) >> 4;
523 buf
+= 4 - s
->profile
;
524 buf_size
-= 4 - s
->profile
;
526 if (buf_size
< part1_size
) {
527 return AVERROR_INVALIDDATA
;
530 memcpy(s
->put_pixels_tab
, s
->vp8dsp
.put_vp8_epel_pixels_tab
, sizeof(s
->put_pixels_tab
));
532 ff_vp56_init_range_decoder(c
, buf
, part1_size
);
534 buf_size
-= part1_size
;
536 /* A. Dimension information (keyframes only) */
538 width
= vp8_rac_get_uint(c
, 12);
539 height
= vp8_rac_get_uint(c
, 12);
540 hscale
= vp8_rac_get_uint(c
, 2);
541 vscale
= vp8_rac_get_uint(c
, 2);
542 if (hscale
|| vscale
)
543 avpriv_request_sample(s
->avctx
, "Upscaling");
545 s
->update_golden
= s
->update_altref
= VP56_FRAME_CURRENT
;
546 vp78_reset_probability_tables(s
);
547 memcpy(s
->prob
->pred16x16
, vp8_pred16x16_prob_inter
,
548 sizeof(s
->prob
->pred16x16
));
549 memcpy(s
->prob
->pred8x8c
, vp8_pred8x8c_prob_inter
,
550 sizeof(s
->prob
->pred8x8c
));
551 for (i
= 0; i
< 2; i
++)
552 memcpy(s
->prob
->mvc
[i
], vp7_mv_default_prob
[i
],
553 sizeof(vp7_mv_default_prob
[i
]));
554 memset(&s
->segmentation
, 0, sizeof(s
->segmentation
));
555 memset(&s
->lf_delta
, 0, sizeof(s
->lf_delta
));
556 memcpy(s
->prob
[0].scan
, ff_zigzag_scan
, sizeof(s
->prob
[0].scan
));
559 if (s
->keyframe
|| s
->profile
> 0)
560 memset(s
->inter_dc_pred
, 0 , sizeof(s
->inter_dc_pred
));
562 /* B. Decoding information for all four macroblock-level features */
563 for (i
= 0; i
< 4; i
++) {
564 s
->feature_enabled
[i
] = vp8_rac_get(c
);
565 if (s
->feature_enabled
[i
]) {
566 s
->feature_present_prob
[i
] = vp8_rac_get_uint(c
, 8);
568 for (j
= 0; j
< 3; j
++)
569 s
->feature_index_prob
[i
][j
] =
570 vp8_rac_get(c
) ? vp8_rac_get_uint(c
, 8) : 255;
572 if (vp7_feature_value_size
[s
->profile
][i
])
573 for (j
= 0; j
< 4; j
++)
574 s
->feature_value
[i
][j
] =
575 vp8_rac_get(c
) ? vp8_rac_get_uint(c
, vp7_feature_value_size
[s
->profile
][i
]) : 0;
579 s
->segmentation
.enabled
= 0;
580 s
->segmentation
.update_map
= 0;
581 s
->lf_delta
.enabled
= 0;
583 s
->num_coeff_partitions
= 1;
584 ff_vp56_init_range_decoder(&s
->coeff_partition
[0], buf
, buf_size
);
586 if (!s
->macroblocks_base
|| /* first frame */
587 width
!= s
->avctx
->width
|| height
!= s
->avctx
->height
||
588 (width
+ 15) / 16 != s
->mb_width
|| (height
+ 15) / 16 != s
->mb_height
) {
589 if ((ret
= vp7_update_dimensions(s
, width
, height
)) < 0)
593 /* C. Dequantization indices */
596 /* D. Golden frame update flag (a Flag) for interframes only */
598 s
->update_golden
= vp8_rac_get(c
) ? VP56_FRAME_CURRENT
: VP56_FRAME_NONE
;
599 s
->sign_bias
[VP56_FRAME_GOLDEN
] = 0;
603 s
->update_probabilities
= 1;
606 if (s
->profile
> 0) {
607 s
->update_probabilities
= vp8_rac_get(c
);
608 if (!s
->update_probabilities
)
609 s
->prob
[1] = s
->prob
[0];
612 s
->fade_present
= vp8_rac_get(c
);
615 /* E. Fading information for previous frame */
616 if (s
->fade_present
&& vp8_rac_get(c
)) {
617 if ((ret
= vp7_fade_frame(s
,c
)) < 0)
621 /* F. Loop filter type */
623 s
->filter
.simple
= vp8_rac_get(c
);
625 /* G. DCT coefficient ordering specification */
627 for (i
= 1; i
< 16; i
++)
628 s
->prob
[0].scan
[i
] = ff_zigzag_scan
[vp8_rac_get_uint(c
, 4)];
630 /* H. Loop filter levels */
632 s
->filter
.simple
= vp8_rac_get(c
);
633 s
->filter
.level
= vp8_rac_get_uint(c
, 6);
634 s
->filter
.sharpness
= vp8_rac_get_uint(c
, 3);
636 /* I. DCT coefficient probability update; 13.3 Token Probability Updates */
637 vp78_update_probability_tables(s
);
639 s
->mbskip_enabled
= 0;
641 /* J. The remaining frame header data occurs ONLY FOR INTERFRAMES */
643 s
->prob
->intra
= vp8_rac_get_uint(c
, 8);
644 s
->prob
->last
= vp8_rac_get_uint(c
, 8);
645 vp78_update_pred16x16_pred8x8_mvc_probabilities(s
, VP7_MVC_SIZE
);
651 static int vp8_decode_frame_header(VP8Context
*s
, const uint8_t *buf
, int buf_size
)
653 VP56RangeCoder
*c
= &s
->c
;
654 int header_size
, hscale
, vscale
, ret
;
655 int width
= s
->avctx
->width
;
656 int height
= s
->avctx
->height
;
658 s
->keyframe
= !(buf
[0] & 1);
659 s
->profile
= (buf
[0]>>1) & 7;
660 s
->invisible
= !(buf
[0] & 0x10);
661 header_size
= AV_RL24(buf
) >> 5;
665 s
->header_partition_size
= header_size
;
668 av_log(s
->avctx
, AV_LOG_WARNING
, "Unknown profile %d\n", s
->profile
);
671 memcpy(s
->put_pixels_tab
, s
->vp8dsp
.put_vp8_epel_pixels_tab
,
672 sizeof(s
->put_pixels_tab
));
673 else // profile 1-3 use bilinear, 4+ aren't defined so whatever
674 memcpy(s
->put_pixels_tab
, s
->vp8dsp
.put_vp8_bilinear_pixels_tab
,
675 sizeof(s
->put_pixels_tab
));
677 if (header_size
> buf_size
- 7 * s
->keyframe
) {
678 av_log(s
->avctx
, AV_LOG_ERROR
, "Header size larger than data provided\n");
679 return AVERROR_INVALIDDATA
;
683 if (AV_RL24(buf
) != 0x2a019d) {
684 av_log(s
->avctx
, AV_LOG_ERROR
,
685 "Invalid start code 0x%x\n", AV_RL24(buf
));
686 return AVERROR_INVALIDDATA
;
688 width
= AV_RL16(buf
+ 3) & 0x3fff;
689 height
= AV_RL16(buf
+ 5) & 0x3fff;
690 hscale
= buf
[4] >> 6;
691 vscale
= buf
[6] >> 6;
695 if (hscale
|| vscale
)
696 avpriv_request_sample(s
->avctx
, "Upscaling");
698 s
->update_golden
= s
->update_altref
= VP56_FRAME_CURRENT
;
699 vp78_reset_probability_tables(s
);
700 memcpy(s
->prob
->pred16x16
, vp8_pred16x16_prob_inter
,
701 sizeof(s
->prob
->pred16x16
));
702 memcpy(s
->prob
->pred8x8c
, vp8_pred8x8c_prob_inter
,
703 sizeof(s
->prob
->pred8x8c
));
704 memcpy(s
->prob
->mvc
, vp8_mv_default_prob
,
705 sizeof(s
->prob
->mvc
));
706 memset(&s
->segmentation
, 0, sizeof(s
->segmentation
));
707 memset(&s
->lf_delta
, 0, sizeof(s
->lf_delta
));
710 ff_vp56_init_range_decoder(c
, buf
, header_size
);
712 buf_size
-= header_size
;
715 s
->colorspace
= vp8_rac_get(c
);
717 av_log(s
->avctx
, AV_LOG_WARNING
, "Unspecified colorspace\n");
718 s
->fullrange
= vp8_rac_get(c
);
721 if ((s
->segmentation
.enabled
= vp8_rac_get(c
)))
722 parse_segment_info(s
);
724 s
->segmentation
.update_map
= 0; // FIXME: move this to some init function?
726 s
->filter
.simple
= vp8_rac_get(c
);
727 s
->filter
.level
= vp8_rac_get_uint(c
, 6);
728 s
->filter
.sharpness
= vp8_rac_get_uint(c
, 3);
730 if ((s
->lf_delta
.enabled
= vp8_rac_get(c
))) {
731 s
->lf_delta
.update
= vp8_rac_get(c
);
732 if (s
->lf_delta
.update
)
736 if (setup_partitions(s
, buf
, buf_size
)) {
737 av_log(s
->avctx
, AV_LOG_ERROR
, "Invalid partitions\n");
738 return AVERROR_INVALIDDATA
;
741 if (!s
->macroblocks_base
|| /* first frame */
742 width
!= s
->avctx
->width
|| height
!= s
->avctx
->height
)
743 if ((ret
= vp8_update_dimensions(s
, width
, height
)) < 0)
750 s
->sign_bias
[VP56_FRAME_GOLDEN
] = vp8_rac_get(c
);
751 s
->sign_bias
[VP56_FRAME_GOLDEN2
/* altref */] = vp8_rac_get(c
);
754 // if we aren't saving this frame's probabilities for future frames,
755 // make a copy of the current probabilities
756 if (!(s
->update_probabilities
= vp8_rac_get(c
)))
757 s
->prob
[1] = s
->prob
[0];
759 s
->update_last
= s
->keyframe
|| vp8_rac_get(c
);
761 vp78_update_probability_tables(s
);
763 if ((s
->mbskip_enabled
= vp8_rac_get(c
)))
764 s
->prob
->mbskip
= vp8_rac_get_uint(c
, 8);
767 s
->prob
->intra
= vp8_rac_get_uint(c
, 8);
768 s
->prob
->last
= vp8_rac_get_uint(c
, 8);
769 s
->prob
->golden
= vp8_rac_get_uint(c
, 8);
770 vp78_update_pred16x16_pred8x8_mvc_probabilities(s
, VP8_MVC_SIZE
);
773 // Record the entropy coder state here so that hwaccels can use it.
774 s
->c
.code_word
= vp56_rac_renorm(&s
->c
);
775 s
->coder_state_at_header_end
.input
= s
->c
.buffer
- (-s
->c
.bits
/ 8);
776 s
->coder_state_at_header_end
.range
= s
->c
.high
;
777 s
->coder_state_at_header_end
.value
= s
->c
.code_word
>> 16;
778 s
->coder_state_at_header_end
.bit_count
= -s
->c
.bits
% 8;
783 static av_always_inline
784 void clamp_mv(VP8Context
*s
, VP56mv
*dst
, const VP56mv
*src
)
786 dst
->x
= av_clip(src
->x
, s
->mv_min
.x
, s
->mv_max
.x
);
787 dst
->y
= av_clip(src
->y
, s
->mv_min
.y
, s
->mv_max
.y
);
791 * Motion vector coding, 17.1.
793 static int read_mv_component(VP56RangeCoder
*c
, const uint8_t *p
, int vp7
)
797 if (vp56_rac_get_prob_branchy(c
, p
[0])) {
800 for (i
= 0; i
< 3; i
++)
801 x
+= vp56_rac_get_prob(c
, p
[9 + i
]) << i
;
802 for (i
= (vp7
? 7 : 9); i
> 3; i
--)
803 x
+= vp56_rac_get_prob(c
, p
[9 + i
]) << i
;
804 if (!(x
& (vp7
? 0xF0 : 0xFFF0)) || vp56_rac_get_prob(c
, p
[12]))
808 const uint8_t *ps
= p
+ 2;
809 bit
= vp56_rac_get_prob(c
, *ps
);
812 bit
= vp56_rac_get_prob(c
, *ps
);
815 x
+= vp56_rac_get_prob(c
, *ps
);
818 return (x
&& vp56_rac_get_prob(c
, p
[1])) ? -x
: x
;
821 static av_always_inline
822 const uint8_t *get_submv_prob(uint32_t left
, uint32_t top
, int is_vp7
)
825 return vp7_submv_prob
;
828 return vp8_submv_prob
[4 - !!left
];
830 return vp8_submv_prob
[2];
831 return vp8_submv_prob
[1 - !!left
];
835 * Split motion vector prediction, 16.4.
836 * @returns the number of motion vectors parsed (2, 4 or 16)
838 static av_always_inline
839 int decode_splitmvs(VP8Context
*s
, VP56RangeCoder
*c
, VP8Macroblock
*mb
,
840 int layout
, int is_vp7
)
844 VP8Macroblock
*top_mb
;
845 VP8Macroblock
*left_mb
= &mb
[-1];
846 const uint8_t *mbsplits_left
= vp8_mbsplits
[left_mb
->partitioning
];
847 const uint8_t *mbsplits_top
, *mbsplits_cur
, *firstidx
;
849 VP56mv
*left_mv
= left_mb
->bmv
;
850 VP56mv
*cur_mv
= mb
->bmv
;
852 if (!layout
) // layout is inlined, s->mb_layout is not
855 top_mb
= &mb
[-s
->mb_width
- 1];
856 mbsplits_top
= vp8_mbsplits
[top_mb
->partitioning
];
857 top_mv
= top_mb
->bmv
;
859 if (vp56_rac_get_prob_branchy(c
, vp8_mbsplit_prob
[0])) {
860 if (vp56_rac_get_prob_branchy(c
, vp8_mbsplit_prob
[1]))
861 part_idx
= VP8_SPLITMVMODE_16x8
+ vp56_rac_get_prob(c
, vp8_mbsplit_prob
[2]);
863 part_idx
= VP8_SPLITMVMODE_8x8
;
865 part_idx
= VP8_SPLITMVMODE_4x4
;
868 num
= vp8_mbsplit_count
[part_idx
];
869 mbsplits_cur
= vp8_mbsplits
[part_idx
],
870 firstidx
= vp8_mbfirstidx
[part_idx
];
871 mb
->partitioning
= part_idx
;
873 for (n
= 0; n
< num
; n
++) {
875 uint32_t left
, above
;
876 const uint8_t *submv_prob
;
879 left
= AV_RN32A(&left_mv
[mbsplits_left
[k
+ 3]]);
881 left
= AV_RN32A(&cur_mv
[mbsplits_cur
[k
- 1]]);
883 above
= AV_RN32A(&top_mv
[mbsplits_top
[k
+ 12]]);
885 above
= AV_RN32A(&cur_mv
[mbsplits_cur
[k
- 4]]);
887 submv_prob
= get_submv_prob(left
, above
, is_vp7
);
889 if (vp56_rac_get_prob_branchy(c
, submv_prob
[0])) {
890 if (vp56_rac_get_prob_branchy(c
, submv_prob
[1])) {
891 if (vp56_rac_get_prob_branchy(c
, submv_prob
[2])) {
892 mb
->bmv
[n
].y
= mb
->mv
.y
+
893 read_mv_component(c
, s
->prob
->mvc
[0], is_vp7
);
894 mb
->bmv
[n
].x
= mb
->mv
.x
+
895 read_mv_component(c
, s
->prob
->mvc
[1], is_vp7
);
897 AV_ZERO32(&mb
->bmv
[n
]);
900 AV_WN32A(&mb
->bmv
[n
], above
);
903 AV_WN32A(&mb
->bmv
[n
], left
);
/**
 * The vp7 reference decoder uses a padding macroblock column (added to right
 * edge of the frame) to guard against illegal macroblock offsets. The
 * algorithm has bugs that permit offsets to straddle the padding column.
 * This function replicates those bugs.
 *
 * @param[out] edge_x macroblock x address
 * @param[out] edge_y macroblock y address
 *
 * @return macroblock offset legal (boolean)
 */
static int vp7_calculate_mb_offset(int mb_x, int mb_y, int mb_width,
                                   int xoffset, int yoffset, int boundary,
                                   int *edge_x, int *edge_y)
{
    int vwidth = mb_width + 1;
    int new = (mb_y + yoffset) * vwidth + mb_x + xoffset;
    if (new < boundary || new % vwidth == vwidth - 1)
        return 0;
    *edge_y = new / vwidth;
    *edge_x = new % vwidth;
    return 1;
}
934 static const VP56mv
*get_bmv_ptr(const VP8Macroblock
*mb
, int subblock
)
936 return &mb
->bmv
[mb
->mode
== VP8_MVMODE_SPLIT
? vp8_mbsplits
[mb
->partitioning
][subblock
] : 0];
939 static av_always_inline
940 void vp7_decode_mvs(VP8Context
*s
, VP8Macroblock
*mb
,
941 int mb_x
, int mb_y
, int layout
)
943 VP8Macroblock
*mb_edge
[12];
944 enum { CNT_ZERO
, CNT_NEAREST
, CNT_NEAR
};
945 enum { VP8_EDGE_TOP
, VP8_EDGE_LEFT
, VP8_EDGE_TOPLEFT
};
948 uint8_t cnt
[3] = { 0 };
949 VP56RangeCoder
*c
= &s
->c
;
952 AV_ZERO32(&near_mv
[0]);
953 AV_ZERO32(&near_mv
[1]);
954 AV_ZERO32(&near_mv
[2]);
956 for (i
= 0; i
< VP7_MV_PRED_COUNT
; i
++) {
957 const VP7MVPred
* pred
= &vp7_mv_pred
[i
];
960 if (vp7_calculate_mb_offset(mb_x
, mb_y
, s
->mb_width
, pred
->xoffset
,
961 pred
->yoffset
, !s
->profile
, &edge_x
, &edge_y
)) {
962 VP8Macroblock
*edge
= mb_edge
[i
] = (s
->mb_layout
== 1)
963 ? s
->macroblocks_base
+ 1 + edge_x
+
964 (s
->mb_width
+ 1) * (edge_y
+ 1)
965 : s
->macroblocks
+ edge_x
+
966 (s
->mb_height
- edge_y
- 1) * 2;
967 uint32_t mv
= AV_RN32A(get_bmv_ptr(edge
, vp7_mv_pred
[i
].subblock
));
969 if (AV_RN32A(&near_mv
[CNT_NEAREST
])) {
970 if (mv
== AV_RN32A(&near_mv
[CNT_NEAREST
])) {
972 } else if (AV_RN32A(&near_mv
[CNT_NEAR
])) {
973 if (mv
!= AV_RN32A(&near_mv
[CNT_NEAR
]))
977 AV_WN32A(&near_mv
[CNT_NEAR
], mv
);
981 AV_WN32A(&near_mv
[CNT_NEAREST
], mv
);
990 cnt
[idx
] += vp7_mv_pred
[i
].score
;
993 mb
->partitioning
= VP8_SPLITMVMODE_NONE
;
995 if (vp56_rac_get_prob_branchy(c
, vp7_mode_contexts
[cnt
[CNT_ZERO
]][0])) {
996 mb
->mode
= VP8_MVMODE_MV
;
998 if (vp56_rac_get_prob_branchy(c
, vp7_mode_contexts
[cnt
[CNT_NEAREST
]][1])) {
1000 if (vp56_rac_get_prob_branchy(c
, vp7_mode_contexts
[cnt
[CNT_NEAR
]][2])) {
1002 if (cnt
[CNT_NEAREST
] > cnt
[CNT_NEAR
])
1003 AV_WN32A(&mb
->mv
, cnt
[CNT_ZERO
] > cnt
[CNT_NEAREST
] ? 0 : AV_RN32A(&near_mv
[CNT_NEAREST
]));
1005 AV_WN32A(&mb
->mv
, cnt
[CNT_ZERO
] > cnt
[CNT_NEAR
] ? 0 : AV_RN32A(&near_mv
[CNT_NEAR
]));
1007 if (vp56_rac_get_prob_branchy(c
, vp7_mode_contexts
[cnt
[CNT_NEAR
]][3])) {
1008 mb
->mode
= VP8_MVMODE_SPLIT
;
1009 mb
->mv
= mb
->bmv
[decode_splitmvs(s
, c
, mb
, layout
, IS_VP7
) - 1];
1011 mb
->mv
.y
+= read_mv_component(c
, s
->prob
->mvc
[0], IS_VP7
);
1012 mb
->mv
.x
+= read_mv_component(c
, s
->prob
->mvc
[1], IS_VP7
);
1013 mb
->bmv
[0] = mb
->mv
;
1016 mb
->mv
= near_mv
[CNT_NEAR
];
1017 mb
->bmv
[0] = mb
->mv
;
1020 mb
->mv
= near_mv
[CNT_NEAREST
];
1021 mb
->bmv
[0] = mb
->mv
;
1024 mb
->mode
= VP8_MVMODE_ZERO
;
1026 mb
->bmv
[0] = mb
->mv
;
1030 static av_always_inline
1031 void vp8_decode_mvs(VP8Context
*s
, VP8Macroblock
*mb
,
1032 int mb_x
, int mb_y
, int layout
)
1034 VP8Macroblock
*mb_edge
[3] = { 0 /* top */,
1037 enum { CNT_ZERO
, CNT_NEAREST
, CNT_NEAR
, CNT_SPLITMV
};
1038 enum { VP8_EDGE_TOP
, VP8_EDGE_LEFT
, VP8_EDGE_TOPLEFT
};
1040 int cur_sign_bias
= s
->sign_bias
[mb
->ref_frame
];
1041 int8_t *sign_bias
= s
->sign_bias
;
1043 uint8_t cnt
[4] = { 0 };
1044 VP56RangeCoder
*c
= &s
->c
;
1046 if (!layout
) { // layout is inlined (s->mb_layout is not)
1047 mb_edge
[0] = mb
+ 2;
1048 mb_edge
[2] = mb
+ 1;
1050 mb_edge
[0] = mb
- s
->mb_width
- 1;
1051 mb_edge
[2] = mb
- s
->mb_width
- 2;
1054 AV_ZERO32(&near_mv
[0]);
1055 AV_ZERO32(&near_mv
[1]);
1056 AV_ZERO32(&near_mv
[2]);
1058 /* Process MB on top, left and top-left */
1059 #define MV_EDGE_CHECK(n) \
1061 VP8Macroblock *edge = mb_edge[n]; \
1062 int edge_ref = edge->ref_frame; \
1063 if (edge_ref != VP56_FRAME_CURRENT) { \
1064 uint32_t mv = AV_RN32A(&edge->mv); \
1066 if (cur_sign_bias != sign_bias[edge_ref]) { \
1067 /* SWAR negate of the values in mv. */ \
1069 mv = ((mv & 0x7fff7fff) + \
1070 0x00010001) ^ (mv & 0x80008000); \
1072 if (!n || mv != AV_RN32A(&near_mv[idx])) \
1073 AV_WN32A(&near_mv[++idx], mv); \
1074 cnt[idx] += 1 + (n != 2); \
1076 cnt[CNT_ZERO] += 1 + (n != 2); \
1084 mb
->partitioning
= VP8_SPLITMVMODE_NONE
;
1085 if (vp56_rac_get_prob_branchy(c
, vp8_mode_contexts
[cnt
[CNT_ZERO
]][0])) {
1086 mb
->mode
= VP8_MVMODE_MV
;
1088 /* If we have three distinct MVs, merge first and last if they're the same */
1089 if (cnt
[CNT_SPLITMV
] &&
1090 AV_RN32A(&near_mv
[1 + VP8_EDGE_TOP
]) == AV_RN32A(&near_mv
[1 + VP8_EDGE_TOPLEFT
]))
1091 cnt
[CNT_NEAREST
] += 1;
1093 /* Swap near and nearest if necessary */
1094 if (cnt
[CNT_NEAR
] > cnt
[CNT_NEAREST
]) {
1095 FFSWAP(uint8_t, cnt
[CNT_NEAREST
], cnt
[CNT_NEAR
]);
1096 FFSWAP( VP56mv
, near_mv
[CNT_NEAREST
], near_mv
[CNT_NEAR
]);
1099 if (vp56_rac_get_prob_branchy(c
, vp8_mode_contexts
[cnt
[CNT_NEAREST
]][1])) {
1100 if (vp56_rac_get_prob_branchy(c
, vp8_mode_contexts
[cnt
[CNT_NEAR
]][2])) {
1101 /* Choose the best mv out of 0,0 and the nearest mv */
1102 clamp_mv(s
, &mb
->mv
, &near_mv
[CNT_ZERO
+ (cnt
[CNT_NEAREST
] >= cnt
[CNT_ZERO
])]);
1103 cnt
[CNT_SPLITMV
] = ((mb_edge
[VP8_EDGE_LEFT
]->mode
== VP8_MVMODE_SPLIT
) +
1104 (mb_edge
[VP8_EDGE_TOP
]->mode
== VP8_MVMODE_SPLIT
)) * 2 +
1105 (mb_edge
[VP8_EDGE_TOPLEFT
]->mode
== VP8_MVMODE_SPLIT
);
1107 if (vp56_rac_get_prob_branchy(c
, vp8_mode_contexts
[cnt
[CNT_SPLITMV
]][3])) {
1108 mb
->mode
= VP8_MVMODE_SPLIT
;
1109 mb
->mv
= mb
->bmv
[decode_splitmvs(s
, c
, mb
, layout
, IS_VP8
) - 1];
1111 mb
->mv
.y
+= read_mv_component(c
, s
->prob
->mvc
[0], IS_VP8
);
1112 mb
->mv
.x
+= read_mv_component(c
, s
->prob
->mvc
[1], IS_VP8
);
1113 mb
->bmv
[0] = mb
->mv
;
1116 clamp_mv(s
, &mb
->mv
, &near_mv
[CNT_NEAR
]);
1117 mb
->bmv
[0] = mb
->mv
;
1120 clamp_mv(s
, &mb
->mv
, &near_mv
[CNT_NEAREST
]);
1121 mb
->bmv
[0] = mb
->mv
;
1124 mb
->mode
= VP8_MVMODE_ZERO
;
1126 mb
->bmv
[0] = mb
->mv
;
1130 static av_always_inline
1131 void decode_intra4x4_modes(VP8Context
*s
, VP56RangeCoder
*c
, VP8Macroblock
*mb
,
1132 int mb_x
, int keyframe
, int layout
)
1134 uint8_t *intra4x4
= mb
->intra4x4_pred_mode_mb
;
1137 VP8Macroblock
*mb_top
= mb
- s
->mb_width
- 1;
1138 memcpy(mb
->intra4x4_pred_mode_top
, mb_top
->intra4x4_pred_mode_top
, 4);
1143 uint8_t *const left
= s
->intra4x4_pred_mode_left
;
1145 top
= mb
->intra4x4_pred_mode_top
;
1147 top
= s
->intra4x4_pred_mode_top
+ 4 * mb_x
;
1148 for (y
= 0; y
< 4; y
++) {
1149 for (x
= 0; x
< 4; x
++) {
1151 ctx
= vp8_pred4x4_prob_intra
[top
[x
]][left
[y
]];
1152 *intra4x4
= vp8_rac_get_tree(c
, vp8_pred4x4_tree
, ctx
);
1153 left
[y
] = top
[x
] = *intra4x4
;
1159 for (i
= 0; i
< 16; i
++)
1160 intra4x4
[i
] = vp8_rac_get_tree(c
, vp8_pred4x4_tree
,
1161 vp8_pred4x4_prob_inter
);
1165 static av_always_inline
1166 void decode_mb_mode(VP8Context
*s
, VP8Macroblock
*mb
, int mb_x
, int mb_y
,
1167 uint8_t *segment
, uint8_t *ref
, int layout
, int is_vp7
)
1169 VP56RangeCoder
*c
= &s
->c
;
1170 static const char * const vp7_feature_name
[] = { "q-index",
1172 "partial-golden-update",
1177 for (i
= 0; i
< 4; i
++) {
1178 if (s
->feature_enabled
[i
]) {
1179 if (vp56_rac_get_prob(c
, s
->feature_present_prob
[i
])) {
1180 int index
= vp8_rac_get_tree(c
, vp7_feature_index_tree
,
1181 s
->feature_index_prob
[i
]);
1182 av_log(s
->avctx
, AV_LOG_WARNING
,
1183 "Feature %s present in macroblock (value 0x%x)\n",
1184 vp7_feature_name
[i
], s
->feature_value
[i
][index
]);
1188 } else if (s
->segmentation
.update_map
)
1189 *segment
= vp8_rac_get_tree(c
, vp8_segmentid_tree
, s
->prob
->segmentid
);
1190 else if (s
->segmentation
.enabled
)
1191 *segment
= ref
? *ref
: *segment
;
1192 mb
->segment
= *segment
;
1194 mb
->skip
= s
->mbskip_enabled
? vp56_rac_get_prob(c
, s
->prob
->mbskip
) : 0;
1197 mb
->mode
= vp8_rac_get_tree(c
, vp8_pred16x16_tree_intra
,
1198 vp8_pred16x16_prob_intra
);
1200 if (mb
->mode
== MODE_I4x4
) {
1201 decode_intra4x4_modes(s
, c
, mb
, mb_x
, 1, layout
);
1203 const uint32_t modes
= (is_vp7
? vp7_pred4x4_mode
1204 : vp8_pred4x4_mode
)[mb
->mode
] * 0x01010101u
;
1205 if (s
->mb_layout
== 1)
1206 AV_WN32A(mb
->intra4x4_pred_mode_top
, modes
);
1208 AV_WN32A(s
->intra4x4_pred_mode_top
+ 4 * mb_x
, modes
);
1209 AV_WN32A(s
->intra4x4_pred_mode_left
, modes
);
1212 mb
->chroma_pred_mode
= vp8_rac_get_tree(c
, vp8_pred8x8c_tree
,
1213 vp8_pred8x8c_prob_intra
);
1214 mb
->ref_frame
= VP56_FRAME_CURRENT
;
1215 } else if (vp56_rac_get_prob_branchy(c
, s
->prob
->intra
)) {
1217 if (vp56_rac_get_prob_branchy(c
, s
->prob
->last
))
1219 (!is_vp7
&& vp56_rac_get_prob(c
, s
->prob
->golden
)) ? VP56_FRAME_GOLDEN2
/* altref */
1220 : VP56_FRAME_GOLDEN
;
1222 mb
->ref_frame
= VP56_FRAME_PREVIOUS
;
1223 s
->ref_count
[mb
->ref_frame
- 1]++;
1225 // motion vectors, 16.3
1227 vp7_decode_mvs(s
, mb
, mb_x
, mb_y
, layout
);
1229 vp8_decode_mvs(s
, mb
, mb_x
, mb_y
, layout
);
1232 mb
->mode
= vp8_rac_get_tree(c
, vp8_pred16x16_tree_inter
, s
->prob
->pred16x16
);
1234 if (mb
->mode
== MODE_I4x4
)
1235 decode_intra4x4_modes(s
, c
, mb
, mb_x
, 0, layout
);
1237 mb
->chroma_pred_mode
= vp8_rac_get_tree(c
, vp8_pred8x8c_tree
,
1239 mb
->ref_frame
= VP56_FRAME_CURRENT
;
1240 mb
->partitioning
= VP8_SPLITMVMODE_NONE
;
1241 AV_ZERO32(&mb
->bmv
[0]);
1246 * @param r arithmetic bitstream reader context
1247 * @param block destination for block coefficients
1248 * @param probs probabilities to use when reading trees from the bitstream
1249 * @param i initial coeff index, 0 unless a separate DC block is coded
1250 * @param qmul array holding the dc/ac dequant factor at position 0/1
1252 * @return 0 if no coeffs were decoded
1253 * otherwise, the index of the last coeff decoded plus one
1255 static av_always_inline
1256 int decode_block_coeffs_internal(VP56RangeCoder
*r
, int16_t block
[16],
1257 uint8_t probs
[16][3][NUM_DCT_TOKENS
- 1],
1258 int i
, uint8_t *token_prob
, int16_t qmul
[2],
1259 const uint8_t scan
[16], int vp7
)
1261 VP56RangeCoder c
= *r
;
1266 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[0])) // DCT_EOB
1270 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[1])) { // DCT_0
1272 break; // invalid input; blocks should end with EOB
1273 token_prob
= probs
[i
][0];
1279 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[2])) { // DCT_1
1281 token_prob
= probs
[i
+ 1][1];
1283 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[3])) { // DCT 2,3,4
1284 coeff
= vp56_rac_get_prob_branchy(&c
, token_prob
[4]);
1286 coeff
+= vp56_rac_get_prob(&c
, token_prob
[5]);
1290 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[6])) {
1291 if (!vp56_rac_get_prob_branchy(&c
, token_prob
[7])) { // DCT_CAT1
1292 coeff
= 5 + vp56_rac_get_prob(&c
, vp8_dct_cat1_prob
[0]);
1293 } else { // DCT_CAT2
1295 coeff
+= vp56_rac_get_prob(&c
, vp8_dct_cat2_prob
[0]) << 1;
1296 coeff
+= vp56_rac_get_prob(&c
, vp8_dct_cat2_prob
[1]);
1298 } else { // DCT_CAT3 and up
1299 int a
= vp56_rac_get_prob(&c
, token_prob
[8]);
1300 int b
= vp56_rac_get_prob(&c
, token_prob
[9 + a
]);
1301 int cat
= (a
<< 1) + b
;
1302 coeff
= 3 + (8 << cat
);
1303 coeff
+= vp8_rac_get_coeff(&c
, ff_vp8_dct_cat_prob
[cat
]);
1306 token_prob
= probs
[i
+ 1][2];
1308 block
[scan
[i
]] = (vp8_rac_get(&c
) ? -coeff
: coeff
) * qmul
[!!i
];
1315 static av_always_inline
1316 int inter_predict_dc(int16_t block
[16], int16_t pred
[2])
1318 int16_t dc
= block
[0];
1326 if (!pred
[0] | !dc
| ((int32_t)pred
[0] ^ (int32_t)dc
) >> 31) {
1327 block
[0] = pred
[0] = dc
;
1332 block
[0] = pred
[0] = dc
;
1338 static int vp7_decode_block_coeffs_internal(VP56RangeCoder
*r
,
1340 uint8_t probs
[16][3][NUM_DCT_TOKENS
- 1],
1341 int i
, uint8_t *token_prob
,
1343 const uint8_t scan
[16])
1345 return decode_block_coeffs_internal(r
, block
, probs
, i
,
1346 token_prob
, qmul
, scan
, IS_VP7
);
1349 #ifndef vp8_decode_block_coeffs_internal
1350 static int vp8_decode_block_coeffs_internal(VP56RangeCoder
*r
,
1352 uint8_t probs
[16][3][NUM_DCT_TOKENS
- 1],
1353 int i
, uint8_t *token_prob
,
1356 return decode_block_coeffs_internal(r
, block
, probs
, i
,
1357 token_prob
, qmul
, ff_zigzag_scan
, IS_VP8
);
1362 * @param c arithmetic bitstream reader context
1363 * @param block destination for block coefficients
1364 * @param probs probabilities to use when reading trees from the bitstream
1365 * @param i initial coeff index, 0 unless a separate DC block is coded
1366 * @param zero_nhood the initial prediction context for number of surrounding
1367 * all-zero blocks (only left/top, so 0-2)
1368 * @param qmul array holding the dc/ac dequant factor at position 0/1
1370 * @return 0 if no coeffs were decoded
1371 * otherwise, the index of the last coeff decoded plus one
1373 static av_always_inline
1374 int decode_block_coeffs(VP56RangeCoder
*c
, int16_t block
[16],
1375 uint8_t probs
[16][3][NUM_DCT_TOKENS
- 1],
1376 int i
, int zero_nhood
, int16_t qmul
[2],
1377 const uint8_t scan
[16], int vp7
)
1379 uint8_t *token_prob
= probs
[i
][zero_nhood
];
1380 if (!vp56_rac_get_prob_branchy(c
, token_prob
[0])) // DCT_EOB
1382 return vp7
? vp7_decode_block_coeffs_internal(c
, block
, probs
, i
,
1383 token_prob
, qmul
, scan
)
1384 : vp8_decode_block_coeffs_internal(c
, block
, probs
, i
,
1388 static av_always_inline
1389 void decode_mb_coeffs(VP8Context
*s
, VP8ThreadData
*td
, VP56RangeCoder
*c
,
1390 VP8Macroblock
*mb
, uint8_t t_nnz
[9], uint8_t l_nnz
[9],
1393 int i
, x
, y
, luma_start
= 0, luma_ctx
= 3;
1394 int nnz_pred
, nnz
, nnz_total
= 0;
1395 int segment
= mb
->segment
;
1398 if (mb
->mode
!= MODE_I4x4
&& (is_vp7
|| mb
->mode
!= VP8_MVMODE_SPLIT
)) {
1399 nnz_pred
= t_nnz
[8] + l_nnz
[8];
1401 // decode DC values and do hadamard
1402 nnz
= decode_block_coeffs(c
, td
->block_dc
, s
->prob
->token
[1], 0,
1403 nnz_pred
, s
->qmat
[segment
].luma_dc_qmul
,
1404 ff_zigzag_scan
, is_vp7
);
1405 l_nnz
[8] = t_nnz
[8] = !!nnz
;
1407 if (is_vp7
&& mb
->mode
> MODE_I4x4
) {
1408 nnz
|= inter_predict_dc(td
->block_dc
,
1409 s
->inter_dc_pred
[mb
->ref_frame
- 1]);
1416 s
->vp8dsp
.vp8_luma_dc_wht_dc(td
->block
, td
->block_dc
);
1418 s
->vp8dsp
.vp8_luma_dc_wht(td
->block
, td
->block_dc
);
1425 for (y
= 0; y
< 4; y
++)
1426 for (x
= 0; x
< 4; x
++) {
1427 nnz_pred
= l_nnz
[y
] + t_nnz
[x
];
1428 nnz
= decode_block_coeffs(c
, td
->block
[y
][x
],
1429 s
->prob
->token
[luma_ctx
],
1430 luma_start
, nnz_pred
,
1431 s
->qmat
[segment
].luma_qmul
,
1432 s
->prob
[0].scan
, is_vp7
);
1433 /* nnz+block_dc may be one more than the actual last index,
1434 * but we don't care */
1435 td
->non_zero_count_cache
[y
][x
] = nnz
+ block_dc
;
1436 t_nnz
[x
] = l_nnz
[y
] = !!nnz
;
1441 // TODO: what to do about dimensions? 2nd dim for luma is x,
1442 // but for chroma it's (y<<1)|x
1443 for (i
= 4; i
< 6; i
++)
1444 for (y
= 0; y
< 2; y
++)
1445 for (x
= 0; x
< 2; x
++) {
1446 nnz_pred
= l_nnz
[i
+ 2 * y
] + t_nnz
[i
+ 2 * x
];
1447 nnz
= decode_block_coeffs(c
, td
->block
[i
][(y
<< 1) + x
],
1448 s
->prob
->token
[2], 0, nnz_pred
,
1449 s
->qmat
[segment
].chroma_qmul
,
1450 s
->prob
[0].scan
, is_vp7
);
1451 td
->non_zero_count_cache
[i
][(y
<< 1) + x
] = nnz
;
1452 t_nnz
[i
+ 2 * x
] = l_nnz
[i
+ 2 * y
] = !!nnz
;
1456 // if there were no coded coeffs despite the macroblock not being marked skip,
1457 // we MUST not do the inner loop filter and should not do IDCT
1458 // Since skip isn't used for bitstream prediction, just manually set it.
1463 static av_always_inline
1464 void backup_mb_border(uint8_t *top_border
, uint8_t *src_y
,
1465 uint8_t *src_cb
, uint8_t *src_cr
,
1466 ptrdiff_t linesize
, ptrdiff_t uvlinesize
, int simple
)
1468 AV_COPY128(top_border
, src_y
+ 15 * linesize
);
1470 AV_COPY64(top_border
+ 16, src_cb
+ 7 * uvlinesize
);
1471 AV_COPY64(top_border
+ 24, src_cr
+ 7 * uvlinesize
);
1475 static av_always_inline
1476 void xchg_mb_border(uint8_t *top_border
, uint8_t *src_y
, uint8_t *src_cb
,
1477 uint8_t *src_cr
, ptrdiff_t linesize
, ptrdiff_t uvlinesize
, int mb_x
,
1478 int mb_y
, int mb_width
, int simple
, int xchg
)
1480 uint8_t *top_border_m1
= top_border
- 32; // for TL prediction
1482 src_cb
-= uvlinesize
;
1483 src_cr
-= uvlinesize
;
1485 #define XCHG(a, b, xchg) \
1493 XCHG(top_border_m1
+ 8, src_y
- 8, xchg
);
1494 XCHG(top_border
, src_y
, xchg
);
1495 XCHG(top_border
+ 8, src_y
+ 8, 1);
1496 if (mb_x
< mb_width
- 1)
1497 XCHG(top_border
+ 32, src_y
+ 16, 1);
1499 // only copy chroma for normal loop filter
1500 // or to initialize the top row to 127
1501 if (!simple
|| !mb_y
) {
1502 XCHG(top_border_m1
+ 16, src_cb
- 8, xchg
);
1503 XCHG(top_border_m1
+ 24, src_cr
- 8, xchg
);
1504 XCHG(top_border
+ 16, src_cb
, 1);
1505 XCHG(top_border
+ 24, src_cr
, 1);
1509 static av_always_inline
1510 int check_dc_pred8x8_mode(int mode
, int mb_x
, int mb_y
)
1513 return mb_y
? TOP_DC_PRED8x8
: DC_128_PRED8x8
;
1515 return mb_y
? mode
: LEFT_DC_PRED8x8
;
1518 static av_always_inline
1519 int check_tm_pred8x8_mode(int mode
, int mb_x
, int mb_y
, int vp7
)
1522 return mb_y
? VERT_PRED8x8
: (vp7
? DC_128_PRED8x8
: DC_129_PRED8x8
);
1524 return mb_y
? mode
: HOR_PRED8x8
;
1527 static av_always_inline
1528 int check_intra_pred8x8_mode_emuedge(int mode
, int mb_x
, int mb_y
, int vp7
)
1532 return check_dc_pred8x8_mode(mode
, mb_x
, mb_y
);
1534 return !mb_y
? (vp7
? DC_128_PRED8x8
: DC_127_PRED8x8
) : mode
;
1536 return !mb_x
? (vp7
? DC_128_PRED8x8
: DC_129_PRED8x8
) : mode
;
1537 case PLANE_PRED8x8
: /* TM */
1538 return check_tm_pred8x8_mode(mode
, mb_x
, mb_y
, vp7
);
1543 static av_always_inline
1544 int check_tm_pred4x4_mode(int mode
, int mb_x
, int mb_y
, int vp7
)
1547 return mb_y
? VERT_VP8_PRED
: (vp7
? DC_128_PRED
: DC_129_PRED
);
1549 return mb_y
? mode
: HOR_VP8_PRED
;
1553 static av_always_inline
1554 int check_intra_pred4x4_mode_emuedge(int mode
, int mb_x
, int mb_y
,
1555 int *copy_buf
, int vp7
)
1559 if (!mb_x
&& mb_y
) {
1564 case DIAG_DOWN_LEFT_PRED
:
1565 case VERT_LEFT_PRED
:
1566 return !mb_y
? (vp7
? DC_128_PRED
: DC_127_PRED
) : mode
;
1574 return !mb_x
? (vp7
? DC_128_PRED
: DC_129_PRED
) : mode
;
1576 return check_tm_pred4x4_mode(mode
, mb_x
, mb_y
, vp7
);
1577 case DC_PRED
: /* 4x4 DC doesn't use the same "H.264-style" exceptions
1578 * as 16x16/8x8 DC */
1579 case DIAG_DOWN_RIGHT_PRED
:
1580 case VERT_RIGHT_PRED
:
1589 static av_always_inline
1590 void intra_predict(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst
[3],
1591 VP8Macroblock
*mb
, int mb_x
, int mb_y
, int is_vp7
)
1593 int x
, y
, mode
, nnz
;
1596 /* for the first row, we need to run xchg_mb_border to init the top edge
1597 * to 127 otherwise, skip it if we aren't going to deblock */
1598 if (mb_y
&& (s
->deblock_filter
|| !mb_y
) && td
->thread_nr
== 0)
1599 xchg_mb_border(s
->top_border
[mb_x
+ 1], dst
[0], dst
[1], dst
[2],
1600 s
->linesize
, s
->uvlinesize
, mb_x
, mb_y
, s
->mb_width
,
1601 s
->filter
.simple
, 1);
1603 if (mb
->mode
< MODE_I4x4
) {
1604 mode
= check_intra_pred8x8_mode_emuedge(mb
->mode
, mb_x
, mb_y
, is_vp7
);
1605 s
->hpc
.pred16x16
[mode
](dst
[0], s
->linesize
);
1607 uint8_t *ptr
= dst
[0];
1608 uint8_t *intra4x4
= mb
->intra4x4_pred_mode_mb
;
1609 const uint8_t lo
= is_vp7
? 128 : 127;
1610 const uint8_t hi
= is_vp7
? 128 : 129;
1611 uint8_t tr_top
[4] = { lo
, lo
, lo
, lo
};
1613 // all blocks on the right edge of the macroblock use bottom edge
1614 // the top macroblock for their topright edge
1615 uint8_t *tr_right
= ptr
- s
->linesize
+ 16;
1617 // if we're on the right edge of the frame, said edge is extended
1618 // from the top macroblock
1619 if (mb_y
&& mb_x
== s
->mb_width
- 1) {
1620 tr
= tr_right
[-1] * 0x01010101u
;
1621 tr_right
= (uint8_t *) &tr
;
1625 AV_ZERO128(td
->non_zero_count_cache
);
1627 for (y
= 0; y
< 4; y
++) {
1628 uint8_t *topright
= ptr
+ 4 - s
->linesize
;
1629 for (x
= 0; x
< 4; x
++) {
1631 ptrdiff_t linesize
= s
->linesize
;
1632 uint8_t *dst
= ptr
+ 4 * x
;
1633 DECLARE_ALIGNED(4, uint8_t, copy_dst
)[5 * 8];
1635 if ((y
== 0 || x
== 3) && mb_y
== 0) {
1638 topright
= tr_right
;
1640 mode
= check_intra_pred4x4_mode_emuedge(intra4x4
[x
], mb_x
+ x
,
1641 mb_y
+ y
, ©
, is_vp7
);
1643 dst
= copy_dst
+ 12;
1647 AV_WN32A(copy_dst
+ 4, lo
* 0x01010101U
);
1649 AV_COPY32(copy_dst
+ 4, ptr
+ 4 * x
- s
->linesize
);
1653 copy_dst
[3] = ptr
[4 * x
- s
->linesize
- 1];
1662 copy_dst
[11] = ptr
[4 * x
- 1];
1663 copy_dst
[19] = ptr
[4 * x
+ s
->linesize
- 1];
1664 copy_dst
[27] = ptr
[4 * x
+ s
->linesize
* 2 - 1];
1665 copy_dst
[35] = ptr
[4 * x
+ s
->linesize
* 3 - 1];
1668 s
->hpc
.pred4x4
[mode
](dst
, topright
, linesize
);
1670 AV_COPY32(ptr
+ 4 * x
, copy_dst
+ 12);
1671 AV_COPY32(ptr
+ 4 * x
+ s
->linesize
, copy_dst
+ 20);
1672 AV_COPY32(ptr
+ 4 * x
+ s
->linesize
* 2, copy_dst
+ 28);
1673 AV_COPY32(ptr
+ 4 * x
+ s
->linesize
* 3, copy_dst
+ 36);
1676 nnz
= td
->non_zero_count_cache
[y
][x
];
1679 s
->vp8dsp
.vp8_idct_dc_add(ptr
+ 4 * x
,
1680 td
->block
[y
][x
], s
->linesize
);
1682 s
->vp8dsp
.vp8_idct_add(ptr
+ 4 * x
,
1683 td
->block
[y
][x
], s
->linesize
);
1688 ptr
+= 4 * s
->linesize
;
1693 mode
= check_intra_pred8x8_mode_emuedge(mb
->chroma_pred_mode
,
1694 mb_x
, mb_y
, is_vp7
);
1695 s
->hpc
.pred8x8
[mode
](dst
[1], s
->uvlinesize
);
1696 s
->hpc
.pred8x8
[mode
](dst
[2], s
->uvlinesize
);
1698 if (mb_y
&& (s
->deblock_filter
|| !mb_y
) && td
->thread_nr
== 0)
1699 xchg_mb_border(s
->top_border
[mb_x
+ 1], dst
[0], dst
[1], dst
[2],
1700 s
->linesize
, s
->uvlinesize
, mb_x
, mb_y
, s
->mb_width
,
1701 s
->filter
.simple
, 0);
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};
1714 * @param s VP8 decoding context
1715 * @param dst target buffer for block data at block position
1716 * @param ref reference picture buffer at origin (0, 0)
1717 * @param mv motion vector (relative to block position) to get pixel data from
1718 * @param x_off horizontal position of block from origin (0, 0)
1719 * @param y_off vertical position of block from origin (0, 0)
1720 * @param block_w width of block (16, 8 or 4)
1721 * @param block_h height of block (always same as block_w)
1722 * @param width width of src/dst plane data
1723 * @param height height of src/dst plane data
1724 * @param linesize size of a single line of plane data, including padding
1725 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1727 static av_always_inline
1728 void vp8_mc_luma(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst
,
1729 ThreadFrame
*ref
, const VP56mv
*mv
,
1730 int x_off
, int y_off
, int block_w
, int block_h
,
1731 int width
, int height
, ptrdiff_t linesize
,
1732 vp8_mc_func mc_func
[3][3])
1734 uint8_t *src
= ref
->f
->data
[0];
1737 ptrdiff_t src_linesize
= linesize
;
1739 int mx
= (mv
->x
<< 1) & 7, mx_idx
= subpel_idx
[0][mx
];
1740 int my
= (mv
->y
<< 1) & 7, my_idx
= subpel_idx
[0][my
];
1742 x_off
+= mv
->x
>> 2;
1743 y_off
+= mv
->y
>> 2;
1746 ff_thread_await_progress(ref
, (3 + y_off
+ block_h
+ subpel_idx
[2][my
]) >> 4, 0);
1747 src
+= y_off
* linesize
+ x_off
;
1748 if (x_off
< mx_idx
|| x_off
>= width
- block_w
- subpel_idx
[2][mx
] ||
1749 y_off
< my_idx
|| y_off
>= height
- block_h
- subpel_idx
[2][my
]) {
1750 s
->vdsp
.emulated_edge_mc(td
->edge_emu_buffer
,
1751 src
- my_idx
* linesize
- mx_idx
,
1752 EDGE_EMU_LINESIZE
, linesize
,
1753 block_w
+ subpel_idx
[1][mx
],
1754 block_h
+ subpel_idx
[1][my
],
1755 x_off
- mx_idx
, y_off
- my_idx
,
1757 src
= td
->edge_emu_buffer
+ mx_idx
+ EDGE_EMU_LINESIZE
* my_idx
;
1758 src_linesize
= EDGE_EMU_LINESIZE
;
1760 mc_func
[my_idx
][mx_idx
](dst
, linesize
, src
, src_linesize
, block_h
, mx
, my
);
1762 ff_thread_await_progress(ref
, (3 + y_off
+ block_h
) >> 4, 0);
1763 mc_func
[0][0](dst
, linesize
, src
+ y_off
* linesize
+ x_off
,
1764 linesize
, block_h
, 0, 0);
1769 * chroma MC function
1771 * @param s VP8 decoding context
1772 * @param dst1 target buffer for block data at block position (U plane)
1773 * @param dst2 target buffer for block data at block position (V plane)
1774 * @param ref reference picture buffer at origin (0, 0)
1775 * @param mv motion vector (relative to block position) to get pixel data from
1776 * @param x_off horizontal position of block from origin (0, 0)
1777 * @param y_off vertical position of block from origin (0, 0)
1778 * @param block_w width of block (16, 8 or 4)
1779 * @param block_h height of block (always same as block_w)
1780 * @param width width of src/dst plane data
1781 * @param height height of src/dst plane data
1782 * @param linesize size of a single line of plane data, including padding
1783 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
1785 static av_always_inline
1786 void vp8_mc_chroma(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst1
,
1787 uint8_t *dst2
, ThreadFrame
*ref
, const VP56mv
*mv
,
1788 int x_off
, int y_off
, int block_w
, int block_h
,
1789 int width
, int height
, ptrdiff_t linesize
,
1790 vp8_mc_func mc_func
[3][3])
1792 uint8_t *src1
= ref
->f
->data
[1], *src2
= ref
->f
->data
[2];
1795 int mx
= mv
->x
& 7, mx_idx
= subpel_idx
[0][mx
];
1796 int my
= mv
->y
& 7, my_idx
= subpel_idx
[0][my
];
1798 x_off
+= mv
->x
>> 3;
1799 y_off
+= mv
->y
>> 3;
1802 src1
+= y_off
* linesize
+ x_off
;
1803 src2
+= y_off
* linesize
+ x_off
;
1804 ff_thread_await_progress(ref
, (3 + y_off
+ block_h
+ subpel_idx
[2][my
]) >> 3, 0);
1805 if (x_off
< mx_idx
|| x_off
>= width
- block_w
- subpel_idx
[2][mx
] ||
1806 y_off
< my_idx
|| y_off
>= height
- block_h
- subpel_idx
[2][my
]) {
1807 s
->vdsp
.emulated_edge_mc(td
->edge_emu_buffer
,
1808 src1
- my_idx
* linesize
- mx_idx
,
1809 EDGE_EMU_LINESIZE
, linesize
,
1810 block_w
+ subpel_idx
[1][mx
], block_h
+ subpel_idx
[1][my
],
1811 x_off
- mx_idx
, y_off
- my_idx
, width
, height
);
1812 src1
= td
->edge_emu_buffer
+ mx_idx
+ EDGE_EMU_LINESIZE
* my_idx
;
1813 mc_func
[my_idx
][mx_idx
](dst1
, linesize
, src1
, EDGE_EMU_LINESIZE
, block_h
, mx
, my
);
1815 s
->vdsp
.emulated_edge_mc(td
->edge_emu_buffer
,
1816 src2
- my_idx
* linesize
- mx_idx
,
1817 EDGE_EMU_LINESIZE
, linesize
,
1818 block_w
+ subpel_idx
[1][mx
], block_h
+ subpel_idx
[1][my
],
1819 x_off
- mx_idx
, y_off
- my_idx
, width
, height
);
1820 src2
= td
->edge_emu_buffer
+ mx_idx
+ EDGE_EMU_LINESIZE
* my_idx
;
1821 mc_func
[my_idx
][mx_idx
](dst2
, linesize
, src2
, EDGE_EMU_LINESIZE
, block_h
, mx
, my
);
1823 mc_func
[my_idx
][mx_idx
](dst1
, linesize
, src1
, linesize
, block_h
, mx
, my
);
1824 mc_func
[my_idx
][mx_idx
](dst2
, linesize
, src2
, linesize
, block_h
, mx
, my
);
1827 ff_thread_await_progress(ref
, (3 + y_off
+ block_h
) >> 3, 0);
1828 mc_func
[0][0](dst1
, linesize
, src1
+ y_off
* linesize
+ x_off
, linesize
, block_h
, 0, 0);
1829 mc_func
[0][0](dst2
, linesize
, src2
+ y_off
* linesize
+ x_off
, linesize
, block_h
, 0, 0);
1833 static av_always_inline
1834 void vp8_mc_part(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst
[3],
1835 ThreadFrame
*ref_frame
, int x_off
, int y_off
,
1836 int bx_off
, int by_off
, int block_w
, int block_h
,
1837 int width
, int height
, VP56mv
*mv
)
1842 vp8_mc_luma(s
, td
, dst
[0] + by_off
* s
->linesize
+ bx_off
,
1843 ref_frame
, mv
, x_off
+ bx_off
, y_off
+ by_off
,
1844 block_w
, block_h
, width
, height
, s
->linesize
,
1845 s
->put_pixels_tab
[block_w
== 8]);
1848 if (s
->profile
== 3) {
1849 /* this block only applies VP8; it is safe to check
1850 * only the profile, as VP7 profile <= 1 */
1862 vp8_mc_chroma(s
, td
, dst
[1] + by_off
* s
->uvlinesize
+ bx_off
,
1863 dst
[2] + by_off
* s
->uvlinesize
+ bx_off
, ref_frame
,
1864 &uvmv
, x_off
+ bx_off
, y_off
+ by_off
,
1865 block_w
, block_h
, width
, height
, s
->uvlinesize
,
1866 s
->put_pixels_tab
[1 + (block_w
== 4)]);
1869 /* Fetch pixels for estimated mv 4 macroblocks ahead.
1870 * Optimized for 64-byte cache lines. Inspired by ffh264 prefetch_motion. */
1871 static av_always_inline
1872 void prefetch_motion(VP8Context
*s
, VP8Macroblock
*mb
, int mb_x
, int mb_y
,
1875 /* Don't prefetch refs that haven't been used very often this frame. */
1876 if (s
->ref_count
[ref
- 1] > (mb_xy
>> 5)) {
1877 int x_off
= mb_x
<< 4, y_off
= mb_y
<< 4;
1878 int mx
= (mb
->mv
.x
>> 2) + x_off
+ 8;
1879 int my
= (mb
->mv
.y
>> 2) + y_off
;
1880 uint8_t **src
= s
->framep
[ref
]->tf
.f
->data
;
1881 int off
= mx
+ (my
+ (mb_x
& 3) * 4) * s
->linesize
+ 64;
1882 /* For threading, a ff_thread_await_progress here might be useful, but
1883 * it actually slows down the decoder. Since a bad prefetch doesn't
1884 * generate bad decoder output, we don't run it here. */
1885 s
->vdsp
.prefetch(src
[0] + off
, s
->linesize
, 4);
1886 off
= (mx
>> 1) + ((my
>> 1) + (mb_x
& 7)) * s
->uvlinesize
+ 64;
1887 s
->vdsp
.prefetch(src
[1] + off
, src
[2] - src
[1], 2);
1892 * Apply motion vectors to prediction buffer, chapter 18.
1894 static av_always_inline
1895 void inter_predict(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst
[3],
1896 VP8Macroblock
*mb
, int mb_x
, int mb_y
)
1898 int x_off
= mb_x
<< 4, y_off
= mb_y
<< 4;
1899 int width
= 16 * s
->mb_width
, height
= 16 * s
->mb_height
;
1900 ThreadFrame
*ref
= &s
->framep
[mb
->ref_frame
]->tf
;
1901 VP56mv
*bmv
= mb
->bmv
;
1903 switch (mb
->partitioning
) {
1904 case VP8_SPLITMVMODE_NONE
:
1905 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1906 0, 0, 16, 16, width
, height
, &mb
->mv
);
1908 case VP8_SPLITMVMODE_4x4
: {
1913 for (y
= 0; y
< 4; y
++) {
1914 for (x
= 0; x
< 4; x
++) {
1915 vp8_mc_luma(s
, td
, dst
[0] + 4 * y
* s
->linesize
+ x
* 4,
1916 ref
, &bmv
[4 * y
+ x
],
1917 4 * x
+ x_off
, 4 * y
+ y_off
, 4, 4,
1918 width
, height
, s
->linesize
,
1919 s
->put_pixels_tab
[2]);
1928 for (y
= 0; y
< 2; y
++) {
1929 for (x
= 0; x
< 2; x
++) {
1930 uvmv
.x
= mb
->bmv
[2 * y
* 4 + 2 * x
].x
+
1931 mb
->bmv
[2 * y
* 4 + 2 * x
+ 1].x
+
1932 mb
->bmv
[(2 * y
+ 1) * 4 + 2 * x
].x
+
1933 mb
->bmv
[(2 * y
+ 1) * 4 + 2 * x
+ 1].x
;
1934 uvmv
.y
= mb
->bmv
[2 * y
* 4 + 2 * x
].y
+
1935 mb
->bmv
[2 * y
* 4 + 2 * x
+ 1].y
+
1936 mb
->bmv
[(2 * y
+ 1) * 4 + 2 * x
].y
+
1937 mb
->bmv
[(2 * y
+ 1) * 4 + 2 * x
+ 1].y
;
1938 uvmv
.x
= (uvmv
.x
+ 2 + FF_SIGNBIT(uvmv
.x
)) >> 2;
1939 uvmv
.y
= (uvmv
.y
+ 2 + FF_SIGNBIT(uvmv
.y
)) >> 2;
1940 if (s
->profile
== 3) {
1944 vp8_mc_chroma(s
, td
, dst
[1] + 4 * y
* s
->uvlinesize
+ x
* 4,
1945 dst
[2] + 4 * y
* s
->uvlinesize
+ x
* 4, ref
,
1946 &uvmv
, 4 * x
+ x_off
, 4 * y
+ y_off
, 4, 4,
1947 width
, height
, s
->uvlinesize
,
1948 s
->put_pixels_tab
[2]);
1953 case VP8_SPLITMVMODE_16x8
:
1954 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1955 0, 0, 16, 8, width
, height
, &bmv
[0]);
1956 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1957 0, 8, 16, 8, width
, height
, &bmv
[1]);
1959 case VP8_SPLITMVMODE_8x16
:
1960 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1961 0, 0, 8, 16, width
, height
, &bmv
[0]);
1962 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1963 8, 0, 8, 16, width
, height
, &bmv
[1]);
1965 case VP8_SPLITMVMODE_8x8
:
1966 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1967 0, 0, 8, 8, width
, height
, &bmv
[0]);
1968 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1969 8, 0, 8, 8, width
, height
, &bmv
[1]);
1970 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1971 0, 8, 8, 8, width
, height
, &bmv
[2]);
1972 vp8_mc_part(s
, td
, dst
, ref
, x_off
, y_off
,
1973 8, 8, 8, 8, width
, height
, &bmv
[3]);
1978 static av_always_inline
1979 void idct_mb(VP8Context
*s
, VP8ThreadData
*td
, uint8_t *dst
[3], VP8Macroblock
*mb
)
1983 if (mb
->mode
!= MODE_I4x4
) {
1984 uint8_t *y_dst
= dst
[0];
1985 for (y
= 0; y
< 4; y
++) {
1986 uint32_t nnz4
= AV_RL32(td
->non_zero_count_cache
[y
]);
1988 if (nnz4
& ~0x01010101) {
1989 for (x
= 0; x
< 4; x
++) {
1990 if ((uint8_t) nnz4
== 1)
1991 s
->vp8dsp
.vp8_idct_dc_add(y_dst
+ 4 * x
,
1994 else if ((uint8_t) nnz4
> 1)
1995 s
->vp8dsp
.vp8_idct_add(y_dst
+ 4 * x
,
2003 s
->vp8dsp
.vp8_idct_dc_add4y(y_dst
, td
->block
[y
], s
->linesize
);
2006 y_dst
+= 4 * s
->linesize
;
2010 for (ch
= 0; ch
< 2; ch
++) {
2011 uint32_t nnz4
= AV_RL32(td
->non_zero_count_cache
[4 + ch
]);
2013 uint8_t *ch_dst
= dst
[1 + ch
];
2014 if (nnz4
& ~0x01010101) {
2015 for (y
= 0; y
< 2; y
++) {
2016 for (x
= 0; x
< 2; x
++) {
2017 if ((uint8_t) nnz4
== 1)
2018 s
->vp8dsp
.vp8_idct_dc_add(ch_dst
+ 4 * x
,
2019 td
->block
[4 + ch
][(y
<< 1) + x
],
2021 else if ((uint8_t) nnz4
> 1)
2022 s
->vp8dsp
.vp8_idct_add(ch_dst
+ 4 * x
,
2023 td
->block
[4 + ch
][(y
<< 1) + x
],
2027 goto chroma_idct_end
;
2029 ch_dst
+= 4 * s
->uvlinesize
;
2032 s
->vp8dsp
.vp8_idct_dc_add4uv(ch_dst
, td
->block
[4 + ch
], s
->uvlinesize
);
2040 static av_always_inline
2041 void filter_level_for_mb(VP8Context
*s
, VP8Macroblock
*mb
,
2042 VP8FilterStrength
*f
, int is_vp7
)
2044 int interior_limit
, filter_level
;
2046 if (s
->segmentation
.enabled
) {
2047 filter_level
= s
->segmentation
.filter_level
[mb
->segment
];
2048 if (!s
->segmentation
.absolute_vals
)
2049 filter_level
+= s
->filter
.level
;
2051 filter_level
= s
->filter
.level
;
2053 if (s
->lf_delta
.enabled
) {
2054 filter_level
+= s
->lf_delta
.ref
[mb
->ref_frame
];
2055 filter_level
+= s
->lf_delta
.mode
[mb
->mode
];
2058 filter_level
= av_clip_uintp2(filter_level
, 6);
2060 interior_limit
= filter_level
;
2061 if (s
->filter
.sharpness
) {
2062 interior_limit
>>= (s
->filter
.sharpness
+ 3) >> 2;
2063 interior_limit
= FFMIN(interior_limit
, 9 - s
->filter
.sharpness
);
2065 interior_limit
= FFMAX(interior_limit
, 1);
2067 f
->filter_level
= filter_level
;
2068 f
->inner_limit
= interior_limit
;
2069 f
->inner_filter
= is_vp7
|| !mb
->skip
|| mb
->mode
== MODE_I4x4
||
2070 mb
->mode
== VP8_MVMODE_SPLIT
;
2073 static av_always_inline
2074 void filter_mb(VP8Context
*s
, uint8_t *dst
[3], VP8FilterStrength
*f
,
2075 int mb_x
, int mb_y
, int is_vp7
)
2077 int mbedge_lim
, bedge_lim_y
, bedge_lim_uv
, hev_thresh
;
2078 int filter_level
= f
->filter_level
;
2079 int inner_limit
= f
->inner_limit
;
2080 int inner_filter
= f
->inner_filter
;
2081 ptrdiff_t linesize
= s
->linesize
;
2082 ptrdiff_t uvlinesize
= s
->uvlinesize
;
2083 static const uint8_t hev_thresh_lut
[2][64] = {
2084 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2085 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2086 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
2088 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
2089 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2090 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2098 bedge_lim_y
= filter_level
;
2099 bedge_lim_uv
= filter_level
* 2;
2100 mbedge_lim
= filter_level
+ 2;
2103 bedge_lim_uv
= filter_level
* 2 + inner_limit
;
2104 mbedge_lim
= bedge_lim_y
+ 4;
2107 hev_thresh
= hev_thresh_lut
[s
->keyframe
][filter_level
];
2110 s
->vp8dsp
.vp8_h_loop_filter16y(dst
[0], linesize
,
2111 mbedge_lim
, inner_limit
, hev_thresh
);
2112 s
->vp8dsp
.vp8_h_loop_filter8uv(dst
[1], dst
[2], uvlinesize
,
2113 mbedge_lim
, inner_limit
, hev_thresh
);
2116 #define H_LOOP_FILTER_16Y_INNER(cond) \
2117 if (cond && inner_filter) { \
2118 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 4, linesize, \
2119 bedge_lim_y, inner_limit, \
2121 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 8, linesize, \
2122 bedge_lim_y, inner_limit, \
2124 s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + 12, linesize, \
2125 bedge_lim_y, inner_limit, \
2127 s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, \
2128 uvlinesize, bedge_lim_uv, \
2129 inner_limit, hev_thresh); \
2132 H_LOOP_FILTER_16Y_INNER(!is_vp7
)
2135 s
->vp8dsp
.vp8_v_loop_filter16y(dst
[0], linesize
,
2136 mbedge_lim
, inner_limit
, hev_thresh
);
2137 s
->vp8dsp
.vp8_v_loop_filter8uv(dst
[1], dst
[2], uvlinesize
,
2138 mbedge_lim
, inner_limit
, hev_thresh
);
2142 s
->vp8dsp
.vp8_v_loop_filter16y_inner(dst
[0] + 4 * linesize
,
2143 linesize
, bedge_lim_y
,
2144 inner_limit
, hev_thresh
);
2145 s
->vp8dsp
.vp8_v_loop_filter16y_inner(dst
[0] + 8 * linesize
,
2146 linesize
, bedge_lim_y
,
2147 inner_limit
, hev_thresh
);
2148 s
->vp8dsp
.vp8_v_loop_filter16y_inner(dst
[0] + 12 * linesize
,
2149 linesize
, bedge_lim_y
,
2150 inner_limit
, hev_thresh
);
2151 s
->vp8dsp
.vp8_v_loop_filter8uv_inner(dst
[1] + 4 * uvlinesize
,
2152 dst
[2] + 4 * uvlinesize
,
2153 uvlinesize
, bedge_lim_uv
,
2154 inner_limit
, hev_thresh
);
2157 H_LOOP_FILTER_16Y_INNER(is_vp7
)
2160 static av_always_inline
2161 void filter_mb_simple(VP8Context
*s
, uint8_t *dst
, VP8FilterStrength
*f
,
2164 int mbedge_lim
, bedge_lim
;
2165 int filter_level
= f
->filter_level
;
2166 int inner_limit
= f
->inner_limit
;
2167 int inner_filter
= f
->inner_filter
;
2168 ptrdiff_t linesize
= s
->linesize
;
2173 bedge_lim
= 2 * filter_level
+ inner_limit
;
2174 mbedge_lim
= bedge_lim
+ 4;
2177 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
, linesize
, mbedge_lim
);
2179 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
+ 4, linesize
, bedge_lim
);
2180 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
+ 8, linesize
, bedge_lim
);
2181 s
->vp8dsp
.vp8_h_loop_filter_simple(dst
+ 12, linesize
, bedge_lim
);
2185 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
, linesize
, mbedge_lim
);
2187 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
+ 4 * linesize
, linesize
, bedge_lim
);
2188 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
+ 8 * linesize
, linesize
, bedge_lim
);
2189 s
->vp8dsp
.vp8_v_loop_filter_simple(dst
+ 12 * linesize
, linesize
, bedge_lim
);
2193 #define MARGIN (16 << 2)
2194 static av_always_inline
2195 void vp78_decode_mv_mb_modes(AVCodecContext
*avctx
, VP8Frame
*curframe
,
2196 VP8Frame
*prev_frame
, int is_vp7
)
2198 VP8Context
*s
= avctx
->priv_data
;
2201 s
->mv_min
.y
= -MARGIN
;
2202 s
->mv_max
.y
= ((s
->mb_height
- 1) << 6) + MARGIN
;
2203 for (mb_y
= 0; mb_y
< s
->mb_height
; mb_y
++) {
2204 VP8Macroblock
*mb
= s
->macroblocks_base
+
2205 ((s
->mb_width
+ 1) * (mb_y
+ 1) + 1);
2206 int mb_xy
= mb_y
* s
->mb_width
;
2208 AV_WN32A(s
->intra4x4_pred_mode_left
, DC_PRED
* 0x01010101);
2210 s
->mv_min
.x
= -MARGIN
;
2211 s
->mv_max
.x
= ((s
->mb_width
- 1) << 6) + MARGIN
;
2212 for (mb_x
= 0; mb_x
< s
->mb_width
; mb_x
++, mb_xy
++, mb
++) {
2214 AV_WN32A((mb
- s
->mb_width
- 1)->intra4x4_pred_mode_top
,
2215 DC_PRED
* 0x01010101);
2216 decode_mb_mode(s
, mb
, mb_x
, mb_y
, curframe
->seg_map
->data
+ mb_xy
,
2217 prev_frame
&& prev_frame
->seg_map
?
2218 prev_frame
->seg_map
->data
+ mb_xy
: NULL
, 1, is_vp7
);
2227 static void vp7_decode_mv_mb_modes(AVCodecContext
*avctx
, VP8Frame
*cur_frame
,
2228 VP8Frame
*prev_frame
)
2230 vp78_decode_mv_mb_modes(avctx
, cur_frame
, prev_frame
, IS_VP7
);
2233 static void vp8_decode_mv_mb_modes(AVCodecContext
*avctx
, VP8Frame
*cur_frame
,
2234 VP8Frame
*prev_frame
)
2236 vp78_decode_mv_mb_modes(avctx
, cur_frame
, prev_frame
, IS_VP8
);
/* NOTE(review): the #if HAVE_THREADS / #else / #endif scaffolding and the
 * do { } while (0) wrappers were dropped by the extraction; they are
 * reconstructed from upstream vp8.c — verify against the repository. */
#if HAVE_THREADS
/* Block this slice thread until thread `otd` has decoded past macroblock
 * (mb_x_check, mb_y_check).  Positions are packed as (mb_y << 16) | mb_x
 * so a single integer comparison orders them. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)                     \
    do {                                                                      \
        int tmp = (mb_y_check << 16) | (mb_x_check & 0xFFFF);                 \
        if (otd->thread_mb_pos < tmp) {                                       \
            pthread_mutex_lock(&otd->lock);                                   \
            td->wait_mb_pos = tmp;                                            \
            do {                                                              \
                if (otd->thread_mb_pos >= tmp)                                \
                    break;                                                    \
                pthread_cond_wait(&otd->cond, &otd->lock);                    \
            } while (1);                                                      \
            td->wait_mb_pos = INT_MAX;                                        \
            pthread_mutex_unlock(&otd->lock);                                 \
        }                                                                     \
    } while (0)

/* Publish this thread's decode position and, under sliced threading, wake
 * any neighbour thread that is waiting for us to pass its wait_mb_pos. */
#define update_pos(td, mb_y, mb_x)                                            \
    do {                                                                      \
        int pos              = (mb_y << 16) | (mb_x & 0xFFFF);                \
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && \
                               (num_jobs > 1);                                \
        int is_null          = !next_td || !prev_td;                          \
        int pos_check        = (is_null) ? 1                                  \
                                         : (next_td != td &&                  \
                                            pos >= next_td->wait_mb_pos) ||   \
                                           (prev_td != td &&                  \
                                            pos >= prev_td->wait_mb_pos);     \
        td->thread_mb_pos = pos;                                              \
        if (sliced_threading && pos_check) {                                  \
            pthread_mutex_lock(&td->lock);                                    \
            pthread_cond_broadcast(&td->cond);                                \
            pthread_mutex_unlock(&td->lock);                                  \
        }                                                                     \
    } while (0)
#else
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) while (0)
#define update_pos(td, mb_y, mb_x)                        while (0)
#endif
2279 static void vp8_decode_mb_row_no_filter(AVCodecContext
*avctx
, void *tdata
,
2280 int jobnr
, int threadnr
, int is_vp7
)
2282 VP8Context
*s
= avctx
->priv_data
;
2283 VP8ThreadData
*prev_td
, *next_td
, *td
= &s
->thread_data
[threadnr
];
2284 int mb_y
= td
->thread_mb_pos
>> 16;
2285 int mb_x
, mb_xy
= mb_y
* s
->mb_width
;
2286 int num_jobs
= s
->num_jobs
;
2287 VP8Frame
*curframe
= s
->curframe
, *prev_frame
= s
->prev_frame
;
2288 VP56RangeCoder
*c
= &s
->coeff_partition
[mb_y
& (s
->num_coeff_partitions
- 1)];
2291 curframe
->tf
.f
->data
[0] + 16 * mb_y
* s
->linesize
,
2292 curframe
->tf
.f
->data
[1] + 8 * mb_y
* s
->uvlinesize
,
2293 curframe
->tf
.f
->data
[2] + 8 * mb_y
* s
->uvlinesize
2298 prev_td
= &s
->thread_data
[(jobnr
+ num_jobs
- 1) % num_jobs
];
2299 if (mb_y
== s
->mb_height
- 1)
2302 next_td
= &s
->thread_data
[(jobnr
+ 1) % num_jobs
];
2303 if (s
->mb_layout
== 1)
2304 mb
= s
->macroblocks_base
+ ((s
->mb_width
+ 1) * (mb_y
+ 1) + 1);
2306 // Make sure the previous frame has read its segmentation map,
2307 // if we re-use the same map.
2308 if (prev_frame
&& s
->segmentation
.enabled
&&
2309 !s
->segmentation
.update_map
)
2310 ff_thread_await_progress(&prev_frame
->tf
, mb_y
, 0);
2311 mb
= s
->macroblocks
+ (s
->mb_height
- mb_y
- 1) * 2;
2312 memset(mb
- 1, 0, sizeof(*mb
)); // zero left macroblock
2313 AV_WN32A(s
->intra4x4_pred_mode_left
, DC_PRED
* 0x01010101);
2316 if (!is_vp7
|| mb_y
== 0)
2317 memset(td
->left_nnz
, 0, sizeof(td
->left_nnz
));
2319 s
->mv_min
.x
= -MARGIN
;
2320 s
->mv_max
.x
= ((s
->mb_width
- 1) << 6) + MARGIN
;
2322 for (mb_x
= 0; mb_x
< s
->mb_width
; mb_x
++, mb_xy
++, mb
++) {
2323 // Wait for previous thread to read mb_x+2, and reach mb_y-1.
2324 if (prev_td
!= td
) {
2325 if (threadnr
!= 0) {
2326 check_thread_pos(td
, prev_td
,
2327 mb_x
+ (is_vp7
? 2 : 1),
2328 mb_y
- (is_vp7
? 2 : 1));
2330 check_thread_pos(td
, prev_td
,
2331 mb_x
+ (is_vp7
? 2 : 1) + s
->mb_width
+ 3,
2332 mb_y
- (is_vp7
? 2 : 1));
2336 s
->vdsp
.prefetch(dst
[0] + (mb_x
& 3) * 4 * s
->linesize
+ 64,
2338 s
->vdsp
.prefetch(dst
[1] + (mb_x
& 7) * s
->uvlinesize
+ 64,
2339 dst
[2] - dst
[1], 2);
2342 decode_mb_mode(s
, mb
, mb_x
, mb_y
, curframe
->seg_map
->data
+ mb_xy
,
2343 prev_frame
&& prev_frame
->seg_map
?
2344 prev_frame
->seg_map
->data
+ mb_xy
: NULL
, 0, is_vp7
);
2346 prefetch_motion(s
, mb
, mb_x
, mb_y
, mb_xy
, VP56_FRAME_PREVIOUS
);
2349 decode_mb_coeffs(s
, td
, c
, mb
, s
->top_nnz
[mb_x
], td
->left_nnz
, is_vp7
);
2351 if (mb
->mode
<= MODE_I4x4
)
2352 intra_predict(s
, td
, dst
, mb
, mb_x
, mb_y
, is_vp7
);
2354 inter_predict(s
, td
, dst
, mb
, mb_x
, mb_y
);
2356 prefetch_motion(s
, mb
, mb_x
, mb_y
, mb_xy
, VP56_FRAME_GOLDEN
);
2359 idct_mb(s
, td
, dst
, mb
);
2361 AV_ZERO64(td
->left_nnz
);
2362 AV_WN64(s
->top_nnz
[mb_x
], 0); // array of 9, so unaligned
2364 /* Reset DC block predictors if they would exist
2365 * if the mb had coefficients */
2366 if (mb
->mode
!= MODE_I4x4
&& mb
->mode
!= VP8_MVMODE_SPLIT
) {
2367 td
->left_nnz
[8] = 0;
2368 s
->top_nnz
[mb_x
][8] = 0;
2372 if (s
->deblock_filter
)
2373 filter_level_for_mb(s
, mb
, &td
->filter_strength
[mb_x
], is_vp7
);
2375 if (s
->deblock_filter
&& num_jobs
!= 1 && threadnr
== num_jobs
- 1) {
2376 if (s
->filter
.simple
)
2377 backup_mb_border(s
->top_border
[mb_x
+ 1], dst
[0],
2378 NULL
, NULL
, s
->linesize
, 0, 1);
2380 backup_mb_border(s
->top_border
[mb_x
+ 1], dst
[0],
2381 dst
[1], dst
[2], s
->linesize
, s
->uvlinesize
, 0);
2384 prefetch_motion(s
, mb
, mb_x
, mb_y
, mb_xy
, VP56_FRAME_GOLDEN2
);
2392 if (mb_x
== s
->mb_width
+ 1) {
2393 update_pos(td
, mb_y
, s
->mb_width
+ 3);
2395 update_pos(td
, mb_y
, mb_x
);
2400 static void vp8_filter_mb_row(AVCodecContext
*avctx
, void *tdata
,
2401 int jobnr
, int threadnr
, int is_vp7
)
2403 VP8Context
*s
= avctx
->priv_data
;
2404 VP8ThreadData
*td
= &s
->thread_data
[threadnr
];
2405 int mb_x
, mb_y
= td
->thread_mb_pos
>> 16, num_jobs
= s
->num_jobs
;
2406 AVFrame
*curframe
= s
->curframe
->tf
.f
;
2408 VP8ThreadData
*prev_td
, *next_td
;
2410 curframe
->data
[0] + 16 * mb_y
* s
->linesize
,
2411 curframe
->data
[1] + 8 * mb_y
* s
->uvlinesize
,
2412 curframe
->data
[2] + 8 * mb_y
* s
->uvlinesize
2415 if (s
->mb_layout
== 1)
2416 mb
= s
->macroblocks_base
+ ((s
->mb_width
+ 1) * (mb_y
+ 1) + 1);
2418 mb
= s
->macroblocks
+ (s
->mb_height
- mb_y
- 1) * 2;
2423 prev_td
= &s
->thread_data
[(jobnr
+ num_jobs
- 1) % num_jobs
];
2424 if (mb_y
== s
->mb_height
- 1)
2427 next_td
= &s
->thread_data
[(jobnr
+ 1) % num_jobs
];
2429 for (mb_x
= 0; mb_x
< s
->mb_width
; mb_x
++, mb
++) {
2430 VP8FilterStrength
*f
= &td
->filter_strength
[mb_x
];
2432 check_thread_pos(td
, prev_td
,
2433 (mb_x
+ 1) + (s
->mb_width
+ 3), mb_y
- 1);
2435 if (next_td
!= &s
->thread_data
[0])
2436 check_thread_pos(td
, next_td
, mb_x
+ 1, mb_y
+ 1);
2438 if (num_jobs
== 1) {
2439 if (s
->filter
.simple
)
2440 backup_mb_border(s
->top_border
[mb_x
+ 1], dst
[0],
2441 NULL
, NULL
, s
->linesize
, 0, 1);
2443 backup_mb_border(s
->top_border
[mb_x
+ 1], dst
[0],
2444 dst
[1], dst
[2], s
->linesize
, s
->uvlinesize
, 0);
2447 if (s
->filter
.simple
)
2448 filter_mb_simple(s
, dst
[0], f
, mb_x
, mb_y
);
2450 filter_mb(s
, dst
, f
, mb_x
, mb_y
, is_vp7
);
2455 update_pos(td
, mb_y
, (s
->mb_width
+ 3) + mb_x
);
2459 static av_always_inline
2460 int vp78_decode_mb_row_sliced(AVCodecContext
*avctx
, void *tdata
, int jobnr
,
2461 int threadnr
, int is_vp7
)
2463 VP8Context
*s
= avctx
->priv_data
;
2464 VP8ThreadData
*td
= &s
->thread_data
[jobnr
];
2465 VP8ThreadData
*next_td
= NULL
, *prev_td
= NULL
;
2466 VP8Frame
*curframe
= s
->curframe
;
2467 int mb_y
, num_jobs
= s
->num_jobs
;
2469 td
->thread_nr
= threadnr
;
2470 for (mb_y
= jobnr
; mb_y
< s
->mb_height
; mb_y
+= num_jobs
) {
2471 if (mb_y
>= s
->mb_height
)
2473 td
->thread_mb_pos
= mb_y
<< 16;
2474 vp8_decode_mb_row_no_filter(avctx
, tdata
, jobnr
, threadnr
, is_vp7
);
2475 if (s
->deblock_filter
)
2476 vp8_filter_mb_row(avctx
, tdata
, jobnr
, threadnr
, is_vp7
);
2477 update_pos(td
, mb_y
, INT_MAX
& 0xFFFF);
2482 if (avctx
->active_thread_type
== FF_THREAD_FRAME
)
2483 ff_thread_report_progress(&curframe
->tf
, mb_y
, 0);
2489 static int vp7_decode_mb_row_sliced(AVCodecContext
*avctx
, void *tdata
,
2490 int jobnr
, int threadnr
)
2492 return vp78_decode_mb_row_sliced(avctx
, tdata
, jobnr
, threadnr
, IS_VP7
);
2495 static int vp8_decode_mb_row_sliced(AVCodecContext
*avctx
, void *tdata
,
2496 int jobnr
, int threadnr
)
2498 return vp78_decode_mb_row_sliced(avctx
, tdata
, jobnr
, threadnr
, IS_VP8
);
2501 static av_always_inline
2502 int vp78_decode_frame(AVCodecContext
*avctx
, void *data
, int *got_frame
,
2503 AVPacket
*avpkt
, int is_vp7
)
2505 VP8Context
*s
= avctx
->priv_data
;
2506 int ret
, i
, referenced
, num_jobs
;
2507 enum AVDiscard skip_thresh
;
2508 VP8Frame
*av_uninit(curframe
), *prev_frame
;
2511 ret
= vp7_decode_frame_header(s
, avpkt
->data
, avpkt
->size
);
2513 ret
= vp8_decode_frame_header(s
, avpkt
->data
, avpkt
->size
);
2518 if (s
->actually_webp
) {
2519 // avctx->pix_fmt already set in caller.
2520 } else if (!is_vp7
&& s
->pix_fmt
== AV_PIX_FMT_NONE
) {
2521 enum AVPixelFormat pix_fmts
[] = {
2522 #if CONFIG_VP8_VAAPI_HWACCEL
2529 s
->pix_fmt
= ff_get_format(s
->avctx
, pix_fmts
);
2530 if (s
->pix_fmt
< 0) {
2531 ret
= AVERROR(EINVAL
);
2534 avctx
->pix_fmt
= s
->pix_fmt
;
2537 prev_frame
= s
->framep
[VP56_FRAME_CURRENT
];
2539 referenced
= s
->update_last
|| s
->update_golden
== VP56_FRAME_CURRENT
||
2540 s
->update_altref
== VP56_FRAME_CURRENT
;
2542 skip_thresh
= !referenced
? AVDISCARD_NONREF
2543 : !s
->keyframe
? AVDISCARD_NONKEY
2546 if (avctx
->skip_frame
>= skip_thresh
) {
2548 memcpy(&s
->next_framep
[0], &s
->framep
[0], sizeof(s
->framep
[0]) * 4);
2551 s
->deblock_filter
= s
->filter
.level
&& avctx
->skip_loop_filter
< skip_thresh
;
2553 // release no longer referenced frames
2554 for (i
= 0; i
< 5; i
++)
2555 if (s
->frames
[i
].tf
.f
->data
[0] &&
2556 &s
->frames
[i
] != prev_frame
&&
2557 &s
->frames
[i
] != s
->framep
[VP56_FRAME_PREVIOUS
] &&
2558 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN
] &&
2559 &s
->frames
[i
] != s
->framep
[VP56_FRAME_GOLDEN2
])
2560 vp8_release_frame(s
, &s
->frames
[i
]);
2562 curframe
= s
->framep
[VP56_FRAME_CURRENT
] = vp8_find_free_buffer(s
);
2565 avctx
->colorspace
= AVCOL_SPC_BT470BG
;
2567 avctx
->color_range
= AVCOL_RANGE_JPEG
;
2569 avctx
->color_range
= AVCOL_RANGE_MPEG
;
2571 /* Given that arithmetic probabilities are updated every frame, it's quite
2572 * likely that the values we have on a random interframe are complete
2573 * junk if we didn't start decode on a keyframe. So just don't display
2574 * anything rather than junk. */
2575 if (!s
->keyframe
&& (!s
->framep
[VP56_FRAME_PREVIOUS
] ||
2576 !s
->framep
[VP56_FRAME_GOLDEN
] ||
2577 !s
->framep
[VP56_FRAME_GOLDEN2
])) {
2578 av_log(avctx
, AV_LOG_WARNING
,
2579 "Discarding interframe without a prior keyframe!\n");
2580 ret
= AVERROR_INVALIDDATA
;
2584 curframe
->tf
.f
->key_frame
= s
->keyframe
;
2585 curframe
->tf
.f
->pict_type
= s
->keyframe
? AV_PICTURE_TYPE_I
2586 : AV_PICTURE_TYPE_P
;
2587 if ((ret
= vp8_alloc_frame(s
, curframe
, referenced
))) {
2588 av_log(avctx
, AV_LOG_ERROR
, "get_buffer() failed!\n");
2592 // check if golden and altref are swapped
2593 if (s
->update_altref
!= VP56_FRAME_NONE
)
2594 s
->next_framep
[VP56_FRAME_GOLDEN2
] = s
->framep
[s
->update_altref
];
2596 s
->next_framep
[VP56_FRAME_GOLDEN2
] = s
->framep
[VP56_FRAME_GOLDEN2
];
2598 if (s
->update_golden
!= VP56_FRAME_NONE
)
2599 s
->next_framep
[VP56_FRAME_GOLDEN
] = s
->framep
[s
->update_golden
];
2601 s
->next_framep
[VP56_FRAME_GOLDEN
] = s
->framep
[VP56_FRAME_GOLDEN
];
2604 s
->next_framep
[VP56_FRAME_PREVIOUS
] = curframe
;
2606 s
->next_framep
[VP56_FRAME_PREVIOUS
] = s
->framep
[VP56_FRAME_PREVIOUS
];
2608 s
->next_framep
[VP56_FRAME_CURRENT
] = curframe
;
2610 ff_thread_finish_setup(avctx
);
2612 if (avctx
->hwaccel
) {
2613 ret
= avctx
->hwaccel
->start_frame(avctx
, avpkt
->data
, avpkt
->size
);
2617 ret
= avctx
->hwaccel
->decode_slice(avctx
, avpkt
->data
, avpkt
->size
);
2621 ret
= avctx
->hwaccel
->end_frame(avctx
);
2626 s
->linesize
= curframe
->tf
.f
->linesize
[0];
2627 s
->uvlinesize
= curframe
->tf
.f
->linesize
[1];
2629 memset(s
->top_nnz
, 0, s
->mb_width
* sizeof(*s
->top_nnz
));
2630 /* Zero macroblock structures for top/top-left prediction
2631 * from outside the frame. */
2633 memset(s
->macroblocks
+ s
->mb_height
* 2 - 1, 0,
2634 (s
->mb_width
+ 1) * sizeof(*s
->macroblocks
));
2635 if (!s
->mb_layout
&& s
->keyframe
)
2636 memset(s
->intra4x4_pred_mode_top
, DC_PRED
, s
->mb_width
* 4);
2638 memset(s
->ref_count
, 0, sizeof(s
->ref_count
));
2640 if (s
->mb_layout
== 1) {
2641 // Make sure the previous frame has read its segmentation map,
2642 // if we re-use the same map.
2643 if (prev_frame
&& s
->segmentation
.enabled
&&
2644 !s
->segmentation
.update_map
)
2645 ff_thread_await_progress(&prev_frame
->tf
, 1, 0);
2647 vp7_decode_mv_mb_modes(avctx
, curframe
, prev_frame
);
2649 vp8_decode_mv_mb_modes(avctx
, curframe
, prev_frame
);
2652 if (avctx
->active_thread_type
== FF_THREAD_FRAME
)
2655 num_jobs
= FFMIN(s
->num_coeff_partitions
, avctx
->thread_count
);
2656 s
->num_jobs
= num_jobs
;
2657 s
->curframe
= curframe
;
2658 s
->prev_frame
= prev_frame
;
2659 s
->mv_min
.y
= -MARGIN
;
2660 s
->mv_max
.y
= ((s
->mb_height
- 1) << 6) + MARGIN
;
2661 for (i
= 0; i
< MAX_THREADS
; i
++) {
2662 s
->thread_data
[i
].thread_mb_pos
= 0;
2663 s
->thread_data
[i
].wait_mb_pos
= INT_MAX
;
2667 avctx
->execute2(avctx
, vp7_decode_mb_row_sliced
, s
->thread_data
, NULL
,
2670 avctx
->execute2(avctx
, vp8_decode_mb_row_sliced
, s
->thread_data
, NULL
,
2674 ff_thread_report_progress(&curframe
->tf
, INT_MAX
, 0);
2675 memcpy(&s
->framep
[0], &s
->next_framep
[0], sizeof(s
->framep
[0]) * 4);
2678 // if future frames don't use the updated probabilities,
2679 // reset them to the values we saved
2680 if (!s
->update_probabilities
)
2681 s
->prob
[0] = s
->prob
[1];
2683 if (!s
->invisible
) {
2684 if ((ret
= av_frame_ref(data
, curframe
->tf
.f
)) < 0)
2691 memcpy(&s
->next_framep
[0], &s
->framep
[0], sizeof(s
->framep
[0]) * 4);
2695 int ff_vp8_decode_frame(AVCodecContext
*avctx
, void *data
, int *got_frame
,
2698 return vp78_decode_frame(avctx
, data
, got_frame
, avpkt
, IS_VP8
);
#if CONFIG_VP7_DECODER
/* VP7 decode entry point (compiled only with the VP7 decoder enabled). */
static int vp7_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                            AVPacket *avpkt)
{
    return vp78_decode_frame(avctx, data, got_frame, avpkt, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2709 av_cold
int ff_vp8_decode_free(AVCodecContext
*avctx
)
2711 VP8Context
*s
= avctx
->priv_data
;
2714 vp8_decode_flush_impl(avctx
, 1);
2715 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->frames
); i
++)
2716 av_frame_free(&s
->frames
[i
].tf
.f
);
2721 static av_cold
int vp8_init_frames(VP8Context
*s
)
2724 for (i
= 0; i
< FF_ARRAY_ELEMS(s
->frames
); i
++) {
2725 s
->frames
[i
].tf
.f
= av_frame_alloc();
2726 if (!s
->frames
[i
].tf
.f
)
2727 return AVERROR(ENOMEM
);
2732 static av_always_inline
2733 int vp78_decode_init(AVCodecContext
*avctx
, int is_vp7
)
2735 VP8Context
*s
= avctx
->priv_data
;
2739 s
->pix_fmt
= AV_PIX_FMT_NONE
;
2740 avctx
->pix_fmt
= AV_PIX_FMT_YUV420P
;
2741 avctx
->internal
->allocate_progress
= 1;
2743 ff_videodsp_init(&s
->vdsp
, 8);
2745 ff_vp78dsp_init(&s
->vp8dsp
);
2746 if (CONFIG_VP7_DECODER
&& is_vp7
) {
2747 ff_h264_pred_init(&s
->hpc
, AV_CODEC_ID_VP7
, 8, 1);
2748 ff_vp7dsp_init(&s
->vp8dsp
);
2749 } else if (CONFIG_VP8_DECODER
&& !is_vp7
) {
2750 ff_h264_pred_init(&s
->hpc
, AV_CODEC_ID_VP8
, 8, 1);
2751 ff_vp8dsp_init(&s
->vp8dsp
);
2754 /* does not change for VP8 */
2755 memcpy(s
->prob
[0].scan
, ff_zigzag_scan
, sizeof(s
->prob
[0].scan
));
2757 if ((ret
= vp8_init_frames(s
)) < 0) {
2758 ff_vp8_decode_free(avctx
);
#if CONFIG_VP7_DECODER
/* VP7 init wrapper. */
static int vp7_decode_init(AVCodecContext *avctx)
{
    return vp78_decode_init(avctx, IS_VP7);
}
#endif /* CONFIG_VP7_DECODER */
2772 av_cold
int ff_vp8_decode_init(AVCodecContext
*avctx
)
2774 return vp78_decode_init(avctx
, IS_VP8
);
2777 #if CONFIG_VP8_DECODER
2778 static av_cold
int vp8_decode_init_thread_copy(AVCodecContext
*avctx
)
2780 VP8Context
*s
= avctx
->priv_data
;
2785 if ((ret
= vp8_init_frames(s
)) < 0) {
2786 ff_vp8_decode_free(avctx
);
2793 #define REBASE(pic) pic ? pic - &s_src->frames[0] + &s->frames[0] : NULL
2795 static int vp8_decode_update_thread_context(AVCodecContext
*dst
,
2796 const AVCodecContext
*src
)
2798 VP8Context
*s
= dst
->priv_data
, *s_src
= src
->priv_data
;
2801 if (s
->macroblocks_base
&&
2802 (s_src
->mb_width
!= s
->mb_width
|| s_src
->mb_height
!= s
->mb_height
)) {
2804 s
->mb_width
= s_src
->mb_width
;
2805 s
->mb_height
= s_src
->mb_height
;
2808 s
->prob
[0] = s_src
->prob
[!s_src
->update_probabilities
];
2809 s
->segmentation
= s_src
->segmentation
;
2810 s
->lf_delta
= s_src
->lf_delta
;
2811 memcpy(s
->sign_bias
, s_src
->sign_bias
, sizeof(s
->sign_bias
));
2813 for (i
= 0; i
< FF_ARRAY_ELEMS(s_src
->frames
); i
++) {
2814 if (s_src
->frames
[i
].tf
.f
->data
[0]) {
2815 int ret
= vp8_ref_frame(s
, &s
->frames
[i
], &s_src
->frames
[i
]);
2821 s
->framep
[0] = REBASE(s_src
->next_framep
[0]);
2822 s
->framep
[1] = REBASE(s_src
->next_framep
[1]);
2823 s
->framep
[2] = REBASE(s_src
->next_framep
[2]);
2824 s
->framep
[3] = REBASE(s_src
->next_framep
[3]);
2828 #endif /* CONFIG_VP8_DECODER */
#if CONFIG_VP7_DECODER
/* NOTE(review): the .name initializer line was lost by the extraction and
 * is reconstructed from upstream — verify. */
AVCodec ff_vp7_decoder = {
    .name           = "vp7",
    .long_name      = NULL_IF_CONFIG_SMALL("On2 VP7"),
    .type           = AVMEDIA_TYPE_VIDEO,
    .id             = AV_CODEC_ID_VP7,
    .priv_data_size = sizeof(VP8Context),
    .init           = vp7_decode_init,
    .close          = ff_vp8_decode_free,
    .decode         = vp7_decode_frame,
    .capabilities   = AV_CODEC_CAP_DR1,
    .flush          = vp8_decode_flush,
};
#endif /* CONFIG_VP7_DECODER */
#if CONFIG_VP8_DECODER
/* Fix: the trailing #endif comment said CONFIG_VP7_DECODER although this
 * block is guarded by CONFIG_VP8_DECODER — corrected below.
 * NOTE(review): the .name initializer and the HWACCEL_VAAPI(vp8) entry in
 * hw_configs were lost by the extraction and are reconstructed from
 * upstream — verify. */
AVCodec ff_vp8_decoder = {
    .name                  = "vp8",
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_VP8,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
                             AV_CODEC_CAP_SLICE_THREADS,
    .hw_configs            = (const AVCodecHWConfigInternal *[]) {
#if CONFIG_VP8_VAAPI_HWACCEL
                                 HWACCEL_VAAPI(vp8),
#endif
                                 NULL
                             },
    .flush                 = vp8_decode_flush,
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};
#endif /* CONFIG_VP8_DECODER */