/*
 * H.26L/H.264/AVC/JVT/14496-10/... direct mb/block decoding
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file
 * H.264 / AVC / MPEG-4 part10 direct mb/block decoding.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
#include "mpegutils.h"
#include "rectangle.h"
#include "threadframe.h"
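/* Temporal direct-mode distance scaling (H.264 spec 8.4.1.2.3): tb and td
 * are the POC distances current->ref and colocated->ref, clipped to
 * [-128, 127]; tx = (16384 + |td| / 2) / td; the result is
 * (tb * tx + 32) >> 6, clipped to [-1024, 1023]. 256 means unity scale,
 * since callers round with (scale * mv + 128) >> 8. */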
static int get_scale_factor(const H264SliceContext *sl,
                            int poc, int poc1, int i)
{
    int poc0 = sl->ref_list[0][i].poc;
    int64_t pocdiff = poc1 - (int64_t)poc0;
    int td = av_clip_int8(pocdiff);

    if (pocdiff != (int)pocdiff)
        avpriv_request_sample(sl->h264->avctx, "pocdiff overflow");

    if (td == 0 || sl->ref_list[0][i].parent->long_ref) {
        return 256;
    } else {
        int64_t pocdiff0 = poc - (int64_t)poc0;
        int tb = av_clip_int8(pocdiff0);
        int tx = (16384 + (FFABS(td) >> 1)) / td;

        if (pocdiff0 != (int)pocdiff0)
            av_log(sl->h264->avctx, AV_LOG_DEBUG, "pocdiff0 overflow\n");

        return av_clip_intp2((tb * tx + 32) >> 6, 10);
    }
}
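/* Precomputes one distance scale factor per list-0 reference so temporal
 * direct MBs can simply index sl->dist_scale_factor[]. For MBAFF frames
 * the per-field tables are filled as well, using the field POCs; field
 * references live at offset 16 in the reference list, hence i + 16. */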
void ff_h264_direct_dist_scale_factor(const H264Context *const h,
                                      H264SliceContext *sl)
{
    const int poc  = FIELD_PICTURE(h) ? h->cur_pic_ptr->field_poc[h->picture_structure == PICT_BOTTOM_FIELD]
                                      : h->cur_pic_ptr->poc;
    const int poc1 = sl->ref_list[1][0].poc;
    int i, field;

    if (FRAME_MBAFF(h))
        for (field = 0; field < 2; field++) {
            const int poc  = h->cur_pic_ptr->field_poc[field];
            const int poc1 = sl->ref_list[1][0].parent->field_poc[field];
            for (i = 0; i < 2 * sl->ref_count[0]; i++)
                sl->dist_scale_factor_field[field][i ^ field] =
                    get_scale_factor(sl, poc, poc1, i + 16);
        }

    for (i = 0; i < sl->ref_count[0]; i++)
        sl->dist_scale_factor[i] = get_scale_factor(sl, poc, poc1, i);
}
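/* Builds the map from the colocated (list-1) picture's own reference
 * indices to the current slice's list-0 indices. References are matched
 * via the packed key 4 * frame_num + (reference & 3), whose low bits
 * carry the field parity. */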
static void fill_colmap(const H264Context *h, H264SliceContext *sl,
                        int map[2][16 + 32], int list,
                        int field, int colfield, int mbafi)
{
    const H264Picture *const ref1 = sl->ref_list[1][0].parent;
    int j, old_ref, rfield;
    int start  = mbafi ? 16                       : 0;
    int end    = mbafi ? 16 + 2 * sl->ref_count[0] : sl->ref_count[0];
    int interl = mbafi || h->picture_structure != PICT_FRAME;

    /* bogus; fills in for missing frames */
    memset(map[list], 0, sizeof(map[list]));

    for (rfield = 0; rfield < 2; rfield++) {
        for (old_ref = 0; old_ref < ref1->ref_count[colfield][list]; old_ref++) {
            int poc = ref1->ref_poc[colfield][list][old_ref];

            if (!interl)
                poc |= 3;
            // FIXME: store all MBAFF references so this is not needed
            else if (interl && (poc & 3) == 3)
                poc = (poc & ~3) + rfield + 1;

            for (j = start; j < end; j++) {
                if (4 * sl->ref_list[0][j].parent->frame_num +
                    (sl->ref_list[0][j].reference & 3) == poc) {
                    int cur_ref = mbafi ? (j - 16) ^ field : j;
                    if (ref1->mbaff)
                        map[list][2 * old_ref + (rfield ^ field) + 16] = cur_ref;
                    if (rfield == field || !interl)
                        map[list][old_ref] = cur_ref;
                    break;
                }
            }
        }
    }
}
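/* Saves the current picture's reference counts and packed POC keys (for
 * later pictures that will use it as their colocated picture), then
 * derives col_parity / col_fieldoff, i.e. which field of the colocated
 * picture direct MBs should read, and fills the col-to-list0 maps. */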
void ff_h264_direct_ref_list_init(const H264Context *const h, H264SliceContext *sl)
{
    H264Ref *const ref1 = &sl->ref_list[1][0];
    H264Picture *const cur = h->cur_pic_ptr;
    int list, j, field;
    int sidx     = (h->picture_structure & 1) ^ 1;
    int ref1sidx = (ref1->reference      & 1) ^ 1;

    for (list = 0; list < sl->list_count; list++) {
        cur->ref_count[sidx][list] = sl->ref_count[list];
        for (j = 0; j < sl->ref_count[list]; j++)
            cur->ref_poc[sidx][list][j] = 4 * sl->ref_list[list][j].parent->frame_num +
                                          (sl->ref_list[list][j].reference & 3);
    }

    if (h->picture_structure == PICT_FRAME) {
        memcpy(cur->ref_count[1], cur->ref_count[0], sizeof(cur->ref_count[0]));
        memcpy(cur->ref_poc[1],   cur->ref_poc[0],   sizeof(cur->ref_poc[0]));
    }

    if (h->current_slice == 0) {
        cur->mbaff = FRAME_MBAFF(h);
    } else {
        av_assert0(cur->mbaff == FRAME_MBAFF(h));
    }

    sl->col_fieldoff = 0;

    if (sl->list_count != 2 || !sl->ref_count[1])
        return;

    if (h->picture_structure == PICT_FRAME) {
        int cur_poc  = h->cur_pic_ptr->poc;
        const int *col_poc = sl->ref_list[1][0].parent->field_poc;
        if (col_poc[0] == INT_MAX && col_poc[1] == INT_MAX) {
            av_log(h->avctx, AV_LOG_ERROR, "co located POCs unavailable\n");
            sl->col_parity = 1;
        } else
            sl->col_parity = (FFABS(col_poc[0] - (int64_t)cur_poc) >=
                              FFABS(col_poc[1] - (int64_t)cur_poc));
        ref1sidx =
        sidx     = sl->col_parity;
    // FL -> FL & differ parity
    } else if (!(h->picture_structure & sl->ref_list[1][0].reference) &&
               !sl->ref_list[1][0].parent->mbaff) {
        sl->col_fieldoff = 2 * sl->ref_list[1][0].reference - 3;
    }

    if (sl->slice_type_nos != AV_PICTURE_TYPE_B || sl->direct_spatial_mv_pred)
        return;

    for (list = 0; list < 2; list++) {
        fill_colmap(h, sl, sl->map_col_to_list0, list, sidx, ref1sidx, 0);
        if (FRAME_MBAFF(h))
            for (field = 0; field < 2; field++)
                fill_colmap(h, sl, sl->map_col_to_list0_field[field], list, field,
                            field, 1);
    }
}
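/* With frame threading the colocated picture may still be being decoded;
 * block until the rows covering mb_y have completed. */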
static void await_reference_mb_row(const H264Context *const h, H264Ref *ref,
                                   int mb_y)
{
    int ref_field         = ref->reference - 1;
    int ref_field_picture = ref->parent->field_picture;
    int ref_height        = 16 * h->mb_height >> ref_field_picture;

    if (!HAVE_THREADS || !(h->avctx->active_thread_type & FF_THREAD_FRAME))
        return;

    /* FIXME: It can be safe to access mb stuff
     * even if pixels aren't deblocked yet. */

    ff_thread_await_progress(&ref->parent->tf,
                             FFMIN(16 * mb_y >> ref_field_picture,
                                   ref_height - 1),
                             ref_field_picture && ref_field);
}
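/* Spatial direct prediction (H.264 spec 8.4.1.2.2): each list's reference
 * is the minimum of the left/top/top-right neighbours, the MV is the
 * usual median prediction, and partitions whose colocated block is still
 * (ref 0, |mv| <= 1) are forced to zero motion. */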
static void pred_spatial_direct_motion(const H264Context *const h, H264SliceContext *sl,
                                       int *mb_type)
{
    int b8_stride = 2;
    int b4_stride = h->b_stride;
    int mb_xy = sl->mb_xy, mb_y = sl->mb_y;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type = MB_TYPE_L0L1;
    int i8, i4;
    int ref[2];
    int mv[2];
    int list;

    assert(sl->ref_list[1][0].reference & 3);

    await_reference_mb_row(h, &sl->ref_list[1][0],
                           sl->mb_y + !!IS_INTERLACED(*mb_type));

#define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16 | MB_TYPE_INTRA4x4 | \
                                MB_TYPE_INTRA16x16 | MB_TYPE_INTRA_PCM)

    /* ref = min(neighbors) */
    for (list = 0; list < 2; list++) {
        int left_ref     = sl->ref_cache[list][scan8[0] - 1];
        int top_ref      = sl->ref_cache[list][scan8[0] - 8];
        int refc         = sl->ref_cache[list][scan8[0] - 8 + 4];
        const int16_t *C = sl->mv_cache[list][scan8[0] - 8 + 4];
        if (refc == PART_NOT_AVAILABLE) {
            refc = sl->ref_cache[list][scan8[0] - 8 - 1];
            C    = sl->mv_cache[list][scan8[0] - 8 - 1];
        }
        ref[list] = FFMIN3((unsigned)left_ref,
                           (unsigned)top_ref,
                           (unsigned)refc);
        if (ref[list] >= 0) {
            /* This is just pred_motion() but with the cases removed that
             * cannot happen for direct blocks. */
            const int16_t *const A = sl->mv_cache[list][scan8[0] - 1];
            const int16_t *const B = sl->mv_cache[list][scan8[0] - 8];

            int match_count = (left_ref == ref[list]) +
                              (top_ref  == ref[list]) +
                              (refc     == ref[list]);

            if (match_count > 1) { // most common
                mv[list] = pack16to32(mid_pred(A[0], B[0], C[0]),
                                      mid_pred(A[1], B[1], C[1]));
            } else {
                assert(match_count == 1);
                if (left_ref == ref[list])
                    mv[list] = AV_RN32A(A);
                else if (top_ref == ref[list])
                    mv[list] = AV_RN32A(B);
                else
                    mv[list] = AV_RN32A(C);
            }
            av_assert2(ref[list] < (sl->ref_count[list] << !!FRAME_MBAFF(h)));
        } else {
            int mask = ~(MB_TYPE_L0 << (2 * list));
            mv[list]  = 0;
            ref[list] = -1;
            if (!is_b8x8)
                *mb_type &= mask;
            sub_mb_type &= mask;
        }
    }
    if (ref[0] < 0 && ref[1] < 0) {
        ref[0] =
        ref[1] = 0;
        if (!is_b8x8)
            *mb_type |= MB_TYPE_L0L1;
        sub_mb_type |= MB_TYPE_L0L1;
    }

    if (!(is_b8x8 | mv[0] | mv[1])) {
        fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
        fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
        fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, 0, 4);
        fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, 0, 4);
        *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
                                 MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
                   MB_TYPE_16x16 | MB_TYPE_DIRECT2;
        return;
    }

    if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
        if (!IS_INTERLACED(*mb_type)) {                             //     AFR/FR    -> AFL/FL
            mb_y  = (sl->mb_y & ~1) + sl->col_parity;
            mb_xy = sl->mb_x +
                    ((sl->mb_y & ~1) + sl->col_parity) * h->mb_stride;
            b8_stride = 0;
        } else {
            mb_y  += sl->col_fieldoff;
            mb_xy += h->mb_stride * sl->col_fieldoff; // non-zero for FL -> FL & differ parity
        }
        goto single_col;
    } else {                                        // AFL/AFR/FR/FL -> AFR/FR
        if (IS_INTERLACED(*mb_type)) {              // AFL       /FL -> AFR/FR
            mb_y           =  sl->mb_y & ~1;
            mb_xy          = (sl->mb_y & ~1) * h->mb_stride + sl->mb_x;
            mb_type_col[0] = sl->ref_list[1][0].parent->mb_type[mb_xy];
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy + h->mb_stride];
            b8_stride      = 2 + 4 * h->mb_stride;
            b4_stride     *= 6;
            if (IS_INTERLACED(mb_type_col[0]) !=
                IS_INTERLACED(mb_type_col[1])) {
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
            }

            sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
            if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
                (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
                !is_b8x8) {
                *mb_type |= MB_TYPE_16x8 | MB_TYPE_DIRECT2; /* B_16x8 */
            } else {
                *mb_type |= MB_TYPE_8x8;
            }
        } else {                                    //     AFR/FR    -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy];

            sub_mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_SUB_8x8 */
            if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
                *mb_type |= MB_TYPE_16x16 | MB_TYPE_DIRECT2; /* B_16x16 */
            } else if (!is_b8x8 &&
                       (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
                *mb_type |= MB_TYPE_DIRECT2 |
                            (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
            } else {
                if (!h->ps.sps->direct_8x8_inference_flag) {
                    /* FIXME: Save sub mb types from previous frames (or derive
                     * from MVs) so we know exactly what block size to use. */
                    sub_mb_type += (MB_TYPE_8x8 - MB_TYPE_16x16); /* B_SUB_4x4 */
                }
                *mb_type |= MB_TYPE_8x8;
            }
        }
    }

    await_reference_mb_row(h, &sl->ref_list[1][0], mb_y);

    l1mv0  = (void*)&sl->ref_list[1][0].parent->motion_val[0][h->mb2b_xy[mb_xy]];
    l1mv1  = (void*)&sl->ref_list[1][0].parent->motion_val[1][h->mb2b_xy[mb_xy]];
    l1ref0 = &sl->ref_list[1][0].parent->ref_index[0][4 * mb_xy];
    l1ref1 = &sl->ref_list[1][0].parent->ref_index[1][4 * mb_xy];
    if (!b8_stride) {
        if (sl->mb_y & 1) {
            l1ref0 += 2;
            l1ref1 += 2;
            l1mv0  += 2 * b4_stride;
            l1mv1  += 2 * b4_stride;
        }
    }

    if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
        int n = 0;
        for (i8 = 0; i8 < 4; i8++) {
            int x8  = i8 & 1;
            int y8  = i8 >> 1;
            int xy8 = x8     + y8 * b8_stride;
            int xy4 = x8 * 3 + y8 * b4_stride;
            int a, b;

            if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
                continue;
            sl->sub_mb_type[i8] = sub_mb_type;

            fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[0], 1);
            fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[1], 1);
            if (!IS_INTRA(mb_type_col[y8]) && !sl->ref_list[1][0].parent->long_ref &&
                ((l1ref0[xy8] == 0 &&
                  FFABS(l1mv0[xy4][0]) <= 1 &&
                  FFABS(l1mv0[xy4][1]) <= 1) ||
                 (l1ref0[xy8] < 0 &&
                  l1ref1[xy8] == 0 &&
                  FFABS(l1mv1[xy4][0]) <= 1 &&
                  FFABS(l1mv1[xy4][1]) <= 1))) {
                a =
                b = 0;
                if (ref[0] > 0)
                    a = mv[0];
                if (ref[1] > 0)
                    b = mv[1];
                n++;
            } else {
                a = mv[0];
                b = mv[1];
            }
            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, a, 4);
            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, b, 4);
        }
        if (!is_b8x8 && !(n & 3))
            *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
                                     MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
                       MB_TYPE_16x16 | MB_TYPE_DIRECT2;
    } else if (IS_16X16(*mb_type)) {
        int a, b;

        fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
        fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
        if (!IS_INTRA(mb_type_col[0]) && !sl->ref_list[1][0].parent->long_ref &&
            ((l1ref0[0] == 0 &&
              FFABS(l1mv0[0][0]) <= 1 &&
              FFABS(l1mv0[0][1]) <= 1) ||
             (l1ref0[0] < 0 && !l1ref1[0] &&
              FFABS(l1mv1[0][0]) <= 1 &&
              FFABS(l1mv1[0][1]) <= 1 &&
              h->x264_build > 33U))) {
            a = b = 0;
            if (ref[0] > 0)
                a = mv[0];
            if (ref[1] > 0)
                b = mv[1];
        } else {
            a = mv[0];
            b = mv[1];
        }
        fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
        fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
    } else {
        int n = 0;
        for (i8 = 0; i8 < 4; i8++) {
            const int x8 = i8 & 1;
            const int y8 = i8 >> 1;

            if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
                continue;
            sl->sub_mb_type[i8] = sub_mb_type;

            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, mv[0], 4);
            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, mv[1], 4);
            fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[0], 1);
            fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8,
                           (uint8_t)ref[1], 1);

            assert(b8_stride == 2);
            /* col_zero_flag */
            if (!IS_INTRA(mb_type_col[0]) && !sl->ref_list[1][0].parent->long_ref &&
                (l1ref0[i8] == 0 ||
                 (l1ref0[i8] < 0 &&
                  l1ref1[i8] == 0 &&
                  h->x264_build > 33U))) {
                const int16_t (*l1mv)[2] = l1ref0[i8] == 0 ? l1mv0 : l1mv1;
                if (IS_SUB_8X8(sub_mb_type)) {
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
                    if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
                        if (ref[0] == 0)
                            fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2,
                                           8, 0, 4);
                        if (ref[1] == 0)
                            fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2,
                                           8, 0, 4);
                        n += 4;
                    }
                } else {
                    int m = 0;
                    for (i4 = 0; i4 < 4; i4++) {
                        const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
                                                     (y8 * 2 + (i4 >> 1)) * b4_stride];
                        if (FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1) {
                            if (ref[0] == 0)
                                AV_ZERO32(sl->mv_cache[0][scan8[i8 * 4 + i4]]);
                            if (ref[1] == 0)
                                AV_ZERO32(sl->mv_cache[1][scan8[i8 * 4 + i4]]);
                            m++;
                        }
                    }
                    if (m == 4)
                        sl->sub_mb_type[i8] += MB_TYPE_16x16 - MB_TYPE_8x8;
                    n += m;
                }
            }
        }
        if (!is_b8x8 && !(n & 15))
            *mb_type = (*mb_type & ~(MB_TYPE_8x8 | MB_TYPE_16x8 | MB_TYPE_8x16 |
                                     MB_TYPE_P1L0 | MB_TYPE_P1L1)) |
                       MB_TYPE_16x16 | MB_TYPE_DIRECT2;
    }
}
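/* Temporal direct prediction (H.264 spec 8.4.1.2.3): the list-0 MV is the
 * colocated MV scaled by the POC-distance factor, and the list-1 MV is
 * their difference: mvL0 = (scale * mvCol + 128) >> 8, mvL1 = mvL0 - mvCol. */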
static void pred_temp_direct_motion(const H264Context *const h, H264SliceContext *sl,
                                    int *mb_type)
{
    int b8_stride = 2;
    int b4_stride = h->b_stride;
    int mb_xy = sl->mb_xy, mb_y = sl->mb_y;
    int mb_type_col[2];
    const int16_t (*l1mv0)[2], (*l1mv1)[2];
    const int8_t *l1ref0, *l1ref1;
    const int is_b8x8 = IS_8X8(*mb_type);
    unsigned int sub_mb_type;
    int i8, i4;

    assert(sl->ref_list[1][0].reference & 3);

    await_reference_mb_row(h, &sl->ref_list[1][0],
                           sl->mb_y + !!IS_INTERLACED(*mb_type));

    if (IS_INTERLACED(sl->ref_list[1][0].parent->mb_type[mb_xy])) { // AFL/AFR/FR/FL -> AFL/FL
        if (!IS_INTERLACED(*mb_type)) {                             //     AFR/FR    -> AFL/FL
            mb_y  = (sl->mb_y & ~1) + sl->col_parity;
            mb_xy = sl->mb_x +
                    ((sl->mb_y & ~1) + sl->col_parity) * h->mb_stride;
            b8_stride = 0;
        } else {
            mb_y  += sl->col_fieldoff;
            mb_xy += h->mb_stride * sl->col_fieldoff; // non-zero for FL -> FL & differ parity
        }
        goto single_col;
    } else {                                        // AFL/AFR/FR/FL -> AFR/FR
        if (IS_INTERLACED(*mb_type)) {              // AFL       /FL -> AFR/FR
            mb_y           = sl->mb_y & ~1;
            mb_xy          = sl->mb_x + (sl->mb_y & ~1) * h->mb_stride;
            mb_type_col[0] = sl->ref_list[1][0].parent->mb_type[mb_xy];
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy + h->mb_stride];
            b8_stride      = 2 + 4 * h->mb_stride;
            b4_stride     *= 6;
            if (IS_INTERLACED(mb_type_col[0]) !=
                IS_INTERLACED(mb_type_col[1])) {
                mb_type_col[0] &= ~MB_TYPE_INTERLACED;
                mb_type_col[1] &= ~MB_TYPE_INTERLACED;
            }

            sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                          MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */

            if ((mb_type_col[0] & MB_TYPE_16x16_OR_INTRA) &&
                (mb_type_col[1] & MB_TYPE_16x16_OR_INTRA) &&
                !is_b8x8) {
                *mb_type |= MB_TYPE_16x8 | MB_TYPE_L0L1 |
                            MB_TYPE_DIRECT2;                /* B_16x8 */
            } else {
                *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
            }
        } else {                                    //     AFR/FR    -> AFR/FR
single_col:
            mb_type_col[0] =
            mb_type_col[1] = sl->ref_list[1][0].parent->mb_type[mb_xy];

            sub_mb_type = MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                          MB_TYPE_DIRECT2;                  /* B_SUB_8x8 */
            if (!is_b8x8 && (mb_type_col[0] & MB_TYPE_16x16_OR_INTRA)) {
                *mb_type |= MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                            MB_TYPE_DIRECT2;                /* B_16x16 */
            } else if (!is_b8x8 &&
                       (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16))) {
                *mb_type |= MB_TYPE_L0L1 | MB_TYPE_DIRECT2 |
                            (mb_type_col[0] & (MB_TYPE_16x8 | MB_TYPE_8x16));
            } else {
                if (!h->ps.sps->direct_8x8_inference_flag) {
                    /* FIXME: save sub mb types from previous frames (or derive
                     * from MVs) so we know exactly what block size to use */
                    sub_mb_type = MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 |
                                  MB_TYPE_DIRECT2;          /* B_SUB_4x4 */
                }
                *mb_type |= MB_TYPE_8x8 | MB_TYPE_L0L1;
            }
        }
    }

    await_reference_mb_row(h, &sl->ref_list[1][0], mb_y);

    l1mv0  = (void*)&sl->ref_list[1][0].parent->motion_val[0][h->mb2b_xy[mb_xy]];
    l1mv1  = (void*)&sl->ref_list[1][0].parent->motion_val[1][h->mb2b_xy[mb_xy]];
    l1ref0 = &sl->ref_list[1][0].parent->ref_index[0][4 * mb_xy];
    l1ref1 = &sl->ref_list[1][0].parent->ref_index[1][4 * mb_xy];
    if (!b8_stride) {
        if (sl->mb_y & 1) {
            l1ref0 += 2;
            l1ref1 += 2;
            l1mv0  += 2 * b4_stride;
            l1mv1  += 2 * b4_stride;
        }
    }

    {
        const int *map_col_to_list0[2] = { sl->map_col_to_list0[0],
                                           sl->map_col_to_list0[1] };
        const int *dist_scale_factor = sl->dist_scale_factor;
        int ref_offset;

        if (FRAME_MBAFF(h) && IS_INTERLACED(*mb_type)) {
            map_col_to_list0[0] = sl->map_col_to_list0_field[sl->mb_y & 1][0];
            map_col_to_list0[1] = sl->map_col_to_list0_field[sl->mb_y & 1][1];
            dist_scale_factor   = sl->dist_scale_factor_field[sl->mb_y & 1];
        }
        ref_offset = (sl->ref_list[1][0].parent->mbaff << 4) & (mb_type_col[0] >> 3);

        if (IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col[0])) {
            int y_shift = 2 * !IS_INTERLACED(*mb_type);
            assert(h->ps.sps->direct_8x8_inference_flag);

            for (i8 = 0; i8 < 4; i8++) {
                const int x8 = i8 & 1;
                const int y8 = i8 >> 1;
                int ref0, scale;
                const int16_t (*l1mv)[2] = l1mv0;

                if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
                    continue;
                sl->sub_mb_type[i8] = sub_mb_type;

                fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                if (IS_INTRA(mb_type_col[y8])) {
                    fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    continue;
                }

                ref0 = l1ref0[x8 + y8 * b8_stride];
                if (ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else {
                    ref0 = map_col_to_list0[1][l1ref1[x8 + y8 * b8_stride] +
                                               ref_offset];
                    l1mv = l1mv1;
                }
                scale = dist_scale_factor[ref0];
                fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                               ref0, 1);

                {
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * b4_stride];
                    int my_col            = (mv_col[1] * (1 << y_shift)) / 2;
                    int mx                = (scale * mv_col[0] + 128) >> 8;
                    int my                = (scale * my_col    + 128) >> 8;
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx, my), 4);
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx - mv_col[0], my - my_col), 4);
                }
            }
            return;
        }

        /* one-to-one mv scaling */

        if (IS_16X16(*mb_type)) {
            int ref, mv0, mv1;

            fill_rectangle(&sl->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
            if (IS_INTRA(mb_type_col[0])) {
                ref = mv0 = mv1 = 0;
            } else {
                const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0] + ref_offset]
                                                : map_col_to_list0[1][l1ref1[0] + ref_offset];
                const int scale = dist_scale_factor[ref0];
                const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
                int mv_l0[2];
                mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                ref      = ref0;
                mv0      = pack16to32(mv_l0[0], mv_l0[1]);
                mv1      = pack16to32(mv_l0[0] - mv_col[0], mv_l0[1] - mv_col[1]);
            }
            fill_rectangle(&sl->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
            fill_rectangle(&sl->mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
            fill_rectangle(&sl->mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
        } else {
            for (i8 = 0; i8 < 4; i8++) {
                const int x8 = i8 & 1;
                const int y8 = i8 >> 1;
                int ref0, scale;
                const int16_t (*l1mv)[2] = l1mv0;

                if (is_b8x8 && !IS_DIRECT(sl->sub_mb_type[i8]))
                    continue;
                sl->sub_mb_type[i8] = sub_mb_type;
                fill_rectangle(&sl->ref_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                if (IS_INTRA(mb_type_col[0])) {
                    fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 1);
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8, 0, 4);
                    continue;
                }

                assert(b8_stride == 2);
                ref0 = l1ref0[i8];
                if (ref0 >= 0)
                    ref0 = map_col_to_list0[0][ref0 + ref_offset];
                else
                    ref0 = map_col_to_list0[1][l1ref1[i8] + ref_offset];

                scale = dist_scale_factor[ref0];

                fill_rectangle(&sl->ref_cache[0][scan8[i8 * 4]], 2, 2, 8,
                               ref0, 1);
                if (IS_SUB_8X8(sub_mb_type)) {
                    const int16_t *mv_col = l1mv[x8 * 3 + y8 * 3 * b4_stride];
                    int mx                = (scale * mv_col[0] + 128) >> 8;
                    int my                = (scale * mv_col[1] + 128) >> 8;
                    fill_rectangle(&sl->mv_cache[0][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx, my), 4);
                    fill_rectangle(&sl->mv_cache[1][scan8[i8 * 4]], 2, 2, 8,
                                   pack16to32(mx - mv_col[0], my - mv_col[1]), 4);
                } else {
                    for (i4 = 0; i4 < 4; i4++) {
                        const int16_t *mv_col = l1mv[x8 * 2 + (i4 & 1) +
                                                     (y8 * 2 + (i4 >> 1)) * b4_stride];
                        int16_t *mv_l0 = sl->mv_cache[0][scan8[i8 * 4 + i4]];
                        mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
                        mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
                        AV_WN32A(sl->mv_cache[1][scan8[i8 * 4 + i4]],
                                 pack16to32(mv_l0[0] - mv_col[0],
                                            mv_l0[1] - mv_col[1]));
                    }
                }
            }
        }
    }
}
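/* Entry point: dispatches on the slice header's direct_spatial_mv_pred_flag. */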
void ff_h264_pred_direct_motion(const H264Context *const h, H264SliceContext *sl,
                                int *mb_type)
{
    if (sl->direct_spatial_mv_pred)
        pred_spatial_direct_motion(h, sl, mb_type);
    else
        pred_temp_direct_motion(h, sl, mb_type);
}