/*
 * H.26L/H.264/AVC/JVT/14496-10/... decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/*
 * Template configuration.
 *
 * FUNC(n) builds the name of each function generated from this template:
 * either n joined with "_simple_" and BITS through AV_JOIN, or n with a
 * "_complex" suffix.  PIXEL_SHIFT is correspondingly either the constant
 * BITS >> 4 or the run-time value h->pixel_shift.
 *
 * NOTE(review): the conditional that selects between the two definition
 * pairs is not visible in this listing (presumably SIMPLE) - confirm.
 */
26 # define FUNC(n) AV_JOIN(n ## _simple_, BITS)
27 # define PIXEL_SHIFT (BITS >> 4)
29 # define FUNC(n) n ## _complex
30 # define PIXEL_SHIFT h->pixel_shift
/*
 * The same template is included twice.
 * NOTE(review): whatever is redefined between the two inclusions is not
 * visible here - confirm against the full file.
 */
35 #include "h264_mc_template.c"
39 #include "h264_mc_template.c"
/**
 * Reconstruct the current macroblock (h->mb_x, h->mb_y) into the luma
 * plane and the two chroma planes of h->cur_pic.
 *
 * Visible steps, in order:
 *  - compute the destination pointers for the three planes and issue
 *    prefetches for them;
 *  - choose line sizes and block offsets for field or non-field
 *    macroblocks and store them in h->mb_linesize / h->mb_uvlinesize;
 *  - for intra PCM macroblocks (non-SIMPLE only), take the samples
 *    directly from h->intra_pcm_ptr;
 *  - otherwise run intra prediction or the hl_motion_422 / hl_motion_420
 *    helpers, then hl_decode_mb_idct_luma and the chroma residual calls;
 *  - clear the coefficient buffer h->mb when h->cbp is set or the
 *    macroblock is intra.
 *
 * The function name is produced by FUNC().
 *
 * NOTE(review): i, j, list, gb and qp are used below without a visible
 * declaration, and several braces / else branches are absent from this
 * listing; lines appear to be missing - confirm against the full file.
 *
 * @param h decoder context; supplies the macroblock position, picture
 *          buffers and DSP function tables, and is written through
 *          mb_linesize, mb_uvlinesize, list_counts and ref_cache.
 */
41 static av_noinline
void FUNC(hl_decode_mb
)(H264Context
*h
)
43 const int mb_x
= h
->mb_x
;
44 const int mb_y
= h
->mb_y
;
45 const int mb_xy
= h
->mb_xy
;
46 const int mb_type
= h
->cur_pic
.mb_type
[mb_xy
];
47 uint8_t *dest_y
, *dest_cb
, *dest_cr
;
48 int linesize
, uvlinesize
/*dct_offset*/;
50 int *block_offset
= &h
->block_offset
[0];
/* Transform bypass is only possible in the non-SIMPLE variant; it needs
 * qscale == 0 together with the SPS transform_bypass flag. */
51 const int transform_bypass
= !SIMPLE
&& (h
->qscale
== 0 && h
->sps
.transform_bypass
);
52 /* is_h264 should always be true if SVQ3 is disabled. */
53 const int is_h264
= !CONFIG_SVQ3_DECODER
|| SIMPLE
|| h
->avctx
->codec_id
== AV_CODEC_ID_H264
;
54 void (*idct_add
)(uint8_t *dst
, int16_t *block
, int stride
);
/* Number of chroma rows per macroblock: 16 reduced by the vertical
 * chroma shift. */
55 const int block_h
= 16 >> h
->chroma_y_shift
;
56 const int chroma422
= CHROMA422
;
/* Start of this macroblock in each plane.  Luma advances 16 columns per
 * mb_x and 16 lines per mb_y; chroma advances 8 columns per mb_x and
 * block_h lines per mb_y.  The column offset is scaled by PIXEL_SHIFT. */
58 dest_y
= h
->cur_pic
.f
.data
[0] + ((mb_x
<< PIXEL_SHIFT
) + mb_y
* h
->linesize
) * 16;
59 dest_cb
= h
->cur_pic
.f
.data
[1] + (mb_x
<< PIXEL_SHIFT
) * 8 + mb_y
* h
->uvlinesize
* block_h
;
60 dest_cr
= h
->cur_pic
.f
.data
[2] + (mb_x
<< PIXEL_SHIFT
) * 8 + mb_y
* h
->uvlinesize
* block_h
;
/* Prefetch calls for the destination area: first luma, then chroma with
 * the Cb-to-Cr pointer distance passed as the second argument. */
62 h
->vdsp
.prefetch(dest_y
+ (h
->mb_x
& 3) * 4 * h
->linesize
+ (64 << PIXEL_SHIFT
), h
->linesize
, 4);
63 h
->vdsp
.prefetch(dest_cb
+ (h
->mb_x
& 7) * h
->uvlinesize
+ (64 << PIXEL_SHIFT
), dest_cr
- dest_cb
, 2);
65 h
->list_counts
[mb_xy
] = h
->list_count
;
/* Field macroblock (non-SIMPLE only): both strides are doubled and the
 * block offsets starting at index 48 are used instead of those at 0. */
67 if (!SIMPLE
&& MB_FIELD
) {
68 linesize
= h
->mb_linesize
= h
->linesize
* 2;
69 uvlinesize
= h
->mb_uvlinesize
= h
->uvlinesize
* 2;
70 block_offset
= &h
->block_offset
[48];
/* Odd mb_y: move the destinations back up by 15 luma lines and
 * (block_h - 1) chroma lines. */
71 if (mb_y
& 1) { // FIXME move out of this function?
72 dest_y
-= h
->linesize
* 15;
73 dest_cb
-= h
->uvlinesize
* (block_h
- 1);
74 dest_cr
-= h
->uvlinesize
* (block_h
- 1);
/* For every reference list used by this macroblock, pass
 * (16 + ref) ^ (h->mb_y & 1) to fill_rectangle over the cached reference
 * indices: one 4x4 area for 16x16 macroblocks, otherwise one 2x2 area per
 * group of four blocks.
 * NOTE(review): the statement guarded by !USES_LIST and any condition
 * enclosing this loop are not visible in this listing. */
78 for (list
= 0; list
< h
->list_count
; list
++) {
79 if (!USES_LIST(mb_type
, list
))
81 if (IS_16X16(mb_type
)) {
82 int8_t *ref
= &h
->ref_cache
[list
][scan8
[0]];
83 fill_rectangle(ref
, 4, 4, 8, (16 + *ref
) ^ (h
->mb_y
& 1), 1);
85 for (i
= 0; i
< 16; i
+= 4) {
86 int ref
= h
->ref_cache
[list
][scan8
[i
]];
88 fill_rectangle(&h
->ref_cache
[list
][scan8
[i
]], 2, 2,
89 8, (16 + ref
) ^ (h
->mb_y
& 1), 1);
/* Unscaled strides; presumably the non-field branch (the else that
 * introduces it is not visible here - confirm). */
95 linesize
= h
->mb_linesize
= h
->linesize
;
96 uvlinesize
= h
->mb_uvlinesize
= h
->uvlinesize
;
97 // dct_offset = s->linesize * 16;
/* Intra PCM (non-SIMPLE only): samples come straight from
 * h->intra_pcm_ptr instead of prediction plus residual. */
100 if (!SIMPLE
&& IS_INTRA_PCM(mb_type
)) {
/* Bit-reader path: every sample is read with bit_depth bits and stored
 * as uint16_t.
 * NOTE(review): the condition choosing this path over the memcpy path
 * further down is not visible here (presumably PIXEL_SHIFT) - confirm. */
102 const int bit_depth
= h
->sps
.bit_depth_luma
;
105 init_get_bits(&gb
, h
->intra_pcm_ptr
,
106 ff_h264_mb_sizes
[h
->sps
.chroma_format_idc
] * bit_depth
);
108 for (i
= 0; i
< 16; i
++) {
109 uint16_t *tmp_y
= (uint16_t *)(dest_y
+ i
* linesize
);
110 for (j
= 0; j
< 16; j
++)
111 tmp_y
[j
] = get_bits(&gb
, bit_depth
);
/* Chroma is skipped only when CONFIG_GRAY is enabled, this is the
 * non-SIMPLE variant and CODEC_FLAG_GRAY is set in h->flags. */
113 if (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) {
/* chroma_format_idc == 0: fill both chroma blocks with the mid value
 * 1 << (bit_depth - 1). */
114 if (!h
->sps
.chroma_format_idc
) {
115 for (i
= 0; i
< block_h
; i
++) {
116 uint16_t *tmp_cb
= (uint16_t *)(dest_cb
+ i
* uvlinesize
);
117 for (j
= 0; j
< 8; j
++)
118 tmp_cb
[j
] = 1 << (bit_depth
- 1);
120 for (i
= 0; i
< block_h
; i
++) {
121 uint16_t *tmp_cr
= (uint16_t *)(dest_cr
+ i
* uvlinesize
);
122 for (j
= 0; j
< 8; j
++)
123 tmp_cr
[j
] = 1 << (bit_depth
- 1);
/* Read block_h rows of 8 samples for Cb and then the same for Cr
 * (presumably the else of the chroma_format_idc test - confirm). */
126 for (i
= 0; i
< block_h
; i
++) {
127 uint16_t *tmp_cb
= (uint16_t *)(dest_cb
+ i
* uvlinesize
);
128 for (j
= 0; j
< 8; j
++)
129 tmp_cb
[j
] = get_bits(&gb
, bit_depth
);
131 for (i
= 0; i
< block_h
; i
++) {
132 uint16_t *tmp_cr
= (uint16_t *)(dest_cr
+ i
* uvlinesize
);
133 for (j
= 0; j
< 8; j
++)
134 tmp_cr
[j
] = get_bits(&gb
, bit_depth
);
/* Byte-copy path: 16 rows of 16 bytes of luma are copied from the PCM
 * buffer. */
139 for (i
= 0; i
< 16; i
++)
140 memcpy(dest_y
+ i
* linesize
, h
->intra_pcm_ptr
+ i
* 16, 16);
141 if (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) {
/* chroma_format_idc == 0: fill both chroma blocks with 128. */
142 if (!h
->sps
.chroma_format_idc
) {
143 for (i
= 0; i
< block_h
; i
++) {
144 memset(dest_cb
+ i
* uvlinesize
, 128, 8);
145 memset(dest_cr
+ i
* uvlinesize
, 128, 8);
/* Cb rows start 256 bytes into the PCM buffer; Cr rows follow
 * block_h * 8 bytes after that. */
148 const uint8_t *src_cb
= h
->intra_pcm_ptr
+ 256;
149 const uint8_t *src_cr
= h
->intra_pcm_ptr
+ 256 + block_h
* 8;
150 for (i
= 0; i
< block_h
; i
++) {
151 memcpy(dest_cb
+ i
* uvlinesize
, src_cb
+ i
* 8, 8);
152 memcpy(dest_cr
+ i
* uvlinesize
, src_cr
+ i
* 8, 8);
/* Intra macroblock: when deblocking is enabled, xchg_mb_border is called
 * with 1 before prediction and with 0 after it; in between, both chroma
 * blocks are predicted with pred8x8[chroma_pred_mode] and luma with
 * hl_decode_mb_predict_luma. */
158 if (IS_INTRA(mb_type
)) {
159 if (h
->deblocking_filter
)
160 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
,
161 uvlinesize
, 1, 0, SIMPLE
, PIXEL_SHIFT
);
163 if (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) {
164 h
->hpc
.pred8x8
[h
->chroma_pred_mode
](dest_cb
, uvlinesize
);
165 h
->hpc
.pred8x8
[h
->chroma_pred_mode
](dest_cr
, uvlinesize
);
168 hl_decode_mb_predict_luma(h
, mb_type
, is_h264
, SIMPLE
,
169 transform_bypass
, PIXEL_SHIFT
,
170 block_offset
, linesize
, dest_y
, 0);
172 if (h
->deblocking_filter
)
173 xchg_mb_border(h
, dest_y
, dest_cb
, dest_cr
, linesize
,
174 uvlinesize
, 0, 0, SIMPLE
, PIXEL_SHIFT
);
175 } else if (is_h264
) {
/* Non-intra H.264 macroblock: hl_motion_422 or hl_motion_420 receives the
 * put/avg qpel and chroma tables plus the weight/biweight tables.
 * NOTE(review): the condition choosing between the two calls is not
 * visible here (chroma422 is the likely selector) - confirm. */
177 FUNC(hl_motion_422
)(h
, dest_y
, dest_cb
, dest_cr
,
178 h
->me
.qpel_put
, h
->h264chroma
.put_h264_chroma_pixels_tab
,
179 h
->me
.qpel_avg
, h
->h264chroma
.avg_h264_chroma_pixels_tab
,
180 h
->h264dsp
.weight_h264_pixels_tab
,
181 h
->h264dsp
.biweight_h264_pixels_tab
);
183 FUNC(hl_motion_420
)(h
, dest_y
, dest_cb
, dest_cr
,
184 h
->me
.qpel_put
, h
->h264chroma
.put_h264_chroma_pixels_tab
,
185 h
->me
.qpel_avg
, h
->h264chroma
.avg_h264_chroma_pixels_tab
,
186 h
->h264dsp
.weight_h264_pixels_tab
,
187 h
->h264dsp
.biweight_h264_pixels_tab
);
/* Luma residual stage, plane index 0. */
191 hl_decode_mb_idct_luma(h
, mb_type
, is_h264
, SIMPLE
, transform_bypass
,
192 PIXEL_SHIFT
, block_offset
, linesize
, dest_y
, 0);
/* Chroma residual stage, skipped in gray mode.
 * NOTE(review): the second operand of this && is not visible here. */
194 if ((SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) &&
196 uint8_t *dest
[2] = { dest_cb
, dest_cr
};
197 if (transform_bypass
) {
/* Bypass with an intra macroblock, profile_idc 244 and vertical or
 * horizontal chroma prediction: pred8x8_add is applied per chroma plane
 * to the coefficients at h->mb + 16*16*1 and 16*16*2 (scaled by
 * PIXEL_SHIFT). */
198 if (IS_INTRA(mb_type
) && h
->sps
.profile_idc
== 244 &&
199 (h
->chroma_pred_mode
== VERT_PRED8x8
||
200 h
->chroma_pred_mode
== HOR_PRED8x8
)) {
201 h
->hpc
.pred8x8_add
[h
->chroma_pred_mode
](dest
[0],
203 h
->mb
+ (16 * 16 * 1 << PIXEL_SHIFT
),
205 h
->hpc
.pred8x8_add
[h
->chroma_pred_mode
](dest
[1],
207 h
->mb
+ (16 * 16 * 2 << PIXEL_SHIFT
),
/* Other bypass cases: h264_add_pixels4 is applied to each chroma block
 * that has a non-zero count or a non-zero first coefficient.  j selects
 * the plane (1 = Cb, 2 = Cr, matching dest[j - 1]); the second inner loop
 * uses index i + 4 for the count cache and block offset. */
210 idct_add
= h
->h264dsp
.h264_add_pixels4
;
211 for (j
= 1; j
< 3; j
++) {
212 for (i
= j
* 16; i
< j
* 16 + 4; i
++)
213 if (h
->non_zero_count_cache
[scan8
[i
]] ||
214 dctcoef_get(h
->mb
, PIXEL_SHIFT
, i
* 16))
215 idct_add(dest
[j
- 1] + block_offset
[i
],
216 h
->mb
+ (i
* 16 << PIXEL_SHIFT
),
219 for (i
= j
* 16 + 4; i
< j
* 16 + 8; i
++)
220 if (h
->non_zero_count_cache
[scan8
[i
+ 4]] ||
221 dctcoef_get(h
->mb
, PIXEL_SHIFT
, i
* 16))
222 idct_add(dest
[j
- 1] + block_offset
[i
+ 4],
223 h
->mb
+ (i
* 16 << PIXEL_SHIFT
),
/* Chroma quantiser per plane: either h->chroma_qp[] + 3 or the unmodified
 * h->chroma_qp[].
 * NOTE(review): the condition choosing between the two assignments is not
 * visible here (chroma422 is the likely selector) - confirm. */
232 qp
[0] = h
->chroma_qp
[0] + 3;
233 qp
[1] = h
->chroma_qp
[1] + 3;
235 qp
[0] = h
->chroma_qp
[0];
236 qp
[1] = h
->chroma_qp
[1];
/* Chroma DC dequant/idct runs only for planes whose DC block has a
 * non-zero count; dequant4_coeff row 1/4 (Cb) or 2/5 (Cr) is chosen by
 * intra vs. non-intra.  h264_idct_add8 then handles both planes. */
238 if (h
->non_zero_count_cache
[scan8
[CHROMA_DC_BLOCK_INDEX
+ 0]])
239 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ (16 * 16 * 1 << PIXEL_SHIFT
),
240 h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 1 : 4][qp
[0]][0]);
241 if (h
->non_zero_count_cache
[scan8
[CHROMA_DC_BLOCK_INDEX
+ 1]])
242 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ (16 * 16 * 2 << PIXEL_SHIFT
),
243 h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 2 : 5][qp
[1]][0]);
244 h
->h264dsp
.h264_idct_add8(dest
, block_offset
,
246 h
->non_zero_count_cache
);
/* SVQ3 path: chroma DC dequant/idct is called unconditionally for both
 * planes (offsets not scaled by PIXEL_SHIFT), then ff_svq3_add_idct_c is
 * applied to each block with a non-zero count or first coefficient. */
247 } else if (CONFIG_SVQ3_DECODER
) {
248 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ 16 * 16 * 1,
249 h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 1 : 4][h
->chroma_qp
[0]][0]);
250 h
->h264dsp
.h264_chroma_dc_dequant_idct(h
->mb
+ 16 * 16 * 2,
251 h
->dequant4_coeff
[IS_INTRA(mb_type
) ? 2 : 5][h
->chroma_qp
[1]][0]);
252 for (j
= 1; j
< 3; j
++) {
253 for (i
= j
* 16; i
< j
* 16 + 4; i
++)
254 if (h
->non_zero_count_cache
[scan8
[i
]] || h
->mb
[i
* 16]) {
255 uint8_t *const ptr
= dest
[j
- 1] + block_offset
[i
];
256 ff_svq3_add_idct_c(ptr
, h
->mb
+ i
* 16,
258 ff_h264_chroma_qp
[0][h
->qscale
+ 12] - 12, 2);
/* Clear the coefficient buffer in two clear_blocks calls, the second one
 * starting 24 * 16 coefficients in (scaled by PIXEL_SHIFT). */
264 if (h
->cbp
|| IS_INTRA(mb_type
)) {
265 h
->dsp
.clear_blocks(h
->mb
);
266 h
->dsp
.clear_blocks(h
->mb
+ (24 * 16 << PIXEL_SHIFT
));
271 #if !SIMPLE || BITS == 8
275 #include "h264_mc_template.c"
/**
 * Reconstruct the current macroblock (h->mb_x, h->mb_y) when all planes
 * share the luma layout: every plane is addressed with h->linesize and a
 * 16x16 macroblock footprint, and each plane goes through the luma
 * prediction and residual helpers with its plane index.
 *
 * Visible steps, in order: compute and prefetch the per-plane
 * destinations, choose the field / non-field line size (stored in both
 * h->mb_linesize and h->mb_uvlinesize), take intra PCM samples directly
 * from h->intra_pcm_ptr, otherwise run intra prediction or
 * hl_motion_444 followed by hl_decode_mb_idct_luma per plane, then clear
 * h->mb when h->cbp is set or the macroblock is intra.
 *
 * The function name is produced by FUNC().
 *
 * NOTE(review): dest, linesize, i, j, p, list and gb are used below
 * without a visible declaration, and several braces / else branches and
 * trailing call arguments are absent from this listing; lines appear to
 * be missing - confirm against the full file.
 *
 * @param h decoder context; supplies the macroblock position, picture
 *          buffers and DSP function tables, and is written through
 *          mb_linesize, mb_uvlinesize, list_counts and ref_cache.
 */
277 static av_noinline
void FUNC(hl_decode_mb_444
)(H264Context
*h
)
279 const int mb_x
= h
->mb_x
;
280 const int mb_y
= h
->mb_y
;
281 const int mb_xy
= h
->mb_xy
;
282 const int mb_type
= h
->cur_pic
.mb_type
[mb_xy
];
286 int *block_offset
= &h
->block_offset
[0];
287 const int transform_bypass
= !SIMPLE
&& (h
->qscale
== 0 && h
->sps
.transform_bypass
);
/* Three planes are processed, or only the first one when CONFIG_GRAY is
 * enabled, this is the non-SIMPLE variant and CODEC_FLAG_GRAY is set. */
288 const int plane_count
= (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& CODEC_FLAG_GRAY
)) ? 3 : 1;
/* Every plane uses the same offset formula: 16 columns per mb_x (scaled
 * by PIXEL_SHIFT) and 16 lines of h->linesize per mb_y. */
290 for (p
= 0; p
< plane_count
; p
++) {
291 dest
[p
] = h
->cur_pic
.f
.data
[p
] +
292 ((mb_x
<< PIXEL_SHIFT
) + mb_y
* h
->linesize
) * 16;
293 h
->vdsp
.prefetch(dest
[p
] + (h
->mb_x
& 3) * 4 * h
->linesize
+ (64 << PIXEL_SHIFT
),
297 h
->list_counts
[mb_xy
] = h
->list_count
;
/* Field macroblock (non-SIMPLE only): the stride is doubled and the block
 * offsets starting at index 48 are used. */
299 if (!SIMPLE
&& MB_FIELD
) {
300 linesize
= h
->mb_linesize
= h
->mb_uvlinesize
= h
->linesize
* 2;
301 block_offset
= &h
->block_offset
[48];
/* Odd mb_y: move each destination back up by 15 lines.
 * NOTE(review): this loop always covers 3 planes, whereas dest[] is only
 * filled for plane_count planes above - confirm this is intended when
 * plane_count is 1. */
302 if (mb_y
& 1) // FIXME move out of this function?
303 for (p
= 0; p
< 3; p
++)
304 dest
[p
] -= h
->linesize
* 15;
/* For every reference list used by this macroblock, pass
 * (16 + ref) ^ (h->mb_y & 1) to fill_rectangle over the cached reference
 * indices: one 4x4 area for 16x16 macroblocks, otherwise one 2x2 area per
 * group of four blocks.
 * NOTE(review): the statement guarded by !USES_LIST and any condition
 * enclosing this loop are not visible in this listing. */
307 for (list
= 0; list
< h
->list_count
; list
++) {
308 if (!USES_LIST(mb_type
, list
))
310 if (IS_16X16(mb_type
)) {
311 int8_t *ref
= &h
->ref_cache
[list
][scan8
[0]];
312 fill_rectangle(ref
, 4, 4, 8, (16 + *ref
) ^ (h
->mb_y
& 1), 1);
314 for (i
= 0; i
< 16; i
+= 4) {
315 int ref
= h
->ref_cache
[list
][scan8
[i
]];
317 fill_rectangle(&h
->ref_cache
[list
][scan8
[i
]], 2, 2,
318 8, (16 + ref
) ^ (h
->mb_y
& 1), 1);
/* Unscaled stride; presumably the non-field branch (the else that
 * introduces it is not visible here - confirm). */
324 linesize
= h
->mb_linesize
= h
->mb_uvlinesize
= h
->linesize
;
/* Intra PCM (non-SIMPLE only): samples come straight from
 * h->intra_pcm_ptr. */
327 if (!SIMPLE
&& IS_INTRA_PCM(mb_type
)) {
/* Bit-reader path: the reader is sized for 768 samples (3 * 16 * 16) of
 * bit_depth bits, and each plane gets 16 rows of 16 samples stored as
 * uint16_t.
 * NOTE(review): the condition choosing this path over the memcpy path
 * below is not visible here (presumably PIXEL_SHIFT) - confirm. */
329 const int bit_depth
= h
->sps
.bit_depth_luma
;
331 init_get_bits(&gb
, h
->intra_pcm_ptr
, 768 * bit_depth
);
333 for (p
= 0; p
< plane_count
; p
++)
334 for (i
= 0; i
< 16; i
++) {
335 uint16_t *tmp
= (uint16_t *)(dest
[p
] + i
* linesize
);
336 for (j
= 0; j
< 16; j
++)
337 tmp
[j
] = get_bits(&gb
, bit_depth
);
/* Byte-copy path: plane p starts p * 256 bytes into the PCM buffer and
 * contributes 16 rows of 16 bytes. */
340 for (p
= 0; p
< plane_count
; p
++)
341 for (i
= 0; i
< 16; i
++)
342 memcpy(dest
[p
] + i
* linesize
,
343 h
->intra_pcm_ptr
+ p
* 256 + i
* 16, 16);
/* Intra macroblock: when deblocking is enabled, xchg_mb_border is called
 * with 1 before prediction and with 0 after it; each plane is predicted
 * by hl_decode_mb_predict_luma with its plane index. */
346 if (IS_INTRA(mb_type
)) {
347 if (h
->deblocking_filter
)
348 xchg_mb_border(h
, dest
[0], dest
[1], dest
[2], linesize
,
349 linesize
, 1, 1, SIMPLE
, PIXEL_SHIFT
);
351 for (p
= 0; p
< plane_count
; p
++)
352 hl_decode_mb_predict_luma(h
, mb_type
, 1, SIMPLE
,
353 transform_bypass
, PIXEL_SHIFT
,
354 block_offset
, linesize
, dest
[p
], p
);
356 if (h
->deblocking_filter
)
357 xchg_mb_border(h
, dest
[0], dest
[1], dest
[2], linesize
,
358 linesize
, 0, 1, SIMPLE
, PIXEL_SHIFT
);
/* Presumably the non-intra branch (its else is not visible here):
 * hl_motion_444 receives all three destinations plus the put/avg and
 * weight/biweight tables. */
360 FUNC(hl_motion_444
)(h
, dest
[0], dest
[1], dest
[2],
361 h
->me
.qpel_put
, h
->h264chroma
.put_h264_chroma_pixels_tab
,
362 h
->me
.qpel_avg
, h
->h264chroma
.avg_h264_chroma_pixels_tab
,
363 h
->h264dsp
.weight_h264_pixels_tab
,
364 h
->h264dsp
.biweight_h264_pixels_tab
);
/* Residual stage per plane through the luma helper.
 * NOTE(review): the trailing arguments of this call are not visible in
 * this listing. */
367 for (p
= 0; p
< plane_count
; p
++)
368 hl_decode_mb_idct_luma(h
, mb_type
, 1, SIMPLE
, transform_bypass
,
369 PIXEL_SHIFT
, block_offset
, linesize
,
/* Clear the coefficient buffer in two clear_blocks calls, the second one
 * starting 24 * 16 coefficients in (scaled by PIXEL_SHIFT). */
372 if (h
->cbp
|| IS_INTRA(mb_type
)) {
373 h
->dsp
.clear_blocks(h
->mb
);
374 h
->dsp
.clear_blocks(h
->mb
+ (24 * 16 << PIXEL_SHIFT
));