2 * H.26L/H.264/AVC/JVT/14496-10/... decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/* Template helpers.
 * FUNC(n) builds the per-variant function name: either n joined with
 * "_simple_" and BITS, or n with a "_complex" suffix.
 * PIXEL_SHIFT is either derived from BITS (BITS >> 4) or read from
 * h->pixel_shift.
 * NOTE(review): both macros are defined twice; the preprocessor conditional
 * that presumably selects one pair is not visible in this view -- confirm
 * against the full file. */
26 # define FUNC(n) AV_JOIN(n ## _simple_, BITS)
27 # define PIXEL_SHIFT (BITS >> 4)
29 # define FUNC(n) n ## _complex
30 # define PIXEL_SHIFT h->pixel_shift
/* The motion-compensation template is included twice.
 * NOTE(review): presumably each inclusion is preceded by a different
 * configuration define (not visible here) and yields the hl_motion_420 /
 * hl_motion_422 functions called further down -- confirm. */
35 #include "h264_mc_template.c"
39 #include "h264_mc_template.c"
/**
 * Reconstruct the slice's current macroblock into the current picture,
 * using separate chroma planes (dest_cb / dest_cr).
 *
 * Visible steps: compute the luma/chroma destination pointers from
 * sl->mb_x / sl->mb_y, record the list count, set up the line sizes, then
 * either copy intra-PCM samples, or run intra prediction / motion
 * compensation followed by the luma and chroma residual stages.
 *
 * @param h  decoder context; read for the picture buffers, parameter sets
 *           and DSP function tables. h->list_counts[mb_xy] is written.
 * @param sl slice context; supplies macroblock position, caches and
 *           coefficients. sl->mb_linesize, sl->mb_uvlinesize and
 *           sl->ref_cache are written.
 *
 * NOTE(review): this view of the function appears to be missing some lines
 * (braces, else branches, a few declarations and conditions). Comments below
 * that depend on the missing structure are marked as assumptions -- confirm
 * against the full file.
 */
41 static av_noinline
void FUNC(hl_decode_mb
)(const H264Context
*h
, H264SliceContext
*sl
)
/* Macroblock coordinates, linear index and type of the current macroblock. */
43 const int mb_x
= sl
->mb_x
;
44 const int mb_y
= sl
->mb_y
;
45 const int mb_xy
= sl
->mb_xy
;
46 const int mb_type
= h
->cur_pic
.mb_type
[mb_xy
];
47 uint8_t *dest_y
, *dest_cb
, *dest_cr
;
48 int linesize
, uvlinesize
/*dct_offset*/;
/* Starts at the first block_offset table; switched to index 48 for field
 * macroblocks below. */
50 const int *block_offset
= &h
->block_offset
[0];
/* Transform bypass is only possible in the non-SIMPLE build, and only when
 * qscale is 0 and the SPS enables it. */
51 const int transform_bypass
= !SIMPLE
&& (sl
->qscale
== 0 && h
->ps
.sps
->transform_bypass
);
52 void (*idct_add
)(uint8_t *dst
, int16_t *block
, int stride
);
/* Chroma rows per macroblock: 16 shifted right by chroma_y_shift. */
53 const int block_h
= 16 >> h
->chroma_y_shift
;
54 const int chroma422
= CHROMA422(h
);
/* Destination pointers: luma advances 16 rows/columns per macroblock,
 * chroma advances 8 columns and block_h rows per macroblock. */
56 dest_y
= h
->cur_pic
.f
->data
[0] + ((mb_x
<< PIXEL_SHIFT
) + mb_y
* sl
->linesize
) * 16;
57 dest_cb
= h
->cur_pic
.f
->data
[1] + (mb_x
<< PIXEL_SHIFT
) * 8 + mb_y
* sl
->uvlinesize
* block_h
;
58 dest_cr
= h
->cur_pic
.f
->data
[2] + (mb_x
<< PIXEL_SHIFT
) * 8 + mb_y
* sl
->uvlinesize
* block_h
;
/* Prefetch calls for the luma destination and for the chroma destination;
 * the chroma call passes the Cb-to-Cr pointer distance as its stride
 * argument. */
60 h
->vdsp
.prefetch(dest_y
+ (sl
->mb_x
& 3) * 4 * sl
->linesize
+ (64 << PIXEL_SHIFT
), sl
->linesize
, 4);
61 h
->vdsp
.prefetch(dest_cb
+ (sl
->mb_x
& 7) * sl
->uvlinesize
+ (64 << PIXEL_SHIFT
), dest_cr
- dest_cb
, 2);
/* Record the slice's list count for this macroblock. */
63 h
->list_counts
[mb_xy
] = sl
->list_count
;
/* Field macroblock (non-SIMPLE build only): line sizes are doubled and the
 * block_offset table at index 48 is used. */
65 if (!SIMPLE
&& MB_FIELD(sl
)) {
66 linesize
= sl
->mb_linesize
= sl
->linesize
* 2;
67 uvlinesize
= sl
->mb_uvlinesize
= sl
->uvlinesize
* 2;
68 block_offset
= &h
->block_offset
[48];
/* For odd mb_y the destinations are moved back by 15 luma rows and
 * (block_h - 1) chroma rows. */
69 if (mb_y
& 1) { // FIXME move out of this function?
70 dest_y
-= sl
->linesize
* 15;
71 dest_cb
-= sl
->uvlinesize
* (block_h
- 1);
72 dest_cr
-= sl
->uvlinesize
* (block_h
- 1);
/* For each list used by this macroblock type, rewrite ref_cache entries to
 * (16 + ref) ^ (mb_y & 1): one 4x4 fill for 16x16 macroblocks, otherwise one
 * 2x2 fill per group of four blocks.
 * NOTE(review): the declaration of `list`, the statement guarded by
 * !USES_LIST and any guard on `ref` are not visible in this view. */
76 for (list
= 0; list
< sl
->list_count
; list
++) {
77 if (!USES_LIST(mb_type
, list
))
79 if (IS_16X16(mb_type
)) {
80 int8_t *ref
= &sl
->ref_cache
[list
][scan8
[0]];
81 fill_rectangle(ref
, 4, 4, 8, (16 + *ref
) ^ (sl
->mb_y
& 1), 1);
83 for (i
= 0; i
< 16; i
+= 4) {
84 int ref
= sl
->ref_cache
[list
][scan8
[i
]];
86 fill_rectangle(&sl
->ref_cache
[list
][scan8
[i
]], 2, 2,
87 8, (16 + ref
) ^ (sl
->mb_y
& 1), 1);
/* Undoubled line sizes.
 * NOTE(review): presumably the else branch of the MB_FIELD test above; the
 * `else` itself is not visible in this view. */
93 linesize
= sl
->mb_linesize
= sl
->linesize
;
94 uvlinesize
= sl
->mb_uvlinesize
= sl
->uvlinesize
;
95 // dct_offset = s->linesize * 16;
/* Intra-PCM macroblocks (non-SIMPLE build only): samples are taken directly
 * from sl->intra_pcm_ptr. */
98 if (!SIMPLE
&& IS_INTRA_PCM(mb_type
)) {
99 const int bit_depth
= h
->ps
.sps
->bit_depth_luma
;
/* Bit-reader variant: samples are read bit_depth bits at a time and stored
 * through uint16_t pointers.
 * NOTE(review): the condition selecting this variant and the declaration of
 * `gb` are not visible here; presumably this is the high-bit-depth path. */
103 init_get_bits(&gb
, sl
->intra_pcm_ptr
,
104 ff_h264_mb_sizes
[h
->ps
.sps
->chroma_format_idc
] * bit_depth
);
/* Luma: 16 rows of 16 samples. */
106 for (i
= 0; i
< 16; i
++) {
107 uint16_t *tmp_y
= (uint16_t *)(dest_y
+ i
* linesize
);
108 for (j
= 0; j
< 16; j
++)
109 tmp_y
[j
] = get_bits(&gb
, bit_depth
);
/* Chroma is handled unless this is a non-SIMPLE build with CONFIG_GRAY and
 * AV_CODEC_FLAG_GRAY set. */
111 if (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& AV_CODEC_FLAG_GRAY
)) {
/* chroma_format_idc == 0: both chroma planes are filled with the constant
 * 1 << (bit_depth - 1). */
112 if (!h
->ps
.sps
->chroma_format_idc
) {
113 for (i
= 0; i
< block_h
; i
++) {
114 uint16_t *tmp_cb
= (uint16_t *)(dest_cb
+ i
* uvlinesize
);
115 uint16_t *tmp_cr
= (uint16_t *)(dest_cr
+ i
* uvlinesize
);
116 for (j
= 0; j
< 8; j
++) {
117 tmp_cb
[j
] = tmp_cr
[j
] = 1 << (bit_depth
- 1);
/* Otherwise read block_h rows of 8 Cb samples, then block_h rows of 8 Cr
 * samples, from the same bit reader. */
121 for (i
= 0; i
< block_h
; i
++) {
122 uint16_t *tmp_cb
= (uint16_t *)(dest_cb
+ i
* uvlinesize
);
123 for (j
= 0; j
< 8; j
++)
124 tmp_cb
[j
] = get_bits(&gb
, bit_depth
);
126 for (i
= 0; i
< block_h
; i
++) {
127 uint16_t *tmp_cr
= (uint16_t *)(dest_cr
+ i
* uvlinesize
);
128 for (j
= 0; j
< 8; j
++)
129 tmp_cr
[j
] = get_bits(&gb
, bit_depth
);
/* Byte-copy variant: 16 bytes per luma row are copied straight from
 * intra_pcm_ptr.
 * NOTE(review): presumably the else branch for one-byte samples -- confirm. */
134 for (i
= 0; i
< 16; i
++)
135 memcpy(dest_y
+ i
* linesize
, sl
->intra_pcm_ptr
+ i
* 16, 16);
136 if (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& AV_CODEC_FLAG_GRAY
)) {
/* chroma_format_idc == 0: fill 8 rows of 8 bytes in each chroma plane with
 * 1 << (bit_depth - 1). */
137 if (!h
->ps
.sps
->chroma_format_idc
) {
138 for (i
= 0; i
< 8; i
++) {
139 memset(dest_cb
+ i
* uvlinesize
, 1 << (bit_depth
- 1), 8);
140 memset(dest_cr
+ i
* uvlinesize
, 1 << (bit_depth
- 1), 8);
/* Otherwise Cb starts 256 bytes into the PCM data and Cr follows after
 * block_h * 8 bytes of Cb; each row is 8 bytes. */
143 const uint8_t *src_cb
= sl
->intra_pcm_ptr
+ 256;
144 const uint8_t *src_cr
= sl
->intra_pcm_ptr
+ 256 + block_h
* 8;
145 for (i
= 0; i
< block_h
; i
++) {
146 memcpy(dest_cb
+ i
* uvlinesize
, src_cb
+ i
* 8, 8);
147 memcpy(dest_cr
+ i
* uvlinesize
, src_cr
+ i
* 8, 8);
/* Intra macroblocks: when sl->deblocking_filter is set, xchg_mb_border is
 * called with 1 before prediction and with 0 after it. */
153 if (IS_INTRA(mb_type
)) {
154 if (sl
->deblocking_filter
)
155 xchg_mb_border(h
, sl
, dest_y
, dest_cb
, dest_cr
, linesize
,
156 uvlinesize
, 1, 0, SIMPLE
, PIXEL_SHIFT
);
/* Chroma prediction for Cb and Cr with the same chroma_pred_mode (skipped
 * in gray-only decoding). */
158 if (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& AV_CODEC_FLAG_GRAY
)) {
159 h
->hpc
.pred8x8
[sl
->chroma_pred_mode
](dest_cb
, uvlinesize
);
160 h
->hpc
.pred8x8
[sl
->chroma_pred_mode
](dest_cr
, uvlinesize
);
/* Luma prediction for plane 0. */
163 hl_decode_mb_predict_luma(h
, sl
, mb_type
, SIMPLE
,
164 transform_bypass
, PIXEL_SHIFT
,
165 block_offset
, linesize
, dest_y
, 0);
167 if (sl
->deblocking_filter
)
168 xchg_mb_border(h
, sl
, dest_y
, dest_cb
, dest_cr
, linesize
,
169 uvlinesize
, 0, 0, SIMPLE
, PIXEL_SHIFT
);
/* Motion compensation, with the put/avg qpel and chroma tables and the
 * weight/biweight tables passed in.
 * NOTE(review): presumably the non-intra branch, with the 422 variant chosen
 * when chroma422 is set and the 420 variant otherwise; the selecting
 * conditions are not visible in this view. */
172 FUNC(hl_motion_422
)(h
, sl
, dest_y
, dest_cb
, dest_cr
,
173 h
->h264qpel
.put_h264_qpel_pixels_tab
,
174 h
->h264chroma
.put_h264_chroma_pixels_tab
,
175 h
->h264qpel
.avg_h264_qpel_pixels_tab
,
176 h
->h264chroma
.avg_h264_chroma_pixels_tab
,
177 h
->h264dsp
.weight_h264_pixels_tab
,
178 h
->h264dsp
.biweight_h264_pixels_tab
);
180 FUNC(hl_motion_420
)(h
, sl
, dest_y
, dest_cb
, dest_cr
,
181 h
->h264qpel
.put_h264_qpel_pixels_tab
,
182 h
->h264chroma
.put_h264_chroma_pixels_tab
,
183 h
->h264qpel
.avg_h264_qpel_pixels_tab
,
184 h
->h264chroma
.avg_h264_chroma_pixels_tab
,
185 h
->h264dsp
.weight_h264_pixels_tab
,
186 h
->h264dsp
.biweight_h264_pixels_tab
);
/* Luma residual stage for plane 0. */
190 hl_decode_mb_idct_luma(h
, sl
, mb_type
, SIMPLE
, transform_bypass
,
191 PIXEL_SHIFT
, block_offset
, linesize
, dest_y
, 0);
/* Chroma residual stage (skipped in gray-only decoding).
 * NOTE(review): the second operand of the && below is not visible in this
 * view. */
193 if ((SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& AV_CODEC_FLAG_GRAY
)) &&
195 uint8_t *dest
[2] = { dest_cb
, dest_cr
};
196 if (transform_bypass
) {
/* Bypass with an intra macroblock, profile_idc 244 and vertical or
 * horizontal chroma prediction: use the pred8x8_add entry for that mode on
 * each chroma plane, reading coefficients at 16*16*1 and 16*16*2 (shifted by
 * PIXEL_SHIFT) into sl->mb.
 * NOTE(review): some arguments of these calls are not visible here. */
197 if (IS_INTRA(mb_type
) && h
->ps
.sps
->profile_idc
== 244 &&
198 (sl
->chroma_pred_mode
== VERT_PRED8x8
||
199 sl
->chroma_pred_mode
== HOR_PRED8x8
)) {
200 h
->hpc
.pred8x8_add
[sl
->chroma_pred_mode
](dest
[0],
202 sl
->mb
+ (16 * 16 * 1 << PIXEL_SHIFT
),
204 h
->hpc
.pred8x8_add
[sl
->chroma_pred_mode
](dest
[1],
206 sl
->mb
+ (16 * 16 * 2 << PIXEL_SHIFT
),
/* Other bypass cases: for chroma planes j = 1, 2, call
 * h264_add_pixels4_clear on blocks j*16 .. j*16+3 whose non-zero count is set
 * or whose coefficient at index i * 16 is non-zero. */
209 idct_add
= h
->h264dsp
.h264_add_pixels4_clear
;
210 for (j
= 1; j
< 3; j
++) {
211 for (i
= j
* 16; i
< j
* 16 + 4; i
++)
212 if (sl
->non_zero_count_cache
[scan8
[i
]] ||
213 dctcoef_get(sl
->mb
, PIXEL_SHIFT
, i
* 16))
214 idct_add(dest
[j
- 1] + block_offset
[i
],
215 sl
->mb
+ (i
* 16 << PIXEL_SHIFT
),
/* Blocks j*16+4 .. j*16+7; cache and offset lookups use index i + 4 while
 * the coefficient address uses i.
 * NOTE(review): presumably only executed for chroma422; the guard is not
 * visible in this view. */
218 for (i
= j
* 16 + 4; i
< j
* 16 + 8; i
++)
219 if (sl
->non_zero_count_cache
[scan8
[i
+ 4]] ||
220 dctcoef_get(sl
->mb
, PIXEL_SHIFT
, i
* 16))
221 idct_add(dest
[j
- 1] + block_offset
[i
+ 4],
222 sl
->mb
+ (i
* 16 << PIXEL_SHIFT
),
/* Chroma QP selection: either chroma_qp + 3 or chroma_qp unchanged.
 * NOTE(review): presumably the + 3 form applies when chroma422 is set; the
 * condition and the declaration of `qp` are not visible in this view. */
230 qp
[0] = sl
->chroma_qp
[0] + 3;
231 qp
[1] = sl
->chroma_qp
[1] + 3;
233 qp
[0] = sl
->chroma_qp
[0];
234 qp
[1] = sl
->chroma_qp
[1];
/* Chroma DC dequant + IDCT for each plane whose DC block has a non-zero
 * count; the dequant table index is 1/2 for intra and 4/5 otherwise. */
236 if (sl
->non_zero_count_cache
[scan8
[CHROMA_DC_BLOCK_INDEX
+ 0]])
237 h
->h264dsp
.h264_chroma_dc_dequant_idct(sl
->mb
+ (16 * 16 * 1 << PIXEL_SHIFT
),
238 h
->ps
.pps
->dequant4_coeff
[IS_INTRA(mb_type
) ? 1 : 4][qp
[0]][0]);
239 if (sl
->non_zero_count_cache
[scan8
[CHROMA_DC_BLOCK_INDEX
+ 1]])
240 h
->h264dsp
.h264_chroma_dc_dequant_idct(sl
->mb
+ (16 * 16 * 2 << PIXEL_SHIFT
),
241 h
->ps
.pps
->dequant4_coeff
[IS_INTRA(mb_type
) ? 2 : 5][qp
[1]][0]);
/* Chroma residual add for both planes via h264_idct_add8. */
242 h
->h264dsp
.h264_idct_add8(dest
, block_offset
,
244 sl
->non_zero_count_cache
);
/* Opens a section compiled only when SIMPLE is 0 or BITS == 8; the matching
 * #endif is outside this view. */
250 #if !SIMPLE || BITS == 8
/* Third inclusion of the motion-compensation template.
 * NOTE(review): presumably provides the hl_motion_444 function used by
 * hl_decode_mb_444 below -- confirm which configuration define precedes it. */
254 #include "h264_mc_template.c"
256 static av_noinline
void FUNC(hl_decode_mb_444
)(const H264Context
*h
, H264SliceContext
*sl
)
258 const int mb_x
= sl
->mb_x
;
259 const int mb_y
= sl
->mb_y
;
260 const int mb_xy
= sl
->mb_xy
;
261 const int mb_type
= h
->cur_pic
.mb_type
[mb_xy
];
265 const int *block_offset
= &h
->block_offset
[0];
266 const int transform_bypass
= !SIMPLE
&& (sl
->qscale
== 0 && h
->ps
.sps
->transform_bypass
);
267 const int plane_count
= (SIMPLE
|| !CONFIG_GRAY
|| !(h
->flags
& AV_CODEC_FLAG_GRAY
)) ? 3 : 1;
269 for (p
= 0; p
< plane_count
; p
++) {
270 dest
[p
] = h
->cur_pic
.f
->data
[p
] +
271 ((mb_x
<< PIXEL_SHIFT
) + mb_y
* sl
->linesize
) * 16;
272 h
->vdsp
.prefetch(dest
[p
] + (sl
->mb_x
& 3) * 4 * sl
->linesize
+ (64 << PIXEL_SHIFT
),
276 h
->list_counts
[mb_xy
] = sl
->list_count
;
278 if (!SIMPLE
&& MB_FIELD(sl
)) {
279 linesize
= sl
->mb_linesize
= sl
->mb_uvlinesize
= sl
->linesize
* 2;
280 block_offset
= &h
->block_offset
[48];
281 if (mb_y
& 1) // FIXME move out of this function?
282 for (p
= 0; p
< 3; p
++)
283 dest
[p
] -= sl
->linesize
* 15;
284 if (FRAME_MBAFF(h
)) {
286 for (list
= 0; list
< sl
->list_count
; list
++) {
287 if (!USES_LIST(mb_type
, list
))
289 if (IS_16X16(mb_type
)) {
290 int8_t *ref
= &sl
->ref_cache
[list
][scan8
[0]];
291 fill_rectangle(ref
, 4, 4, 8, (16 + *ref
) ^ (sl
->mb_y
& 1), 1);
293 for (i
= 0; i
< 16; i
+= 4) {
294 int ref
= sl
->ref_cache
[list
][scan8
[i
]];
296 fill_rectangle(&sl
->ref_cache
[list
][scan8
[i
]], 2, 2,
297 8, (16 + ref
) ^ (sl
->mb_y
& 1), 1);
303 linesize
= sl
->mb_linesize
= sl
->mb_uvlinesize
= sl
->linesize
;
306 if (!SIMPLE
&& IS_INTRA_PCM(mb_type
)) {
308 const int bit_depth
= h
->ps
.sps
->bit_depth_luma
;
310 init_get_bits(&gb
, sl
->intra_pcm_ptr
, 768 * bit_depth
);
312 for (p
= 0; p
< plane_count
; p
++)
313 for (i
= 0; i
< 16; i
++) {
314 uint16_t *tmp
= (uint16_t *)(dest
[p
] + i
* linesize
);
315 for (j
= 0; j
< 16; j
++)
316 tmp
[j
] = get_bits(&gb
, bit_depth
);
319 for (p
= 0; p
< plane_count
; p
++)
320 for (i
= 0; i
< 16; i
++)
321 memcpy(dest
[p
] + i
* linesize
,
322 sl
->intra_pcm_ptr
+ p
* 256 + i
* 16, 16);
325 if (IS_INTRA(mb_type
)) {
326 if (sl
->deblocking_filter
)
327 xchg_mb_border(h
, sl
, dest
[0], dest
[1], dest
[2], linesize
,
328 linesize
, 1, 1, SIMPLE
, PIXEL_SHIFT
);
330 for (p
= 0; p
< plane_count
; p
++)
331 hl_decode_mb_predict_luma(h
, sl
, mb_type
, SIMPLE
,
332 transform_bypass
, PIXEL_SHIFT
,
333 block_offset
, linesize
, dest
[p
], p
);
335 if (sl
->deblocking_filter
)
336 xchg_mb_border(h
, sl
, dest
[0], dest
[1], dest
[2], linesize
,
337 linesize
, 0, 1, SIMPLE
, PIXEL_SHIFT
);
339 FUNC(hl_motion_444
)(h
, sl
, dest
[0], dest
[1], dest
[2],
340 h
->h264qpel
.put_h264_qpel_pixels_tab
,
341 h
->h264chroma
.put_h264_chroma_pixels_tab
,
342 h
->h264qpel
.avg_h264_qpel_pixels_tab
,
343 h
->h264chroma
.avg_h264_chroma_pixels_tab
,
344 h
->h264dsp
.weight_h264_pixels_tab
,
345 h
->h264dsp
.biweight_h264_pixels_tab
);
348 for (p
= 0; p
< plane_count
; p
++)
349 hl_decode_mb_idct_luma(h
, sl
, mb_type
, SIMPLE
, transform_bypass
,
350 PIXEL_SHIFT
, block_offset
, linesize
,