Rename var: val -> energy
[FFMpeg-mirror/DVCPRO-HD.git] / libavcodec / h264.c
blobc6fe739465a2c29082245ac079d1dd971f020b20
1 /*
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 /**
23 * @file h264.c
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "dsputil.h"
29 #include "avcodec.h"
30 #include "mpegvideo.h"
31 #include "h264.h"
32 #include "h264data.h"
33 #include "h264_parser.h"
34 #include "golomb.h"
35 #include "rectangle.h"
37 #include "cabac.h"
38 #ifdef ARCH_X86
39 #include "i386/h264_i386.h"
40 #endif
42 //#undef NDEBUG
43 #include <assert.h>
45 /**
46 * Value of Picture.reference when Picture is not a reference picture, but
47 * is held for delayed output.
49 #define DELAYED_PIC_REF 4
// File-scope VLC tables. Their contents are not set in this excerpt;
// presumably an init routine elsewhere in the file fills them (TODO confirm).
51 static VLC coeff_token_vlc[4];
52 static VLC chroma_dc_coeff_token_vlc;
54 static VLC total_zeros_vlc[15];
55 static VLC chroma_dc_total_zeros_vlc[3];
57 static VLC run_vlc[6];
58 static VLC run7_vlc;
// Forward declarations of helpers whose definitions are not part of this excerpt.
60 static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
61 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
62 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
63 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
65 static av_always_inline uint32_t pack16to32(int a, int b){
66 #ifdef WORDS_BIGENDIAN
67 return (b&0xFFFF) + (a<<16);
68 #else
69 return (a&0xFFFF) + (b<<16);
70 #endif
/* Lookup table: ff_rem6[q] == q % 6 for q = 0..51 (one period per row). */
const uint8_t ff_rem6[52]={
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3, 4, 5,
    0, 1, 2, 3
};
/* Lookup table: ff_div6[q] == q / 6 for q = 0..51 (one quotient per row). */
const uint8_t ff_div6[52]={
    0, 0, 0, 0, 0, 0,
    1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3,
    4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 5, 5,
    6, 6, 6, 6, 6, 6,
    7, 7, 7, 7, 7, 7,
    8, 8, 8, 8
};
// Loads the per-macroblock neighbour caches of h for the macroblock at h->mb_xy.
//
// From the picture-wide tables it fills, depending on mb_type and for_deblock:
//   - h->top_mb_xy / h->left_mb_xy[] (resolved neighbour positions)
//   - the *_samples_available bitmasks and intra4x4_pred_mode_cache (intra MBs)
//   - non_zero_count_cache, and top_cbp / left_cbp when pps.cabac is set
//   - mv_cache / ref_cache / mvd_cache / direct_cache (inter or direct MBs)
//   - h->neighbor_transform_size
//
// mb_type     type flags of the current macroblock
// for_deblock non-zero when called on behalf of the deblocking filter; in that
//             mode the function returns immediately when the top neighbour is in
//             the same slice (or slice_num == 1) and FRAME_MBAFF is not set.
//
// NOTE(review): this listing appears to have lost some lines (closing braces,
// comment delimiters, a few condition terminators) - compare against the
// repository copy before compiling.
82 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
83 MpegEncContext * const s = &h->s;
84 const int mb_xy= h->mb_xy;
85 int topleft_xy, top_xy, topright_xy, left_xy[2];
86 int topleft_type, top_type, topright_type, left_type[2];
87 int left_block[8];
88 int topleft_partition= -1;
89 int i;
91 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
93 //FIXME deblocking could skip the intra and nnz parts.
94 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
95 return;
97 /* Wow, what a mess, why didn't they simplify the interlacing & intra
98 * stuff, I can't imagine that these complex rules are worth it. */
// Default neighbour indices and left-block mapping; the FRAME_MBAFF branch
// below adjusts them according to the frame/field flags of the MB pairs.
100 topleft_xy = top_xy - 1;
101 topright_xy= top_xy + 1;
102 left_xy[1] = left_xy[0] = mb_xy-1;
103 left_block[0]= 0;
104 left_block[1]= 1;
105 left_block[2]= 2;
106 left_block[3]= 3;
107 left_block[4]= 7;
108 left_block[5]= 10;
109 left_block[6]= 8;
110 left_block[7]= 11;
111 if(FRAME_MBAFF){
112 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
113 const int top_pair_xy = pair_xy - s->mb_stride;
114 const int topleft_pair_xy = top_pair_xy - 1;
115 const int topright_pair_xy = top_pair_xy + 1;
116 const int topleft_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
117 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
118 const int topright_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
119 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
120 const int curr_mb_frame_flag = !IS_INTERLACED(mb_type);
121 const int bottom = (s->mb_y & 1);
122 tprintf(s->avctx, "fill_caches: curr_mb_frame_flag:%d, left_mb_frame_flag:%d, topleft_mb_frame_flag:%d, top_mb_frame_flag:%d, topright_mb_frame_flag:%d\n", curr_mb_frame_flag, left_mb_frame_flag, topleft_mb_frame_flag, top_mb_frame_flag, topright_mb_frame_flag);
123 if (bottom
124 ? !curr_mb_frame_flag // bottom macroblock
125 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
127 top_xy -= s->mb_stride;
129 if (bottom
130 ? !curr_mb_frame_flag // bottom macroblock
131 : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
133 topleft_xy -= s->mb_stride;
134 } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
135 topleft_xy += s->mb_stride;
136 // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
137 topleft_partition = 0;
139 if (bottom
140 ? !curr_mb_frame_flag // bottom macroblock
141 : (!curr_mb_frame_flag && !topright_mb_frame_flag) // top macroblock
143 topright_xy -= s->mb_stride;
145 if (left_mb_frame_flag != curr_mb_frame_flag) {
146 left_xy[1] = left_xy[0] = pair_xy - 1;
147 if (curr_mb_frame_flag) {
148 if (bottom) {
149 left_block[0]= 2;
150 left_block[1]= 2;
151 left_block[2]= 3;
152 left_block[3]= 3;
153 left_block[4]= 8;
154 left_block[5]= 11;
155 left_block[6]= 8;
156 left_block[7]= 11;
157 } else {
158 left_block[0]= 0;
159 left_block[1]= 0;
160 left_block[2]= 1;
161 left_block[3]= 1;
162 left_block[4]= 7;
163 left_block[5]= 10;
164 left_block[6]= 7;
165 left_block[7]= 10;
167 } else {
168 left_xy[1] += s->mb_stride;
169 //left_block[0]= 0;
170 left_block[1]= 2;
171 left_block[2]= 0;
172 left_block[3]= 2;
173 //left_block[4]= 7;
174 left_block[5]= 10;
175 left_block[6]= 7;
176 left_block[7]= 10;
// Publish the resolved top/left neighbour positions in the context.
181 h->top_mb_xy = top_xy;
182 h->left_mb_xy[0] = left_xy[0];
183 h->left_mb_xy[1] = left_xy[1];
// Neighbour types. For deblocking a neighbour is used when its slice_table
// entry is below 255; otherwise only neighbours whose slice_table entry equals
// h->slice_num are used. A neighbour that is not used gets type 0.
184 if(for_deblock){
185 topleft_type = 0;
186 topright_type = 0;
187 top_type = h->slice_table[top_xy ] < 255 ? s->current_picture.mb_type[top_xy] : 0;
188 left_type[0] = h->slice_table[left_xy[0] ] < 255 ? s->current_picture.mb_type[left_xy[0]] : 0;
189 left_type[1] = h->slice_table[left_xy[1] ] < 255 ? s->current_picture.mb_type[left_xy[1]] : 0;
// MBAFF deblocking of a non-intra MB: reload the current MB own nnz flags
// (one bit per luma block, stored at non_zero_count[mb_xy][14]) and its
// motion vectors / reference indices from the picture tables.
191 if(FRAME_MBAFF && !IS_INTRA(mb_type)){
192 int list;
193 int v = *(uint16_t*)&h->non_zero_count[mb_xy][14];
194 for(i=0; i<16; i++)
195 h->non_zero_count_cache[scan8[i]] = (v>>i)&1;
196 for(list=0; list<h->list_count; list++){
197 if(USES_LIST(mb_type,list)){
198 uint32_t *src = (uint32_t*)s->current_picture.motion_val[list][h->mb2b_xy[mb_xy]];
199 uint32_t *dst = (uint32_t*)h->mv_cache[list][scan8[0]];
200 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
201 for(i=0; i<4; i++, dst+=8, src+=h->b_stride){
202 dst[0] = src[0];
203 dst[1] = src[1];
204 dst[2] = src[2];
205 dst[3] = src[3];
207 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
208 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = pack16to32(ref[0],ref[1])*0x0101;
209 ref += h->b8_stride;
210 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
211 *(uint32_t*)&h->ref_cache[list][scan8[10]] = pack16to32(ref[0],ref[1])*0x0101;
212 }else{
213 fill_rectangle(&h-> mv_cache[list][scan8[ 0]], 4, 4, 8, 0, 4);
214 fill_rectangle(&h->ref_cache[list][scan8[ 0]], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1);
218 }else{
219 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
220 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
221 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
222 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
223 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
// Intra MBs: start from fully available sample masks and clear bits for every
// neighbour that is missing, or that is non-intra while constrained_intra_pred is set.
226 if(IS_INTRA(mb_type)){
227 h->topleft_samples_available=
228 h->top_samples_available=
229 h->left_samples_available= 0xFFFF;
230 h->topright_samples_available= 0xEEEA;
232 if(!IS_INTRA(top_type) && (top_type==0 || h->pps.constrained_intra_pred)){
233 h->topleft_samples_available= 0xB3FF;
234 h->top_samples_available= 0x33FF;
235 h->topright_samples_available= 0x26EA;
237 for(i=0; i<2; i++){
238 if(!IS_INTRA(left_type[i]) && (left_type[i]==0 || h->pps.constrained_intra_pred)){
239 h->topleft_samples_available&= 0xDF5F;
240 h->left_samples_available&= 0x5F5F;
244 if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
245 h->topleft_samples_available&= 0x7FFF;
247 if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
248 h->topright_samples_available&= 0xFBFF;
250 if(IS_INTRA4x4(mb_type)){
251 if(IS_INTRA4x4(top_type)){
252 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
253 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
254 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
255 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
256 }else{
257 int pred;
258 if(!top_type || (IS_INTER(top_type) && h->pps.constrained_intra_pred))
259 pred= -1;
260 else{
261 pred= 2;
263 h->intra4x4_pred_mode_cache[4+8*0]=
264 h->intra4x4_pred_mode_cache[5+8*0]=
265 h->intra4x4_pred_mode_cache[6+8*0]=
266 h->intra4x4_pred_mode_cache[7+8*0]= pred;
268 for(i=0; i<2; i++){
269 if(IS_INTRA4x4(left_type[i])){
270 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
271 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
272 }else{
273 int pred;
274 if(!left_type[i] || (IS_INTER(left_type[i]) && h->pps.constrained_intra_pred))
275 pred= -1;
276 else{
277 pred= 2;
279 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
280 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
288 0 . T T. T T T T
289 1 L . .L . . . .
290 2 L . .L . . . .
291 3 . T TL . . . .
292 4 L . .L . . . .
293 5 L . .. . . . .
295 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
// Non-zero coefficient counts of the top and left neighbours. A missing
// neighbour is given 0 when pps.cabac is set and the current MB is not intra,
// and 64 otherwise.
296 if(top_type){
297 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
298 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
299 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
300 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
302 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
303 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
305 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
306 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
308 }else{
309 h->non_zero_count_cache[4+8*0]=
310 h->non_zero_count_cache[5+8*0]=
311 h->non_zero_count_cache[6+8*0]=
312 h->non_zero_count_cache[7+8*0]=
314 h->non_zero_count_cache[1+8*0]=
315 h->non_zero_count_cache[2+8*0]=
317 h->non_zero_count_cache[1+8*3]=
318 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
322 for (i=0; i<2; i++) {
323 if(left_type[i]){
324 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
325 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
326 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
327 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
328 }else{
329 h->non_zero_count_cache[3+8*1 + 2*8*i]=
330 h->non_zero_count_cache[3+8*2 + 2*8*i]=
331 h->non_zero_count_cache[0+8*1 + 8*i]=
332 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
// CABAC only: coded block pattern of the top and left neighbours, with a
// default of 0x1C0 (current MB intra) or 0 when the neighbour is missing.
336 if( h->pps.cabac ) {
337 // top_cbp
338 if(top_type) {
339 h->top_cbp = h->cbp_table[top_xy];
340 } else if(IS_INTRA(mb_type)) {
341 h->top_cbp = 0x1C0;
342 } else {
343 h->top_cbp = 0;
345 // left_cbp
346 if (left_type[0]) {
347 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
348 } else if(IS_INTRA(mb_type)) {
349 h->left_cbp = 0x1C0;
350 } else {
351 h->left_cbp = 0;
353 if (left_type[0]) {
354 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
356 if (left_type[1]) {
357 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
361 #if 1
// Inter / direct MBs: per reference list, copy the neighbouring motion
// vectors and reference indices (top, left, top-left, top-right) into the caches.
362 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
363 int list;
364 for(list=0; list<h->list_count; list++){
365 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
366 /*if(!h->mv_cache_clean[list]){
367 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
368 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
369 h->mv_cache_clean[list]= 1;
371 continue;
373 h->mv_cache_clean[list]= 0;
375 if(USES_LIST(top_type, list)){
376 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
377 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
378 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
379 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
380 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
381 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
382 h->ref_cache[list][scan8[0] + 0 - 1*8]=
383 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
384 h->ref_cache[list][scan8[0] + 2 - 1*8]=
385 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
386 }else{
387 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
388 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
389 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
390 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
391 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
394 for(i=0; i<2; i++){
395 int cache_idx = scan8[0] - 1 + i*2*8;
396 if(USES_LIST(left_type[i], list)){
397 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
398 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
399 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
400 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
401 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
402 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
403 }else{
404 *(uint32_t*)h->mv_cache [list][cache_idx ]=
405 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
406 h->ref_cache[list][cache_idx ]=
407 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
411 if((for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) && !FRAME_MBAFF)
412 continue;
414 if(USES_LIST(topleft_type, list)){
415 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
416 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
417 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
418 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
419 }else{
420 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
421 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
424 if(USES_LIST(topright_type, list)){
425 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
426 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
427 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
428 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
429 }else{
430 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
431 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
434 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
435 continue;
437 h->ref_cache[list][scan8[5 ]+1] =
438 h->ref_cache[list][scan8[7 ]+1] =
439 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
440 h->ref_cache[list][scan8[4 ]] =
441 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
442 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
443 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
444 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
445 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
446 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
448 if( h->pps.cabac ) {
449 /* XXX beurk, Load mvd */
450 if(USES_LIST(top_type, list)){
451 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
452 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
453 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
454 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
455 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
456 }else{
457 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
458 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
459 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
460 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
462 if(USES_LIST(left_type[0], list)){
463 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
464 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
465 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
466 }else{
467 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
468 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
470 if(USES_LIST(left_type[1], list)){
471 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
472 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
473 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
474 }else{
475 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
476 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
478 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
479 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
480 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
481 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
482 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
484 if(h->slice_type == FF_B_TYPE){
485 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
487 if(IS_DIRECT(top_type)){
488 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
489 }else if(IS_8X8(top_type)){
490 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
491 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
492 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
493 }else{
494 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
497 if(IS_DIRECT(left_type[0]))
498 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
499 else if(IS_8X8(left_type[0]))
500 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
501 else
502 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
504 if(IS_DIRECT(left_type[1]))
505 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
506 else if(IS_8X8(left_type[1]))
507 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
508 else
509 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
// MBAFF: rescale cached neighbour data whose frame/field coding differs from
// the current MB. MAP_MVS applies MAP_F2F to every neighbour cache slot; the
// field variant doubles the ref index and halves the vertical mv/mvd, the
// frame variant does the inverse.
513 if(FRAME_MBAFF){
514 #define MAP_MVS\
515 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
516 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
517 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
518 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
519 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
520 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
521 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
522 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
523 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
524 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
525 if(MB_FIELD){
526 #define MAP_F2F(idx, mb_type)\
527 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
528 h->ref_cache[list][idx] <<= 1;\
529 h->mv_cache[list][idx][1] /= 2;\
530 h->mvd_cache[list][idx][1] /= 2;\
532 MAP_MVS
533 #undef MAP_F2F
534 }else{
535 #define MAP_F2F(idx, mb_type)\
536 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
537 h->ref_cache[list][idx] >>= 1;\
538 h->mv_cache[list][idx][1] <<= 1;\
539 h->mvd_cache[list][idx][1] <<= 1;\
541 MAP_MVS
542 #undef MAP_F2F
547 #endif
// Number (0..2) of top / left[0] neighbours that use the 8x8 transform.
549 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
552 static inline void write_back_intra_pred_mode(H264Context *h){
553 const int mb_xy= h->mb_xy;
555 h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
556 h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
557 h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
558 h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
559 h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
560 h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
561 h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
565 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
567 static inline int check_intra4x4_pred_mode(H264Context *h){
568 MpegEncContext * const s = &h->s;
569 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
570 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
571 int i;
573 if(!(h->top_samples_available&0x8000)){
574 for(i=0; i<4; i++){
575 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
576 if(status<0){
577 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
578 return -1;
579 } else if(status){
580 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
585 if(!(h->left_samples_available&0x8000)){
586 for(i=0; i<4; i++){
587 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
588 if(status<0){
589 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
590 return -1;
591 } else if(status){
592 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
597 return 0;
598 } //FIXME cleanup like next
601 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
603 static inline int check_intra_pred_mode(H264Context *h, int mode){
604 MpegEncContext * const s = &h->s;
605 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
606 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
608 if(mode > 6U) {
609 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
610 return -1;
613 if(!(h->top_samples_available&0x8000)){
614 mode= top[ mode ];
615 if(mode<0){
616 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
617 return -1;
621 if(!(h->left_samples_available&0x8000)){
622 mode= left[ mode ];
623 if(mode<0){
624 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
625 return -1;
629 return mode;
633 * gets the predicted intra4x4 prediction mode.
635 static inline int pred_intra_mode(H264Context *h, int n){
636 const int index8= scan8[n];
637 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
638 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
639 const int min= FFMIN(left, top);
641 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
643 if(min<0) return DC_PRED;
644 else return min;
647 static inline void write_back_non_zero_count(H264Context *h){
648 const int mb_xy= h->mb_xy;
650 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
651 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
652 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
653 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
654 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
655 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
656 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
658 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
659 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
660 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
662 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
663 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
664 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
666 if(FRAME_MBAFF){
667 // store all luma nnzs, for deblocking
668 int v = 0, i;
669 for(i=0; i<16; i++)
670 v += (!!h->non_zero_count_cache[scan8[i]]) << i;
671 *(uint16_t*)&h->non_zero_count[mb_xy][14] = v;
676 * gets the predicted number of non zero coefficients.
677 * @param n block index
679 static inline int pred_non_zero_count(H264Context *h, int n){
680 const int index8= scan8[n];
681 const int left= h->non_zero_count_cache[index8 - 1];
682 const int top = h->non_zero_count_cache[index8 - 8];
683 int i= left + top;
685 if(i<64) i= (i+1)>>1;
687 tprintf(h->s.avctx, "pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
689 return i&31;
// Fetches the diagonal neighbour motion vector (candidate C of the motion
// vector prediction) for the partition at cache position i.
// The top-right entry (one row up, part_width columns to the right) is used
// when its reference is available; otherwise the top-left entry is used.
// *C receives a pointer to the chosen vector and the return value is the
// matching reference index.
// With FRAME_MBAFF some frame/field combinations read the vector directly
// from the picture tables instead; it is then stored, rescaled, in the cache
// slot scan8[0]-2 and *C points there.
692 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
693 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
694 MpegEncContext *s = &h->s;
696 /* there is no consistent mapping of mvs to neighboring locations that will
697 * make mbaff happy, so we can't move all this logic to fill_caches */
698 if(FRAME_MBAFF){
699 const uint32_t *mb_types = s->current_picture_ptr->mb_type;
700 const int16_t *mv;
// clear the scratch slot and point *C at it; SET_DIAG_MV fills it in
701 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
702 *C = h->mv_cache[list][scan8[0]-2];
704 if(!MB_FIELD
705 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
706 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
707 if(IS_INTERLACED(mb_types[topright_xy])){
// SET_DIAG_MV always returns from the function: LIST_NOT_USED when the MB
// covering 4x4 block (X4,Y4) does not use this list, otherwise the reference
// index (modified by REF_OP) after copying the vector into the scratch slot
// with MV_OP applied to its vertical component.
708 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
709 const int x4 = X4, y4 = Y4;\
710 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
711 if(!USES_LIST(mb_type,list))\
712 return LIST_NOT_USED;\
713 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
714 h->mv_cache[list][scan8[0]-2][0] = mv[0];\
715 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
716 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;
718 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
721 if(topright_ref == PART_NOT_AVAILABLE
722 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
723 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
724 if(!MB_FIELD
725 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
726 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
728 if(MB_FIELD
729 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
730 && i >= scan8[0]+8){
731 // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
732 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
735 #undef SET_DIAG_MV
// common path: top-right entry if available, else fall back to top-left
738 if(topright_ref != PART_NOT_AVAILABLE){
739 *C= h->mv_cache[list][ i - 8 + part_width ];
740 return topright_ref;
741 }else{
742 tprintf(s->avctx, "topright MV not available\n");
744 *C= h->mv_cache[list][ i - 8 - 1 ];
745 return h->ref_cache[list][ i - 8 - 1 ];
750 * gets the predicted MV.
751 * @param n the block index
752 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
753 * @param mx the x component of the predicted motion vector
754 * @param my the y component of the predicted motion vector
756 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
757 const int index8= scan8[n];
758 const int top_ref= h->ref_cache[list][ index8 - 8 ];
759 const int left_ref= h->ref_cache[list][ index8 - 1 ];
760 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
761 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
762 const int16_t * C;
763 int diagonal_ref, match_count;
765 assert(part_width==1 || part_width==2 || part_width==4);
767 /* mv_cache
768 B . . A T T T T
769 U . . L . . , .
770 U . . L . . . .
771 U . . L . . , .
772 . . . L . . . .
775 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
776 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
777 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
778 if(match_count > 1){ //most common
779 *mx= mid_pred(A[0], B[0], C[0]);
780 *my= mid_pred(A[1], B[1], C[1]);
781 }else if(match_count==1){
782 if(left_ref==ref){
783 *mx= A[0];
784 *my= A[1];
785 }else if(top_ref==ref){
786 *mx= B[0];
787 *my= B[1];
788 }else{
789 *mx= C[0];
790 *my= C[1];
792 }else{
793 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
794 *mx= A[0];
795 *my= A[1];
796 }else{
797 *mx= mid_pred(A[0], B[0], C[0]);
798 *my= mid_pred(A[1], B[1], C[1]);
802 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
806 * gets the directionally predicted 16x8 MV.
807 * @param n the block index
808 * @param mx the x component of the predicted motion vector
809 * @param my the y component of the predicted motion vector
811 static inline void pred_16x8_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
812 if(n==0){
813 const int top_ref= h->ref_cache[list][ scan8[0] - 8 ];
814 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
816 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
818 if(top_ref == ref){
819 *mx= B[0];
820 *my= B[1];
821 return;
823 }else{
824 const int left_ref= h->ref_cache[list][ scan8[8] - 1 ];
825 const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
827 tprintf(h->s.avctx, "pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
829 if(left_ref == ref){
830 *mx= A[0];
831 *my= A[1];
832 return;
836 //RARE
837 pred_motion(h, n, 4, list, ref, mx, my);
841 * gets the directionally predicted 8x16 MV.
842 * @param n the block index
843 * @param mx the x component of the predicted motion vector
844 * @param my the y component of the predicted motion vector
846 static inline void pred_8x16_motion(H264Context * const h, int n, int list, int ref, int * const mx, int * const my){
847 if(n==0){
848 const int left_ref= h->ref_cache[list][ scan8[0] - 1 ];
849 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
851 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
853 if(left_ref == ref){
854 *mx= A[0];
855 *my= A[1];
856 return;
858 }else{
859 const int16_t * C;
860 int diagonal_ref;
862 diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
864 tprintf(h->s.avctx, "pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
866 if(diagonal_ref == ref){
867 *mx= C[0];
868 *my= C[1];
869 return;
873 //RARE
874 pred_motion(h, n, 2, list, ref, mx, my);
877 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my){
878 const int top_ref = h->ref_cache[0][ scan8[0] - 8 ];
879 const int left_ref= h->ref_cache[0][ scan8[0] - 1 ];
881 tprintf(h->s.avctx, "pred_pskip: (%d) (%d) at %2d %2d\n", top_ref, left_ref, h->s.mb_x, h->s.mb_y);
883 if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
884 || (top_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
885 || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
887 *mx = *my = 0;
888 return;
891 pred_motion(h, 0, 4, 0, 0, mx, my);
893 return;
896 static inline void direct_dist_scale_factor(H264Context * const h){
897 const int poc = h->s.current_picture_ptr->poc;
898 const int poc1 = h->ref_list[1][0].poc;
899 int i;
900 for(i=0; i<h->ref_count[0]; i++){
901 int poc0 = h->ref_list[0][i].poc;
902 int td = av_clip(poc1 - poc0, -128, 127);
903 if(td == 0 /* FIXME || pic0 is a long-term ref */){
904 h->dist_scale_factor[i] = 256;
905 }else{
906 int tb = av_clip(poc - poc0, -128, 127);
907 int tx = (16384 + (FFABS(td) >> 1)) / td;
908 h->dist_scale_factor[i] = av_clip((tb*tx + 32) >> 6, -1024, 1023);
911 if(FRAME_MBAFF){
912 for(i=0; i<h->ref_count[0]; i++){
913 h->dist_scale_factor_field[2*i] =
914 h->dist_scale_factor_field[2*i+1] = h->dist_scale_factor[i];
918 static inline void direct_ref_list_init(H264Context * const h){
919 MpegEncContext * const s = &h->s;
920 Picture * const ref1 = &h->ref_list[1][0];
921 Picture * const cur = s->current_picture_ptr;
922 int list, i, j;
923 if(cur->pict_type == FF_I_TYPE)
924 cur->ref_count[0] = 0;
925 if(cur->pict_type != FF_B_TYPE)
926 cur->ref_count[1] = 0;
927 for(list=0; list<2; list++){
928 cur->ref_count[list] = h->ref_count[list];
929 for(j=0; j<h->ref_count[list]; j++)
930 cur->ref_poc[list][j] = h->ref_list[list][j].poc;
932 if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
933 return;
934 for(list=0; list<2; list++){
935 for(i=0; i<ref1->ref_count[list]; i++){
936 const int poc = ref1->ref_poc[list][i];
937 h->map_col_to_list0[list][i] = 0; /* bogus; fills in for missing frames */
938 for(j=0; j<h->ref_count[list]; j++)
939 if(h->ref_list[list][j].poc == poc){
940 h->map_col_to_list0[list][i] = j;
941 break;
945 if(FRAME_MBAFF){
946 for(list=0; list<2; list++){
947 for(i=0; i<ref1->ref_count[list]; i++){
948 j = h->map_col_to_list0[list][i];
949 h->map_col_to_list0_field[list][2*i] = 2*j;
950 h->map_col_to_list0_field[list][2*i+1] = 2*j+1;
/*
 * pred_direct_motion: derives motion vectors and reference indices for a
 * direct-predicted macroblock (or for its direct 8x8 sub-blocks when the
 * incoming *mb_type is 8x8) and stores them in h->mv_cache / h->ref_cache.
 * *mb_type and h->sub_mb_type[] are rewritten to describe the partitioning
 * that was chosen. Spatial prediction is used when
 * h->direct_spatial_mv_pred is set, temporal prediction otherwise.
 * The colocated data is always read from h->ref_list[1][0].
 */
956 static inline void pred_direct_motion(H264Context * const h, int *mb_type){
957 MpegEncContext * const s = &h->s;
958 const int mb_xy = h->mb_xy;
959 const int b8_xy = 2*s->mb_x + 2*s->mb_y*h->b8_stride;
960 const int b4_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
/* colocated macroblock: type, list 0/1 motion vectors and list 0/1 reference indices */
961 const int mb_type_col = h->ref_list[1][0].mb_type[mb_xy];
962 const int16_t (*l1mv0)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[0][b4_xy];
963 const int16_t (*l1mv1)[2] = (const int16_t (*)[2]) &h->ref_list[1][0].motion_val[1][b4_xy];
964 const int8_t *l1ref0 = &h->ref_list[1][0].ref_index[0][b8_xy];
965 const int8_t *l1ref1 = &h->ref_list[1][0].ref_index[1][b8_xy];
966 const int is_b8x8 = IS_8X8(*mb_type);
967 unsigned int sub_mb_type;
968 int i8, i4;
/* pick the partitioning of the current MB from the colocated MB's type */
970 #define MB_TYPE_16x16_OR_INTRA (MB_TYPE_16x16|MB_TYPE_INTRA4x4|MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM)
971 if(IS_8X8(mb_type_col) && !h->sps.direct_8x8_inference_flag){
972 /* FIXME save sub mb types from previous frames (or derive from MVs)
973 * so we know exactly what block size to use */
974 sub_mb_type = MB_TYPE_8x8|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_4x4 */
975 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
976 }else if(!is_b8x8 && (mb_type_col & MB_TYPE_16x16_OR_INTRA)){
977 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
978 *mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_16x16 */
979 }else{
980 sub_mb_type = MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2; /* B_SUB_8x8 */
981 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1;
983 if(!is_b8x8)
984 *mb_type |= MB_TYPE_DIRECT2;
985 if(MB_FIELD)
986 *mb_type |= MB_TYPE_INTERLACED;
988 tprintf(s->avctx, "mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
/* ---- spatial direct prediction ---- */
990 if(h->direct_spatial_mv_pred){
991 int ref[2];
992 int mv[2][2];
993 int list;
995 /* FIXME interlacing + spatial direct uses wrong colocated block positions */
997 /* ref = min(neighbors) */
998 for(list=0; list<2; list++){
999 int refa = h->ref_cache[list][scan8[0] - 1];
1000 int refb = h->ref_cache[list][scan8[0] - 8];
1001 int refc = h->ref_cache[list][scan8[0] - 8 + 4];
/* a value of -2 for the top-right entry makes the top-left entry be used instead */
1002 if(refc == -2)
1003 refc = h->ref_cache[list][scan8[0] - 8 - 1];
1004 ref[list] = refa;
1005 if(ref[list] < 0 || (refb < ref[list] && refb >= 0))
1006 ref[list] = refb;
1007 if(ref[list] < 0 || (refc < ref[list] && refc >= 0))
1008 ref[list] = refc;
1009 if(ref[list] < 0)
1010 ref[list] = -1;
/* no non-negative neighbour reference in either list: use ref 0 and zero MVs for both lists */
1013 if(ref[0] < 0 && ref[1] < 0){
1014 ref[0] = ref[1] = 0;
1015 mv[0][0] = mv[0][1] =
1016 mv[1][0] = mv[1][1] = 0;
1017 }else{
1018 for(list=0; list<2; list++){
1019 if(ref[list] >= 0)
1020 pred_motion(h, 0, 4, list, ref[list], &mv[list][0], &mv[list][1]);
1021 else
1022 mv[list][0] = mv[list][1] = 0;
/* clear the list flag of a list that ended up without a reference */
1026 if(ref[1] < 0){
1027 if(!is_b8x8)
1028 *mb_type &= ~MB_TYPE_L1;
1029 sub_mb_type &= ~MB_TYPE_L1;
1030 }else if(ref[0] < 0){
1031 if(!is_b8x8)
1032 *mb_type &= ~MB_TYPE_L0;
1033 sub_mb_type &= ~MB_TYPE_L0;
/* current and colocated MB differ in interlacing: work per 8x8 block with
 * colocated pointers and strides adjusted to the other MB of the pair */
1036 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1037 int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1038 int mb_types_col[2];
1039 int b8_stride = h->b8_stride;
1040 int b4_stride = h->b_stride;
1042 *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
1044 if(IS_INTERLACED(*mb_type)){
1045 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1046 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1047 if(s->mb_y&1){
1048 l1ref0 -= 2*b8_stride;
1049 l1ref1 -= 2*b8_stride;
1050 l1mv0 -= 4*b4_stride;
1051 l1mv1 -= 4*b4_stride;
1053 b8_stride *= 3;
1054 b4_stride *= 6;
1055 }else{
1056 int cur_poc = s->current_picture_ptr->poc;
1057 int *col_poc = h->ref_list[1]->field_poc;
/* col_parity is 0 when field_poc[0] is strictly closer to the current POC, else 1 */
1058 int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
1059 int dy = 2*col_parity - (s->mb_y&1);
1060 mb_types_col[0] =
1061 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
1062 l1ref0 += dy*b8_stride;
1063 l1ref1 += dy*b8_stride;
1064 l1mv0 += 2*dy*b4_stride;
1065 l1mv1 += 2*dy*b4_stride;
1066 b8_stride = 0;
1069 for(i8=0; i8<4; i8++){
1070 int x8 = i8&1;
1071 int y8 = i8>>1;
1072 int xy8 = x8+y8*b8_stride;
1073 int xy4 = 3*x8+y8*b4_stride;
1074 int a=0, b=0;
/* in an explicit 8x8 MB only the sub-blocks flagged direct are filled in */
1076 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1077 continue;
1078 h->sub_mb_type[i8] = sub_mb_type;
1080 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1081 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
/* colocated block is non-intra, uses ref 0 and has both MV components within +-1:
 * a list keeps its predicted MV only when its ref is > 0, otherwise the MV stays 0 */
1082 if(!IS_INTRA(mb_types_col[y8])
1083 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
1084 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
1085 if(ref[0] > 0)
1086 a= pack16to32(mv[0][0],mv[0][1]);
1087 if(ref[1] > 0)
1088 b= pack16to32(mv[1][0],mv[1][1]);
1089 }else{
1090 a= pack16to32(mv[0][0],mv[0][1]);
1091 b= pack16to32(mv[1][0],mv[1][1]);
1093 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
1094 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
/* same interlacing, whole MB predicted as one 16x16 partition */
1096 }else if(IS_16X16(*mb_type)){
1097 int a=0, b=0;
1099 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
1100 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
1101 if(!IS_INTRA(mb_type_col)
1102 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
1103 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
1104 && (h->x264_build>33 || !h->x264_build)))){
1105 if(ref[0] > 0)
1106 a= pack16to32(mv[0][0],mv[0][1]);
1107 if(ref[1] > 0)
1108 b= pack16to32(mv[1][0],mv[1][1]);
1109 }else{
1110 a= pack16to32(mv[0][0],mv[0][1]);
1111 b= pack16to32(mv[1][0],mv[1][1]);
1113 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
1114 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
/* same interlacing, per 8x8 block: write the predicted MVs first, then zero
 * them per 8x8 or per 4x4 block where the col_zero condition holds */
1115 }else{
1116 for(i8=0; i8<4; i8++){
1117 const int x8 = i8&1;
1118 const int y8 = i8>>1;
1120 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1121 continue;
1122 h->sub_mb_type[i8] = sub_mb_type;
1124 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
1125 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
1126 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
1127 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
1129 /* col_zero_flag */
1130 if(!IS_INTRA(mb_type_col) && ( l1ref0[x8 + y8*h->b8_stride] == 0
1131 || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
1132 && (h->x264_build>33 || !h->x264_build)))){
1133 const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
1134 if(IS_SUB_8X8(sub_mb_type)){
1135 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1136 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1137 if(ref[0] == 0)
1138 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1139 if(ref[1] == 0)
1140 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1142 }else
1143 for(i4=0; i4<4; i4++){
1144 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1145 if(FFABS(mv_col[0]) <= 1 && FFABS(mv_col[1]) <= 1){
1146 if(ref[0] == 0)
1147 *(uint32_t*)h->mv_cache[0][scan8[i8*4+i4]] = 0;
1148 if(ref[1] == 0)
1149 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] = 0;
/* ---- temporal direct prediction ---- */
1155 }else{ /* direct temporal mv pred */
1156 const int *map_col_to_list0[2] = {h->map_col_to_list0[0], h->map_col_to_list0[1]};
1157 const int *dist_scale_factor = h->dist_scale_factor;
1159 if(FRAME_MBAFF){
/* interlaced MBs use the per-field mapping and scale tables */
1160 if(IS_INTERLACED(*mb_type)){
1161 map_col_to_list0[0] = h->map_col_to_list0_field[0];
1162 map_col_to_list0[1] = h->map_col_to_list0_field[1];
1163 dist_scale_factor = h->dist_scale_factor_field;
1165 if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
1166 /* FIXME assumes direct_8x8_inference == 1 */
1167 const int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
1168 int mb_types_col[2];
1169 int y_shift;
1171 *mb_type = MB_TYPE_8x8|MB_TYPE_L0L1
1172 | (is_b8x8 ? 0 : MB_TYPE_DIRECT2)
1173 | (*mb_type & MB_TYPE_INTERLACED);
1174 sub_mb_type = MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_16x16;
1176 if(IS_INTERLACED(*mb_type)){
1177 /* frame to field scaling */
1178 mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
1179 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1180 if(s->mb_y&1){
1181 l1ref0 -= 2*h->b8_stride;
1182 l1ref1 -= 2*h->b8_stride;
1183 l1mv0 -= 4*h->b_stride;
1184 l1mv1 -= 4*h->b_stride;
1186 y_shift = 0;
1188 if( (mb_types_col[0] & MB_TYPE_16x16_OR_INTRA)
1189 && (mb_types_col[1] & MB_TYPE_16x16_OR_INTRA)
1190 && !is_b8x8)
1191 *mb_type |= MB_TYPE_16x8;
1192 else
1193 *mb_type |= MB_TYPE_8x8;
1194 }else{
1195 /* field to frame scaling */
1196 /* col_mb_y = (mb_y&~1) + (topAbsDiffPOC < bottomAbsDiffPOC ? 0 : 1)
1197 * but in MBAFF, top and bottom POC are equal */
1198 int dy = (s->mb_y&1) ? 1 : 2;
1199 mb_types_col[0] =
1200 mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
1201 l1ref0 += dy*h->b8_stride;
1202 l1ref1 += dy*h->b8_stride;
1203 l1mv0 += 2*dy*h->b_stride;
1204 l1mv1 += 2*dy*h->b_stride;
1205 y_shift = 2;
1207 if((mb_types_col[0] & (MB_TYPE_16x16_OR_INTRA|MB_TYPE_16x8))
1208 && !is_b8x8)
1209 *mb_type |= MB_TYPE_16x16;
1210 else
1211 *mb_type |= MB_TYPE_8x8;
1214 for(i8=0; i8<4; i8++){
1215 const int x8 = i8&1;
1216 const int y8 = i8>>1;
1217 int ref0, scale;
1218 const int16_t (*l1mv)[2]= l1mv0;
1220 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1221 continue;
1222 h->sub_mb_type[i8] = sub_mb_type;
/* list 1 reference is always 0; an intra colocated block gives ref 0 and zero MVs in both lists */
1224 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1225 if(IS_INTRA(mb_types_col[y8])){
1226 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1227 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1228 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1229 continue;
/* take the colocated list 0 reference if it is valid, else the list 1 one (and its MVs) */
1232 ref0 = l1ref0[x8 + (y8*2>>y_shift)*h->b8_stride];
1233 if(ref0 >= 0)
1234 ref0 = map_col_to_list0[0][ref0*2>>y_shift];
1235 else{
1236 ref0 = map_col_to_list0[1][l1ref1[x8 + (y8*2>>y_shift)*h->b8_stride]*2>>y_shift];
1237 l1mv= l1mv1;
1239 scale = dist_scale_factor[ref0];
1240 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
/* list 0 MV = scaled colocated MV; list 1 MV = list 0 MV minus the colocated MV */
1243 const int16_t *mv_col = l1mv[x8*3 + (y8*6>>y_shift)*h->b_stride];
1244 int my_col = (mv_col[1]<<y_shift)/2;
1245 int mx = (scale * mv_col[0] + 128) >> 8;
1246 int my = (scale * my_col + 128) >> 8;
1247 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1248 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-my_col), 4);
1251 return;
1255 /* one-to-one mv scaling */
1257 if(IS_16X16(*mb_type)){
1258 int ref, mv0, mv1;
1260 fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, 0, 1);
1261 if(IS_INTRA(mb_type_col)){
1262 ref=mv0=mv1=0;
1263 }else{
1264 const int ref0 = l1ref0[0] >= 0 ? map_col_to_list0[0][l1ref0[0]]
1265 : map_col_to_list0[1][l1ref1[0]];
1266 const int scale = dist_scale_factor[ref0];
1267 const int16_t *mv_col = l1ref0[0] >= 0 ? l1mv0[0] : l1mv1[0];
1268 int mv_l0[2];
1269 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1270 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1271 ref= ref0;
1272 mv0= pack16to32(mv_l0[0],mv_l0[1]);
1273 mv1= pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1275 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref, 1);
1276 fill_rectangle(&h-> mv_cache[0][scan8[0]], 4, 4, 8, mv0, 4);
1277 fill_rectangle(&h-> mv_cache[1][scan8[0]], 4, 4, 8, mv1, 4);
1278 }else{
1279 for(i8=0; i8<4; i8++){
1280 const int x8 = i8&1;
1281 const int y8 = i8>>1;
1282 int ref0, scale;
1283 const int16_t (*l1mv)[2]= l1mv0;
1285 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
1286 continue;
1287 h->sub_mb_type[i8] = sub_mb_type;
1288 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
1289 if(IS_INTRA(mb_type_col)){
1290 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, 0, 1);
1291 fill_rectangle(&h-> mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
1292 fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
1293 continue;
1296 ref0 = l1ref0[x8 + y8*h->b8_stride];
1297 if(ref0 >= 0)
1298 ref0 = map_col_to_list0[0][ref0];
1299 else{
1300 ref0 = map_col_to_list0[1][l1ref1[x8 + y8*h->b8_stride]];
1301 l1mv= l1mv1;
1303 scale = dist_scale_factor[ref0];
1305 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
/* one colocated MV per 8x8 block, or one per 4x4 block, depending on sub_mb_type */
1306 if(IS_SUB_8X8(sub_mb_type)){
1307 const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
1308 int mx = (scale * mv_col[0] + 128) >> 8;
1309 int my = (scale * mv_col[1] + 128) >> 8;
1310 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
1311 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
1312 }else
1313 for(i4=0; i4<4; i4++){
1314 const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
1315 int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
1316 mv_l0[0] = (scale * mv_col[0] + 128) >> 8;
1317 mv_l0[1] = (scale * mv_col[1] + 128) >> 8;
1318 *(uint32_t*)h->mv_cache[1][scan8[i8*4+i4]] =
1319 pack16to32(mv_l0[0]-mv_col[0],mv_l0[1]-mv_col[1]);
1326 static inline void write_back_motion(H264Context *h, int mb_type){
1327 MpegEncContext * const s = &h->s;
1328 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1329 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1330 int list;
1332 if(!USES_LIST(mb_type, 0))
1333 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1335 for(list=0; list<h->list_count; list++){
1336 int y;
1337 if(!USES_LIST(mb_type, list))
1338 continue;
1340 for(y=0; y<4; y++){
1341 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1342 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1344 if( h->pps.cabac ) {
1345 if(IS_SKIP(mb_type))
1346 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1347 else
1348 for(y=0; y<4; y++){
1349 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1350 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1355 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1356 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1357 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1358 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1359 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1363 if(h->slice_type == FF_B_TYPE && h->pps.cabac){
1364 if(IS_8X8(mb_type)){
1365 uint8_t *direct_table = &h->direct_table[b8_xy];
1366 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1367 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1368 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1374 * Decodes a network abstraction layer unit.
1375 * @param consumed is the number of bytes used as input
1376 * @param length is the length of the array
1377 * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
1378 * @returns decoded bytes, might be src+1 if no escapes
1380 static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
1381 int i, si, di;
1382 uint8_t *dst;
1383 int bufidx;
1385 // src[0]&0x80; //forbidden bit
1386 h->nal_ref_idc= src[0]>>5;
1387 h->nal_unit_type= src[0]&0x1F;
1389 src++; length--;
1390 #if 0
1391 for(i=0; i<length; i++)
1392 printf("%2X ", src[i]);
1393 #endif
1394 for(i=0; i+1<length; i+=2){
1395 if(src[i]) continue;
1396 if(i>0 && src[i-1]==0) i--;
1397 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
1398 if(src[i+2]!=3){
1399 /* startcode, so we must be past the end */
1400 length=i;
1402 break;
1406 if(i>=length-1){ //no escaped 0
1407 *dst_length= length;
1408 *consumed= length+1; //+1 for the header
1409 return src;
1412 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
1413 h->rbsp_buffer[bufidx]= av_fast_realloc(h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length);
1414 dst= h->rbsp_buffer[bufidx];
1416 if (dst == NULL){
1417 return NULL;
1420 //printf("decoding esc\n");
1421 si=di=0;
1422 while(si<length){
1423 //remove escapes (very rare 1:2^22)
1424 if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
1425 if(src[si+2]==3){ //escape
1426 dst[di++]= 0;
1427 dst[di++]= 0;
1428 si+=3;
1429 continue;
1430 }else //next start code
1431 break;
1434 dst[di++]= src[si++];
1437 *dst_length= di;
1438 *consumed= si + 1;//+1 for the header
1439 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
1440 return dst;
1444 * identifies the exact end of the bitstream
1445 * @return the length of the trailing, or 0 if damaged
1447 static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
1448 int v= *src;
1449 int r;
1451 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
1453 for(r=1; r<9; r++){
1454 if(v&1) return r;
1455 v>>=1;
1457 return 0;
1461 * idct tranforms the 16 dc values and dequantize them.
1462 * @param qp quantization parameter
1464 static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1465 #define stride 16
1466 int i;
1467 int temp[16]; //FIXME check if this is a good idea
1468 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1469 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1471 //memset(block, 64, 2*256);
1472 //return;
1473 for(i=0; i<4; i++){
1474 const int offset= y_offset[i];
1475 const int z0= block[offset+stride*0] + block[offset+stride*4];
1476 const int z1= block[offset+stride*0] - block[offset+stride*4];
1477 const int z2= block[offset+stride*1] - block[offset+stride*5];
1478 const int z3= block[offset+stride*1] + block[offset+stride*5];
1480 temp[4*i+0]= z0+z3;
1481 temp[4*i+1]= z1+z2;
1482 temp[4*i+2]= z1-z2;
1483 temp[4*i+3]= z0-z3;
1486 for(i=0; i<4; i++){
1487 const int offset= x_offset[i];
1488 const int z0= temp[4*0+i] + temp[4*2+i];
1489 const int z1= temp[4*0+i] - temp[4*2+i];
1490 const int z2= temp[4*1+i] - temp[4*3+i];
1491 const int z3= temp[4*1+i] + temp[4*3+i];
1493 block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
1494 block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
1495 block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
1496 block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
/* Compiled out by the surrounding #if 0. Same two-pass butterfly as
 * h264_luma_dc_dequant_idct_c, but the results are only halved (>>1), with
 * no multiplication by a quantizer. The body relies on the `stride` macro,
 * which is still defined at this point and is removed by the #undef below. */
1500 #if 0
1502 * dct tranforms the 16 dc values.
1503 * @param qp quantization parameter ??? FIXME
1505 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
1506 // const int qmul= dequant_coeff[qp][0];
1507 int i;
1508 int temp[16]; //FIXME check if this is a good idea
1509 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
1510 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
1512 for(i=0; i<4; i++){
1513 const int offset= y_offset[i];
1514 const int z0= block[offset+stride*0] + block[offset+stride*4];
1515 const int z1= block[offset+stride*0] - block[offset+stride*4];
1516 const int z2= block[offset+stride*1] - block[offset+stride*5];
1517 const int z3= block[offset+stride*1] + block[offset+stride*5];
1519 temp[4*i+0]= z0+z3;
1520 temp[4*i+1]= z1+z2;
1521 temp[4*i+2]= z1-z2;
1522 temp[4*i+3]= z0-z3;
1525 for(i=0; i<4; i++){
1526 const int offset= x_offset[i];
1527 const int z0= temp[4*0+i] + temp[4*2+i];
1528 const int z1= temp[4*0+i] - temp[4*2+i];
1529 const int z2= temp[4*1+i] - temp[4*3+i];
1530 const int z3= temp[4*1+i] + temp[4*3+i];
1532 block[stride*0 +offset]= (z0 + z3)>>1;
1533 block[stride*2 +offset]= (z1 + z2)>>1;
1534 block[stride*8 +offset]= (z1 - z2)>>1;
1535 block[stride*10+offset]= (z0 - z3)>>1;
1538 #endif
/* NOTE(review): no #define of xStride is visible in this part of the file;
 * the #undef is harmless if the macro does not exist. */
1540 #undef xStride
1541 #undef stride
1543 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
1544 const int stride= 16*2;
1545 const int xStride= 16;
1546 int a,b,c,d,e;
1548 a= block[stride*0 + xStride*0];
1549 b= block[stride*0 + xStride*1];
1550 c= block[stride*1 + xStride*0];
1551 d= block[stride*1 + xStride*1];
1553 e= a-b;
1554 a= a+b;
1555 b= c-d;
1556 c= c+d;
1558 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
1559 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
1560 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
1561 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/* Compiled out by the surrounding #if 0. Same 2x2 butterfly as
 * chroma_dc_dequant_idct_c on the values at block[0], block[16], block[32]
 * and block[48], but the results are stored without any scaling. */
1564 #if 0
1565 static void chroma_dc_dct_c(DCTELEM *block){
1566 const int stride= 16*2;
1567 const int xStride= 16;
1568 int a,b,c,d,e;
1570 a= block[stride*0 + xStride*0];
1571 b= block[stride*0 + xStride*1];
1572 c= block[stride*1 + xStride*0];
1573 d= block[stride*1 + xStride*1];
1575 e= a-b;
1576 a= a+b;
1577 b= c-d;
1578 c= c+d;
1580 block[stride*0 + xStride*0]= (a+c);
1581 block[stride*0 + xStride*1]= (e+b);
1582 block[stride*1 + xStride*0]= (a-c);
1583 block[stride*1 + xStride*1]= (e-b);
1585 #endif
1588 * gets the chroma qp.
1590 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
1591 return h->pps.chroma_qp_table[t][qscale & 0xff];
1594 //FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
1595 //FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
1596 static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
1597 int i;
1598 const int * const quant_table= quant_coeff[qscale];
1599 const int bias= intra ? (1<<QUANT_SHIFT)/3 : (1<<QUANT_SHIFT)/6;
1600 const unsigned int threshold1= (1<<QUANT_SHIFT) - bias - 1;
1601 const unsigned int threshold2= (threshold1<<1);
1602 int last_non_zero;
1604 if(separate_dc){
1605 if(qscale<=18){
1606 //avoid overflows
1607 const int dc_bias= intra ? (1<<(QUANT_SHIFT-2))/3 : (1<<(QUANT_SHIFT-2))/6;
1608 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT-2)) - dc_bias - 1;
1609 const unsigned int dc_threshold2= (dc_threshold1<<1);
1611 int level= block[0]*quant_coeff[qscale+18][0];
1612 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1613 if(level>0){
1614 level= (dc_bias + level)>>(QUANT_SHIFT-2);
1615 block[0]= level;
1616 }else{
1617 level= (dc_bias - level)>>(QUANT_SHIFT-2);
1618 block[0]= -level;
1620 // last_non_zero = i;
1621 }else{
1622 block[0]=0;
1624 }else{
1625 const int dc_bias= intra ? (1<<(QUANT_SHIFT+1))/3 : (1<<(QUANT_SHIFT+1))/6;
1626 const unsigned int dc_threshold1= (1<<(QUANT_SHIFT+1)) - dc_bias - 1;
1627 const unsigned int dc_threshold2= (dc_threshold1<<1);
1629 int level= block[0]*quant_table[0];
1630 if(((unsigned)(level+dc_threshold1))>dc_threshold2){
1631 if(level>0){
1632 level= (dc_bias + level)>>(QUANT_SHIFT+1);
1633 block[0]= level;
1634 }else{
1635 level= (dc_bias - level)>>(QUANT_SHIFT+1);
1636 block[0]= -level;
1638 // last_non_zero = i;
1639 }else{
1640 block[0]=0;
1643 last_non_zero= 0;
1644 i=1;
1645 }else{
1646 last_non_zero= -1;
1647 i=0;
1650 for(; i<16; i++){
1651 const int j= scantable[i];
1652 int level= block[j]*quant_table[j];
1654 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
1655 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
1656 if(((unsigned)(level+threshold1))>threshold2){
1657 if(level>0){
1658 level= (bias + level)>>QUANT_SHIFT;
1659 block[j]= level;
1660 }else{
1661 level= (bias - level)>>QUANT_SHIFT;
1662 block[j]= -level;
1664 last_non_zero = i;
1665 }else{
1666 block[j]=0;
1670 return last_non_zero;
1673 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
1674 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1675 int src_x_offset, int src_y_offset,
1676 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
1677 MpegEncContext * const s = &h->s;
1678 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
1679 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
1680 const int luma_xy= (mx&3) + ((my&3)<<2);
1681 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
1682 uint8_t * src_cb, * src_cr;
1683 int extra_width= h->emu_edge_width;
1684 int extra_height= h->emu_edge_height;
1685 int emu=0;
1686 const int full_mx= mx>>2;
1687 const int full_my= my>>2;
1688 const int pic_width = 16*s->mb_width;
1689 const int pic_height = 16*s->mb_height >> MB_FIELD;
1691 if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
1692 return;
1694 if(mx&7) extra_width -= 3;
1695 if(my&7) extra_height -= 3;
1697 if( full_mx < 0-extra_width
1698 || full_my < 0-extra_height
1699 || full_mx + 16/*FIXME*/ > pic_width + extra_width
1700 || full_my + 16/*FIXME*/ > pic_height + extra_height){
1701 ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
1702 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
1703 emu=1;
1706 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
1707 if(!square){
1708 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
1711 if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
1713 if(MB_FIELD){
1714 // chroma offset when predicting from a field of opposite parity
1715 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
1716 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
1718 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1719 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
1721 if(emu){
1722 ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1723 src_cb= s->edge_emu_buffer;
1725 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1727 if(emu){
1728 ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
1729 src_cr= s->edge_emu_buffer;
1731 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
1734 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
1735 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1736 int x_offset, int y_offset,
1737 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1738 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1739 int list0, int list1){
1740 MpegEncContext * const s = &h->s;
1741 qpel_mc_func *qpix_op= qpix_put;
1742 h264_chroma_mc_func chroma_op= chroma_put;
1744 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1745 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1746 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1747 x_offset += 8*s->mb_x;
1748 y_offset += 8*(s->mb_y >> MB_FIELD);
1750 if(list0){
1751 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
1752 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
1753 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1754 qpix_op, chroma_op);
1756 qpix_op= qpix_avg;
1757 chroma_op= chroma_avg;
1760 if(list1){
1761 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
1762 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
1763 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1764 qpix_op, chroma_op);
1768 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
1769 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1770 int x_offset, int y_offset,
1771 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1772 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
1773 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
1774 int list0, int list1){
1775 MpegEncContext * const s = &h->s;
1777 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
1778 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
1779 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
1780 x_offset += 8*s->mb_x;
1781 y_offset += 8*(s->mb_y >> MB_FIELD);
1783 if(list0 && list1){
1784 /* don't optimize for luma-only case, since B-frames usually
1785 * use implicit weights => chroma too. */
1786 uint8_t *tmp_cb = s->obmc_scratchpad;
1787 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
1788 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
1789 int refn0 = h->ref_cache[0][ scan8[n] ];
1790 int refn1 = h->ref_cache[1][ scan8[n] ];
1792 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
1793 dest_y, dest_cb, dest_cr,
1794 x_offset, y_offset, qpix_put, chroma_put);
1795 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
1796 tmp_y, tmp_cb, tmp_cr,
1797 x_offset, y_offset, qpix_put, chroma_put);
1799 if(h->use_weight == 2){
1800 int weight0 = h->implicit_weight[refn0][refn1];
1801 int weight1 = 64 - weight0;
1802 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
1803 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
1804 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
1805 }else{
1806 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
1807 h->luma_weight[0][refn0], h->luma_weight[1][refn1],
1808 h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
1809 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1810 h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
1811 h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
1812 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1813 h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
1814 h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
1816 }else{
1817 int list = list1 ? 1 : 0;
1818 int refn = h->ref_cache[list][ scan8[n] ];
1819 Picture *ref= &h->ref_list[list][refn];
1820 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
1821 dest_y, dest_cb, dest_cr, x_offset, y_offset,
1822 qpix_put, chroma_put);
1824 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
1825 h->luma_weight[list][refn], h->luma_offset[list][refn]);
1826 if(h->use_weight_chroma){
1827 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1828 h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
1829 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
1830 h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
1835 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
1836 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1837 int x_offset, int y_offset,
1838 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
1839 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
1840 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
1841 int list0, int list1){
1842 if((h->use_weight==2 && list0 && list1
1843 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
1844 || h->use_weight==1)
1845 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1846 x_offset, y_offset, qpix_put, chroma_put,
1847 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
1848 else
1849 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
1850 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
1853 static inline void prefetch_motion(H264Context *h, int list){
1854 /* fetch pixels for estimated mv 4 macroblocks ahead
1855 * optimized for 64byte cache lines */
1856 MpegEncContext * const s = &h->s;
1857 const int refn = h->ref_cache[list][scan8[0]];
1858 if(refn >= 0){
1859 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
1860 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
1861 uint8_t **src= h->ref_list[list][refn].data;
1862 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
1863 s->dsp.prefetch(src[0]+off, s->linesize, 4);
1864 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
1865 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/*
 * Inter prediction for the current macroblock.
 * Issues one mc_part() call per partition, selected by the macroblock type
 * (16x16, 16x8, 8x16, or 8x8 with its own sub-partitions), and calls
 * prefetch_motion() for list 0 before and for list 1 after the prediction.
 * The function tables are indexed by partition size; the two last arguments
 * of every mc_part() call tell whether list 0 / list 1 is used.
 */
1869 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
1870 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
1871 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
1872 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
1873 MpegEncContext * const s = &h->s;
1874 const int mb_xy= h->mb_xy;
1875 const int mb_type= s->current_picture.mb_type[mb_xy];
1877 assert(IS_INTER(mb_type));
1879 prefetch_motion(h, 0);
/* single 16x16 partition */
1881 if(IS_16X16(mb_type)){
1882 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
1883 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
1884 &weight_op[0], &weight_avg[0],
1885 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
/* two 16x8 partitions: blocks 0 and 8, second one at y_offset 4 */
1886 }else if(IS_16X8(mb_type)){
1887 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
1888 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1889 &weight_op[1], &weight_avg[1],
1890 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1891 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
1892 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
1893 &weight_op[1], &weight_avg[1],
1894 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
/* two 8x16 partitions: blocks 0 and 4, second one at x_offset 4 */
1895 }else if(IS_8X16(mb_type)){
1896 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
1897 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1898 &weight_op[2], &weight_avg[2],
1899 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
1900 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
1901 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1902 &weight_op[2], &weight_avg[2],
1903 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
1904 }else{
1905 int i;
1907 assert(IS_8X8(mb_type));
/* four 8x8 quadrants, each predicted according to its own sub_mb_type */
1909 for(i=0; i<4; i++){
1910 const int sub_mb_type= h->sub_mb_type[i];
1911 const int n= 4*i;
/* quadrant origin: x_offset is 0 or 4, y_offset is 0 or 4 */
1912 int x_offset= (i&1)<<2;
1913 int y_offset= (i&2)<<1;
1915 if(IS_SUB_8X8(sub_mb_type)){
1916 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1917 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
1918 &weight_op[3], &weight_avg[3],
1919 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1920 }else if(IS_SUB_8X4(sub_mb_type)){
1921 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1922 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1923 &weight_op[4], &weight_avg[4],
1924 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1925 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
1926 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
1927 &weight_op[4], &weight_avg[4],
1928 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1929 }else if(IS_SUB_4X8(sub_mb_type)){
1930 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
1931 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1932 &weight_op[5], &weight_avg[5],
1933 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1934 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
1935 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1936 &weight_op[5], &weight_avg[5],
1937 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1938 }else{
1939 int j;
1940 assert(IS_SUB_4X4(sub_mb_type));
/* four 4x4 blocks inside the quadrant */
1941 for(j=0; j<4; j++){
1942 int sub_x_offset= x_offset + 2*(j&1);
1943 int sub_y_offset= y_offset + (j&2);
1944 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
1945 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
1946 &weight_op[6], &weight_avg[6],
1947 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
1953 prefetch_motion(h, 1);
1956 static av_cold void decode_init_vlc(void){
1957 static int done = 0;
1959 if (!done) {
1960 int i;
1961 done = 1;
1963 init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
1964 &chroma_dc_coeff_token_len [0], 1, 1,
1965 &chroma_dc_coeff_token_bits[0], 1, 1, 1);
1967 for(i=0; i<4; i++){
1968 init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
1969 &coeff_token_len [i][0], 1, 1,
1970 &coeff_token_bits[i][0], 1, 1, 1);
1973 for(i=0; i<3; i++){
1974 init_vlc(&chroma_dc_total_zeros_vlc[i], CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 4,
1975 &chroma_dc_total_zeros_len [i][0], 1, 1,
1976 &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
1978 for(i=0; i<15; i++){
1979 init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
1980 &total_zeros_len [i][0], 1, 1,
1981 &total_zeros_bits[i][0], 1, 1, 1);
1984 for(i=0; i<6; i++){
1985 init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
1986 &run_len [i][0], 1, 1,
1987 &run_bits[i][0], 1, 1, 1);
1989 init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
1990 &run_len [6][0], 1, 1,
1991 &run_bits[6][0], 1, 1, 1);
1995 static void free_tables(H264Context *h){
1996 int i;
1997 H264Context *hx;
1998 av_freep(&h->intra4x4_pred_mode);
1999 av_freep(&h->chroma_pred_mode_table);
2000 av_freep(&h->cbp_table);
2001 av_freep(&h->mvd_table[0]);
2002 av_freep(&h->mvd_table[1]);
2003 av_freep(&h->direct_table);
2004 av_freep(&h->non_zero_count);
2005 av_freep(&h->slice_table_base);
2006 h->slice_table= NULL;
2008 av_freep(&h->mb2b_xy);
2009 av_freep(&h->mb2b8_xy);
2011 for(i = 0; i < MAX_SPS_COUNT; i++)
2012 av_freep(h->sps_buffers + i);
2014 for(i = 0; i < MAX_PPS_COUNT; i++)
2015 av_freep(h->pps_buffers + i);
2017 for(i = 0; i < h->s.avctx->thread_count; i++) {
2018 hx = h->thread_context[i];
2019 if(!hx) continue;
2020 av_freep(&hx->top_borders[1]);
2021 av_freep(&hx->top_borders[0]);
2022 av_freep(&hx->s.obmc_scratchpad);
2026 static void init_dequant8_coeff_table(H264Context *h){
2027 int i,q,x;
2028 const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
2029 h->dequant8_coeff[0] = h->dequant8_buffer[0];
2030 h->dequant8_coeff[1] = h->dequant8_buffer[1];
2032 for(i=0; i<2; i++ ){
2033 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
2034 h->dequant8_coeff[1] = h->dequant8_buffer[0];
2035 break;
2038 for(q=0; q<52; q++){
2039 int shift = ff_div6[q];
2040 int idx = ff_rem6[q];
2041 for(x=0; x<64; x++)
2042 h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
2043 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
2044 h->pps.scaling_matrix8[i][x]) << shift;
2049 static void init_dequant4_coeff_table(H264Context *h){
2050 int i,j,q,x;
2051 const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
2052 for(i=0; i<6; i++ ){
2053 h->dequant4_coeff[i] = h->dequant4_buffer[i];
2054 for(j=0; j<i; j++){
2055 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
2056 h->dequant4_coeff[i] = h->dequant4_buffer[j];
2057 break;
2060 if(j<i)
2061 continue;
2063 for(q=0; q<52; q++){
2064 int shift = ff_div6[q] + 2;
2065 int idx = ff_rem6[q];
2066 for(x=0; x<16; x++)
2067 h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
2068 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
2069 h->pps.scaling_matrix4[i][x]) << shift;
2074 static void init_dequant_tables(H264Context *h){
2075 int i,x;
2076 init_dequant4_coeff_table(h);
2077 if(h->pps.transform_8x8_mode)
2078 init_dequant8_coeff_table(h);
2079 if(h->sps.transform_bypass){
2080 for(i=0; i<6; i++)
2081 for(x=0; x<16; x++)
2082 h->dequant4_coeff[i][0][x] = 1<<6;
2083 if(h->pps.transform_8x8_mode)
2084 for(i=0; i<2; i++)
2085 for(x=0; x<64; x++)
2086 h->dequant8_coeff[i][0][x] = 1<<6;
2092 * allocates tables.
2093 * needs width/height
2095 static int alloc_tables(H264Context *h){
2096 MpegEncContext * const s = &h->s;
2097 const int big_mb_num= s->mb_stride * (s->mb_height+1);
2098 int x,y;
2100 CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t))
2102 CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
2103 CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
2104 CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
2106 CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
2107 CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
2108 CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
2109 CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
2111 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
2112 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
2114 CHECKED_ALLOCZ(h->mb2b_xy , big_mb_num * sizeof(uint32_t));
2115 CHECKED_ALLOCZ(h->mb2b8_xy , big_mb_num * sizeof(uint32_t));
2116 for(y=0; y<s->mb_height; y++){
2117 for(x=0; x<s->mb_width; x++){
2118 const int mb_xy= x + y*s->mb_stride;
2119 const int b_xy = 4*x + 4*y*h->b_stride;
2120 const int b8_xy= 2*x + 2*y*h->b8_stride;
2122 h->mb2b_xy [mb_xy]= b_xy;
2123 h->mb2b8_xy[mb_xy]= b8_xy;
2127 s->obmc_scratchpad = NULL;
2129 if(!h->dequant4_coeff[0])
2130 init_dequant_tables(h);
2132 return 0;
2133 fail:
2134 free_tables(h);
2135 return -1;
2139 * Mimic alloc_tables(), but for every context thread.
2141 static void clone_tables(H264Context *dst, H264Context *src){
2142 dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
2143 dst->non_zero_count = src->non_zero_count;
2144 dst->slice_table = src->slice_table;
2145 dst->cbp_table = src->cbp_table;
2146 dst->mb2b_xy = src->mb2b_xy;
2147 dst->mb2b8_xy = src->mb2b8_xy;
2148 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
2149 dst->mvd_table[0] = src->mvd_table[0];
2150 dst->mvd_table[1] = src->mvd_table[1];
2151 dst->direct_table = src->direct_table;
2153 dst->s.obmc_scratchpad = NULL;
2154 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
2158 * Init context
2159 * Allocate buffers which are not shared amongst multiple threads.
2161 static int context_init(H264Context *h){
2162 CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2163 CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
2165 return 0;
2166 fail:
2167 return -1; // free_tables will clean up for us
2170 static av_cold void common_init(H264Context *h){
2171 MpegEncContext * const s = &h->s;
2173 s->width = s->avctx->width;
2174 s->height = s->avctx->height;
2175 s->codec_id= s->avctx->codec->id;
2177 ff_h264_pred_init(&h->hpc, s->codec_id);
2179 h->dequant_coeff_pps= -1;
2180 s->unrestricted_mv=1;
2181 s->decode=1; //FIXME
2183 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
2184 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
2187 static av_cold int decode_init(AVCodecContext *avctx){
2188 H264Context *h= avctx->priv_data;
2189 MpegEncContext * const s = &h->s;
2191 MPV_decode_defaults(s);
2193 s->avctx = avctx;
2194 common_init(h);
2196 s->out_format = FMT_H264;
2197 s->workaround_bugs= avctx->workaround_bugs;
2199 // set defaults
2200 // s->decode_mb= ff_h263_decode_mb;
2201 s->quarter_sample = 1;
2202 s->low_delay= 1;
2203 avctx->pix_fmt= PIX_FMT_YUV420P;
2205 decode_init_vlc();
2207 if(avctx->extradata_size > 0 && avctx->extradata &&
2208 *(char *)avctx->extradata == 1){
2209 h->is_avc = 1;
2210 h->got_avcC = 0;
2211 } else {
2212 h->is_avc = 0;
2215 h->thread_context[0] = h;
2216 return 0;
2219 static int frame_start(H264Context *h){
2220 MpegEncContext * const s = &h->s;
2221 int i;
2223 if(MPV_frame_start(s, s->avctx) < 0)
2224 return -1;
2225 ff_er_frame_start(s);
2227 * MPV_frame_start uses pict_type to derive key_frame.
2228 * This is incorrect for H.264; IDR markings must be used.
2229 * Zero here; IDR markings per slice in frame or fields are OR'd in later.
2230 * See decode_nal_units().
2232 s->current_picture_ptr->key_frame= 0;
2234 assert(s->linesize && s->uvlinesize);
2236 for(i=0; i<16; i++){
2237 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
2238 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
2240 for(i=0; i<4; i++){
2241 h->block_offset[16+i]=
2242 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2243 h->block_offset[24+16+i]=
2244 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
2247 /* can't be in alloc_tables because linesize isn't known there.
2248 * FIXME: redo bipred weight to not require extra buffer? */
2249 for(i = 0; i < s->avctx->thread_count; i++)
2250 if(!h->thread_context[i]->s.obmc_scratchpad)
2251 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
2253 /* some macroblocks will be accessed before they're available */
2254 if(FRAME_MBAFF || s->avctx->thread_count > 1)
2255 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
2257 // s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
2258 return 0;
2261 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
2262 MpegEncContext * const s = &h->s;
2263 int i;
2265 src_y -= linesize;
2266 src_cb -= uvlinesize;
2267 src_cr -= uvlinesize;
2269 // There are two lines saved, the line above the the top macroblock of a pair,
2270 // and the line above the bottom macroblock
2271 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2272 for(i=1; i<17; i++){
2273 h->left_border[i]= src_y[15+i* linesize];
2276 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
2277 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
2279 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2280 h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
2281 h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
2282 for(i=1; i<9; i++){
2283 h->left_border[i+17 ]= src_cb[7+i*uvlinesize];
2284 h->left_border[i+17+9]= src_cr[7+i*uvlinesize];
2286 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+8*uvlinesize);
2287 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+8*uvlinesize);
/*
 * Exchanges picture samples around the macroblock (left column, row above,
 * and part of the row above the next macroblock) with the copies kept in
 * h->left_border and h->top_borders[0].
 * With xchg != 0 the saved and the picture samples are swapped; with
 * xchg == 0 the saved samples are only written into the picture. Several
 * calls below pass a literal 1 and therefore always swap.
 * Chroma is skipped when gray-only decoding is active and simple is 0.
 */
2291 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
2292 MpegEncContext * const s = &h->s;
2293 int temp8, i;
2294 uint64_t temp64;
2295 int deblock_left;
2296 int deblock_top;
2297 int mb_xy;
/* deblocking_filter == 2: a border is handled only when the neighbour has
 * the same slice_table entry; otherwise whenever a left / top neighbour
 * exists inside the picture */
2299 if(h->deblocking_filter == 2) {
2300 mb_xy = h->mb_xy;
2301 deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
2302 deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
2303 } else {
2304 deblock_left = (s->mb_x > 0);
2305 deblock_top = (s->mb_y > 0);
/* move to the sample one line above and one column left of the macroblock */
2308 src_y -= linesize + 1;
2309 src_cb -= uvlinesize + 1;
2310 src_cr -= uvlinesize + 1;
/* XCHG(a,b,t,xchg): b always receives the old a; a receives the old b only
 * when xchg is non-zero */
2312 #define XCHG(a,b,t,xchg)\
2313 t= a;\
2314 if(xchg)\
2315 a= b;\
2316 b= t;
/* left luma column; row 0 (the top-left corner) is skipped without a top border */
2318 if(deblock_left){
2319 for(i = !deblock_top; i<17; i++){
2320 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* luma row above: 16 samples of this macroblock plus 8 above the next one */
2324 if(deblock_top){
2325 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2326 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2327 if(s->mb_x+1 < s->mb_width){
2328 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
/* same for both chroma planes */
2332 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2333 if(deblock_left){
2334 for(i = !deblock_top; i<9; i++){
2335 XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
2336 XCHG(h->left_border[i+17+9], src_cr[i*uvlinesize], temp8, xchg);
2339 if(deblock_top){
2340 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2341 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2346 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
2347 MpegEncContext * const s = &h->s;
2348 int i;
2350 src_y -= 2 * linesize;
2351 src_cb -= 2 * uvlinesize;
2352 src_cr -= 2 * uvlinesize;
2354 // There are two lines saved, the line above the the top macroblock of a pair,
2355 // and the line above the bottom macroblock
2356 h->left_border[0]= h->top_borders[0][s->mb_x][15];
2357 h->left_border[1]= h->top_borders[1][s->mb_x][15];
2358 for(i=2; i<34; i++){
2359 h->left_border[i]= src_y[15+i* linesize];
2362 *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 32*linesize);
2363 *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
2364 *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
2365 *(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
2367 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2368 h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
2369 h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
2370 h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
2371 h->left_border[34+18+1]= h->top_borders[1][s->mb_x][24+7];
2372 for(i=2; i<18; i++){
2373 h->left_border[i+34 ]= src_cb[7+i*uvlinesize];
2374 h->left_border[i+34+18]= src_cr[7+i*uvlinesize];
2376 *(uint64_t*)(h->top_borders[0][s->mb_x]+16)= *(uint64_t*)(src_cb+16*uvlinesize);
2377 *(uint64_t*)(h->top_borders[0][s->mb_x]+24)= *(uint64_t*)(src_cr+16*uvlinesize);
2378 *(uint64_t*)(h->top_borders[1][s->mb_x]+16)= *(uint64_t*)(src_cb+17*uvlinesize);
2379 *(uint64_t*)(h->top_borders[1][s->mb_x]+24)= *(uint64_t*)(src_cr+17*uvlinesize);
/*
 * Macroblock pair variant of xchg_mb_border(): exchanges the left column
 * and the two rows above a macroblock pair with the copies kept in
 * h->left_border and h->top_borders[0..1].
 * With xchg != 0 saved and picture samples are swapped; with xchg == 0 the
 * saved samples are only written into the picture. Several calls below
 * pass a literal 1 and therefore always swap.
 * Chroma is skipped when gray-only decoding is active.
 */
2383 static inline void xchg_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg){
2384 MpegEncContext * const s = &h->s;
2385 int temp8, i;
2386 uint64_t temp64;
/* the top border is only handled from the second macroblock row pair on (mb_y > 1) */
2387 int deblock_left = (s->mb_x > 0);
2388 int deblock_top = (s->mb_y > 1);
2390 tprintf(s->avctx, "xchg_pair_border: src_y:%p src_cb:%p src_cr:%p ls:%d uvls:%d\n", src_y, src_cb, src_cr, linesize, uvlinesize);
/* move to the sample two lines above and one column left of the pair */
2392 src_y -= 2 * linesize + 1;
2393 src_cb -= 2 * uvlinesize + 1;
2394 src_cr -= 2 * uvlinesize + 1;
/* XCHG(a,b,t,xchg): b always receives the old a; a receives the old b only
 * when xchg is non-zero */
2396 #define XCHG(a,b,t,xchg)\
2397 t= a;\
2398 if(xchg)\
2399 a= b;\
2400 b= t;
/* left luma column; the first two rows are skipped without a top border */
2402 if(deblock_left){
2403 for(i = (!deblock_top)<<1; i<34; i++){
2404 XCHG(h->left_border[i ], src_y [i* linesize], temp8, xchg);
/* the two luma rows above the pair, plus 8 samples above the next pair */
2408 if(deblock_top){
2409 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+0), *(uint64_t*)(src_y +1), temp64, xchg);
2410 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+8), *(uint64_t*)(src_y +9), temp64, 1);
2411 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+0), *(uint64_t*)(src_y +1 +linesize), temp64, xchg);
2412 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+8), *(uint64_t*)(src_y +9 +linesize), temp64, 1);
2413 if(s->mb_x+1 < s->mb_width){
2414 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x+1]), *(uint64_t*)(src_y +17), temp64, 1);
2415 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x+1]), *(uint64_t*)(src_y +17 +linesize), temp64, 1);
/* same for both chroma planes */
2419 if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2420 if(deblock_left){
2421 for(i = (!deblock_top) << 1; i<18; i++){
2422 XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
2423 XCHG(h->left_border[i+34+18], src_cr[i*uvlinesize], temp8, xchg);
2426 if(deblock_top){
2427 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+16), *(uint64_t*)(src_cb+1), temp64, 1);
2428 XCHG(*(uint64_t*)(h->top_borders[0][s->mb_x]+24), *(uint64_t*)(src_cr+1), temp64, 1);
2429 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+16), *(uint64_t*)(src_cb+1 +uvlinesize), temp64, 1);
2430 XCHG(*(uint64_t*)(h->top_borders[1][s->mb_x]+24), *(uint64_t*)(src_cr+1 +uvlinesize), temp64, 1);
/*
 * Reconstructs the current macroblock into the current picture.
 * Steps, in order: compute the destination pointers, set up line sizes
 * (doubled for field macroblocks), pick the residual add functions, then
 * either copy PCM samples or run intra prediction / inter prediction
 * (hl_motion) followed by the luma and chroma residual, and finally call
 * the deblocking filter when it is enabled.
 * simple != 0 compiles out the branches guarded by !simple (field/MBAFF,
 * PCM, gray-only and encoder checks) and treats the codec as H.264.
 */
2435 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
2436 MpegEncContext * const s = &h->s;
2437 const int mb_x= s->mb_x;
2438 const int mb_y= s->mb_y;
2439 const int mb_xy= h->mb_xy;
2440 const int mb_type= s->current_picture.mb_type[mb_xy];
2441 uint8_t *dest_y, *dest_cb, *dest_cr;
2442 int linesize, uvlinesize /*dct_offset*/;
2443 int i;
2444 int *block_offset = &h->block_offset[0];
2445 const unsigned int bottom = mb_y & 1;
/* bypass applies only at qscale 0 with sps.transform_bypass set */
2446 const int transform_bypass = (s->qscale == 0 && h->sps.transform_bypass), is_h264 = (simple || s->codec_id == CODEC_ID_H264);
2447 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
2448 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
/* top-left sample of this macroblock in each plane of the current picture */
2450 dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2451 dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2452 dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2454 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
2455 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
/* field macroblock: line sizes are doubled and the second block_offset
 * table (from index 24) is used; on odd mb_y the destination pointers are
 * moved up by 15 luma / 7 chroma lines */
2457 if (!simple && MB_FIELD) {
2458 linesize = h->mb_linesize = s->linesize * 2;
2459 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2460 block_offset = &h->block_offset[24];
2461 if(mb_y&1){ //FIXME move out of this func?
2462 dest_y -= s->linesize*15;
2463 dest_cb-= s->uvlinesize*7;
2464 dest_cr-= s->uvlinesize*7;
/* MBAFF: rewrite the cached reference indices of every used list as
 * (16+ref)^(mb_y&1) */
2466 if(FRAME_MBAFF) {
2467 int list;
2468 for(list=0; list<h->list_count; list++){
2469 if(!USES_LIST(mb_type, list))
2470 continue;
2471 if(IS_16X16(mb_type)){
2472 int8_t *ref = &h->ref_cache[list][scan8[0]];
2473 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
2474 }else{
2475 for(i=0; i<16; i+=4){
2476 //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
2477 int ref = h->ref_cache[list][scan8[i]];
2478 if(ref >= 0)
2479 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
2484 } else {
2485 linesize = h->mb_linesize = s->linesize;
2486 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2487 // dct_offset = s->linesize * 16;
/* residual add functions: add_pixels8/add_pixels4 in bypass mode, otherwise
 * the 8x8 or 4x4 idct depending on the macroblock's transform size */
2490 if(transform_bypass){
2491 idct_dc_add =
2492 idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
2493 }else if(IS_8x8DCT(mb_type)){
2494 idct_dc_add = s->dsp.h264_idct8_dc_add;
2495 idct_add = s->dsp.h264_idct8_add;
2496 }else{
2497 idct_dc_add = s->dsp.h264_idct_dc_add;
2498 idct_add = s->dsp.h264_idct_add;
/* MBAFF + deblocking + intra: exchange the pair borders once per pair, on
 * the top macroblock or on the bottom one when the macroblock above it is
 * not intra */
2501 if(!simple && FRAME_MBAFF && h->deblocking_filter && IS_INTRA(mb_type)
2502 && (!bottom || !IS_INTRA(s->current_picture.mb_type[mb_xy-s->mb_stride]))){
2503 int mbt_y = mb_y&~1;
2504 uint8_t *top_y = s->current_picture.data[0] + (mbt_y * 16* s->linesize ) + mb_x * 16;
2505 uint8_t *top_cb = s->current_picture.data[1] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2506 uint8_t *top_cr = s->current_picture.data[2] + (mbt_y * 8 * s->uvlinesize) + mb_x * 8;
2507 xchg_pair_border(h, top_y, top_cb, top_cr, s->linesize, s->uvlinesize, 1);
/* PCM macroblock: samples are copied from h->mb, 16 luma blocks followed by
 * 4 Cb and 4 Cr blocks of 4x4 samples each */
2510 if (!simple && IS_INTRA_PCM(mb_type)) {
2511 unsigned int x, y;
2513 // The pixels are stored in h->mb array in the same order as levels,
2514 // copy them in output in the correct order.
2515 for(i=0; i<16; i++) {
2516 for (y=0; y<4; y++) {
2517 for (x=0; x<4; x++) {
2518 *(dest_y + block_offset[i] + y*linesize + x) = h->mb[i*16+y*4+x];
2522 for(i=16; i<16+4; i++) {
2523 for (y=0; y<4; y++) {
2524 for (x=0; x<4; x++) {
2525 *(dest_cb + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2529 for(i=20; i<20+4; i++) {
2530 for (y=0; y<4; y++) {
2531 for (x=0; x<4; x++) {
2532 *(dest_cr + block_offset[i] + y*uvlinesize + x) = h->mb[i*16+y*4+x];
2536 } else {
/* intra macroblock: the border exchange before (xchg=1) and after (xchg=0)
 * prediction is done here only outside MBAFF */
2537 if(IS_INTRA(mb_type)){
2538 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2539 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
2541 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2542 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
2543 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
/* IS_INTRA4x4: per-block prediction immediately followed by that block's
 * residual (8x8 blocks when the 8x8 transform is used, 4x4 otherwise) */
2546 if(IS_INTRA4x4(mb_type)){
2547 if(simple || !s->encoding){
2548 if(IS_8x8DCT(mb_type)){
2549 for(i=0; i<16; i+=4){
2550 uint8_t * const ptr= dest_y + block_offset[i];
2551 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2552 const int nnz = h->non_zero_count_cache[ scan8[i] ];
2553 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
2554 (h->topright_samples_available<<i)&0x4000, linesize);
2555 if(nnz){
2556 if(nnz == 1 && h->mb[i*16])
2557 idct_dc_add(ptr, h->mb + i*16, linesize);
2558 else
2559 idct_add(ptr, h->mb + i*16, linesize);
2562 }else
2563 for(i=0; i<16; i++){
2564 uint8_t * const ptr= dest_y + block_offset[i];
2565 uint8_t *topright;
2566 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
2567 int nnz, tr;
/* these two modes read top-right samples; when unavailable, the sample at
 * ptr[3 - linesize] is replicated four times into tr and used instead */
2569 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
2570 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
2571 assert(mb_y || linesize <= block_offset[i]);
2572 if(!topright_avail){
2573 tr= ptr[3 - linesize]*0x01010101;
2574 topright= (uint8_t*) &tr;
2575 }else
2576 topright= ptr + 4 - linesize;
2577 }else
2578 topright= NULL;
2580 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
2581 nnz = h->non_zero_count_cache[ scan8[i] ];
2582 if(nnz){
2583 if(is_h264){
2584 if(nnz == 1 && h->mb[i*16])
2585 idct_dc_add(ptr, h->mb + i*16, linesize);
2586 else
2587 idct_add(ptr, h->mb + i*16, linesize);
2588 }else
2589 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
/* other intra macroblocks: one 16x16 prediction, then the luma DC
 * dequant/idct (skipped in bypass mode for H.264) */
2593 }else{
2594 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
2595 if(is_h264){
2596 if(!transform_bypass)
2597 h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
2598 }else
2599 svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
2601 if(h->deblocking_filter && (simple || !FRAME_MBAFF))
2602 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
/* non-intra H.264 macroblock: motion compensated prediction */
2603 }else if(is_h264){
2604 hl_motion(h, dest_y, dest_cb, dest_cr,
2605 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
2606 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
2607 s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
/* luma residual for everything except IS_INTRA4x4 (already added above) */
2611 if(!IS_INTRA4x4(mb_type)){
2612 if(is_h264){
2613 if(IS_INTRA16x16(mb_type)){
2614 for(i=0; i<16; i++){
2615 if(h->non_zero_count_cache[ scan8[i] ])
2616 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2617 else if(h->mb[i*16])
2618 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2620 }else{
/* step 4 visits one block per 8x8 transform, step 1 every 4x4 block */
2621 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
2622 for(i=0; i<16; i+=di){
2623 int nnz = h->non_zero_count_cache[ scan8[i] ];
2624 if(nnz){
2625 if(nnz==1 && h->mb[i*16])
2626 idct_dc_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2627 else
2628 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
2632 }else{
2633 for(i=0; i<16; i++){
2634 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
2635 uint8_t * const ptr= dest_y + block_offset[i];
2636 svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
/* chroma residual: blocks 16..19 go to Cb, 20..23 to Cr (dest[(i&4)>>2]) */
2642 if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
2643 uint8_t *dest[2] = {dest_cb, dest_cr};
2644 if(transform_bypass){
2645 idct_add = idct_dc_add = s->dsp.add_pixels4;
2646 }else{
2647 idct_add = s->dsp.h264_idct_add;
2648 idct_dc_add = s->dsp.h264_idct_dc_add;
2649 chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
2650 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
2652 if(is_h264){
2653 for(i=16; i<16+8; i++){
2654 if(h->non_zero_count_cache[ scan8[i] ])
2655 idct_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2656 else if(h->mb[i*16])
2657 idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
2659 }else{
2660 for(i=16; i<16+8; i++){
2661 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
2662 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
2663 svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
/* deblocking: in MBAFF a whole pair is filtered at once, and only when the
 * bottom macroblock is reached; otherwise this macroblock is filtered now */
2669 if(h->deblocking_filter) {
2670 if (!simple && FRAME_MBAFF) {
2671 //FIXME try deblocking one mb at a time?
2672 // the reduction in load/storing mvs and such might outweigh the extra backup/xchg_border
/* note: mb_y and mb_xy are redeclared here and refer to the top macroblock
 * of the pair */
2673 const int mb_y = s->mb_y - 1;
2674 uint8_t *pair_dest_y, *pair_dest_cb, *pair_dest_cr;
2675 const int mb_xy= mb_x + mb_y*s->mb_stride;
2676 const int mb_type_top = s->current_picture.mb_type[mb_xy];
2677 const int mb_type_bottom= s->current_picture.mb_type[mb_xy+s->mb_stride];
2678 if (!bottom) return;
2679 pair_dest_y = s->current_picture.data[0] + (mb_y * 16* s->linesize ) + mb_x * 16;
2680 pair_dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2681 pair_dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
2683 if(IS_INTRA(mb_type_top | mb_type_bottom))
2684 xchg_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize, 0);
2686 backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
2687 // deblock a pair
2688 // top
/* s->mb_y and h->mb_xy are temporarily moved to the top macroblock and
 * restored before the bottom one is filtered */
2689 s->mb_y--; h->mb_xy -= s->mb_stride;
2690 tprintf(h->s.avctx, "call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
2691 fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
2692 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2693 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2694 filter_mb(h, mb_x, mb_y, pair_dest_y, pair_dest_cb, pair_dest_cr, linesize, uvlinesize);
2695 // bottom
2696 s->mb_y++; h->mb_xy += s->mb_stride;
2697 tprintf(h->s.avctx, "call mbaff filter_mb\n");
2698 fill_caches(h, mb_type_bottom, 1); //FIXME don't fill stuff which isn't used by filter_mb
2699 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2700 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy+s->mb_stride]);
2701 filter_mb(h, mb_x, mb_y+1, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2702 } else {
2703 tprintf(h->s.avctx, "call filter_mb\n");
2704 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, simple);
2705 fill_caches(h, mb_type, 1); //FIXME don't fill stuff which isn't used by filter_mb
2706 filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2712 * Process a macroblock; this case avoids checks for expensive uncommon cases.
2714 static void hl_decode_mb_simple(H264Context *h){
2715 hl_decode_mb_internal(h, 1);
2719 * Process a macroblock; this handles edge cases, such as interlacing.
2721 static void av_noinline hl_decode_mb_complex(H264Context *h){
2722 hl_decode_mb_internal(h, 0);
2725 static void hl_decode_mb(H264Context *h){
2726 MpegEncContext * const s = &h->s;
2727 const int mb_xy= h->mb_xy;
2728 const int mb_type= s->current_picture.mb_type[mb_xy];
2729 int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
2731 if(!s->decode)
2732 return;
2734 if (is_complex)
2735 hl_decode_mb_complex(h);
2736 else hl_decode_mb_simple(h);
2739 static void pic_as_field(Picture *pic, const int parity){
2740 int i;
2741 for (i = 0; i < 4; ++i) {
2742 if (parity == PICT_BOTTOM_FIELD)
2743 pic->data[i] += pic->linesize[i];
2744 pic->reference = parity;
2745 pic->linesize[i] *= 2;
2749 static int split_field_copy(Picture *dest, Picture *src,
2750 int parity, int id_add){
2751 int match = !!(src->reference & parity);
2753 if (match) {
2754 *dest = *src;
2755 pic_as_field(dest, parity);
2756 dest->pic_id *= 2;
2757 dest->pic_id += id_add;
2760 return match;
2764 * Split one reference list into field parts, interleaving by parity
2765 * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
2766 * set to look at the actual start of data for that field.
2768 * @param dest output list
2769 * @param dest_len maximum number of fields to put in dest
2770 * @param src the source reference list containing fields and/or field pairs
2771 * (aka short_ref/long_ref, or
2772 * refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
2773 * @param src_len number of Picture's in source (pairs and unmatched fields)
2774 * @param parity the parity of the picture being decoded/needing
2775 * these ref pics (PICT_{TOP,BOTTOM}_FIELD)
2776 * @return number of fields placed in dest
2778 static int split_field_half_ref_list(Picture *dest, int dest_len,
2779 Picture *src, int src_len, int parity){
2780 int same_parity = 1;
2781 int same_i = 0;
2782 int opp_i = 0;
2783 int out_i;
2784 int field_output;
2786 for (out_i = 0; out_i < dest_len; out_i += field_output) {
2787 if (same_parity && same_i < src_len) {
2788 field_output = split_field_copy(dest + out_i, src + same_i,
2789 parity, 1);
2790 same_parity = !field_output;
2791 same_i++;
2793 } else if (opp_i < src_len) {
2794 field_output = split_field_copy(dest + out_i, src + opp_i,
2795 PICT_FRAME - parity, 0);
2796 same_parity = field_output;
2797 opp_i++;
2799 } else {
2800 break;
2804 return out_i;
2808 * Split the reference frame list into a reference field list.
2809 * This implements H.264 spec 8.2.4.2.5 for a combined input list.
2810 * The input list contains both reference field pairs and
2811 * unmatched reference fields; it is ordered as spec describes
2812 * RefPicListX for frames in 8.2.4.2.1 and 8.2.4.2.3, except that
2813 * unmatched field pairs are also present. Conceptually this is equivalent
2814 * to concatenation of refFrameListXShortTerm with refFrameListLongTerm.
2816 * @param dest output reference list where ordered fields are to be placed
2817 * @param dest_len max number of fields to place at dest
2818 * @param src source reference list, as described above
2819 * @param src_len number of pictures (pairs and unmatched fields) in src
2820 * @param parity parity of field being currently decoded
2821 * (one of PICT_{TOP,BOTTOM}_FIELD)
2822 * @param long_i index into src array that holds first long reference picture,
2823 * or src_len if no long refs present.
2825 static int split_field_ref_list(Picture *dest, int dest_len,
2826 Picture *src, int src_len,
2827 int parity, int long_i){
2829 int i = split_field_half_ref_list(dest, dest_len, src, long_i, parity);
2830 dest += i;
2831 dest_len -= i;
2833 i += split_field_half_ref_list(dest, dest_len, src + long_i,
2834 src_len - long_i, parity);
2835 return i;
2839 * fills the default_ref_list.
// Build the default (pre-reordering) reference lists in h->default_ref_list[]
// from h->short_ref[] and h->long_ref[].
// B slices fill both lists, all other slice types fill list 0 only.
// For field pictures the lists are first assembled in local scratch arrays
// and then split into fields by split_field_ref_list().
// Entries between the number of pictures found and h->ref_count[list] are zeroed.
// Returns 0 on success, -1 when the B slice walk over the sorted short term
// list runs out of range in its final direction (see the while loop below).
2841 static int fill_default_ref_list(H264Context *h){
2842 MpegEncContext * const s = &h->s;
2843 int i;
2844 int smallest_poc_greater_than_current = -1;
2845 int structure_sel;
2846 Picture sorted_short_ref[32];
2847 Picture field_entry_list[2][32];
2848 Picture *frame_list[2];
// structure_sel is ORed into each candidate reference marking before it is
// compared with PICT_FRAME further down. NOTE(review): with PICT_FRAME here
// single field references presumably pass that test too - confirm against
// the PICT_* values in mpegvideo.h.
2850 if (FIELD_PICTURE) {
2851 structure_sel = PICT_FRAME;
2852 frame_list[0] = field_entry_list[0];
2853 frame_list[1] = field_entry_list[1];
2854 } else {
2855 structure_sel = 0;
2856 frame_list[0] = h->default_ref_list[0];
2857 frame_list[1] = h->default_ref_list[1];
2860 if(h->slice_type==FF_B_TYPE){
2861 int list;
2862 int len[2];
2863 int short_len[2];
2864 int out_i;
2865 int limit= INT_MIN;
// Selection sort: each pass picks the smallest POC that is still greater
// than limit (the POC emitted by the previous pass) and copies that
// picture into sorted_short_ref[out_i].
2867 /* sort frame according to poc in B slice */
2868 for(out_i=0; out_i<h->short_ref_count; out_i++){
2869 int best_i=INT_MIN;
2870 int best_poc=INT_MAX;
2872 for(i=0; i<h->short_ref_count; i++){
2873 const int poc= h->short_ref[i]->poc;
2874 if(poc > limit && poc < best_poc){
2875 best_poc= poc;
2876 best_i= i;
2880 assert(best_i != INT_MIN);
2882 limit= best_poc;
2883 sorted_short_ref[out_i]= *h->short_ref[best_i];
2884 tprintf(h->s.avctx, "sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
// Remember the first sorted index whose POC is >= the POC of the current picture.
2885 if (-1 == smallest_poc_greater_than_current) {
2886 if (h->short_ref[best_i]->poc >= s->current_picture_ptr->poc) {
2887 smallest_poc_greater_than_current = out_i;
2892 tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
2894 // find the largest poc
2895 for(list=0; list<2; list++){
2896 int index = 0;
2897 int j= -99;
2898 int step= list ? -1 : 1;
// j walks sorted_short_ref starting next to smallest_poc_greater_than_current.
// Whenever j is out of range the while loop flips step and re-seeds j; if j is
// out of range while step already has its initial sign for this list (and j
// is not the -99 start marker) the function returns -1.
2900 for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
2901 int sel;
2902 while(j<0 || j>= h->short_ref_count){
2903 if(j != -99 && step == (list ? -1 : 1))
2904 return -1;
2905 step = -step;
2906 j= smallest_poc_greater_than_current + (step>>1);
2908 sel = sorted_short_ref[j].reference | structure_sel;
2909 if(sel != PICT_FRAME) continue;
2910 frame_list[list][index ]= sorted_short_ref[j];
2911 frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
2913 short_len[list] = index;
// Append long term pictures; their pic_id is the index into h->long_ref.
2915 for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
2916 int sel;
2917 if(h->long_ref[i] == NULL) continue;
2918 sel = h->long_ref[i]->reference | structure_sel;
2919 if(sel != PICT_FRAME) continue;
2921 frame_list[ list ][index ]= *h->long_ref[i];
2922 frame_list[ list ][index++].pic_id= i;
2924 len[list] = index;
2927 for(list=0; list<2; list++){
// Field pictures: convert the scratch frame list into the final field list.
2928 if (FIELD_PICTURE)
2929 len[list] = split_field_ref_list(h->default_ref_list[list],
2930 h->ref_count[list],
2931 frame_list[list],
2932 len[list],
2933 s->picture_structure,
2934 short_len[list]);
// For list 1 with equal lengths greater than one: compare data[0] of both
// lists entry by entry; the loop stops at the first mismatch, or swaps the
// first two entries of list 1 once i reaches len[0].
2936 // swap the two first elements of L1 when L0 and L1 are identical
2937 if(list && len[0] > 1 && len[0] == len[1])
2938 for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
2939 if(i == len[0]){
2940 FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
2941 break;
// Zero the unused tail of the list up to ref_count[list].
2944 if(len[list] < h->ref_count[ list ])
2945 memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
2949 }else{
// Non B slices: short term pictures in h->short_ref order, then long term
// pictures in index order, all into list 0.
2950 int index=0;
2951 int short_len;
2952 for(i=0; i<h->short_ref_count; i++){
2953 int sel;
2954 sel = h->short_ref[i]->reference | structure_sel;
2955 if(sel != PICT_FRAME) continue;
2956 frame_list[0][index ]= *h->short_ref[i];
2957 frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
2959 short_len = index;
2960 for(i = 0; i < 16; i++){
2961 int sel;
2962 if(h->long_ref[i] == NULL) continue;
2963 sel = h->long_ref[i]->reference | structure_sel;
2964 if(sel != PICT_FRAME) continue;
2965 frame_list[0][index ]= *h->long_ref[i];
2966 frame_list[0][index++].pic_id= i;
2969 if (FIELD_PICTURE)
2970 index = split_field_ref_list(h->default_ref_list[0],
2971 h->ref_count[0], frame_list[0],
2972 index, s->picture_structure,
2973 short_len);
2975 if(index < h->ref_count[0])
2976 memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
// Trace builds only: dump the resulting lists.
2978 #ifdef TRACE
2979 for (i=0; i<h->ref_count[0]; i++) {
2980 tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
2982 if(h->slice_type==FF_B_TYPE){
2983 for (i=0; i<h->ref_count[1]; i++) {
2984 tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
2987 #endif
2988 return 0;
2991 static void print_short_term(H264Context *h);
2992 static void print_long_term(H264Context *h);
2995 * Extract structure information about the picture described by pic_num in
2996 * the current decoding context (frame or field). Note that pic_num is
2997 * picture number without wrapping (so, 0<=pic_num<max_pic_num).
2998 * @param pic_num picture number for which to extract structure information
2999 * @param structure one of PICT_XXX describing structure of picture
3000 * with pic_num
3001 * @return frame number (short term) or long term index of picture
3002 * described by pic_num
3004 static int pic_num_extract(H264Context *h, int pic_num, int *structure){
3005 MpegEncContext * const s = &h->s;
3007 *structure = s->picture_structure;
3008 if(FIELD_PICTURE){
3009 if (!(pic_num & 1))
3010 /* opposite field */
3011 *structure ^= PICT_FRAME;
3012 pic_num >>= 1;
3015 return pic_num;
// Parse the reference picture list reordering syntax from s->gb and build
// h->ref_list[] for every list in use (h->list_count).
// Each list starts as a copy of h->default_ref_list[list]; if the leading flag
// bit is set, reordering commands are read until idc 3 is seen.
// Afterwards every entry whose data[0] is still NULL is replaced by
// s->current_picture.
// Returns 0 on success (immediately for I and SI slices) and -1 on invalid
// syntax: more commands than ref_count[list], abs_diff_pic_num larger than
// max_pic_num, long term index above 31, or an idc above 3.
3018 static int decode_ref_pic_list_reordering(H264Context *h){
3019 MpegEncContext * const s = &h->s;
3020 int list, index, pic_structure;
3022 print_short_term(h);
3023 print_long_term(h);
3024 if(h->slice_type==FF_I_TYPE || h->slice_type==FF_SI_TYPE) return 0; //FIXME move before func
3026 for(list=0; list<h->list_count; list++){
3027 memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
3029 if(get_bits1(&s->gb)){
// pred is the running picture number prediction, seeded with the current picture number.
3030 int pred= h->curr_pic_num;
3032 for(index=0; ; index++){
3033 unsigned int reordering_of_pic_nums_idc= get_ue_golomb(&s->gb);
3034 unsigned int pic_id;
3035 int i;
3036 Picture *ref = NULL;
3038 if(reordering_of_pic_nums_idc==3)
3039 break;
3041 if(index >= h->ref_count[list]){
3042 av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
3043 return -1;
3046 if(reordering_of_pic_nums_idc<3){
// idc 0 and 1: short term picture addressed by a difference to pred.
3047 if(reordering_of_pic_nums_idc<2){
3048 const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
3049 int frame_num;
3051 if(abs_diff_pic_num > h->max_pic_num){
3052 av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
3053 return -1;
// idc 0 subtracts, idc 1 adds; the result is masked with max_pic_num - 1
// (assumes max_pic_num is a power of two - TODO confirm).
3056 if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
3057 else pred+= abs_diff_pic_num;
3058 pred &= h->max_pic_num - 1;
3060 frame_num = pic_num_extract(h, pred, &pic_structure);
// Search the short term list from its last entry backwards for a picture with
// this frame_num that is referenced with the wanted parity; i ends below
// zero when nothing matches.
3062 for(i= h->short_ref_count-1; i>=0; i--){
3063 ref = h->short_ref[i];
3064 assert(ref->reference);
3065 assert(!ref->long_ref);
3066 if(ref->data[0] != NULL &&
3067 ref->frame_num == frame_num &&
3068 (ref->reference & pic_structure) &&
3069 ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
3070 break;
// Store the picture number on the match so the duplicate scan below can compare pic_id.
3072 if(i>=0)
3073 ref->pic_id= pred;
3074 }else{
// idc 2: long term picture addressed directly; i is set to 0 when found and -1 when not.
3075 int long_idx;
3076 pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
3078 long_idx= pic_num_extract(h, pic_id, &pic_structure);
3080 if(long_idx>31){
3081 av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
3082 return -1;
3084 ref = h->long_ref[long_idx];
3085 assert(!(ref && !ref->reference));
3086 if(ref && (ref->reference & pic_structure)){
3087 ref->pic_id= pic_id;
3088 assert(ref->long_ref);
3089 i=0;
3090 }else{
3091 i=-1;
// No matching picture: log it and blank this slot of the list.
3095 if (i < 0) {
3096 av_log(h->s.avctx, AV_LOG_ERROR, "reference picture missing during reorder\n");
3097 memset(&h->ref_list[list][index], 0, sizeof(Picture)); //FIXME
3098 } else {
// Find where ref currently sits at or after index (the scan stops at the last
// slot at the latest), shift the entries in between up by one and place ref
// at index.
3099 for(i=index; i+1<h->ref_count[list]; i++){
3100 if(ref->long_ref == h->ref_list[list][i].long_ref && ref->pic_id == h->ref_list[list][i].pic_id)
3101 break;
3103 for(; i > index; i--){
3104 h->ref_list[list][i]= h->ref_list[list][i-1];
3106 h->ref_list[list][index]= *ref;
// Field pictures: turn the copied entry into a view of the selected field.
3107 if (FIELD_PICTURE){
3108 pic_as_field(&h->ref_list[list][index], pic_structure);
3111 }else{
3112 av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
3113 return -1;
// Replace every entry that has no picture data with the current picture.
3118 for(list=0; list<h->list_count; list++){
3119 for(index= 0; index < h->ref_count[list]; index++){
3120 if(!h->ref_list[list][index].data[0])
3121 h->ref_list[list][index]= s->current_picture;
3125 if(h->slice_type==FF_B_TYPE && !h->direct_spatial_mv_pred)
3126 direct_dist_scale_factor(h);
3127 direct_ref_list_init(h);
3128 return 0;
3131 static void fill_mbaff_ref_list(H264Context *h){
3132 int list, i, j;
3133 for(list=0; list<2; list++){ //FIXME try list_count
3134 for(i=0; i<h->ref_count[list]; i++){
3135 Picture *frame = &h->ref_list[list][i];
3136 Picture *field = &h->ref_list[list][16+2*i];
3137 field[0] = *frame;
3138 for(j=0; j<3; j++)
3139 field[0].linesize[j] <<= 1;
3140 field[0].reference = PICT_TOP_FIELD;
3141 field[1] = field[0];
3142 for(j=0; j<3; j++)
3143 field[1].data[j] += frame->linesize[j];
3144 field[1].reference = PICT_BOTTOM_FIELD;
3146 h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
3147 h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
3148 for(j=0; j<2; j++){
3149 h->chroma_weight[list][16+2*i][j] = h->chroma_weight[list][16+2*i+1][j] = h->chroma_weight[list][i][j];
3150 h->chroma_offset[list][16+2*i][j] = h->chroma_offset[list][16+2*i+1][j] = h->chroma_offset[list][i][j];
3154 for(j=0; j<h->ref_count[1]; j++){
3155 for(i=0; i<h->ref_count[0]; i++)
3156 h->implicit_weight[j][16+2*i] = h->implicit_weight[j][16+2*i+1] = h->implicit_weight[j][i];
3157 memcpy(h->implicit_weight[16+2*j], h->implicit_weight[j], sizeof(*h->implicit_weight));
3158 memcpy(h->implicit_weight[16+2*j+1], h->implicit_weight[j], sizeof(*h->implicit_weight));
3162 static int pred_weight_table(H264Context *h){
3163 MpegEncContext * const s = &h->s;
3164 int list, i;
3165 int luma_def, chroma_def;
3167 h->use_weight= 0;
3168 h->use_weight_chroma= 0;
3169 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
3170 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
3171 luma_def = 1<<h->luma_log2_weight_denom;
3172 chroma_def = 1<<h->chroma_log2_weight_denom;
3174 for(list=0; list<2; list++){
3175 for(i=0; i<h->ref_count[list]; i++){
3176 int luma_weight_flag, chroma_weight_flag;
3178 luma_weight_flag= get_bits1(&s->gb);
3179 if(luma_weight_flag){
3180 h->luma_weight[list][i]= get_se_golomb(&s->gb);
3181 h->luma_offset[list][i]= get_se_golomb(&s->gb);
3182 if( h->luma_weight[list][i] != luma_def
3183 || h->luma_offset[list][i] != 0)
3184 h->use_weight= 1;
3185 }else{
3186 h->luma_weight[list][i]= luma_def;
3187 h->luma_offset[list][i]= 0;
3190 chroma_weight_flag= get_bits1(&s->gb);
3191 if(chroma_weight_flag){
3192 int j;
3193 for(j=0; j<2; j++){
3194 h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
3195 h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
3196 if( h->chroma_weight[list][i][j] != chroma_def
3197 || h->chroma_offset[list][i][j] != 0)
3198 h->use_weight_chroma= 1;
3200 }else{
3201 int j;
3202 for(j=0; j<2; j++){
3203 h->chroma_weight[list][i][j]= chroma_def;
3204 h->chroma_offset[list][i][j]= 0;
3208 if(h->slice_type != FF_B_TYPE) break;
3210 h->use_weight= h->use_weight || h->use_weight_chroma;
3211 return 0;
3214 static void implicit_weight_table(H264Context *h){
3215 MpegEncContext * const s = &h->s;
3216 int ref0, ref1;
3217 int cur_poc = s->current_picture_ptr->poc;
3219 if( h->ref_count[0] == 1 && h->ref_count[1] == 1
3220 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
3221 h->use_weight= 0;
3222 h->use_weight_chroma= 0;
3223 return;
3226 h->use_weight= 2;
3227 h->use_weight_chroma= 2;
3228 h->luma_log2_weight_denom= 5;
3229 h->chroma_log2_weight_denom= 5;
3231 for(ref0=0; ref0 < h->ref_count[0]; ref0++){
3232 int poc0 = h->ref_list[0][ref0].poc;
3233 for(ref1=0; ref1 < h->ref_count[1]; ref1++){
3234 int poc1 = h->ref_list[1][ref1].poc;
3235 int td = av_clip(poc1 - poc0, -128, 127);
3236 if(td){
3237 int tb = av_clip(cur_poc - poc0, -128, 127);
3238 int tx = (16384 + (FFABS(td) >> 1)) / td;
3239 int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
3240 if(dist_scale_factor < -64 || dist_scale_factor > 128)
3241 h->implicit_weight[ref0][ref1] = 32;
3242 else
3243 h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
3244 }else
3245 h->implicit_weight[ref0][ref1] = 32;
3251 * Mark a picture as no longer needed for reference. The refmask
3252 * argument allows unreferencing of individual fields or the whole frame.
3253 * If the picture becomes entirely unreferenced, but is being held for
3254 * display purposes, it is marked as such.
3255 * @param refmask mask of fields to unreference; the mask is bitwise
3256 * anded with the reference marking of pic
3257 * @return non-zero if pic becomes entirely unreferenced (except possibly
3258 * for display purposes) zero if one of the fields remains in
3259 * reference
3261 static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
3262 int i;
3263 if (pic->reference &= refmask) {
3264 return 0;
3265 } else {
3266 if(pic == h->delayed_output_pic)
3267 pic->reference=DELAYED_PIC_REF;
3268 else{
3269 for(i = 0; h->delayed_pic[i]; i++)
3270 if(pic == h->delayed_pic[i]){
3271 pic->reference=DELAYED_PIC_REF;
3272 break;
3275 return 1;
3280 * instantaneous decoder refresh.
3282 static void idr(H264Context *h){
3283 int i;
3285 for(i=0; i<16; i++){
3286 if (h->long_ref[i] != NULL) {
3287 unreference_pic(h, h->long_ref[i], 0);
3288 h->long_ref[i]= NULL;
3291 h->long_ref_count=0;
3293 for(i=0; i<h->short_ref_count; i++){
3294 unreference_pic(h, h->short_ref[i], 0);
3295 h->short_ref[i]= NULL;
3297 h->short_ref_count=0;
3300 /* forget old pics after a seek */
3301 static void flush_dpb(AVCodecContext *avctx){
3302 H264Context *h= avctx->priv_data;
3303 int i;
3304 for(i=0; i<16; i++) {
3305 if(h->delayed_pic[i])
3306 h->delayed_pic[i]->reference= 0;
3307 h->delayed_pic[i]= NULL;
3309 if(h->delayed_output_pic)
3310 h->delayed_output_pic->reference= 0;
3311 h->delayed_output_pic= NULL;
3312 idr(h);
3313 if(h->s.current_picture_ptr)
3314 h->s.current_picture_ptr->reference= 0;
3315 h->s.first_field= 0;
3316 ff_mpeg_flush(avctx);
3320 * Find a Picture in the short term reference list by frame number.
3321 * @param frame_num frame number to search for
3322 * @param idx the index into h->short_ref where returned picture is found
3323 * undefined if no picture found.
3324 * @return pointer to the found picture, or NULL if no pic with the provided
3325 * frame number is found
3327 static Picture * find_short(H264Context *h, int frame_num, int *idx){
3328 MpegEncContext * const s = &h->s;
3329 int i;
3331 for(i=0; i<h->short_ref_count; i++){
3332 Picture *pic= h->short_ref[i];
3333 if(s->avctx->debug&FF_DEBUG_MMCO)
3334 av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
3335 if(pic->frame_num == frame_num) {
3336 *idx = i;
3337 return pic;
3340 return NULL;
3344 * Remove a picture from the short term reference list by its index in
3345 * that list. This does no checking on the provided index; it is assumed
3346 * to be valid. Other list entries are shifted down.
3347 * @param i index into h->short_ref of picture to remove.
3349 static void remove_short_at_index(H264Context *h, int i){
3350 assert(i > 0 && i < h->short_ref_count);
3351 h->short_ref[i]= NULL;
3352 if (--h->short_ref_count)
3353 memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
3358 * @return the removed picture or NULL if an error occurs
3360 static Picture * remove_short(H264Context *h, int frame_num){
3361 MpegEncContext * const s = &h->s;
3362 Picture *pic;
3363 int i;
3365 if(s->avctx->debug&FF_DEBUG_MMCO)
3366 av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
3368 pic = find_short(h, frame_num, &i);
3369 if (pic)
3370 remove_short_at_index(h, i);
3372 return pic;
3376 * Remove a picture from the long term reference list by its index in
3377 * that list. This does no checking on the provided index; it is assumed
3378 * to be valid. The removed entry is set to NULL. Other entries are unaffected.
3379 * @param i index into h->long_ref of picture to remove.
3381 static void remove_long_at_index(H264Context *h, int i){
3382 h->long_ref[i]= NULL;
3383 h->long_ref_count--;
3388 * @return the removed picture or NULL if an error occurs
3390 static Picture * remove_long(H264Context *h, int i){
3391 Picture *pic;
3393 pic= h->long_ref[i];
3394 if (pic)
3395 remove_long_at_index(h, i);
3397 return pic;
3401 * print short term list
3403 static void print_short_term(H264Context *h) {
3404 uint32_t i;
3405 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3406 av_log(h->s.avctx, AV_LOG_DEBUG, "short term list:\n");
3407 for(i=0; i<h->short_ref_count; i++){
3408 Picture *pic= h->short_ref[i];
3409 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3415 * print long term list
3417 static void print_long_term(H264Context *h) {
3418 uint32_t i;
3419 if(h->s.avctx->debug&FF_DEBUG_MMCO) {
3420 av_log(h->s.avctx, AV_LOG_DEBUG, "long term list:\n");
3421 for(i = 0; i < 16; i++){
3422 Picture *pic= h->long_ref[i];
3423 if (pic) {
3424 av_log(h->s.avctx, AV_LOG_DEBUG, "%d fn:%d poc:%d %p\n", i, pic->frame_num, pic->poc, pic->data[0]);
3431 * Executes the reference picture marking (memory management control operations).
3433 static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
3434 MpegEncContext * const s = &h->s;
3435 int i, j;
3436 int current_ref_assigned=0;
3437 Picture *pic;
3439 if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
3440 av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
3442 for(i=0; i<mmco_count; i++){
3443 int structure, frame_num, unref_pic;
3444 if(s->avctx->debug&FF_DEBUG_MMCO)
3445 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
3447 switch(mmco[i].opcode){
3448 case MMCO_SHORT2UNUSED:
3449 if(s->avctx->debug&FF_DEBUG_MMCO)
3450 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
3451 frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
3452 pic = find_short(h, frame_num, &j);
3453 if (pic) {
3454 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3455 remove_short_at_index(h, j);
3456 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3457 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
3458 break;
3459 case MMCO_SHORT2LONG:
3460 if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
3461 h->long_ref[mmco[i].long_arg]->frame_num ==
3462 mmco[i].short_pic_num / 2) {
3463 /* do nothing, we've already moved this field pair. */
3464 } else {
3465 int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
3467 pic= remove_long(h, mmco[i].long_arg);
3468 if(pic) unreference_pic(h, pic, 0);
3470 h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
3471 if (h->long_ref[ mmco[i].long_arg ]){
3472 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3473 h->long_ref_count++;
3476 break;
3477 case MMCO_LONG2UNUSED:
3478 j = pic_num_extract(h, mmco[i].long_arg, &structure);
3479 pic = h->long_ref[j];
3480 if (pic) {
3481 if (unreference_pic(h, pic, structure ^ PICT_FRAME))
3482 remove_long_at_index(h, j);
3483 } else if(s->avctx->debug&FF_DEBUG_MMCO)
3484 av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
3485 break;
3486 case MMCO_LONG:
3487 unref_pic = 1;
3488 if (FIELD_PICTURE && !s->first_field) {
3489 if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
3490 /* Just mark second field as referenced */
3491 unref_pic = 0;
3492 } else if (s->current_picture_ptr->reference) {
3493 /* First field in pair is in short term list or
3494 * at a different long term index.
3495 * This is not allowed; see 7.4.3, notes 2 and 3.
3496 * Report the problem and keep the pair where it is,
3497 * and mark this field valid.
3499 av_log(h->s.avctx, AV_LOG_ERROR,
3500 "illegal long term reference assignment for second "
3501 "field in complementary field pair (first field is "
3502 "short term or has non-matching long index)\n");
3503 unref_pic = 0;
3507 if (unref_pic) {
3508 pic= remove_long(h, mmco[i].long_arg);
3509 if(pic) unreference_pic(h, pic, 0);
3511 h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
3512 h->long_ref[ mmco[i].long_arg ]->long_ref=1;
3513 h->long_ref_count++;
3516 s->current_picture_ptr->reference |= s->picture_structure;
3517 current_ref_assigned=1;
3518 break;
3519 case MMCO_SET_MAX_LONG:
3520 assert(mmco[i].long_arg <= 16);
3521 // just remove the long term which index is greater than new max
3522 for(j = mmco[i].long_arg; j<16; j++){
3523 pic = remove_long(h, j);
3524 if (pic) unreference_pic(h, pic, 0);
3526 break;
3527 case MMCO_RESET:
3528 while(h->short_ref_count){
3529 pic= remove_short(h, h->short_ref[0]->frame_num);
3530 if(pic) unreference_pic(h, pic, 0);
3532 for(j = 0; j < 16; j++) {
3533 pic= remove_long(h, j);
3534 if(pic) unreference_pic(h, pic, 0);
3536 break;
3537 default: assert(0);
3541 if (!current_ref_assigned && FIELD_PICTURE &&
3542 !s->first_field && s->current_picture_ptr->reference) {
3544 /* Second field of complementary field pair; the first field of
3545 * which is already referenced. If short referenced, it
3546 * should be first entry in short_ref. If not, it must exist
3547 * in long_ref; trying to put it on the short list here is an
3548 * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
3550 if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
3551 /* Just mark the second field valid */
3552 s->current_picture_ptr->reference = PICT_FRAME;
3553 } else if (s->current_picture_ptr->long_ref) {
3554 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
3555 "assignment for second field "
3556 "in complementary field pair "
3557 "(first field is long term)\n");
3558 } else {
3560 * First field in reference, but not in any sensible place on our
3561 * reference lists. This shouldn't happen unless reference
3562 * handling somewhere else is wrong.
3564 assert(0);
3566 current_ref_assigned = 1;
3569 if(!current_ref_assigned){
3570 pic= remove_short(h, s->current_picture_ptr->frame_num);
3571 if(pic){
3572 unreference_pic(h, pic, 0);
3573 av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
3576 if(h->short_ref_count)
3577 memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
3579 h->short_ref[0]= s->current_picture_ptr;
3580 h->short_ref[0]->long_ref=0;
3581 h->short_ref_count++;
3582 s->current_picture_ptr->reference |= s->picture_structure;
3585 if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
3587 /* We have too many reference frames, probably due to corrupted
3588 * stream. Need to discard one frame. Prevents overrun of the
3589 * short_ref and long_ref buffers.
3591 av_log(h->s.avctx, AV_LOG_ERROR,
3592 "number of reference frames exceeds max (probably "
3593 "corrupt input), discarding one\n");
3595 if (h->long_ref_count) {
3596 for (i = 0; i < 16; ++i)
3597 if (h->long_ref[i])
3598 break;
3600 assert(i < 16);
3601 pic = h->long_ref[i];
3602 remove_long_at_index(h, i);
3603 } else {
3604 pic = h->short_ref[h->short_ref_count - 1];
3605 remove_short_at_index(h, h->short_ref_count - 1);
3607 unreference_pic(h, pic, 0);
3610 print_short_term(h);
3611 print_long_term(h);
3612 return 0;
// Parse the decoded reference picture marking syntax from gb into
// h->mmco[] and h->mmco_index (the number of operations stored).
// Nothing is applied to the reference lists here.
// Returns 0 on success, -1 on an illegal long term argument or opcode.
3615 static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
3616 MpegEncContext * const s = &h->s;
3617 int i;
// IDR slices carry two flag bits: the first, minus one, is stored in
// s->broken_link; the second, minus one, is stored in mmco[0].long_arg.
// A cleared second bit (long_arg == -1) yields no operation, a set bit
// yields a single MMCO_LONG with long_arg 0.
3619 if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
3620 s->broken_link= get_bits1(gb) -1;
3621 h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
3622 if(h->mmco[0].long_arg == -1)
3623 h->mmco_index= 0;
3624 else{
3625 h->mmco[0].opcode= MMCO_LONG;
3626 h->mmco_index= 1;
3628 }else{
// Adaptive mode: read up to MAX_MMCO_COUNT explicit operations; MMCO_END stops the loop.
3629 if(get_bits1(gb)){ // adaptive_ref_pic_marking_mode_flag
3630 for(i= 0; i<MAX_MMCO_COUNT; i++) {
3631 MMCOOpcode opcode= get_ue_golomb(gb);
3633 h->mmco[i].opcode= opcode;
// Short term operand: coded as a difference to the current picture number
// and masked with max_pic_num - 1.
3634 if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
3635 h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
3636 /* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
3637 av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
3638 return -1;
// Long term operand: must be below 16, except that MMCO_LONG2UNUSED in a
// field picture may use values up to 31.
3641 if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
3642 unsigned int long_arg= get_ue_golomb(gb);
3643 if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
3644 av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
3645 return -1;
3647 h->mmco[i].long_arg= long_arg;
// Opcodes numerically above MMCO_LONG are rejected.
3650 if(opcode > (unsigned)MMCO_LONG){
3651 av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
3652 return -1;
3654 if(opcode == MMCO_END)
3655 break;
3657 h->mmco_index= i;
3658 }else{
// Sliding window mode: when short term references exist, the lists are
// exactly full and this is not the second field of an already referenced
// pair, schedule removal of the last short_ref entry. Field pictures get
// two operations, one per picture number 2*frame_num and 2*frame_num + 1.
3659 assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
3661 if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
3662 !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
3663 h->mmco[0].opcode= MMCO_SHORT2UNUSED;
3664 h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
3665 h->mmco_index= 1;
3666 if (FIELD_PICTURE) {
3667 h->mmco[0].short_pic_num *= 2;
3668 h->mmco[1].opcode= MMCO_SHORT2UNUSED;
3669 h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
3670 h->mmco_index= 2;
3672 }else
3673 h->mmco_index= 0;
3677 return 0;
// Compute the picture order count of the current picture from values
// already stored in h (frame_num, poc_lsb, delta_poc...) and in h->sps,
// and store the result in s->current_picture_ptr (field_poc[] and poc).
// The derivation depends on h->sps.poc_type (0, 1, or anything else).
// Always returns 0.
3680 static int init_poc(H264Context *h){
3681 MpegEncContext * const s = &h->s;
3682 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
3683 int field_poc[2];
// frame_num_offset: reset on IDR; otherwise it grows by max_frame_num
// whenever frame_num is smaller than the previous frame_num.
3685 if(h->nal_unit_type == NAL_IDR_SLICE){
3686 h->frame_num_offset= 0;
3687 }else{
3688 if(h->frame_num < h->prev_frame_num)
3689 h->frame_num_offset= h->prev_frame_num_offset + max_frame_num;
3690 else
3691 h->frame_num_offset= h->prev_frame_num_offset;
// poc_type 0: POC is poc_msb + poc_lsb, with poc_msb tracked across pictures.
3694 if(h->sps.poc_type==0){
3695 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
3697 if(h->nal_unit_type == NAL_IDR_SLICE){
3698 h->prev_poc_msb=
3699 h->prev_poc_lsb= 0;
// The lsb dropped by at least half its range: add max_poc_lsb to the msb.
// The lsb rose by more than half its range: subtract max_poc_lsb.
// Otherwise the msb is unchanged.
3702 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
3703 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
3704 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
3705 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
3706 else
3707 h->poc_msb = h->prev_poc_msb;
3708 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
3709 field_poc[0] =
3710 field_poc[1] = h->poc_msb + h->poc_lsb;
// In frame pictures the bottom field additionally gets delta_poc_bottom.
3711 if(s->picture_structure == PICT_FRAME)
3712 field_poc[1] += h->delta_poc_bottom;
3713 }else if(h->sps.poc_type==1){
// poc_type 1: POC is an expected value built from the per-frame offsets in
// the SPS cycle plus the deltas stored in h->delta_poc[].
3714 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
3715 int i;
3717 if(h->sps.poc_cycle_length != 0)
3718 abs_frame_num = h->frame_num_offset + h->frame_num;
3719 else
3720 abs_frame_num = 0;
// Non reference pictures use the previous absolute frame number.
3722 if(h->nal_ref_idc==0 && abs_frame_num > 0)
3723 abs_frame_num--;
// Sum of all offsets of one complete cycle.
3725 expected_delta_per_poc_cycle = 0;
3726 for(i=0; i < h->sps.poc_cycle_length; i++)
3727 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
// Whole cycles elapsed plus the offsets of the partial cycle up to this
// frame. abs_frame_num > 0 implies poc_cycle_length != 0 here, since it
// is forced to 0 above otherwise.
3729 if(abs_frame_num > 0){
3730 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
3731 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
3733 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
3734 for(i = 0; i <= frame_num_in_poc_cycle; i++)
3735 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
3736 } else
3737 expectedpoc = 0;
3739 if(h->nal_ref_idc == 0)
3740 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
3742 field_poc[0] = expectedpoc + h->delta_poc[0];
3743 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
3745 if(s->picture_structure == PICT_FRAME)
3746 field_poc[1] += h->delta_poc[1];
3747 }else{
// Any other poc_type: POC is 0 for IDR, otherwise twice the absolute frame
// number, minus one for non reference pictures. Both fields share it.
3748 int poc;
3749 if(h->nal_unit_type == NAL_IDR_SLICE){
3750 poc= 0;
3751 }else{
3752 if(h->nal_ref_idc) poc= 2*(h->frame_num_offset + h->frame_num);
3753 else poc= 2*(h->frame_num_offset + h->frame_num) - 1;
3755 field_poc[0]= poc;
3756 field_poc[1]= poc;
// Store the field POCs that belong to the current picture structure; poc
// is set to the last field stored.
3759 if(s->picture_structure != PICT_BOTTOM_FIELD) {
3760 s->current_picture_ptr->field_poc[0]= field_poc[0];
3761 s->current_picture_ptr->poc = field_poc[0];
3763 if(s->picture_structure != PICT_TOP_FIELD) {
3764 s->current_picture_ptr->field_poc[1]= field_poc[1];
3765 s->current_picture_ptr->poc = field_poc[1];
// For frame pictures and for second fields, poc becomes the smaller of the
// two stored field POCs.
3767 if(!FIELD_PICTURE || !s->first_field) {
3768 Picture *cur = s->current_picture_ptr;
3769 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
3772 return 0;
3777 * initialize scan tables
3779 static void init_scan_tables(H264Context *h){
3780 MpegEncContext * const s = &h->s;
3781 int i;
3782 if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
3783 memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
3784 memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
3785 }else{
3786 for(i=0; i<16; i++){
3787 #define T(x) (x>>2) | ((x<<2) & 0xF)
3788 h->zigzag_scan[i] = T(zigzag_scan[i]);
3789 h-> field_scan[i] = T( field_scan[i]);
3790 #undef T
3793 if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
3794 memcpy(h->zigzag_scan8x8, zigzag_scan8x8, 64*sizeof(uint8_t));
3795 memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
3796 memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
3797 memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
3798 }else{
3799 for(i=0; i<64; i++){
3800 #define T(x) (x>>3) | ((x&7)<<3)
3801 h->zigzag_scan8x8[i] = T(zigzag_scan8x8[i]);
3802 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
3803 h->field_scan8x8[i] = T(field_scan8x8[i]);
3804 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
3805 #undef T
3808 if(h->sps.transform_bypass){ //FIXME same ugly
3809 h->zigzag_scan_q0 = zigzag_scan;
3810 h->zigzag_scan8x8_q0 = zigzag_scan8x8;
3811 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
3812 h->field_scan_q0 = field_scan;
3813 h->field_scan8x8_q0 = field_scan8x8;
3814 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
3815 }else{
3816 h->zigzag_scan_q0 = h->zigzag_scan;
3817 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
3818 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
3819 h->field_scan_q0 = h->field_scan;
3820 h->field_scan8x8_q0 = h->field_scan8x8;
3821 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
3826 * Replicates H264 "master" context to thread contexts.
3828 static void clone_slice(H264Context *dst, H264Context *src)
3830 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
3831 dst->s.current_picture_ptr = src->s.current_picture_ptr;
3832 dst->s.current_picture = src->s.current_picture;
3833 dst->s.linesize = src->s.linesize;
3834 dst->s.uvlinesize = src->s.uvlinesize;
3835 dst->s.first_field = src->s.first_field;
3837 dst->prev_poc_msb = src->prev_poc_msb;
3838 dst->prev_poc_lsb = src->prev_poc_lsb;
3839 dst->prev_frame_num_offset = src->prev_frame_num_offset;
3840 dst->prev_frame_num = src->prev_frame_num;
3841 dst->short_ref_count = src->short_ref_count;
3843 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
3844 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
3845 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
3846 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
3848 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
3849 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
3853 * decodes a slice header.
3854 * this will allso call MPV_common_init() and frame_start() as needed
3856 * @param h h264context
3857 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
3859 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
3861 static int decode_slice_header(H264Context *h, H264Context *h0){
3862 MpegEncContext * const s = &h->s;
3863 MpegEncContext * const s0 = &h0->s;
3864 unsigned int first_mb_in_slice;
3865 unsigned int pps_id;
3866 int num_ref_idx_active_override_flag;
3867 static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
3868 unsigned int slice_type, tmp, i;
3869 int default_ref_list_done = 0;
3870 int last_pic_structure;
3872 s->dropable= h->nal_ref_idc == 0;
3874 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
3875 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
3876 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
3877 }else{
3878 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
3879 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
3882 first_mb_in_slice= get_ue_golomb(&s->gb);
3884 if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
3885 h0->current_slice = 0;
3886 if (!s0->first_field)
3887 s->current_picture_ptr= NULL;
3890 slice_type= get_ue_golomb(&s->gb);
3891 if(slice_type > 9){
3892 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
3893 return -1;
3895 if(slice_type > 4){
3896 slice_type -= 5;
3897 h->slice_type_fixed=1;
3898 }else
3899 h->slice_type_fixed=0;
3901 slice_type= slice_type_map[ slice_type ];
3902 if (slice_type == FF_I_TYPE
3903 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
3904 default_ref_list_done = 1;
3906 h->slice_type= slice_type;
3908 s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
3909 if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
3910 av_log(h->s.avctx, AV_LOG_ERROR,
3911 "B picture before any references, skipping\n");
3912 return -1;
3915 pps_id= get_ue_golomb(&s->gb);
3916 if(pps_id>=MAX_PPS_COUNT){
3917 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
3918 return -1;
3920 if(!h0->pps_buffers[pps_id]) {
3921 av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
3922 return -1;
3924 h->pps= *h0->pps_buffers[pps_id];
3926 if(!h0->sps_buffers[h->pps.sps_id]) {
3927 av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
3928 return -1;
3930 h->sps = *h0->sps_buffers[h->pps.sps_id];
3932 if(h == h0 && h->dequant_coeff_pps != pps_id){
3933 h->dequant_coeff_pps = pps_id;
3934 init_dequant_tables(h);
3937 s->mb_width= h->sps.mb_width;
3938 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
3940 h->b_stride= s->mb_width*4;
3941 h->b8_stride= s->mb_width*2;
3943 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
3944 if(h->sps.frame_mbs_only_flag)
3945 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
3946 else
3947 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
3949 if (s->context_initialized
3950 && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
3951 if(h != h0)
3952 return -1; // width / height changed during parallelized decoding
3953 free_tables(h);
3954 MPV_common_end(s);
3956 if (!s->context_initialized) {
3957 if(h != h0)
3958 return -1; // we cant (re-)initialize context during parallel decoding
3959 if (MPV_common_init(s) < 0)
3960 return -1;
3961 s->first_field = 0;
3963 init_scan_tables(h);
3964 alloc_tables(h);
3966 for(i = 1; i < s->avctx->thread_count; i++) {
3967 H264Context *c;
3968 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
3969 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
3970 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
3971 c->sps = h->sps;
3972 c->pps = h->pps;
3973 init_scan_tables(c);
3974 clone_tables(c, h);
3977 for(i = 0; i < s->avctx->thread_count; i++)
3978 if(context_init(h->thread_context[i]) < 0)
3979 return -1;
3981 s->avctx->width = s->width;
3982 s->avctx->height = s->height;
3983 s->avctx->sample_aspect_ratio= h->sps.sar;
3984 if(!s->avctx->sample_aspect_ratio.den)
3985 s->avctx->sample_aspect_ratio.den = 1;
3987 if(h->sps.timing_info_present_flag){
3988 s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
3989 if(h->x264_build > 0 && h->x264_build < 44)
3990 s->avctx->time_base.den *= 2;
3991 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
3992 s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
3996 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
3998 h->mb_mbaff = 0;
3999 h->mb_aff_frame = 0;
4000 last_pic_structure = s0->picture_structure;
4001 if(h->sps.frame_mbs_only_flag){
4002 s->picture_structure= PICT_FRAME;
4003 }else{
4004 if(get_bits1(&s->gb)) { //field_pic_flag
4005 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
4006 } else {
4007 s->picture_structure= PICT_FRAME;
4008 h->mb_aff_frame = h->sps.mb_aff;
4012 if(h0->current_slice == 0){
4013 /* See if we have a decoded first field looking for a pair... */
4014 if (s0->first_field) {
4015 assert(s0->current_picture_ptr);
4016 assert(s0->current_picture_ptr->data[0]);
4017 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
4019 /* figure out if we have a complementary field pair */
4020 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
4022 * Previous field is unmatched. Don't display it, but let it
4023 * remain for reference if marked as such.
4025 s0->current_picture_ptr = NULL;
4026 s0->first_field = FIELD_PICTURE;
4028 } else {
4029 if (h->nal_ref_idc &&
4030 s0->current_picture_ptr->reference &&
4031 s0->current_picture_ptr->frame_num != h->frame_num) {
4033 * This and previous field were reference, but had
4034 * different frame_nums. Consider this field first in
4035 * pair. Throw away previous field except for reference
4036 * purposes.
4038 s0->first_field = 1;
4039 s0->current_picture_ptr = NULL;
4041 } else {
4042 /* Second field in complementary pair */
4043 s0->first_field = 0;
4047 } else {
4048 /* Frame or first field in a potentially complementary pair */
4049 assert(!s0->current_picture_ptr);
4050 s0->first_field = FIELD_PICTURE;
4053 if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
4054 s0->first_field = 0;
4055 return -1;
4058 if(h != h0)
4059 clone_slice(h, h0);
4061 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
4063 assert(s->mb_num == s->mb_width * s->mb_height);
4064 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
4065 first_mb_in_slice >= s->mb_num){
4066 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
4067 return -1;
4069 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
4070 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
4071 if (s->picture_structure == PICT_BOTTOM_FIELD)
4072 s->resync_mb_y = s->mb_y = s->mb_y + 1;
4073 assert(s->mb_y < s->mb_height);
4075 if(s->picture_structure==PICT_FRAME){
4076 h->curr_pic_num= h->frame_num;
4077 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
4078 }else{
4079 h->curr_pic_num= 2*h->frame_num + 1;
4080 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
4083 if(h->nal_unit_type == NAL_IDR_SLICE){
4084 get_ue_golomb(&s->gb); /* idr_pic_id */
4087 if(h->sps.poc_type==0){
4088 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
4090 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
4091 h->delta_poc_bottom= get_se_golomb(&s->gb);
4095 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
4096 h->delta_poc[0]= get_se_golomb(&s->gb);
4098 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
4099 h->delta_poc[1]= get_se_golomb(&s->gb);
4102 init_poc(h);
4104 if(h->pps.redundant_pic_cnt_present){
4105 h->redundant_pic_count= get_ue_golomb(&s->gb);
4108 //set defaults, might be overriden a few line later
4109 h->ref_count[0]= h->pps.ref_count[0];
4110 h->ref_count[1]= h->pps.ref_count[1];
4112 if(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE || h->slice_type == FF_B_TYPE){
4113 if(h->slice_type == FF_B_TYPE){
4114 h->direct_spatial_mv_pred= get_bits1(&s->gb);
4116 num_ref_idx_active_override_flag= get_bits1(&s->gb);
4118 if(num_ref_idx_active_override_flag){
4119 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
4120 if(h->slice_type==FF_B_TYPE)
4121 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
4123 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
4124 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
4125 h->ref_count[0]= h->ref_count[1]= 1;
4126 return -1;
4129 if(h->slice_type == FF_B_TYPE)
4130 h->list_count= 2;
4131 else
4132 h->list_count= 1;
4133 }else
4134 h->list_count= 0;
4136 if(!default_ref_list_done){
4137 fill_default_ref_list(h);
4140 if(decode_ref_pic_list_reordering(h) < 0)
4141 return -1;
4143 if( (h->pps.weighted_pred && (h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE ))
4144 || (h->pps.weighted_bipred_idc==1 && h->slice_type==FF_B_TYPE ) )
4145 pred_weight_table(h);
4146 else if(h->pps.weighted_bipred_idc==2 && h->slice_type==FF_B_TYPE)
4147 implicit_weight_table(h);
4148 else
4149 h->use_weight = 0;
4151 if(h->nal_ref_idc)
4152 decode_ref_pic_marking(h0, &s->gb);
4154 if(FRAME_MBAFF)
4155 fill_mbaff_ref_list(h);
4157 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE && h->pps.cabac ){
4158 tmp = get_ue_golomb(&s->gb);
4159 if(tmp > 2){
4160 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
4161 return -1;
4163 h->cabac_init_idc= tmp;
4166 h->last_qscale_diff = 0;
4167 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
4168 if(tmp>51){
4169 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
4170 return -1;
4172 s->qscale= tmp;
4173 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
4174 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
4175 //FIXME qscale / qp ... stuff
4176 if(h->slice_type == FF_SP_TYPE){
4177 get_bits1(&s->gb); /* sp_for_switch_flag */
4179 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
4180 get_se_golomb(&s->gb); /* slice_qs_delta */
4183 h->deblocking_filter = 1;
4184 h->slice_alpha_c0_offset = 0;
4185 h->slice_beta_offset = 0;
4186 if( h->pps.deblocking_filter_parameters_present ) {
4187 tmp= get_ue_golomb(&s->gb);
4188 if(tmp > 2){
4189 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
4190 return -1;
4192 h->deblocking_filter= tmp;
4193 if(h->deblocking_filter < 2)
4194 h->deblocking_filter^= 1; // 1<->0
4196 if( h->deblocking_filter ) {
4197 h->slice_alpha_c0_offset = get_se_golomb(&s->gb) << 1;
4198 h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
4202 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
4203 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != FF_I_TYPE)
4204 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == FF_B_TYPE)
4205 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
4206 h->deblocking_filter= 0;
4208 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
4209 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
4210 /* Cheat slightly for speed:
4211 Do not bother to deblock across slices. */
4212 h->deblocking_filter = 2;
4213 } else {
4214 h0->max_contexts = 1;
4215 if(!h0->single_decode_warning) {
4216 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
4217 h0->single_decode_warning = 1;
4219 if(h != h0)
4220 return 1; // deblocking switched inside frame
4224 #if 0 //FMO
4225 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
4226 slice_group_change_cycle= get_bits(&s->gb, ?);
4227 #endif
4229 h0->last_slice_type = slice_type;
4230 h->slice_num = ++h0->current_slice;
4232 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
4233 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
4235 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
4236 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
4237 h->slice_num,
4238 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
4239 first_mb_in_slice,
4240 av_get_pict_type_char(h->slice_type),
4241 pps_id, h->frame_num,
4242 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
4243 h->ref_count[0], h->ref_count[1],
4244 s->qscale,
4245 h->deblocking_filter, h->slice_alpha_c0_offset/2, h->slice_beta_offset/2,
4246 h->use_weight,
4247 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
4248 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
4252 return 0;
4258 static inline int get_level_prefix(GetBitContext *gb){
4259 unsigned int buf;
4260 int log;
4262 OPEN_READER(re, gb);
4263 UPDATE_CACHE(re, gb);
4264 buf=GET_CACHE(re, gb);
4266 log= 32 - av_log2(buf);
4267 #ifdef TRACE
4268 print_bin(buf>>(32-log), log);
4269 av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__);
4270 #endif
4272 LAST_SKIP_BITS(re, gb, log);
4273 CLOSE_READER(re, gb);
4275 return log-1;
4278 static inline int get_dct8x8_allowed(H264Context *h){
4279 int i;
4280 for(i=0; i<4; i++){
4281 if(!IS_SUB_8X8(h->sub_mb_type[i])
4282 || (!h->sps.direct_8x8_inference_flag && IS_DIRECT(h->sub_mb_type[i])))
4283 return 0;
4285 return 1;
4289 * decodes a residual block.
4290 * @param n block index
4291 * @param scantable scantable
4292 * @param max_coeff number of coefficients in the block
4293 * @return <0 if an error occurred
4295 static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
4296 MpegEncContext * const s = &h->s;
4297 static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
4298 int level[16];
4299 int zeros_left, coeff_num, coeff_token, total_coeff, i, j, trailing_ones, run_before;
4301 //FIXME put trailing_onex into the context
4303 if(n == CHROMA_DC_BLOCK_INDEX){
4304 coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
4305 total_coeff= coeff_token>>2;
4306 }else{
4307 if(n == LUMA_DC_BLOCK_INDEX){
4308 total_coeff= pred_non_zero_count(h, 0);
4309 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4310 total_coeff= coeff_token>>2;
4311 }else{
4312 total_coeff= pred_non_zero_count(h, n);
4313 coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
4314 total_coeff= coeff_token>>2;
4315 h->non_zero_count_cache[ scan8[n] ]= total_coeff;
4319 //FIXME set last_non_zero?
4321 if(total_coeff==0)
4322 return 0;
4323 if(total_coeff > (unsigned)max_coeff) {
4324 av_log(h->s.avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", s->mb_x, s->mb_y, total_coeff);
4325 return -1;
4328 trailing_ones= coeff_token&3;
4329 tprintf(h->s.avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
4330 assert(total_coeff<=16);
4332 for(i=0; i<trailing_ones; i++){
4333 level[i]= 1 - 2*get_bits1(gb);
4336 if(i<total_coeff) {
4337 int level_code, mask;
4338 int suffix_length = total_coeff > 10 && trailing_ones < 3;
4339 int prefix= get_level_prefix(gb);
4341 //first coefficient has suffix_length equal to 0 or 1
4342 if(prefix<14){ //FIXME try to build a large unified VLC table for all this
4343 if(suffix_length)
4344 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4345 else
4346 level_code= (prefix<<suffix_length); //part
4347 }else if(prefix==14){
4348 if(suffix_length)
4349 level_code= (prefix<<suffix_length) + get_bits(gb, suffix_length); //part
4350 else
4351 level_code= prefix + get_bits(gb, 4); //part
4352 }else if(prefix==15){
4353 level_code= (prefix<<suffix_length) + get_bits(gb, 12); //part
4354 if(suffix_length==0) level_code+=15; //FIXME doesn't make (much)sense
4355 }else{
4356 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4357 return -1;
4360 if(trailing_ones < 3) level_code += 2;
4362 suffix_length = 1;
4363 if(level_code > 5)
4364 suffix_length++;
4365 mask= -(level_code&1);
4366 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4367 i++;
4369 //remaining coefficients have suffix_length > 0
4370 for(;i<total_coeff;i++) {
4371 static const int suffix_limit[7] = {0,5,11,23,47,95,INT_MAX };
4372 prefix = get_level_prefix(gb);
4373 if(prefix<15){
4374 level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
4375 }else if(prefix==15){
4376 level_code = (prefix<<suffix_length) + get_bits(gb, 12);
4377 }else{
4378 av_log(h->s.avctx, AV_LOG_ERROR, "prefix too large at %d %d\n", s->mb_x, s->mb_y);
4379 return -1;
4381 mask= -(level_code&1);
4382 level[i]= (((2+level_code)>>1) ^ mask) - mask;
4383 if(level_code > suffix_limit[suffix_length])
4384 suffix_length++;
4388 if(total_coeff == max_coeff)
4389 zeros_left=0;
4390 else{
4391 if(n == CHROMA_DC_BLOCK_INDEX)
4392 zeros_left= get_vlc2(gb, chroma_dc_total_zeros_vlc[ total_coeff-1 ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
4393 else
4394 zeros_left= get_vlc2(gb, total_zeros_vlc[ total_coeff-1 ].table, TOTAL_ZEROS_VLC_BITS, 1);
4397 coeff_num = zeros_left + total_coeff - 1;
4398 j = scantable[coeff_num];
4399 if(n > 24){
4400 block[j] = level[0];
4401 for(i=1;i<total_coeff;i++) {
4402 if(zeros_left <= 0)
4403 run_before = 0;
4404 else if(zeros_left < 7){
4405 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4406 }else{
4407 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4409 zeros_left -= run_before;
4410 coeff_num -= 1 + run_before;
4411 j= scantable[ coeff_num ];
4413 block[j]= level[i];
4415 }else{
4416 block[j] = (level[0] * qmul[j] + 32)>>6;
4417 for(i=1;i<total_coeff;i++) {
4418 if(zeros_left <= 0)
4419 run_before = 0;
4420 else if(zeros_left < 7){
4421 run_before= get_vlc2(gb, run_vlc[zeros_left-1].table, RUN_VLC_BITS, 1);
4422 }else{
4423 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2);
4425 zeros_left -= run_before;
4426 coeff_num -= 1 + run_before;
4427 j= scantable[ coeff_num ];
4429 block[j]= (level[i] * qmul[j] + 32)>>6;
4433 if(zeros_left<0){
4434 av_log(h->s.avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", s->mb_x, s->mb_y);
4435 return -1;
4438 return 0;
4441 static void predict_field_decoding_flag(H264Context *h){
4442 MpegEncContext * const s = &h->s;
4443 const int mb_xy= h->mb_xy;
4444 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
4445 ? s->current_picture.mb_type[mb_xy-1]
4446 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
4447 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
4448 : 0;
4449 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
4453 * decodes a P_SKIP or B_SKIP macroblock
4455 static void decode_mb_skip(H264Context *h){
4456 MpegEncContext * const s = &h->s;
4457 const int mb_xy= h->mb_xy;
4458 int mb_type=0;
4460 memset(h->non_zero_count[mb_xy], 0, 16);
4461 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
4463 if(MB_FIELD)
4464 mb_type|= MB_TYPE_INTERLACED;
4466 if( h->slice_type == FF_B_TYPE )
4468 // just for fill_caches. pred_direct_motion will set the real mb_type
4469 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
4471 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4472 pred_direct_motion(h, &mb_type);
4473 mb_type|= MB_TYPE_SKIP;
4475 else
4477 int mx, my;
4478 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
4480 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
4481 pred_pskip_motion(h, &mx, &my);
4482 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
4483 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
4486 write_back_motion(h, mb_type);
4487 s->current_picture.mb_type[mb_xy]= mb_type;
4488 s->current_picture.qscale_table[mb_xy]= s->qscale;
4489 h->slice_table[ mb_xy ]= h->slice_num;
4490 h->prev_mb_skipped= 1;
4494 * decodes a macroblock
4495 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
4497 static int decode_mb_cavlc(H264Context *h){
4498 MpegEncContext * const s = &h->s;
4499 int mb_xy;
4500 int partition_count;
4501 unsigned int mb_type, cbp;
4502 int dct8x8_allowed= h->pps.transform_8x8_mode;
4504 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
4506 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
4508 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
4509 cbp = 0; /* avoid warning. FIXME: find a solution without slowing
4510 down the code */
4511 if(h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE){
4512 if(s->mb_skip_run==-1)
4513 s->mb_skip_run= get_ue_golomb(&s->gb);
4515 if (s->mb_skip_run--) {
4516 if(FRAME_MBAFF && (s->mb_y&1) == 0){
4517 if(s->mb_skip_run==0)
4518 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4519 else
4520 predict_field_decoding_flag(h);
4522 decode_mb_skip(h);
4523 return 0;
4526 if(FRAME_MBAFF){
4527 if( (s->mb_y&1) == 0 )
4528 h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&s->gb);
4529 }else
4530 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
4532 h->prev_mb_skipped= 0;
4534 mb_type= get_ue_golomb(&s->gb);
4535 if(h->slice_type == FF_B_TYPE){
4536 if(mb_type < 23){
4537 partition_count= b_mb_type_info[mb_type].partition_count;
4538 mb_type= b_mb_type_info[mb_type].type;
4539 }else{
4540 mb_type -= 23;
4541 goto decode_intra_mb;
4543 }else if(h->slice_type == FF_P_TYPE /*|| h->slice_type == FF_SP_TYPE */){
4544 if(mb_type < 5){
4545 partition_count= p_mb_type_info[mb_type].partition_count;
4546 mb_type= p_mb_type_info[mb_type].type;
4547 }else{
4548 mb_type -= 5;
4549 goto decode_intra_mb;
4551 }else{
4552 assert(h->slice_type == FF_I_TYPE);
4553 decode_intra_mb:
4554 if(mb_type > 25){
4555 av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
4556 return -1;
4558 partition_count=0;
4559 cbp= i_mb_type_info[mb_type].cbp;
4560 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
4561 mb_type= i_mb_type_info[mb_type].type;
4564 if(MB_FIELD)
4565 mb_type |= MB_TYPE_INTERLACED;
4567 h->slice_table[ mb_xy ]= h->slice_num;
4569 if(IS_INTRA_PCM(mb_type)){
4570 unsigned int x, y;
4572 // We assume these blocks are very rare so we do not optimize it.
4573 align_get_bits(&s->gb);
4575 // The pixels are stored in the same order as levels in h->mb array.
4576 for(y=0; y<16; y++){
4577 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
4578 for(x=0; x<16; x++){
4579 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4580 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= get_bits(&s->gb, 8);
4583 for(y=0; y<8; y++){
4584 const int index= 256 + 4*(y&3) + 32*(y>>2);
4585 for(x=0; x<8; x++){
4586 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4587 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4590 for(y=0; y<8; y++){
4591 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
4592 for(x=0; x<8; x++){
4593 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", show_bits(&s->gb, 8));
4594 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
4598 // In deblocking, the quantizer is 0
4599 s->current_picture.qscale_table[mb_xy]= 0;
4600 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
4601 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
4602 // All coeffs are present
4603 memset(h->non_zero_count[mb_xy], 16, 16);
4605 s->current_picture.mb_type[mb_xy]= mb_type;
4606 return 0;
4609 if(MB_MBAFF){
4610 h->ref_count[0] <<= 1;
4611 h->ref_count[1] <<= 1;
4614 fill_caches(h, mb_type, 0);
4616 //mb_pred
4617 if(IS_INTRA(mb_type)){
4618 int pred_mode;
4619 // init_top_left_availability(h);
4620 if(IS_INTRA4x4(mb_type)){
4621 int i;
4622 int di = 1;
4623 if(dct8x8_allowed && get_bits1(&s->gb)){
4624 mb_type |= MB_TYPE_8x8DCT;
4625 di = 4;
4628 // fill_intra4x4_pred_table(h);
4629 for(i=0; i<16; i+=di){
4630 int mode= pred_intra_mode(h, i);
4632 if(!get_bits1(&s->gb)){
4633 const int rem_mode= get_bits(&s->gb, 3);
4634 mode = rem_mode + (rem_mode >= mode);
4637 if(di==4)
4638 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
4639 else
4640 h->intra4x4_pred_mode_cache[ scan8[i] ] = mode;
4642 write_back_intra_pred_mode(h);
4643 if( check_intra4x4_pred_mode(h) < 0)
4644 return -1;
4645 }else{
4646 h->intra16x16_pred_mode= check_intra_pred_mode(h, h->intra16x16_pred_mode);
4647 if(h->intra16x16_pred_mode < 0)
4648 return -1;
4651 pred_mode= check_intra_pred_mode(h, get_ue_golomb(&s->gb));
4652 if(pred_mode < 0)
4653 return -1;
4654 h->chroma_pred_mode= pred_mode;
4655 }else if(partition_count==4){
4656 int i, j, sub_partition_count[4], list, ref[2][4];
4658 if(h->slice_type == FF_B_TYPE){
4659 for(i=0; i<4; i++){
4660 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4661 if(h->sub_mb_type[i] >=13){
4662 av_log(h->s.avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4663 return -1;
4665 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4666 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4668 if( IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
4669 || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
4670 pred_direct_motion(h, &mb_type);
4671 h->ref_cache[0][scan8[4]] =
4672 h->ref_cache[1][scan8[4]] =
4673 h->ref_cache[0][scan8[12]] =
4674 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
4676 }else{
4677 assert(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE); //FIXME SP correct ?
4678 for(i=0; i<4; i++){
4679 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
4680 if(h->sub_mb_type[i] >=4){
4681 av_log(h->s.avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], s->mb_x, s->mb_y);
4682 return -1;
4684 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
4685 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
4689 for(list=0; list<h->list_count; list++){
4690 int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
4691 for(i=0; i<4; i++){
4692 if(IS_DIRECT(h->sub_mb_type[i])) continue;
4693 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4694 unsigned int tmp = get_te0_golomb(&s->gb, ref_count); //FIXME init to 0 before and skip?
4695 if(tmp>=ref_count){
4696 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
4697 return -1;
4699 ref[list][i]= tmp;
4700 }else{
4701 //FIXME
4702 ref[list][i] = -1;
4707 if(dct8x8_allowed)
4708 dct8x8_allowed = get_dct8x8_allowed(h);
4710 for(list=0; list<h->list_count; list++){
4711 for(i=0; i<4; i++){
4712 if(IS_DIRECT(h->sub_mb_type[i])) {
4713 h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
4714 continue;
4716 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ]=
4717 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
4719 if(IS_DIR(h->sub_mb_type[i], 0, list)){
4720 const int sub_mb_type= h->sub_mb_type[i];
4721 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
4722 for(j=0; j<sub_partition_count[i]; j++){
4723 int mx, my;
4724 const int index= 4*i + block_width*j;
4725 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
4726 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
4727 mx += get_se_golomb(&s->gb);
4728 my += get_se_golomb(&s->gb);
4729 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4731 if(IS_SUB_8X8(sub_mb_type)){
4732 mv_cache[ 1 ][0]=
4733 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
4734 mv_cache[ 1 ][1]=
4735 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
4736 }else if(IS_SUB_8X4(sub_mb_type)){
4737 mv_cache[ 1 ][0]= mx;
4738 mv_cache[ 1 ][1]= my;
4739 }else if(IS_SUB_4X8(sub_mb_type)){
4740 mv_cache[ 8 ][0]= mx;
4741 mv_cache[ 8 ][1]= my;
4743 mv_cache[ 0 ][0]= mx;
4744 mv_cache[ 0 ][1]= my;
4746 }else{
4747 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
4748 p[0] = p[1]=
4749 p[8] = p[9]= 0;
4753 }else if(IS_DIRECT(mb_type)){
4754 pred_direct_motion(h, &mb_type);
4755 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
4756 }else{
4757 int list, mx, my, i;
4758 //FIXME we should set ref_idx_l? to 0 if we use that later ...
4759 if(IS_16X16(mb_type)){
4760 for(list=0; list<h->list_count; list++){
4761 unsigned int val;
4762 if(IS_DIR(mb_type, 0, list)){
4763 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4764 if(val >= h->ref_count[list]){
4765 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4766 return -1;
4768 }else
4769 val= LIST_NOT_USED&0xFF;
4770 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
4772 for(list=0; list<h->list_count; list++){
4773 unsigned int val;
4774 if(IS_DIR(mb_type, 0, list)){
4775 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
4776 mx += get_se_golomb(&s->gb);
4777 my += get_se_golomb(&s->gb);
4778 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4780 val= pack16to32(mx,my);
4781 }else
4782 val=0;
4783 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, val, 4);
4786 else if(IS_16X8(mb_type)){
4787 for(list=0; list<h->list_count; list++){
4788 for(i=0; i<2; i++){
4789 unsigned int val;
4790 if(IS_DIR(mb_type, i, list)){
4791 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4792 if(val >= h->ref_count[list]){
4793 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4794 return -1;
4796 }else
4797 val= LIST_NOT_USED&0xFF;
4798 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
4801 for(list=0; list<h->list_count; list++){
4802 for(i=0; i<2; i++){
4803 unsigned int val;
4804 if(IS_DIR(mb_type, i, list)){
4805 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
4806 mx += get_se_golomb(&s->gb);
4807 my += get_se_golomb(&s->gb);
4808 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4810 val= pack16to32(mx,my);
4811 }else
4812 val=0;
4813 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 4);
4816 }else{
4817 assert(IS_8X16(mb_type));
4818 for(list=0; list<h->list_count; list++){
4819 for(i=0; i<2; i++){
4820 unsigned int val;
4821 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
4822 val= get_te0_golomb(&s->gb, h->ref_count[list]);
4823 if(val >= h->ref_count[list]){
4824 av_log(h->s.avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
4825 return -1;
4827 }else
4828 val= LIST_NOT_USED&0xFF;
4829 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
4832 for(list=0; list<h->list_count; list++){
4833 for(i=0; i<2; i++){
4834 unsigned int val;
4835 if(IS_DIR(mb_type, i, list)){
4836 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
4837 mx += get_se_golomb(&s->gb);
4838 my += get_se_golomb(&s->gb);
4839 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
4841 val= pack16to32(mx,my);
4842 }else
4843 val=0;
4844 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 4);
4850 if(IS_INTER(mb_type))
4851 write_back_motion(h, mb_type);
4853 if(!IS_INTRA16x16(mb_type)){
4854 cbp= get_ue_golomb(&s->gb);
4855 if(cbp > 47){
4856 av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, s->mb_x, s->mb_y);
4857 return -1;
4860 if(IS_INTRA4x4(mb_type))
4861 cbp= golomb_to_intra4x4_cbp[cbp];
4862 else
4863 cbp= golomb_to_inter_cbp[cbp];
4865 h->cbp = cbp;
4867 if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
4868 if(get_bits1(&s->gb))
4869 mb_type |= MB_TYPE_8x8DCT;
4871 s->current_picture.mb_type[mb_xy]= mb_type;
4873 if(cbp || IS_INTRA16x16(mb_type)){
4874 int i8x8, i4x4, chroma_idx;
4875 int dquant;
4876 GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
4877 const uint8_t *scan, *scan8x8, *dc_scan;
4879 // fill_non_zero_count_cache(h);
4881 if(IS_INTERLACED(mb_type)){
4882 scan8x8= s->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0;
4883 scan= s->qscale ? h->field_scan : h->field_scan_q0;
4884 dc_scan= luma_dc_field_scan;
4885 }else{
4886 scan8x8= s->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0;
4887 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
4888 dc_scan= luma_dc_zigzag_scan;
4891 dquant= get_se_golomb(&s->gb);
4893 if( dquant > 25 || dquant < -26 ){
4894 av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
4895 return -1;
4898 s->qscale += dquant;
4899 if(((unsigned)s->qscale) > 51){
4900 if(s->qscale<0) s->qscale+= 52;
4901 else s->qscale-= 52;
4904 h->chroma_qp[0]= get_chroma_qp(h, 0, s->qscale);
4905 h->chroma_qp[1]= get_chroma_qp(h, 1, s->qscale);
4906 if(IS_INTRA16x16(mb_type)){
4907 if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
4908 return -1; //FIXME continue if partitioned and other return -1 too
4911 assert((cbp&15) == 0 || (cbp&15) == 15);
4913 if(cbp&15){
4914 for(i8x8=0; i8x8<4; i8x8++){
4915 for(i4x4=0; i4x4<4; i4x4++){
4916 const int index= i4x4 + 4*i8x8;
4917 if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
4918 return -1;
4922 }else{
4923 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
4925 }else{
4926 for(i8x8=0; i8x8<4; i8x8++){
4927 if(cbp & (1<<i8x8)){
4928 if(IS_8x8DCT(mb_type)){
4929 DCTELEM *buf = &h->mb[64*i8x8];
4930 uint8_t *nnz;
4931 for(i4x4=0; i4x4<4; i4x4++){
4932 if( decode_residual(h, gb, buf, i4x4+4*i8x8, scan8x8+16*i4x4,
4933 h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
4934 return -1;
4936 nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4937 nnz[0] += nnz[1] + nnz[8] + nnz[9];
4938 }else{
4939 for(i4x4=0; i4x4<4; i4x4++){
4940 const int index= i4x4 + 4*i8x8;
4942 if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
4943 return -1;
4947 }else{
4948 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
4949 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
4954 if(cbp&0x30){
4955 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
4956 if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
4957 return -1;
4961 if(cbp&0x20){
4962 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
4963 const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]];
4964 for(i4x4=0; i4x4<4; i4x4++){
4965 const int index= 16 + 4*chroma_idx + i4x4;
4966 if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, qmul, 15) < 0){
4967 return -1;
4971 }else{
4972 uint8_t * const nnz= &h->non_zero_count_cache[0];
4973 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4974 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4976 }else{
4977 uint8_t * const nnz= &h->non_zero_count_cache[0];
4978 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
4979 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
4980 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
4982 s->current_picture.qscale_table[mb_xy]= s->qscale;
4983 write_back_non_zero_count(h);
4985 if(MB_MBAFF){
4986 h->ref_count[0] >>= 1;
4987 h->ref_count[1] >>= 1;
4990 return 0;
4993 static int decode_cabac_field_decoding_flag(H264Context *h) {
4994 MpegEncContext * const s = &h->s;
4995 const int mb_x = s->mb_x;
4996 const int mb_y = s->mb_y & ~1;
4997 const int mba_xy = mb_x - 1 + mb_y *s->mb_stride;
4998 const int mbb_xy = mb_x + (mb_y-2)*s->mb_stride;
5000 unsigned int ctx = 0;
5002 if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
5003 ctx += 1;
5005 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5006 ctx += 1;
5009 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
5012 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5013 uint8_t *state= &h->cabac_state[ctx_base];
5014 int mb_type;
5016 if(intra_slice){
5017 MpegEncContext * const s = &h->s;
5018 const int mba_xy = h->left_mb_xy[0];
5019 const int mbb_xy = h->top_mb_xy;
5020 int ctx=0;
5021 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5022 ctx++;
5023 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5024 ctx++;
5025 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5026 return 0; /* I4x4 */
5027 state += 2;
5028 }else{
5029 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5030 return 0; /* I4x4 */
5033 if( get_cabac_terminate( &h->cabac ) )
5034 return 25; /* PCM */
5036 mb_type = 1; /* I16x16 */
5037 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5038 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5039 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5040 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5041 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
5042 return mb_type;
5045 static int decode_cabac_mb_type( H264Context *h ) {
5046 MpegEncContext * const s = &h->s;
5048 if( h->slice_type == FF_I_TYPE ) {
5049 return decode_cabac_intra_mb_type(h, 3, 1);
5050 } else if( h->slice_type == FF_P_TYPE ) {
5051 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5052 /* P-type */
5053 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5054 /* P_L0_D16x16, P_8x8 */
5055 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5056 } else {
5057 /* P_L0_D8x16, P_L0_D16x8 */
5058 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5060 } else {
5061 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5063 } else if( h->slice_type == FF_B_TYPE ) {
5064 const int mba_xy = h->left_mb_xy[0];
5065 const int mbb_xy = h->top_mb_xy;
5066 int ctx = 0;
5067 int bits;
5069 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5070 ctx++;
5071 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5072 ctx++;
5074 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5075 return 0; /* B_Direct_16x16 */
5077 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5078 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5081 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5082 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5083 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5084 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5085 if( bits < 8 )
5086 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5087 else if( bits == 13 ) {
5088 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5089 } else if( bits == 14 )
5090 return 11; /* B_L1_L0_8x16 */
5091 else if( bits == 15 )
5092 return 22; /* B_8x8 */
5094 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5095 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5096 } else {
5097 /* TODO SI/SP frames? */
5098 return -1;
5102 static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
5103 MpegEncContext * const s = &h->s;
5104 int mba_xy, mbb_xy;
5105 int ctx = 0;
5107 if(FRAME_MBAFF){ //FIXME merge with the stuff in fill_caches?
5108 int mb_xy = mb_x + (mb_y&~1)*s->mb_stride;
5109 mba_xy = mb_xy - 1;
5110 if( (mb_y&1)
5111 && h->slice_table[mba_xy] == h->slice_num
5112 && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) )
5113 mba_xy += s->mb_stride;
5114 if( MB_FIELD ){
5115 mbb_xy = mb_xy - s->mb_stride;
5116 if( !(mb_y&1)
5117 && h->slice_table[mbb_xy] == h->slice_num
5118 && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) )
5119 mbb_xy -= s->mb_stride;
5120 }else
5121 mbb_xy = mb_x + (mb_y-1)*s->mb_stride;
5122 }else{
5123 int mb_xy = h->mb_xy;
5124 mba_xy = mb_xy - 1;
5125 mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
5128 if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
5129 ctx++;
5130 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5131 ctx++;
5133 if( h->slice_type == FF_B_TYPE )
5134 ctx += 13;
5135 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5138 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5139 int mode = 0;
5141 if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
5142 return pred_mode;
5144 mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
5145 mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
5146 mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
5148 if( mode >= pred_mode )
5149 return mode + 1;
5150 else
5151 return mode;
5154 static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
5155 const int mba_xy = h->left_mb_xy[0];
5156 const int mbb_xy = h->top_mb_xy;
5158 int ctx = 0;
5160 /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
5161 if( h->slice_table[mba_xy] == h->slice_num && h->chroma_pred_mode_table[mba_xy] != 0 )
5162 ctx++;
5164 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5165 ctx++;
5167 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5168 return 0;
5170 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5171 return 1;
5172 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5173 return 2;
5174 else
5175 return 3;
5178 static int decode_cabac_mb_cbp_luma( H264Context *h) {
5179 int cbp_b, cbp_a, ctx, cbp = 0;
5181 cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
5182 cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
5184 ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
5185 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
5186 ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
5187 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
5188 ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
5189 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
5190 ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
5191 cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
5192 return cbp;
5194 static int decode_cabac_mb_cbp_chroma( H264Context *h) {
5195 int ctx;
5196 int cbp_a, cbp_b;
5198 cbp_a = (h->left_cbp>>4)&0x03;
5199 cbp_b = (h-> top_cbp>>4)&0x03;
5201 ctx = 0;
5202 if( cbp_a > 0 ) ctx++;
5203 if( cbp_b > 0 ) ctx += 2;
5204 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5205 return 0;
5207 ctx = 4;
5208 if( cbp_a == 2 ) ctx++;
5209 if( cbp_b == 2 ) ctx += 2;
5210 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5212 static int decode_cabac_mb_dqp( H264Context *h) {
5213 int ctx = 0;
5214 int val = 0;
5216 if( h->last_qscale_diff != 0 )
5217 ctx++;
5219 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5220 if( ctx < 2 )
5221 ctx = 2;
5222 else
5223 ctx = 3;
5224 val++;
5225 if(val > 102) //prevent infinite loop
5226 return INT_MIN;
5229 if( val&0x01 )
5230 return (val + 1)/2;
5231 else
5232 return -(val + 1)/2;
5234 static int decode_cabac_p_mb_sub_type( H264Context *h ) {
5235 if( get_cabac( &h->cabac, &h->cabac_state[21] ) )
5236 return 0; /* 8x8 */
5237 if( !get_cabac( &h->cabac, &h->cabac_state[22] ) )
5238 return 1; /* 8x4 */
5239 if( get_cabac( &h->cabac, &h->cabac_state[23] ) )
5240 return 2; /* 4x8 */
5241 return 3; /* 4x4 */
5243 static int decode_cabac_b_mb_sub_type( H264Context *h ) {
5244 int type;
5245 if( !get_cabac( &h->cabac, &h->cabac_state[36] ) )
5246 return 0; /* B_Direct_8x8 */
5247 if( !get_cabac( &h->cabac, &h->cabac_state[37] ) )
5248 return 1 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L0_8x8, B_L1_8x8 */
5249 type = 3;
5250 if( get_cabac( &h->cabac, &h->cabac_state[38] ) ) {
5251 if( get_cabac( &h->cabac, &h->cabac_state[39] ) )
5252 return 11 + get_cabac( &h->cabac, &h->cabac_state[39] ); /* B_L1_4x4, B_Bi_4x4 */
5253 type += 4;
5255 type += 2*get_cabac( &h->cabac, &h->cabac_state[39] );
5256 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5257 return type;
5260 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5261 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5264 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5265 int refa = h->ref_cache[list][scan8[n] - 1];
5266 int refb = h->ref_cache[list][scan8[n] - 8];
5267 int ref = 0;
5268 int ctx = 0;
5270 if( h->slice_type == FF_B_TYPE) {
5271 if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
5272 ctx++;
5273 if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
5274 ctx += 2;
5275 } else {
5276 if( refa > 0 )
5277 ctx++;
5278 if( refb > 0 )
5279 ctx += 2;
5282 while( get_cabac( &h->cabac, &h->cabac_state[54+ctx] ) ) {
5283 ref++;
5284 if( ctx < 4 )
5285 ctx = 4;
5286 else
5287 ctx = 5;
5288 if(ref >= 32 /*h->ref_list[list]*/){
5289 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_ref\n");
5290 return 0; //FIXME we should return -1 and check the return everywhere
5293 return ref;
5296 static int decode_cabac_mb_mvd( H264Context *h, int list, int n, int l ) {
5297 int amvd = abs( h->mvd_cache[list][scan8[n] - 1][l] ) +
5298 abs( h->mvd_cache[list][scan8[n] - 8][l] );
5299 int ctxbase = (l == 0) ? 40 : 47;
5300 int ctx, mvd;
5302 if( amvd < 3 )
5303 ctx = 0;
5304 else if( amvd > 32 )
5305 ctx = 2;
5306 else
5307 ctx = 1;
5309 if(!get_cabac(&h->cabac, &h->cabac_state[ctxbase+ctx]))
5310 return 0;
5312 mvd= 1;
5313 ctx= 3;
5314 while( mvd < 9 && get_cabac( &h->cabac, &h->cabac_state[ctxbase+ctx] ) ) {
5315 mvd++;
5316 if( ctx < 6 )
5317 ctx++;
5320 if( mvd >= 9 ) {
5321 int k = 3;
5322 while( get_cabac_bypass( &h->cabac ) ) {
5323 mvd += 1 << k;
5324 k++;
5325 if(k>24){
5326 av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
5327 return INT_MIN;
5330 while( k-- ) {
5331 if( get_cabac_bypass( &h->cabac ) )
5332 mvd += 1 << k;
5335 return get_cabac_bypass_sign( &h->cabac, -mvd );
5338 static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
5339 int nza, nzb;
5340 int ctx = 0;
5342 if( cat == 0 ) {
5343 nza = h->left_cbp&0x100;
5344 nzb = h-> top_cbp&0x100;
5345 } else if( cat == 1 || cat == 2 ) {
5346 nza = h->non_zero_count_cache[scan8[idx] - 1];
5347 nzb = h->non_zero_count_cache[scan8[idx] - 8];
5348 } else if( cat == 3 ) {
5349 nza = (h->left_cbp>>(6+idx))&0x01;
5350 nzb = (h-> top_cbp>>(6+idx))&0x01;
5351 } else {
5352 assert(cat == 4);
5353 nza = h->non_zero_count_cache[scan8[16+idx] - 1];
5354 nzb = h->non_zero_count_cache[scan8[16+idx] - 8];
5357 if( nza > 0 )
5358 ctx++;
5360 if( nzb > 0 )
5361 ctx += 2;
5363 return ctx + 4 * cat;
/* Per-position context offsets for the "last coefficient" bin of 8x8 blocks:
 * decode_cabac_residual() indexes this table with the coefficient scan
 * position and adds the entry to its last-coefficient context base.
 * 63 entries, one for each position that can be followed by another. */
5366 DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8[63]) = {
5367 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
5368 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
5369 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5370 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
/**
 * Decodes the residual coefficients of one block with CABAC and writes the
 * resulting levels into block[].
 *
 * Steps: (1) for cat != 5 read the coded block flag and return early if it
 * is zero; (2) decode the significance map into index[] / coeff_count;
 * (3) record the non-zero information in cbp_table or non_zero_count_cache;
 * (4) decode the levels, visiting the recorded positions from last to first.
 *
 * @param h         decoder context; its CABAC state, cbp_table and
 *                  non_zero_count_cache are updated
 * @param block     destination, written at the positions given by scantable[]
 * @param cat       block category 0..5 (see the table inside the body)
 * @param n         block index, interpreted per category
 * @param scantable maps a coefficient scan position to an index in block[]
 * @param qmul      per-index multipliers applied as (level * qmul[j] + 32) >> 6;
 *                  when NULL the raw signed level is stored
 * @param max_coeff coefficient count bound used by the significance-map decoding
 */
5373 static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
/* The three tables below give, per [MB_FIELD][cat] or per [cat], the index of
 * the first cabac_state entry used for each kind of bin. */
5374 static const int significant_coeff_flag_offset[2][6] = {
5375 { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
5376 { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
5378 static const int last_coeff_flag_offset[2][6] = {
5379 { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
5380 { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
5382 static const int coeff_abs_level_m1_offset[6] = {
5383 227+0, 227+10, 227+20, 227+30, 227+39, 426
5385 static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
5386 { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
5387 4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
5388 7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
5389 12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
5390 { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
5391 6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
5392 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
5393 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
5395 /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
5396 * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
5397 * map node ctx => cabac ctx for level=1 */
5398 static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
5399 /* map node ctx => cabac ctx for level>1 */
5400 static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
5401 static const uint8_t coeff_abs_level_transition[2][8] = {
5402 /* update node ctx after decoding a level=1 */
5403 { 1, 2, 3, 3, 4, 5, 6, 7 },
5404 /* update node ctx after decoding a level>1 */
5405 { 4, 4, 4, 4, 5, 6, 7, 7 }
/* scan positions of the significant coefficients, in the order they were found */
5408 int index[64];
5410 int av_unused last;
5411 int coeff_count = 0;
5412 int node_ctx = 0;
5414 uint8_t *significant_coeff_ctx_base;
5415 uint8_t *last_coeff_ctx_base;
5416 uint8_t *abs_level_m1_ctx_base;
/* When ARCH_X86 is not defined, work on a local copy of the CABAC
 * range/low/bytestream fields (CC points at it) and copy them back to
 * h->cabac before every return; otherwise CC is h->cabac itself. */
5418 #ifndef ARCH_X86
5419 #define CABAC_ON_STACK
5420 #endif
5421 #ifdef CABAC_ON_STACK
5422 #define CC &cc
5423 CABACContext cc;
5424 cc.range = h->cabac.range;
5425 cc.low = h->cabac.low;
5426 cc.bytestream= h->cabac.bytestream;
5427 #else
5428 #define CC &h->cabac
5429 #endif
5432 /* cat: 0-> DC 16x16 n = 0
5433 * 1-> AC 16x16 n = luma4x4idx
5434 * 2-> Luma4x4 n = luma4x4idx
5435 * 3-> DC Chroma n = iCbCr
5436 * 4-> AC Chroma n = 4 * iCbCr + chroma4x4idx
5437 * 5-> Luma8x8 n = 4 * luma8x8idx
5440 /* read coded block flag */
5441 if( cat != 5 ) {
5442 if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n ) ] ) == 0 ) {
5443 if( cat == 1 || cat == 2 )
5444 h->non_zero_count_cache[scan8[n]] = 0;
5445 else if( cat == 4 )
5446 h->non_zero_count_cache[scan8[16+n]] = 0;
5447 #ifdef CABAC_ON_STACK
5448 h->cabac.range = cc.range ;
5449 h->cabac.low = cc.low ;
5450 h->cabac.bytestream= cc.bytestream;
5451 #endif
5452 return;
/* select the context ranges for this category (and field/frame mode) */
5456 significant_coeff_ctx_base = h->cabac_state
5457 + significant_coeff_flag_offset[MB_FIELD][cat];
5458 last_coeff_ctx_base = h->cabac_state
5459 + last_coeff_flag_offset[MB_FIELD][cat];
5460 abs_level_m1_ctx_base = h->cabac_state
5461 + coeff_abs_level_m1_offset[cat];
/* Significance map: for each scan position read a "significant" bin and, if
 * set, store the position in index[] and read a "last" bin that ends the
 * scan. The final position is added without reading a bin when the scan
 * runs to the end. The x86 variants delegate to assembly helpers instead
 * of the DECODE_SIGNIFICANCE macro. */
5463 if( cat == 5 ) {
5464 #define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
5465 for(last= 0; last < coefs; last++) { \
5466 uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
5467 if( get_cabac( CC, sig_ctx )) { \
5468 uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
5469 index[coeff_count++] = last; \
5470 if( get_cabac( CC, last_ctx ) ) { \
5471 last= max_coeff; \
5472 break; \
5476 if( last == max_coeff -1 ) {\
5477 index[coeff_count++] = last;\
5479 const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
5480 #if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
5481 coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
5482 } else {
5483 coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
5484 #else
5485 DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
5486 } else {
5487 DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
5488 #endif
5490 assert(coeff_count > 0);
/* Record that this block has coefficients: a flag bit in cbp_table for the
 * DC categories (0 and 3), the coefficient count in non_zero_count_cache
 * for the others (a 2x2 rectangle of entries for cat 5). */
5492 if( cat == 0 )
5493 h->cbp_table[h->mb_xy] |= 0x100;
5494 else if( cat == 1 || cat == 2 )
5495 h->non_zero_count_cache[scan8[n]] = coeff_count;
5496 else if( cat == 3 )
5497 h->cbp_table[h->mb_xy] |= 0x40 << n;
5498 else if( cat == 4 )
5499 h->non_zero_count_cache[scan8[16+n]] = coeff_count;
5500 else {
5501 assert( cat == 5 );
5502 fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
/* Level decoding, from the last recorded coefficient back to the first.
 * node_ctx tracks the history of decoded levels and selects the context
 * via the coeff_abs_level*_ctx tables. */
5505 for( coeff_count--; coeff_count >= 0; coeff_count-- ) {
5506 uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;
5508 int j= scantable[index[coeff_count]];
/* first bin zero: magnitude is 1, only the sign remains (bypass-coded) */
5510 if( get_cabac( CC, ctx ) == 0 ) {
5511 node_ctx = coeff_abs_level_transition[0][node_ctx];
5512 if( !qmul ) {
5513 block[j] = get_cabac_bypass_sign( CC, -1);
5514 }else{
5515 block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
5517 } else {
5518 int coeff_abs = 2;
5519 ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
5520 node_ctx = coeff_abs_level_transition[1][node_ctx];
/* magnitudes 2..14 are counted with context-coded bins */
5522 while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
5523 coeff_abs++;
/* Escape for magnitudes >= 15: count set bypass bins, then read that many
 * bypass bits below a leading 1 and add 14.
 * NOTE(review): the counter below is also named j; it is presumably scoped
 * to this if-block so that block[j] afterwards uses the scan index declared
 * above. Confirm against the unmangled source. */
5526 if( coeff_abs >= 15 ) {
5527 int j = 0;
5528 while( get_cabac_bypass( CC ) ) {
5529 j++;
5532 coeff_abs=1;
5533 while( j-- ) {
5534 coeff_abs += coeff_abs + get_cabac_bypass( CC );
5536 coeff_abs+= 14;
/* sign is a bypass bin; a set bin stores the negated level */
5539 if( !qmul ) {
5540 if( get_cabac_bypass( CC ) ) block[j] = -coeff_abs;
5541 else block[j] = coeff_abs;
5542 }else{
5543 if( get_cabac_bypass( CC ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
5544 else block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
/* copy the local CABAC fields back into the decoder context */
5548 #ifdef CABAC_ON_STACK
5549 h->cabac.range = cc.range ;
5550 h->cabac.low = cc.low ;
5551 h->cabac.bytestream= cc.bytestream;
5552 #endif
5556 static inline void compute_mb_neighbors(H264Context *h)
5558 MpegEncContext * const s = &h->s;
5559 const int mb_xy = h->mb_xy;
5560 h->top_mb_xy = mb_xy - s->mb_stride;
5561 h->left_mb_xy[0] = mb_xy - 1;
5562 if(FRAME_MBAFF){
5563 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
5564 const int top_pair_xy = pair_xy - s->mb_stride;
5565 const int top_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
5566 const int left_mb_frame_flag = !IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
5567 const int curr_mb_frame_flag = !MB_FIELD;
5568 const int bottom = (s->mb_y & 1);
5569 if (bottom
5570 ? !curr_mb_frame_flag // bottom macroblock
5571 : (!curr_mb_frame_flag && !top_mb_frame_flag) // top macroblock
5573 h->top_mb_xy -= s->mb_stride;
5575 if (left_mb_frame_flag != curr_mb_frame_flag) {
5576 h->left_mb_xy[0] = pair_xy - 1;
5578 } else if (FIELD_PICTURE) {
5579 h->top_mb_xy -= s->mb_stride;
5581 return;
5585 * decodes a macroblock
5586 * @returns 0 if ok, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
5588 static int decode_mb_cabac(H264Context *h) {
5589 MpegEncContext * const s = &h->s;
5590 int mb_xy;
5591 int mb_type, partition_count, cbp = 0;
5592 int dct8x8_allowed= h->pps.transform_8x8_mode;
5594 mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;
5596 s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
5598 tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
5599 if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE ) {
5600 int skip;
5601 /* a skipped mb needs the aff flag from the following mb */
5602 if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
5603 predict_field_decoding_flag(h);
5604 if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
5605 skip = h->next_mb_skipped;
5606 else
5607 skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
5608 /* read skip flags */
5609 if( skip ) {
5610 if( FRAME_MBAFF && (s->mb_y&1)==0 ){
5611 s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
5612 h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
5613 if(h->next_mb_skipped)
5614 predict_field_decoding_flag(h);
5615 else
5616 h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5619 decode_mb_skip(h);
5621 h->cbp_table[mb_xy] = 0;
5622 h->chroma_pred_mode_table[mb_xy] = 0;
5623 h->last_qscale_diff = 0;
5625 return 0;
5629 if(FRAME_MBAFF){
5630 if( (s->mb_y&1) == 0 )
5631 h->mb_mbaff =
5632 h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
5633 }else
5634 h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
5636 h->prev_mb_skipped = 0;
5638 compute_mb_neighbors(h);
5639 if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
5640 av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
5641 return -1;
5644 if( h->slice_type == FF_B_TYPE ) {
5645 if( mb_type < 23 ){
5646 partition_count= b_mb_type_info[mb_type].partition_count;
5647 mb_type= b_mb_type_info[mb_type].type;
5648 }else{
5649 mb_type -= 23;
5650 goto decode_intra_mb;
5652 } else if( h->slice_type == FF_P_TYPE ) {
5653 if( mb_type < 5) {
5654 partition_count= p_mb_type_info[mb_type].partition_count;
5655 mb_type= p_mb_type_info[mb_type].type;
5656 } else {
5657 mb_type -= 5;
5658 goto decode_intra_mb;
5660 } else {
5661 assert(h->slice_type == FF_I_TYPE);
5662 decode_intra_mb:
5663 partition_count = 0;
5664 cbp= i_mb_type_info[mb_type].cbp;
5665 h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
5666 mb_type= i_mb_type_info[mb_type].type;
5668 if(MB_FIELD)
5669 mb_type |= MB_TYPE_INTERLACED;
5671 h->slice_table[ mb_xy ]= h->slice_num;
5673 if(IS_INTRA_PCM(mb_type)) {
5674 const uint8_t *ptr;
5675 unsigned int x, y;
5677 // We assume these blocks are very rare so we do not optimize it.
5678 // FIXME The two following lines get the bitstream position in the cabac
5679 // decode, I think it should be done by a function in cabac.h (or cabac.c).
5680 ptr= h->cabac.bytestream;
5681 if(h->cabac.low&0x1) ptr--;
5682 if(CABAC_BITS==16){
5683 if(h->cabac.low&0x1FF) ptr--;
5686 // The pixels are stored in the same order as levels in h->mb array.
5687 for(y=0; y<16; y++){
5688 const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
5689 for(x=0; x<16; x++){
5690 tprintf(s->avctx, "LUMA ICPM LEVEL (%3d)\n", *ptr);
5691 h->mb[index + (x&3) + 16*((x>>2)&1) + 64*(x>>3)]= *ptr++;
5694 for(y=0; y<8; y++){
5695 const int index= 256 + 4*(y&3) + 32*(y>>2);
5696 for(x=0; x<8; x++){
5697 tprintf(s->avctx, "CHROMA U ICPM LEVEL (%3d)\n", *ptr);
5698 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5701 for(y=0; y<8; y++){
5702 const int index= 256 + 64 + 4*(y&3) + 32*(y>>2);
5703 for(x=0; x<8; x++){
5704 tprintf(s->avctx, "CHROMA V ICPM LEVEL (%3d)\n", *ptr);
5705 h->mb[index + (x&3) + 16*(x>>2)]= *ptr++;
5709 ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);
5711 // All blocks are present
5712 h->cbp_table[mb_xy] = 0x1ef;
5713 h->chroma_pred_mode_table[mb_xy] = 0;
5714 // In deblocking, the quantizer is 0
5715 s->current_picture.qscale_table[mb_xy]= 0;
5716 h->chroma_qp[0] = get_chroma_qp(h, 0, 0);
5717 h->chroma_qp[1] = get_chroma_qp(h, 1, 0);
5718 // All coeffs are present
5719 memset(h->non_zero_count[mb_xy], 16, 16);
5720 s->current_picture.mb_type[mb_xy]= mb_type;
5721 return 0;
5724 if(MB_MBAFF){
5725 h->ref_count[0] <<= 1;
5726 h->ref_count[1] <<= 1;
5729 fill_caches(h, mb_type, 0);
5731 if( IS_INTRA( mb_type ) ) {
5732 int i, pred_mode;
5733 if( IS_INTRA4x4( mb_type ) ) {
5734 if( dct8x8_allowed && decode_cabac_mb_transform_size( h ) ) {
5735 mb_type |= MB_TYPE_8x8DCT;
5736 for( i = 0; i < 16; i+=4 ) {
5737 int pred = pred_intra_mode( h, i );
5738 int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5739 fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
5741 } else {
5742 for( i = 0; i < 16; i++ ) {
5743 int pred = pred_intra_mode( h, i );
5744 h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );
5746 //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
5749 write_back_intra_pred_mode(h);
5750 if( check_intra4x4_pred_mode(h) < 0 ) return -1;
5751 } else {
5752 h->intra16x16_pred_mode= check_intra_pred_mode( h, h->intra16x16_pred_mode );
5753 if( h->intra16x16_pred_mode < 0 ) return -1;
5755 h->chroma_pred_mode_table[mb_xy] =
5756 pred_mode = decode_cabac_mb_chroma_pre_mode( h );
5758 pred_mode= check_intra_pred_mode( h, pred_mode );
5759 if( pred_mode < 0 ) return -1;
5760 h->chroma_pred_mode= pred_mode;
5761 } else if( partition_count == 4 ) {
5762 int i, j, sub_partition_count[4], list, ref[2][4];
5764 if( h->slice_type == FF_B_TYPE ) {
5765 for( i = 0; i < 4; i++ ) {
5766 h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
5767 sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5768 h->sub_mb_type[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5770 if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
5771 h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
5772 pred_direct_motion(h, &mb_type);
5773 h->ref_cache[0][scan8[4]] =
5774 h->ref_cache[1][scan8[4]] =
5775 h->ref_cache[0][scan8[12]] =
5776 h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
5777 if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
5778 for( i = 0; i < 4; i++ )
5779 if( IS_DIRECT(h->sub_mb_type[i]) )
5780 fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, 1, 1 );
5783 } else {
5784 for( i = 0; i < 4; i++ ) {
5785 h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
5786 sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
5787 h->sub_mb_type[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
5791 for( list = 0; list < h->list_count; list++ ) {
5792 for( i = 0; i < 4; i++ ) {
5793 if(IS_DIRECT(h->sub_mb_type[i])) continue;
5794 if(IS_DIR(h->sub_mb_type[i], 0, list)){
5795 if( h->ref_count[list] > 1 )
5796 ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
5797 else
5798 ref[list][i] = 0;
5799 } else {
5800 ref[list][i] = -1;
5802 h->ref_cache[list][ scan8[4*i]+1 ]=
5803 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
5807 if(dct8x8_allowed)
5808 dct8x8_allowed = get_dct8x8_allowed(h);
5810 for(list=0; list<h->list_count; list++){
5811 for(i=0; i<4; i++){
5812 h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
5813 if(IS_DIRECT(h->sub_mb_type[i])){
5814 fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
5815 continue;
5818 if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
5819 const int sub_mb_type= h->sub_mb_type[i];
5820 const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
5821 for(j=0; j<sub_partition_count[i]; j++){
5822 int mpx, mpy;
5823 int mx, my;
5824 const int index= 4*i + block_width*j;
5825 int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
5826 int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
5827 pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mpx, &mpy);
5829 mx = mpx + decode_cabac_mb_mvd( h, list, index, 0 );
5830 my = mpy + decode_cabac_mb_mvd( h, list, index, 1 );
5831 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5833 if(IS_SUB_8X8(sub_mb_type)){
5834 mv_cache[ 1 ][0]=
5835 mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
5836 mv_cache[ 1 ][1]=
5837 mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
5839 mvd_cache[ 1 ][0]=
5840 mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mx - mpx;
5841 mvd_cache[ 1 ][1]=
5842 mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= my - mpy;
5843 }else if(IS_SUB_8X4(sub_mb_type)){
5844 mv_cache[ 1 ][0]= mx;
5845 mv_cache[ 1 ][1]= my;
5847 mvd_cache[ 1 ][0]= mx - mpx;
5848 mvd_cache[ 1 ][1]= my - mpy;
5849 }else if(IS_SUB_4X8(sub_mb_type)){
5850 mv_cache[ 8 ][0]= mx;
5851 mv_cache[ 8 ][1]= my;
5853 mvd_cache[ 8 ][0]= mx - mpx;
5854 mvd_cache[ 8 ][1]= my - mpy;
5856 mv_cache[ 0 ][0]= mx;
5857 mv_cache[ 0 ][1]= my;
5859 mvd_cache[ 0 ][0]= mx - mpx;
5860 mvd_cache[ 0 ][1]= my - mpy;
5862 }else{
5863 uint32_t *p= (uint32_t *)&h->mv_cache[list][ scan8[4*i] ][0];
5864 uint32_t *pd= (uint32_t *)&h->mvd_cache[list][ scan8[4*i] ][0];
5865 p[0] = p[1] = p[8] = p[9] = 0;
5866 pd[0]= pd[1]= pd[8]= pd[9]= 0;
5870 } else if( IS_DIRECT(mb_type) ) {
5871 pred_direct_motion(h, &mb_type);
5872 fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
5873 fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
5874 dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
5875 } else {
5876 int list, mx, my, i, mpx, mpy;
5877 if(IS_16X16(mb_type)){
5878 for(list=0; list<h->list_count; list++){
5879 if(IS_DIR(mb_type, 0, list)){
5880 const int ref = h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 0 ) : 0;
5881 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
5882 }else
5883 fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, (uint8_t)LIST_NOT_USED, 1); //FIXME factorize and the other fill_rect below too
5885 for(list=0; list<h->list_count; list++){
5886 if(IS_DIR(mb_type, 0, list)){
5887 pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mpx, &mpy);
5889 mx = mpx + decode_cabac_mb_mvd( h, list, 0, 0 );
5890 my = mpy + decode_cabac_mb_mvd( h, list, 0, 1 );
5891 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5893 fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5894 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
5895 }else
5896 fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
5899 else if(IS_16X8(mb_type)){
5900 for(list=0; list<h->list_count; list++){
5901 for(i=0; i<2; i++){
5902 if(IS_DIR(mb_type, i, list)){
5903 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 8*i ) : 0;
5904 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
5905 }else
5906 fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
5909 for(list=0; list<h->list_count; list++){
5910 for(i=0; i<2; i++){
5911 if(IS_DIR(mb_type, i, list)){
5912 pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mpx, &mpy);
5913 mx = mpx + decode_cabac_mb_mvd( h, list, 8*i, 0 );
5914 my = mpy + decode_cabac_mb_mvd( h, list, 8*i, 1 );
5915 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5917 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx-mpx,my-mpy), 4);
5918 fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
5919 }else{
5920 fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5921 fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
5925 }else{
5926 assert(IS_8X16(mb_type));
5927 for(list=0; list<h->list_count; list++){
5928 for(i=0; i<2; i++){
5929 if(IS_DIR(mb_type, i, list)){ //FIXME optimize
5930 const int ref= h->ref_count[list] > 1 ? decode_cabac_mb_ref( h, list, 4*i ) : 0;
5931 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
5932 }else
5933 fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
5936 for(list=0; list<h->list_count; list++){
5937 for(i=0; i<2; i++){
5938 if(IS_DIR(mb_type, i, list)){
5939 pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mpx, &mpy);
5940 mx = mpx + decode_cabac_mb_mvd( h, list, 4*i, 0 );
5941 my = mpy + decode_cabac_mb_mvd( h, list, 4*i, 1 );
5943 tprintf(s->avctx, "final mv:%d %d\n", mx, my);
5944 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx-mpx,my-mpy), 4);
5945 fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
5946 }else{
5947 fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5948 fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
5955 if( IS_INTER( mb_type ) ) {
5956 h->chroma_pred_mode_table[mb_xy] = 0;
5957 write_back_motion( h, mb_type );
5960 if( !IS_INTRA16x16( mb_type ) ) {
5961 cbp = decode_cabac_mb_cbp_luma( h );
5962 cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
5965 h->cbp_table[mb_xy] = h->cbp = cbp;
5967 if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
5968 if( decode_cabac_mb_transform_size( h ) )
5969 mb_type |= MB_TYPE_8x8DCT;
5971 s->current_picture.mb_type[mb_xy]= mb_type;
5973 if( cbp || IS_INTRA16x16( mb_type ) ) {
5974 const uint8_t *scan, *scan8x8, *dc_scan;
5975 const uint32_t *qmul;
5976 int dqp;
5978 if(IS_INTERLACED(mb_type)){
5979 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
5980 scan= s->qscale ? h->field_scan : h->field_scan_q0;
5981 dc_scan= luma_dc_field_scan;
5982 }else{
5983 scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
5984 scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
5985 dc_scan= luma_dc_zigzag_scan;
5988 h->last_qscale_diff = dqp = decode_cabac_mb_dqp( h );
5989 if( dqp == INT_MIN ){
5990 av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
5991 return -1;
5993 s->qscale += dqp;
5994 if(((unsigned)s->qscale) > 51){
5995 if(s->qscale<0) s->qscale+= 52;
5996 else s->qscale-= 52;
5998 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
5999 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
6001 if( IS_INTRA16x16( mb_type ) ) {
6002 int i;
6003 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
6004 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
6006 if( cbp&15 ) {
6007 qmul = h->dequant4_coeff[0][s->qscale];
6008 for( i = 0; i < 16; i++ ) {
6009 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
6010 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
6012 } else {
6013 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
6015 } else {
6016 int i8x8, i4x4;
6017 for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
6018 if( cbp & (1<<i8x8) ) {
6019 if( IS_8x8DCT(mb_type) ) {
6020 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
6021 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
6022 } else {
6023 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
6024 for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
6025 const int index = 4*i8x8 + i4x4;
6026 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
6027 //START_TIMER
6028 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
6029 //STOP_TIMER("decode_residual")
6032 } else {
6033 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
6034 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
6039 if( cbp&0x30 ){
6040 int c;
6041 for( c = 0; c < 2; c++ ) {
6042 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
6043 decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
6047 if( cbp&0x20 ) {
6048 int c, i;
6049 for( c = 0; c < 2; c++ ) {
6050 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
6051 for( i = 0; i < 4; i++ ) {
6052 const int index = 16 + 4 * c + i;
6053 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
6054 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
6057 } else {
6058 uint8_t * const nnz= &h->non_zero_count_cache[0];
6059 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6060 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6062 } else {
6063 uint8_t * const nnz= &h->non_zero_count_cache[0];
6064 fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
6065 nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
6066 nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
6067 h->last_qscale_diff = 0;
6070 s->current_picture.qscale_table[mb_xy]= s->qscale;
6071 write_back_non_zero_count(h);
6073 if(MB_MBAFF){
6074 h->ref_count[0] >>= 1;
6075 h->ref_count[1] >>= 1;
6078 return 0;
6082 static void filter_mb_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6083 int i, d;
6084 const int index_a = qp + h->slice_alpha_c0_offset;
6085 const int alpha = (alpha_table+52)[index_a];
6086 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6088 if( bS[0] < 4 ) {
6089 int8_t tc[4];
6090 for(i=0; i<4; i++)
6091 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6092 h->s.dsp.h264_h_loop_filter_luma(pix, stride, alpha, beta, tc);
6093 } else {
6094 /* 16px edge length, because bS=4 is triggered by being at
6095 * the edge of an intra MB, so all 4 bS are the same */
6096 for( d = 0; d < 16; d++ ) {
6097 const int p0 = pix[-1];
6098 const int p1 = pix[-2];
6099 const int p2 = pix[-3];
6101 const int q0 = pix[0];
6102 const int q1 = pix[1];
6103 const int q2 = pix[2];
6105 if( FFABS( p0 - q0 ) < alpha &&
6106 FFABS( p1 - p0 ) < beta &&
6107 FFABS( q1 - q0 ) < beta ) {
6109 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6110 if( FFABS( p2 - p0 ) < beta)
6112 const int p3 = pix[-4];
6113 /* p0', p1', p2' */
6114 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6115 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6116 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6117 } else {
6118 /* p0' */
6119 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6121 if( FFABS( q2 - q0 ) < beta)
6123 const int q3 = pix[3];
6124 /* q0', q1', q2' */
6125 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6126 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6127 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6128 } else {
6129 /* q0' */
6130 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6132 }else{
6133 /* p0', q0' */
6134 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6135 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6137 tprintf(h->s.avctx, "filter_mb_edgev i:%d d:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, p2, p1, p0, q0, q1, q2, pix[-2], pix[-1], pix[0], pix[1]);
6139 pix += stride;
6143 static void filter_mb_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6144 int i;
6145 const int index_a = qp + h->slice_alpha_c0_offset;
6146 const int alpha = (alpha_table+52)[index_a];
6147 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6149 if( bS[0] < 4 ) {
6150 int8_t tc[4];
6151 for(i=0; i<4; i++)
6152 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6153 h->s.dsp.h264_h_loop_filter_chroma(pix, stride, alpha, beta, tc);
6154 } else {
6155 h->s.dsp.h264_h_loop_filter_chroma_intra(pix, stride, alpha, beta);
6159 static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6160 int i;
6161 for( i = 0; i < 16; i++, pix += stride) {
6162 int index_a;
6163 int alpha;
6164 int beta;
6166 int qp_index;
6167 int bS_index = (i >> 1);
6168 if (!MB_FIELD) {
6169 bS_index &= ~1;
6170 bS_index |= (i & 1);
6173 if( bS[bS_index] == 0 ) {
6174 continue;
6177 qp_index = MB_FIELD ? (i >> 3) : (i & 1);
6178 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6179 alpha = (alpha_table+52)[index_a];
6180 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6182 if( bS[bS_index] < 4 ) {
6183 const int tc0 = (tc0_table+52)[index_a][bS[bS_index] - 1];
6184 const int p0 = pix[-1];
6185 const int p1 = pix[-2];
6186 const int p2 = pix[-3];
6187 const int q0 = pix[0];
6188 const int q1 = pix[1];
6189 const int q2 = pix[2];
6191 if( FFABS( p0 - q0 ) < alpha &&
6192 FFABS( p1 - p0 ) < beta &&
6193 FFABS( q1 - q0 ) < beta ) {
6194 int tc = tc0;
6195 int i_delta;
6197 if( FFABS( p2 - p0 ) < beta ) {
6198 pix[-2] = p1 + av_clip( ( p2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( p1 << 1 ) ) >> 1, -tc0, tc0 );
6199 tc++;
6201 if( FFABS( q2 - q0 ) < beta ) {
6202 pix[1] = q1 + av_clip( ( q2 + ( ( p0 + q0 + 1 ) >> 1 ) - ( q1 << 1 ) ) >> 1, -tc0, tc0 );
6203 tc++;
6206 i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6207 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6208 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6209 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6211 }else{
6212 const int p0 = pix[-1];
6213 const int p1 = pix[-2];
6214 const int p2 = pix[-3];
6216 const int q0 = pix[0];
6217 const int q1 = pix[1];
6218 const int q2 = pix[2];
6220 if( FFABS( p0 - q0 ) < alpha &&
6221 FFABS( p1 - p0 ) < beta &&
6222 FFABS( q1 - q0 ) < beta ) {
6224 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6225 if( FFABS( p2 - p0 ) < beta)
6227 const int p3 = pix[-4];
6228 /* p0', p1', p2' */
6229 pix[-1] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6230 pix[-2] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6231 pix[-3] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6232 } else {
6233 /* p0' */
6234 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6236 if( FFABS( q2 - q0 ) < beta)
6238 const int q3 = pix[3];
6239 /* q0', q1', q2' */
6240 pix[0] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6241 pix[1] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6242 pix[2] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6243 } else {
6244 /* q0' */
6245 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6247 }else{
6248 /* p0', q0' */
6249 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6250 pix[ 0] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6252 tprintf(h->s.avctx, "filter_mb_mbaff_edgev i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, p2, p1, p0, q0, q1, q2, pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6257 static void filter_mb_mbaff_edgecv( H264Context *h, uint8_t *pix, int stride, int16_t bS[8], int qp[2] ) {
6258 int i;
6259 for( i = 0; i < 8; i++, pix += stride) {
6260 int index_a;
6261 int alpha;
6262 int beta;
6264 int qp_index;
6265 int bS_index = i;
6267 if( bS[bS_index] == 0 ) {
6268 continue;
6271 qp_index = MB_FIELD ? (i >> 2) : (i & 1);
6272 index_a = qp[qp_index] + h->slice_alpha_c0_offset;
6273 alpha = (alpha_table+52)[index_a];
6274 beta = (beta_table+52)[qp[qp_index] + h->slice_beta_offset];
6276 if( bS[bS_index] < 4 ) {
6277 const int tc = (tc0_table+52)[index_a][bS[bS_index] - 1] + 1;
6278 const int p0 = pix[-1];
6279 const int p1 = pix[-2];
6280 const int q0 = pix[0];
6281 const int q1 = pix[1];
6283 if( FFABS( p0 - q0 ) < alpha &&
6284 FFABS( p1 - p0 ) < beta &&
6285 FFABS( q1 - q0 ) < beta ) {
6286 const int i_delta = av_clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
6288 pix[-1] = av_clip_uint8( p0 + i_delta ); /* p0' */
6289 pix[0] = av_clip_uint8( q0 - i_delta ); /* q0' */
6290 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d, qp:%d, indexA:%d, alpha:%d, beta:%d, tc:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, qp[qp_index], index_a, alpha, beta, tc, bS[bS_index], pix[-3], p1, p0, q0, q1, pix[2], p1, pix[-1], pix[0], q1);
6292 }else{
6293 const int p0 = pix[-1];
6294 const int p1 = pix[-2];
6295 const int q0 = pix[0];
6296 const int q1 = pix[1];
6298 if( FFABS( p0 - q0 ) < alpha &&
6299 FFABS( p1 - p0 ) < beta &&
6300 FFABS( q1 - q0 ) < beta ) {
6302 pix[-1] = ( 2*p1 + p0 + q1 + 2 ) >> 2; /* p0' */
6303 pix[0] = ( 2*q1 + q0 + p1 + 2 ) >> 2; /* q0' */
6304 tprintf(h->s.avctx, "filter_mb_mbaff_edgecv i:%d\n# bS:4 -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x, %02x, %02x]\n", i, pix[-3], p1, p0, q0, q1, pix[2], pix[-3], pix[-2], pix[-1], pix[0], pix[1], pix[2]);
6310 static void filter_mb_edgeh( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6311 int i, d;
6312 const int index_a = qp + h->slice_alpha_c0_offset;
6313 const int alpha = (alpha_table+52)[index_a];
6314 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6315 const int pix_next = stride;
6317 if( bS[0] < 4 ) {
6318 int8_t tc[4];
6319 for(i=0; i<4; i++)
6320 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] : -1;
6321 h->s.dsp.h264_v_loop_filter_luma(pix, stride, alpha, beta, tc);
6322 } else {
6323 /* 16px edge length, see filter_mb_edgev */
6324 for( d = 0; d < 16; d++ ) {
6325 const int p0 = pix[-1*pix_next];
6326 const int p1 = pix[-2*pix_next];
6327 const int p2 = pix[-3*pix_next];
6328 const int q0 = pix[0];
6329 const int q1 = pix[1*pix_next];
6330 const int q2 = pix[2*pix_next];
6332 if( FFABS( p0 - q0 ) < alpha &&
6333 FFABS( p1 - p0 ) < beta &&
6334 FFABS( q1 - q0 ) < beta ) {
6336 const int p3 = pix[-4*pix_next];
6337 const int q3 = pix[ 3*pix_next];
6339 if(FFABS( p0 - q0 ) < (( alpha >> 2 ) + 2 )){
6340 if( FFABS( p2 - p0 ) < beta) {
6341 /* p0', p1', p2' */
6342 pix[-1*pix_next] = ( p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4 ) >> 3;
6343 pix[-2*pix_next] = ( p2 + p1 + p0 + q0 + 2 ) >> 2;
6344 pix[-3*pix_next] = ( 2*p3 + 3*p2 + p1 + p0 + q0 + 4 ) >> 3;
6345 } else {
6346 /* p0' */
6347 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6349 if( FFABS( q2 - q0 ) < beta) {
6350 /* q0', q1', q2' */
6351 pix[0*pix_next] = ( p1 + 2*p0 + 2*q0 + 2*q1 + q2 + 4 ) >> 3;
6352 pix[1*pix_next] = ( p0 + q0 + q1 + q2 + 2 ) >> 2;
6353 pix[2*pix_next] = ( 2*q3 + 3*q2 + q1 + q0 + p0 + 4 ) >> 3;
6354 } else {
6355 /* q0' */
6356 pix[0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6358 }else{
6359 /* p0', q0' */
6360 pix[-1*pix_next] = ( 2*p1 + p0 + q1 + 2 ) >> 2;
6361 pix[ 0*pix_next] = ( 2*q1 + q0 + p1 + 2 ) >> 2;
6363 tprintf(h->s.avctx, "filter_mb_edgeh i:%d d:%d, qp:%d, indexA:%d, alpha:%d, beta:%d\n# bS:%d -> [%02x, %02x, %02x, %02x, %02x, %02x] =>[%02x, %02x, %02x, %02x]\n", i, d, qp, index_a, alpha, beta, bS[i], p2, p1, p0, q0, q1, q2, pix[-2*pix_next], pix[-pix_next], pix[0], pix[pix_next]);
6365 pix++;
6370 static void filter_mb_edgech( H264Context *h, uint8_t *pix, int stride, int16_t bS[4], int qp ) {
6371 int i;
6372 const int index_a = qp + h->slice_alpha_c0_offset;
6373 const int alpha = (alpha_table+52)[index_a];
6374 const int beta = (beta_table+52)[qp + h->slice_beta_offset];
6376 if( bS[0] < 4 ) {
6377 int8_t tc[4];
6378 for(i=0; i<4; i++)
6379 tc[i] = bS[i] ? (tc0_table+52)[index_a][bS[i] - 1] + 1 : 0;
6380 h->s.dsp.h264_v_loop_filter_chroma(pix, stride, alpha, beta, tc);
6381 } else {
6382 h->s.dsp.h264_v_loop_filter_chroma_intra(pix, stride, alpha, beta);
6386 static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6387 MpegEncContext * const s = &h->s;
6388 int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
6389 int mb_xy, mb_type;
6390 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
6392 mb_xy = h->mb_xy;
6394 if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
6395 (h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
6396 h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
6397 filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
6398 return;
6400 assert(!FRAME_MBAFF);
6402 mb_type = s->current_picture.mb_type[mb_xy];
6403 qp = s->current_picture.qscale_table[mb_xy];
6404 qp0 = s->current_picture.qscale_table[mb_xy-1];
6405 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
6406 qpc = get_chroma_qp( h, 0, qp );
6407 qpc0 = get_chroma_qp( h, 0, qp0 );
6408 qpc1 = get_chroma_qp( h, 0, qp1 );
6409 qp0 = (qp + qp0 + 1) >> 1;
6410 qp1 = (qp + qp1 + 1) >> 1;
6411 qpc0 = (qpc + qpc0 + 1) >> 1;
6412 qpc1 = (qpc + qpc1 + 1) >> 1;
6413 qp_thresh = 15 - h->slice_alpha_c0_offset;
6414 if(qp <= qp_thresh && qp0 <= qp_thresh && qp1 <= qp_thresh &&
6415 qpc <= qp_thresh && qpc0 <= qp_thresh && qpc1 <= qp_thresh)
6416 return;
6418 if( IS_INTRA(mb_type) ) {
6419 int16_t bS4[4] = {4,4,4,4};
6420 int16_t bS3[4] = {3,3,3,3};
6421 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
6422 if( IS_8x8DCT(mb_type) ) {
6423 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6424 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6425 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6426 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6427 } else {
6428 filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
6429 filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
6430 filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
6431 filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
6432 filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
6433 filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
6434 filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
6435 filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
6437 filter_mb_edgecv( h, &img_cb[2*0], uvlinesize, bS4, qpc0 );
6438 filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
6439 filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
6440 filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
6441 filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6442 filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
6443 filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
6444 filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
6445 return;
6446 } else {
6447 DECLARE_ALIGNED_8(int16_t, bS[2][4][4]);
6448 uint64_t (*bSv)[4] = (uint64_t(*)[4])bS;
6449 int edges;
6450 if( IS_8x8DCT(mb_type) && (h->cbp&7) == 7 ) {
6451 edges = 4;
6452 bSv[0][0] = bSv[0][2] = bSv[1][0] = bSv[1][2] = 0x0002000200020002ULL;
6453 } else {
6454 int mask_edge1 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16)) ? 3 :
6455 (mb_type & MB_TYPE_16x8) ? 1 : 0;
6456 int mask_edge0 = (mb_type & (MB_TYPE_16x16 | MB_TYPE_8x16))
6457 && (s->current_picture.mb_type[mb_xy-1] & (MB_TYPE_16x16 | MB_TYPE_8x16))
6458 ? 3 : 0;
6459 int step = IS_8x8DCT(mb_type) ? 2 : 1;
6460 edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
6461 s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
6462 (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 );
6464 if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
6465 bSv[0][0] = 0x0004000400040004ULL;
6466 if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
6467 bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
6469 #define FILTER(hv,dir,edge)\
6470 if(bSv[dir][edge]) {\
6471 filter_mb_edge##hv( h, &img_y[4*edge*(dir?linesize:1)], linesize, bS[dir][edge], edge ? qp : qp##dir );\
6472 if(!(edge&1)) {\
6473 filter_mb_edgec##hv( h, &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6474 filter_mb_edgec##hv( h, &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir );\
6477 if( edges == 1 ) {
6478 FILTER(v,0,0);
6479 FILTER(h,1,0);
6480 } else if( IS_8x8DCT(mb_type) ) {
6481 FILTER(v,0,0);
6482 FILTER(v,0,2);
6483 FILTER(h,1,0);
6484 FILTER(h,1,2);
6485 } else {
6486 FILTER(v,0,0);
6487 FILTER(v,0,1);
6488 FILTER(v,0,2);
6489 FILTER(v,0,3);
6490 FILTER(h,1,0);
6491 FILTER(h,1,1);
6492 FILTER(h,1,2);
6493 FILTER(h,1,3);
6495 #undef FILTER
6499 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
6500 MpegEncContext * const s = &h->s;
6501 const int mb_xy= mb_x + mb_y*s->mb_stride;
6502 const int mb_type = s->current_picture.mb_type[mb_xy];
6503 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4;
6504 int first_vertical_edge_done = 0;
6505 int dir;
6506 /* FIXME: A given frame may occupy more than one position in
6507 * the reference list. So ref2frm should be populated with
6508 * frame numbers, not indices. */
6509 static const int ref2frm[34] = {-1,-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
6510 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
6512 //for sufficiently low qp, filtering wouldn't do anything
6513 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
6514 if(!FRAME_MBAFF){
6515 int qp_thresh = 15 - h->slice_alpha_c0_offset - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
6516 int qp = s->current_picture.qscale_table[mb_xy];
6517 if(qp <= qp_thresh
6518 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
6519 && (mb_y == 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
6520 return;
6524 if (FRAME_MBAFF
6525 // left mb is in picture
6526 && h->slice_table[mb_xy-1] != 255
6527 // and current and left pair do not have the same interlaced type
6528 && (IS_INTERLACED(mb_type) != IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]))
6529 // and left mb is in the same slice if deblocking_filter == 2
6530 && (h->deblocking_filter!=2 || h->slice_table[mb_xy-1] == h->slice_table[mb_xy])) {
6531 /* First vertical edge is different in MBAFF frames
6532 * There are 8 different bS to compute and 2 different Qp
6534 const int pair_xy = mb_x + (mb_y&~1)*s->mb_stride;
6535 const int left_mb_xy[2] = { pair_xy-1, pair_xy-1+s->mb_stride };
6536 int16_t bS[8];
6537 int qp[2];
6538 int bqp[2];
6539 int rqp[2];
6540 int mb_qp, mbn0_qp, mbn1_qp;
6541 int i;
6542 first_vertical_edge_done = 1;
6544 if( IS_INTRA(mb_type) )
6545 bS[0] = bS[1] = bS[2] = bS[3] = bS[4] = bS[5] = bS[6] = bS[7] = 4;
6546 else {
6547 for( i = 0; i < 8; i++ ) {
6548 int mbn_xy = MB_FIELD ? left_mb_xy[i>>2] : left_mb_xy[i&1];
6550 if( IS_INTRA( s->current_picture.mb_type[mbn_xy] ) )
6551 bS[i] = 4;
6552 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
6553 /* FIXME: with 8x8dct + cavlc, should check cbp instead of nnz */
6554 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2] )
6555 bS[i] = 2;
6556 else
6557 bS[i] = 1;
6561 mb_qp = s->current_picture.qscale_table[mb_xy];
6562 mbn0_qp = s->current_picture.qscale_table[left_mb_xy[0]];
6563 mbn1_qp = s->current_picture.qscale_table[left_mb_xy[1]];
6564 qp[0] = ( mb_qp + mbn0_qp + 1 ) >> 1;
6565 bqp[0] = ( get_chroma_qp( h, 0, mb_qp ) +
6566 get_chroma_qp( h, 0, mbn0_qp ) + 1 ) >> 1;
6567 rqp[0] = ( get_chroma_qp( h, 1, mb_qp ) +
6568 get_chroma_qp( h, 1, mbn0_qp ) + 1 ) >> 1;
6569 qp[1] = ( mb_qp + mbn1_qp + 1 ) >> 1;
6570 bqp[1] = ( get_chroma_qp( h, 0, mb_qp ) +
6571 get_chroma_qp( h, 0, mbn1_qp ) + 1 ) >> 1;
6572 rqp[1] = ( get_chroma_qp( h, 1, mb_qp ) +
6573 get_chroma_qp( h, 1, mbn1_qp ) + 1 ) >> 1;
6575 /* Filter edge */
6576 tprintf(s->avctx, "filter mb:%d/%d MBAFF, QPy:%d/%d, QPb:%d/%d QPr:%d/%d ls:%d uvls:%d", mb_x, mb_y, qp[0], qp[1], bqp[0], bqp[1], rqp[0], rqp[1], linesize, uvlinesize);
6577 { int i; for (i = 0; i < 8; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6578 filter_mb_mbaff_edgev ( h, &img_y [0], linesize, bS, qp );
6579 filter_mb_mbaff_edgecv( h, &img_cb[0], uvlinesize, bS, bqp );
6580 filter_mb_mbaff_edgecv( h, &img_cr[0], uvlinesize, bS, rqp );
6582 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6583 for( dir = 0; dir < 2; dir++ )
6585 int edge;
6586 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6587 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6588 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6590 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
6591 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6592 // how often to recheck mv-based bS when iterating between edges
6593 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6594 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6595 // how often to recheck mv-based bS when iterating along each edge
6596 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6598 if (first_vertical_edge_done) {
6599 start = 1;
6600 first_vertical_edge_done = 0;
6603 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6604 start = 1;
6606 if (FRAME_MBAFF && (dir == 1) && ((mb_y&1) == 0) && start == 0
6607 && !IS_INTERLACED(mb_type)
6608 && IS_INTERLACED(mbm_type)
6610 // This is a special case in the norm where the filtering must
6611 // be done twice (one each of the field) even if we are in a
6612 // frame macroblock.
6614 static const int nnz_idx[4] = {4,5,6,3};
6615 unsigned int tmp_linesize = 2 * linesize;
6616 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6617 int mbn_xy = mb_xy - 2 * s->mb_stride;
6618 int qp;
6619 int i, j;
6620 int16_t bS[4];
6622 for(j=0; j<2; j++, mbn_xy += s->mb_stride){
6623 if( IS_INTRA(mb_type) ||
6624 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6625 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6626 } else {
6627 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
6628 for( i = 0; i < 4; i++ ) {
6629 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
6630 mbn_nnz[nnz_idx[i]] != 0 )
6631 bS[i] = 2;
6632 else
6633 bS[i] = 1;
6636 // Do not use s->qscale as luma quantizer because it has not the same
6637 // value in IPCM macroblocks.
6638 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6639 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, tmp_linesize, tmp_uvlinesize);
6640 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6641 filter_mb_edgeh( h, &img_y[j*linesize], tmp_linesize, bS, qp );
6642 filter_mb_edgech( h, &img_cb[j*uvlinesize], tmp_uvlinesize, bS,
6643 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6644 filter_mb_edgech( h, &img_cr[j*uvlinesize], tmp_uvlinesize, bS,
6645 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6648 start = 1;
6651 /* Calculate bS */
6652 for( edge = start; edge < edges; edge++ ) {
6653 /* mbn_xy: neighbor macroblock */
6654 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6655 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6656 int16_t bS[4];
6657 int qp;
6659 if( (edge&1) && IS_8x8DCT(mb_type) )
6660 continue;
6662 if( IS_INTRA(mb_type) ||
6663 IS_INTRA(mbn_type) ) {
6664 int value;
6665 if (edge == 0) {
6666 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6667 || ((FRAME_MBAFF || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6669 value = 4;
6670 } else {
6671 value = 3;
6673 } else {
6674 value = 3;
6676 bS[0] = bS[1] = bS[2] = bS[3] = value;
6677 } else {
6678 int i, l;
6679 int mv_done;
6681 if( edge & mask_edge ) {
6682 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6683 mv_done = 1;
6685 else if( FRAME_MBAFF && IS_INTERLACED(mb_type ^ mbn_type)) {
6686 bS[0] = bS[1] = bS[2] = bS[3] = 1;
6687 mv_done = 1;
6689 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6690 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6691 int bn_idx= b_idx - (dir ? 8:1);
6692 int v = 0;
6693 for( l = 0; !v && l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
6694 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6695 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6696 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
6698 bS[0] = bS[1] = bS[2] = bS[3] = v;
6699 mv_done = 1;
6701 else
6702 mv_done = 0;
6704 for( i = 0; i < 4; i++ ) {
6705 int x = dir == 0 ? edge : i;
6706 int y = dir == 0 ? i : edge;
6707 int b_idx= 8 + 4 + x + 8*y;
6708 int bn_idx= b_idx - (dir ? 8:1);
6710 if( h->non_zero_count_cache[b_idx] != 0 ||
6711 h->non_zero_count_cache[bn_idx] != 0 ) {
6712 bS[i] = 2;
6714 else if(!mv_done)
6716 bS[i] = 0;
6717 for( l = 0; l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
6718 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6719 FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6720 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
6721 bS[i] = 1;
6722 break;
6728 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
6729 continue;
6732 /* Filter edge */
6733 // Do not use s->qscale as luma quantizer because it has not the same
6734 // value in IPCM macroblocks.
6735 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1;
6736 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]);
6737 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize);
6738 { int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); }
6739 if( dir == 0 ) {
6740 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp );
6741 if( (edge&1) == 0 ) {
6742 filter_mb_edgecv( h, &img_cb[2*edge], uvlinesize, bS,
6743 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6744 filter_mb_edgecv( h, &img_cr[2*edge], uvlinesize, bS,
6745 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6747 } else {
6748 filter_mb_edgeh( h, &img_y[4*edge*linesize], linesize, bS, qp );
6749 if( (edge&1) == 0 ) {
6750 filter_mb_edgech( h, &img_cb[2*edge*uvlinesize], uvlinesize, bS,
6751 ( h->chroma_qp[0] + get_chroma_qp( h, 0, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
6752 filter_mb_edgech( h, &img_cr[2*edge*uvlinesize], uvlinesize, bS,
6753 ( h->chroma_qp[1] + get_chroma_qp( h, 1, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1);
/*
 * decode_slice: decodes the macroblocks of the current slice.
 *
 * Chooses the CABAC or the CAVLC macroblock parser depending on h->pps.cabac,
 * reconstructs each macroblock with hl_decode_mb() and advances s->mb_x/s->mb_y.
 * The decoded range and its status are reported through ff_er_add_slice().
 * Returns 0 when the end of the slice is detected and -1 on a decoding error.
 */
6760 static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
6761 MpegEncContext * const s = &h->s;
// Mask applied to every status flag set handed to ff_er_add_slice(); it is
// narrowed to AC_END|AC_ERROR when s->partitioned_frame is set.
6762 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
6764 s->mb_skip_run= -1;
6766 if( h->pps.cabac ) {
6767 int i;
6769 /* realign */
6770 align_get_bits( &s->gb );
6772 /* init cabac */
6773 ff_init_cabac_states( &h->cabac);
// The CABAC decoder starts at the current byte position of s->gb and is given
// the number of bytes left in the buffer (rounded up).
6774 ff_init_cabac_decoder( &h->cabac,
6775 s->gb.buffer + get_bits_count(&s->gb)/8,
6776 ( s->gb.size_in_bits - get_bits_count(&s->gb) + 7)/8);
6777 /* calculate pre-state */
// Each of the 460 context states is derived from an (m, n) table entry and
// s->qscale, clipped to 1..126, then folded into the stored state value.
6778 for( i= 0; i < 460; i++ ) {
6779 int pre;
6780 if( h->slice_type == FF_I_TYPE )
6781 pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
6782 else
6783 pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
6785 if( pre <= 63 )
6786 h->cabac_state[i] = 2 * ( 63 - pre ) + 0;
6787 else
6788 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
// CABAC macroblock loop: runs until an error, the terminate bin, or the last row.
6791 for(;;){
6792 //START_TIMER
6793 int ret = decode_mb_cabac(h);
6794 int eos;
6795 //STOP_TIMER("decode_mb_cabac")
6797 if(ret>=0) hl_decode_mb(h);
// With FRAME_MBAFF a second macroblock is decoded at mb_y+1 before mb_y is
// restored.
6799 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
6800 s->mb_y++;
6802 if(ret>=0) ret = decode_mb_cabac(h);
6804 if(ret>=0) hl_decode_mb(h);
6805 s->mb_y--;
6807 eos = get_cabac_terminate( &h->cabac );
// A parser error, or a read position more than 2 bytes past the end of the
// CABAC byte stream, aborts the slice.
6809 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
6810 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
6811 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6812 return -1;
// End of a macroblock row: wrap mb_x, signal the finished band and move to the
// next row (one extra row when FIELD_OR_MBAFF_PICTURE is true).
6815 if( ++s->mb_x >= s->mb_width ) {
6816 s->mb_x = 0;
6817 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6818 ++s->mb_y;
6819 if(FIELD_OR_MBAFF_PICTURE) {
6820 ++s->mb_y;
6824 if( eos || s->mb_y >= s->mb_height ) {
6825 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6826 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6827 return 0;
6831 } else {
// CAVLC macroblock loop: the slice end is detected from the bit position.
6832 for(;;){
6833 int ret = decode_mb_cavlc(h);
6835 if(ret>=0) hl_decode_mb(h);
6837 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
6838 s->mb_y++;
6839 ret = decode_mb_cavlc(h);
6841 if(ret>=0) hl_decode_mb(h);
6842 s->mb_y--;
6845 if(ret<0){
6846 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6847 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6849 return -1;
6852 if(++s->mb_x >= s->mb_width){
6853 s->mb_x=0;
6854 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6855 ++s->mb_y;
6856 if(FIELD_OR_MBAFF_PICTURE) {
6857 ++s->mb_y;
// Last row reached: success only if exactly all bits were consumed.
// NOTE(review): the failure branch below still passes the *_END flags to
// ff_er_add_slice() while returning -1 -- confirm this is intended.
6859 if(s->mb_y >= s->mb_height){
6860 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6862 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
6863 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6865 return 0;
6866 }else{
6867 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6869 return -1;
// Bitstream exhausted with no pending skip run: an exact fit ends the slice
// cleanly, an overread is reported as an error.
6874 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
6875 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
6876 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
6877 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6879 return 0;
6880 }else{
6881 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6883 return -1;
// The region below is disabled by the preprocessor and is never compiled.
6889 #if 0
6890 for(;s->mb_y < s->mb_height; s->mb_y++){
6891 for(;s->mb_x < s->mb_width; s->mb_x++){
6892 int ret= decode_mb(h);
6894 hl_decode_mb(h);
6896 if(ret<0){
6897 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
6898 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6900 return -1;
6903 if(++s->mb_x >= s->mb_width){
6904 s->mb_x=0;
6905 if(++s->mb_y >= s->mb_height){
6906 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6907 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6909 return 0;
6910 }else{
6911 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6913 return -1;
6918 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
6919 if(get_bits_count(s->gb) == s->gb.size_in_bits){
6920 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
6922 return 0;
6923 }else{
6924 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
6926 return -1;
6930 s->mb_x=0;
6931 ff_draw_horiz_band(s, 16*s->mb_y, 16);
6933 #endif
// Both loops above only leave through a return statement.
6934 return -1; //not reached
6937 static int decode_unregistered_user_data(H264Context *h, int size){
6938 MpegEncContext * const s = &h->s;
6939 uint8_t user_data[16+256];
6940 int e, build, i;
6942 if(size<16)
6943 return -1;
6945 for(i=0; i<sizeof(user_data)-1 && i<size; i++){
6946 user_data[i]= get_bits(&s->gb, 8);
6949 user_data[i]= 0;
6950 e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
6951 if(e==1 && build>=0)
6952 h->x264_build= build;
6954 if(s->avctx->debug & FF_DEBUG_BUGS)
6955 av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
6957 for(; i<size; i++)
6958 skip_bits(&s->gb, 8);
6960 return 0;
6963 static int decode_sei(H264Context *h){
6964 MpegEncContext * const s = &h->s;
6966 while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
6967 int size, type;
6969 type=0;
6971 type+= show_bits(&s->gb, 8);
6972 }while(get_bits(&s->gb, 8) == 255);
6974 size=0;
6976 size+= show_bits(&s->gb, 8);
6977 }while(get_bits(&s->gb, 8) == 255);
6979 switch(type){
6980 case 5:
6981 if(decode_unregistered_user_data(h, size) < 0)
6982 return -1;
6983 break;
6984 default:
6985 skip_bits(&s->gb, 8*size);
6988 //FIXME check bits here
6989 align_get_bits(&s->gb);
6992 return 0;
6995 static inline void decode_hrd_parameters(H264Context *h, SPS *sps){
6996 MpegEncContext * const s = &h->s;
6997 int cpb_count, i;
6998 cpb_count = get_ue_golomb(&s->gb) + 1;
6999 get_bits(&s->gb, 4); /* bit_rate_scale */
7000 get_bits(&s->gb, 4); /* cpb_size_scale */
7001 for(i=0; i<cpb_count; i++){
7002 get_ue_golomb(&s->gb); /* bit_rate_value_minus1 */
7003 get_ue_golomb(&s->gb); /* cpb_size_value_minus1 */
7004 get_bits1(&s->gb); /* cbr_flag */
7006 get_bits(&s->gb, 5); /* initial_cpb_removal_delay_length_minus1 */
7007 get_bits(&s->gb, 5); /* cpb_removal_delay_length_minus1 */
7008 get_bits(&s->gb, 5); /* dpb_output_delay_length_minus1 */
7009 get_bits(&s->gb, 5); /* time_offset_length */
7012 static inline int decode_vui_parameters(H264Context *h, SPS *sps){
7013 MpegEncContext * const s = &h->s;
7014 int aspect_ratio_info_present_flag;
7015 unsigned int aspect_ratio_idc;
7016 int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
7018 aspect_ratio_info_present_flag= get_bits1(&s->gb);
7020 if( aspect_ratio_info_present_flag ) {
7021 aspect_ratio_idc= get_bits(&s->gb, 8);
7022 if( aspect_ratio_idc == EXTENDED_SAR ) {
7023 sps->sar.num= get_bits(&s->gb, 16);
7024 sps->sar.den= get_bits(&s->gb, 16);
7025 }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
7026 sps->sar= pixel_aspect[aspect_ratio_idc];
7027 }else{
7028 av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
7029 return -1;
7031 }else{
7032 sps->sar.num=
7033 sps->sar.den= 0;
7035 // s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
7037 if(get_bits1(&s->gb)){ /* overscan_info_present_flag */
7038 get_bits1(&s->gb); /* overscan_appropriate_flag */
7041 if(get_bits1(&s->gb)){ /* video_signal_type_present_flag */
7042 get_bits(&s->gb, 3); /* video_format */
7043 get_bits1(&s->gb); /* video_full_range_flag */
7044 if(get_bits1(&s->gb)){ /* colour_description_present_flag */
7045 get_bits(&s->gb, 8); /* colour_primaries */
7046 get_bits(&s->gb, 8); /* transfer_characteristics */
7047 get_bits(&s->gb, 8); /* matrix_coefficients */
7051 if(get_bits1(&s->gb)){ /* chroma_location_info_present_flag */
7052 get_ue_golomb(&s->gb); /* chroma_sample_location_type_top_field */
7053 get_ue_golomb(&s->gb); /* chroma_sample_location_type_bottom_field */
7056 sps->timing_info_present_flag = get_bits1(&s->gb);
7057 if(sps->timing_info_present_flag){
7058 sps->num_units_in_tick = get_bits_long(&s->gb, 32);
7059 sps->time_scale = get_bits_long(&s->gb, 32);
7060 sps->fixed_frame_rate_flag = get_bits1(&s->gb);
7063 nal_hrd_parameters_present_flag = get_bits1(&s->gb);
7064 if(nal_hrd_parameters_present_flag)
7065 decode_hrd_parameters(h, sps);
7066 vcl_hrd_parameters_present_flag = get_bits1(&s->gb);
7067 if(vcl_hrd_parameters_present_flag)
7068 decode_hrd_parameters(h, sps);
7069 if(nal_hrd_parameters_present_flag || vcl_hrd_parameters_present_flag)
7070 get_bits1(&s->gb); /* low_delay_hrd_flag */
7071 get_bits1(&s->gb); /* pic_struct_present_flag */
7073 sps->bitstream_restriction_flag = get_bits1(&s->gb);
7074 if(sps->bitstream_restriction_flag){
7075 unsigned int num_reorder_frames;
7076 get_bits1(&s->gb); /* motion_vectors_over_pic_boundaries_flag */
7077 get_ue_golomb(&s->gb); /* max_bytes_per_pic_denom */
7078 get_ue_golomb(&s->gb); /* max_bits_per_mb_denom */
7079 get_ue_golomb(&s->gb); /* log2_max_mv_length_horizontal */
7080 get_ue_golomb(&s->gb); /* log2_max_mv_length_vertical */
7081 num_reorder_frames= get_ue_golomb(&s->gb);
7082 get_ue_golomb(&s->gb); /*max_dec_frame_buffering*/
7084 if(num_reorder_frames > 16 /*max_dec_frame_buffering || max_dec_frame_buffering > 16*/){
7085 av_log(h->s.avctx, AV_LOG_ERROR, "illegal num_reorder_frames %d\n", num_reorder_frames);
7086 return -1;
7089 sps->num_reorder_frames= num_reorder_frames;
7092 return 0;
7095 static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
7096 const uint8_t *jvt_list, const uint8_t *fallback_list){
7097 MpegEncContext * const s = &h->s;
7098 int i, last = 8, next = 8;
7099 const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
7100 if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
7101 memcpy(factors, fallback_list, size*sizeof(uint8_t));
7102 else
7103 for(i=0;i<size;i++){
7104 if(next)
7105 next = (last + get_se_golomb(&s->gb)) & 0xff;
7106 if(!i && !next){ /* matrix not written, we use the preset one */
7107 memcpy(factors, jvt_list, size*sizeof(uint8_t));
7108 break;
7110 last = factors[scan[i]] = next ? next : last;
7114 static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
7115 uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
7116 MpegEncContext * const s = &h->s;
7117 int fallback_sps = !is_sps && sps->scaling_matrix_present;
7118 const uint8_t *fallback[4] = {
7119 fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
7120 fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
7121 fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
7122 fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
7124 if(get_bits1(&s->gb)){
7125 sps->scaling_matrix_present |= is_sps;
7126 decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
7127 decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
7128 decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
7129 decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
7130 decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
7131 decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
7132 if(is_sps || pps->transform_8x8_mode){
7133 decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]); // Intra, Y
7134 decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]); // Inter, Y
7136 } else if(fallback_sps) {
7137 memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
7138 memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
7143 * Returns and optionally allocates SPS / PPS structures in the supplied array 'vec'
7145 static void *
7146 alloc_parameter_set(H264Context *h, void **vec, const unsigned int id, const unsigned int max,
7147 const size_t size, const char *name)
7149 if(id>=max) {
7150 av_log(h->s.avctx, AV_LOG_ERROR, "%s_id (%d) out of range\n", name, id);
7151 return NULL;
7154 if(!vec[id]) {
7155 vec[id] = av_mallocz(size);
7156 if(vec[id] == NULL)
7157 av_log(h->s.avctx, AV_LOG_ERROR, "cannot allocate memory for %s\n", name);
7159 return vec[id];
7162 static inline int decode_seq_parameter_set(H264Context *h){
7163 MpegEncContext * const s = &h->s;
7164 int profile_idc, level_idc;
7165 unsigned int sps_id, tmp, mb_width, mb_height;
7166 int i;
7167 SPS *sps;
7169 profile_idc= get_bits(&s->gb, 8);
7170 get_bits1(&s->gb); //constraint_set0_flag
7171 get_bits1(&s->gb); //constraint_set1_flag
7172 get_bits1(&s->gb); //constraint_set2_flag
7173 get_bits1(&s->gb); //constraint_set3_flag
7174 get_bits(&s->gb, 4); // reserved
7175 level_idc= get_bits(&s->gb, 8);
7176 sps_id= get_ue_golomb(&s->gb);
7178 sps = alloc_parameter_set(h, (void **)h->sps_buffers, sps_id, MAX_SPS_COUNT, sizeof(SPS), "sps");
7179 if(sps == NULL)
7180 return -1;
7182 sps->profile_idc= profile_idc;
7183 sps->level_idc= level_idc;
7185 if(sps->profile_idc >= 100){ //high profile
7186 if(get_ue_golomb(&s->gb) == 3) //chroma_format_idc
7187 get_bits1(&s->gb); //residual_color_transform_flag
7188 get_ue_golomb(&s->gb); //bit_depth_luma_minus8
7189 get_ue_golomb(&s->gb); //bit_depth_chroma_minus8
7190 sps->transform_bypass = get_bits1(&s->gb);
7191 decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
7192 }else
7193 sps->scaling_matrix_present = 0;
7195 sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
7196 sps->poc_type= get_ue_golomb(&s->gb);
7198 if(sps->poc_type == 0){ //FIXME #define
7199 sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
7200 } else if(sps->poc_type == 1){//FIXME #define
7201 sps->delta_pic_order_always_zero_flag= get_bits1(&s->gb);
7202 sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
7203 sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
7204 tmp= get_ue_golomb(&s->gb);
7206 if(tmp >= sizeof(sps->offset_for_ref_frame) / sizeof(sps->offset_for_ref_frame[0])){
7207 av_log(h->s.avctx, AV_LOG_ERROR, "poc_cycle_length overflow %u\n", tmp);
7208 return -1;
7210 sps->poc_cycle_length= tmp;
7212 for(i=0; i<sps->poc_cycle_length; i++)
7213 sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
7214 }else if(sps->poc_type != 2){
7215 av_log(h->s.avctx, AV_LOG_ERROR, "illegal POC type %d\n", sps->poc_type);
7216 return -1;
7219 tmp= get_ue_golomb(&s->gb);
7220 if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
7221 av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
7222 return -1;
7224 sps->ref_frame_count= tmp;
7225 sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
7226 mb_width= get_ue_golomb(&s->gb) + 1;
7227 mb_height= get_ue_golomb(&s->gb) + 1;
7228 if(mb_width >= INT_MAX/16 || mb_height >= INT_MAX/16 ||
7229 avcodec_check_dimensions(NULL, 16*mb_width, 16*mb_height)){
7230 av_log(h->s.avctx, AV_LOG_ERROR, "mb_width/height overflow\n");
7231 return -1;
7233 sps->mb_width = mb_width;
7234 sps->mb_height= mb_height;
7236 sps->frame_mbs_only_flag= get_bits1(&s->gb);
7237 if(!sps->frame_mbs_only_flag)
7238 sps->mb_aff= get_bits1(&s->gb);
7239 else
7240 sps->mb_aff= 0;
7242 sps->direct_8x8_inference_flag= get_bits1(&s->gb);
7244 #ifndef ALLOW_INTERLACE
7245 if(sps->mb_aff)
7246 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF support not included; enable it at compile-time.\n");
7247 #endif
7248 if(!sps->direct_8x8_inference_flag && sps->mb_aff)
7249 av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + !direct_8x8_inference is not implemented\n");
7251 sps->crop= get_bits1(&s->gb);
7252 if(sps->crop){
7253 sps->crop_left = get_ue_golomb(&s->gb);
7254 sps->crop_right = get_ue_golomb(&s->gb);
7255 sps->crop_top = get_ue_golomb(&s->gb);
7256 sps->crop_bottom= get_ue_golomb(&s->gb);
7257 if(sps->crop_left || sps->crop_top){
7258 av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
7260 if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
7261 av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
7263 }else{
7264 sps->crop_left =
7265 sps->crop_right =
7266 sps->crop_top =
7267 sps->crop_bottom= 0;
7270 sps->vui_parameters_present_flag= get_bits1(&s->gb);
7271 if( sps->vui_parameters_present_flag )
7272 decode_vui_parameters(h, sps);
7274 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7275 av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%u profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
7276 sps_id, sps->profile_idc, sps->level_idc,
7277 sps->poc_type,
7278 sps->ref_frame_count,
7279 sps->mb_width, sps->mb_height,
7280 sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
7281 sps->direct_8x8_inference_flag ? "8B8" : "",
7282 sps->crop_left, sps->crop_right,
7283 sps->crop_top, sps->crop_bottom,
7284 sps->vui_parameters_present_flag ? "VUI" : ""
7287 return 0;
7290 static void
7291 build_qp_table(PPS *pps, int t, int index)
7293 int i;
7294 for(i = 0; i < 255; i++)
7295 pps->chroma_qp_table[t][i & 0xff] = chroma_qp[av_clip(i + index, 0, 51)];
7298 static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
7299 MpegEncContext * const s = &h->s;
7300 unsigned int tmp, pps_id= get_ue_golomb(&s->gb);
7301 PPS *pps;
7303 pps = alloc_parameter_set(h, (void **)h->pps_buffers, pps_id, MAX_PPS_COUNT, sizeof(PPS), "pps");
7304 if(pps == NULL)
7305 return -1;
7307 tmp= get_ue_golomb(&s->gb);
7308 if(tmp>=MAX_SPS_COUNT || h->sps_buffers[tmp] == NULL){
7309 av_log(h->s.avctx, AV_LOG_ERROR, "sps_id out of range\n");
7310 return -1;
7312 pps->sps_id= tmp;
7314 pps->cabac= get_bits1(&s->gb);
7315 pps->pic_order_present= get_bits1(&s->gb);
7316 pps->slice_group_count= get_ue_golomb(&s->gb) + 1;
7317 if(pps->slice_group_count > 1 ){
7318 pps->mb_slice_group_map_type= get_ue_golomb(&s->gb);
7319 av_log(h->s.avctx, AV_LOG_ERROR, "FMO not supported\n");
7320 switch(pps->mb_slice_group_map_type){
7321 case 0:
7322 #if 0
7323 | for( i = 0; i <= num_slice_groups_minus1; i++ ) | | |
7324 | run_length[ i ] |1 |ue(v) |
7325 #endif
7326 break;
7327 case 2:
7328 #if 0
7329 | for( i = 0; i < num_slice_groups_minus1; i++ ) | | |
7330 |{ | | |
7331 | top_left_mb[ i ] |1 |ue(v) |
7332 | bottom_right_mb[ i ] |1 |ue(v) |
7333 | } | | |
7334 #endif
7335 break;
7336 case 3:
7337 case 4:
7338 case 5:
7339 #if 0
7340 | slice_group_change_direction_flag |1 |u(1) |
7341 | slice_group_change_rate_minus1 |1 |ue(v) |
7342 #endif
7343 break;
7344 case 6:
7345 #if 0
7346 | slice_group_id_cnt_minus1 |1 |ue(v) |
7347 | for( i = 0; i <= slice_group_id_cnt_minus1; i++ | | |
7348 |) | | |
7349 | slice_group_id[ i ] |1 |u(v) |
7350 #endif
7351 break;
7354 pps->ref_count[0]= get_ue_golomb(&s->gb) + 1;
7355 pps->ref_count[1]= get_ue_golomb(&s->gb) + 1;
7356 if(pps->ref_count[0]-1 > 32-1 || pps->ref_count[1]-1 > 32-1){
7357 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
7358 pps->ref_count[0]= pps->ref_count[1]= 1;
7359 return -1;
7362 pps->weighted_pred= get_bits1(&s->gb);
7363 pps->weighted_bipred_idc= get_bits(&s->gb, 2);
7364 pps->init_qp= get_se_golomb(&s->gb) + 26;
7365 pps->init_qs= get_se_golomb(&s->gb) + 26;
7366 pps->chroma_qp_index_offset[0]= get_se_golomb(&s->gb);
7367 pps->deblocking_filter_parameters_present= get_bits1(&s->gb);
7368 pps->constrained_intra_pred= get_bits1(&s->gb);
7369 pps->redundant_pic_cnt_present = get_bits1(&s->gb);
7371 pps->transform_8x8_mode= 0;
7372 h->dequant_coeff_pps= -1; //contents of sps/pps can change even if id doesn't, so reinit
7373 memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
7374 memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
7376 if(get_bits_count(&s->gb) < bit_length){
7377 pps->transform_8x8_mode= get_bits1(&s->gb);
7378 decode_scaling_matrices(h, h->sps_buffers[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
7379 pps->chroma_qp_index_offset[1]= get_se_golomb(&s->gb); //second_chroma_qp_index_offset
7380 } else {
7381 pps->chroma_qp_index_offset[1]= pps->chroma_qp_index_offset[0];
7384 build_qp_table(pps, 0, pps->chroma_qp_index_offset[0]);
7385 if(pps->chroma_qp_index_offset[0] != pps->chroma_qp_index_offset[1]) {
7386 build_qp_table(pps, 1, pps->chroma_qp_index_offset[1]);
7387 h->pps.chroma_qp_diff= 1;
7388 } else
7389 memcpy(pps->chroma_qp_table[1], pps->chroma_qp_table[0], 256);
7391 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
7392 av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%u sps:%u %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d/%d %s %s %s %s\n",
7393 pps_id, pps->sps_id,
7394 pps->cabac ? "CABAC" : "CAVLC",
7395 pps->slice_group_count,
7396 pps->ref_count[0], pps->ref_count[1],
7397 pps->weighted_pred ? "weighted" : "",
7398 pps->init_qp, pps->init_qs, pps->chroma_qp_index_offset[0], pps->chroma_qp_index_offset[1],
7399 pps->deblocking_filter_parameters_present ? "LPAR" : "",
7400 pps->constrained_intra_pred ? "CONSTR" : "",
7401 pps->redundant_pic_cnt_present ? "REDU" : "",
7402 pps->transform_8x8_mode ? "8x8DCT" : ""
7406 return 0;
7410 * Call decode_slice() for each context.
7412 * @param h h264 master context
7413 * @param context_count number of contexts to execute
7415 static void execute_decode_slices(H264Context *h, int context_count){
7416 MpegEncContext * const s = &h->s;
7417 AVCodecContext * const avctx= s->avctx;
7418 H264Context *hx;
7419 int i;
7421 if(context_count == 1) {
7422 decode_slice(avctx, h);
7423 } else {
7424 for(i = 1; i < context_count; i++) {
7425 hx = h->thread_context[i];
7426 hx->s.error_resilience = avctx->error_resilience;
7427 hx->s.error_count = 0;
7430 avctx->execute(avctx, (void *)decode_slice,
7431 (void **)h->thread_context, NULL, context_count);
7433 /* pull back stuff from slices to master context */
7434 hx = h->thread_context[context_count - 1];
7435 s->mb_x = hx->s.mb_x;
7436 s->mb_y = hx->s.mb_y;
7437 s->dropable = hx->s.dropable;
7438 s->picture_structure = hx->s.picture_structure;
7439 for(i = 1; i < context_count; i++)
7440 h->s.error_count += h->thread_context[i]->s.error_count;
7445 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
7446 MpegEncContext * const s = &h->s;
7447 AVCodecContext * const avctx= s->avctx;
7448 int buf_index=0;
7449 H264Context *hx; ///< thread context
7450 int context_count = 0;
7452 h->max_contexts = avctx->thread_count;
7453 #if 0
7454 int i;
7455 for(i=0; i<50; i++){
7456 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
7458 #endif
7459 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
7460 h->current_slice = 0;
7461 if (!s->first_field)
7462 s->current_picture_ptr= NULL;
7465 for(;;){
7466 int consumed;
7467 int dst_length;
7468 int bit_length;
7469 const uint8_t *ptr;
7470 int i, nalsize = 0;
7471 int err;
7473 if(h->is_avc) {
7474 if(buf_index >= buf_size) break;
7475 nalsize = 0;
7476 for(i = 0; i < h->nal_length_size; i++)
7477 nalsize = (nalsize << 8) | buf[buf_index++];
7478 if(nalsize <= 1 || (nalsize+buf_index > buf_size)){
7479 if(nalsize == 1){
7480 buf_index++;
7481 continue;
7482 }else{
7483 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
7484 break;
7487 } else {
7488 // start code prefix search
7489 for(; buf_index + 3 < buf_size; buf_index++){
7490 // This should always succeed in the first iteration.
7491 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
7492 break;
7495 if(buf_index+3 >= buf_size) break;
7497 buf_index+=3;
7500 hx = h->thread_context[context_count];
7502 ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
7503 if (ptr==NULL || dst_length < 0){
7504 return -1;
7506 while(ptr[dst_length - 1] == 0 && dst_length > 0)
7507 dst_length--;
7508 bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
7510 if(s->avctx->debug&FF_DEBUG_STARTCODE){
7511 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
7514 if (h->is_avc && (nalsize != consumed)){
7515 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
7516 consumed= nalsize;
7519 buf_index += consumed;
7521 if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
7522 ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
7523 continue;
7525 again:
7526 err = 0;
7527 switch(hx->nal_unit_type){
7528 case NAL_IDR_SLICE:
7529 if (h->nal_unit_type != NAL_IDR_SLICE) {
7530 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
7531 return -1;
7533 idr(h); //FIXME ensure we don't loose some frames if there is reordering
7534 case NAL_SLICE:
7535 init_get_bits(&hx->s.gb, ptr, bit_length);
7536 hx->intra_gb_ptr=
7537 hx->inter_gb_ptr= &hx->s.gb;
7538 hx->s.data_partitioning = 0;
7540 if((err = decode_slice_header(hx, h)))
7541 break;
7543 s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
7544 if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
7545 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7546 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
7547 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7548 && avctx->skip_frame < AVDISCARD_ALL)
7549 context_count++;
7550 break;
7551 case NAL_DPA:
7552 init_get_bits(&hx->s.gb, ptr, bit_length);
7553 hx->intra_gb_ptr=
7554 hx->inter_gb_ptr= NULL;
7555 hx->s.data_partitioning = 1;
7557 err = decode_slice_header(hx, h);
7558 break;
7559 case NAL_DPB:
7560 init_get_bits(&hx->intra_gb, ptr, bit_length);
7561 hx->intra_gb_ptr= &hx->intra_gb;
7562 break;
7563 case NAL_DPC:
7564 init_get_bits(&hx->inter_gb, ptr, bit_length);
7565 hx->inter_gb_ptr= &hx->inter_gb;
7567 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
7568 && s->context_initialized
7569 && s->hurry_up < 5
7570 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
7571 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
7572 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
7573 && avctx->skip_frame < AVDISCARD_ALL)
7574 context_count++;
7575 break;
7576 case NAL_SEI:
7577 init_get_bits(&s->gb, ptr, bit_length);
7578 decode_sei(h);
7579 break;
7580 case NAL_SPS:
7581 init_get_bits(&s->gb, ptr, bit_length);
7582 decode_seq_parameter_set(h);
7584 if(s->flags& CODEC_FLAG_LOW_DELAY)
7585 s->low_delay=1;
7587 if(avctx->has_b_frames < 2)
7588 avctx->has_b_frames= !s->low_delay;
7589 break;
7590 case NAL_PPS:
7591 init_get_bits(&s->gb, ptr, bit_length);
7593 decode_picture_parameter_set(h, bit_length);
7595 break;
7596 case NAL_AUD:
7597 case NAL_END_SEQUENCE:
7598 case NAL_END_STREAM:
7599 case NAL_FILLER_DATA:
7600 case NAL_SPS_EXT:
7601 case NAL_AUXILIARY_SLICE:
7602 break;
7603 default:
7604 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
7607 if(context_count == h->max_contexts) {
7608 execute_decode_slices(h, context_count);
7609 context_count = 0;
7612 if (err < 0)
7613 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
7614 else if(err == 1) {
7615 /* Slice could not be decoded in parallel mode, copy down
7616 * NAL unit stuff to context 0 and restart. Note that
7617 * rbsp_buffer is not transfered, but since we no longer
7618 * run in parallel mode this should not be an issue. */
7619 h->nal_unit_type = hx->nal_unit_type;
7620 h->nal_ref_idc = hx->nal_ref_idc;
7621 hx = h;
7622 goto again;
7625 if(context_count)
7626 execute_decode_slices(h, context_count);
7627 return buf_index;
7631 * returns the number of bytes consumed for building the current frame
7633 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
7634 if(s->flags&CODEC_FLAG_TRUNCATED){
7635 pos -= s->parse_context.last_index;
7636 if(pos<0) pos=0; // FIXME remove (unneeded?)
7638 return pos;
7639 }else{
7640 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
7641 if(pos+10>buf_size) pos=buf_size; // oops ;)
7643 return pos;
7647 static int decode_frame(AVCodecContext *avctx,
7648 void *data, int *data_size,
7649 const uint8_t *buf, int buf_size)
7651 H264Context *h = avctx->priv_data;
7652 MpegEncContext *s = &h->s;
7653 AVFrame *pict = data;
7654 int buf_index;
7656 s->flags= avctx->flags;
7657 s->flags2= avctx->flags2;
7659 /* no supplementary picture */
7660 if (buf_size == 0) {
7661 Picture *out;
7662 int i, out_idx;
7664 //FIXME factorize this with the output code below
7665 out = h->delayed_pic[0];
7666 out_idx = 0;
7667 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7668 if(h->delayed_pic[i]->poc < out->poc){
7669 out = h->delayed_pic[i];
7670 out_idx = i;
7673 for(i=out_idx; h->delayed_pic[i]; i++)
7674 h->delayed_pic[i] = h->delayed_pic[i+1];
7676 if(out){
7677 *data_size = sizeof(AVFrame);
7678 *pict= *(AVFrame*)out;
7681 return 0;
7684 if(s->flags&CODEC_FLAG_TRUNCATED){
7685 int next= ff_h264_find_frame_end(h, buf, buf_size);
7687 if( ff_combine_frame(&s->parse_context, next, (const uint8_t **)&buf, &buf_size) < 0 )
7688 return buf_size;
7689 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
7692 if(h->is_avc && !h->got_avcC) {
7693 int i, cnt, nalsize;
7694 unsigned char *p = avctx->extradata;
7695 if(avctx->extradata_size < 7) {
7696 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
7697 return -1;
7699 if(*p != 1) {
7700 av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
7701 return -1;
7703 /* sps and pps in the avcC always have length coded with 2 bytes,
7704 so put a fake nal_length_size = 2 while parsing them */
7705 h->nal_length_size = 2;
7706 // Decode sps from avcC
7707 cnt = *(p+5) & 0x1f; // Number of sps
7708 p += 6;
7709 for (i = 0; i < cnt; i++) {
7710 nalsize = AV_RB16(p) + 2;
7711 if(decode_nal_units(h, p, nalsize) < 0) {
7712 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
7713 return -1;
7715 p += nalsize;
7717 // Decode pps from avcC
7718 cnt = *(p++); // Number of pps
7719 for (i = 0; i < cnt; i++) {
7720 nalsize = AV_RB16(p) + 2;
7721 if(decode_nal_units(h, p, nalsize) != nalsize) {
7722 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
7723 return -1;
7725 p += nalsize;
7727 // Now store right nal length size, that will be use to parse all other nals
7728 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
7729 // Do not reparse avcC
7730 h->got_avcC = 1;
7733 if(avctx->frame_number==0 && !h->is_avc && s->avctx->extradata_size){
7734 if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
7735 return -1;
7738 buf_index=decode_nal_units(h, buf, buf_size);
7739 if(buf_index < 0)
7740 return -1;
7742 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
7743 if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
7744 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
7745 return -1;
7748 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
7749 Picture *out = s->current_picture_ptr;
7750 Picture *cur = s->current_picture_ptr;
7751 Picture *prev = h->delayed_output_pic;
7752 int i, pics, cross_idr, out_of_order, out_idx;
7754 s->mb_y= 0;
7756 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
7757 s->current_picture_ptr->pict_type= s->pict_type;
7759 h->prev_frame_num_offset= h->frame_num_offset;
7760 h->prev_frame_num= h->frame_num;
7761 if(!s->dropable) {
7762 h->prev_poc_msb= h->poc_msb;
7763 h->prev_poc_lsb= h->poc_lsb;
7764 execute_ref_pic_marking(h, h->mmco, h->mmco_index);
7768 * FIXME: Error handling code does not seem to support interlaced
7769 * when slices span multiple rows
7770 * The ff_er_add_slice calls don't work right for bottom
7771 * fields; they cause massive erroneous error concealing
7772 * Error marking covers both fields (top and bottom).
7773 * This causes a mismatched s->error_count
7774 * and a bad error table. Further, the error count goes to
7775 * INT_MAX when called for bottom field, because mb_y is
7776 * past end by one (callers fault) and resync_mb_y != 0
7777 * causes problems for the first MB line, too.
7779 if (!FIELD_PICTURE)
7780 ff_er_frame_end(s);
7782 MPV_frame_end(s);
7784 if (s->first_field) {
7785 /* Wait for second field. */
7786 *data_size = 0;
7788 } else {
7789 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
7790 /* Derive top_field_first from field pocs. */
7791 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
7793 //FIXME do something with unavailable reference frames
7795 #if 0 //decode order
7796 *data_size = sizeof(AVFrame);
7797 #else
7798 /* Sort B-frames into display order */
7800 if(h->sps.bitstream_restriction_flag
7801 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
7802 s->avctx->has_b_frames = h->sps.num_reorder_frames;
7803 s->low_delay = 0;
7806 pics = 0;
7807 while(h->delayed_pic[pics]) pics++;
7809 assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
7811 h->delayed_pic[pics++] = cur;
7812 if(cur->reference == 0)
7813 cur->reference = DELAYED_PIC_REF;
7815 cross_idr = 0;
7816 for(i=0; h->delayed_pic[i]; i++)
7817 if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
7818 cross_idr = 1;
7820 out = h->delayed_pic[0];
7821 out_idx = 0;
7822 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
7823 if(h->delayed_pic[i]->poc < out->poc){
7824 out = h->delayed_pic[i];
7825 out_idx = i;
7828 out_of_order = !cross_idr && prev && out->poc < prev->poc;
7829 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
7831 else if(prev && pics <= s->avctx->has_b_frames)
7832 out = prev;
7833 else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
7834 || (s->low_delay &&
7835 ((!cross_idr && prev && out->poc > prev->poc + 2)
7836 || cur->pict_type == FF_B_TYPE)))
7838 s->low_delay = 0;
7839 s->avctx->has_b_frames++;
7840 out = prev;
7842 else if(out_of_order)
7843 out = prev;
7845 if(out_of_order || pics > s->avctx->has_b_frames){
7846 for(i=out_idx; h->delayed_pic[i]; i++)
7847 h->delayed_pic[i] = h->delayed_pic[i+1];
7850 if(prev == out)
7851 *data_size = 0;
7852 else
7853 *data_size = sizeof(AVFrame);
7854 if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
7855 prev->reference = 0;
7856 h->delayed_output_pic = out;
7857 #endif
7859 if(out)
7860 *pict= *(AVFrame*)out;
7861 else
7862 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
7866 assert(pict->data[0] || !*data_size);
7867 ff_print_debug_info(s, pict);
7868 //printf("out %d\n", (int)pict->data[0]);
7869 #if 0 //?
7871 /* Return the Picture timestamp as the frame number */
7872 /* we subtract 1 because it is added on utils.c */
7873 avctx->frame_number = s->picture_number - 1;
7874 #endif
7875 return get_consumed_bytes(s, buf_index, buf_size);
#if 0
/**
 * Fills h->mb_avail[0..5] for the macroblock at (s->mb_x, s->mb_y).
 * A neighbour counts as available when its slice_table entry equals
 * h->slice_num: [0] top-left, [1] top, [2] top-right, [3] left.
 * Entries [4] and [5] are constants.
 */
static inline void fill_mb_avail(H264Context *h){
    MpegEncContext * const s = &h->s;
    const int cur_xy   = s->mb_x + s->mb_y*s->mb_stride;
    const int above_xy = cur_xy - s->mb_stride;
    int top_left  = 0;
    int top       = 0;
    int top_right = 0;

    /* the row above exists only when this is not the first macroblock row */
    if(s->mb_y){
        top_left  = s->mb_x                 && h->slice_table[above_xy - 1] == h->slice_num;
        top       =                            h->slice_table[above_xy    ] == h->slice_num;
        top_right = s->mb_x+1 < s->mb_width && h->slice_table[above_xy + 1] == h->slice_num;
    }

    h->mb_avail[0]= top_left;
    h->mb_avail[1]= top;
    h->mb_avail[2]= top_right;
    h->mb_avail[3]= s->mb_x && h->slice_table[cur_xy - 1] == h->slice_num;
    h->mb_avail[4]= 1; //FIXME move out
    h->mb_avail[5]= 0; //FIXME move out
}
#endif
#ifdef TEST
#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
/**
 * Self test, built only when TEST is defined.
 *
 * Writes COUNT values with set_ue_golomb() / set_se_golomb(), reads them back
 * with get_ue_golomb() / get_se_golomb() and prints every mismatch. The
 * DCT/quantizer/NAL tests are disabled with #if 0.
 *
 * @return always 0; mismatches are only reported, not treated as fatal
 */
int main(void){
    int i;
    uint8_t temp[SIZE];
    PutBitContext pb;
    GetBitContext gb;
//    int int_temp[10000];
    DSPContext dsp;
    AVCodecContext avctx;

    dsputil_init(&dsp, &avctx);

    init_put_bits(&pb, temp, SIZE);
    printf("testing unsigned exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_ue_golomb(&pb, i);
        STOP_TIMER("set_ue_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        /* peek at the raw bits so they can be shown on a mismatch */
        s= show_bits(&gb, 24);

        START_TIMER
        j= get_ue_golomb(&gb);
        if(j != i){
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
//            return -1;
        }
        STOP_TIMER("get_ue_golomb");
    }


    init_put_bits(&pb, temp, SIZE);
    printf("testing signed exp golomb\n");
    for(i=0; i<COUNT; i++){
        START_TIMER
        set_se_golomb(&pb, i - COUNT/2);
        STOP_TIMER("set_se_golomb");
    }
    flush_put_bits(&pb);

    init_get_bits(&gb, temp, 8*SIZE);
    for(i=0; i<COUNT; i++){
        int j, s;

        s= show_bits(&gb, 24);

        START_TIMER
        j= get_se_golomb(&gb);
        if(j != i - COUNT/2){
            /* the value written was i - COUNT/2, so that is the expected one */
            printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i - COUNT/2, s);
//            return -1;
        }
        STOP_TIMER("get_se_golomb");
    }

#if 0
    printf("testing 4x4 (I)DCT\n");

    DCTELEM block[16];
    uint8_t src[16], ref[16];
    uint64_t error= 0, max_error=0;

    for(i=0; i<COUNT; i++){
        int j;
//        printf("%d %d %d\n", r1, r2, (r2-r1)*16);
        for(j=0; j<16; j++){
            ref[j]= random()%255;
            src[j]= random()%255;
        }

        h264_diff_dct_c(block, src, ref, 4);

        //normalize
        for(j=0; j<16; j++){
//            printf("%d ", block[j]);
            block[j]= block[j]*4;
            if(j&1) block[j]= (block[j]*4 + 2)/5;
            if(j&4) block[j]= (block[j]*4 + 2)/5;
        }
//        printf("\n");

        s->dsp.h264_idct_add(ref, block, 4);
/*        for(j=0; j<16; j++){
            printf("%d ", ref[j]);
        }
        printf("\n");*/

        for(j=0; j<16; j++){
            int diff= FFABS(src[j] - ref[j]);

            error+= diff*diff;
            max_error= FFMAX(max_error, diff);
        }
    }
    printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
    printf("testing quantizer\n");
    for(qp=0; qp<52; qp++){
        for(i=0; i<16; i++)
            src1_block[i]= src2_block[i]= random()%255;

    }
    printf("Testing NAL layer\n");

    uint8_t bitstream[COUNT];
    uint8_t nal[COUNT*2];
    H264Context h;
    memset(&h, 0, sizeof(H264Context));

    for(i=0; i<COUNT; i++){
        int zeros= i;
        int nal_length;
        int consumed;
        int out_length;
        uint8_t *out;
        int j;

        for(j=0; j<COUNT; j++){
            bitstream[j]= (random() % 255) + 1;
        }

        for(j=0; j<zeros; j++){
            int pos= random() % COUNT;
            while(bitstream[pos] == 0){
                pos++;
                pos %= COUNT;
            }
            bitstream[pos]=0;
        }

        START_TIMER

        nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
        if(nal_length<0){
            printf("encoding failed\n");
            return -1;
        }

        out= decode_nal(&h, nal, &out_length, &consumed, nal_length);

        STOP_TIMER("NAL")

        if(out_length != COUNT){
            printf("incorrect length %d %d\n", out_length, COUNT);
            return -1;
        }

        if(consumed != nal_length){
            printf("incorrect consumed length %d %d\n", nal_length, consumed);
            return -1;
        }

        if(memcmp(bitstream, out, COUNT)){
            printf("mismatch\n");
            return -1;
        }
    }
#endif

    printf("Testing RBSP\n");


    return 0;
}
#endif /* TEST */
8073 static av_cold int decode_end(AVCodecContext *avctx)
8075 H264Context *h = avctx->priv_data;
8076 MpegEncContext *s = &h->s;
8078 av_freep(&h->rbsp_buffer[0]);
8079 av_freep(&h->rbsp_buffer[1]);
8080 free_tables(h); //FIXME cleanup init stuff perhaps
8081 MPV_common_end(s);
8083 // memset(h, 0, sizeof(H264Context));
8085 return 0;
8089 AVCodec h264_decoder = {
8090 "h264",
8091 CODEC_TYPE_VIDEO,
8092 CODEC_ID_H264,
8093 sizeof(H264Context),
8094 decode_init,
8095 NULL,
8096 decode_end,
8097 decode_frame,
8098 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_TRUNCATED | CODEC_CAP_DELAY,
8099 .flush= flush_dpb,
8100 .long_name = "H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
8103 #include "svq3.c"