/********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2007                *
 * by the Xiph.Org Foundation http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************

  last mod: $Id: decode.c 14385 2008-01-09 19:53:18Z giles $

 ********************************************************************/
#if defined(OC_DUMP_IMAGES)

/*No post-processing.*/
#define OC_PP_LEVEL_DISABLED  (0)
/*Keep track of DC qi for each block only.*/
#define OC_PP_LEVEL_TRACKDCQI (1)
/*Deblock the luma plane.*/
#define OC_PP_LEVEL_DEBLOCKY  (2)
/*Dering the luma plane.*/
#define OC_PP_LEVEL_DERINGY   (3)
/*Stronger luma plane deringing.*/
#define OC_PP_LEVEL_SDERINGY  (4)
/*Deblock the chroma planes.*/
#define OC_PP_LEVEL_DEBLOCKC  (5)
/*Dering the chroma planes.*/
#define OC_PP_LEVEL_DERINGC   (6)
/*Stronger chroma plane deringing.*/
#define OC_PP_LEVEL_SDERINGC  (7)
/*Maximum valid post-processing level.*/
#define OC_PP_LEVEL_MAX       (7)
/*The mode alphabets for the various mode coding schemes.
  Scheme 0 uses a custom alphabet, which is not stored in this table.*/
static const int OC_MODE_ALPHABETS[7][OC_NMODES]={
  /*Last MV dominates.*/
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
  },
  {
    OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV,OC_MODE_INTER_NOMV,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,
    OC_MODE_GOLDEN_MV,OC_MODE_INTER_MV_FOUR
  },
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTER_MV_LAST,OC_MODE_INTER_MV_LAST2,
    OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
  },
  {
    OC_MODE_INTER_NOMV,OC_MODE_GOLDEN_NOMV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_INTER_MV,OC_MODE_INTRA,OC_MODE_GOLDEN_MV,
  },
  {
    OC_MODE_INTER_NOMV,OC_MODE_INTRA,OC_MODE_INTER_MV,OC_MODE_INTER_MV_LAST,
    OC_MODE_INTER_MV_LAST2,OC_MODE_GOLDEN_NOMV,OC_MODE_GOLDEN_MV,
  }
};
static int oc_sb_run_unpack(oggpack_buffer *_opb){
  long bits;
  int  ret;
  /*Coding scheme (see also the illustrative sketch after this function):
       Codeword            Run Length
     0                       1
     10x                     2-3
     110x                    4-5
     1110xx                  6-9
     11110xxx               10-17
     111110xxxx             18-33
     111111xxxxxxxxxxxx     34-4129*/
  theorapackB_read1(_opb,&bits);
  if(bits==0)return 1;
  theorapackB_read(_opb,2,&bits);
  if((bits&2)==0)return 2+(int)bits;
  else if((bits&1)==0){
    theorapackB_read1(_opb,&bits);
    return 4+(int)bits;
  }
  theorapackB_read(_opb,3,&bits);
  if((bits&4)==0)return 6+(int)bits;
  else if((bits&2)==0){
    ret=10+((bits&1)<<2);
    theorapackB_read(_opb,2,&bits);
    return ret+(int)bits;
  }
  else if((bits&1)==0){
    theorapackB_read(_opb,4,&bits);
    return 18+(int)bits;
  }
  theorapackB_read(_opb,12,&bits);
  return 34+(int)bits;
}
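/*Illustrative sketch only, fenced off from the build with #if 0: the same
   super-block run VLC decoded with a toy MSB-first bit reader standing in
   for the theorapackB_* routines.
  The toy_* names are hypothetical helpers invented for this sketch; they are
   reused by the later sketches in this file.*/
#if 0
# include <stdio.h>

typedef struct{const unsigned char *buf;long pos;}toy_bits;

/*Read _n bits MSB-first from the buffer.*/
static long toy_read(toy_bits *_b,int _n){
  long v;
  v=0;
  while(_n-->0){
    v=v<<1|(_b->buf[_b->pos>>3]>>(7-(_b->pos&7))&1);
    _b->pos++;
  }
  return v;
}

/*Decode one run length:
   0->1, 10x->2..3, 110x->4..5, 1110xx->6..9, 11110xxx->10..17,
   111110xxxx->18..33, 111111 followed by 12 bits->34..4129.*/
static int toy_sb_run(toy_bits *_b){
  if(!toy_read(_b,1))return 1;
  if(!toy_read(_b,1))return 2+(int)toy_read(_b,1);
  if(!toy_read(_b,1))return 4+(int)toy_read(_b,1);
  if(!toy_read(_b,1))return 6+(int)toy_read(_b,2);
  if(!toy_read(_b,1))return 10+(int)toy_read(_b,3);
  if(!toy_read(_b,1))return 18+(int)toy_read(_b,4);
  return 34+(int)toy_read(_b,12);
}

int main(void){
  /*The bit string 0|101|111011 encodes the runs 1, 3 and 9.*/
  static const unsigned char BITS[2]={0x5E,0xC0};
  toy_bits b={BITS,0};
  printf("%d %d %d\n",toy_sb_run(&b),toy_sb_run(&b),toy_sb_run(&b));
  return 0;
}
#endif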
static int oc_block_run_unpack(oggpack_buffer *_opb){
  long bits;
  long bits2;
  /*Coding scheme:
     Codeword             Run Length
     0x                      1-2
     10x                     3-4
     110x                    5-6
     1110xx                  7-10
     11110xx                11-14
     11111xxxx              15-30*/
  theorapackB_read(_opb,2,&bits);
  if((bits&2)==0)return 1+(int)bits;
  else if((bits&1)==0){
    theorapackB_read1(_opb,&bits);
    return 3+(int)bits;
  }
  theorapackB_read(_opb,2,&bits);
  if((bits&2)==0)return 5+(int)bits;
  else if((bits&1)==0){
    theorapackB_read(_opb,2,&bits);
    return 7+(int)bits;
  }
  theorapackB_read(_opb,3,&bits);
  if((bits&4)==0)return 11+bits;
  theorapackB_read(_opb,2,&bits2);
  return 15+((bits&3)<<2)+bits2;
}
static int oc_dec_init(oc_dec_ctx *_dec,const th_info *_info,
 const th_setup_info *_setup){
  ret=oc_state_init(&_dec->state,_info);
  oc_huff_trees_copy(_dec->huff_tables,
   (const oc_huff_node *const *)_setup->huff_tables);
  for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
    _dec->state.dequant_tables[qti][pli]=
     _dec->state.dequant_table_data[qti][pli];
  }
  oc_dequant_tables_init(_dec->state.dequant_tables,_dec->pp_dc_scale,
  for(qi=0;qi<64;qi++){
    for(qti=0;qti<2;qti++)for(pli=0;pli<3;pli++){
      qsum+=_dec->state.dequant_tables[qti][pli][qi][18]+
       _dec->state.dequant_tables[qti][pli][qi][19]+
       _dec->state.dequant_tables[qti][pli][qi][26]+
       _dec->state.dequant_tables[qti][pli][qi][27]<<(pli==0);
    }
    _dec->pp_sharp_mod[qi]=-(qsum>>11);
  }
  _dec->dct_tokens=(unsigned char **)oc_calloc_2d(64,
   _dec->state.nfrags,sizeof(_dec->dct_tokens[0][0]));
  _dec->extra_bits=(ogg_uint16_t **)oc_calloc_2d(64,
   _dec->state.nfrags,sizeof(_dec->extra_bits[0][0]));
  memcpy(_dec->state.loop_filter_limits,_setup->qinfo.loop_filter_limits,
   sizeof(_dec->state.loop_filter_limits));
  _dec->pp_level=OC_PP_LEVEL_DISABLED;
  _dec->variances=NULL;
  _dec->pp_frame_data=NULL;
  _dec->stripe_cb.ctx=NULL;
  _dec->stripe_cb.stripe_decoded=NULL;
static void oc_dec_clear(oc_dec_ctx *_dec){
  _ogg_free(_dec->pp_frame_data);
  _ogg_free(_dec->variances);
  _ogg_free(_dec->dc_qis);
  oc_free_2d(_dec->extra_bits);
  oc_free_2d(_dec->dct_tokens);
  oc_huff_trees_clear(_dec->huff_tables);
  oc_state_clear(&_dec->state);
}
static int oc_dec_frame_header_unpack(oc_dec_ctx *_dec){
  TH_DEBUG("\n>>>> beginning frame %ld\n\n",dframe);
  /*Check to make sure this is a data packet.*/
  theorapackB_read1(&_dec->opb,&val);
  TH_DEBUG("frame type = %s, ",val==0?"video":"unknown");
  if(val!=0)return TH_EBADPACKET;
  /*Read in the frame type (I or P).*/
  theorapackB_read1(&_dec->opb,&val);
  _dec->state.frame_type=(int)val;
  TH_DEBUG("%s\n",val?"predicted":"key");
  /*Read in the current qi.*/
  theorapackB_read(&_dec->opb,6,&val);
  _dec->state.qis[0]=(int)val;
  TH_DEBUG("frame quality = { %ld ",val);
  theorapackB_read1(&_dec->opb,&val);
  if(!val)_dec->state.nqis=1;
  theorapackB_read(&_dec->opb,6,&val);
  _dec->state.qis[1]=(int)val;
  TH_DEBUG("%ld ",val);
  theorapackB_read1(&_dec->opb,&val);
  if(!val)_dec->state.nqis=2;
  theorapackB_read(&_dec->opb,6,&val);
  TH_DEBUG("%ld ",val);
  _dec->state.qis[2]=(int)val;
  if(_dec->state.frame_type==OC_INTRA_FRAME){
    /*Keyframes have 3 unused configuration bits, holdovers from VP3 days.
      Most of the other unused bits in the VP3 headers were eliminated.
      I don't know why these remain.*/
    /*I wanted to eliminate wasted bits, but not all config wiggle room.
       --Monty*/
    theorapackB_read(&_dec->opb,3,&val);
    if(val!=0)return TH_EIMPL;
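/*Illustrative sketch only, fenced off with #if 0: the frame-header fields
   read by oc_dec_frame_header_unpack() above, parsed in the same order with
   the toy_bits reader from the earlier sketch.
  The toy_* names are hypothetical; error handling is reduced to -1.*/
#if 0
typedef struct{
  int frame_type; /*0: key (intra) frame, 1: predicted (inter) frame.*/
  int qis[3];     /*One to three 6-bit quality indices.*/
  int nqis;
}toy_frame_header;

static int toy_frame_header_unpack(toy_bits *_b,toy_frame_header *_h){
  /*The first bit must be 0 for a video data packet.*/
  if(toy_read(_b,1)!=0)return -1;
  _h->frame_type=(int)toy_read(_b,1);
  _h->qis[0]=(int)toy_read(_b,6);
  _h->nqis=1;
  if(toy_read(_b,1)){
    _h->qis[1]=(int)toy_read(_b,6);
    _h->nqis=2;
    if(toy_read(_b,1)){
      _h->qis[2]=(int)toy_read(_b,6);
      _h->nqis=3;
    }
  }
  /*Key frames carry three additional bits which must currently be zero.*/
  if(_h->frame_type==0&&toy_read(_b,3)!=0)return -1;
  return 0;
}
#endif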
/*Mark all fragments as coded and in OC_MODE_INTRA.
  This also builds up the coded fragment list (in coded order), and clears
   the uncoded fragment list.
  It does not update the coded macro block list, as that is not used when
   decoding INTRA frames.*/
static void oc_dec_mark_all_intra(oc_dec_ctx *_dec){
  int prev_ncoded_fragis;
  prev_ncoded_fragis=ncoded_fragis=0;
  sb=sb_end=_dec->state.sbs;
  for(pli=0;pli<3;pli++){
    const oc_fragment_plane *fplane;
    fplane=_dec->state.fplanes+pli;
    sb_end+=fplane->nsbs;
    for(;sb<sb_end;sb++){
      for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
        fragi=sb->map[quadi][bi];
        frag=_dec->state.frags+fragi;
        frag->mbmode=OC_MODE_INTRA;
        _dec->state.coded_fragis[ncoded_fragis++]=fragi;
    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
    prev_ncoded_fragis=ncoded_fragis;
    _dec->state.nuncoded_fragis[pli]=0;
/*Decodes the bit flags for whether or not each super block is partially
   coded.
  Return: The number of partially coded super blocks.*/
static int oc_dec_partial_sb_flags_unpack(oc_dec_ctx *_dec){
  theorapackB_read1(&_dec->opb,&val);
  sb_end=sb+_dec->state.nsbs;
  run_count=npartial=0;
    run_count=oc_sb_run_unpack(&_dec->opb);
    full_run=run_count>=4129;
      sb->coded_partially=flag;
    while(--run_count>0&&sb<sb_end);
    if(full_run&&sb<sb_end){
      theorapackB_read1(&_dec->opb,&val);
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
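/*Illustrative sketch only, fenced off with #if 0: the flag/run-length scheme
   used above, applied to a plain array of flags.
  One flag bit is read up front; each run length then assigns that flag to a
   stretch of super blocks.
  Ordinarily the flag simply toggles between runs, but a maximum-length run
   (4129) does not imply a toggle, so a fresh flag bit is read instead.
  toy_read() and toy_sb_run() are the hypothetical helpers from the earlier
   sketch.*/
#if 0
static int toy_unpack_partial_flags(toy_bits *_b,unsigned char *_flags,
 int _nsbs){
  int  i;
  int  npartial;
  long flag;
  flag=toy_read(_b,1);
  npartial=0;
  i=0;
  while(i<_nsbs){
    int run;
    int full_run;
    run=toy_sb_run(_b);
    full_run=run>=4129;
    while(run-->0&&i<_nsbs){
      _flags[i++]=(unsigned char)flag;
      npartial+=(int)flag;
    }
    if(full_run&&i<_nsbs)flag=toy_read(_b,1);
    else flag=!flag;
  }
  return npartial;
}
#endif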
/*Decodes the bit flags for whether or not each non-partially-coded super
   block is fully coded or not.
  This function should only be called if there is at least one
   non-partially-coded super block.
  Return: The number of partially coded super blocks.*/
static void oc_dec_coded_sb_flags_unpack(oc_dec_ctx *_dec){
  sb_end=sb+_dec->state.nsbs;
  /*Skip partially coded super blocks.*/
  for(;sb->coded_partially;sb++);
  theorapackB_read1(&_dec->opb,&val);
    run_count=oc_sb_run_unpack(&_dec->opb);
    full_run=run_count>=4129;
    for(;sb<sb_end;sb++){
      if(sb->coded_partially)continue;
      if(run_count--<=0)break;
      sb->coded_fully=flag;
    if(full_run&&sb<sb_end){
      theorapackB_read1(&_dec->opb,&val);
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
static void oc_dec_coded_flags_unpack(oc_dec_ctx *_dec){
  int prev_ncoded_fragis;
  int prev_nuncoded_fragis;
  npartial=oc_dec_partial_sb_flags_unpack(_dec);
  if(npartial<_dec->state.nsbs)oc_dec_coded_sb_flags_unpack(_dec);
  theorapackB_read1(&_dec->opb,&val);
  prev_ncoded_fragis=ncoded_fragis=prev_nuncoded_fragis=nuncoded_fragis=0;
  sb=sb_end=_dec->state.sbs;
  for(pli=0;pli<3;pli++){
    const oc_fragment_plane *fplane;
    fplane=_dec->state.fplanes+pli;
    sb_end+=fplane->nsbs;
    for(;sb<sb_end;sb++){
      for(quadi=0;quadi<4;quadi++)if(sb->quad_valid&1<<quadi){
        fragi=sb->map[quadi][bi];
        frag=_dec->state.frags+fragi;
        if(sb->coded_fully)frag->coded=1;
        else if(!sb->coded_partially)frag->coded=0;
          run_count=oc_block_run_unpack(&_dec->opb);
        if(frag->coded)_dec->state.coded_fragis[ncoded_fragis++]=fragi;
        else *(_dec->state.uncoded_fragis-++nuncoded_fragis)=fragi;
    _dec->state.ncoded_fragis[pli]=ncoded_fragis-prev_ncoded_fragis;
    prev_ncoded_fragis=ncoded_fragis;
    _dec->state.nuncoded_fragis[pli]=nuncoded_fragis-prev_nuncoded_fragis;
    prev_nuncoded_fragis=nuncoded_fragis;
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
  // assuming 4:2:0 right now; THIS IS WRONG but only an issue if dumping debug info
  TH_DEBUG("predicted (partially coded frame)\n");
  TH_DEBUG("superblock coded flags = {");
  int w=_dec->state.info.frame_width;
  int h=_dec->state.info.frame_height;
  for(y=0;y<(h+31)/32;y++){
    for(x=0;x<(w+31)/32;x++,i++)
      TH_DEBUG("%x",(_dec->state.sbs[i].coded_partially!=0)|
       (_dec->state.sbs[i].coded_fully));
  for(y=0;y<(h+63)/64;y++){
    for(x=0;x<(w+63)/64;x++,i++)
      TH_DEBUG("%x",(_dec->state.sbs[i].coded_partially!=0)|
       (_dec->state.sbs[i].coded_fully));
  for(y=0;y<(h+63)/64;y++){
    for(x=0;x<(w+63)/64;x++,i++)
      TH_DEBUG("%x",(_dec->state.sbs[i].coded_partially!=0)|
       (_dec->state.sbs[i].coded_fully));
  if(i!=_dec->state.nsbs)
    TH_DEBUG("WARNING! superblock count, raster %d != flat %d\n",
  TH_DEBUG("block coded flags = {");
  for(y=0;y<(h+7)/8;y++){
    for(x=0;x<(w+7)/8;x++,i++)
      TH_DEBUG("%x",(_dec->state.frags[i].coded!=0));
  for(y=0;y<(h+15)/16;y++){
    for(x=0;x<(w+15)/16;x++,i++)
      TH_DEBUG("%x",(_dec->state.frags[i].coded!=0));
  for(y=0;y<(h+15)/16;y++){
    for(x=0;x<(w+15)/16;x++,i++)
      TH_DEBUG("%x",(_dec->state.frags[i].coded!=0));
  if(i!=_dec->state.nfrags)
    TH_DEBUG("WARNING! block count, raster %d != flat %d\n",
     i,_dec->state.nfrags);
typedef int (*oc_mode_unpack_func)(oggpack_buffer *_opb);

static int oc_vlc_mode_unpack(oggpack_buffer *_opb){
  long val;
  int  i;
  /*Modes are coded in unary: the mode's rank in the current alphabet is the
     number of 1 bits read before a 0 (or before the 7-bit maximum).*/
  for(i=0;i<7;i++){
    theorapackB_read1(_opb,&val);
    if(!val)break;
  }
  return i;
}

static int oc_clc_mode_unpack(oggpack_buffer *_opb){
  long val;
  theorapackB_read(_opb,3,&val);
  return (int)val;
}
/*Unpacks the list of macro block modes for INTER frames.*/
static void oc_dec_mb_modes_unpack(oc_dec_ctx *_dec){
  oc_mode_unpack_func mode_unpack;
  int scheme0_alphabet[8];
  theorapackB_read(&_dec->opb,3,&val);
  mode_scheme=(int)val;
  TH_DEBUG("mode encode scheme = %d\n",(int)val);
  if(mode_scheme==0){
    /*Just in case, initialize the modes to something.
      If the bitstream doesn't contain each index exactly once, it's likely
       corrupt and the rest of the packet is garbage anyway, but this way we
       won't crash, and we'll decode SOMETHING.*/
    TH_DEBUG("mode scheme list = { ");
    for(mi=0;mi<OC_NMODES;mi++)scheme0_alphabet[mi]=OC_MODE_INTER_NOMV;
    for(mi=0;mi<OC_NMODES;mi++){
      theorapackB_read(&_dec->opb,3,&val);
      scheme0_alphabet[val]=OC_MODE_ALPHABETS[6][mi];
      TH_DEBUG("%d ",(int)val);
    }
    alphabet=scheme0_alphabet;
  }
  else alphabet=OC_MODE_ALPHABETS[mode_scheme-1];
  if(mode_scheme==7)mode_unpack=oc_clc_mode_unpack;
  else mode_unpack=oc_vlc_mode_unpack;
  mb_end=mb+_dec->state.nmbs;
  TH_DEBUG("mode list = { ");
  for(j=0;mb<mb_end;mb++){
    if(mb->mode!=OC_MODE_INVALID){
      fragi=mb->map[0][bi];
      if(fragi>=0&&_dec->state.frags[fragi].coded)break;
      mb->mode=alphabet[(*mode_unpack)(&_dec->opb)];
      TH_DEBUG("%d ",mb->mode);
      mb->mode=OC_MODE_INTER_NOMV;
typedef int (*oc_mv_comp_unpack_func)(oggpack_buffer *_opb);

static int oc_vlc_mv_comp_unpack(oggpack_buffer *_opb){
  theorapackB_read(_opb,3,&bits);
      mvsigned[0]=(int)(bits-1);
      theorapackB_read1(_opb,&bits);
      mvsigned[0]=1<<bits-3;
      theorapackB_read(_opb,bits-2,&bits);
      mvsigned[0]+=(int)(bits>>1);
  mvsigned[1]=-mvsigned[0];
  return mvsigned[bits];
}

static int oc_clc_mv_comp_unpack(oggpack_buffer *_opb){
  theorapackB_read(_opb,6,&bits);
  mvsigned[1]=-mvsigned[0];
  return mvsigned[bits&1];
}
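/*Illustrative sketch only, fenced off with #if 0: the mvsigned[] idiom both
   unpack routines above rely on.
  The magnitude and its negation are stored side by side, and the sign bit
   simply indexes the pair, so no branch is needed to apply the sign.*/
#if 0
static int toy_apply_mv_sign(int _magnitude,int _sign_bit){
  int mvsigned[2];
  mvsigned[0]=_magnitude;
  mvsigned[1]=-_magnitude;
  return mvsigned[_sign_bit&1];
}
/*E.g., for the fixed-length (CLC) code read above, a 6-bit value carries the
   magnitude in its upper bits and the sign in its lowest bit:
   toy_apply_mv_sign((int)(bits>>1),(int)bits).*/
#endif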
/*Unpacks the list of motion vectors for INTER frames, and propagates the
   macro block modes and motion vectors to the individual fragments.*/
static void oc_dec_mv_unpack_and_frag_modes_fill(oc_dec_ctx *_dec){
625 oc_set_chroma_mvs_func set_chroma_mvs
;
626 oc_mv_comp_unpack_func mv_comp_unpack
;
637 set_chroma_mvs
=OC_SET_CHROMA_MVS_TABLE
[_dec
->state
.info
.pixel_fmt
];
638 theorapackB_read1(&_dec
->opb
,&val
);
639 TH_DEBUG("motion vector table = %d\n",(int)val
);
640 mv_comp_unpack
=val
?oc_clc_mv_comp_unpack
:oc_vlc_mv_comp_unpack
;
641 map_idxs
=OC_MB_MAP_IDXS
[_dec
->state
.info
.pixel_fmt
];
642 map_nidxs
=OC_MB_MAP_NIDXS
[_dec
->state
.info
.pixel_fmt
];
643 memset(last_mv
,0,sizeof(last_mv
));
645 mb_end
=mb
+_dec
->state
.nmbs
;
647 TH_DEBUG("motion vectors = {");
649 for(;mb
<mb_end
;mb
++)if(mb
->mode
!=OC_MODE_INVALID
){
659 /*Search for at least one coded fragment.*/
662 mapi
=map_idxs
[mapii
];
663 fragi
=mb
->map
[mapi
>>2][mapi
&3];
664 if(fragi
>=0&&_dec
->state
.frags
[fragi
].coded
)coded
[ncoded
++]=mapi
;
666 while(++mapii
<map_nidxs
);
667 if(ncoded
<=0)continue;
670 case OC_MODE_INTER_MV_FOUR
:
674 /*Mark the tail of the list, so we don't accidentally go past it.*/
676 for(bi
=codedi
=0;bi
<4;bi
++){
677 if(coded
[codedi
]==bi
){
679 frag
=_dec
->state
.frags
+mb
->map
[0][bi
];
680 frag
->mbmode
=mb_mode
;
681 frag
->mv
[0]=lbmvs
[bi
][0]=(signed char)(*mv_comp_unpack
)(&_dec
->opb
);
682 frag
->mv
[1]=lbmvs
[bi
][1]=(signed char)(*mv_comp_unpack
)(&_dec
->opb
);
687 TH_DEBUG("%+03d,%+03d ",frag
->mv
[0],frag
->mv
[1]);
692 else lbmvs
[bi
][0]=lbmvs
[bi
][1]=0;
695 last_mv
[1][0]=last_mv
[0][0];
696 last_mv
[1][1]=last_mv
[0][1];
697 last_mv
[0][0]=lbmvs
[coded
[codedi
-1]][0];
698 last_mv
[0][1]=lbmvs
[coded
[codedi
-1]][1];
701 (*set_chroma_mvs
)(cbmvs
,(const oc_mv
*)lbmvs
);
702 for(;codedi
<ncoded
;codedi
++){
705 frag
=_dec
->state
.frags
+mb
->map
[mapi
>>2][bi
];
706 frag
->mbmode
=mb_mode
;
707 frag
->mv
[0]=cbmvs
[bi
][0];
708 frag
->mv
[1]=cbmvs
[bi
][1];
713 case OC_MODE_INTER_MV
:
715 last_mv
[1][0]=last_mv
[0][0];
716 last_mv
[1][1]=last_mv
[0][1];
717 mbmv
[0]=last_mv
[0][0]=(signed char)(*mv_comp_unpack
)(&_dec
->opb
);
718 mbmv
[1]=last_mv
[0][1]=(signed char)(*mv_comp_unpack
)(&_dec
->opb
);
723 TH_DEBUG("%+03d,%+03d ",mbmv
[0],mbmv
[1]);
729 case OC_MODE_INTER_MV_LAST
:
731 mbmv
[0]=last_mv
[0][0];
732 mbmv
[1]=last_mv
[0][1];
735 case OC_MODE_INTER_MV_LAST2
:
737 mbmv
[0]=last_mv
[1][0];
738 mbmv
[1]=last_mv
[1][1];
739 last_mv
[1][0]=last_mv
[0][0];
740 last_mv
[1][1]=last_mv
[0][1];
741 last_mv
[0][0]=mbmv
[0];
742 last_mv
[0][1]=mbmv
[1];
745 case OC_MODE_GOLDEN_MV
:
747 mbmv
[0]=(signed char)(*mv_comp_unpack
)(&_dec
->opb
);
748 mbmv
[1]=(signed char)(*mv_comp_unpack
)(&_dec
->opb
);
753 TH_DEBUG("%+03d,%+03d ",mbmv
[0],mbmv
[1]);
763 /*4MV mode fills in the fragments itself.
764 For all other modes we can use this common code.*/
765 if(mb_mode
!=OC_MODE_INTER_MV_FOUR
){
766 for(codedi
=0;codedi
<ncoded
;codedi
++){
768 fragi
=mb
->map
[mapi
>>2][mapi
&3];
769 frag
=_dec
->state
.frags
+fragi
;
770 frag
->mbmode
=mb_mode
;
static void oc_dec_block_qis_unpack(oc_dec_ctx *_dec){
  int *coded_fragi_end;
  ncoded_fragis=_dec->state.ncoded_fragis[0]+
   _dec->state.ncoded_fragis[1]+_dec->state.ncoded_fragis[2];
  if(ncoded_fragis<=0)return;
  coded_fragi=_dec->state.coded_fragis;
  coded_fragi_end=coded_fragi+ncoded_fragis;
  if(_dec->state.nqis==1){
    /*If this frame has only a single qi value, then just set it in all coded
       fragments.*/
    while(coded_fragi<coded_fragi_end){
      _dec->state.frags[*coded_fragi++].qi=_dec->state.qis[0];
  /*Otherwise, we decode a qi index for each fragment, using two passes of
     the same binary RLE scheme used for super-block coded bits.
    The first pass marks each fragment as having a qii of 0 or greater than
     0, and the second pass (if necessary) distinguishes between a qii of
     1 and 2.
    At first we just store the qii in the fragment.
    After all the qii's are decoded, we make a final pass to replace them
     with the corresponding qi's for this frame.
    (A simplified sketch of this scheme follows the function.)*/
  theorapackB_read1(&_dec->opb,&val);
  while(coded_fragi<coded_fragi_end){
    run_count=oc_sb_run_unpack(&_dec->opb);
    full_run=run_count>=4129;
      _dec->state.frags[*coded_fragi++].qi=flag;
    while(--run_count>0&&coded_fragi<coded_fragi_end);
    if(full_run&&coded_fragi<coded_fragi_end){
      theorapackB_read1(&_dec->opb,&val);
  /*TODO: run_count should be 0 here.
    If it's not, we should issue a warning of some kind.*/
  /*If we have 3 different qi's for this frame, and there was at least one
     fragment with a non-zero qi, make the second pass.*/
  if(_dec->state.nqis==3&&nqi0<ncoded_fragis){
    /*Skip qii==0 fragments.*/
    for(coded_fragi=_dec->state.coded_fragis;
     _dec->state.frags[*coded_fragi].qi==0;coded_fragi++);
    theorapackB_read1(&_dec->opb,&val);
    while(coded_fragi<coded_fragi_end){
      run_count=oc_sb_run_unpack(&_dec->opb);
      full_run=run_count>=4129;
      for(;coded_fragi<coded_fragi_end;coded_fragi++){
        frag=_dec->state.frags+*coded_fragi;
        if(frag->qi==0)continue;
        if(run_count--<=0)break;
      if(full_run&&coded_fragi<coded_fragi_end){
        theorapackB_read1(&_dec->opb,&val);
    /*TODO: run_count should be 0 here.
      If it's not, we should issue a warning of some kind.*/
  /*Finally, translate qii's to qi's.*/
  for(coded_fragi=_dec->state.coded_fragis;coded_fragi<coded_fragi_end;
    frag=_dec->state.frags+*coded_fragi;
    frag->qi=_dec->state.qis[frag->qi];
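/*Illustrative sketch only, fenced off with #if 0: the two-pass qi index
   decode described in the comments above, reduced to a flat array of coded
   fragments.
  Pass one marks each coded fragment's qii as 0 or non-zero; pass two, run
   only over the non-zero ones, distinguishes 1 from 2.
  The full-run (4129) special case is omitted here for brevity; toy_read()
   and toy_sb_run() are the hypothetical helpers from the earlier sketches.*/
#if 0
static void toy_unpack_qiis(toy_bits *_b,unsigned char *_qii,int _nfrags,
 int _nqis){
  int  i;
  long flag;
  for(i=0;i<_nfrags;i++)_qii[i]=0;
  if(_nqis<2)return;
  /*Pass 1: qii==0 vs. qii>0.*/
  flag=toy_read(_b,1);
  for(i=0;i<_nfrags;){
    int run;
    run=toy_sb_run(_b);
    while(run-->0&&i<_nfrags)_qii[i++]=(unsigned char)flag;
    flag=!flag;
  }
  if(_nqis<3)return;
  /*Pass 2: among the qii>0 fragments only, qii==1 vs. qii==2.*/
  flag=toy_read(_b,1);
  for(i=0;i<_nfrags;){
    int run;
    run=toy_sb_run(_b);
    for(;i<_nfrags&&run>0;i++){
      if(_qii[i]==0)continue;
      _qii[i]=(unsigned char)(1+flag);
      run--;
    }
    flag=!flag;
  }
}
#endif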
/*Returns the decoded value of the given token.
  It CANNOT be called for any of the EOB tokens.
  _token:      The token value to skip.
  _extra_bits: The extra bits attached to this token.
  Return: The decoded coefficient value.*/
typedef int (*oc_token_dec1val_func)(int _token,int _extra_bits);

/*Handles zero run tokens.*/
static int oc_token_dec1val_zrl(void){
  return 0;
}

/*Handles 1, -1, 2 and -2 tokens.*/
static int oc_token_dec1val_const(int _token){
  static const int CONST_VALS[4]={1,-1,2,-2};
  return CONST_VALS[_token-OC_NDCT_ZRL_TOKEN_MAX];
}

/*Handles DCT value tokens category 2.*/
static int oc_token_dec1val_cat2(int _token,int _extra_bits){
  int valsigned[2];
  valsigned[0]=_token-OC_DCT_VAL_CAT2+3;
  valsigned[1]=-valsigned[0];
  return valsigned[_extra_bits];
}

/*Handles DCT value tokens categories 3 through 8.*/
static int oc_token_dec1val_cati(int _token,int _extra_bits){
  static const int VAL_CAT_OFFS[6]={
    OC_NDCT_VAL_CAT2_SIZE+3,
    OC_NDCT_VAL_CAT2_SIZE+5,
    OC_NDCT_VAL_CAT2_SIZE+9,
    OC_NDCT_VAL_CAT2_SIZE+17,
    OC_NDCT_VAL_CAT2_SIZE+33,
    OC_NDCT_VAL_CAT2_SIZE+65
  };
  static const int VAL_CAT_MASKS[6]={
    0x001,0x003,0x007,0x00F,0x01F,0x1FF
  };
  static const int VAL_CAT_SHIFTS[6]={1,2,3,4,5,9};
  int valsigned[2];
  int cati;
  cati=_token-OC_NDCT_VAL_CAT2_MAX;
  valsigned[0]=VAL_CAT_OFFS[cati]+(_extra_bits&VAL_CAT_MASKS[cati]);
  valsigned[1]=-valsigned[0];
  return valsigned[_extra_bits>>VAL_CAT_SHIFTS[cati]&1];
}

/*A jump table for computing the first coefficient value the given token
   value represents.*/
static const oc_token_dec1val_func OC_TOKEN_DEC1VAL_TABLE[TH_NDCT_TOKENS-
 OC_NDCT_EOB_TOKEN_MAX]={
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_const,
  (oc_token_dec1val_func)oc_token_dec1val_const,
  (oc_token_dec1val_func)oc_token_dec1val_const,
  (oc_token_dec1val_func)oc_token_dec1val_const,
  oc_token_dec1val_cat2,
  oc_token_dec1val_cat2,
  oc_token_dec1val_cat2,
  oc_token_dec1val_cat2,
  oc_token_dec1val_cati,
  oc_token_dec1val_cati,
  oc_token_dec1val_cati,
  oc_token_dec1val_cati,
  oc_token_dec1val_cati,
  oc_token_dec1val_cati,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl,
  (oc_token_dec1val_func)oc_token_dec1val_zrl
};

/*Returns the decoded value of the given token.
  It CANNOT be called for any of the EOB tokens.
  _token:      The token value to skip.
  _extra_bits: The extra bits attached to this token.
  Return: The decoded coefficient value.*/
static int oc_dct_token_dec1val(int _token,int _extra_bits){
  return (*OC_TOKEN_DEC1VAL_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token,
   _extra_bits);
}
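/*Illustrative sketch only, fenced off with #if 0: how a value-category token
   plus its extra bits turn into a coefficient, mirroring
   oc_token_dec1val_cati() above.
  A category starting at offset OFFS with NBITS magnitude bits covers
   +/-(OFFS..OFFS+2^NBITS-1); the extra bit just above the magnitude selects
   the sign.
  The concrete numbers in the usage note assume OC_NDCT_VAL_CAT2_SIZE is 4.*/
#if 0
static int toy_dec_val_cat(int _offs,int _nbits,int _extra_bits){
  int mag;
  mag=_offs+(_extra_bits&(1<<_nbits)-1);
  return _extra_bits>>_nbits&1?-mag:mag;
}
/*Usage: the first "cati" category above would have offset 7 and one
   magnitude bit, so extra bits 0,1,2,3 decode to +7,+8,-7,-8.*/
#endif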
959 /*Unpacks the DC coefficient tokens.
960 Unlike when unpacking the AC coefficient tokens, we actually need to decode
961 the DC coefficient values now so that we can do DC prediction.
962 _huff_idx: The index of the Huffman table to use for each color plane.
963 _ntoks_left: The number of tokens left to be decoded in each color plane for
965 This is updated as EOB tokens and zero run tokens are decoded.
966 Return: The length of any outstanding EOB run.*/
967 static int oc_dec_dc_coeff_unpack(oc_dec_ctx
*_dec
,int _huff_idxs
[3],
968 int _ntoks_left
[3][64]){
971 int *coded_fragi_end
;
982 coded_fragi_end
=coded_fragi
=_dec
->state
.coded_fragis
;
983 for(pli
=0;pli
<3;pli
++){
984 coded_fragi_end
+=_dec
->state
.ncoded_fragis
[pli
];
985 memset(run_counts
,0,sizeof(run_counts
));
986 _dec
->eob_runs
[pli
][0]=eobs
;
987 /*Continue any previous EOB run, if there was one.*/
988 for(eobi
=eobs
;eobi
-->0&&coded_fragi
<coded_fragi_end
;){
989 _dec
->state
.frags
[*coded_fragi
++].dc
=0;
992 while(eobs
<_ntoks_left
[pli
][0]-cfi
){
998 run_counts
[63]+=eobs
;
999 token
=oc_huff_token_decode(&_dec
->opb
,
1000 _dec
->huff_tables
[_huff_idxs
[pli
]]);
1001 _dec
->dct_tokens
[0][ti
++]=(unsigned char)token
;
1002 neb
=OC_DCT_TOKEN_EXTRA_BITS
[token
];
1004 theorapackB_read(&_dec
->opb
,neb
,&val
);
1006 _dec
->extra_bits
[0][ebi
++]=(ogg_uint16_t
)eb
;
1009 skip
=oc_dct_token_skip(token
,eb
);
1012 while(eobi
-->0&&coded_fragi
<coded_fragi_end
){
1013 _dec
->state
.frags
[*coded_fragi
++].dc
=0;
1017 run_counts
[skip
-1]++;
1020 _dec
->state
.frags
[*coded_fragi
++].dc
=oc_dct_token_dec1val(token
,eb
);
1023 _dec
->ti0
[pli
][0]=ti
;
1024 _dec
->ebi0
[pli
][0]=ebi
;
1025 /*Set the EOB count to the portion of the last EOB run which extends past
1027 eobs
=eobs
+cfi
-_ntoks_left
[pli
][0];
1028 /*Add the portion of the last EOB which was included in this coefficient to
1029 to the longest run length.*/
1030 run_counts
[63]+=_ntoks_left
[pli
][0]-cfi
;
1031 /*And convert the run_counts array to a moment table.*/
1032 for(rli
=63;rli
-->0;)run_counts
[rli
]+=run_counts
[rli
+1];
1033 /*Finally, subtract off the number of coefficients that have been
1034 accounted for by runs started in this coefficient.*/
1035 for(rli
=64;rli
-->0;)_ntoks_left
[pli
][rli
]-=run_counts
[rli
];
1040 /*Unpacks the AC coefficient tokens.
1041 This can completely discard coefficient values while unpacking, and so is
1042 somewhat simpler than unpacking the DC coefficient tokens.
1043 _huff_idx: The index of the Huffman table to use for each color plane.
1044 _ntoks_left: The number of tokens left to be decoded in each color plane for
1046 This is updated as EOB tokens and zero run tokens are decoded.
1047 _eobs: The length of any outstanding EOB run from previous
1049 Return: The length of any outstanding EOB run.*/
1050 static int oc_dec_ac_coeff_unpack(oc_dec_ctx
*_dec
,int _zzi
,int _huff_idxs
[3],
1051 int _ntoks_left
[3][64],int _eobs
){
1060 for(pli
=0;pli
<3;pli
++){
1061 memset(run_counts
,0,sizeof(run_counts
));
1062 _dec
->eob_runs
[pli
][_zzi
]=_eobs
;
1064 while(_eobs
<_ntoks_left
[pli
][_zzi
]-cfi
){
1070 run_counts
[63]+=_eobs
;
1071 token
=oc_huff_token_decode(&_dec
->opb
,
1072 _dec
->huff_tables
[_huff_idxs
[pli
]]);
1073 _dec
->dct_tokens
[_zzi
][ti
++]=(unsigned char)token
;
1074 neb
=OC_DCT_TOKEN_EXTRA_BITS
[token
];
1076 theorapackB_read(&_dec
->opb
,neb
,&val
);
1078 _dec
->extra_bits
[_zzi
][ebi
++]=(ogg_uint16_t
)eb
;
1081 skip
=oc_dct_token_skip(token
,eb
);
1082 if(skip
<0)_eobs
=-skip
;
1084 run_counts
[skip
-1]++;
1089 _dec
->ti0
[pli
][_zzi
]=ti
;
1090 _dec
->ebi0
[pli
][_zzi
]=ebi
;
1091 /*Set the EOB count to the portion of the last EOB run which extends past
1093 _eobs
=_eobs
+cfi
-_ntoks_left
[pli
][_zzi
];
1094 /*Add the portion of the last EOB which was included in this coefficient to
1095 to the longest run length.*/
1096 run_counts
[63]+=_ntoks_left
[pli
][_zzi
]-cfi
;
1097 /*And convert the run_counts array to a moment table.*/
1098 for(rli
=63;rli
-->0;)run_counts
[rli
]+=run_counts
[rli
+1];
1099 /*Finally, subtract off the number of coefficients that have been
1100 accounted for by runs started in this coefficient.*/
1101 for(rli
=64-_zzi
;rli
-->0;)_ntoks_left
[pli
][_zzi
+rli
]-=run_counts
[rli
];
/*Tokens describing the DCT coefficients that belong to each fragment are
   stored in the bitstream grouped by coefficient, not by fragment.

  This means that we either decode all the tokens in order, building up a
   separate coefficient list for each fragment as we go, and then go back
   and do the iDCT on each fragment, or we have to create separate lists of
   tokens for each coefficient, so that we can pull the next token required
   off the head of the appropriate list when decoding a specific fragment.

  The former was VP3's choice, and it meant 2*w*h extra storage for all the
   decoded coefficient values.

  We take the second option, which lets us store just one or three bytes per
   token (generally far fewer than the number of coefficients, due to EOB
   tokens and zero runs), and which requires us to only maintain a counter
   for each of the 64 coefficients, instead of a counter for every fragment
   to determine where the next token goes.

  Actually, we use 3 counters per coefficient, one for each color plane, so
   we can decode all color planes simultaneously.

  This lets color conversion, etc., be done as soon as a full MCU (one or
   two super block rows) is decoded, while the image data is still in
   cache.*/
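/*Illustrative sketch only, fenced off with #if 0: the per-coefficient
   token-list layout the comment above describes.
  Tokens live in 64 lists, one per zig-zag coefficient (with per-plane start
   offsets in the real decoder), and a single read index per list is enough
   to pull the next token for whichever fragment is currently being
   reconstructed.*/
#if 0
typedef struct{
  unsigned char *tokens[64]; /*One token list per zig-zag coefficient.*/
  int            ti[64];     /*Next unread token in each list.*/
}toy_token_store;

/*Return the next token for coefficient _zzi, advancing that list's index.*/
static int toy_next_token(toy_token_store *_ts,int _zzi){
  return _ts->tokens[_zzi][_ts->ti[_zzi]++];
}
#endif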
1130 static void oc_dec_residual_tokens_unpack(oc_dec_ctx
*_dec
){
1131 static const int OC_HUFF_LIST_MAX
[5]={1,6,15,28,64};
1133 int ntoks_left
[3][64];
1141 for(pli
=0;pli
<3;pli
++)for(zzi
=0;zzi
<64;zzi
++){
1142 ntoks_left
[pli
][zzi
]=_dec
->state
.ncoded_fragis
[pli
];
1144 theorapackB_read(&_dec
->opb
,4,&val
);
1146 theorapackB_read(&_dec
->opb
,4,&val
);
1148 huff_idxs
[0]=huffi_y
;
1149 huff_idxs
[1]=huff_idxs
[2]=huffi_c
;
1150 _dec
->eob_runs
[0][0]=0;
1151 eobs
=oc_dec_dc_coeff_unpack(_dec
,huff_idxs
,ntoks_left
);
1152 theorapackB_read(&_dec
->opb
,4,&val
);
1154 theorapackB_read(&_dec
->opb
,4,&val
);
1157 for(hgi
=1;hgi
<5;hgi
++){
1158 huff_idxs
[0]=huffi_y
+(hgi
<<4);
1159 huff_idxs
[1]=huff_idxs
[2]=huffi_c
+(hgi
<<4);
1160 for(;zzi
<OC_HUFF_LIST_MAX
[hgi
];zzi
++){
1161 eobs
=oc_dec_ac_coeff_unpack(_dec
,zzi
,huff_idxs
,ntoks_left
,eobs
);
1164 /*TODO: eobs should be exactly zero, or 4096 or greater.
1165 The second case occurs when an EOB run of size zero is encountered, which
1166 gets treated as an infinite EOB run (where infinity is INT_MAX).
1167 If neither of these conditions holds, then a warning should be issued.*/
1172 /*Expands a single token into the given coefficient list.
1173 This fills in the zeros for zero runs as well as coefficient values, and
1174 updates the index of the current coefficient.
1175 It CANNOT be called for any of the EOB tokens.
1176 _token: The token value to expand.
1177 _extra_bits: The extra bits associated with the token.
1178 _dct_coeffs: The current list of coefficients, in zig-zag order.
1179 _zzi: A pointer to the zig-zag index of the next coefficient to write
1181 This is updated before the function returns.*/
1182 typedef void (*oc_token_expand_func
)(int _token
,int _extra_bits
,
1183 ogg_int16_t _dct_coeffs
[128],int *_zzi
);
1185 /*Expands a zero run token.*/
1186 static void oc_token_expand_zrl(int _token
,int _extra_bits
,
1187 ogg_int16_t _dct_coeffs
[128],int *_zzi
){
1190 do _dct_coeffs
[zzi
++]=0;
1191 while(_extra_bits
-->0);
1195 /*Expands a constant, single-value token.*/
1196 static void oc_token_expand_const(int _token
,int _extra_bits
,
1197 ogg_int16_t _dct_coeffs
[128],int *_zzi
){
1198 _dct_coeffs
[(*_zzi
)++]=(ogg_int16_t
)oc_token_dec1val_const(_token
);
1201 /*Expands category 2 single-valued tokens.*/
1202 static void oc_token_expand_cat2(int _token
,int _extra_bits
,
1203 ogg_int16_t _dct_coeffs
[128],int *_zzi
){
1204 _dct_coeffs
[(*_zzi
)++]=
1205 (ogg_int16_t
)oc_token_dec1val_cat2(_token
,_extra_bits
);
1208 /*Expands category 3 through 8 single-valued tokens.*/
1209 static void oc_token_expand_cati(int _token
,int _extra_bits
,
1210 ogg_int16_t _dct_coeffs
[128],int *_zzi
){
1211 _dct_coeffs
[(*_zzi
)++]=
1212 (ogg_int16_t
)oc_token_dec1val_cati(_token
,_extra_bits
);
1215 /*Expands a category 1a zero run/value combo token.*/
1216 static void oc_token_expand_run_cat1a(int _token
,int _extra_bits
,
1217 ogg_int16_t _dct_coeffs
[128],int *_zzi
){
1221 /*LOOP VECTORIZES.*/
1222 for(rl
=_token
-OC_DCT_RUN_CAT1A
+1;rl
-->0;)_dct_coeffs
[zzi
++]=0;
1223 _dct_coeffs
[zzi
++]=(ogg_int16_t
)(1-(_extra_bits
<<1));
1227 /*Expands all other zero run/value combo tokens.*/
1228 static void oc_token_expand_run(int _token
,int _extra_bits
,
1229 ogg_int16_t _dct_coeffs
[128],int *_zzi
){
1230 static const int NZEROS_ADJUST
[OC_NDCT_RUN_MAX
-OC_DCT_RUN_CAT1B
]={
1233 static const int NZEROS_MASK
[OC_NDCT_RUN_MAX
-OC_DCT_RUN_CAT1B
]={
1236 static const int VALUE_SHIFT
[OC_NDCT_RUN_MAX
-OC_DCT_RUN_CAT1B
]={
1239 static const int VALUE_MASK
[OC_NDCT_RUN_MAX
-OC_DCT_RUN_CAT1B
]={
1242 static const int VALUE_ADJUST
[OC_NDCT_RUN_MAX
-OC_DCT_RUN_CAT1B
]={
1245 static const int SIGN_SHIFT
[OC_NDCT_RUN_MAX
-OC_DCT_RUN_CAT1B
]={
1251 _token
-=OC_DCT_RUN_CAT1B
;
1252 rl
=(_extra_bits
&NZEROS_MASK
[_token
])+NZEROS_ADJUST
[_token
];
1254 /*LOOP VECTORIZES.*/
1255 while(rl
-->0)_dct_coeffs
[zzi
++]=0;
1256 valsigned
[0]=VALUE_ADJUST
[_token
]+
1257 (_extra_bits
>>VALUE_SHIFT
[_token
]&VALUE_MASK
[_token
]);
1258 valsigned
[1]=-valsigned
[0];
1259 _dct_coeffs
[zzi
++]=(ogg_int16_t
)valsigned
[
1260 _extra_bits
>>SIGN_SHIFT
[_token
]];
/*A jump table for expanding token values into coefficient values.
  This reduces all the conditional branches, etc., needed to parse these
   token values down to one indirect jump.*/
static const oc_token_expand_func OC_TOKEN_EXPAND_TABLE[TH_NDCT_TOKENS-
 OC_NDCT_EOB_TOKEN_MAX]={
  oc_token_expand_zrl,
  oc_token_expand_zrl,
  oc_token_expand_const,
  oc_token_expand_const,
  oc_token_expand_const,
  oc_token_expand_const,
  oc_token_expand_cat2,
  oc_token_expand_cat2,
  oc_token_expand_cat2,
  oc_token_expand_cat2,
  oc_token_expand_cati,
  oc_token_expand_cati,
  oc_token_expand_cati,
  oc_token_expand_cati,
  oc_token_expand_cati,
  oc_token_expand_cati,
  oc_token_expand_run_cat1a,
  oc_token_expand_run_cat1a,
  oc_token_expand_run_cat1a,
  oc_token_expand_run_cat1a,
  oc_token_expand_run_cat1a,
  oc_token_expand_run,
  oc_token_expand_run,
  oc_token_expand_run,
};

/*Expands a single token into the given coefficient list.
  This fills in the zeros for zero runs as well as coefficient values, and
   updates the index of the current coefficient.
  It CANNOT be called for any of the EOB tokens.
  _token:      The token value to expand.
  _extra_bits: The extra bits associated with the token.
  _dct_coeffs: The current list of coefficients, in zig-zag order.
  _zzi:        A pointer to the zig-zag index of the next coefficient to
                write to.
               This is updated before the function returns.*/
static void oc_dct_token_expand(int _token,int _extra_bits,
 ogg_int16_t *_dct_coeffs,int *_zzi){
  (*OC_TOKEN_EXPAND_TABLE[_token-OC_NDCT_EOB_TOKEN_MAX])(_token,
   _extra_bits,_dct_coeffs,_zzi);
}
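/*Illustrative sketch only, fenced off with #if 0: the jump-table dispatch
   pattern used by OC_TOKEN_EXPAND_TABLE above, reduced to a two-entry toy
   alphabet.
  Each token class gets one handler, and dispatch is a single indirect call
   instead of a chain of comparisons.*/
#if 0
typedef void (*toy_expand_func)(int _token,int _extra_bits,
 short _coeffs[64],int *_zzi);

/*Class 0: a run of _extra_bits+1 zeros.*/
static void toy_expand_zero_run(int _token,int _extra_bits,
 short _coeffs[64],int *_zzi){
  do _coeffs[(*_zzi)++]=0;
  while(_extra_bits-->0);
}

/*Class 1: a single value; the extra bit is its sign.*/
static void toy_expand_value(int _token,int _extra_bits,
 short _coeffs[64],int *_zzi){
  _coeffs[(*_zzi)++]=(short)(_extra_bits?-_token:_token);
}

static const toy_expand_func TOY_EXPAND_TABLE[2]={
  toy_expand_zero_run,
  toy_expand_value
};

static void toy_expand(int _class,int _token,int _extra_bits,
 short _coeffs[64],int *_zzi){
  (*TOY_EXPAND_TABLE[_class])(_token,_extra_bits,_coeffs,_zzi);
}
#endif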
1314 static int oc_dec_postprocess_init(oc_dec_ctx
*_dec
){
1315 /*pp_level 0: disabled; free any memory used and return*/
1316 if(_dec
->pp_level
<=OC_PP_LEVEL_DISABLED
){
1317 if(_dec
->dc_qis
!=NULL
){
1318 _ogg_free(_dec
->dc_qis
);
1320 _ogg_free(_dec
->variances
);
1321 _dec
->variances
=NULL
;
1322 _ogg_free(_dec
->pp_frame_data
);
1323 _dec
->pp_frame_data
=NULL
;
1327 if(_dec
->dc_qis
==NULL
){
1328 /*If we haven't been tracking DC quantization indices, there's no point in
1330 if(_dec
->state
.frame_type
!=OC_INTRA_FRAME
)return 1;
1331 _dec
->dc_qis
=(unsigned char *)_ogg_malloc(
1332 _dec
->state
.nfrags
*sizeof(_dec
->dc_qis
[0]));
1333 memset(_dec
->dc_qis
,_dec
->state
.qis
[0],_dec
->state
.nfrags
);
1337 int *coded_fragi_end
;
1339 /*Update the DC quantization index of each coded block.*/
1340 qi0
=(unsigned char)_dec
->state
.qis
[0];
1341 coded_fragi_end
=_dec
->state
.coded_fragis
+_dec
->state
.ncoded_fragis
[0]+
1342 _dec
->state
.ncoded_fragis
[1]+_dec
->state
.ncoded_fragis
[2];
1343 for(coded_fragi
=_dec
->state
.coded_fragis
;coded_fragi
<coded_fragi_end
;
1345 _dec
->dc_qis
[*coded_fragi
]=qi0
;
1348 /*pp_level 1: Stop after updating DC quantization indices.*/
1349 if(_dec
->pp_level
<=OC_PP_LEVEL_TRACKDCQI
){
1350 if(_dec
->variances
!=NULL
){
1351 _ogg_free(_dec
->variances
);
1352 _dec
->variances
=NULL
;
1353 _ogg_free(_dec
->pp_frame_data
);
1354 _dec
->pp_frame_data
=NULL
;
1358 if(_dec
->variances
==NULL
||
1359 _dec
->pp_frame_has_chroma
!=(_dec
->pp_level
>=OC_PP_LEVEL_DEBLOCKC
)){
1361 frame_sz
=_dec
->state
.info
.frame_width
*_dec
->state
.info
.frame_height
;
1362 if(_dec
->pp_level
<OC_PP_LEVEL_DEBLOCKC
){
1363 _dec
->variances
=(int *)_ogg_realloc(_dec
->variances
,
1364 _dec
->state
.fplanes
[0].nfrags
*sizeof(_dec
->variances
[0]));
1365 _dec
->pp_frame_data
=(unsigned char *)_ogg_realloc(
1366 _dec
->pp_frame_data
,frame_sz
*sizeof(_dec
->pp_frame_data
[0]));
1367 _dec
->pp_frame_buf
[0].width
=_dec
->state
.info
.frame_width
;
1368 _dec
->pp_frame_buf
[0].height
=_dec
->state
.info
.frame_height
;
1369 _dec
->pp_frame_buf
[0].stride
=-_dec
->pp_frame_buf
[0].width
;
1370 _dec
->pp_frame_buf
[0].data
=_dec
->pp_frame_data
+
1371 (1-_dec
->pp_frame_buf
[0].height
)*_dec
->pp_frame_buf
[0].stride
;
1378 _dec
->variances
=(int *)_ogg_realloc(_dec
->variances
,
1379 _dec
->state
.nfrags
*sizeof(_dec
->variances
[0]));
1381 c_w
=_dec
->state
.info
.frame_width
>>!(_dec
->state
.info
.pixel_fmt
&1);
1382 c_h
=_dec
->state
.info
.frame_height
>>!(_dec
->state
.info
.pixel_fmt
&2);
1385 _dec
->pp_frame_data
=(unsigned char *)_ogg_realloc(
1386 _dec
->pp_frame_data
,frame_sz
*sizeof(_dec
->pp_frame_data
[0]));
1387 _dec
->pp_frame_buf
[0].width
=_dec
->state
.info
.frame_width
;
1388 _dec
->pp_frame_buf
[0].height
=_dec
->state
.info
.frame_height
;
1389 _dec
->pp_frame_buf
[0].stride
=_dec
->pp_frame_buf
[0].width
;
1390 _dec
->pp_frame_buf
[0].data
=_dec
->pp_frame_data
;
1391 _dec
->pp_frame_buf
[1].width
=c_w
;
1392 _dec
->pp_frame_buf
[1].height
=c_h
;
1393 _dec
->pp_frame_buf
[1].stride
=_dec
->pp_frame_buf
[1].width
;
1394 _dec
->pp_frame_buf
[1].data
=_dec
->pp_frame_buf
[0].data
+y_sz
;
1395 _dec
->pp_frame_buf
[2].width
=c_w
;
1396 _dec
->pp_frame_buf
[2].height
=c_h
;
1397 _dec
->pp_frame_buf
[2].stride
=_dec
->pp_frame_buf
[2].width
;
1398 _dec
->pp_frame_buf
[2].data
=_dec
->pp_frame_buf
[1].data
+c_sz
;
1399 oc_ycbcr_buffer_flip(_dec
->pp_frame_buf
,_dec
->pp_frame_buf
);
1401 _dec
->pp_frame_has_chroma
=(_dec
->pp_level
>=OC_PP_LEVEL_DEBLOCKC
);
1403 /*If we're not processing chroma, copy the reference frame's chroma planes.*/
1404 if(_dec
->pp_level
<OC_PP_LEVEL_DEBLOCKC
){
1405 memcpy(_dec
->pp_frame_buf
+1,
1406 _dec
->state
.ref_frame_bufs
[_dec
->state
.ref_frame_idx
[OC_FRAME_SELF
]]+1,
1407 sizeof(_dec
->pp_frame_buf
[1])*2);
  int eob_runs[3][64];
  int bounding_values[256];
  int *coded_fragis[3];
  int *uncoded_fragis[3];
  int ncoded_fragis[3];
  int nuncoded_fragis[3];
  int pred_last[3][3];
}oc_dec_pipeline_state;
1433 /*Initialize the main decoding pipeline.*/
1434 static void oc_dec_pipeline_init(oc_dec_ctx
*_dec
,
1435 oc_dec_pipeline_state
*_pipe
){
1436 int *coded_fragi_end
;
1437 int *uncoded_fragi_end
;
1439 /*If chroma is sub-sampled in the vertical direction, we have to decode two
1440 super block rows of Y' for each super block row of Cb and Cr.*/
1441 _pipe
->mcu_nvfrags
=4<<!(_dec
->state
.info
.pixel_fmt
&2);
1442 /*Initialize the token and extra bits indices for each plane and
1444 memset(_pipe
->ti
[0],0,sizeof(_pipe
->ti
[0]));
1445 memset(_pipe
->ebi
[0],0,sizeof(_pipe
->ebi
[0]));
1446 for(pli
=1;pli
<3;pli
++){
1447 memcpy(_pipe
->ti
[pli
],_dec
->ti0
[pli
-1],sizeof(_pipe
->ti
[0]));
1448 memcpy(_pipe
->ebi
[pli
],_dec
->ebi0
[pli
-1],sizeof(_pipe
->ebi
[0]));
1450 /*Also copy over the initial the EOB run counts.*/
1451 memcpy(_pipe
->eob_runs
,_dec
->eob_runs
,sizeof(_pipe
->eob_runs
));
1452 /*Set up per-plane pointers to the coded and uncoded fragments lists.*/
1453 coded_fragi_end
=_dec
->state
.coded_fragis
;
1454 uncoded_fragi_end
=_dec
->state
.uncoded_fragis
;
1455 for(pli
=0;pli
<3;pli
++){
1456 _pipe
->coded_fragis
[pli
]=coded_fragi_end
;
1457 _pipe
->uncoded_fragis
[pli
]=uncoded_fragi_end
;
1458 coded_fragi_end
+=_dec
->state
.ncoded_fragis
[pli
];
1459 uncoded_fragi_end
-=_dec
->state
.nuncoded_fragis
[pli
];
1461 /*Set the previous DC predictor to 0 for all color planes and frame types.*/
1462 memset(_pipe
->pred_last
,0,sizeof(_pipe
->pred_last
));
1463 /*Initialize the bounding value array for the loop filter.*/
1464 _pipe
->loop_filter
=!oc_state_loop_filter_init(&_dec
->state
,
1465 _pipe
->bounding_values
);
1466 /*Initialize any buffers needed for post-processing.
1467 We also save the current post-processing level, to guard against the user
1468 changing it from a callback.*/
1469 if(!oc_dec_postprocess_init(_dec
))_pipe
->pp_level
=_dec
->pp_level
;
1470 /*If we don't have enough information to post-process, disable it, regardless
1471 of the user-requested level.*/
1473 _pipe
->pp_level
=OC_PP_LEVEL_DISABLED
;
1474 memcpy(_dec
->pp_frame_buf
,
1475 _dec
->state
.ref_frame_bufs
[_dec
->state
.ref_frame_idx
[OC_FRAME_SELF
]],
1476 sizeof(_dec
->pp_frame_buf
[0])*3);
/*Undo the DC prediction in a single plane of an MCU (one or two super block
   rows).
  As a side effect, the number of coded and uncoded fragments in this plane
   of the MCU is also computed.
  (A simplified sketch of the idea follows the function.)*/
static void oc_dec_dc_unpredict_mcu_plane(oc_dec_ctx *_dec,
 oc_dec_pipeline_state *_pipe,int _pli){
  /*Undo the DC prediction.*/
  oc_fragment_plane *fplane;
  /*Compute the first and last fragment row of the current MCU for this
     plane.*/
  fplane=_dec->state.fplanes+_pli;
  fragy0=_pipe->fragy0[_pli];
  fragy_end=_pipe->fragy_end[_pli];
  frag=_dec->state.frags+fplane->froffset+(fragy0*fplane->nhfrags);
  pred_last=_pipe->pred_last[_pli];
  for(fragy=fragy0;fragy<fragy_end;fragy++){
    for(fragx=0;fragx<fplane->nhfrags;fragx++,frag++){
      if(!frag->coded)continue;
      frag->quant[0]=frag->dc; /*Stash the un-predicted DC for debug output.*/
      pred_last[OC_FRAME_FOR_MODE[frag->mbmode]]=frag->dc+=
       oc_frag_pred_dc(frag,fplane,fragx,fragy,pred_last);
  _pipe->ncoded_fragis[_pli]=ncoded_fragis;
  /*Also save the number of uncoded fragments so we know how many to copy.*/
  _pipe->nuncoded_fragis[_pli]=
   (fragy_end-fragy0)*fplane->nhfrags-ncoded_fragis;
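/*Illustrative sketch only, fenced off with #if 0: the shape of the DC
   prediction reversal performed above.
  Each coded fragment's decoded DC is a residual; the predictor (in the real
   decoder a weighted combination of already-reconstructed neighbors that
   used the same reference frame, computed by oc_frag_pred_dc()) is added
   back in, and the result becomes the new last-DC value.
  The toy version below uses only the previous coded fragment in the row as
   the predictor.*/
#if 0
static void toy_undo_dc_prediction(int *_dc,const unsigned char *_coded,
 int _nhfrags,int _nvfrags){
  int fragy;
  int fragx;
  for(fragy=0;fragy<_nvfrags;fragy++){
    int pred_last;
    pred_last=0;
    for(fragx=0;fragx<_nhfrags;fragx++){
      int fragi;
      fragi=fragy*_nhfrags+fragx;
      if(!_coded[fragi])continue;
      _dc[fragi]+=pred_last;
      pred_last=_dc[fragi];
    }
  }
}
#endif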
1520 /*Reconstructs all coded fragments in a single MCU (one or two super block
1522 This requires that each coded fragment have a proper macro block mode and
1523 motion vector (if not in INTRA mode), and have it's DC value decoded, with
1524 the DC prediction process reversed, and the number of coded and uncoded
1525 fragments in this plane of the MCU be counted.
1526 The token lists for each color plane and coefficient should also be filled
1527 in, along with initial token offsets, extra bits offsets, and EOB run
1529 static void oc_dec_frags_recon_mcu_plane(oc_dec_ctx
*_dec
,
1530 oc_dec_pipeline_state
*_pipe
,int _pli
){
1531 /*Decode the AC coefficients.*/
1536 int *coded_fragi_end
;
1538 ebi
=_pipe
->ebi
[_pli
];
1539 eob_runs
=_pipe
->eob_runs
[_pli
];
1540 coded_fragi_end
=coded_fragi
=_pipe
->coded_fragis
[_pli
];
1541 coded_fragi_end
+=_pipe
->ncoded_fragis
[_pli
];
1542 for(;coded_fragi
<coded_fragi_end
;coded_fragi
++){
1544 oc_quant_table
*iquants
;
1545 /*This array is made one bigger than necessary so that an invalid zero
1546 run cannot cause a buffer overflow.
1547 The inverse zig-zag mapping sends all out of range indices to the last
1548 entry of this array, where they are ignored.*/
1549 ogg_int16_t dct_coeffs
[128];
1554 frag
=_dec
->state
.frags
+fragi
;
1565 token
=_dec
->dct_tokens
[zzi
][ti
[zzi
]++];
1566 ebflag
=OC_DCT_TOKEN_EXTRA_BITS
[token
]!=0;
1567 eb
=_dec
->extra_bits
[zzi
][ebi
[zzi
]]&-ebflag
;
1569 if(token
<OC_NDCT_EOB_TOKEN_MAX
){
1570 eob_runs
[zzi
]=-oc_dct_token_skip(token
,eb
);
1572 else oc_dct_token_expand(token
,eb
,dct_coeffs
,&zzi
);
1575 /*TODO: zzi should be exactly 64 here.
1576 If it's not, we should report some kind of warning.*/
1577 zzi
=OC_MINI(zzi
,64);
1578 dct_coeffs
[0]=(ogg_int16_t
)frag
->dc
;
1579 iquants
=_dec
->state
.dequant_tables
[frag
->mbmode
!=OC_MODE_INTRA
][_pli
];
1580 /*last_zzi is always initialized.
1581 If your compiler thinks otherwise, it is dumb.*/
1582 oc_state_frag_recon(&_dec
->state
,frag
,_pli
,dct_coeffs
,last_zzi
,zzi
,
1583 iquants
[_dec
->state
.qis
[0]][0],iquants
[frag
->qi
]);
1585 _pipe
->coded_fragis
[_pli
]=coded_fragi
;
1586 /*Right now the reconstructed MCU has only the coded blocks in it.*/
1587 /*TODO: We make the decision here to always copy the uncoded blocks into it
1588 from the reference frame.
1589 We could also copy the coded blocks back over the reference frame, if we
1590 wait for an additional MCU to be decoded, which might be faster if only a
1591 small number of blocks are coded.
1592 However, this introduces more latency, creating a larger cache footprint.
1593 It's unknown which decision is better, but this one results in simpler
1594 code, and the hard case (high bitrate, high resolution) is handled
1596 /*Copy the uncoded blocks from the previous reference frame.*/
1597 _pipe
->uncoded_fragis
[_pli
]-=_pipe
->nuncoded_fragis
[_pli
];
1598 oc_state_frag_copy(&_dec
->state
,_pipe
->uncoded_fragis
[_pli
],
1599 _pipe
->nuncoded_fragis
[_pli
],OC_FRAME_SELF
,OC_FRAME_PREV
,_pli
);
1604 int framei
=_dec
->state
.ref_frame_idx
[OC_FRAME_SELF
];
1605 int ystride
=_dec
->state
.ref_frame_bufs
[framei
][_pli
].stride
;
1606 int *fragi_end
= _pipe
->coded_fragis
[_pli
];
1607 int *fragi
= fragi_end
-_pipe
->ncoded_fragis
[_pli
];
1609 for(;fragi
<fragi_end
;fragi
++){
1610 oc_fragment
*frag
=_dec
->state
.frags
+*fragi
;
1611 unsigned char *src
=frag
->buffer
[framei
];
1612 for(i
=0,j
=0;j
<8;j
++){
1613 for(k
=0;k
<8;k
++,i
++)
1614 frag
->recon
[i
] = src
[k
];
1619 fragi
= _pipe
->uncoded_fragis
[_pli
];
1620 fragi_end
= fragi
+_pipe
->nuncoded_fragis
[_pli
];
1622 for(;fragi
<fragi_end
;fragi
++){
1623 oc_fragment
*frag
=_dec
->state
.frags
+*fragi
;
1624 unsigned char *src
=frag
->buffer
[framei
];
1625 for(i
=0,j
=0;j
<8;j
++){
1626 for(k
=0;k
<8;k
++,i
++)
1627 frag
->recon
[i
] = src
[k
];
1636 /*Filter a horizontal block edge.*/
1637 static void oc_filter_hedge(unsigned char *_dst
,int _dst_ystride
,
1638 const unsigned char *_src
,int _src_ystride
,int _qstep
,int _flimit
,
1639 int *_variance0
,int *_variance1
){
1640 unsigned char *rdst
;
1641 const unsigned char *rsrc
;
1642 unsigned char *cdst
;
1643 const unsigned char *csrc
;
1651 for(bx
=0;bx
<8;bx
++){
1654 for(by
=0;by
<10;by
++){
1659 for(by
=0;by
<4;by
++){
1660 sum0
+=abs(r
[by
+1]-r
[by
]);
1661 sum1
+=abs(r
[by
+5]-r
[by
+6]);
1663 *_variance0
+=OC_MINI(255,sum0
);
1664 *_variance1
+=OC_MINI(255,sum1
);
1665 if(sum0
<_flimit
&&sum1
<_flimit
&&r
[5]-r
[4]<_qstep
&&r
[4]-r
[5]<_qstep
){
1666 *cdst
=(unsigned char)(r
[0]*3+r
[1]*2+r
[2]+r
[3]+r
[4]+4>>3);
1668 *cdst
=(unsigned char)(r
[0]*2+r
[1]+r
[2]*2+r
[3]+r
[4]+r
[5]+4>>3);
1670 for(by
=0;by
<4;by
++){
1671 *cdst
=(unsigned char)(r
[by
]+r
[by
+1]+r
[by
+2]+r
[by
+3]*2+
1672 r
[by
+4]+r
[by
+5]+r
[by
+6]+4>>3);
1675 *cdst
=(unsigned char)(r
[4]+r
[5]+r
[6]+r
[7]*2+r
[8]+r
[9]*2+4>>3);
1677 *cdst
=(unsigned char)(r
[5]+r
[6]+r
[7]+r
[8]*2+r
[9]*3+4>>3);
1680 for(by
=1;by
<=8;by
++){
1681 *cdst
=(unsigned char)r
[by
];
1690 /*Filter a vertical block edge.*/
1691 static void oc_filter_vedge(unsigned char *_dst
,int _dst_ystride
,
1692 int _qstep
,int _flimit
,int *_variances
){
1693 unsigned char *rdst
;
1694 const unsigned char *rsrc
;
1695 unsigned char *cdst
;
1702 for(by
=0;by
<8;by
++){
1705 for(bx
=0;bx
<10;bx
++)r
[bx
]=*rsrc
++;
1707 for(bx
=0;bx
<4;bx
++){
1708 sum0
+=abs(r
[bx
+1]-r
[bx
]);
1709 sum1
+=abs(r
[bx
+5]-r
[bx
+6]);
1711 _variances
[0]+=OC_MINI(255,sum0
);
1712 _variances
[1]+=OC_MINI(255,sum1
);
1713 if(sum0
<_flimit
&&sum1
<_flimit
&&r
[5]-r
[4]<_qstep
&&r
[4]-r
[5]<_qstep
){
1714 *rdst
++=(unsigned char)(r
[0]*3+r
[1]*2+r
[2]+r
[3]+r
[4]+4>>3);
1715 *rdst
++=(unsigned char)(r
[0]*2+r
[1]+r
[2]*2+r
[3]+r
[4]+r
[5]+4>>3);
1716 for(bx
=0;bx
<4;bx
++){
1717 *rdst
++=(unsigned char)(r
[bx
]+r
[bx
+1]+r
[bx
+2]+r
[bx
+3]*2+
1718 r
[bx
+4]+r
[bx
+5]+r
[bx
+6]+4>>3);
1720 *rdst
++=(unsigned char)(r
[4]+r
[5]+r
[6]+r
[7]*2+r
[8]+r
[9]*2+4>>3);
1721 *rdst
=(unsigned char)(r
[5]+r
[6]+r
[7]+r
[8]*2+r
[9]*3+4>>3);
1723 else for(bx
=1;bx
<=8;bx
++)*rdst
++=(unsigned char)r
[bx
];
1728 static void oc_dec_deblock_frag_rows(oc_dec_ctx
*_dec
,
1729 th_img_plane
*_dst
,th_img_plane
*_src
,int _pli
,int _fragy0
,
1731 oc_fragment_plane
*fplane
;
1733 unsigned char *dc_qi
;
1735 const unsigned char *src
;
1746 fplane
=_dec
->state
.fplanes
+_pli
;
1747 froffset
=fplane
->froffset
+_fragy0
*fplane
->nhfrags
;
1748 variance
=_dec
->variances
+froffset
;
1749 dc_qi
=_dec
->dc_qis
+froffset
;
1751 notdone
=_fragy_end
<fplane
->nvfrags
;
1752 /*We want to clear an extra row of variances, except at the end.*/
1753 memset(variance
+(fplane
->nhfrags
&-notstart
),0,
1754 (_fragy_end
+notdone
-_fragy0
-notstart
)*fplane
->nhfrags
*sizeof(variance
[0]));
1755 /*Except for the first time, we want to point to the middle of the row.*/
1756 y
=(_fragy0
<<3)+(notstart
<<2);
1757 dst
=_dst
->data
+y
*_dst
->stride
;
1758 src
=_src
->data
+y
*_src
->stride
;
1760 memcpy(dst
,src
,_dst
->width
*sizeof(dst
[0]));
1764 /*We also want to skip the last row in the frame for this loop.*/
1765 y_end
=_fragy_end
-!notdone
<<3;
1767 qstep
=_dec
->pp_dc_scale
[*dc_qi
];
1768 flimit
=(qstep
*3)>>2;
1769 oc_filter_hedge(dst
,_dst
->stride
,src
-_src
->stride
,_src
->stride
,
1770 qstep
,flimit
,variance
,variance
+fplane
->nhfrags
);
1773 for(x
=8;x
<_dst
->width
;x
+=8){
1774 qstep
=_dec
->pp_dc_scale
[*dc_qi
];
1775 flimit
=(qstep
*3)>>2;
1776 oc_filter_hedge(dst
+x
,_dst
->stride
,src
+x
-_src
->stride
,_src
->stride
,
1777 qstep
,flimit
,variance
,variance
+fplane
->nhfrags
);
1778 oc_filter_vedge(dst
+x
-(_dst
->stride
<<2)-4,_dst
->stride
,
1779 qstep
,flimit
,variance
-1);
1783 dst
+=_dst
->stride
<<3;
1784 src
+=_src
->stride
<<3;
1786 /*And finally, handle the last row in the frame, if it's in the range.*/
1788 for(;y
<_dst
->height
;y
++){
1789 memcpy(dst
,src
,_dst
->width
*sizeof(dst
[0]));
1793 /*Filter the last row of vertical block edges.*/
1795 for(x
=8;x
<_dst
->width
;x
+=8){
1796 qstep
=_dec
->pp_dc_scale
[*dc_qi
++];
1797 flimit
=(qstep
*3)>>2;
1798 oc_filter_vedge(dst
+x
-(_dst
->stride
<<3)-4,_dst
->stride
,
1799 qstep
,flimit
,variance
++);
1804 static void oc_dering_block(unsigned char *_idata
,int _ystride
,int _b
,
1805 int _dc_scale
,int _sharp_mod
,int _strong
){
1806 static const int OCDB_MOD_MAX
[2]={24,32};
1807 static const int OCDB_MOD_SHIFT
[2]={1,0};
1808 const unsigned char *psrc
;
1809 const unsigned char *src
;
1810 const unsigned char *nsrc
;
1817 mod_hi
=OC_MINI(3*_dc_scale
,OCDB_MOD_MAX
[_strong
]);
1820 psrc
=src
-(_ystride
&-!(_b
&4));
1821 for(by
=0;by
<9;by
++){
1822 for(bx
=0;bx
<8;bx
++){
1824 mod
=32+_dc_scale
-(abs(src
[bx
]-psrc
[bx
])<<OCDB_MOD_SHIFT
[_strong
]);
1825 vmod
[(by
<<3)+bx
]=mod
<-64?_sharp_mod
:OC_CLAMPI(0,mod
,mod_hi
);
1828 src
+=_ystride
&-(!(_b
&8)|by
<7);
1832 for(bx
=0;bx
<9;bx
++){
1834 for(by
=0;by
<8;by
++){
1836 mod
=32+_dc_scale
-(abs(*src
-*psrc
)<<OCDB_MOD_SHIFT
[_strong
]);
1837 hmod
[(bx
<<3)+by
]=mod
<-64?_sharp_mod
:OC_CLAMPI(0,mod
,mod_hi
);
1845 psrc
=src
-(_ystride
&-!(_b
&4));
1847 for(by
=0;by
<8;by
++){
1855 b
+=w
**(src
-!(_b
&1));
1865 dst
[0]=OC_CLAMP255(a
*src
[0]+b
>>7);
1866 for(bx
=1;bx
<7;bx
++){
1875 w
=vmod
[(by
+1<<3)+bx
];
1878 w
=hmod
[(bx
+1<<3)+by
];
1881 dst
[bx
]=OC_CLAMP255(a
*src
[bx
]+b
>>7);
1891 w
=vmod
[(by
+1<<3)+7];
1896 b
+=w
*src
[7+!(_b
&2)];
1897 dst
[7]=OC_CLAMP255(a
*src
[7]+b
>>7);
1901 nsrc
+=_ystride
&-(!(_b
&8)|by
<6);
1905 #define OC_DERING_THRESH1 (384)
1906 #define OC_DERING_THRESH2 (4*OC_DERING_THRESH1)
1907 #define OC_DERING_THRESH3 (5*OC_DERING_THRESH1)
1908 #define OC_DERING_THRESH4 (10*OC_DERING_THRESH1)
1910 static void oc_dec_dering_frag_rows(oc_dec_ctx
*_dec
,th_img_plane
*_img
,
1911 int _pli
,int _fragy0
,int _fragy_end
){
1912 th_img_plane
*iplane
;
1913 oc_fragment_plane
*fplane
;
1916 unsigned char *idata
;
1924 fplane
=_dec
->state
.fplanes
+_pli
;
1925 froffset
=fplane
->froffset
+_fragy0
*fplane
->nhfrags
;
1926 variance
=_dec
->variances
+froffset
;
1927 frag
=_dec
->state
.frags
+froffset
;
1928 strong
=_dec
->pp_level
>=(_pli
?OC_PP_LEVEL_SDERINGC
:OC_PP_LEVEL_SDERINGY
);
1929 sthresh
=_pli
?OC_DERING_THRESH4
:OC_DERING_THRESH3
;
1931 idata
=iplane
->data
+y
*iplane
->stride
;
1932 y_end
=_fragy_end
<<3;
1934 for(x
=0;x
<iplane
->width
;x
+=8){
1940 b
=(x
<=0)|(x
+8>=iplane
->width
)<<1|(y
<=0)<<2|(y
+8>=iplane
->height
)<<3;
1941 if(strong
&&var
>sthresh
){
1942 oc_dering_block(idata
+x
,iplane
->stride
,b
,
1943 _dec
->pp_dc_scale
[qi
],_dec
->pp_sharp_mod
[qi
],1);
1944 if(_pli
||(b
&1)&&*(variance
-1)>OC_DERING_THRESH4
||
1945 (b
&2)&&variance
[1]>OC_DERING_THRESH4
||
1946 (b
&4)&&*(variance
-fplane
->nvfrags
)>OC_DERING_THRESH4
||
1947 (b
&8)&&variance
[fplane
->nvfrags
]>OC_DERING_THRESH4
){
1948 oc_dering_block(idata
+x
,iplane
->stride
,b
,
1949 _dec
->pp_dc_scale
[qi
],_dec
->pp_sharp_mod
[qi
],1);
1950 oc_dering_block(idata
+x
,iplane
->stride
,b
,
1951 _dec
->pp_dc_scale
[qi
],_dec
->pp_sharp_mod
[qi
],1);
1954 else if(var
>OC_DERING_THRESH2
){
1955 oc_dering_block(idata
+x
,iplane
->stride
,b
,
1956 _dec
->pp_dc_scale
[qi
],_dec
->pp_sharp_mod
[qi
],1);
1958 else if(var
>OC_DERING_THRESH1
){
1959 oc_dering_block(idata
+x
,iplane
->stride
,b
,
1960 _dec
->pp_dc_scale
[qi
],_dec
->pp_sharp_mod
[qi
],0);
1965 idata
+=iplane
->stride
<<3;
th_dec_ctx *th_decode_alloc(const th_info *_info,
 const th_setup_info *_setup){
  if(_info==NULL||_setup==NULL)return NULL;
  dec=_ogg_malloc(sizeof(*dec));
  if(oc_dec_init(dec,_info,_setup)<0){
    _ogg_free(dec);
    return NULL;
  }
  dec->state.curframe_num=0;
  return dec;
}

void th_decode_free(th_dec_ctx *_dec){
int th_decode_ctl(th_dec_ctx *_dec,int _req,void *_buf,
    case TH_DECCTL_GET_PPLEVEL_MAX:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      (*(int *)_buf)=OC_PP_LEVEL_MAX;
    case TH_DECCTL_SET_PPLEVEL:{
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(int))return TH_EINVAL;
      pp_level=*(int *)_buf;
      if(pp_level<0||pp_level>OC_PP_LEVEL_MAX)return TH_EINVAL;
      _dec->pp_level=pp_level;
    case TH_DECCTL_SET_GRANPOS:{
      ogg_int64_t granpos;
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(ogg_int64_t))return TH_EINVAL;
      granpos=*(ogg_int64_t *)_buf;
      if(granpos<0)return TH_EINVAL;
      _dec->state.granpos=granpos;
      _dec->state.keyframe_num=
       granpos>>_dec->state.info.keyframe_granule_shift;
      _dec->state.curframe_num=_dec->state.keyframe_num+
       (granpos&(1<<_dec->state.info.keyframe_granule_shift)-1);
    case TH_DECCTL_SET_STRIPE_CB:{
      th_stripe_callback *cb;
      if(_dec==NULL||_buf==NULL)return TH_EFAULT;
      if(_buf_sz!=sizeof(th_stripe_callback))return TH_EINVAL;
      cb=(th_stripe_callback *)_buf;
      _dec->stripe_cb.ctx=cb->ctx;
      _dec->stripe_cb.stripe_decoded=cb->stripe_decoded;
    default:return TH_EIMPL;
int th_decode_packetin(th_dec_ctx *_dec,const ogg_packet *_op,
 ogg_int64_t *_granpos){
  int ret;
  if(_dec==NULL||_op==NULL)return TH_EFAULT;
  /*A completely empty packet indicates a dropped frame and is treated exactly
     like an inter frame with no coded blocks.
    Only proceed if we have a non-empty packet.*/
  if(_op->bytes!=0){
    oc_dec_pipeline_state pipe;
    th_ycbcr_buffer       stripe_buf;
    int                   stripe_fragy;
    int                   refi;
    int                   pli;
    int                   notstart;
    int                   notdone;
    theorapackB_readinit(&_dec->opb,_op->packet,_op->bytes);
    ret=oc_dec_frame_header_unpack(_dec);
    if(ret<0)return ret;
    /*Select a free buffer to use for the reconstructed version of this
       frame.*/
    if(_dec->state.frame_type!=OC_INTRA_FRAME&&
     (_dec->state.ref_frame_idx[OC_FRAME_GOLD]<0||
     _dec->state.ref_frame_idx[OC_FRAME_PREV]<0)){
      th_info *info;
      size_t   yplane_sz;
      size_t   cplane_sz;
      int      yhstride;
      int      yvstride;
      int      chstride;
      int      cvstride;
      /*We're decoding an INTER frame, but have no initialized reference
         buffers (i.e., decoding did not start on a key frame).
        We initialize them to a solid gray here.*/
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=0;
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=0;
      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi=1;
      info=&_dec->state.info;
      yhstride=info->frame_width+2*OC_UMV_PADDING;
      yvstride=info->frame_height+2*OC_UMV_PADDING;
      chstride=yhstride>>!(info->pixel_fmt&1);
      cvstride=yvstride>>!(info->pixel_fmt&2);
      yplane_sz=(size_t)yhstride*yvstride;
      cplane_sz=(size_t)chstride*cvstride;
      memset(_dec->state.ref_frame_data,0x80,yplane_sz+2*cplane_sz);
    }
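    /*Illustrative note (added): with OC_UMV_PADDING at its usual value of 16
       pixels per side, a 320x240 4:2:0 stream gets yhstride=352 and
       yvstride=272, a padded luma plane of 352*272=95744 bytes and chroma
       planes of 176*136=23936 bytes each; 0x80 is the neutral value for both
       luma and chroma, so the memset above yields a solid mid-gray reference
       frame.*/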
    else{
      for(refi=0;refi==_dec->state.ref_frame_idx[OC_FRAME_GOLD]||
       refi==_dec->state.ref_frame_idx[OC_FRAME_PREV];refi++);
      _dec->state.ref_frame_idx[OC_FRAME_SELF]=refi;
    }
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      oc_dec_mark_all_intra(_dec);
      _dec->state.keyframe_num=_dec->state.curframe_num;
    }
    else{
      oc_dec_coded_flags_unpack(_dec);
      oc_dec_mb_modes_unpack(_dec);
      oc_dec_mv_unpack_and_frag_modes_fill(_dec);
    }
    oc_dec_block_qis_unpack(_dec);
    oc_dec_residual_tokens_unpack(_dec);
    /*Update granule position.
      This must be done before the striped decode callbacks so that the
       application knows what to do with the frame data.*/
    _dec->state.granpos=
     (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+
     (_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
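    /*Illustrative note (added): with keyframe_granule_shift=6, a frame that is
       5 frames past key frame number 3 gets granpos=(3<<6)+5=197; the key
       frame number is recovered as granpos>>6 and the offset as
       granpos&((1<<6)-1), which is exactly how TH_DECCTL_SET_GRANPOS splits an
       externally supplied granule position above.*/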
    /*All of the rest of the operations -- DC prediction reversal,
       reconstructing coded fragments, copying uncoded fragments, loop
       filtering, extending borders, and out-of-loop post-processing -- should
       be pipelined.
      I.e., DC prediction reversal, reconstruction, and uncoded fragment
       copying are done for one or two super block rows, then loop filtering is
       run as far as it can, then border copying, then post-processing.
      For 4:2:0 video a Minimum Codable Unit or MCU contains two luma super
       block rows, and one chroma.
      Otherwise, an MCU consists of one super block row from each plane.
      Inside each MCU, we perform all of the steps on one color plane before
       moving on to the next.
      After reconstruction, the additional filtering stages introduce a delay
       since they need some pixels from the next fragment row.
      Thus the actual number of decoded rows available is slightly smaller for
       the first MCU, and slightly larger for the last.

      This entire process allows us to operate on the data while it is still in
       cache, resulting in big performance improvements.
      An application callback allows further application processing (blitting
       to video memory, color conversion, etc.) to also use the data while it's
       in cache.*/
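    /*Illustrative note (added): a super block is 4x4 fragments, i.e. 32x32
       pixels, so for 4:2:0 material an MCU of two luma super block rows is
       eight luma fragment rows (64 pixel rows, hence pipe.mcu_nvfrags is 8
       here), matched by one chroma super block row of four fragment rows
       (32 chroma pixel rows) in each chroma plane.*/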
    oc_dec_pipeline_init(_dec,&pipe);
    oc_ycbcr_buffer_flip(stripe_buf,_dec->pp_frame_buf);
    notdone=1;
    for(stripe_fragy=notstart=0;notdone;stripe_fragy+=pipe.mcu_nvfrags){
      int avail_fragy0;
      int avail_fragy_end;
      avail_fragy0=avail_fragy_end=_dec->state.fplanes[0].nvfrags;
      notdone=stripe_fragy+pipe.mcu_nvfrags<avail_fragy_end;
      for(pli=0;pli<3;pli++){
        oc_fragment_plane *fplane;
        int                frag_shift;
        int                pp_offset;
        int                sdelay;
        int                edelay;
        fplane=_dec->state.fplanes+pli;
        /*Compute the first and last fragment row of the current MCU for this
           plane.*/
        frag_shift=pli!=0&&!(_dec->state.info.pixel_fmt&2);
        pipe.fragy0[pli]=stripe_fragy>>frag_shift;
        pipe.fragy_end[pli]=OC_MINI(fplane->nvfrags,
         pipe.fragy0[pli]+(pipe.mcu_nvfrags>>frag_shift));
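        /*Illustrative note (added): frag_shift is 1 only for the chroma planes
           of formats whose chroma is vertically subsampled (pixel_fmt bit 1
           clear, i.e. 4:2:0), so those planes advance half as many fragment
           rows per MCU as luma; for 4:2:2 and 4:4:4 every plane advances at
           the same rate.*/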
        oc_dec_dc_unpredict_mcu_plane(_dec,&pipe,pli);
        oc_dec_frags_recon_mcu_plane(_dec,&pipe,pli);
        sdelay=edelay=0;
        if(pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
          oc_state_loop_filter_frag_rows(&_dec->state,pipe.bounding_values,
           refi,pli,pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
        }
        /*To fill the borders, we have an additional two pixel delay, since a
           fragment in the next row could filter its top edge, using two pixels
           from a fragment in this row.
          But there's no reason to delay a full fragment between the two.*/
        oc_state_borders_fill_rows(&_dec->state,refi,pli,
         (pipe.fragy0[pli]-sdelay<<3)-(sdelay<<1),
         (pipe.fragy_end[pli]-edelay<<3)-(edelay<<1));
        /*Out-of-loop post-processing.*/
        pp_offset=3*(pli!=0);
        if(pipe.pp_level>=OC_PP_LEVEL_DEBLOCKY+pp_offset){
          /*Perform de-blocking in one plane.*/
          sdelay+=notstart;
          edelay+=notdone;
          oc_dec_deblock_frag_rows(_dec,_dec->pp_frame_buf,
           _dec->state.ref_frame_bufs[refi],pli,
           pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
          if(pipe.pp_level>=OC_PP_LEVEL_DERINGY+pp_offset){
            /*Perform de-ringing in one plane.*/
            sdelay+=notstart;
            edelay+=notdone;
            oc_dec_dering_frag_rows(_dec,_dec->pp_frame_buf,pli,
             pipe.fragy0[pli]-sdelay,pipe.fragy_end[pli]-edelay);
          }
        }
        /*If no post-processing is done, we still need to delay a row for the
           loop filter, thanks to the strange filtering order VP3 chose.*/
        else if(pipe.loop_filter){
          sdelay+=notstart;
          edelay+=notdone;
        }
        /*Compute the intersection of the available rows in all planes.
          If chroma is sub-sampled, the effect of each of its delays is
           doubled, but luma might have more post-processing filters enabled
           than chroma, so we don't know up front which one is the limiting
           factor.*/
        avail_fragy0=OC_MINI(avail_fragy0,pipe.fragy0[pli]-sdelay<<frag_shift);
        avail_fragy_end=OC_MINI(avail_fragy_end,
         pipe.fragy_end[pli]-edelay<<frag_shift);
      }
      if(_dec->stripe_cb.stripe_decoded!=NULL){
        /*Make the callback, ensuring we flip the sense of the "start" and
           "end" of the available region upside down.*/
        (*_dec->stripe_cb.stripe_decoded)(_dec->stripe_cb.ctx,stripe_buf,
         _dec->state.fplanes[0].nvfrags-avail_fragy_end,
         _dec->state.fplanes[0].nvfrags-avail_fragy0);
      }
      notstart=1;
    }
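    /*Illustrative sketch (added, not part of the library): a matching
       application callback registered via TH_DECCTL_SET_STRIPE_CB has roughly
       this shape (stripe_decoded, my_ctx and my_blit_rows are placeholders):
        static void stripe_decoded(void *_ctx,th_ycbcr_buffer _buf,
         int _yfrag0,int _yfrag_end){
          my_blit_rows(_ctx,_buf,_yfrag0<<3,_yfrag_end<<3);
        }
        th_stripe_callback cb;
        cb.ctx=my_ctx;
        cb.stripe_decoded=stripe_decoded;
        th_decode_ctl(dec,TH_DECCTL_SET_STRIPE_CB,&cb,sizeof(cb));
       The fragment row bounds passed in the call above are already flipped to
       the top-down sense the application expects.*/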
#if defined(_TH_DEBUG_)
    {
      int x,y,i,j,k,xn,yn;
      int plane;
      int buf;
      int l;
      int offset;
      /* dump fragment DCT components */
      offset = 0;
      for(plane=0;plane<3;plane++){
        char *plstr;
        switch(plane){
          case  0:{
            plstr = "Y";
            xn = _dec->state.info.frame_width>>3;
            yn = _dec->state.info.frame_height>>3;
          }break;
          case  1:{
            plstr = "U";
            xn = _dec->state.info.frame_width>>4;
            yn = _dec->state.info.frame_height>>4;
          }break;
          default:{
            plstr = "V";
            xn = _dec->state.info.frame_width>>4;
            yn = _dec->state.info.frame_height>>4;
          }break;
        }
        i = offset;
        for(y=0;y<yn;y++){
          for(x=0;x<xn;x++,i++){
            for(buf=0;buf<4;buf++){
              ogg_int16_t *ptr;
              char        *bufn;
              int          codecheck;
              i = offset + y*xn + x;
              switch(buf){
                case  0:{
                  bufn = "quant";
                  codecheck = 1;
                  ptr = _dec->state.frags[i].quant;
                }break;
                case  1:{
                  bufn = "freq";
                  codecheck = 1;
                  ptr = _dec->state.frags[i].freq;
                }break;
                case  2:{
                  bufn = "time";
                  codecheck = 0;
                  ptr = _dec->state.frags[i].time;
                }break;
                default:{
                  bufn = "loop";
                  codecheck = 0;
                  ptr = _dec->state.frags[i].loop;
                }break;
              }
              TH_DEBUG("%s %s [%d][%d] = {",bufn,plstr,x,y);
              if(codecheck && !_dec->state.frags[i].coded)
                TH_DEBUG(" not coded }\n");
              else{
                l = 0;
                for(j=0;j<8;j++){
                  TH_DEBUG("\n   ");
                  for(k=0;k<8;k++,l++){
                    TH_DEBUG("%d ",ptr[l]);
                  }
                }
                TH_DEBUG(" }\n");
              }
            }
          }
        }
        offset += xn*yn;
      }
    }
#endif
    /*Finish filling in the reference frame borders.*/
    for(pli=0;pli<3;pli++)oc_state_borders_fill_caps(&_dec->state,refi,pli);
    /*Update the reference frame indices.*/
    if(_dec->state.frame_type==OC_INTRA_FRAME){
      /*The new frame becomes both the previous and gold reference frames.*/
      _dec->state.ref_frame_idx[OC_FRAME_GOLD]=
       _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
    }
    else{
      /*Otherwise, just replace the previous reference frame.*/
      _dec->state.ref_frame_idx[OC_FRAME_PREV]=
       _dec->state.ref_frame_idx[OC_FRAME_SELF];
    }
#if defined(OC_DUMP_IMAGES)
    /*Don't dump images for dropped frames.*/
    oc_state_dump_frame(&_dec->state,OC_FRAME_SELF,"dec");
#endif
    return 0;
  }
  else{
    /*Just update the granule position and return.*/
    _dec->state.granpos=
     (_dec->state.keyframe_num<<_dec->state.info.keyframe_granule_shift)+
     (_dec->state.curframe_num-_dec->state.keyframe_num);
    _dec->state.curframe_num++;
    if(_granpos!=NULL)*_granpos=_dec->state.granpos;
    return TH_DUPFRAME;
  }
}

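/*Illustrative sketch (added, not part of the library): a typical decode loop
   feeds each video packet to th_decode_packetin() and fetches the result with
   th_decode_ycbcr_out() (op is the current ogg_packet and display() is a
   placeholder for application output):
    ogg_int64_t     granpos;
    th_ycbcr_buffer ycbcr;
    int             ret;
    ret=th_decode_packetin(dec,&op,&granpos);
    if(ret==0||ret==TH_DUPFRAME){
      th_decode_ycbcr_out(dec,ycbcr);
      display(ycbcr);
    }
   TH_DUPFRAME indicates a zero-byte packet, i.e. the previous frame should
   simply be shown again.*/
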
int th_decode_ycbcr_out(th_dec_ctx *_dec,th_ycbcr_buffer _ycbcr){
  oc_ycbcr_buffer_flip(_ycbcr,_dec->pp_frame_buf);