1 #include "mpeg3private.h"
2 #include "mpeg3protos.h"
/* Saturate an integer expression to the 8-bit pixel range 0..255. */
#define CLIP(x) ((x) < 0 ? 0 : ((x) < 255 ? (x) : 255))

/*
 * Four packed 16-bit words, each holding 128.  The inline assembly in this
 * file refers to this object by its symbol name, so the name must not change.
 */
static unsigned long long MMX_128 = 0x0080008000800080ULL;
11 int mpeg3_new_slice_buffer(mpeg3_slice_buffer_t
*slice_buffer
)
13 pthread_mutexattr_t mutex_attr
;
15 slice_buffer
->data
= malloc(1024);
16 slice_buffer
->buffer_size
= 0;
17 slice_buffer
->buffer_allocation
= 1024;
18 slice_buffer
->current_position
= 0;
19 slice_buffer
->bits_size
= 0;
20 slice_buffer
->bits
= 0;
21 slice_buffer
->done
= 0;
22 pthread_mutexattr_init(&mutex_attr
);
23 // pthread_mutexattr_setkind_np(&mutex_attr, PTHREAD_MUTEX_FAST_NP);
24 pthread_mutex_init(&(slice_buffer
->completion_lock
), &mutex_attr
);
28 int mpeg3_delete_slice_buffer(mpeg3_slice_buffer_t
*slice_buffer
)
30 free(slice_buffer
->data
);
31 pthread_mutex_destroy(&(slice_buffer
->completion_lock
));
35 int mpeg3_expand_slice_buffer(mpeg3_slice_buffer_t
*slice_buffer
)
38 unsigned char *new_buffer
= malloc(slice_buffer
->buffer_allocation
* 2);
39 for(i
= 0; i
< slice_buffer
->buffer_size
; i
++)
40 new_buffer
[i
] = slice_buffer
->data
[i
];
41 free(slice_buffer
->data
);
42 slice_buffer
->data
= new_buffer
;
43 slice_buffer
->buffer_allocation
*= 2;
47 /* limit coefficients to -2048..2047 */
49 /* move/add 8x8-Block from block[comp] to refframe */
/*
 * mpeg3video_addblock: transfer one 8x8 block of coefficients,
 * slice->block[comp], into the destination plane video->newframe[cc].
 * Two kinds of transfer are visible below: adding the block to the pixels
 * already in the plane, and storing the block with an offset of 128.  Both
 * saturate the result to 0..255.
 *
 * NOTE(review): this listing omits the braces, most of the parameter list and
 * several branch headers of the function, so the conditions selecting each
 * path are not all visible; the comments describe only what the visible
 * statements do.
 */
51 static inline int mpeg3video_addblock(mpeg3_slice_t
*slice
,
/* Local copy of slice->sparse[comp]; its consumers are not visible in this listing. */
62 int spar
= slice
->sparse
[comp
];
63 /* color component index */
/* Components 0..3 use plane 0; higher components use plane 1 or 2 depending on the low bit of comp. */
64 cc
= (comp
< 4) ? 0 : (comp
& 1) + 1;
/*
 * Plane 0 addressing.  rfp is the address of the block's first pixel and
 * iincr the distance between successive rows written by this block.
 */
69 if(video
->pict_struct
== FRAME_PICTURE
)
73 /* field DCT coding */
/* Row offset is 0 or 1 and rows are written two picture lines apart. */
74 rfp
= video
->newframe
[0] +
75 video
->coded_picture_width
* (by
+ ((comp
& 2) >> 1)) + bx
+ ((comp
& 1) << 3);
76 iincr
= (video
->coded_picture_width
<< 1);
80 /* frame DCT coding */
/* Row offset is 0 or 8 and rows are written on consecutive picture lines. */
81 rfp
= video
->newframe
[0] +
82 video
->coded_picture_width
* (by
+ ((comp
& 2) << 2)) + bx
+ ((comp
& 1) << 3);
83 iincr
= video
->coded_picture_width
;
/* Remaining plane-0 case: both the row address and the row step use twice the picture width. */
89 rfp
= video
->newframe
[0] +
90 (video
->coded_picture_width
<< 1) * (by
+ ((comp
& 2) << 2)) + bx
+ ((comp
& 1) << 3);
91 iincr
= (video
->coded_picture_width
<< 1);
98 /* scale coordinates */
/* bx is halved unless the format is 4:4:4; by is halved only for 4:2:0. */
99 if(video
->chroma_format
!= CHROMA444
) bx
>>= 1;
100 if(video
->chroma_format
== CHROMA420
) by
>>= 1;
/* Addressing for planes 1 and 2, mirroring the plane-0 cases but based on chrom_width. */
101 if(video
->pict_struct
== FRAME_PICTURE
)
103 if(dct_type
&& (video
->chroma_format
!= CHROMA420
))
105 /* field DCT coding */
106 rfp
= video
->newframe
[cc
]
107 + video
->chrom_width
* (by
+ ((comp
& 2) >> 1)) + bx
+ (comp
& 8);
108 iincr
= (video
->chrom_width
<< 1);
112 /* frame DCT coding */
113 rfp
= video
->newframe
[cc
]
114 + video
->chrom_width
* (by
+ ((comp
& 2) << 2)) + bx
+ (comp
& 8);
115 iincr
= video
->chrom_width
;
121 rfp
= video
->newframe
[cc
]
122 + (video
->chrom_width
<< 1) * (by
+ ((comp
& 2) << 2)) + bx
+ (comp
& 8);
123 iincr
= (video
->chrom_width
<< 1);
/* bp points at the coefficient data of the block being transferred. */
127 bp
= slice
->block
[comp
];
/*
 * Inline assembly, add variant using a single block value: the first word at
 * bp is replicated into all four lanes of mm6, added to 8 destination bytes
 * widened to words, and packed back with unsigned saturation.  rfp is
 * advanced by iincr.  NOTE(review): parts of this statement are not visible.
 */
136 __asm__
__volatile__(
137 "movq (%2), %%mm6\n" /* 4 blockvals */
138 "pxor %%mm4, %%mm4\n"
139 "punpcklwd %%mm6, %%mm6\n"
140 "punpcklwd %%mm6, %%mm6\n"
143 "movq (%1), %%mm0\n" /* 8 rindex1 */
144 "movq %%mm0, %%mm2\n"
145 "punpcklbw %%mm4, %%mm0\n"
146 "punpckhbw %%mm4, %%mm2\n"
147 "paddw %%mm6, %%mm0\n"
148 "paddw %%mm6, %%mm2\n"
150 "packuswb %%mm2, %%mm0\n"
153 "leal (%1, %3), %1\n"
156 : "c" (8),"r" (rfp
), "r" (bp
), "r" (iincr
)
/*
 * Inline assembly, add variant using the full block: 8 destination bytes are
 * widened to words, the 8 block words at (%1) and 8(%1) are added, and the
 * sums are packed back with unsigned saturation.
 * NOTE(review): parts of this statement are not visible.
 */
161 __asm__
__volatile__(
162 "pxor %%mm4, %%mm4\n"
166 "movq (%2), %%mm0\n" /* 8 rfp 0 1 2 3 4 5 6 7*/
167 "movq (%1), %%mm6\n" /* 4 blockvals 0 1 2 3 */
169 "movq %%mm0, %%mm2\n"
170 "movq 8(%1), %%mm5\n" /* 4 blockvals 0 1 2 3 */
171 "punpcklbw %%mm4, %%mm0\n" /* 0 2 4 6 */
172 "punpckhbw %%mm4, %%mm2\n" /* 1 3 5 7 */
174 "paddw %%mm6, %%mm0\n"
175 "paddw %%mm5, %%mm2\n"
176 "packuswb %%mm2, %%mm0\n"
184 : "c" (8),"r" (bp
), "r" (rfp
), "r" (iincr
)
/*
 * Portable add variant: for each of 8 rows, every destination pixel becomes
 * CLIP(block value + existing pixel).  The statements that step bp and rfp
 * to the next row are not visible in this listing.
 */
190 for(i
= 0; i
< 8; i
++)
192 rfp
[0] = CLIP(bp
[0] + rfp
[0]);
193 rfp
[1] = CLIP(bp
[1] + rfp
[1]);
194 rfp
[2] = CLIP(bp
[2] + rfp
[2]);
195 rfp
[3] = CLIP(bp
[3] + rfp
[3]);
196 rfp
[4] = CLIP(bp
[4] + rfp
[4]);
197 rfp
[5] = CLIP(bp
[5] + rfp
[5]);
198 rfp
[6] = CLIP(bp
[6] + rfp
[6]);
199 rfp
[7] = CLIP(bp
[7] + rfp
[7]);
/*
 * Inline assembly, store variant using a single block value: the first word
 * at bp is replicated, MMX_128 is added, the result is packed to bytes with
 * unsigned saturation and the same 8 bytes are stored to several rows.
 * NOTE(review): parts of this statement are not visible.
 */
211 __asm__
__volatile__(
212 "movd (%2), %%mm0\n" /* " 0 0 0 v1" */
213 "punpcklwd %%mm0, %%mm0\n" /* " 0 0 v1 v1" */
214 "punpcklwd %%mm0, %%mm0\n"
215 "paddw MMX_128, %%mm0\n"
216 "packuswb %%mm0, %%mm0\n"
217 "leal (%0,%1,2), %%eax\n"
219 "movq %%mm0, (%0, %1)\n"
220 "movq %%mm0, (%%eax)\n"
221 "leal (%%eax,%1,2), %0\n"
222 "movq %%mm0, (%%eax, %1)\n"
225 "leal (%0,%1,2), %%eax\n"
226 "movq %%mm0, (%0, %1)\n"
228 "movq %%mm0, (%%eax)\n"
229 "movq %%mm0, (%%eax, %1)\n"
231 : "D" (rfp
), "c" (iincr
), "b" (bp
)
/*
 * Inline assembly, store variant using the full block: MMX_128 is added to
 * the block words, which are packed to bytes with unsigned saturation and
 * stored; rfp is advanced by two rows per step.
 * NOTE(review): parts of this statement are not visible.
 */
236 __asm__
__volatile__(
237 "movq MMX_128,%%mm4\n"
241 "movq 8(%1), %%mm1\n"
242 "paddw %%mm4, %%mm0\n"
244 "movq 16(%1), %%mm2\n"
245 "paddw %%mm4, %%mm1\n"
247 "movq 24(%1), %%mm3\n"
248 "paddw %%mm4, %%mm2\n"
250 "packuswb %%mm1, %%mm0\n"
251 "paddw %%mm4, %%mm3\n"
254 "packuswb %%mm3, %%mm2\n"
258 "movq %%mm2, (%2,%3)\n"
260 "leal (%2,%3,2), %2\n"
263 : "c" (4), "r" (bp
), "r" (rfp
), "r" (iincr
)
/*
 * Portable store variant: for each of 8 rows, every destination pixel becomes
 * CLIP(block value + 128); the previous pixel value is not read.
 */
269 for(i
= 0; i
< 8; i
++)
271 rfp
[0] = CLIP(bp
[0] + 128);
272 rfp
[1] = CLIP(bp
[1] + 128);
273 rfp
[2] = CLIP(bp
[2] + 128);
274 rfp
[3] = CLIP(bp
[3] + 128);
275 rfp
[4] = CLIP(bp
[4] + 128);
276 rfp
[5] = CLIP(bp
[5] + 128);
277 rfp
[6] = CLIP(bp
[6] + 128);
278 rfp
[7] = CLIP(bp
[7] + 128);
/*
 * mpeg3_decode_slice: decode the macroblocks stored in slice->slice_buffer.
 * For each macroblock it reads the macroblock modes, optional quantiser
 * scale, motion vectors and coded block pattern, decodes the coded blocks,
 * calls mpeg3video_reconstruct() for non-intra macroblocks and
 * mpeg3video_addblock() for every coded block.
 *
 * Visible return values: 0 when mpeg3slice_showbits(slice_buffer, 23) yields
 * zero, 1 as soon as slice->fault is found set.
 *
 * NOTE(review): this listing omits the braces and a number of statements
 * (loop headers, else branches, some declarations); the comments describe
 * only what the visible statements do.
 */
286 int mpeg3_decode_slice(mpeg3_slice_t
*slice
)
288 mpeg3video_t
*video
= slice
->video
;
290 int mb_type
, cbp
, motion_type
= 0, dct_type
;
291 int macroblock_address
, mba_inc
, mba_max
;
292 int slice_vert_pos_ext
;
296 int mv_count
, mv_format
, mvscale
;
297 int pmv
[2][2][2], mv_field_sel
[2][2];
298 int dmv
, dmvector
[2];
300 int stwtype
, stwclass
;
303 mpeg3_slice_buffer_t
*slice_buffer
= slice
->slice_buffer
;
305 /* number of macroblocks per picture */
306 mba_max
= video
->mb_width
* video
->mb_height
;
308 /* field picture has half as many macroblocks as frame */
309 if(video
->pict_struct
!= FRAME_PICTURE
)
312 /* macroblock address */
313 macroblock_address
= 0;
314 /* first macroblock in slice is not skipped */
/* The low 8 bits of these 32 bits are used further down to compute the macroblock address. */
318 code
= mpeg3slice_getbits(slice_buffer
, 32);
319 /* decode slice header (may change quant_scale) */
320 slice_vert_pos_ext
= mpeg3video_getslicehdr(slice
, video
);
322 /* reset all DC coefficient and motion vector predictors */
323 dc_dct_pred
[0] = dc_dct_pred
[1] = dc_dct_pred
[2] = 0;
324 pmv
[0][0][0] = pmv
[0][0][1] = pmv
[1][0][0] = pmv
[1][0][1] = 0;
325 pmv
[0][1][0] = pmv
[0][1][1] = pmv
[1][1][0] = pmv
[1][1][1] = 0;
/* Fragment of the macroblock loop header: continue while current_position is below buffer_size. */
328 slice_buffer
->current_position
< slice_buffer
->buffer_size
;
/* A zero result from the 23-bit lookahead ends decoding with return value 0. */
334 if(!mpeg3slice_showbits(slice_buffer
, 23)) return 0;
335 /* decode macroblock address increment */
336 mba_inc
= mpeg3video_get_macroblock_address(slice
);
338 if(slice
->fault
) return 1;
342 /* Get the macroblock_address */
/* row = (slice_vert_pos_ext << 7) + (code & 255) - 1; address = row * mb_width + mba_inc - 1. */
343 macroblock_address
= ((slice_vert_pos_ext
<< 7) + (code
& 255) - 1) * video
->mb_width
+ mba_inc
- 1;
344 /* first macroblock in slice: not skipped */
349 if(slice
->fault
) return 1;
351 if(macroblock_address
>= mba_max
)
353 /* mba_inc points beyond picture dimensions */
354 /*fprintf(stderr, "mpeg3_decode_slice: too many macroblocks in picture\n"); */
361 mpeg3video_macroblock_modes(slice
,
373 if(slice
->fault
) return 1;
/* The macroblock carries its own quantiser scale code: a 5-bit value. */
375 if(mb_type
& MB_QUANT
)
377 qs
= mpeg3slice_getbits(slice_buffer
, 5);
/*
 * Two alternative mappings from qs to quant_scale follow; the condition that
 * chooses between them is not visible in this listing.  The first uses the
 * non-linear table when qscale_type is set and qs * 2 otherwise.
 */
380 slice
->quant_scale
= video
->qscale_type
? mpeg3_non_linear_mquant_table
[qs
] : (qs
<< 1);
/* The second mapping uses qs unchanged. */
382 slice
->quant_scale
= qs
;
384 if(video
->scalable_mode
== SC_DP
)
385 /* make sure quant_scale is valid */
/* NOTE(review): as written this is a self-assignment and changes nothing. */
386 slice
->quant_scale
= slice
->quant_scale
;
392 /* decode forward motion vectors */
393 if((mb_type
& MB_FORWARD
) || ((mb_type
& MB_INTRA
) && video
->conceal_mv
))
396 mpeg3video_motion_vectors(slice
,
404 video
->h_forw_r_size
,
405 video
->v_forw_r_size
,
409 mpeg3video_motion_vector(slice
,
419 if(slice
->fault
) return 1;
421 /* decode backward motion vectors */
422 if(mb_type
& MB_BACKWARD
)
425 mpeg3video_motion_vectors(slice
,
433 video
->h_back_r_size
,
434 video
->v_back_r_size
,
438 mpeg3video_motion_vector(slice
,
449 if(slice
->fault
) return 1;
451 /* remove marker_bit */
452 if((mb_type
& MB_INTRA
) && video
->conceal_mv
)
453 mpeg3slice_flushbit(slice_buffer
);
455 /* macroblock_pattern */
456 if(mb_type
& MB_PATTERN
)
458 cbp
= mpeg3video_get_cbp(slice
);
/* 4:2:2 extends the pattern by a value from mpeg3slice_getbits2() shifted in at the low end. */
459 if(video
->chroma_format
== CHROMA422
)
461 /* coded_block_pattern_1 */
462 cbp
= (cbp
<< 2) | mpeg3slice_getbits2(slice_buffer
);
/* 4:4:4 extends the pattern by 6 further bits. */
465 if(video
->chroma_format
== CHROMA444
)
467 /* coded_block_pattern_2 */
468 cbp
= (cbp
<< 6) | mpeg3slice_getbits(slice_buffer
, 6);
/* Pattern used when none was read: all blk_cnt bits set for intra macroblocks, none otherwise. */
472 cbp
= (mb_type
& MB_INTRA
) ? ((1 << video
->blk_cnt
) - 1) : 0;
474 if(slice
->fault
) return 1;
476 mpeg3video_clearblock(slice
, 0, video
->blk_cnt
);
/* Decode every block whose bit is set in cbp; component 0 corresponds to the highest of the blk_cnt bits. */
477 for(comp
= 0; comp
< video
->blk_cnt
; comp
++)
479 if(cbp
& (1 << (video
->blk_cnt
- comp
- 1)))
481 if(mb_type
& MB_INTRA
)
484 mpeg3video_getmpg2intrablock(slice
, video
, comp
, dc_dct_pred
);
486 mpeg3video_getintrablock(slice
, video
, comp
, dc_dct_pred
);
491 mpeg3video_getmpg2interblock(slice
, video
, comp
);
493 mpeg3video_getinterblock(slice
, video
, comp
);
495 if(slice
->fault
) return 1;
499 /* reset intra_dc predictors */
500 if(!(mb_type
& MB_INTRA
))
501 dc_dct_pred
[0] = dc_dct_pred
[1] = dc_dct_pred
[2] = 0;
503 /* reset motion vector predictors */
504 if((mb_type
& MB_INTRA
) && !video
->conceal_mv
)
506 /* intra mb without concealment motion vectors */
507 pmv
[0][0][0] = pmv
[0][0][1] = pmv
[1][0][0] = pmv
[1][0][1] = 0;
508 pmv
[0][1][0] = pmv
[0][1][1] = pmv
[1][1][0] = pmv
[1][1][1] = 0;
511 if((video
->pict_type
== P_TYPE
) && !(mb_type
& (MB_FORWARD
| MB_INTRA
)))
513 /* non-intra mb without forward mv in a P picture */
514 pmv
[0][0][0] = pmv
[0][0][1] = pmv
[1][0][0] = pmv
[1][0][1] = 0;
516 /* derive motion_type */
517 if(video
->pict_struct
== FRAME_PICTURE
)
518 motion_type
= MC_FRAME
;
521 motion_type
= MC_FIELD
;
522 /* predict from field of same parity */
523 mv_field_sel
[0][0] = (video
->pict_struct
== BOTTOM_FIELD
);
529 /* purely spatially predicted macroblock */
530 pmv
[0][0][0] = pmv
[0][0][1] = pmv
[1][0][0] = pmv
[1][0][1] = 0;
531 pmv
[0][1][0] = pmv
[0][1][1] = pmv
[1][1][0] = pmv
[1][1][1] = 0;
536 /* mba_inc!=1: skipped macroblock */
537 mpeg3video_clearblock(slice
, 0, video
->blk_cnt
);
539 /* reset intra_dc predictors */
540 dc_dct_pred
[0] = dc_dct_pred
[1] = dc_dct_pred
[2] = 0;
542 /* reset motion vector predictors */
543 if(video
->pict_type
== P_TYPE
)
544 pmv
[0][0][0] = pmv
[0][0][1] = pmv
[1][0][0] = pmv
[1][0][1] = 0;
546 /* derive motion_type */
547 if(video
->pict_struct
== FRAME_PICTURE
)
548 motion_type
= MC_FRAME
;
551 motion_type
= MC_FIELD
;
552 /* predict from field of same parity */
553 mv_field_sel
[0][0] = mv_field_sel
[0][1] = (video
->pict_struct
== BOTTOM_FIELD
);
556 /* skipped I are spatial-only predicted, */
557 /* skipped P and B are temporal-only predicted */
558 stwtype
= (video
->pict_type
== I_TYPE
) ? 8 : 0;
/* A skipped macroblock is never treated as intra from here on. */
561 mb_type
&= ~MB_INTRA
;
569 /* pixel coordinates of top left corner of current macroblock */
570 bx
= 16 * (macroblock_address
% video
->mb_width
);
571 by
= 16 * (macroblock_address
/ video
->mb_width
);
573 /* motion compensation */
574 if(!(mb_type
& MB_INTRA
))
575 mpeg3video_reconstruct(video
,
585 /* copy or add block data into picture */
/* A block is processed when its bit is set in either cbp or snr_cbp (same bit order as above). */
586 for(comp
= 0; comp
< video
->blk_cnt
; comp
++)
588 if((cbp
| snr_cbp
) & (1 << (video
->blk_cnt
- 1 - comp
)))
/* Two alternative inverse-transform calls; the condition selecting one is not visible in this listing. */
592 IDCT_mmx(slice
->block
[comp
]);
595 mpeg3video_idct_conversion(slice
->block
[comp
]);
/* The last argument passed is nonzero exactly when the macroblock is not intra. */
597 mpeg3video_addblock(slice
,
603 (mb_type
& MB_INTRA
) == 0);
607 /* advance to next macroblock */
608 macroblock_address
++;
/*
 * mpeg3_slice_loop: body of a slice decoder thread (it is the start routine
 * passed to pthread_create() in mpeg3_new_slice_decoder()).  It waits on
 * slice->input_lock, searches video->slice_buffers for a buffer not yet
 * marked done, decodes it with mpeg3_decode_slice(), and signals progress by
 * unlocking completion and output locks.
 *
 * NOTE(review): the enclosing loop headers, the assignments to result and
 * all braces are missing from this listing; the comments describe only the
 * visible statements.
 */
615 void mpeg3_slice_loop(mpeg3_slice_t
*slice
)
617 mpeg3video_t
*video
= slice
->video
;
/* input_lock is created already locked in mpeg3_new_slice_decoder(), so this waits until another thread unlocks it. */
622 pthread_mutex_lock(&(slice
->input_lock
));
626 /* Get a buffer to decode */
/* slice_lock is held while the shared slice_buffers array is searched. */
628 pthread_mutex_lock(&(video
->slice_lock
));
/* Positive step: walk current_buffer upwards towards last_buffer. */
629 if(slice
->buffer_step
> 0)
631 while(slice
->current_buffer
<= slice
->last_buffer
)
/* NOTE(review): the second half of this test repeats the enclosing loop condition. */
633 if(!video
->slice_buffers
[slice
->current_buffer
].done
&&
634 slice
->current_buffer
<= slice
->last_buffer
)
639 slice
->current_buffer
+= slice
->buffer_step
;
/* Otherwise: walk current_buffer downwards towards last_buffer (buffer_step is added, so it is expected to be negative here). */
644 while(slice
->current_buffer
>= slice
->last_buffer
)
/* NOTE(review): the second half of this test repeats the enclosing loop condition. */
646 if(!video
->slice_buffers
[slice
->current_buffer
].done
&&
647 slice
->current_buffer
>= slice
->last_buffer
)
652 slice
->current_buffer
+= slice
->buffer_step
;
/* A buffer is taken only if result is zero and the index lies inside the array. */
657 if(!result
&& slice
->current_buffer
>= 0 && slice
->current_buffer
< video
->total_slice_buffers
)
/* Mark the buffer done while slice_lock is still held, then release both locks before decoding. */
659 slice
->slice_buffer
= &(video
->slice_buffers
[slice
->current_buffer
]);
660 slice
->slice_buffer
->done
= 1;
661 pthread_mutex_unlock(&(video
->slice_lock
));
662 pthread_mutex_unlock(&(slice
->input_lock
));
663 mpeg3_decode_slice(slice
);
/* Unlock the per-buffer completion_lock once this buffer has been decoded. */
664 pthread_mutex_unlock(&(slice
->slice_buffer
->completion_lock
));
667 /* Finished with all */
/* No buffer was taken: unlock the decoder's own completion_lock and release slice_lock. */
669 pthread_mutex_unlock(&(slice
->completion_lock
));
670 pthread_mutex_unlock(&(video
->slice_lock
));
674 pthread_mutex_unlock(&(slice
->output_lock
));
/*
 * mpeg3_new_slice_decoder: attach a slice decoder to its video object,
 * create its three mutexes and start its worker thread.
 *
 *   video - stored in slice->video without conversion
 *   slice - decoder state to initialise; also passed to the thread as its argument
 *
 * Each of input_lock, output_lock and completion_lock is locked immediately
 * after being initialised, so all three are held when the thread starts.
 *
 * NOTE(review): the declaration of attr, the return statement and the braces
 * are missing from this listing.  No pthread_mutexattr_destroy() or
 * pthread_attr_destroy() call is visible, and the results of the pthread
 * calls are not checked in the visible lines.
 */
678 int mpeg3_new_slice_decoder(void *video
, mpeg3_slice_t
*slice
)
681 pthread_mutexattr_t mutex_attr
;
683 slice
->video
= video
;
685 pthread_mutexattr_init(&mutex_attr
);
686 // pthread_mutexattr_setkind_np(&mutex_attr, PTHREAD_MUTEX_FAST_NP);
/* The same attribute object is reused for all three mutexes. */
687 pthread_mutex_init(&(slice
->input_lock
), &mutex_attr
);
688 pthread_mutex_lock(&(slice
->input_lock
));
689 pthread_mutex_init(&(slice
->output_lock
), &mutex_attr
);
690 pthread_mutex_lock(&(slice
->output_lock
));
691 pthread_mutex_init(&(slice
->completion_lock
), &mutex_attr
);
692 pthread_mutex_lock(&(slice
->completion_lock
));
/* Start the worker; mpeg3_slice_loop is cast through void* to fit the start-routine parameter. */
694 pthread_attr_init(&attr
);
695 pthread_create(&(slice
->tid
), &attr
, (void*)mpeg3_slice_loop
, slice
);
/*
 * mpeg3_delete_slice_decoder: shut down a slice decoder.  It unlocks
 * input_lock (which the worker thread waits on in mpeg3_slice_loop()), waits
 * for the thread to finish with pthread_join(), then destroys input_lock and
 * output_lock.
 *
 * NOTE(review): at least one statement before the unlock and the return
 * statement are missing from this listing.  completion_lock, which
 * mpeg3_new_slice_decoder() initialises, is not destroyed in the visible
 * lines - confirm whether that is intentional.
 */
700 int mpeg3_delete_slice_decoder(mpeg3_slice_t
*slice
)
/* Let the worker thread get past its pthread_mutex_lock(&slice->input_lock). */
703 pthread_mutex_unlock(&(slice
->input_lock
));
/* The thread's return value is not collected. */
704 pthread_join(slice
->tid
, 0);
705 pthread_mutex_destroy(&(slice
->input_lock
));
706 pthread_mutex_destroy(&(slice
->output_lock
));