r123: Merged HEAD and TEST. New stuff shall be committed to HEAD from now on.
[cinelerra_cv/mob.git] / libmpeg3 / video / slice.c
blob5d66e94d734780eb72bcef246dc0bb36ff01388f
1 #include "mpeg3private.h"
2 #include "mpeg3protos.h"
4 #include <pthread.h>
5 #include <stdlib.h>
/* Saturate an integer expression to the 8 bit pixel range 0..255. */
/* The argument is expanded more than once, so it must not have side effects. */
#define CLIP(x) ((x) < 0 ? 0 : ((x) >= 255 ? 255 : (x)))

/* Four packed 16 bit words holding 128; the MMX assembly refers to it by symbol name. */
static unsigned long long MMX_128 = 0x0080008000800080ULL;
11 int mpeg3_new_slice_buffer(mpeg3_slice_buffer_t *slice_buffer)
13 pthread_mutexattr_t mutex_attr;
15 slice_buffer->data = malloc(1024);
16 slice_buffer->buffer_size = 0;
17 slice_buffer->buffer_allocation = 1024;
18 slice_buffer->current_position = 0;
19 slice_buffer->bits_size = 0;
20 slice_buffer->bits = 0;
21 slice_buffer->done = 0;
22 pthread_mutexattr_init(&mutex_attr);
23 // pthread_mutexattr_setkind_np(&mutex_attr, PTHREAD_MUTEX_FAST_NP);
24 pthread_mutex_init(&(slice_buffer->completion_lock), &mutex_attr);
25 return 0;
28 int mpeg3_delete_slice_buffer(mpeg3_slice_buffer_t *slice_buffer)
30 free(slice_buffer->data);
31 pthread_mutex_destroy(&(slice_buffer->completion_lock));
32 return 0;
35 int mpeg3_expand_slice_buffer(mpeg3_slice_buffer_t *slice_buffer)
37 int i;
38 unsigned char *new_buffer = malloc(slice_buffer->buffer_allocation * 2);
39 for(i = 0; i < slice_buffer->buffer_size; i++)
40 new_buffer[i] = slice_buffer->data[i];
41 free(slice_buffer->data);
42 slice_buffer->data = new_buffer;
43 slice_buffer->buffer_allocation *= 2;
44 return 0;
47 /* limit coefficients to -2048..2047 */
49 /* move/add 8x8-Block from block[comp] to refframe */
/*
 * Transfer one 8x8 block of values, slice->block[comp], into the picture
 * that is being decoded (video->newframe).
 *
 * slice    - supplies the block data and the per-block slice->sparse[] flag
 * video    - supplies the destination planes, plane widths, picture
 *            structure and chroma format
 * comp     - index of the block inside the macroblock
 * bx, by   - pixel coordinates of the macroblock's top left corner
 * dct_type - nonzero selects the field DCT row layout in frame pictures
 * addflag  - nonzero: add the block to the pixels already in the picture,
 *            zero: store the block values plus 128
 *
 * The C code path limits every result to 0..255 with CLIP.
 * Always returns 0.
 */
51 static inline int mpeg3video_addblock(mpeg3_slice_t *slice,
52 mpeg3video_t *video,
53 int comp,
54 int bx,
55 int by,
56 int dct_type,
57 int addflag)
59 int cc, i, iincr;
60 unsigned char *rfp;
61 short *bp;
/* Only the MMX code paths below look at this flag, the C loops ignore it */
62 int spar = slice->sparse[comp];
63 /* color component index */
/* Blocks 0..3 use plane 0, for the others the low bit of comp picks plane 1 or 2 */
64 cc = (comp < 4) ? 0 : (comp & 1) + 1;
/* From here on rfp is the first destination pixel of the block and */
/* iincr the distance between two consecutive rows of the block */
66 if(cc == 0)
68 /* luminance */
69 if(video->pict_struct == FRAME_PICTURE)
71 if(dct_type)
73 /* field DCT coding */
/* Start on line 0 or 1 and step two picture lines per block row */
74 rfp = video->newframe[0] +
75 video->coded_picture_width * (by + ((comp & 2) >> 1)) + bx + ((comp & 1) << 3);
76 iincr = (video->coded_picture_width << 1);
78 else
80 /* frame DCT coding */
/* (comp & 2) << 2 is a vertical offset of 0 or 8, (comp & 1) << 3 a horizontal one */
81 rfp = video->newframe[0] +
82 video->coded_picture_width * (by + ((comp & 2) << 2)) + bx + ((comp & 1) << 3);
83 iincr = video->coded_picture_width;
86 else
88 /* field picture */
/* Rows are two plane lines apart */
89 rfp = video->newframe[0] +
90 (video->coded_picture_width << 1) * (by + ((comp & 2) << 2)) + bx + ((comp & 1) << 3);
91 iincr = (video->coded_picture_width << 1);
94 else
96 /* chrominance */
98 /* scale coordinates */
99 if(video->chroma_format != CHROMA444) bx >>= 1;
100 if(video->chroma_format == CHROMA420) by >>= 1;
101 if(video->pict_struct == FRAME_PICTURE)
103 if(dct_type && (video->chroma_format != CHROMA420))
105 /* field DCT coding */
/* comp & 8 adds a horizontal offset of 8 for block indices with that bit set */
106 rfp = video->newframe[cc]
107 + video->chrom_width * (by + ((comp & 2) >> 1)) + bx + (comp & 8);
108 iincr = (video->chrom_width << 1);
110 else
112 /* frame DCT coding */
113 rfp = video->newframe[cc]
114 + video->chrom_width * (by + ((comp & 2) << 2)) + bx + (comp & 8);
115 iincr = video->chrom_width;
118 else
120 /* field picture */
121 rfp = video->newframe[cc]
122 + (video->chrom_width << 1) * (by + ((comp & 2) << 2)) + bx + (comp & 8);
123 iincr = (video->chrom_width << 1);
127 bp = slice->block[comp];
/* addflag set: the block is added to the pixels already stored */
129 if(addflag)
131 #ifdef HAVE_MMX
132 if(video->have_mmx)
134 if(spar)
/* Sparse block: the first value of the block is replicated into all four */
/* words of mm6 and added to each of the 8 rows */
/* NOTE(review): here and in the other loops below, %1/%2 and the count register */
/* are changed by the asm although they are listed as inputs, and no memory */
/* clobber is given - confirm this against the compiler's extended asm rules */
136 __asm__ __volatile__(
137 "movq (%2), %%mm6\n" /* 4 blockvals */
138 "pxor %%mm4, %%mm4\n"
139 "punpcklwd %%mm6, %%mm6\n"
140 "punpcklwd %%mm6, %%mm6\n"
141 ".align 8\n"
142 "1:"
143 "movq (%1), %%mm0\n" /* 8 rindex1 */
144 "movq %%mm0, %%mm2\n"
145 "punpcklbw %%mm4, %%mm0\n"
146 "punpckhbw %%mm4, %%mm2\n"
147 "paddw %%mm6, %%mm0\n"
148 "paddw %%mm6, %%mm2\n"
150 "packuswb %%mm2, %%mm0\n"
151 "movq %%mm0, (%1)\n"
153 "leal (%1, %3), %1\n"
154 "loop 1b\n"
155 : /* scr dest */
156 : "c" (8),"r" (rfp), "r" (bp), "r" (iincr)
159 else
/* Full block: each of the 8 passes adds 8 block values to one row of pixels */
161 __asm__ __volatile__(
162 "pxor %%mm4, %%mm4\n"
164 ".align 8\n"
165 "1:"
166 "movq (%2), %%mm0\n" /* 8 rfp 0 1 2 3 4 5 6 7*/
167 "movq (%1), %%mm6\n" /* 4 blockvals 0 1 2 3 */
169 "movq %%mm0, %%mm2\n"
170 "movq 8(%1), %%mm5\n" /* 4 blockvals 0 1 2 3 */
171 "punpcklbw %%mm4, %%mm0\n" /* 0 2 4 6 */
172 "punpckhbw %%mm4, %%mm2\n" /* 1 3 5 7 */
174 "paddw %%mm6, %%mm0\n"
175 "paddw %%mm5, %%mm2\n"
176 "packuswb %%mm2, %%mm0\n"
178 "addl $16, %1\n"
179 "movq %%mm0, (%2)\n"
181 "leal (%2,%3), %2\n"
182 "loop 1b\n"
183 : /* scr dest */
184 : "c" (8),"r" (bp), "r" (rfp), "r" (iincr)
188 else
189 #endif
/* Portable path: one row of 8 pixels per pass */
190 for(i = 0; i < 8; i++)
192 rfp[0] = CLIP(bp[0] + rfp[0]);
193 rfp[1] = CLIP(bp[1] + rfp[1]);
194 rfp[2] = CLIP(bp[2] + rfp[2]);
195 rfp[3] = CLIP(bp[3] + rfp[3]);
196 rfp[4] = CLIP(bp[4] + rfp[4]);
197 rfp[5] = CLIP(bp[5] + rfp[5]);
198 rfp[6] = CLIP(bp[6] + rfp[6]);
199 rfp[7] = CLIP(bp[7] + rfp[7]);
200 rfp += iincr;
201 bp += 8;
/* addflag clear: the pixels are overwritten with block value + 128 */
204 else
206 #ifdef HAVE_MMX
207 if(video->have_mmx)
209 if(spar)
/* Sparse block: the first value of the block plus 128 is replicated into 8 bytes */
/* and stored row by row without a loop */
/* NOTE(review): in the lines visible here no store to the first row (%0) is */
/* issued before %0 is advanced - verify against the complete source */
211 __asm__ __volatile__(
212 "movd (%2), %%mm0\n" /* " 0 0 0 v1" */
213 "punpcklwd %%mm0, %%mm0\n" /* " 0 0 v1 v1" */
214 "punpcklwd %%mm0, %%mm0\n"
215 "paddw MMX_128, %%mm0\n"
216 "packuswb %%mm0, %%mm0\n"
217 "leal (%0,%1,2), %%eax\n"
219 "movq %%mm0, (%0, %1)\n"
220 "movq %%mm0, (%%eax)\n"
221 "leal (%%eax,%1,2), %0\n"
222 "movq %%mm0, (%%eax, %1)\n"
224 "movq %%mm0, (%0)\n"
225 "leal (%0,%1,2), %%eax\n"
226 "movq %%mm0, (%0, %1)\n"
228 "movq %%mm0, (%%eax)\n"
229 "movq %%mm0, (%%eax, %1)\n"
231 : "D" (rfp), "c" (iincr), "b" (bp)
232 : "eax");
234 else
/* Full block: each of the 4 passes converts 16 block values into two rows */
236 __asm__ __volatile__(
237 "movq MMX_128,%%mm4\n"
238 ".align 8\n"
239 "1:"
240 "movq (%1), %%mm0\n"
241 "movq 8(%1), %%mm1\n"
242 "paddw %%mm4, %%mm0\n"
244 "movq 16(%1), %%mm2\n"
245 "paddw %%mm4, %%mm1\n"
247 "movq 24(%1), %%mm3\n"
248 "paddw %%mm4, %%mm2\n"
250 "packuswb %%mm1, %%mm0\n"
251 "paddw %%mm4, %%mm3\n"
253 "addl $32, %1\n"
254 "packuswb %%mm3, %%mm2\n"
256 "movq %%mm0, (%2)\n"
258 "movq %%mm2, (%2,%3)\n"
260 "leal (%2,%3,2), %2\n"
261 "loop 1b\n"
263 : "c" (4), "r" (bp), "r" (rfp), "r" (iincr)
267 else
268 #endif
/* Portable path: one row of 8 pixels per pass */
269 for(i = 0; i < 8; i++)
271 rfp[0] = CLIP(bp[0] + 128);
272 rfp[1] = CLIP(bp[1] + 128);
273 rfp[2] = CLIP(bp[2] + 128);
274 rfp[3] = CLIP(bp[3] + 128);
275 rfp[4] = CLIP(bp[4] + 128);
276 rfp[5] = CLIP(bp[5] + 128);
277 rfp[6] = CLIP(bp[6] + 128);
278 rfp[7] = CLIP(bp[7] + 128);
279 rfp+= iincr;
280 bp += 8;
283 return 0;
/*
 * Decode the macroblocks of one slice and write them into the current
 * picture of slice->video.
 *
 * slice - decoder state; the data is read from slice->slice_buffer.
 *         slice->fault is cleared on entry and tested after the parsing
 *         steps.
 *
 * Returns 0 when the buffer has been consumed or when
 * mpeg3slice_showbits(slice_buffer, 23) yields zero before a macroblock
 * address increment.  Returns 1 when slice->fault is set or when the
 * macroblock address reaches the number of macroblocks in the picture.
 */
286 int mpeg3_decode_slice(mpeg3_slice_t *slice)
288 mpeg3video_t *video = slice->video;
289 int comp;
290 int mb_type, cbp, motion_type = 0, dct_type;
291 int macroblock_address, mba_inc, mba_max;
292 int slice_vert_pos_ext;
293 unsigned int code;
294 int bx, by;
295 int dc_dct_pred[3];
296 int mv_count, mv_format, mvscale;
297 int pmv[2][2][2], mv_field_sel[2][2];
298 int dmv, dmvector[2];
299 int qs;
300 int stwtype, stwclass;
301 int snr_cbp;
302 int i;
303 mpeg3_slice_buffer_t *slice_buffer = slice->slice_buffer;
305 /* number of macroblocks per picture */
306 mba_max = video->mb_width * video->mb_height;
308 /* field picture has half as many macroblocks as frame */
309 if(video->pict_struct != FRAME_PICTURE)
310 mba_max >>= 1;
312 /* macroblock address */
313 macroblock_address = 0;
314 /* first macroblock in slice is not skipped */
315 mba_inc = 0;
316 slice->fault = 0;
/* The low byte of these 32 bits is used below as the slice's vertical position */
/* (presumably this is the slice start code - confirm) */
318 code = mpeg3slice_getbits(slice_buffer, 32);
319 /* decode slice header (may change quant_scale) */
320 slice_vert_pos_ext = mpeg3video_getslicehdr(slice, video);
322 /* reset all DC coefficient and motion vector predictors */
323 dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
324 pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
325 pmv[0][1][0] = pmv[0][1][1] = pmv[1][1][0] = pmv[1][1][1] = 0;
/* One pass per macroblock, skipped ones included; i counts the passes */
327 for(i = 0;
328 slice_buffer->current_position < slice_buffer->buffer_size;
329 i++)
/* mba_inc is the number of macroblocks left until the next coded one; */
/* zero means a new address increment has to be read */
331 if(mba_inc == 0)
333 /* Done */
334 if(!mpeg3slice_showbits(slice_buffer, 23)) return 0;
335 /* decode macroblock address increment */
336 mba_inc = mpeg3video_get_macroblock_address(slice);
338 if(slice->fault) return 1;
340 if(i == 0)
342 /* Get the macroblock_address */
343 macroblock_address = ((slice_vert_pos_ext << 7) + (code & 255) - 1) * video->mb_width + mba_inc - 1;
344 /* first macroblock in slice: not skipped */
345 mba_inc = 1;
349 if(slice->fault) return 1;
/* NOTE(review): only the upper bound is tested, a negative address computed */
/* above is not rejected here */
351 if(macroblock_address >= mba_max)
353 /* mba_inc points beyond picture dimensions */
354 /*fprintf(stderr, "mpeg3_decode_slice: too many macroblocks in picture\n"); */
355 return 1;
358 /* not skipped */
359 if(mba_inc == 1)
361 mpeg3video_macroblock_modes(slice,
362 video,
363 &mb_type,
364 &stwtype,
365 &stwclass,
366 &motion_type,
367 &mv_count,
368 &mv_format,
369 &dmv,
370 &mvscale,
371 &dct_type);
373 if(slice->fault) return 1;
375 if(mb_type & MB_QUANT)
377 qs = mpeg3slice_getbits(slice_buffer, 5);
379 if(video->mpeg2)
380 slice->quant_scale = video->qscale_type ? mpeg3_non_linear_mquant_table[qs] : (qs << 1);
381 else
382 slice->quant_scale = qs;
/* NOTE(review): the assignment below stores the value onto itself, so the */
/* validation announced by the comment is not performed */
384 if(video->scalable_mode == SC_DP)
385 /* make sure quant_scale is valid */
386 slice->quant_scale = slice->quant_scale;
389 /* motion vectors */
392 /* decode forward motion vectors */
393 if((mb_type & MB_FORWARD) || ((mb_type & MB_INTRA) && video->conceal_mv))
395 if(video->mpeg2)
396 mpeg3video_motion_vectors(slice,
397 video,
398 pmv,
399 dmvector,
400 mv_field_sel,
402 mv_count,
403 mv_format,
404 video->h_forw_r_size,
405 video->v_forw_r_size,
406 dmv,
407 mvscale);
408 else
409 mpeg3video_motion_vector(slice,
410 video,
411 pmv[0][0],
412 dmvector,
413 video->forw_r_size,
414 video->forw_r_size,
417 video->full_forw);
419 if(slice->fault) return 1;
421 /* decode backward motion vectors */
422 if(mb_type & MB_BACKWARD)
424 if(video->mpeg2)
425 mpeg3video_motion_vectors(slice,
426 video,
427 pmv,
428 dmvector,
429 mv_field_sel,
431 mv_count,
432 mv_format,
433 video->h_back_r_size,
434 video->v_back_r_size,
436 mvscale);
437 else
438 mpeg3video_motion_vector(slice,
439 video,
440 pmv[0][1],
441 dmvector,
442 video->back_r_size,
443 video->back_r_size,
446 video->full_back);
449 if(slice->fault) return 1;
451 /* remove marker_bit */
452 if((mb_type & MB_INTRA) && video->conceal_mv)
453 mpeg3slice_flushbit(slice_buffer);
455 /* macroblock_pattern */
456 if(mb_type & MB_PATTERN)
458 cbp = mpeg3video_get_cbp(slice);
459 if(video->chroma_format == CHROMA422)
461 /* coded_block_pattern_1 */
462 cbp = (cbp << 2) | mpeg3slice_getbits2(slice_buffer);
464 else
465 if(video->chroma_format == CHROMA444)
467 /* coded_block_pattern_2 */
468 cbp = (cbp << 6) | mpeg3slice_getbits(slice_buffer, 6);
/* No pattern in the stream: intra macroblocks carry every block, others none */
471 else
472 cbp = (mb_type & MB_INTRA) ? ((1 << video->blk_cnt) - 1) : 0;
474 if(slice->fault) return 1;
475 /* decode blocks */
476 mpeg3video_clearblock(slice, 0, video->blk_cnt);
477 for(comp = 0; comp < video->blk_cnt; comp++)
/* The bit for block 0 is the most significant of the blk_cnt pattern bits */
479 if(cbp & (1 << (video->blk_cnt - comp - 1)))
481 if(mb_type & MB_INTRA)
483 if(video->mpeg2)
484 mpeg3video_getmpg2intrablock(slice, video, comp, dc_dct_pred);
485 else
486 mpeg3video_getintrablock(slice, video, comp, dc_dct_pred);
488 else
490 if(video->mpeg2)
491 mpeg3video_getmpg2interblock(slice, video, comp);
492 else
493 mpeg3video_getinterblock(slice, video, comp);
495 if(slice->fault) return 1;
499 /* reset intra_dc predictors */
500 if(!(mb_type & MB_INTRA))
501 dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
503 /* reset motion vector predictors */
504 if((mb_type & MB_INTRA) && !video->conceal_mv)
506 /* intra mb without concealment motion vectors */
507 pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
508 pmv[0][1][0] = pmv[0][1][1] = pmv[1][1][0] = pmv[1][1][1] = 0;
511 if((video->pict_type == P_TYPE) && !(mb_type & (MB_FORWARD | MB_INTRA)))
513 /* non-intra mb without forward mv in a P picture */
514 pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
516 /* derive motion_type */
517 if(video->pict_struct == FRAME_PICTURE)
518 motion_type = MC_FRAME;
519 else
521 motion_type = MC_FIELD;
522 /* predict from field of same parity */
523 mv_field_sel[0][0] = (video->pict_struct == BOTTOM_FIELD);
527 if(stwclass == 4)
529 /* purely spatially predicted macroblock */
530 pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
531 pmv[0][1][0] = pmv[0][1][1] = pmv[1][1][0] = pmv[1][1][1] = 0;
534 else
536 /* mba_inc!=1: skipped macroblock */
537 mpeg3video_clearblock(slice, 0, video->blk_cnt);
539 /* reset intra_dc predictors */
540 dc_dct_pred[0] = dc_dct_pred[1] = dc_dct_pred[2] = 0;
542 /* reset motion vector predictors */
543 if(video->pict_type == P_TYPE)
544 pmv[0][0][0] = pmv[0][0][1] = pmv[1][0][0] = pmv[1][0][1] = 0;
546 /* derive motion_type */
547 if(video->pict_struct == FRAME_PICTURE)
548 motion_type = MC_FRAME;
549 else
551 motion_type = MC_FIELD;
552 /* predict from field of same parity */
553 mv_field_sel[0][0] = mv_field_sel[0][1] = (video->pict_struct == BOTTOM_FIELD);
556 /* skipped I are spatial-only predicted, */
557 /* skipped P and B are temporal-only predicted */
558 stwtype = (video->pict_type == I_TYPE) ? 8 : 0;
560 /* clear MB_INTRA */
561 mb_type &= ~MB_INTRA;
563 /* no block data */
564 cbp = 0;
/* snr_cbp is always zero in this function, so only cbp selects blocks below */
567 snr_cbp = 0;
569 /* pixel coordinates of top left corner of current macroblock */
570 bx = 16 * (macroblock_address % video->mb_width);
571 by = 16 * (macroblock_address / video->mb_width);
573 /* motion compensation */
574 if(!(mb_type & MB_INTRA))
575 mpeg3video_reconstruct(video,
576 bx,
577 by,
578 mb_type,
579 motion_type,
580 pmv,
581 mv_field_sel,
582 dmvector,
583 stwtype);
585 /* copy or add block data into picture */
586 for(comp = 0; comp < video->blk_cnt; comp++)
/* Skipped macroblocks have cbp == 0 and therefore never get here, so dct_type */
/* is only used after mpeg3video_macroblock_modes was given its address */
588 if((cbp | snr_cbp) & (1 << (video->blk_cnt - 1 - comp)))
590 #ifdef HAVE_MMX
591 if(video->have_mmx)
592 IDCT_mmx(slice->block[comp]);
593 else
594 #endif
595 mpeg3video_idct_conversion(slice->block[comp]);
/* The last argument is the add flag: intra blocks are stored, all others added */
597 mpeg3video_addblock(slice,
598 video,
599 comp,
600 bx,
601 by,
602 dct_type,
603 (mb_type & MB_INTRA) == 0);
607 /* advance to next macroblock */
608 macroblock_address++;
609 mba_inc--;
612 return 0;
615 void mpeg3_slice_loop(mpeg3_slice_t *slice)
617 mpeg3video_t *video = slice->video;
618 int result = 1;
620 while(!slice->done)
622 pthread_mutex_lock(&(slice->input_lock));
624 if(!slice->done)
626 /* Get a buffer to decode */
627 result = 1;
628 pthread_mutex_lock(&(video->slice_lock));
629 if(slice->buffer_step > 0)
631 while(slice->current_buffer <= slice->last_buffer)
633 if(!video->slice_buffers[slice->current_buffer].done &&
634 slice->current_buffer <= slice->last_buffer)
636 result = 0;
637 break;
639 slice->current_buffer += slice->buffer_step;
642 else
644 while(slice->current_buffer >= slice->last_buffer)
646 if(!video->slice_buffers[slice->current_buffer].done &&
647 slice->current_buffer >= slice->last_buffer)
649 result = 0;
650 break;
652 slice->current_buffer += slice->buffer_step;
656 /* Got one */
657 if(!result && slice->current_buffer >= 0 && slice->current_buffer < video->total_slice_buffers)
659 slice->slice_buffer = &(video->slice_buffers[slice->current_buffer]);
660 slice->slice_buffer->done = 1;
661 pthread_mutex_unlock(&(video->slice_lock));
662 pthread_mutex_unlock(&(slice->input_lock));
663 mpeg3_decode_slice(slice);
664 pthread_mutex_unlock(&(slice->slice_buffer->completion_lock));
666 else
667 /* Finished with all */
669 pthread_mutex_unlock(&(slice->completion_lock));
670 pthread_mutex_unlock(&(video->slice_lock));
674 pthread_mutex_unlock(&(slice->output_lock));
678 int mpeg3_new_slice_decoder(void *video, mpeg3_slice_t *slice)
680 pthread_attr_t attr;
681 pthread_mutexattr_t mutex_attr;
683 slice->video = video;
684 slice->done = 0;
685 pthread_mutexattr_init(&mutex_attr);
686 // pthread_mutexattr_setkind_np(&mutex_attr, PTHREAD_MUTEX_FAST_NP);
687 pthread_mutex_init(&(slice->input_lock), &mutex_attr);
688 pthread_mutex_lock(&(slice->input_lock));
689 pthread_mutex_init(&(slice->output_lock), &mutex_attr);
690 pthread_mutex_lock(&(slice->output_lock));
691 pthread_mutex_init(&(slice->completion_lock), &mutex_attr);
692 pthread_mutex_lock(&(slice->completion_lock));
694 pthread_attr_init(&attr);
695 pthread_create(&(slice->tid), &attr, (void*)mpeg3_slice_loop, slice);
697 return 0;
700 int mpeg3_delete_slice_decoder(mpeg3_slice_t *slice)
702 slice->done = 1;
703 pthread_mutex_unlock(&(slice->input_lock));
704 pthread_join(slice->tid, 0);
705 pthread_mutex_destroy(&(slice->input_lock));
706 pthread_mutex_destroy(&(slice->output_lock));
707 return 0;