/*
 * The simplest mpeg encoder (well, it was the simplest!)
 * Copyright (c) 2000,2001 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
 */
#include <ctype.h>
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"
#include "simple_idct.h"
#include "fastmemcpy.h"
static void encode_picture(MpegEncContext *s, int picture_number);
static void dct_unquantize_mpeg1_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_mpeg2_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
static void dct_unquantize_h263_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale);
static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale, int *overflow);

void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
                             int src_x, int src_y, int w, int h);
/* enable all paranoid tests for rounding, overflows, etc... */
//#define PARANOID

/* for jpeg fast DCT */
#define CONST_BITS 14
static const uint16_t aanscales[64] = {
    /* precomputed values scaled up by 14 bits */
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
    21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
    19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
    16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
    12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
     8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
     4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
};
/* Input permutation for the simple_idct_mmx */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
static const uint8_t h263_chroma_roundtab[16] = {
    0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
};
static UINT16 (*default_mv_penalty)[MAX_MV*2+1]=NULL;
static UINT8 default_fcode_tab[MAX_MV*2+1];

/* default motion estimation */
int motion_estimation_method = ME_EPZS;
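/* Precompute the quantizer tables for every qscale in [qmin, qmax]:
 * qmat holds 32-bit reciprocals used by the generic dct_quantize_c path,
 * while qmat16 / qmat16_bias hold 16-bit reciprocals plus a rounding bias
 * for the fixed-point (MMX-style) quantizer. The scaling differs per
 * forward DCT, as the range comments in each branch below explain. */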
static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64],
                           const UINT16 *quant_matrix, int bias, int qmin, int qmax)
{
    int qscale;

    for(qscale=qmin; qscale<=qmax; qscale++){
        int i;
        if (s->fdct == ff_jpeg_fdct_islow) {
            for(i=0;i<64;i++) {
                const int j= s->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) /
                                        (qscale * quant_matrix[j]));
            }
        } else if (s->fdct == fdct_ifast) {
            for(i=0;i<64;i++) {
                const int j= s->idct_permutation[i];
                /* 16 <= qscale * quant_matrix[i] <= 7905 */
                /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */

                qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) /
                                        (aanscales[i] * qscale * quant_matrix[j]));
            }
        } else {
            for(i=0;i<64;i++) {
                const int j= s->idct_permutation[i];
                /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                   So 16           <= qscale * quant_matrix[i]             <= 7905
                   so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
                   so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
                */
                qmat  [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
                qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]);

                if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1;
                qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]);
            }
        }
    }
}
// move into common.c perhaps
#define CHECKED_ALLOCZ(p, size)\
{\
    p= av_mallocz(size);\
    if(p==NULL){\
        perror("malloc");\
        goto fail;\
    }\
}
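/* Build a ScanTable from a scan pattern given in raster order: 'permutated'
 * is the scan with the IDCT's input permutation already applied, and
 * 'raster_end' records, for each scan position, the highest permuted index
 * seen so far (an upper bound on the occupied part of the block). */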
void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){
    int i;
    int end;

    st->scantable= src_scantable;

    for(i=0; i<64; i++){
        int j;
        j = src_scantable[i];
        st->permutated[i] = s->idct_permutation[j];
    }

    end=-1;
    for(i=0; i<64; i++){
        int j;
        j = st->permutated[i];
        if(j>end) end=j;
        st->raster_end[i]= end;
    }
}
/* XXX: those functions should be suppressed ASAP when all IDCTs are
   converted */
// *FIXME* this is an ugly hack using local statics
static void (*ff_put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
static void (*ff_add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);

static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_put_pixels_clamped(block, dest, line_size);
}

static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_add_pixels_clamped(block, dest, line_size);
}
/* init common dct for both encoder and decoder */
int DCT_common_init(MpegEncContext *s)
{
    int i;

    ff_put_pixels_clamped = s->dsp.put_pixels_clamped;
    ff_add_pixels_clamped = s->dsp.add_pixels_clamped;

    s->dct_unquantize_h263 = dct_unquantize_h263_c;
    s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
    s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
    s->dct_quantize= dct_quantize_c;

    if(s->avctx->dct_algo==FF_DCT_FASTINT)
        s->fdct = fdct_ifast;
    else
        s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default

    if(s->avctx->idct_algo==FF_IDCT_INT){
        s->idct_put= ff_jref_idct_put;
        s->idct_add= ff_jref_idct_add;
        s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
    }else{ //accurate/default
        s->idct_put= simple_idct_put;
        s->idct_add= simple_idct_add;
        s->idct_permutation_type= FF_NO_IDCT_PERM;
    }

#ifdef HAVE_MMX
    MPV_common_init_mmx(s);
#endif
#ifdef ARCH_ALPHA
    MPV_common_init_axp(s);
#endif
#ifdef HAVE_MLIB
    MPV_common_init_mlib(s);
#endif
#ifdef HAVE_MMI
    MPV_common_init_mmi(s);
#endif
#ifdef ARCH_ARMV4L
    MPV_common_init_armv4l(s);
#endif
#ifdef ARCH_POWERPC
    MPV_common_init_ppc(s);
#endif

    switch(s->idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            s->idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            s->idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            s->idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    default:
        fprintf(stderr, "Internal error, IDCT permutation not set\n");
        return -1;
    }

    /* load & permutate scantables
       note: only wmv uses different ones
    */
    ff_init_scantable(s, &s->inter_scantable  , ff_zigzag_direct);
    ff_init_scantable(s, &s->intra_scantable  , ff_zigzag_direct);
    ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan);
    ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan);

    return 0;
}
/**
 * allocates a Picture
 * The pixels are allocated/set by calling get_buffer() if shared=0
 */
static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){

    if(shared){
        assert(pic->data[0]);
        assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
        pic->type= FF_BUFFER_TYPE_SHARED;
    }else{
        int r;

        assert(!pic->data[0]);

        r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);

        if(r<0 || !pic->age || !pic->type || !pic->data[0]){
            fprintf(stderr, "get_buffer() failed (%d %d %d %X)\n", r, pic->age, pic->type, (int)pic->data[0]);
            return -1;
        }

        if(s->linesize && (s->linesize != pic->linesize[0] || s->uvlinesize != pic->linesize[1])){
            fprintf(stderr, "get_buffer() failed (stride changed)\n");
            return -1;
        }

        if(pic->linesize[1] != pic->linesize[2]){
            fprintf(stderr, "get_buffer() failed (uv stride mismatch)\n");
            return -1;
        }

        s->linesize  = pic->linesize[0];
        s->uvlinesize= pic->linesize[1];
    }

    if(pic->qscale_table==NULL){
        if (s->encoding) {
            CHECKED_ALLOCZ(pic->mb_var   , s->mb_num * sizeof(INT16))
            CHECKED_ALLOCZ(pic->mc_mb_var, s->mb_num * sizeof(INT16))
            CHECKED_ALLOCZ(pic->mb_mean  , s->mb_num * sizeof(INT8))
        }

        CHECKED_ALLOCZ(pic->mbskip_table , s->mb_num * sizeof(UINT8)+1) //the +1 is for the slice end check
        CHECKED_ALLOCZ(pic->qscale_table , s->mb_num * sizeof(UINT8))
        pic->qstride= s->mb_width;
    }

    return 0;
fail: //for the CHECKED_ALLOCZ macro
    return -1;
}
/**
 * deallocates a picture
 */
static void free_picture(MpegEncContext *s, Picture *pic){
    int i;

    if(pic->data[0] && pic->type!=FF_BUFFER_TYPE_SHARED){
        s->avctx->release_buffer(s->avctx, (AVFrame*)pic);
    }

    av_freep(&pic->mb_var);
    av_freep(&pic->mc_mb_var);
    av_freep(&pic->mb_mean);
    av_freep(&pic->mbskip_table);
    av_freep(&pic->qscale_table);

    if(pic->type == FF_BUFFER_TYPE_INTERNAL){
        for(i=0; i<4; i++){
            av_freep(&pic->base[i]);
            pic->data[i]= NULL;
        }
        av_freep(&pic->opaque);
        pic->type= 0;
    }else if(pic->type == FF_BUFFER_TYPE_SHARED){
        for(i=0; i<4; i++){
            pic->base[i]=
            pic->data[i]= NULL;
        }
        pic->type= 0;
    }
}
/* init common structure for both encoder and decoder */
int MPV_common_init(MpegEncContext *s)
{
    int y_size, c_size, yc_size, i;

    dsputil_init(&s->dsp, s->avctx->dsp_mask);
    DCT_common_init(s);

    s->flags= s->avctx->flags;

    s->mb_width  = (s->width  + 15) / 16;
    s->mb_height = (s->height + 15) / 16;

    /* set default edge pos, will be overridden in decode_header if needed */
    s->h_edge_pos= s->mb_width*16;
    s->v_edge_pos= s->mb_height*16;

    s->mb_num = s->mb_width * s->mb_height;

    y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
    c_size = (s->mb_width + 2) * (s->mb_height + 2);
    yc_size = y_size + 2 * c_size;

    /* convert fourcc to upper case */
    s->avctx->fourcc=   toupper( s->avctx->fourcc     &0xFF)
                     + (toupper((s->avctx->fourcc>>8 )&0xFF)<<8 )
                     + (toupper((s->avctx->fourcc>>16)&0xFF)<<16)
                     + (toupper((s->avctx->fourcc>>24)&0xFF)<<24);

    CHECKED_ALLOCZ(s->edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance

    s->avctx->coded_frame= (AVFrame*)&s->current_picture;

    if (s->encoding) {
        int mv_table_size= (s->mb_width+2)*(s->mb_height+2);

        /* Allocate MV tables */
        CHECKED_ALLOCZ(s->p_mv_table            , mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_forw_mv_table       , mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_back_mv_table       , mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_bidir_forw_mv_table , mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_bidir_back_mv_table , mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_direct_forw_mv_table, mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_direct_back_mv_table, mv_table_size * 2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->b_direct_mv_table     , mv_table_size * 2 * sizeof(INT16))

        //FIXME should be linesize instead of s->width*2 but that isn't known before get_buffer()
        CHECKED_ALLOCZ(s->me_scratchpad,  s->width*2*16*3*sizeof(uint8_t))

        CHECKED_ALLOCZ(s->me_map      , ME_MAP_SIZE*sizeof(uint32_t))
        CHECKED_ALLOCZ(s->me_score_map, ME_MAP_SIZE*sizeof(uint16_t))

        if(s->codec_id==CODEC_ID_MPEG4){
            CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
            CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
        }

        if(s->msmpeg4_version){
            CHECKED_ALLOCZ(s->ac_stats, 2*2*(MAX_LEVEL+1)*(MAX_RUN+1)*2*sizeof(int));
        }
        CHECKED_ALLOCZ(s->avctx->stats_out, 256);
    }

    CHECKED_ALLOCZ(s->error_status_table, s->mb_num*sizeof(UINT8))

    if (s->out_format == FMT_H263 || s->encoding) {
        int size;
        /* Allocate MB type table */
        CHECKED_ALLOCZ(s->mb_type  , s->mb_num * sizeof(UINT8))

        /* MV prediction */
        size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
        CHECKED_ALLOCZ(s->motion_val, size * 2 * sizeof(INT16));
    }

    if(s->codec_id==CODEC_ID_MPEG4){
        /* interlaced direct mode decoding tables */
        CHECKED_ALLOCZ(s->field_mv_table    , s->mb_num*2*2 * sizeof(INT16))
        CHECKED_ALLOCZ(s->field_select_table, s->mb_num*2   * sizeof(INT8))
    }
    /* 4mv b frame decoding table */
    //note this is needed for h263 without b frames too (segfault on damaged streams otherwise)
    CHECKED_ALLOCZ(s->co_located_type_table, s->mb_num * sizeof(UINT8))

    if (s->out_format == FMT_H263) {
        /* ac values */
        CHECKED_ALLOCZ(s->ac_val[0], yc_size * sizeof(INT16) * 16);
        s->ac_val[1] = s->ac_val[0] + y_size;
        s->ac_val[2] = s->ac_val[1] + c_size;

        /* cbp values */
        CHECKED_ALLOCZ(s->coded_block, y_size);

        /* divx501 bitstream reorder buffer */
        CHECKED_ALLOCZ(s->bitstream_buffer, BITSTREAM_BUFFER_SIZE);

        /* cbp, ac_pred, pred_dir */
        CHECKED_ALLOCZ(s->cbp_table     , s->mb_num * sizeof(UINT8))
        CHECKED_ALLOCZ(s->pred_dir_table, s->mb_num * sizeof(UINT8))
    }

    if (s->h263_pred || s->h263_plus || !s->encoding) {
        /* dc values */
        //MN: we need these for error resilience of intra-frames
        CHECKED_ALLOCZ(s->dc_val[0], yc_size * sizeof(INT16));
        s->dc_val[1] = s->dc_val[0] + y_size;
        s->dc_val[2] = s->dc_val[1] + c_size;
        for(i=0;i<yc_size;i++)
            s->dc_val[0][i] = 1024;
    }

    /* which mb is an intra block */
    CHECKED_ALLOCZ(s->mbintra_table, s->mb_num);
    memset(s->mbintra_table, 1, s->mb_num);

    /* default structure is frame */
    s->picture_structure = PICT_FRAME;

    /* init macroblock skip table */
    CHECKED_ALLOCZ(s->mbskip_table, s->mb_num+1);
    //Note the +1 is for a quicker mpeg4 slice_end detection

    s->block= s->blocks[0];

    s->parse_context.state= -1;

    s->context_initialized = 1;
    return 0;
fail:
    MPV_common_end(s);
    return -1;
}
/* free common structure for both encoder and decoder */
void MPV_common_end(MpegEncContext *s)
{
    int i;

    av_freep(&s->mb_type);
    av_freep(&s->p_mv_table);
    av_freep(&s->b_forw_mv_table);
    av_freep(&s->b_back_mv_table);
    av_freep(&s->b_bidir_forw_mv_table);
    av_freep(&s->b_bidir_back_mv_table);
    av_freep(&s->b_direct_forw_mv_table);
    av_freep(&s->b_direct_back_mv_table);
    av_freep(&s->b_direct_mv_table);
    av_freep(&s->motion_val);
    av_freep(&s->dc_val[0]);
    av_freep(&s->ac_val[0]);
    av_freep(&s->coded_block);
    av_freep(&s->mbintra_table);
    av_freep(&s->cbp_table);
    av_freep(&s->pred_dir_table);
    av_freep(&s->me_scratchpad);
    av_freep(&s->me_map);
    av_freep(&s->me_score_map);

    av_freep(&s->mbskip_table);
    av_freep(&s->bitstream_buffer);
    av_freep(&s->tex_pb_buffer);
    av_freep(&s->pb2_buffer);
    av_freep(&s->edge_emu_buffer);
    av_freep(&s->co_located_type_table);
    av_freep(&s->field_mv_table);
    av_freep(&s->field_select_table);
    av_freep(&s->avctx->stats_out);
    av_freep(&s->ac_stats);
    av_freep(&s->error_status_table);

    for(i=0; i<MAX_PICTURE_COUNT; i++){
        free_picture(s, &s->picture[i]);
    }
    s->context_initialized = 0;
}
/* init video encoder */
int MPV_encode_init(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;
    int i;

    avctx->pix_fmt = PIX_FMT_YUV420P;

    s->bit_rate = avctx->bit_rate;
    s->bit_rate_tolerance = avctx->bit_rate_tolerance;
    s->frame_rate = avctx->frame_rate;
    s->width = avctx->width;
    s->height = avctx->height;
    if(avctx->gop_size > 600){
        fprintf(stderr, "Warning keyframe interval too large! reducing it ...\n");
        avctx->gop_size=600;
    }
    s->gop_size = avctx->gop_size;
    s->rtp_mode = avctx->rtp_mode;
    s->rtp_payload_size = avctx->rtp_payload_size;
    if (avctx->rtp_callback)
        s->rtp_callback = avctx->rtp_callback;
    s->qmin= avctx->qmin;
    s->qmax= avctx->qmax;
    s->max_qdiff= avctx->max_qdiff;
    s->qcompress= avctx->qcompress;
    s->qblur= avctx->qblur;
    s->avctx = avctx;
    s->flags= avctx->flags;
    s->max_b_frames= avctx->max_b_frames;
    s->b_frame_strategy= avctx->b_frame_strategy;
    s->codec_id= avctx->codec->id;
    s->luma_elim_threshold  = avctx->luma_elim_threshold;
    s->chroma_elim_threshold= avctx->chroma_elim_threshold;
    s->strict_std_compliance= avctx->strict_std_compliance;
    s->data_partitioning= avctx->flags & CODEC_FLAG_PART;
    s->mpeg_quant= avctx->mpeg_quant;

    if (s->gop_size <= 1) {
        s->intra_only = 1;
        s->gop_size = 12;
    } else {
        s->intra_only = 0;
    }

    /* ME algorithm */
    if (avctx->me_method == 0)
        /* For compatibility */
        s->me_method = motion_estimation_method;
    else
        s->me_method = avctx->me_method;

    /* Fixed QSCALE */
    s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE);

    s->adaptive_quant= (   s->avctx->lumi_masking
                        || s->avctx->dark_masking
                        || s->avctx->temporal_cplx_masking
                        || s->avctx->spatial_cplx_masking
                        || s->avctx->p_masking)
                       && !s->fixed_qscale;

    s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);

    switch(avctx->codec->id) {
    case CODEC_ID_MPEG1VIDEO:
        s->out_format = FMT_MPEG1;
        avctx->delay=0; //FIXME not sure, should check the spec
        break;
    case CODEC_ID_MJPEG:
        s->out_format = FMT_MJPEG;
        s->intra_only = 1; /* force intra only for jpeg */
        s->mjpeg_write_tables = 1; /* write all tables */
        s->mjpeg_data_only_frames = 0; /* write all the needed headers */
        s->mjpeg_vsample[0] = 2; /* set up default sampling factors */
        s->mjpeg_vsample[1] = 1; /* the only currently supported values */
        s->mjpeg_vsample[2] = 1;
        s->mjpeg_hsample[0] = 2;
        s->mjpeg_hsample[1] = 1;
        s->mjpeg_hsample[2] = 1;
        if (mjpeg_init(s) < 0)
            return -1;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_H263:
        if (h263_get_picture_format(s->width, s->height) == 7) {
            printf("Input picture size isn't suitable for the h263 codec! try h263+\n");
            return -1;
        }
        s->out_format = FMT_H263;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_H263P:
        s->out_format = FMT_H263;
        s->h263_plus = 1;
        s->unrestricted_mv = 1;
        s->h263_aic = 1;

        /* These are just to be sure */
        s->umvplus = 1;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_RV10:
        s->out_format = FMT_H263;
        s->h263_rv10 = 1;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_MPEG4:
        s->out_format = FMT_H263;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->low_delay= s->max_b_frames ? 0 : 1;
        avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
        break;
    case CODEC_ID_MSMPEG4V1:
        s->out_format = FMT_H263;
        s->h263_msmpeg4 = 1;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version= 1;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_MSMPEG4V2:
        s->out_format = FMT_H263;
        s->h263_msmpeg4 = 1;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version= 2;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_MSMPEG4V3:
        s->out_format = FMT_H263;
        s->h263_msmpeg4 = 1;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version= 3;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_WMV1:
        s->out_format = FMT_H263;
        s->h263_msmpeg4 = 1;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version= 4;
        avctx->delay=0;
        s->low_delay=1;
        break;
    case CODEC_ID_WMV2:
        s->out_format = FMT_H263;
        s->h263_msmpeg4 = 1;
        s->h263_pred = 1;
        s->unrestricted_mv = 1;
        s->msmpeg4_version= 5;
        avctx->delay=0;
        s->low_delay=1;
        break;
    default:
        return -1;
    }

    { /* set up some sane defaults, some codecs might override them later */
        static int done=0;
        if(!done){
            int i;
            done=1;

            default_mv_penalty= av_mallocz( sizeof(UINT16)*(MAX_FCODE+1)*(2*MAX_MV+1) );
            memset(default_mv_penalty, 0, sizeof(UINT16)*(MAX_FCODE+1)*(2*MAX_MV+1));
            memset(default_fcode_tab , 0, sizeof(UINT8)*(2*MAX_MV+1));

            for(i=-16; i<16; i++){
                default_fcode_tab[i + MAX_MV]= 1;
            }
        }
    }
    s->mv_penalty= default_mv_penalty;
    s->fcode_tab= default_fcode_tab;
    s->y_dc_scale_table=
    s->c_dc_scale_table= ff_mpeg1_dc_scale_table;

    /* don't use the mv_penalty table for crap MV as it would be confused */
    if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty;

    s->encoding = 1;

    /* init */
    if (MPV_common_init(s) < 0)
        return -1;

#ifdef CONFIG_ENCODERS
    if (s->out_format == FMT_H263)
        h263_encode_init(s);
    else if (s->out_format == FMT_MPEG1)
        ff_mpeg1_encode_init(s);
    if(s->msmpeg4_version)
        ff_msmpeg4_encode_init(s);
#endif

    /* init default q matrix */
    for(i=0;i<64;i++) {
        int j= s->idct_permutation[i];
        if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
            s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
            s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
        }else if(s->out_format == FMT_H263){
            s->intra_matrix[j] =
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
        }else{ /* mpeg1 */
            s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
            s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
        }
    }

    /* precompute matrix */
    /* for mjpeg, we do include qscale in the matrix */
    if (s->out_format != FMT_MJPEG) {
        convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias,
                       s->intra_matrix, s->intra_quant_bias, 1, 31);
        convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias,
                       s->inter_matrix, s->inter_quant_bias, 1, 31);
    }

    if(ff_rate_control_init(s) < 0)
        return -1;

    s->picture_number = 0;
    s->picture_in_gop_number = 0;
    s->fake_picture_number = 0;
    /* motion detector init */
    s->f_code = 1;
    s->b_code = 1;

    return 0;
}
int MPV_encode_end(AVCodecContext *avctx)
{
    MpegEncContext *s = avctx->priv_data;

    ff_rate_control_uninit(s);

    MPV_common_end(s);
    if (s->out_format == FMT_MJPEG)
        mjpeg_close(s);

    return 0;
}
/* draw the edges of width 'w' of an image of size width, height */
//FIXME check that this is ok for mpeg4 interlaced
static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w)
{
    UINT8 *ptr, *last_line;
    int i;

    last_line = buf + (height - 1) * wrap;
    for(i=0;i<w;i++) {
        /* top and bottom */
        memcpy(buf - (i + 1) * wrap, buf, width);
        memcpy(last_line + (i + 1) * wrap, last_line, width);
    }
    /* left and right */
    ptr = buf;
    for(i=0;i<height;i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width-1], w);
        ptr += wrap;
    }
    /* corners */
    for(i=0;i<w;i++) {
        memset(buf - (i + 1) * wrap - w, buf[0], w); /* top left */
        memset(buf - (i + 1) * wrap + width, buf[width-1], w); /* top right */
        memset(last_line + (i + 1) * wrap - w, last_line[0], w); /* bottom left */
        memset(last_line + (i + 1) * wrap + width, last_line[width-1], w); /* bottom right */
    }
}
static int find_unused_picture(MpegEncContext *s, int shared){
    int i;

    if(shared){
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0]==NULL && s->picture[i].type==0) break;
        }
    }else{
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0]==NULL && s->picture[i].type!=0) break;
        }
        for(i=0; i<MAX_PICTURE_COUNT; i++){
            if(s->picture[i].data[0]==NULL) break;
        }
    }

    assert(i<MAX_PICTURE_COUNT);
    return i;
}
/* generic function for encode/decode called before a frame is coded/decoded */
int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
{
    int i;
    AVFrame *pic;

    /* mark&release old frames */
    if (s->pict_type != B_TYPE && s->last_picture.data[0]) {
        for(i=0; i<MAX_PICTURE_COUNT; i++){
//printf("%8X %d %d %X %X\n", s->picture[i].data[0], s->picture[i].type, i, s->next_picture.data[0], s->last_picture.data[0]);
            if(s->picture[i].data[0] == s->last_picture.data[0]){
//                s->picture[i].reference=0;
                avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                break;
            }
        }
        assert(i<MAX_PICTURE_COUNT);

        /* release forgotten pictures */
        /* if(mpeg124/h263) */
        if(!s->encoding){
            for(i=0; i<MAX_PICTURE_COUNT; i++){
                if(s->picture[i].data[0] && s->picture[i].data[0] != s->next_picture.data[0] && s->picture[i].reference){
                    fprintf(stderr, "releasing zombie picture\n");
                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                }
            }
        }
    }

    if(!s->encoding){
        /* find an unused Picture */
        i= find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];
        pic->reference= s->pict_type != B_TYPE;
        pic->coded_picture_number= s->current_picture.coded_picture_number+1;

        alloc_picture(s, (Picture*)pic, 0);

        s->current_picture= s->picture[i];
    }

    if (s->pict_type != B_TYPE) {
        s->last_picture= s->next_picture;
        s->next_picture= s->current_picture;
    }

    if(s->pict_type != I_TYPE && s->last_picture.data[0]==NULL){
        fprintf(stderr, "warning: first frame is no keyframe\n");
        assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
    }

    s->hurry_up= s->avctx->hurry_up;
    s->error_resilience= avctx->error_resilience;

    /* set dequantizer, we can't do it during init as it might change for mpeg4
       and we can't do it in the header decode as init isn't called for mpeg4 there yet */
    if(s->out_format == FMT_H263){
        if(s->mpeg_quant)
            s->dct_unquantize = s->dct_unquantize_mpeg2;
        else
            s->dct_unquantize = s->dct_unquantize_h263;
    }else
        s->dct_unquantize = s->dct_unquantize_mpeg1;

    return 0;
}
/* generic function for encode/decode called after a frame has been coded/decoded */
void MPV_frame_end(MpegEncContext *s)
{
    int i;

    /* draw edge for correct motion prediction if outside */
    if(s->codec_id!=CODEC_ID_SVQ1){
        if (s->pict_type != B_TYPE && !s->intra_only && !(s->flags&CODEC_FLAG_EMU_EDGE)) {
            draw_edges(s->current_picture.data[0], s->linesize  , s->h_edge_pos   , s->v_edge_pos   , EDGE_WIDTH  );
            draw_edges(s->current_picture.data[1], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
            draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
        }
    }

    s->last_pict_type    = s->pict_type;
    if(s->pict_type!=B_TYPE){
        s->last_non_b_pict_type= s->pict_type;
    }

    s->current_picture.quality= s->qscale; //FIXME get average of qscale_table
    s->current_picture.pict_type= s->pict_type;
    s->current_picture.key_frame= s->pict_type == I_TYPE;

    /* copy back current_picture variables */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] == s->current_picture.data[0]){
            s->picture[i]= s->current_picture;
            break;
        }
    }
    assert(i<MAX_PICTURE_COUNT);

    /* release non reference frames */
    for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] && !s->picture[i].reference /*&& s->picture[i].type!=FF_BUFFER_TYPE_SHARED*/)
            s->avctx->release_buffer(s->avctx, (AVFrame*)&s->picture[i]);
    }
}
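/* Scene-change heuristics used by b_frame_strategy 1: get_sae() sums the
 * absolute difference of a 16x16 block against a constant reference value,
 * and get_intra_count() below counts the macroblocks that look cheaper to
 * code as intra than as inter relative to the previous input frame. */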
static int get_sae(uint8_t *src, int ref, int stride){
    int x,y;
    int acc=0;

    for(y=0; y<16; y++){
        for(x=0; x<16; x++){
            acc+= ABS(src[x+y*stride] - ref);
        }
    }

    return acc;
}
static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
    int x, y, w, h;
    int acc=0;

    w= s->width &~15;
    h= s->height&~15;

    for(y=0; y<h; y+=16){
        for(x=0; x<w; x+=16){
            int offset= x + y*stride;
            int sad = s->dsp.pix_abs16x16(src + offset, ref + offset, stride);
            int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
            int sae = get_sae(src + offset, mean, stride);

            acc+= sae + 500 < sad;
        }
    }
    return acc;
}
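/* Copy (or directly reference) one user-supplied input frame into the
 * encoder's picture pool. The "direct" path is only taken when the caller
 * guarantees that the buffer stays valid (CODEC_FLAG_INPUT_PRESERVED) and
 * the strides match; otherwise the planes are copied. */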
static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
    AVFrame *pic;
    int i;
    const int encoding_delay= s->max_b_frames;
    int direct=1;

    if(encoding_delay && !(s->flags&CODEC_FLAG_INPUT_PRESERVED)) direct=0;
    if(pic_arg->linesize[0] != s->linesize) direct=0;
    if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
    if(pic_arg->linesize[2] != s->uvlinesize) direct=0;

//    printf("%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);

    if(direct){
        i= find_unused_picture(s, 1);

        pic= (AVFrame*)&s->picture[i];

        for(i=0; i<4; i++){
            pic->data[i]= pic_arg->data[i];
            pic->linesize[i]= pic_arg->linesize[i];
        }
        alloc_picture(s, (Picture*)pic, 1);
    }else{
        i= find_unused_picture(s, 0);

        pic= (AVFrame*)&s->picture[i];

        alloc_picture(s, (Picture*)pic, 0);

        if(   pic->data[0] == pic_arg->data[0]
           && pic->data[1] == pic_arg->data[1]
           && pic->data[2] == pic_arg->data[2]){
            // empty, buffers already match
        }else{
            int h_chroma_shift, v_chroma_shift;

            avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);

            for(i=0; i<3; i++){
                int src_stride= pic_arg->linesize[i];
                int dst_stride= i ? s->uvlinesize : s->linesize;
                int h_shift= i ? h_chroma_shift : 0;
                int v_shift= i ? v_chroma_shift : 0;
                int w= s->width >>h_shift;
                int h= s->height>>v_shift;
                uint8_t *src= pic_arg->data[i];
                uint8_t *dst= pic->data[i];

                if(src_stride==dst_stride)
                    memcpy(dst, src, src_stride*h);
                else{
                    while(h--){
                        memcpy(dst, src, w);
                        dst += dst_stride;
                        src += src_stride;
                    }
                }
            }
        }
    }
    pic->quality= pic_arg->quality;
    pic->pict_type= pic_arg->pict_type;

    if(s->input_picture[encoding_delay])
        pic->display_picture_number= s->input_picture[encoding_delay]->display_picture_number + 1;

    /* shift buffer entries */
    for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
        s->input_picture[i-1]= s->input_picture[i];

    s->input_picture[encoding_delay]= (Picture*)pic;

    return 0;
}
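/* Decide the coding type and order of the next picture(s): choose how many
 * buffered input frames become B-frames (a fixed count, user-forced types,
 * or the intra-count heuristic above), then move the chosen frames into
 * reordered_input_picture[] in coding order. */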
static void select_input_picture(MpegEncContext *s){
    int i;
    const int encoding_delay= s->max_b_frames;
    int coded_pic_num=0;

    if(s->reordered_input_picture[0])
        coded_pic_num= s->reordered_input_picture[0]->coded_picture_number + 1;

    for(i=1; i<MAX_PICTURE_COUNT; i++)
        s->reordered_input_picture[i-1]= s->reordered_input_picture[i];
    s->reordered_input_picture[MAX_PICTURE_COUNT-1]= NULL;

    /* set next picture types & ordering */
    if(s->reordered_input_picture[0]==NULL && s->input_picture[0]){
        if(/*s->picture_in_gop_number >= s->gop_size ||*/ s->next_picture.data[0]==NULL || s->intra_only){
            s->reordered_input_picture[0]= s->input_picture[0];
            s->reordered_input_picture[0]->pict_type= I_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
        }else{
            int b_frames;

            if(s->flags&CODEC_FLAG_PASS2){
                for(i=0; i<s->max_b_frames+1; i++){
                    int pict_num= s->input_picture[0]->display_picture_number + i;
                    int pict_type= s->rc_context.entry[pict_num].new_pict_type;
                    s->input_picture[i]->pict_type= pict_type;

                    if(i + 1 >= s->rc_context.num_entries) break;
                }
            }

            if(s->input_picture[0]->pict_type){
                /* user selected pict_type */
                for(b_frames=0; b_frames<s->max_b_frames+1; b_frames++){
                    if(s->input_picture[b_frames]->pict_type!=B_TYPE) break;
                }

                if(b_frames > s->max_b_frames){
                    fprintf(stderr, "warning, too many bframes in a row\n");
                    b_frames = s->max_b_frames;
                }
            }else if(s->b_frame_strategy==0){
                b_frames= s->max_b_frames;
            }else if(s->b_frame_strategy==1){
                for(i=1; i<s->max_b_frames+1; i++){
                    if(s->input_picture[i]->b_frame_score==0){
                        s->input_picture[i]->b_frame_score=
                            get_intra_count(s, s->input_picture[i  ]->data[0],
                                               s->input_picture[i-1]->data[0], s->linesize) + 1;
                    }
                }
                for(i=0; i<s->max_b_frames; i++){
                    if(s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
                }

                b_frames= FFMAX(0, i-1);

                /* reset scores */
                for(i=0; i<b_frames+1; i++){
                    s->input_picture[i]->b_frame_score=0;
                }
            }else{
                fprintf(stderr, "illegal b frame strategy\n");
                b_frames=0;
            }

//static int b_count=0;
//b_count+= b_frames;
//printf("b_frames: %d\n", b_count);

            s->reordered_input_picture[0]= s->input_picture[b_frames];
            if(   s->picture_in_gop_number + b_frames >= s->gop_size
               || s->reordered_input_picture[0]->pict_type == I_TYPE)
                s->reordered_input_picture[0]->pict_type= I_TYPE;
            else
                s->reordered_input_picture[0]->pict_type= P_TYPE;
            s->reordered_input_picture[0]->coded_picture_number= coded_pic_num;
            for(i=0; i<b_frames; i++){
                coded_pic_num++;
                s->reordered_input_picture[i+1]= s->input_picture[i];
                s->reordered_input_picture[i+1]->pict_type= B_TYPE;
                s->reordered_input_picture[i+1]->coded_picture_number= coded_pic_num;
            }
        }
    }

    if(s->reordered_input_picture[0]){
        s->reordered_input_picture[0]->reference= s->reordered_input_picture[0]->pict_type!=B_TYPE;

        if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
            int i= find_unused_picture(s, 0);
            Picture *pic= &s->picture[i];

            s->new_picture= *s->reordered_input_picture[0];

            /* mark us unused / free shared pic */
            for(i=0; i<4; i++){
                s->reordered_input_picture[0]->data[i]= NULL;
            }
            s->reordered_input_picture[0]->type= 0;

            pic->pict_type = s->reordered_input_picture[0]->pict_type;
            pic->quality   = s->reordered_input_picture[0]->quality;
            pic->coded_picture_number = s->reordered_input_picture[0]->coded_picture_number;
            pic->reference = s->reordered_input_picture[0]->reference;

            alloc_picture(s, pic, 0);

            s->current_picture= *pic;
        }else{
            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
                   || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);

            s->new_picture= *s->reordered_input_picture[0];
            for(i=0; i<4; i++){
                s->reordered_input_picture[0]->data[i]-=16; //FIXME dirty
            }
            s->current_picture= *s->reordered_input_picture[0];
        }

        s->picture_number= s->new_picture.display_picture_number;
//printf("dpn:%d\n", s->picture_number);
    }else{
        memset(&s->new_picture, 0, sizeof(Picture));
    }
}
int MPV_encode_picture(AVCodecContext *avctx,
                       unsigned char *buf, int buf_size, void *data)
{
    MpegEncContext *s = avctx->priv_data;
    AVFrame *pic_arg = data;
    int i;

    init_put_bits(&s->pb, buf, buf_size, NULL, NULL);

    s->picture_in_gop_number++;

    load_input_picture(s, pic_arg);

    select_input_picture(s);

    /* output? */
    if(s->new_picture.data[0]){

        s->pict_type= s->new_picture.pict_type;
        if (s->fixed_qscale){ /* the ratecontrol needs the last qscale so we don't touch it for CBR */
            s->qscale= (int)(s->new_picture.quality+0.5);
        }

//printf("qs:%f %f %d\n", s->new_picture.quality, s->current_picture.quality, s->qscale);
        MPV_frame_start(s, avctx);

        encode_picture(s, s->picture_number);

        avctx->real_pict_num  = s->picture_number;
        avctx->header_bits = s->header_bits;
        avctx->mv_bits     = s->mv_bits;
        avctx->misc_bits   = s->misc_bits;
        avctx->i_tex_bits  = s->i_tex_bits;
        avctx->p_tex_bits  = s->p_tex_bits;
        avctx->i_count     = s->i_count;
        avctx->p_count     = s->mb_num - s->i_count - s->skip_count; //FIXME f/b_count in avctx
        avctx->skip_count  = s->skip_count;

        MPV_frame_end(s);

        if (s->out_format == FMT_MJPEG)
            mjpeg_picture_trailer(s);

        if(s->flags&CODEC_FLAG_PASS1)
            ff_write_pass1_stats(s);
    }

    s->input_picture_number++;

    flush_put_bits(&s->pb);
    s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;

    s->total_bits += s->frame_bits;
    avctx->frame_bits  = s->frame_bits;

    for(i=0; i<4; i++){
        avctx->error[i] += s->current_picture.error[i];
    }

    return pbBufPtr(&s->pb) - s->pb.buf;
}
static inline void gmc1_motion(MpegEncContext *s,
                               UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
                               int dest_offset,
                               UINT8 **ref_picture, int src_offset)
{
    UINT8 *ptr;
    int offset, src_x, src_y, linesize, uvlinesize;
    int motion_x, motion_y;
    int emu=0;

    motion_x= s->sprite_offset[0][0];
    motion_y= s->sprite_offset[0][1];
    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        motion_x =0;
    src_y = clip(src_y, -16, s->height);
    if (src_y == s->height)
        motion_y =0;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;

    dest_y+=dest_offset;
    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(src_x<0 || src_y<0 || src_x + (motion_x&15) + 16 > s->h_edge_pos
                              || src_y + (motion_y&15) + 16 > s->v_edge_pos){
            emulated_edge_mc(s, ptr, linesize, 17, 17, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer;
            emu=1;
        }
    }

    if((motion_x|motion_y)&7){
        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
    }else{
        int dxy;

        dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
        if (s->no_rounding){
            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
        }else{
            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
        }
    }

    if(s->flags&CODEC_FLAG_GRAY) return;

    motion_x= s->sprite_offset[1][0];
    motion_y= s->sprite_offset[1][1];
    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
    motion_x<<=(3-s->sprite_warping_accuracy);
    motion_y<<=(3-s->sprite_warping_accuracy);
    src_x = clip(src_x, -8, s->width>>1);
    if (src_x == s->width>>1)
        motion_x =0;
    src_y = clip(src_y, -8, s->height>>1);
    if (src_y == s->height>>1)
        motion_y =0;

    offset = (src_y * uvlinesize) + src_x + (src_offset>>1);
    ptr = ref_picture[1] + offset;
    if(emu){
        emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);

    ptr = ref_picture[2] + offset;
    if(emu){
        emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer;
    }
    s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
}
static inline void gmc_motion(MpegEncContext *s,
                              UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
                              int dest_offset,
                              UINT8 **ref_picture, int src_offset)
{
    UINT8 *ptr;
    int linesize, uvlinesize;
    const int a= s->sprite_warping_accuracy;
    int ox, oy;

    linesize = s->linesize;
    uvlinesize = s->uvlinesize;

    ptr = ref_picture[0] + src_offset;

    dest_y+=dest_offset;

    ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
    oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;

    s->dsp.gmc(dest_y, ptr, linesize, 16,
               ox,
               oy,
               s->sprite_delta[0][0], s->sprite_delta[0][1],
               s->sprite_delta[1][0], s->sprite_delta[1][1],
               a+1, (1<<(2*a+1)) - s->no_rounding,
               s->h_edge_pos, s->v_edge_pos);
    s->dsp.gmc(dest_y+8, ptr, linesize, 16,
               ox + s->sprite_delta[0][0]*8,
               oy + s->sprite_delta[1][0]*8,
               s->sprite_delta[0][0], s->sprite_delta[0][1],
               s->sprite_delta[1][0], s->sprite_delta[1][1],
               a+1, (1<<(2*a+1)) - s->no_rounding,
               s->h_edge_pos, s->v_edge_pos);

    if(s->flags&CODEC_FLAG_GRAY) return;

    dest_cb+=dest_offset>>1;
    dest_cr+=dest_offset>>1;

    ox= s->sprite_offset[1][0] + s->sprite_delta[0][0]*s->mb_x*8 + s->sprite_delta[0][1]*s->mb_y*8;
    oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;

    ptr = ref_picture[1] + (src_offset>>1);
    s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
               ox,
               oy,
               s->sprite_delta[0][0], s->sprite_delta[0][1],
               s->sprite_delta[1][0], s->sprite_delta[1][1],
               a+1, (1<<(2*a+1)) - s->no_rounding,
               s->h_edge_pos>>1, s->v_edge_pos>>1);

    ptr = ref_picture[2] + (src_offset>>1);
    s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
               ox,
               oy,
               s->sprite_delta[0][0], s->sprite_delta[0][1],
               s->sprite_delta[1][0], s->sprite_delta[1][1],
               a+1, (1<<(2*a+1)) - s->no_rounding,
               s->h_edge_pos>>1, s->v_edge_pos>>1);
}
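/* Edge emulation for motion compensation: copy the referenced block into
 * edge_emu_buffer and replicate border pixels for any part of the block
 * that falls outside the [0,w)x[0,h) picture area, so the caller can run
 * MC from a fully valid buffer (used when CODEC_FLAG_EMU_EDGE is set). */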
static void emulated_edge_mc(MpegEncContext *s, UINT8 *src, int linesize, int block_w, int block_h,
                             int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;
    UINT8 *buf= s->edge_emu_buffer;

    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);

    // copy existing part
    for(y=start_y; y<end_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= src[x + y*linesize];
        }
    }

    //top
    for(y=0; y<start_y; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + start_y*linesize];
        }
    }

    //bottom
    for(y=end_y; y<block_h; y++){
        for(x=start_x; x<end_x; x++){
            buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
        }
    }

    for(y=0; y<block_h; y++){
        //left
        for(x=0; x<start_x; x++){
            buf[x + y*linesize]= buf[start_x + y*linesize];
        }

        //right
        for(x=end_x; x<block_w; x++){
            buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
        }
    }
}
/* apply one mpeg motion vector to the three components */
static inline void mpeg_motion(MpegEncContext *s,
                               UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
                               int dest_offset,
                               UINT8 **ref_picture, int src_offset,
                               int field_based, op_pixels_func (*pix_op)[4],
                               int motion_x, int motion_y, int h)
{
    UINT8 *ptr;
    int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
    int emu=0;
#if 0
    if(s->quarter_sample)
    {
        motion_x>>=1;
        motion_y>>=1;
    }
#endif
    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
    src_x = s->mb_x * 16 + (motion_x >> 1);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);

    /* WARNING: do not forget half pels */
    height = s->height >> field_based;
    v_edge_pos = s->v_edge_pos >> field_based;
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        dxy &= ~1;
    src_y = clip(src_y, -16, height);
    if (src_y == height)
        dxy &= ~2;
    linesize   = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;
    ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
    dest_y += dest_offset;

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 16 > s->h_edge_pos
                              || src_y + (motion_y&1) + h  > v_edge_pos){
            emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer + src_offset;
            emu=1;
        }
    }
    pix_op[0][dxy](dest_y, ptr, linesize, h);

    if(s->flags&CODEC_FLAG_GRAY) return;

    if (s->out_format == FMT_H263) {
        dxy = 0;
        if ((motion_x & 3) != 0)
            dxy |= 1;
        if ((motion_y & 3) != 0)
            dxy |= 2;
        mx = motion_x >> 2;
        my = motion_y >> 2;
    } else {
        mx = motion_x / 2;
        my = motion_y / 2;
        dxy = ((my & 1) << 1) | (mx & 1);
        mx >>= 1;
        my >>= 1;
    }

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * (8 >> field_based) + my;
    src_x = clip(src_x, -8, s->width >> 1);
    if (src_x == (s->width >> 1))
        dxy &= ~1;
    src_y = clip(src_y, -8, height >> 1);
    if (src_y == (height >> 1))
        dxy &= ~2;
    offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
    ptr = ref_picture[1] + offset;
    if(emu){
        emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer + (src_offset >> 1);
    }
    pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);

    ptr = ref_picture[2] + offset;
    if(emu){
        emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9+field_based,
                         src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer + (src_offset >> 1);
    }
    pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
}
static inline void qpel_motion(MpegEncContext *s,
                               UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
                               int dest_offset,
                               UINT8 **ref_picture, int src_offset,
                               int field_based, op_pixels_func (*pix_op)[4],
                               qpel_mc_func (*qpix_op)[16],
                               int motion_x, int motion_y, int h)
{
    UINT8 *ptr;
    int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
    int emu=0;

    dxy = ((motion_y & 3) << 2) | (motion_x & 3);
    src_x = s->mb_x * 16 + (motion_x >> 2);
    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 2);

    height = s->height >> field_based;
    v_edge_pos = s->v_edge_pos >> field_based;
    src_x = clip(src_x, -16, s->width);
    if (src_x == s->width)
        dxy &= ~3;
    src_y = clip(src_y, -16, height);
    if (src_y == height)
        dxy &= ~12;
    linesize = s->linesize << field_based;
    uvlinesize = s->uvlinesize << field_based;
    ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
    dest_y += dest_offset;
//printf("%d %d %d\n", src_x, src_y, dxy);

    if(s->flags&CODEC_FLAG_EMU_EDGE){
        if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 16 > s->h_edge_pos
                              || src_y + (motion_y&3) + h  > v_edge_pos){
            emulated_edge_mc(s, ptr - src_offset, s->linesize, 17, 17+field_based,
                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
            ptr= s->edge_emu_buffer + src_offset;
            emu=1;
        }
    }
    if(!field_based)
        qpix_op[0][dxy](dest_y, ptr, linesize);
    else{
        //damn interlaced mode
        //FIXME boundary mirroring is not exactly correct here
        qpix_op[1][dxy](dest_y  , ptr  , linesize);
        qpix_op[1][dxy](dest_y+8, ptr+8, linesize);
    }

    if(s->flags&CODEC_FLAG_GRAY) return;

    if(field_based){
        mx= motion_x/2;
        my= motion_y>>1;
    }else if(s->workaround_bugs&FF_BUG_QPEL_CHROMA){
        mx= (motion_x>>1)|(motion_x&1);
        my= (motion_y>>1)|(motion_y&1);
    }else{
        mx= motion_x/2;
        my= motion_y/2;
    }
    mx= (mx>>1)|(mx&1);
    my= (my>>1)|(my&1);
    dxy= (mx&1) | ((my&1)<<1);
    mx>>=1;
    my>>=1;

    src_x = s->mb_x * 8 + mx;
    src_y = s->mb_y * (8 >> field_based) + my;
    src_x = clip(src_x, -8, s->width >> 1);
    if (src_x == (s->width >> 1))
        dxy &= ~1;
    src_y = clip(src_y, -8, height >> 1);
    if (src_y == (height >> 1))
        dxy &= ~2;

    offset = (src_y * uvlinesize) + src_x + (src_offset >> 1);
    ptr = ref_picture[1] + offset;
    if(emu){
        emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
                         src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer + (src_offset >> 1);
    }
    pix_op[1][dxy](dest_cb + (dest_offset >> 1), ptr, uvlinesize, h >> 1);

    ptr = ref_picture[2] + offset;
    if(emu){
        emulated_edge_mc(s, ptr - (src_offset >> 1), s->uvlinesize, 9, 9 + field_based,
                         src_x, src_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
        ptr= s->edge_emu_buffer + (src_offset >> 1);
    }
    pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
}
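/* Dispatch motion compensation for one macroblock according to s->mv_type:
 * 16x16 (including the GMC and quarter-pel variants), four 8x8 vectors with
 * a single derived chroma vector, or field-based prediction. */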
static inline void MPV_motion(MpegEncContext *s,
                              UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
                              int dir, UINT8 **ref_picture,
                              op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
{
    int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
    int mb_x, mb_y, i;
    UINT8 *ptr, *dest;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    switch(s->mv_type) {
    case MV_TYPE_16X16:
        if(s->mcsel){
            if(s->real_sprite_warping_points==1){
                gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
                            ref_picture, 0);
            }else{
                gmc_motion(s, dest_y, dest_cb, dest_cr, 0,
                           ref_picture, 0);
            }
        }else if(s->quarter_sample){
            qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
                        ref_picture, 0,
                        0, pix_op, qpix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }else{
            mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
                        ref_picture, 0,
                        0, pix_op,
                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
        }
        break;
    case MV_TYPE_8X8:
        mx = 0;
        my = 0;
        if(s->quarter_sample){
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;

                /* WARNING: do not forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~3;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~12;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(src_x<0 || src_y<0 || src_x + (motion_x&3) + 8 > s->h_edge_pos
                                          || src_y + (motion_y&3) + 8 > s->v_edge_pos){
                        emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                qpix_op[1][dxy](dest, ptr, s->linesize);

                mx += s->mv[dir][i][0]/2;
                my += s->mv[dir][i][1]/2;
            }
        }else{
            for(i=0;i<4;i++) {
                motion_x = s->mv[dir][i][0];
                motion_y = s->mv[dir][i][1];

                dxy = ((motion_y & 1) << 1) | (motion_x & 1);
                src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
                src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;

                /* WARNING: do not forget half pels */
                src_x = clip(src_x, -16, s->width);
                if (src_x == s->width)
                    dxy &= ~1;
                src_y = clip(src_y, -16, s->height);
                if (src_y == s->height)
                    dxy &= ~2;

                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                if(s->flags&CODEC_FLAG_EMU_EDGE){
                    if(src_x<0 || src_y<0 || src_x + (motion_x&1) + 8 > s->h_edge_pos
                                          || src_y + (motion_y&1) + 8 > s->v_edge_pos){
                        emulated_edge_mc(s, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                        ptr= s->edge_emu_buffer;
                    }
                }
                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
                pix_op[1][dxy](dest, ptr, s->linesize, 8);

                mx += s->mv[dir][i][0];
                my += s->mv[dir][i][1];
            }
        }

        if(s->flags&CODEC_FLAG_GRAY) break;
        /* In case of 8X8, we construct a single chroma motion vector
           with a special rounding */
        if (mx >= 0)
            mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
        else {
            mx = -mx;
            mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1));
        }
        if (my >= 0)
            my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
        else {
            my = -my;
            my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1));
        }
        dxy = ((my & 1) << 1) | (mx & 1);
        mx >>= 1;
        my >>= 1;

        src_x = mb_x * 8 + mx;
        src_y = mb_y * 8 + my;
        src_x = clip(src_x, -8, s->width/2);
        if (src_x == s->width/2)
            dxy &= ~1;
        src_y = clip(src_y, -8, s->height/2);
        if (src_y == s->height/2)
            dxy &= ~2;

        offset = (src_y * (s->uvlinesize)) + src_x;
        ptr = ref_picture[1] + offset;
        if(s->flags&CODEC_FLAG_EMU_EDGE){
            if(src_x<0 || src_y<0 || src_x + (dxy &1) + 8 > s->h_edge_pos>>1
                                  || src_y + (dxy>>1) + 8 > s->v_edge_pos>>1){
                emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
                ptr= s->edge_emu_buffer;
            }
        }
        pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);

        ptr = ref_picture[2] + offset;
        if(s->flags&CODEC_FLAG_EMU_EDGE){
            emulated_edge_mc(s, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
            ptr= s->edge_emu_buffer;
        }
        pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
        break;
    case MV_TYPE_FIELD:
        if (s->picture_structure == PICT_FRAME) {
            if(s->quarter_sample){
                /* top field */
                qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
                            ref_picture, s->field_select[dir][0] ? s->linesize : 0,
                            1, pix_op, qpix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                qpel_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
                            ref_picture, s->field_select[dir][1] ? s->linesize : 0,
                            1, pix_op, qpix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }else{
                /* top field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
                            ref_picture, s->field_select[dir][0] ? s->linesize : 0,
                            1, pix_op,
                            s->mv[dir][0][0], s->mv[dir][0][1], 8);
                /* bottom field */
                mpeg_motion(s, dest_y, dest_cb, dest_cr, s->linesize,
                            ref_picture, s->field_select[dir][1] ? s->linesize : 0,
                            1, pix_op,
                            s->mv[dir][1][0], s->mv[dir][1][1], 8);
            }
        }
        break;
    }
}
/* put block[] to dest[] */
static inline void put_dct(MpegEncContext *s,
                           DCTELEM *block, int i, UINT8 *dest, int line_size)
{
    s->dct_unquantize(s, block, i, s->qscale);
    s->idct_put (dest, line_size, block);
}

/* add block[] to dest[] */
static inline void add_dct(MpegEncContext *s,
                           DCTELEM *block, int i, UINT8 *dest, int line_size)
{
    if (s->block_last_index[i] >= 0) {
        s->idct_add (dest, line_size, block);
    }
}

static inline void add_dequant_dct(MpegEncContext *s,
                                   DCTELEM *block, int i, UINT8 *dest, int line_size)
{
    if (s->block_last_index[i] >= 0) {
        s->dct_unquantize(s, block, i, s->qscale);

        s->idct_add (dest, line_size, block);
    }
}
/**
 * cleans dc, ac, coded_block for the current non intra MB
 */
void ff_clean_intra_table_entries(MpegEncContext *s)
{
    int wrap = s->block_wrap[0];
    int xy = s->block_index[0];

    s->dc_val[0][xy           ] =
    s->dc_val[0][xy + 1       ] =
    s->dc_val[0][xy     + wrap] =
    s->dc_val[0][xy + 1 + wrap] = 1024;
    /* ac pred */
    memset(s->ac_val[0][xy       ], 0, 32 * sizeof(INT16));
    memset(s->ac_val[0][xy + wrap], 0, 32 * sizeof(INT16));
    if (s->msmpeg4_version>=3) {
        s->coded_block[xy           ] =
        s->coded_block[xy + 1       ] =
        s->coded_block[xy     + wrap] =
        s->coded_block[xy + 1 + wrap] = 0;
    }
    /* chroma */
    wrap = s->block_wrap[4];
    xy = s->mb_x + 1 + (s->mb_y + 1) * wrap;
    s->dc_val[1][xy] =
    s->dc_val[2][xy] = 1024;
    /* ac pred */
    memset(s->ac_val[1][xy], 0, 16 * sizeof(INT16));
    memset(s->ac_val[2][xy], 0, 16 * sizeof(INT16));

    s->mbintra_table[s->mb_x + s->mb_y*s->mb_width]= 0;
}
/* generic function called after a macroblock has been parsed by the
   decoder or after it has been encoded by the encoder.

   Important variables used:
   s->mb_intra : true if intra macroblock
   s->mv_dir   : motion vector direction
   s->mv_type  : motion vector type
   s->mv       : motion vector
   s->interlaced_dct : true if interlaced dct used (mpeg2)
 */
void MPV_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
{
    int mb_x, mb_y;
    const int mb_xy = s->mb_y * s->mb_width + s->mb_x;

    mb_x = s->mb_x;
    mb_y = s->mb_y;

    s->current_picture.qscale_table[mb_xy]= s->qscale;

    /* update DC predictors for P macroblocks */
    if (!s->mb_intra) {
        if (s->h263_pred || s->h263_aic) {
            if(s->mbintra_table[mb_xy])
                ff_clean_intra_table_entries(s);
        } else {
            s->last_dc[0] =
            s->last_dc[1] =
            s->last_dc[2] = 128 << s->intra_dc_precision;
        }
    }
    else if (s->h263_pred || s->h263_aic)
        s->mbintra_table[mb_xy]=1;

    /* update motion predictor, not for B-frames as they need the motion_val from the last P/S-Frame */
    if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE) { //FIXME move into h263.c if possible, format specific stuff shouldn't be here

        const int wrap = s->block_wrap[0];
        const int xy = s->block_index[0];
        const int mb_index= s->mb_x + s->mb_y*s->mb_width;
        if(s->mv_type == MV_TYPE_8X8){
            s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_4MV;
        } else {
            int motion_x, motion_y;
            if (s->mb_intra) {
                motion_x = 0;
                motion_y = 0;
                if(s->co_located_type_table)
                    s->co_located_type_table[mb_index]= 0;
            } else if (s->mv_type == MV_TYPE_16X16) {
                motion_x = s->mv[0][0][0];
                motion_y = s->mv[0][0][1];
                if(s->co_located_type_table)
                    s->co_located_type_table[mb_index]= 0;
            } else /*if (s->mv_type == MV_TYPE_FIELD)*/ {
                int i;
                motion_x = s->mv[0][0][0] + s->mv[0][1][0];
                motion_y = s->mv[0][0][1] + s->mv[0][1][1];
                motion_x = (motion_x>>1) | (motion_x&1);
                for(i=0; i<2; i++){
                    s->field_mv_table[mb_index][i][0]= s->mv[0][i][0];
                    s->field_mv_table[mb_index][i][1]= s->mv[0][i][1];
                    s->field_select_table[mb_index][i]= s->field_select[0][i];
                }
                s->co_located_type_table[mb_index]= CO_LOCATED_TYPE_FIELDMV;
            }
            /* no update if 8X8 because it has been done during parsing */
            s->motion_val[xy][0] = motion_x;
            s->motion_val[xy][1] = motion_y;
            s->motion_val[xy + 1][0] = motion_x;
            s->motion_val[xy + 1][1] = motion_y;
            s->motion_val[xy + wrap][0] = motion_x;
            s->motion_val[xy + wrap][1] = motion_y;
            s->motion_val[xy + 1 + wrap][0] = motion_x;
            s->motion_val[xy + 1 + wrap][1] = motion_y;
        }
    }
    if ((s->flags&CODEC_FLAG_PSNR) || !(s->encoding && (s->intra_only || s->pict_type==B_TYPE))) { //FIXME precalc
        UINT8 *dest_y, *dest_cb, *dest_cr;
        int dct_linesize, dct_offset;
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];

        /* avoid copy if macroblock skipped in last frame too */
        if (s->pict_type != B_TYPE) {
            s->current_picture.mbskip_table[mb_xy]= s->mb_skiped;
        }

        /* skip only during decoding as we might trash the buffers during encoding a bit */
        if(!s->encoding){
            UINT8 *mbskip_ptr = &s->mbskip_table[mb_xy];
            const int age= s->current_picture.age;

            if (s->mb_skiped) {
                s->mb_skiped= 0;
                assert(s->pict_type!=I_TYPE);

                (*mbskip_ptr) ++; /* indicate that this time we skiped it */
                if(*mbskip_ptr >99) *mbskip_ptr= 99;

                /* if previous was skipped too, then nothing to do ! */
                if (*mbskip_ptr >= age){
//if(s->pict_type!=B_TYPE && s->mb_x==0) printf("\n");
//if(s->pict_type!=B_TYPE) printf("%d%d ", *mbskip_ptr, age);
                    if(s->pict_type!=B_TYPE) return;
                    if(s->avctx->draw_horiz_band==NULL && *mbskip_ptr > age) return;
                    /* we dont draw complete frames here so we cant skip */
                }
            } else {
                *mbskip_ptr = 0; /* not skipped */
            }
        }

        if(s->pict_type==B_TYPE && s->avctx->draw_horiz_band){
            dest_y  = s->current_picture.data[0] + mb_x * 16;
            dest_cb = s->current_picture.data[1] + mb_x * 8;
            dest_cr = s->current_picture.data[2] + mb_x * 8;
        } else {
            dest_y  = s->current_picture.data[0] + (mb_y * 16 * s->linesize  ) + mb_x * 16;
            dest_cb = s->current_picture.data[1] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
            dest_cr = s->current_picture.data[2] + (mb_y * 8 * s->uvlinesize) + mb_x * 8;
        }

        if (s->interlaced_dct) {
            dct_linesize = s->linesize * 2;
            dct_offset   = s->linesize;
        } else {
            dct_linesize = s->linesize;
            dct_offset   = s->linesize * 8;
        }
        if (!s->mb_intra) {
            /* motion handling */
            /* decoding or more than one mb_type (MC was allready done otherwise) */
            if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){
                if ((!s->no_rounding) || s->pict_type==B_TYPE){
                    op_pix = s->dsp.put_pixels_tab;
                    op_qpix= s->dsp.put_qpel_pixels_tab;
                }else{
                    op_pix = s->dsp.put_no_rnd_pixels_tab;
                    op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
                }

                if (s->mv_dir & MV_DIR_FORWARD) {
                    MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
                    op_pix = s->dsp.avg_pixels_tab;
                    op_qpix= s->dsp.avg_qpel_pixels_tab;
                }
                if (s->mv_dir & MV_DIR_BACKWARD) {
                    MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
                }
            }
            /* skip dequant / idct if we are really late ;) */
            if(s->hurry_up>1) return;

            /* add dct residue */
            if(s->encoding || !(   s->mpeg2 || s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO
                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
                add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
                add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
                add_dequant_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    add_dequant_dct(s, block[4], 4, dest_cb, s->uvlinesize);
                    add_dequant_dct(s, block[5], 5, dest_cr, s->uvlinesize);
                }
            } else {
                add_dct(s, block[0], 0, dest_y, dct_linesize);
                add_dct(s, block[1], 1, dest_y + 8, dct_linesize);
                add_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
                add_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    add_dct(s, block[4], 4, dest_cb, s->uvlinesize);
                    add_dct(s, block[5], 5, dest_cr, s->uvlinesize);
                }
            }
        } else {
            /* dct only in intra block */
            if(s->encoding || !(s->mpeg2 || s->codec_id==CODEC_ID_MPEG1VIDEO)){
                put_dct(s, block[0], 0, dest_y, dct_linesize);
                put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
                put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
                put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    put_dct(s, block[4], 4, dest_cb, s->uvlinesize);
                    put_dct(s, block[5], 5, dest_cr, s->uvlinesize);
                }
            } else {
                s->idct_put(dest_y                 , dct_linesize, block[0]);
                s->idct_put(dest_y              + 8, dct_linesize, block[1]);
                s->idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
                s->idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);

                if(!(s->flags&CODEC_FLAG_GRAY)){
                    s->idct_put(dest_cb, s->uvlinesize, block[4]);
                    s->idct_put(dest_cr, s->uvlinesize, block[5]);
                }
            }
        }
    }
}
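/* Heuristic: if a block contains only a few small, isolated coefficients, coding it
   costs more bits than the quality it adds, so the whole block can be zeroed.
   The threshold comes from the caller (luma_elim_threshold / chroma_elim_threshold);
   a negative threshold also allows the DC coefficient to be eliminated. */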
static inline void dct_single_coeff_elimination(MpegEncContext *s, int n, int threshold)
{
    static const char tab[64]=
        {3,2,2,1,1,1,1,1,
         1,1,1,1,1,1,1,1,
         1,1,1,1,1,1,1,1,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0,
         0,0,0,0,0,0,0,0};
    int score=0;
    int run=0;
    int i;
    DCTELEM *block= s->block[n];
    const int last_index= s->block_last_index[n];
    int skip_dc;

    if(threshold<0){
        skip_dc=0;
        threshold= -threshold;
    }else
        skip_dc=1;

    /* are all which we could set to zero are allready zero? */
    if(last_index<=skip_dc - 1) return;

    for(i=0; i<=last_index; i++){
        const int j = s->intra_scantable.permutated[i];
        const int level = ABS(block[j]);
        if(level==1){
            if(skip_dc && i==0) continue;
            score+= tab[run];
            run=0;
        }else if(level>1){
            return;
        }else{
            run++;
        }
    }
    if(score >= threshold) return;
    for(i=skip_dc; i<=last_index; i++){
        const int j = s->intra_scantable.permutated[i];
        block[j]=0;
    }
    if(block[0]) s->block_last_index[n]= 0;
    else         s->block_last_index[n]= -1;
}
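/* Clamp quantized coefficients to [min_qcoeff, max_qcoeff] so they stay within the
   range the entropy coder can represent; the intra DC coefficient is left untouched. */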
static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index)
{
    int i;
    const int maxlevel= s->max_qcoeff;
    const int minlevel= s->min_qcoeff;

    if(s->mb_intra){
        i=1; //skip clipping of intra dc
    }else
        i=0;

    for(;i<=last_index; i++){
        const int j= s->intra_scantable.permutated[i];
        int level = block[j];

        if     (level>maxlevel) level=maxlevel;
        else if(level<minlevel) level=minlevel;
        block[j]= level;
    }
}
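/* Rescale already-quantized coefficients from quantizer oldq to newq and re-compute
   the last non-zero coefficient index of the block. */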
static inline void requantize_coeffs(MpegEncContext *s, DCTELEM block[64], int oldq, int newq, int n)
{
    int i;

    if(s->mb_intra){
        i=1; //skip clipping of intra dc
        //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...)
    }else
        i=0;

    for(;i<=s->block_last_index[n]; i++){
        const int j= s->intra_scantable.permutated[i];
        int level = block[j];

        block[j]= ROUNDED_DIV(level*oldq, newq);
    }

    for(i=s->block_last_index[n]; i>=0; i--){
        const int j = s->intra_scantable.permutated[i];
        if(block[j]) break;
    }
    s->block_last_index[n]= i;
}
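/* Pick a larger quantizer so that every coefficient of the macroblock fits into
   [min_qcoeff, max_qcoeff], then requantize and clip all six blocks.
   Only meaningful with adaptive quantization. */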
static inline void auto_requantize_coeffs(MpegEncContext *s, DCTELEM block[6][64])
{
    int i, n, newq;
    const int maxlevel= s->max_qcoeff;
    const int minlevel= s->min_qcoeff;
    int largest=0, smallest=0;

    assert(s->adaptive_quant);

    for(n=0; n<6; n++){
        if(s->mb_intra){
            i=1; //skip clipping of intra dc
            //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...)
        }else
            i=0;

        for(;i<=s->block_last_index[n]; i++){
            const int j= s->intra_scantable.permutated[i];
            int level = block[n][j];
            if(largest  < level) largest = level;
            if(smallest > level) smallest= level;
        }
    }

    for(newq=s->qscale+1; newq<32; newq++){
        if(   ROUNDED_DIV(smallest*s->qscale, newq) >= minlevel
           && ROUNDED_DIV(largest *s->qscale, newq) <= maxlevel)
            break;
    }

    if(s->out_format==FMT_H263){
        /* h263 like formats cannot change qscale by more than 2 easiely */
        if(s->avctx->qmin + 2 < newq)
            newq= s->avctx->qmin + 2;
    }

    for(n=0; n<6; n++){
        requantize_coeffs(s, block[n], s->qscale, newq, n);
        clip_coeffs(s, block[n], s->block_last_index[n]);
    }

    s->dquant+= newq - s->qscale;
    s->qscale= newq;
}
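/* Field/frame DCT decision helpers: compare vertical correlation between adjacent
   lines (progressive) and lines two apart (interlaced) over a 16x8 half-macroblock. */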
static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize
    int score=0;
    int x, y;

    for(y=0; y<7; y++){
        for(x=0; x<16; x+=4){
            score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride])
                   +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
        }
        s+= stride;
    }

    return score;
}

static int pix_diff_vcmp16x8(UINT8 *s1, UINT8 *s2, int stride){ //FIXME move to dsputil & optimize
    int score=0;
    int x, y;

    for(y=0; y<7; y++){
        for(x=0; x<16; x++){
            score+= ABS(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}
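/* The pair below is an alternative squared-difference variant of the same helpers;
   in the full source one of the two pairs is presumably selected by a preprocessor
   conditional. */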
#define SQ(a) ((a)*(a))

static int pix_vcmp16x8(UINT8 *s, int stride){ //FIXME move to dsputil & optimize
    int score=0;
    int x, y;

    for(y=0; y<7; y++){
        for(x=0; x<16; x+=4){
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
        }
        s+= stride;
    }

    return score;
}

static int pix_diff_vcmp16x8(UINT8 *s1, UINT8 *s2, int stride){ //FIXME move to dsputil & optimize
    int score=0;
    int x, y;

    for(y=0; y<7; y++){
        for(x=0; x<16; x++){
            score+= SQ(s1[x] - s2[x] - s1[x+stride] + s2[x+stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}
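/* Hand a just-coded (or reference) slice to the user supplied draw_horiz_band()
   callback, choosing current_picture for B-frames / low_delay and last_picture
   otherwise. */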
void ff_draw_horiz_band(MpegEncContext *s){
    if (    s->avctx->draw_horiz_band
        && (s->last_picture.data[0] || s->low_delay) ) {
        UINT8 *src_ptr[3];
        int y, h, offset;
        y = s->mb_y * 16;
        h = s->height - y;
        if (h > 16)
            h = 16;

        if(s->pict_type==B_TYPE)
            offset = 0;
        else
            offset = y * s->linesize;

        if(s->pict_type==B_TYPE || s->low_delay){
            src_ptr[0] = s->current_picture.data[0] + offset;
            src_ptr[1] = s->current_picture.data[1] + (offset >> 2);
            src_ptr[2] = s->current_picture.data[2] + (offset >> 2);
        } else {
            src_ptr[0] = s->last_picture.data[0] + offset;
            src_ptr[1] = s->last_picture.data[1] + (offset >> 2);
            src_ptr[2] = s->last_picture.data[2] + (offset >> 2);
        }

        s->avctx->draw_horiz_band(s->avctx, src_ptr, s->linesize,
                                  y, s->width, h);
    }
}
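/* Encode one macroblock: optional adaptive quantizer update, pixel fetch (intra) or
   motion-compensated difference (inter), forward DCT + quantization, and finally the
   format specific entropy coding. */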
static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
{
    const int mb_x= s->mb_x;
    const int mb_y= s->mb_y;
    int i;
    int skip_dct[6];
    int dct_offset= s->linesize*8; //default for progressive frames

    for(i=0; i<6; i++) skip_dct[i]=0;

    if(s->adaptive_quant){
        s->dquant= s->current_picture.qscale_table[mb_x + mb_y*s->mb_width] - s->qscale;

        if(s->out_format==FMT_H263){
            if     (s->dquant> 2) s->dquant= 2;
            else if(s->dquant<-2) s->dquant=-2;
        }

        if(s->codec_id==CODEC_ID_MPEG4){
            if(!s->mb_intra){
                assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8);

                if(s->mv_dir&MV_DIRECT)
                    s->dquant=0;
            }
        }
        s->qscale+= s->dquant;
        s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
        s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];
    }
    if (s->mb_intra) {
        UINT8 *ptr;
        int wrap_y;
        int emu=0;

        wrap_y = s->linesize;
        ptr = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;

        if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
            emulated_edge_mc(s, ptr, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
            ptr= s->edge_emu_buffer;
            emu=1;
        }

        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            progressive_score= pix_vcmp16x8(ptr, wrap_y  ) + pix_vcmp16x8(ptr + wrap_y*8, wrap_y  );
            interlaced_score = pix_vcmp16x8(ptr, wrap_y*2) + pix_vcmp16x8(ptr + wrap_y  , wrap_y*2);

            if(progressive_score > interlaced_score + 100){
                s->interlaced_dct=1;

                dct_offset= wrap_y;
                wrap_y<<=1;
            }else
                s->interlaced_dct=0;
        }

        s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
        s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
        s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
        s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            int wrap_c = s->uvlinesize;
            ptr = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
            if(emu){
                emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                ptr= s->edge_emu_buffer;
            }
            s->dsp.get_pixels(s->block[4], ptr, wrap_c);

            ptr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
            if(emu){
                emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                ptr= s->edge_emu_buffer;
            }
            s->dsp.get_pixels(s->block[5], ptr, wrap_c);
        }
    }else{
        op_pixels_func (*op_pix)[4];
        qpel_mc_func (*op_qpix)[16];
        UINT8 *dest_y, *dest_cb, *dest_cr;
        UINT8 *ptr_y, *ptr_cb, *ptr_cr;
        int wrap_y, wrap_c;
        int emu=0;

        dest_y  = s->current_picture.data[0] + (mb_y * 16 * s->linesize     ) + mb_x * 16;
        dest_cb = s->current_picture.data[1] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8;
        dest_cr = s->current_picture.data[2] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8;
        wrap_y = s->linesize;
        wrap_c = s->uvlinesize;
        ptr_y  = s->new_picture.data[0] + (mb_y * 16 * wrap_y) + mb_x * 16;
        ptr_cb = s->new_picture.data[1] + (mb_y * 8 * wrap_c) + mb_x * 8;
        ptr_cr = s->new_picture.data[2] + (mb_y * 8 * wrap_c) + mb_x * 8;

        if ((!s->no_rounding) || s->pict_type==B_TYPE){
            op_pix = s->dsp.put_pixels_tab;
            op_qpix= s->dsp.put_qpel_pixels_tab;
        }else{
            op_pix = s->dsp.put_no_rnd_pixels_tab;
            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
        }

        if (s->mv_dir & MV_DIR_FORWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix);
            op_pix = s->dsp.avg_pixels_tab;
            op_qpix= s->dsp.avg_qpel_pixels_tab;
        }
        if (s->mv_dir & MV_DIR_BACKWARD) {
            MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);
        }

        if(mb_x*16+16 > s->width || mb_y*16+16 > s->height){
            emulated_edge_mc(s, ptr_y, wrap_y, 16, 16, mb_x*16, mb_y*16, s->width, s->height);
            ptr_y= s->edge_emu_buffer;
            emu=1;
        }

        if(s->flags&CODEC_FLAG_INTERLACED_DCT){
            int progressive_score, interlaced_score;

            progressive_score= pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y  )
                             + pix_diff_vcmp16x8(ptr_y + wrap_y*8, dest_y + wrap_y*8, wrap_y  );
            interlaced_score = pix_diff_vcmp16x8(ptr_y           , dest_y           , wrap_y*2)
                             + pix_diff_vcmp16x8(ptr_y + wrap_y  , dest_y + wrap_y  , wrap_y*2);

            if(progressive_score > interlaced_score + 600){
                s->interlaced_dct=1;

                dct_offset= wrap_y;
                wrap_y<<=1;
            }else
                s->interlaced_dct=0;
        }

        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);

        if(s->flags&CODEC_FLAG_GRAY){
            skip_dct[4]= 1;
            skip_dct[5]= 1;
        }else{
            if(emu){
                emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                ptr_cb= s->edge_emu_buffer;
            }
            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
            if(emu){
                emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                ptr_cr= s->edge_emu_buffer;
            }
            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
        }
        /* pre quantization */
        if(s->current_picture.mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
            if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
            if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
            if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
            if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
            if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_c) < 20*s->qscale) skip_dct[4]= 1;
            if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_c) < 20*s->qscale) skip_dct[5]= 1;
#if 0
            {
                static int stat[7];
                int num=0;
                for(i=0; i<6; i++)
                    if(skip_dct[i]) num++;
                stat[num]++;
                if(s->mb_x==0 && s->mb_y==0){
                    for(i=0; i<7; i++){
                        printf("%6d %1d\n", stat[i], i);
                    }
                }
            }
#endif
        }
    }

#if 0
    {
        float adap_parm;

        adap_parm = ((s->avg_mb_var << 1) + s->mb_var[s->mb_width*mb_y+mb_x] + 1.0) /
                    ((s->mb_var[s->mb_width*mb_y+mb_x] << 1) + s->avg_mb_var + 1.0);

        printf("\ntype=%c qscale=%2d adap=%0.2f dquant=%4.2f var=%4d avgvar=%4d",
                (s->mb_type[s->mb_width*mb_y+mb_x] > 0) ? 'I' : 'P',
                s->qscale, adap_parm, s->qscale*adap_parm,
                s->mb_var[s->mb_width*mb_y+mb_x], s->avg_mb_var);
    }
#endif
    /* DCT & quantize */
    if(s->out_format==FMT_MJPEG){
        for(i=0;i<6;i++) {
            int overflow;
            s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, 8, &overflow);
            if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
        }
    }else{
        for(i=0;i<6;i++) {
            if(!skip_dct[i]){
                int overflow;
                s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
            // FIXME we could decide to change to quantizer instead of clipping
            // JS: I don't think that would be a good idea it could lower quality instead
            //     of improve it. Just INTRADC clipping deserves changes in quantizer
                if (overflow) clip_coeffs(s, s->block[i], s->block_last_index[i]);
            }else
                s->block_last_index[i]= -1;
        }
        if(s->luma_elim_threshold && !s->mb_intra)
            for(i=0; i<4; i++)
                dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
        if(s->chroma_elim_threshold && !s->mb_intra)
            for(i=4; i<6; i++)
                dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
    }

    if((s->flags&CODEC_FLAG_GRAY) && s->mb_intra){
        s->block_last_index[4]=
        s->block_last_index[5]= 0;
        s->block[4][0]=
        s->block[5][0]= 128;
    }

#ifdef CONFIG_ENCODERS
    /* huffman encode */
    switch(s->out_format) {
    case FMT_MPEG1:
        mpeg1_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case FMT_H263:
        if (s->h263_msmpeg4)
            msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
        else if(s->h263_pred)
            mpeg4_encode_mb(s, s->block, motion_x, motion_y);
        else
            h263_encode_mb(s, s->block, motion_x, motion_y);
        break;
    case FMT_MJPEG:
        mjpeg_encode_mb(s, s->block);
        break;
    }
#endif
}
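/* Append 'length' bits from src to the given PutBitContext, 16 bits at a time
   (used to merge the candidate bitstreams produced by encode_mb_hq). */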
void ff_copy_bits(PutBitContext *pb, UINT8 *src, int length)
{
    int bytes= length>>4;
    int bits= length&15;
    int i;

    if(length==0) return;

    for(i=0; i<bytes; i++) put_bits(pb, 16, be2me_16(((uint16_t*)src)[i]));
    put_bits(pb, bits, be2me_16(((uint16_t*)src)[i])>>(16-bits));
}
static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext *s, int type){
    int i;

    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?

    /* mpeg1 */
    d->mb_incr= s->mb_incr;
    for(i=0; i<3; i++)
        d->last_dc[i]= s->last_dc[i];

    /* statistics */
    d->mv_bits= s->mv_bits;
    d->i_tex_bits= s->i_tex_bits;
    d->p_tex_bits= s->p_tex_bits;
    d->i_count= s->i_count;
    d->f_count= s->f_count;
    d->b_count= s->b_count;
    d->skip_count= s->skip_count;
    d->misc_bits= s->misc_bits;

    d->mb_skiped= s->mb_skiped;
    d->qscale= s->qscale;
}
static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
    int i;

    memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
    memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?

    /* mpeg1 */
    d->mb_incr= s->mb_incr;
    for(i=0; i<3; i++)
        d->last_dc[i]= s->last_dc[i];

    /* statistics */
    d->mv_bits= s->mv_bits;
    d->i_tex_bits= s->i_tex_bits;
    d->p_tex_bits= s->p_tex_bits;
    d->i_count= s->i_count;
    d->f_count= s->f_count;
    d->b_count= s->b_count;
    d->skip_count= s->skip_count;
    d->misc_bits= s->misc_bits;

    d->mb_intra= s->mb_intra;
    d->mb_skiped= s->mb_skiped;
    d->mv_type= s->mv_type;
    d->mv_dir= s->mv_dir;
    d->pb= s->pb;
    if(s->data_partitioning){
        d->pb2= s->pb2;
        d->tex_pb= s->tex_pb;
    }
    d->block= s->block;
    for(i=0; i<6; i++)
        d->block_last_index[i]= s->block_last_index[i];
    d->interlaced_dct= s->interlaced_dct;
    d->qscale= s->qscale;
}
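/* Trial-encode one macroblock with the given mb type into a scratch PutBitContext;
   if it beats the best candidate so far (fewest bits), keep its context and
   remember which scratch buffer holds it. */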
static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                                PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                                int *dmin, int *next_block, int motion_x, int motion_y)
{
    int bits_count;

    copy_context_before_encode(s, backup, type);

    s->block= s->blocks[*next_block];
    s->pb= pb[*next_block];
    if(s->data_partitioning){
        s->pb2   = pb2   [*next_block];
        s->tex_pb= tex_pb[*next_block];
    }

    encode_mb(s, motion_x, motion_y);

    bits_count= get_bit_count(&s->pb);
    if(s->data_partitioning){
        bits_count+= get_bit_count(&s->pb2);
        bits_count+= get_bit_count(&s->tex_pb);
    }

    if(bits_count<*dmin){
        *dmin= bits_count;
        *next_block^=1;

        copy_context_after_encode(best, s, type);
    }
}
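/* Sum of squared errors over a w x h block; uses the optimized pix_norm() for the
   common 16x16 case. */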
static inline int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
    uint32_t *sq = squareTbl + 256;
    int acc=0;
    int x,y;

    if(w==16 && h==16)
        return s->dsp.pix_norm(src1, src2, stride);

    for(y=0; y<h; y++){
        for(x=0; x<w; x++){
            acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
        }
    }
    return acc;
}
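/* Encode one complete picture: motion estimation, picture type / f_code decisions,
   rate control, picture header, and the per-macroblock encoding loop including the
   optional RTP/GOB packetization. */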
static void encode_picture(MpegEncContext *s, int picture_number)
{
    int mb_x, mb_y, pdif = 0;
    int i;
    int bits;
    MpegEncContext best_s, backup_s;
    UINT8 bit_buf[2][3000];
    UINT8 bit_buf2[2][3000];
    UINT8 bit_buf_tex[2][3000];
    PutBitContext pb[2], pb2[2], tex_pb[2];

    for(i=0; i<2; i++){
        init_put_bits(&pb    [i], bit_buf    [i], 3000, NULL, NULL);
        init_put_bits(&pb2   [i], bit_buf2   [i], 3000, NULL, NULL);
        init_put_bits(&tex_pb[i], bit_buf_tex[i], 3000, NULL, NULL);
    }

    s->picture_number = picture_number;

    s->block_wrap[0]=
    s->block_wrap[1]=
    s->block_wrap[2]=
    s->block_wrap[3]= s->mb_width*2 + 2;
    s->block_wrap[4]=
    s->block_wrap[5]= s->mb_width + 2;

    /* Reset the average MB variance */
    s->current_picture.mb_var_sum = 0;
    s->current_picture.mc_mb_var_sum = 0;

    /* we need to initialize some time vars before we can encode b-frames */
    if (s->h263_pred && !s->h263_msmpeg4)
        ff_set_mpeg4_time(s, s->picture_number);

    s->scene_change_score=0;

    s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... stuff for ME ratedistoration
    /* Estimate motion for every MB */
    if(s->pict_type != I_TYPE){
        for(mb_y=0; mb_y < s->mb_height; mb_y++) {
            s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
            s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
            s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
            s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
            for(mb_x=0; mb_x < s->mb_width; mb_x++) {
                s->mb_x = mb_x;
                s->mb_y = mb_y;
                s->block_index[0]+=2;
                s->block_index[1]+=2;
                s->block_index[2]+=2;
                s->block_index[3]+=2;

                /* compute motion vector & mb_type and store in context */
                if(s->pict_type==B_TYPE)
                    ff_estimate_b_frame_motion(s, mb_x, mb_y);
                else
                    ff_estimate_p_frame_motion(s, mb_x, mb_y);
//                s->mb_type[mb_y*s->mb_width + mb_x]=MB_TYPE_INTER;
            }
        }
    }else /* if(s->pict_type == I_TYPE) */{
        //FIXME do we need to zero them?
        memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
        memset(s->p_mv_table   , 0, sizeof(INT16)*(s->mb_width+2)*(s->mb_height+2)*2);
        memset(s->mb_type      , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);

        if(!s->fixed_qscale){
            /* finding spatial complexity for I-frame rate control */
            for(mb_y=0; mb_y < s->mb_height; mb_y++) {
                for(mb_x=0; mb_x < s->mb_width; mb_x++) {
                    int xx = mb_x * 16;
                    int yy = mb_y * 16;
                    uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
                    int varc;
                    int sum = s->dsp.pix_sum(pix, s->linesize);

                    varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;

                    s->current_picture.mb_var [s->mb_width * mb_y + mb_x] = varc;
                    s->current_picture.mb_mean[s->mb_width * mb_y + mb_x] = (sum+128)>>8;
                    s->current_picture.mb_var_sum += varc;
                }
            }
        }
    }
    if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
        s->pict_type= I_TYPE;
        memset(s->mb_type, MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
//printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
    }

    if(s->pict_type==P_TYPE || s->pict_type==S_TYPE)
        s->f_code= ff_get_best_fcode(s, s->p_mv_table, MB_TYPE_INTER);
    ff_fix_long_p_mvs(s);

    if(s->pict_type==B_TYPE){
        s->f_code= ff_get_best_fcode(s, s->b_forw_mv_table, MB_TYPE_FORWARD);
        s->b_code= ff_get_best_fcode(s, s->b_back_mv_table, MB_TYPE_BACKWARD);

        ff_fix_long_b_mvs(s, s->b_forw_mv_table, s->f_code, MB_TYPE_FORWARD);
        ff_fix_long_b_mvs(s, s->b_back_mv_table, s->b_code, MB_TYPE_BACKWARD);
        ff_fix_long_b_mvs(s, s->b_bidir_forw_mv_table, s->f_code, MB_TYPE_BIDIR);
        ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR);
    }

    if (s->fixed_qscale)
        s->frame_qscale= s->current_picture.quality;
    else
        s->frame_qscale= ff_rate_estimate_qscale(s);
    if(s->adaptive_quant){
        switch(s->codec_id){
        case CODEC_ID_MPEG4:
            ff_clean_mpeg4_qscales(s);
            break;
        case CODEC_ID_H263:
        case CODEC_ID_H263P:
            ff_clean_h263_qscales(s);
            break;
        }

        s->qscale= s->current_picture.qscale_table[0];
    }else
        s->qscale= (int)(s->frame_qscale + 0.5);
    if (s->out_format == FMT_MJPEG) {
        /* for mjpeg, we do include qscale in the matrix */
        s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
        for(i=1;i<64;i++){
            int j= s->idct_permutation[i];

            s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
        }
        convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
                       s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8);
    }
    //FIXME var duplication
    s->current_picture.key_frame= s->pict_type == I_TYPE;
    s->current_picture.pict_type= s->pict_type;

    if(s->current_picture.key_frame)
        s->picture_in_gop_number=0;

    s->last_bits= get_bit_count(&s->pb);
    switch(s->out_format) {
    case FMT_MJPEG:
        mjpeg_picture_header(s);
        break;
    case FMT_H263:
        if (s->h263_msmpeg4)
            msmpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_pred)
            mpeg4_encode_picture_header(s, picture_number);
        else if (s->h263_rv10)
            rv10_encode_picture_header(s, picture_number);
        else
            h263_encode_picture_header(s, picture_number);
        break;
    case FMT_MPEG1:
        mpeg1_encode_picture_header(s, picture_number);
        break;
    }
    bits= get_bit_count(&s->pb);
    s->header_bits= bits - s->last_bits;
    for(i=0;i<3;i++){
        /* init last dc values */
        /* note: quant matrix value (8) is implied here */
        s->last_dc[i] = 128;

        s->current_picture.error[i] = 0;
    }
    s->last_mv[0][0][0] = 0;
    s->last_mv[0][0][1] = 0;

    if (s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P)
        s->gob_index = ff_h263_get_gob_height(s);

    if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
        ff_mpeg4_init_partitions(s);
    s->first_slice_line = 1;
    s->ptr_lastgob = s->pb.buf;
    s->ptr_last_mb_line = s->pb.buf;
    for(mb_y=0; mb_y < s->mb_height; mb_y++) {
        s->y_dc_scale= s->y_dc_scale_table[ s->qscale ];
        s->c_dc_scale= s->c_dc_scale_table[ s->qscale ];

        s->block_index[0]= s->block_wrap[0]*(mb_y*2 + 1) - 1;
        s->block_index[1]= s->block_wrap[0]*(mb_y*2 + 1);
        s->block_index[2]= s->block_wrap[0]*(mb_y*2 + 2) - 1;
        s->block_index[3]= s->block_wrap[0]*(mb_y*2 + 2);
        s->block_index[4]= s->block_wrap[4]*(mb_y + 1)                    + s->block_wrap[0]*(s->mb_height*2 + 2);
        s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2);
        for(mb_x=0; mb_x < s->mb_width; mb_x++) {
            const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x];
            const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1;
            s->block_index[0]+=2;
            s->block_index[1]+=2;
            s->block_index[2]+=2;
            s->block_index[3]+=2;
            s->block_index[4]++;
            s->block_index[5]++;

            /* write gob / video packet header */
            if(s->rtp_mode){
                int current_packet_size, is_gob_start;

                current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
                is_gob_start=0;

                if(s->codec_id==CODEC_ID_MPEG4){
                    if(current_packet_size + s->mb_line_avgsize/s->mb_width >= s->rtp_payload_size
                       && s->mb_y + s->mb_x>0){

                        if(s->partitioned_frame){
                            ff_mpeg4_merge_partitions(s);
                            ff_mpeg4_init_partitions(s);
                        }
                        ff_mpeg4_encode_video_packet_header(s);

                        if(s->flags&CODEC_FLAG_PASS1){
                            int bits= get_bit_count(&s->pb);
                            s->misc_bits+= bits - s->last_bits;
                            s->last_bits= bits;
                        }
                        ff_mpeg4_clean_buffers(s);
                        is_gob_start=1;
                    }
                }else{
                    if(current_packet_size + s->mb_line_avgsize*s->gob_index >= s->rtp_payload_size
                       && s->mb_x==0 && s->mb_y>0 && s->mb_y%s->gob_index==0){

                        h263_encode_gob_header(s, mb_y);
                        is_gob_start=1;
                    }
                }

                if(is_gob_start){
                    s->ptr_lastgob = pbBufPtr(&s->pb);
                    s->first_slice_line=1;
                    s->resync_mb_x=mb_x;
                    s->resync_mb_y=mb_y;
                }
            }

            if(  (s->resync_mb_x   == s->mb_x)
               && s->resync_mb_y+1 == s->mb_y){
                s->first_slice_line=0;
            }
            if(mb_type & (mb_type-1)){ // more than 1 MB type possible
                int next_block=0;
                int pb_bits_count, pb2_bits_count, tex_pb_bits_count;

                copy_context_before_encode(&backup_s, s, -1);
                backup_s.pb= s->pb;
                best_s.data_partitioning= s->data_partitioning;
                best_s.partitioned_frame= s->partitioned_frame;
                if(s->data_partitioning){
                    backup_s.pb2= s->pb2;
                    backup_s.tex_pb= s->tex_pb;
                }

                if(mb_type&MB_TYPE_INTER){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->p_mv_table[xy][0];
                    s->mv[0][0][1] = s->p_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                }
                if(mb_type&MB_TYPE_INTER4V){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_8X8;
                    s->mb_intra= 0;
                    for(i=0; i<4; i++){
                        s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
                        s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
                    }
                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTER4V, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&MB_TYPE_FORWARD){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_FORWARD, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                }
                if(mb_type&MB_TYPE_BACKWARD){
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_back_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BACKWARD, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
                }
                if(mb_type&MB_TYPE_BIDIR){
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_BIDIR, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                }
                if(mb_type&MB_TYPE_DIRECT){
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                    s->mv_type = MV_TYPE_16X16; //FIXME
                    s->mb_intra= 0;
                    s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
                    s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_DIRECT, pb, pb2, tex_pb,
                                 &dmin, &next_block, s->b_direct_mv_table[xy][0], s->b_direct_mv_table[xy][1]);
                }
                if(mb_type&MB_TYPE_INTRA){
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_16X16;
                    s->mb_intra= 1;
                    s->mv[0][0][0] = 0;
                    s->mv[0][0][1] = 0;
                    encode_mb_hq(s, &backup_s, &best_s, MB_TYPE_INTRA, pb, pb2, tex_pb,
                                 &dmin, &next_block, 0, 0);
                    /* force cleaning of ac/dc pred stuff if needed ... */
                    if(s->h263_pred || s->h263_aic)
                        s->mbintra_table[mb_x + mb_y*s->mb_width]=1;
                }
                copy_context_after_encode(s, &best_s, -1);

                pb_bits_count= get_bit_count(&s->pb);
                flush_put_bits(&s->pb);
                ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
                s->pb= backup_s.pb;

                if(s->data_partitioning){
                    pb2_bits_count= get_bit_count(&s->pb2);
                    flush_put_bits(&s->pb2);
                    ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
                    s->pb2= backup_s.pb2;

                    tex_pb_bits_count= get_bit_count(&s->tex_pb);
                    flush_put_bits(&s->tex_pb);
                    ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
                    s->tex_pb= backup_s.tex_pb;
                }
                s->last_bits= get_bit_count(&s->pb);
            } else {
                int motion_x, motion_y;
                s->mv_type=MV_TYPE_16X16;
                // only one MB-Type possible

                switch(mb_type){
                case MB_TYPE_INTRA:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mb_intra= 1;
                    motion_x= s->mv[0][0][0] = 0;
                    motion_y= s->mv[0][0][1] = 0;
                    break;
                case MB_TYPE_INTER:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
                    motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
                    break;
                case MB_TYPE_INTER4V:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mv_type = MV_TYPE_8X8;
                    s->mb_intra= 0;
                    for(i=0; i<4; i++){
                        s->mv[0][i][0] = s->motion_val[s->block_index[i]][0];
                        s->mv[0][i][1] = s->motion_val[s->block_index[i]][1];
                    }
                    motion_x= motion_y= 0;
                    break;
                case MB_TYPE_DIRECT:
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                    s->mb_intra= 0;
                    motion_x=s->b_direct_mv_table[xy][0];
                    motion_y=s->b_direct_mv_table[xy][1];
                    s->mv[0][0][0] = s->b_direct_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_direct_forw_mv_table[xy][1];
                    s->mv[1][0][0] = s->b_direct_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_direct_back_mv_table[xy][1];
                    break;
                case MB_TYPE_BIDIR:
                    s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                    s->mb_intra= 0;
                    motion_x=0;
                    motion_y=0;
                    s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
                    s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
                    s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                    s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
                    break;
                case MB_TYPE_BACKWARD:
                    s->mv_dir = MV_DIR_BACKWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                    motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
                    break;
                case MB_TYPE_FORWARD:
                    s->mv_dir = MV_DIR_FORWARD;
                    s->mb_intra= 0;
                    motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                    motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
//                    printf(" %d %d ", motion_x, motion_y);
                    break;
                default:
                    motion_x=motion_y=0; //gcc warning fix
                    printf("illegal MB type\n");
                }
                encode_mb(s, motion_x, motion_y);
            }
            /* clean the MV table in IPS frames for direct mode in B frames */
            if(s->mb_intra /* && I,P,S_TYPE */){
                s->p_mv_table[xy][0]=0;
                s->p_mv_table[xy][1]=0;
            }

            MPV_decode_mb(s, s->block);

            if(s->flags&CODEC_FLAG_PSNR){
                int w= 16;
                int h= 16;

                if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
                if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;

                s->current_picture.error[0] += sse(
                    s,
                    s->new_picture    .data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                    s->current_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                    w, h, s->linesize);
                s->current_picture.error[1] += sse(
                    s,
                    s->new_picture    .data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,
                    s->current_picture.data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,
                    w>>1, h>>1, s->uvlinesize);
                s->current_picture.error[2] += sse(
                    s,
                    s->new_picture    .data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,
                    s->current_picture.data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*8,
                    w>>1, h>>1, s->uvlinesize);
            }
//printf("MB %d %d bits\n", s->mb_x+s->mb_y*s->mb_width, get_bit_count(&s->pb));
        }
        /* Obtain average mb_row size for RTP */
        if (s->rtp_mode) {
            if (mb_y==0)
                s->mb_line_avgsize = pbBufPtr(&s->pb) - s->ptr_last_mb_line;
            else
                s->mb_line_avgsize = (s->mb_line_avgsize + pbBufPtr(&s->pb) - s->ptr_last_mb_line) >> 1;
            s->ptr_last_mb_line = pbBufPtr(&s->pb);
        }
    }
    if(s->codec_id==CODEC_ID_MPEG4 && s->partitioned_frame)
        ff_mpeg4_merge_partitions(s);

    if (s->msmpeg4_version && s->msmpeg4_version<4 && s->pict_type == I_TYPE)
        msmpeg4_encode_ext_header(s);

    if(s->codec_id==CODEC_ID_MPEG4)
        ff_mpeg4_stuffing(&s->pb);

    //if (s->gob_number)
    //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);

    /* Send the last GOB if RTP */
    if (s->rtp_mode) {
        flush_put_bits(&s->pb);
        pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
        /* Call the RTP callback to send the last GOB */
        if (s->rtp_callback)
            s->rtp_callback(s->ptr_lastgob, pdif, s->gob_number);
        s->ptr_lastgob = pbBufPtr(&s->pb);
        //fprintf(stderr,"\nGOB: %2d size: %d (last)", s->gob_number, pdif);
    }
}
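/* Reference C quantizer: forward DCT, then quantize with the per-qscale matrices
   built by convert_matrix(); returns the last non-zero coefficient index and reports
   overflow so the caller can clip. */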
static int dct_quantize_c(MpegEncContext *s,
                          DCTELEM *block, int n,
                          int qscale, int *overflow)
{
    int i, j, level, last_non_zero, q;
    const int *qmat;
    const UINT8 *scantable= s->intra_scantable.scantable;
    int bias;
    int max=0;
    unsigned int threshold1, threshold2;

    s->fdct(block);

    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                q = s->y_dc_scale;
            else
                q = s->c_dc_scale;
            q = q << 3;
        } else
            /* For AIC we skip quant/dequant of INTRADC */
            q = 1 << 3;

        /* note: block[0] is assumed to be positive */
        block[0] = (block[0] + (q >> 1)) / q;
        i = 1;
        last_non_zero = 0;
        qmat = s->q_intra_matrix[qscale];
        bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    } else {
        i = 0;
        last_non_zero = -1;
        qmat = s->q_inter_matrix[qscale];
        bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT);
    }
    threshold1= (1<<QMAT_SHIFT) - bias - 1;
    threshold2= (threshold1<<1);

    for(;i<64;i++) {
        j = scantable[i];
        level = block[j];
        level = level * qmat[j];

//        if(   bias+level >= (1<<(QMAT_SHIFT - 3))
//           || bias-level >= (1<<(QMAT_SHIFT - 3))){
        if(((unsigned)(level+threshold1))>threshold2){
            if(level>0){
                level= (bias + level)>>QMAT_SHIFT;
                block[j]= level;
            }else{
                level= (bias - level)>>QMAT_SHIFT;
                block[j]= -level;
            }
            max |= level;
            last_non_zero = i;
        }else{
            block[j]=0;
        }
    }
    *overflow= s->max_qcoeff < max; //overflow might have happend

    /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
    if (s->idct_permutation_type != FF_NO_IDCT_PERM)
        ff_block_permute(block, s->idct_permutation, scantable, last_non_zero);

    return last_non_zero;
}
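/* Reference C dequantizers for MPEG-1, MPEG-2 and H.263 style streams follow;
   the PARANOID checks only report out-of-range levels. */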
static void dct_unquantize_mpeg1_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale)
{
    int i, level, nCoeffs;
    const UINT16 *quant_matrix;

    nCoeffs= s->block_last_index[n];

    if (s->mb_intra) {
        if (n < 4)
            block[0] = block[0] * s->y_dc_scale;
        else
            block[0] = block[0] * s->c_dc_scale;
        /* XXX: only mpeg1 */
        quant_matrix = s->intra_matrix;
        for(i=1;i<=nCoeffs;i++) {
            int j= s->intra_scantable.permutated[i];
            level = block[j];
            if (level) {
                if (level < 0) {
                    level = -level;
                    level = (int)(level * qscale * quant_matrix[j]) >> 3;
                    level = (level - 1) | 1;
                    level = -level;
                } else {
                    level = (int)(level * qscale * quant_matrix[j]) >> 3;
                    level = (level - 1) | 1;
                }
#ifdef PARANOID
                if (level < -2048 || level > 2047)
                    fprintf(stderr, "unquant error %d %d\n", i, level);
#endif
                block[j] = level;
            }
        }
    } else {
        i = 0;
        quant_matrix = s->inter_matrix;
        for(;i<=nCoeffs;i++) {
            int j= s->intra_scantable.permutated[i];
            level = block[j];
            if (level) {
                if (level < 0) {
                    level = -level;
                    level = (((level << 1) + 1) * qscale *
                             ((int) (quant_matrix[j]))) >> 4;
                    level = (level - 1) | 1;
                    level = -level;
                } else {
                    level = (((level << 1) + 1) * qscale *
                             ((int) (quant_matrix[j]))) >> 4;
                    level = (level - 1) | 1;
                }
#ifdef PARANOID
                if (level < -2048 || level > 2047)
                    fprintf(stderr, "unquant error %d %d\n", i, level);
#endif
                block[j] = level;
            }
        }
    }
}
static void dct_unquantize_mpeg2_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale)
{
    int i, level, nCoeffs;
    const UINT16 *quant_matrix;

    if(s->alternate_scan) nCoeffs= 63;
    else nCoeffs= s->block_last_index[n];

    if (s->mb_intra) {
        if (n < 4)
            block[0] = block[0] * s->y_dc_scale;
        else
            block[0] = block[0] * s->c_dc_scale;
        quant_matrix = s->intra_matrix;
        for(i=1;i<=nCoeffs;i++) {
            int j= s->intra_scantable.permutated[i];
            level = block[j];
            if (level) {
                if (level < 0) {
                    level = -level;
                    level = (int)(level * qscale * quant_matrix[j]) >> 3;
                    level = -level;
                } else {
                    level = (int)(level * qscale * quant_matrix[j]) >> 3;
                }
#ifdef PARANOID
                if (level < -2048 || level > 2047)
                    fprintf(stderr, "unquant error %d %d\n", i, level);
#endif
                block[j] = level;
            }
        }
    } else {
        i = 0;
        quant_matrix = s->inter_matrix;
        for(;i<=nCoeffs;i++) {
            int j= s->intra_scantable.permutated[i];
            level = block[j];
            if (level) {
                if (level < 0) {
                    level = -level;
                    level = (((level << 1) + 1) * qscale *
                             ((int) (quant_matrix[j]))) >> 4;
                    level = -level;
                } else {
                    level = (((level << 1) + 1) * qscale *
                             ((int) (quant_matrix[j]))) >> 4;
                }
#ifdef PARANOID
                if (level < -2048 || level > 2047)
                    fprintf(stderr, "unquant error %d %d\n", i, level);
#endif
                block[j] = level;
            }
        }
    }
}
static void dct_unquantize_h263_c(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale)
{
    int i, level, qmul, qadd;
    int nCoeffs;

    assert(s->block_last_index[n]>=0);

    qadd = (qscale - 1) | 1;
    qmul = qscale << 1;

    if (s->mb_intra) {
        if (!s->h263_aic) {
            if (n < 4)
                block[0] = block[0] * s->y_dc_scale;
            else
                block[0] = block[0] * s->c_dc_scale;
        }else
            qadd = 0;
        i = 1;
        nCoeffs= 63; //does not allways use zigzag table
    } else {
        i = 0;
        nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
    }

    for(;i<=nCoeffs;i++) {
        level = block[i];
        if (level) {
            if (level < 0) {
                level = level * qmul - qadd;
            } else {
                level = level * qmul + qadd;
            }
#ifdef PARANOID
            if (level < -2048 || level > 2047)
                fprintf(stderr, "unquant error %d %d\n", i, level);
#endif
            block[i] = level;
        }
    }
}
char ff_get_pict_type_char(int pict_type){
    switch(pict_type){
    case I_TYPE: return 'I';
    case P_TYPE: return 'P';
    case B_TYPE: return 'B';
    case S_TYPE: return 'S';
    default:     return '?';
    }
}
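/* Encoder registration tables. Fields follow the AVCodec layout of this libavcodec
   version: name, type, id, priv_data_size, init, encode, close. */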
AVCodec mpeg1video_encoder = {
    "mpeg1video",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG1VIDEO,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};

AVCodec h263_encoder = {
    "h263",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};

AVCodec h263p_encoder = {
    "h263p",
    CODEC_TYPE_VIDEO,
    CODEC_ID_H263P,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};

AVCodec rv10_encoder = {
    "rv10",
    CODEC_TYPE_VIDEO,
    CODEC_ID_RV10,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};

AVCodec mjpeg_encoder = {
    "mjpeg",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MJPEG,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};

AVCodec mpeg4_encoder = {
    "mpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MPEG4,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};

AVCodec msmpeg4v1_encoder = {
    "msmpeg4v1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};

AVCodec msmpeg4v2_encoder = {
    "msmpeg4v2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};

AVCodec msmpeg4v3_encoder = {
    "msmpeg4",
    CODEC_TYPE_VIDEO,
    CODEC_ID_MSMPEG4V3,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};

AVCodec wmv1_encoder = {
    "wmv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV1,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};

AVCodec wmv2_encoder = {
    "wmv2",
    CODEC_TYPE_VIDEO,
    CODEC_ID_WMV2,
    sizeof(MpegEncContext),
    MPV_encode_init,
    MPV_encode_picture,
    MPV_encode_end,
};