libavcodec/mpegvideo_enc.c
1 /*
2 * The simplest mpeg encoder (well, it was the simplest!)
3 * Copyright (c) 2000,2001 Fabrice Bellard
4 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
6 * 4MV & hq & B-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
25 /*
26 * non linear quantizers with large QPs and VBV with restrictive qmin fixes sponsored by NOA GmbH
27 */
29 /**
30 * @file
31 * The simplest mpeg encoder (well, it was the simplest!).
32 */
34 #include "config_components.h"
36 #include <stdint.h>
38 #include "libavutil/emms.h"
39 #include "libavutil/internal.h"
40 #include "libavutil/intmath.h"
41 #include "libavutil/mathematics.h"
42 #include "libavutil/mem.h"
43 #include "libavutil/mem_internal.h"
44 #include "libavutil/opt.h"
45 #include "libavutil/thread.h"
46 #include "avcodec.h"
47 #include "encode.h"
48 #include "idctdsp.h"
49 #include "mpeg12codecs.h"
50 #include "mpeg12data.h"
51 #include "mpeg12enc.h"
52 #include "mpegvideo.h"
53 #include "mpegvideodata.h"
54 #include "mpegvideoenc.h"
55 #include "h261enc.h"
56 #include "h263.h"
57 #include "h263data.h"
58 #include "h263enc.h"
59 #include "mjpegenc_common.h"
60 #include "mathops.h"
61 #include "mpegutils.h"
62 #include "mjpegenc.h"
63 #include "speedhqenc.h"
64 #include "msmpeg4enc.h"
65 #include "pixblockdsp.h"
66 #include "qpeldsp.h"
67 #include "faandct.h"
68 #include "aandcttab.h"
69 #include "flvenc.h"
70 #include "mpeg4video.h"
71 #include "mpeg4videodata.h"
72 #include "mpeg4videoenc.h"
73 #include "internal.h"
74 #include "bytestream.h"
75 #include "wmv2enc.h"
76 #include "rv10enc.h"
77 #include "packet_internal.h"
78 #include "libavutil/refstruct.h"
79 #include <limits.h>
80 #include "sp5x.h"
82 #define QUANT_BIAS_SHIFT 8
84 #define QMAT_SHIFT_MMX 16
85 #define QMAT_SHIFT 21
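/* Fixed-point scaling constants: quantizer biases are stored with
 * QUANT_BIAS_SHIFT fractional bits, and the reciprocal quantizer tables
 * built in ff_convert_matrix() below use QMAT_SHIFT fractional bits
 * (QMAT_SHIFT_MMX for the 16-bit tables). */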
87 static int encode_picture(MpegEncContext *s, const AVPacket *pkt);
88 static int dct_quantize_refine(MpegEncContext *s, int16_t *block, int16_t *weight, int16_t *orig, int n, int qscale);
89 static int sse_mb(MpegEncContext *s);
90 static void denoise_dct_c(MpegEncContext *s, int16_t *block);
91 static int dct_quantize_c(MpegEncContext *s,
92 int16_t *block, int n,
93 int qscale, int *overflow);
94 static int dct_quantize_trellis_c(MpegEncContext *s, int16_t *block, int n, int qscale, int *overflow);
96 static uint8_t default_mv_penalty[MAX_FCODE + 1][MAX_DMV * 2 + 1];
97 static uint8_t default_fcode_tab[MAX_MV * 2 + 1];
99 static const AVOption mpv_generic_options[] = {
100 FF_MPV_COMMON_OPTS
101 FF_MPV_COMMON_MOTION_EST_OPTS
102 { NULL },
103 };
105 const AVClass ff_mpv_enc_class = {
106 .class_name = "generic mpegvideo encoder",
107 .item_name = av_default_item_name,
108 .option = mpv_generic_options,
109 .version = LIBAVUTIL_VERSION_INT,
110 };
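/* ff_convert_matrix() turns an 8-bit quantization matrix into per-qscale
 * reciprocal tables so a coefficient can be quantized with a multiply and a
 * shift instead of a division.  Rough worked example (assuming the generic
 * integer path with qscale2 = 2 and quant_matrix[j] = 16):
 *     qmat = (2 << QMAT_SHIFT) / (2 * 16) = (2 << 21) / 32 = 131072
 *     (level * qmat) >> QMAT_SHIFT  ~=  level / 16
 * i.e. multiplying by qmat and shifting by QMAT_SHIFT approximates dividing
 * by qscale2 * quant_matrix[j] / 2. */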
112 void ff_convert_matrix(MpegEncContext *s, int (*qmat)[64],
113 uint16_t (*qmat16)[2][64],
114 const uint16_t *quant_matrix,
115 int bias, int qmin, int qmax, int intra)
116 {
117 FDCTDSPContext *fdsp = &s->fdsp;
118 int qscale;
119 int shift = 0;
121 for (qscale = qmin; qscale <= qmax; qscale++) {
122 int i;
123 int qscale2;
125 if (s->q_scale_type) qscale2 = ff_mpeg2_non_linear_qscale[qscale];
126 else qscale2 = qscale << 1;
128 if (fdsp->fdct == ff_jpeg_fdct_islow_8 ||
129 #if CONFIG_FAANDCT
130 fdsp->fdct == ff_faandct ||
131 #endif /* CONFIG_FAANDCT */
132 fdsp->fdct == ff_jpeg_fdct_islow_10) {
133 for (i = 0; i < 64; i++) {
134 const int j = s->idsp.idct_permutation[i];
135 int64_t den = (int64_t) qscale2 * quant_matrix[j];
136 /* 1 * 1 <= qscale2 * quant_matrix[j] <= 112 * 255
137 * Assume x = qscale2 * quant_matrix[j]
138 * 1 <= x <= 28560
139 * (1 << 22) / 1 >= (1 << 22) / (x) >= (1 << 22) / 28560
140 * 4194304 >= (1 << 22) / (x) >= 146 */
142 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
143 }
144 } else if (fdsp->fdct == ff_fdct_ifast) {
145 for (i = 0; i < 64; i++) {
146 const int j = s->idsp.idct_permutation[i];
147 int64_t den = ff_aanscales[i] * (int64_t) qscale2 * quant_matrix[j];
148 /* 1247 * 1 * 1 <= ff_aanscales[i] * qscale2 * quant_matrix[j] <= 31521 * 112 * 255
149 * Assume x = ff_aanscales[i] * qscale2 * quant_matrix[j]
150 * 1247 <= x <= 900239760
151 * (1 << 36) / 1247 >= (1 << 36) / (x) >= (1 << 36) / 900239760
152 * 55107840 >= (1 << 36) / (x) >= 76 */
154 qmat[qscale][i] = (int)((UINT64_C(2) << (QMAT_SHIFT + 14)) / den);
155 }
156 } else {
157 for (i = 0; i < 64; i++) {
158 const int j = s->idsp.idct_permutation[i];
159 int64_t den = (int64_t) qscale2 * quant_matrix[j];
160 /* 1 * 1 <= qscale2 * quant_matrix[j] <= 112 * 255
161 * Assume x = qscale2 * quant_matrix[j]
162 * 1 <= x <= 28560
163 * (1 << 22) / 1 >= (1 << 22) / (x) >= (1 << 22) / 28560
164 * 4194304 >= (1 << 22) / (x) >= 146
166 * 1 <= x <= 28560
167 * (1 << 17) / 1 >= (1 << 17) / (x) >= (1 << 17) / 28560
168 * 131072 >= (1 << 17) / (x) >= 4 */
170 qmat[qscale][i] = (int)((UINT64_C(2) << QMAT_SHIFT) / den);
171 qmat16[qscale][0][i] = (2 << QMAT_SHIFT_MMX) / den;
173 if (qmat16[qscale][0][i] == 0 ||
174 qmat16[qscale][0][i] == 128 * 256)
175 qmat16[qscale][0][i] = 128 * 256 - 1;
176 qmat16[qscale][1][i] =
177 ROUNDED_DIV(bias * (1<<(16 - QUANT_BIAS_SHIFT)),
178 qmat16[qscale][0][i]);
179 }
180 }
182 for (i = intra; i < 64; i++) {
183 int64_t max = 8191;
184 if (fdsp->fdct == ff_fdct_ifast) {
185 max = (8191LL * ff_aanscales[i]) >> 14;
186 }
187 while (((max * qmat[qscale][i]) >> shift) > INT_MAX) {
188 shift++;
189 }
190 }
191 }
192 if (shift) {
193 av_log(s->avctx, AV_LOG_INFO,
194 "Warning, QMAT_SHIFT is larger than %d, overflows possible\n",
195 QMAT_SHIFT - shift);
196 }
197 }
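/* update_qscale(): derive the quantizer from the current lambda.
 * 139 / 2^(FF_LAMBDA_SHIFT + 7) = 139 / 16384 is roughly 1 / FF_QP2LAMBDA
 * (1/118), so this maps lambda back to a qscale with rounding, clipped to
 * qmin/qmax (qmax is relaxed to 31 while vbv_ignore_qmax is set during a
 * VBV-triggered re-encode). */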
199 static inline void update_qscale(MpegEncContext *s)
200 {
201 if (s->q_scale_type == 1 && 0) {
202 int i;
203 int bestdiff=INT_MAX;
204 int best = 1;
206 for (i = 0 ; i<FF_ARRAY_ELEMS(ff_mpeg2_non_linear_qscale); i++) {
207 int diff = FFABS((ff_mpeg2_non_linear_qscale[i]<<(FF_LAMBDA_SHIFT + 6)) - (int)s->lambda * 139);
208 if (ff_mpeg2_non_linear_qscale[i] < s->avctx->qmin ||
209 (ff_mpeg2_non_linear_qscale[i] > s->avctx->qmax && !s->vbv_ignore_qmax))
210 continue;
211 if (diff < bestdiff) {
212 bestdiff = diff;
213 best = i;
214 }
215 }
216 s->qscale = best;
217 } else {
218 s->qscale = (s->lambda * 139 + FF_LAMBDA_SCALE * 64) >>
219 (FF_LAMBDA_SHIFT + 7);
220 s->qscale = av_clip(s->qscale, s->avctx->qmin, s->vbv_ignore_qmax ? 31 : s->avctx->qmax);
221 }
223 s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
224 FF_LAMBDA_SHIFT;
225 }
227 void ff_write_quant_matrix(PutBitContext *pb, uint16_t *matrix)
228 {
229 int i;
231 if (matrix) {
232 put_bits(pb, 1, 1);
233 for (i = 0; i < 64; i++) {
234 put_bits(pb, 8, matrix[ff_zigzag_direct[i]]);
235 }
236 } else
237 put_bits(pb, 1, 0);
238 }
240 /**
241 * init s->cur_pic.qscale_table from s->lambda_table
242 */
243 static void init_qscale_tab(MpegEncContext *s)
244 {
245 int8_t * const qscale_table = s->cur_pic.qscale_table;
246 int i;
248 for (i = 0; i < s->mb_num; i++) {
249 unsigned int lam = s->lambda_table[s->mb_index2xy[i]];
250 int qp = (lam * 139 + FF_LAMBDA_SCALE * 64) >> (FF_LAMBDA_SHIFT + 7);
251 qscale_table[s->mb_index2xy[i]] = av_clip(qp, s->avctx->qmin,
252 s->avctx->qmax);
253 }
254 }
256 static void update_duplicate_context_after_me(MpegEncContext *dst,
257 const MpegEncContext *src)
258 {
259 #define COPY(a) dst->a= src->a
260 COPY(pict_type);
261 COPY(f_code);
262 COPY(b_code);
263 COPY(qscale);
264 COPY(lambda);
265 COPY(lambda2);
266 COPY(frame_pred_frame_dct); // FIXME don't set in encode_header
267 COPY(progressive_frame); // FIXME don't set in encode_header
268 COPY(partitioned_frame); // FIXME don't set in encode_header
269 #undef COPY
270 }
272 static void mpv_encode_init_static(void)
273 {
274 for (int i = -16; i < 16; i++)
275 default_fcode_tab[i + MAX_MV] = 1;
276 }
278 /**
279 * Set the given MpegEncContext to defaults for encoding.
280 * the changed fields will not depend upon the prior state of the MpegEncContext.
281 */
282 static void mpv_encode_defaults(MpegEncContext *s)
283 {
284 static AVOnce init_static_once = AV_ONCE_INIT;
286 ff_mpv_common_defaults(s);
288 ff_thread_once(&init_static_once, mpv_encode_init_static);
290 s->me.mv_penalty = default_mv_penalty;
291 s->fcode_tab = default_fcode_tab;
293 s->input_picture_number = 0;
294 s->picture_in_gop_number = 0;
295 }
297 av_cold void ff_dct_encode_init(MpegEncContext *s)
298 {
299 s->dct_quantize = dct_quantize_c;
300 s->denoise_dct = denoise_dct_c;
302 #if ARCH_MIPS
303 ff_mpvenc_dct_init_mips(s);
304 #elif ARCH_X86
305 ff_dct_encode_init_x86(s);
306 #endif
308 if (s->avctx->trellis)
309 s->dct_quantize = dct_quantize_trellis_c;
310 }
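/* me_cmp_init(): resolve the compare functions needed by the encoder from
 * MECmpContext: the frame-skip compare function, the interlaced-DCT compare
 * functions (only when AV_CODEC_FLAG_INTERLACED_DCT is set) and the
 * SSE/SAD/NSSE block compare functions. */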
312 static av_cold int me_cmp_init(MpegEncContext *s, AVCodecContext *avctx)
313 {
314 MECmpContext mecc;
315 me_cmp_func me_cmp[6];
316 int ret;
318 ff_me_cmp_init(&mecc, avctx);
319 ret = ff_me_init(&s->me, avctx, &mecc, 1);
320 if (ret < 0)
321 return ret;
322 ret = ff_set_cmp(&mecc, me_cmp, s->frame_skip_cmp, 1);
323 if (ret < 0)
324 return ret;
325 s->frame_skip_cmp_fn = me_cmp[1];
326 if (avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT) {
327 ret = ff_set_cmp(&mecc, me_cmp, avctx->ildct_cmp, 1);
328 if (ret < 0)
329 return ret;
330 if (!me_cmp[0] || !me_cmp[4])
331 return AVERROR(EINVAL);
332 s->ildct_cmp[0] = me_cmp[0];
333 s->ildct_cmp[1] = me_cmp[4];
334 }
336 s->sum_abs_dctelem = mecc.sum_abs_dctelem;
338 s->sse_cmp[0] = mecc.sse[0];
339 s->sse_cmp[1] = mecc.sse[1];
340 s->sad_cmp[0] = mecc.sad[0];
341 s->sad_cmp[1] = mecc.sad[1];
342 if (avctx->mb_cmp == FF_CMP_NSSE) {
343 s->n_sse_cmp[0] = mecc.nsse[0];
344 s->n_sse_cmp[1] = mecc.nsse[1];
345 } else {
346 s->n_sse_cmp[0] = mecc.sse[0];
347 s->n_sse_cmp[1] = mecc.sse[1];
348 }
350 return 0;
351 }
353 /* init video encoder */
354 av_cold int ff_mpv_encode_init(AVCodecContext *avctx)
356 MpegEncContext *s = avctx->priv_data;
357 AVCPBProperties *cpb_props;
358 int i, ret;
359 int mb_array_size, mv_table_size;
361 mpv_encode_defaults(s);
363 switch (avctx->pix_fmt) {
364 case AV_PIX_FMT_YUVJ444P:
365 case AV_PIX_FMT_YUV444P:
366 s->chroma_format = CHROMA_444;
367 break;
368 case AV_PIX_FMT_YUVJ422P:
369 case AV_PIX_FMT_YUV422P:
370 s->chroma_format = CHROMA_422;
371 break;
372 case AV_PIX_FMT_YUVJ420P:
373 case AV_PIX_FMT_YUV420P:
374 default:
375 s->chroma_format = CHROMA_420;
376 break;
379 avctx->bits_per_raw_sample = av_clip(avctx->bits_per_raw_sample, 0, 8);
381 s->bit_rate = avctx->bit_rate;
382 s->width = avctx->width;
383 s->height = avctx->height;
384 if (avctx->gop_size > 600 &&
385 avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL) {
386 av_log(avctx, AV_LOG_WARNING,
387 "keyframe interval too large!, reducing it from %d to %d\n",
388 avctx->gop_size, 600);
389 avctx->gop_size = 600;
391 s->gop_size = avctx->gop_size;
392 s->avctx = avctx;
393 if (avctx->max_b_frames > MAX_B_FRAMES) {
394 av_log(avctx, AV_LOG_ERROR, "Too many B-frames requested, maximum "
395 "is %d.\n", MAX_B_FRAMES);
396 avctx->max_b_frames = MAX_B_FRAMES;
397 } else if (avctx->max_b_frames < 0) {
398 av_log(avctx, AV_LOG_ERROR,
399 "max b frames must be 0 or positive for mpegvideo based encoders\n");
400 return AVERROR(EINVAL);
402 s->max_b_frames = avctx->max_b_frames;
403 s->codec_id = avctx->codec->id;
404 if (s->max_b_frames && !(avctx->codec->capabilities & AV_CODEC_CAP_DELAY)) {
405 av_log(avctx, AV_LOG_ERROR, "B-frames not supported by codec\n");
406 return AVERROR(EINVAL);
409 s->quarter_sample = (avctx->flags & AV_CODEC_FLAG_QPEL) != 0;
410 s->rtp_mode = !!s->rtp_payload_size;
411 s->intra_dc_precision = avctx->intra_dc_precision;
413 // workaround some differences between how applications specify dc precision
414 if (s->intra_dc_precision < 0) {
415 s->intra_dc_precision += 8;
416 } else if (s->intra_dc_precision >= 8)
417 s->intra_dc_precision -= 8;
419 if (s->intra_dc_precision < 0) {
420 av_log(avctx, AV_LOG_ERROR,
421 "intra dc precision must be positive, note some applications use"
422 " 0 and some 8 as base meaning 8bit, the value must not be smaller than that\n");
423 return AVERROR(EINVAL);
426 if (s->intra_dc_precision > (avctx->codec_id == AV_CODEC_ID_MPEG2VIDEO ? 3 : 0)) {
427 av_log(avctx, AV_LOG_ERROR, "intra dc precision too large\n");
428 return AVERROR(EINVAL);
430 s->user_specified_pts = AV_NOPTS_VALUE;
432 if (s->gop_size <= 1) {
433 s->intra_only = 1;
434 s->gop_size = 12;
435 } else {
436 s->intra_only = 0;
439 /* Fixed QSCALE */
440 s->fixed_qscale = !!(avctx->flags & AV_CODEC_FLAG_QSCALE);
442 s->adaptive_quant = (avctx->lumi_masking ||
443 avctx->dark_masking ||
444 avctx->temporal_cplx_masking ||
445 avctx->spatial_cplx_masking ||
446 avctx->p_masking ||
447 s->border_masking ||
448 (s->mpv_flags & FF_MPV_FLAG_QP_RD)) &&
449 !s->fixed_qscale;
451 s->loop_filter = !!(avctx->flags & AV_CODEC_FLAG_LOOP_FILTER);
453 if (avctx->rc_max_rate && !avctx->rc_buffer_size) {
454 switch(avctx->codec_id) {
455 case AV_CODEC_ID_MPEG1VIDEO:
456 case AV_CODEC_ID_MPEG2VIDEO:
457 avctx->rc_buffer_size = FFMAX(avctx->rc_max_rate, 15000000) * 112LL / 15000000 * 16384;
458 break;
459 case AV_CODEC_ID_MPEG4:
460 case AV_CODEC_ID_MSMPEG4V1:
461 case AV_CODEC_ID_MSMPEG4V2:
462 case AV_CODEC_ID_MSMPEG4V3:
463 if (avctx->rc_max_rate >= 15000000) {
464 avctx->rc_buffer_size = 320 + (avctx->rc_max_rate - 15000000LL) * (760-320) / (38400000 - 15000000);
465 } else if(avctx->rc_max_rate >= 2000000) {
466 avctx->rc_buffer_size = 80 + (avctx->rc_max_rate - 2000000LL) * (320- 80) / (15000000 - 2000000);
467 } else if(avctx->rc_max_rate >= 384000) {
468 avctx->rc_buffer_size = 40 + (avctx->rc_max_rate - 384000LL) * ( 80- 40) / ( 2000000 - 384000);
469 } else
470 avctx->rc_buffer_size = 40;
471 avctx->rc_buffer_size *= 16384;
472 break;
474 if (avctx->rc_buffer_size) {
475 av_log(avctx, AV_LOG_INFO, "Automatically choosing VBV buffer size of %d kbyte\n", avctx->rc_buffer_size/8192);
479 if ((!avctx->rc_max_rate) != (!avctx->rc_buffer_size)) {
480 av_log(avctx, AV_LOG_ERROR, "Either both buffer size and max rate or neither must be specified\n");
481 return AVERROR(EINVAL);
484 if (avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate) {
485 av_log(avctx, AV_LOG_INFO,
486 "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
489 if (avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate) {
490 av_log(avctx, AV_LOG_ERROR, "bitrate below min bitrate\n");
491 return AVERROR(EINVAL);
494 if (avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate) {
495 av_log(avctx, AV_LOG_ERROR, "bitrate above max bitrate\n");
496 return AVERROR(EINVAL);
499 if (avctx->rc_max_rate &&
500 avctx->rc_max_rate == avctx->bit_rate &&
501 avctx->rc_max_rate != avctx->rc_min_rate) {
502 av_log(avctx, AV_LOG_INFO,
503 "impossible bitrate constraints, this will fail\n");
506 if (avctx->rc_buffer_size &&
507 avctx->bit_rate * (int64_t)avctx->time_base.num >
508 avctx->rc_buffer_size * (int64_t)avctx->time_base.den) {
509 av_log(avctx, AV_LOG_ERROR, "VBV buffer too small for bitrate\n");
510 return AVERROR(EINVAL);
513 if (!s->fixed_qscale &&
514 avctx->bit_rate * av_q2d(avctx->time_base) >
515 avctx->bit_rate_tolerance) {
516 double nbt = avctx->bit_rate * av_q2d(avctx->time_base) * 5;
517 av_log(avctx, AV_LOG_WARNING,
518 "bitrate tolerance %d too small for bitrate %"PRId64", overriding\n", avctx->bit_rate_tolerance, avctx->bit_rate);
519 if (nbt <= INT_MAX) {
520 avctx->bit_rate_tolerance = nbt;
521 } else
522 avctx->bit_rate_tolerance = INT_MAX;
525 if (avctx->rc_max_rate &&
526 avctx->rc_min_rate == avctx->rc_max_rate &&
527 (s->codec_id == AV_CODEC_ID_MPEG1VIDEO ||
528 s->codec_id == AV_CODEC_ID_MPEG2VIDEO) &&
529 90000LL * (avctx->rc_buffer_size - 1) >
530 avctx->rc_max_rate * 0xFFFFLL) {
531 av_log(avctx, AV_LOG_INFO,
532 "Warning vbv_delay will be set to 0xFFFF (=VBR) as the "
533 "specified vbv buffer is too large for the given bitrate!\n");
536 if ((avctx->flags & AV_CODEC_FLAG_4MV) && s->codec_id != AV_CODEC_ID_MPEG4 &&
537 s->codec_id != AV_CODEC_ID_H263 && s->codec_id != AV_CODEC_ID_H263P &&
538 s->codec_id != AV_CODEC_ID_FLV1) {
539 av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
540 return AVERROR(EINVAL);
543 if (s->obmc && avctx->mb_decision != FF_MB_DECISION_SIMPLE) {
544 av_log(avctx, AV_LOG_ERROR,
545 "OBMC is only supported with simple mb decision\n");
546 return AVERROR(EINVAL);
549 if (s->quarter_sample && s->codec_id != AV_CODEC_ID_MPEG4) {
550 av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
551 return AVERROR(EINVAL);
554 if ((s->codec_id == AV_CODEC_ID_MPEG4 ||
555 s->codec_id == AV_CODEC_ID_H263 ||
556 s->codec_id == AV_CODEC_ID_H263P) &&
557 (avctx->sample_aspect_ratio.num > 255 ||
558 avctx->sample_aspect_ratio.den > 255)) {
559 av_log(avctx, AV_LOG_WARNING,
560 "Invalid pixel aspect ratio %i/%i, limit is 255/255 reducing\n",
561 avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den);
562 av_reduce(&avctx->sample_aspect_ratio.num, &avctx->sample_aspect_ratio.den,
563 avctx->sample_aspect_ratio.num, avctx->sample_aspect_ratio.den, 255);
566 if ((s->codec_id == AV_CODEC_ID_H263 ||
567 s->codec_id == AV_CODEC_ID_H263P) &&
568 (avctx->width > 2048 ||
569 avctx->height > 1152 )) {
570 av_log(avctx, AV_LOG_ERROR, "H.263 does not support resolutions above 2048x1152\n");
571 return AVERROR(EINVAL);
573 if (s->codec_id == AV_CODEC_ID_FLV1 &&
574 (avctx->width > 65535 ||
575 avctx->height > 65535 )) {
576 av_log(avctx, AV_LOG_ERROR, "FLV does not support resolutions above 16bit\n");
577 return AVERROR(EINVAL);
579 if ((s->codec_id == AV_CODEC_ID_H263 ||
580 s->codec_id == AV_CODEC_ID_H263P ||
581 s->codec_id == AV_CODEC_ID_RV20) &&
582 ((avctx->width &3) ||
583 (avctx->height&3) )) {
584 av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 4\n");
585 return AVERROR(EINVAL);
588 if (s->codec_id == AV_CODEC_ID_RV10 &&
589 (avctx->width &15 ||
590 avctx->height&15 )) {
591 av_log(avctx, AV_LOG_ERROR, "width and height must be a multiple of 16\n");
592 return AVERROR(EINVAL);
595 if ((s->codec_id == AV_CODEC_ID_WMV1 ||
596 s->codec_id == AV_CODEC_ID_WMV2) &&
597 avctx->width & 1) {
598 av_log(avctx, AV_LOG_ERROR, "width must be multiple of 2\n");
599 return AVERROR(EINVAL);
602 if ((avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT | AV_CODEC_FLAG_INTERLACED_ME)) &&
603 s->codec_id != AV_CODEC_ID_MPEG4 && s->codec_id != AV_CODEC_ID_MPEG2VIDEO) {
604 av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
605 return AVERROR(EINVAL);
608 if ((s->mpv_flags & FF_MPV_FLAG_CBP_RD) && !avctx->trellis) {
609 av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
610 return AVERROR(EINVAL);
613 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) &&
614 avctx->mb_decision != FF_MB_DECISION_RD) {
615 av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=rd\n");
616 return AVERROR(EINVAL);
619 if (s->scenechange_threshold < 1000000000 &&
620 (avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)) {
621 av_log(avctx, AV_LOG_ERROR,
622 "closed gop with scene change detection are not supported yet, "
623 "set threshold to 1000000000\n");
624 return AVERROR_PATCHWELCOME;
627 if (avctx->flags & AV_CODEC_FLAG_LOW_DELAY) {
628 if (s->codec_id != AV_CODEC_ID_MPEG2VIDEO &&
629 avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL) {
630 av_log(avctx, AV_LOG_ERROR,
631 "low delay forcing is only available for mpeg2, "
632 "set strict_std_compliance to 'unofficial' or lower in order to allow it\n");
633 return AVERROR(EINVAL);
635 if (s->max_b_frames != 0) {
636 av_log(avctx, AV_LOG_ERROR,
637 "B-frames cannot be used with low delay\n");
638 return AVERROR(EINVAL);
642 if (s->q_scale_type == 1) {
643 if (avctx->qmax > 28) {
644 av_log(avctx, AV_LOG_ERROR,
645 "non linear quant only supports qmax <= 28 currently\n");
646 return AVERROR_PATCHWELCOME;
650 if (avctx->slices > 1 &&
651 !(avctx->codec->capabilities & AV_CODEC_CAP_SLICE_THREADS)) {
652 av_log(avctx, AV_LOG_ERROR, "Multiple slices are not supported by this codec\n");
653 return AVERROR(EINVAL);
656 if (s->b_frame_strategy && (avctx->flags & AV_CODEC_FLAG_PASS2)) {
657 av_log(avctx, AV_LOG_INFO,
658 "notice: b_frame_strategy only affects the first pass\n");
659 s->b_frame_strategy = 0;
662 i = av_gcd(avctx->time_base.den, avctx->time_base.num);
663 if (i > 1) {
664 av_log(avctx, AV_LOG_INFO, "removing common factors from framerate\n");
665 avctx->time_base.den /= i;
666 avctx->time_base.num /= i;
667 //return -1;
670 if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG1VIDEO || s->codec_id == AV_CODEC_ID_MPEG2VIDEO || s->codec_id == AV_CODEC_ID_MJPEG || s->codec_id == AV_CODEC_ID_AMV || s->codec_id == AV_CODEC_ID_SPEEDHQ) {
671 // (a + x * 3 / 8) / x
672 s->intra_quant_bias = 3 << (QUANT_BIAS_SHIFT - 3);
673 s->inter_quant_bias = 0;
674 } else {
675 s->intra_quant_bias = 0;
676 // (a - x / 4) / x
677 s->inter_quant_bias = -(1 << (QUANT_BIAS_SHIFT - 2));
680 if (avctx->qmin > avctx->qmax || avctx->qmin <= 0) {
681 av_log(avctx, AV_LOG_ERROR, "qmin and or qmax are invalid, they must be 0 < min <= max\n");
682 return AVERROR(EINVAL);
685 av_log(avctx, AV_LOG_DEBUG, "intra_quant_bias = %d inter_quant_bias = %d\n",s->intra_quant_bias,s->inter_quant_bias);
687 if (avctx->codec_id == AV_CODEC_ID_MPEG4 &&
688 avctx->time_base.den > (1 << 16) - 1) {
689 av_log(avctx, AV_LOG_ERROR,
690 "timebase %d/%d not supported by MPEG 4 standard, "
691 "the maximum admitted value for the timebase denominator "
692 "is %d\n", avctx->time_base.num, avctx->time_base.den,
693 (1 << 16) - 1);
694 return AVERROR(EINVAL);
695 }
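/* Number of bits needed to code a time increment in the range
 * 0 .. time_base.den - 1 (needed for the MPEG-4 time code syntax). */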
696 s->time_increment_bits = av_log2(avctx->time_base.den - 1) + 1;
698 switch (avctx->codec->id) {
699 #if CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER
700 case AV_CODEC_ID_MPEG2VIDEO:
701 s->rtp_mode = 1;
702 /* fallthrough */
703 case AV_CODEC_ID_MPEG1VIDEO:
704 s->out_format = FMT_MPEG1;
705 s->low_delay = !!(avctx->flags & AV_CODEC_FLAG_LOW_DELAY);
706 avctx->delay = s->low_delay ? 0 : (s->max_b_frames + 1);
707 ff_mpeg1_encode_init(s);
708 break;
709 #endif
710 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
711 case AV_CODEC_ID_MJPEG:
712 case AV_CODEC_ID_AMV:
713 s->out_format = FMT_MJPEG;
714 s->intra_only = 1; /* force intra only for jpeg */
715 if ((ret = ff_mjpeg_encode_init(s)) < 0)
716 return ret;
717 avctx->delay = 0;
718 s->low_delay = 1;
719 break;
720 #endif
721 case AV_CODEC_ID_SPEEDHQ:
722 s->out_format = FMT_SPEEDHQ;
723 s->intra_only = 1; /* force intra only for SHQ */
724 if (!CONFIG_SPEEDHQ_ENCODER)
725 return AVERROR_ENCODER_NOT_FOUND;
726 if ((ret = ff_speedhq_encode_init(s)) < 0)
727 return ret;
728 avctx->delay = 0;
729 s->low_delay = 1;
730 break;
731 case AV_CODEC_ID_H261:
732 if (!CONFIG_H261_ENCODER)
733 return AVERROR_ENCODER_NOT_FOUND;
734 ret = ff_h261_encode_init(s);
735 if (ret < 0)
736 return ret;
737 s->out_format = FMT_H261;
738 avctx->delay = 0;
739 s->low_delay = 1;
740 s->rtp_mode = 0; /* Sliced encoding not supported */
741 break;
742 case AV_CODEC_ID_H263:
743 if (!CONFIG_H263_ENCODER)
744 return AVERROR_ENCODER_NOT_FOUND;
745 if (ff_match_2uint16(ff_h263_format, FF_ARRAY_ELEMS(ff_h263_format),
746 s->width, s->height) == 8) {
747 av_log(avctx, AV_LOG_ERROR,
748 "The specified picture size of %dx%d is not valid for "
749 "the H.263 codec.\nValid sizes are 128x96, 176x144, "
750 "352x288, 704x576, and 1408x1152. "
751 "Try H.263+.\n", s->width, s->height);
752 return AVERROR(EINVAL);
754 s->out_format = FMT_H263;
755 avctx->delay = 0;
756 s->low_delay = 1;
757 break;
758 case AV_CODEC_ID_H263P:
759 s->out_format = FMT_H263;
760 s->h263_plus = 1;
761 /* Fx */
762 s->h263_aic = (avctx->flags & AV_CODEC_FLAG_AC_PRED) ? 1 : 0;
763 s->modified_quant = s->h263_aic;
764 s->loop_filter = (avctx->flags & AV_CODEC_FLAG_LOOP_FILTER) ? 1 : 0;
765 s->unrestricted_mv = s->obmc || s->loop_filter || s->umvplus;
766 s->flipflop_rounding = 1;
768 /* /Fx */
769 /* These are just to be sure */
770 avctx->delay = 0;
771 s->low_delay = 1;
772 break;
773 case AV_CODEC_ID_FLV1:
774 s->out_format = FMT_H263;
775 s->h263_flv = 2; /* format = 1; 11-bit codes */
776 s->unrestricted_mv = 1;
777 s->rtp_mode = 0; /* don't allow GOB */
778 avctx->delay = 0;
779 s->low_delay = 1;
780 break;
781 case AV_CODEC_ID_RV10:
782 s->out_format = FMT_H263;
783 avctx->delay = 0;
784 s->low_delay = 1;
785 break;
786 case AV_CODEC_ID_RV20:
787 s->out_format = FMT_H263;
788 avctx->delay = 0;
789 s->low_delay = 1;
790 s->modified_quant = 1;
791 s->h263_aic = 1;
792 s->h263_plus = 1;
793 s->loop_filter = 1;
794 s->unrestricted_mv = 0;
795 break;
796 case AV_CODEC_ID_MPEG4:
797 s->out_format = FMT_H263;
798 s->h263_pred = 1;
799 s->unrestricted_mv = 1;
800 s->flipflop_rounding = 1;
801 s->low_delay = s->max_b_frames ? 0 : 1;
802 avctx->delay = s->low_delay ? 0 : (s->max_b_frames + 1);
803 break;
804 case AV_CODEC_ID_MSMPEG4V2:
805 s->out_format = FMT_H263;
806 s->h263_pred = 1;
807 s->unrestricted_mv = 1;
808 s->msmpeg4_version = MSMP4_V2;
809 avctx->delay = 0;
810 s->low_delay = 1;
811 break;
812 case AV_CODEC_ID_MSMPEG4V3:
813 s->out_format = FMT_H263;
814 s->h263_pred = 1;
815 s->unrestricted_mv = 1;
816 s->msmpeg4_version = MSMP4_V3;
817 s->flipflop_rounding = 1;
818 avctx->delay = 0;
819 s->low_delay = 1;
820 break;
821 case AV_CODEC_ID_WMV1:
822 s->out_format = FMT_H263;
823 s->h263_pred = 1;
824 s->unrestricted_mv = 1;
825 s->msmpeg4_version = MSMP4_WMV1;
826 s->flipflop_rounding = 1;
827 avctx->delay = 0;
828 s->low_delay = 1;
829 break;
830 case AV_CODEC_ID_WMV2:
831 s->out_format = FMT_H263;
832 s->h263_pred = 1;
833 s->unrestricted_mv = 1;
834 s->msmpeg4_version = MSMP4_WMV2;
835 s->flipflop_rounding = 1;
836 avctx->delay = 0;
837 s->low_delay = 1;
838 break;
839 default:
840 return AVERROR(EINVAL);
843 avctx->has_b_frames = !s->low_delay;
845 s->encoding = 1;
847 s->progressive_frame =
848 s->progressive_sequence = !(avctx->flags & (AV_CODEC_FLAG_INTERLACED_DCT |
849 AV_CODEC_FLAG_INTERLACED_ME) ||
850 s->alternate_scan);
852 if (s->lmin > s->lmax) {
853 av_log(avctx, AV_LOG_WARNING, "Clipping lmin value to %d\n", s->lmax);
854 s->lmin = s->lmax;
857 /* init */
858 ff_mpv_idct_init(s);
859 if ((ret = ff_mpv_common_init(s)) < 0)
860 return ret;
862 ff_fdctdsp_init(&s->fdsp, avctx);
863 ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx);
864 ff_pixblockdsp_init(&s->pdsp, avctx);
865 ret = me_cmp_init(s, avctx);
866 if (ret < 0)
867 return ret;
869 if (!(avctx->stats_out = av_mallocz(256)) ||
870 !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix, 32) ||
871 !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix, 32) ||
872 !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix, 32) ||
873 !FF_ALLOCZ_TYPED_ARRAY(s->q_intra_matrix16, 32) ||
874 !FF_ALLOCZ_TYPED_ARRAY(s->q_chroma_intra_matrix16, 32) ||
875 !FF_ALLOCZ_TYPED_ARRAY(s->q_inter_matrix16, 32) ||
876 !FF_ALLOCZ_TYPED_ARRAY(s->input_picture, MAX_B_FRAMES + 1) ||
877 !FF_ALLOCZ_TYPED_ARRAY(s->reordered_input_picture, MAX_B_FRAMES + 1) ||
878 !(s->new_pic = av_frame_alloc()) ||
879 !(s->picture_pool = ff_mpv_alloc_pic_pool(0)))
880 return AVERROR(ENOMEM);
882 /* Allocate MV tables; the MV and MB tables will be copied
883 * to slice contexts by ff_update_duplicate_context(). */
884 mv_table_size = (s->mb_height + 2) * s->mb_stride + 1;
885 if (!FF_ALLOCZ_TYPED_ARRAY(s->p_mv_table_base, mv_table_size) ||
886 !FF_ALLOCZ_TYPED_ARRAY(s->b_forw_mv_table_base, mv_table_size) ||
887 !FF_ALLOCZ_TYPED_ARRAY(s->b_back_mv_table_base, mv_table_size) ||
888 !FF_ALLOCZ_TYPED_ARRAY(s->b_bidir_forw_mv_table_base, mv_table_size) ||
889 !FF_ALLOCZ_TYPED_ARRAY(s->b_bidir_back_mv_table_base, mv_table_size) ||
890 !FF_ALLOCZ_TYPED_ARRAY(s->b_direct_mv_table_base, mv_table_size))
891 return AVERROR(ENOMEM);
892 s->p_mv_table = s->p_mv_table_base + s->mb_stride + 1;
893 s->b_forw_mv_table = s->b_forw_mv_table_base + s->mb_stride + 1;
894 s->b_back_mv_table = s->b_back_mv_table_base + s->mb_stride + 1;
895 s->b_bidir_forw_mv_table = s->b_bidir_forw_mv_table_base + s->mb_stride + 1;
896 s->b_bidir_back_mv_table = s->b_bidir_back_mv_table_base + s->mb_stride + 1;
897 s->b_direct_mv_table = s->b_direct_mv_table_base + s->mb_stride + 1;
899 /* Allocate MB type table */
900 mb_array_size = s->mb_stride * s->mb_height;
901 if (!FF_ALLOCZ_TYPED_ARRAY(s->mb_type, mb_array_size) ||
902 !FF_ALLOCZ_TYPED_ARRAY(s->lambda_table, mb_array_size) ||
903 !FF_ALLOC_TYPED_ARRAY (s->cplx_tab, mb_array_size) ||
904 !FF_ALLOC_TYPED_ARRAY (s->bits_tab, mb_array_size) ||
905 !FF_ALLOCZ_TYPED_ARRAY(s->mc_mb_var, mb_array_size) ||
906 !FF_ALLOCZ_TYPED_ARRAY(s->mb_var, mb_array_size) ||
907 !(s->mb_mean = av_mallocz(mb_array_size)))
908 return AVERROR(ENOMEM);
910 #define ALLOCZ_ARRAYS(p, mult, numb) ((p) = av_calloc(numb, mult * sizeof(*(p))))
911 if (s->codec_id == AV_CODEC_ID_MPEG4 ||
912 (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME)) {
913 int16_t (*tmp1)[2];
914 uint8_t *tmp2;
915 if (!(tmp1 = ALLOCZ_ARRAYS(s->b_field_mv_table_base, 8, mv_table_size)) ||
916 !(tmp2 = ALLOCZ_ARRAYS(s->b_field_select_table[0][0], 2 * 4, mv_table_size)) ||
917 !ALLOCZ_ARRAYS(s->p_field_select_table[0], 2 * 2, mv_table_size))
918 return AVERROR(ENOMEM);
920 s->p_field_select_table[1] = s->p_field_select_table[0] + 2 * mv_table_size;
921 tmp1 += s->mb_stride + 1;
923 for (int i = 0; i < 2; i++) {
924 for (int j = 0; j < 2; j++) {
925 for (int k = 0; k < 2; k++) {
926 s->b_field_mv_table[i][j][k] = tmp1;
927 tmp1 += mv_table_size;
929 s->b_field_select_table[i][j] = tmp2;
930 tmp2 += 2 * mv_table_size;
935 if (s->noise_reduction) {
936 if (!FF_ALLOCZ_TYPED_ARRAY(s->dct_offset, 2))
937 return AVERROR(ENOMEM);
940 ff_dct_encode_init(s);
942 if (s->mpeg_quant || s->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
943 s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra;
944 s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter;
945 } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
946 s->dct_unquantize_intra = s->dct_unquantize_h263_intra;
947 s->dct_unquantize_inter = s->dct_unquantize_h263_inter;
948 } else {
949 s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra;
950 s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter;
953 if ((CONFIG_H263P_ENCODER || CONFIG_RV20_ENCODER) && s->modified_quant)
954 s->chroma_qscale_table = ff_h263_chroma_qscale_table;
956 if (s->slice_context_count > 1) {
957 s->rtp_mode = 1;
959 if (avctx->codec_id == AV_CODEC_ID_H263P)
960 s->h263_slice_structured = 1;
963 if (CONFIG_H263_ENCODER && s->out_format == FMT_H263) {
964 ff_h263_encode_init(s);
965 #if CONFIG_MSMPEG4ENC
966 if (s->msmpeg4_version != MSMP4_UNUSED)
967 ff_msmpeg4_encode_init(s);
968 #endif
971 /* init q matrix */
972 for (i = 0; i < 64; i++) {
973 int j = s->idsp.idct_permutation[i];
974 if (CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4 &&
975 s->mpeg_quant) {
976 s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
977 s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i];
978 } else if (s->out_format == FMT_H263 || s->out_format == FMT_H261) {
979 s->intra_matrix[j] =
980 s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
981 } else if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
982 s->intra_matrix[j] =
983 s->inter_matrix[j] = ff_mpeg1_default_intra_matrix[i];
984 } else {
985 /* MPEG-1/2 */
986 s->chroma_intra_matrix[j] =
987 s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i];
988 s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i];
990 if (avctx->intra_matrix)
991 s->intra_matrix[j] = avctx->intra_matrix[i];
992 if (avctx->inter_matrix)
993 s->inter_matrix[j] = avctx->inter_matrix[i];
996 /* precompute matrix */
997 /* for mjpeg, we do include qscale in the matrix */
998 if (s->out_format != FMT_MJPEG) {
999 ret = ff_check_codec_matrices(avctx, FF_MATRIX_TYPE_INTRA | FF_MATRIX_TYPE_INTER, 1, 255);
1000 if (ret < 0)
1001 return ret;
1003 ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
1004 s->intra_matrix, s->intra_quant_bias, avctx->qmin,
1005 31, 1);
1006 ff_convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16,
1007 s->inter_matrix, s->inter_quant_bias, avctx->qmin,
1008 31, 0);
1011 if ((ret = ff_rate_control_init(s)) < 0)
1012 return ret;
1014 if (s->b_frame_strategy == 2) {
1015 for (i = 0; i < s->max_b_frames + 2; i++) {
1016 s->tmp_frames[i] = av_frame_alloc();
1017 if (!s->tmp_frames[i])
1018 return AVERROR(ENOMEM);
1020 s->tmp_frames[i]->format = AV_PIX_FMT_YUV420P;
1021 s->tmp_frames[i]->width = s->width >> s->brd_scale;
1022 s->tmp_frames[i]->height = s->height >> s->brd_scale;
1024 ret = av_frame_get_buffer(s->tmp_frames[i], 0);
1025 if (ret < 0)
1026 return ret;
1030 cpb_props = ff_encode_add_cpb_side_data(avctx);
1031 if (!cpb_props)
1032 return AVERROR(ENOMEM);
1033 cpb_props->max_bitrate = avctx->rc_max_rate;
1034 cpb_props->min_bitrate = avctx->rc_min_rate;
1035 cpb_props->avg_bitrate = avctx->bit_rate;
1036 cpb_props->buffer_size = avctx->rc_buffer_size;
1038 return 0;
1041 av_cold int ff_mpv_encode_end(AVCodecContext *avctx)
1043 MpegEncContext *s = avctx->priv_data;
1044 int i;
1046 ff_rate_control_uninit(&s->rc_context);
1048 ff_mpv_common_end(s);
1049 av_refstruct_pool_uninit(&s->picture_pool);
1051 if (s->input_picture && s->reordered_input_picture) {
1052 for (int i = 0; i < MAX_B_FRAMES + 1; i++) {
1053 av_refstruct_unref(&s->input_picture[i]);
1054 av_refstruct_unref(&s->reordered_input_picture[i]);
1057 for (i = 0; i < FF_ARRAY_ELEMS(s->tmp_frames); i++)
1058 av_frame_free(&s->tmp_frames[i]);
1060 av_frame_free(&s->new_pic);
1062 av_freep(&avctx->stats_out);
1064 av_freep(&s->p_mv_table_base);
1065 av_freep(&s->b_forw_mv_table_base);
1066 av_freep(&s->b_back_mv_table_base);
1067 av_freep(&s->b_bidir_forw_mv_table_base);
1068 av_freep(&s->b_bidir_back_mv_table_base);
1069 av_freep(&s->b_direct_mv_table_base);
1070 av_freep(&s->b_field_mv_table_base);
1071 av_freep(&s->b_field_select_table[0][0]);
1072 av_freep(&s->p_field_select_table[0]);
1074 av_freep(&s->mb_type);
1075 av_freep(&s->lambda_table);
1077 av_freep(&s->cplx_tab);
1078 av_freep(&s->bits_tab);
1080 if(s->q_chroma_intra_matrix != s->q_intra_matrix ) av_freep(&s->q_chroma_intra_matrix);
1081 if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
1082 s->q_chroma_intra_matrix= NULL;
1083 s->q_chroma_intra_matrix16= NULL;
1084 av_freep(&s->q_intra_matrix);
1085 av_freep(&s->q_inter_matrix);
1086 av_freep(&s->q_intra_matrix16);
1087 av_freep(&s->q_inter_matrix16);
1088 av_freep(&s->input_picture);
1089 av_freep(&s->reordered_input_picture);
1090 av_freep(&s->dct_offset);
1091 av_freep(&s->mb_var);
1092 av_freep(&s->mc_mb_var);
1093 av_freep(&s->mb_mean);
1095 return 0;
1098 #define IS_ENCODER 1
1099 #include "mpv_reconstruct_mb_template.c"
1101 static void mpv_reconstruct_mb(MpegEncContext *s, int16_t block[12][64])
1103 if (s->avctx->debug & FF_DEBUG_DCT_COEFF) {
1104 /* print DCT coefficients */
1105 av_log(s->avctx, AV_LOG_DEBUG, "DCT coeffs of MB at %dx%d:\n", s->mb_x, s->mb_y);
1106 for (int i = 0; i < 6; i++) {
1107 for (int j = 0; j < 64; j++) {
1108 av_log(s->avctx, AV_LOG_DEBUG, "%5d",
1109 block[i][s->idsp.idct_permutation[j]]);
1111 av_log(s->avctx, AV_LOG_DEBUG, "\n");
1115 mpv_reconstruct_mb_internal(s, block, 0, MAY_BE_MPEG12_H261);
1118 static int get_sae(const uint8_t *src, int ref, int stride)
1119 {
1120 int x,y;
1121 int acc = 0;
1123 for (y = 0; y < 16; y++) {
1124 for (x = 0; x < 16; x++) {
1125 acc += FFABS(src[x + y * stride] - ref);
1126 }
1127 }
1129 return acc;
1130 }
1132 static int get_intra_count(MpegEncContext *s, const uint8_t *src,
1133 const uint8_t *ref, int stride)
1134 {
1135 int x, y, w, h;
1136 int acc = 0;
1138 w = s->width & ~15;
1139 h = s->height & ~15;
1141 for (y = 0; y < h; y += 16) {
1142 for (x = 0; x < w; x += 16) {
1143 int offset = x + y * stride;
1144 int sad = s->sad_cmp[0](NULL, src + offset, ref + offset,
1145 stride, 16);
1146 int mean = (s->mpvencdsp.pix_sum(src + offset, stride) + 128) >> 8;
1147 int sae = get_sae(src + offset, mean, stride);
1149 acc += sae + 500 < sad;
1150 }
1151 }
1152 return acc;
1153 }
1155 /**
1156 * Allocates new buffers for an AVFrame and copies the properties
1157 * from another AVFrame.
1158 */
1159 static int prepare_picture(MpegEncContext *s, AVFrame *f, const AVFrame *props_frame)
1160 {
1161 AVCodecContext *avctx = s->avctx;
1162 int ret;
1164 f->width = avctx->width + 2 * EDGE_WIDTH;
1165 f->height = avctx->height + 2 * EDGE_WIDTH;
1167 ret = ff_encode_alloc_frame(avctx, f);
1168 if (ret < 0)
1169 return ret;
1171 ret = ff_mpv_pic_check_linesize(avctx, f, &s->linesize, &s->uvlinesize);
1172 if (ret < 0)
1173 return ret;
1175 for (int i = 0; f->data[i]; i++) {
1176 int offset = (EDGE_WIDTH >> (i ? s->chroma_y_shift : 0)) *
1177 f->linesize[i] +
1178 (EDGE_WIDTH >> (i ? s->chroma_x_shift : 0));
1179 f->data[i] += offset;
1181 f->width = avctx->width;
1182 f->height = avctx->height;
1184 ret = av_frame_copy_props(f, props_frame);
1185 if (ret < 0)
1186 return ret;
1188 return 0;
1189 }
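/* load_input_picture(): queue one user-supplied frame for encoding.  The
 * frame is referenced directly when its strides and alignment match the
 * encoder's internal layout, otherwise it is copied into a padded internal
 * buffer with replicated bottom edges.  Missing pts values are guessed and
 * non-monotonic ones rejected; a NULL pic_arg flushes the queue. */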
1191 static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg)
1192 {
1193 MPVPicture *pic = NULL;
1194 int64_t pts;
1195 int display_picture_number = 0, ret;
1196 int encoding_delay = s->max_b_frames ? s->max_b_frames
1197 : (s->low_delay ? 0 : 1);
1198 int flush_offset = 1;
1199 int direct = 1;
1201 av_assert1(!s->input_picture[0]);
1203 if (pic_arg) {
1204 pts = pic_arg->pts;
1205 display_picture_number = s->input_picture_number++;
1207 if (pts != AV_NOPTS_VALUE) {
1208 if (s->user_specified_pts != AV_NOPTS_VALUE) {
1209 int64_t last = s->user_specified_pts;
1211 if (pts <= last) {
1212 av_log(s->avctx, AV_LOG_ERROR,
1213 "Invalid pts (%"PRId64") <= last (%"PRId64")\n",
1214 pts, last);
1215 return AVERROR(EINVAL);
1218 if (!s->low_delay && display_picture_number == 1)
1219 s->dts_delta = pts - last;
1221 s->user_specified_pts = pts;
1222 } else {
1223 if (s->user_specified_pts != AV_NOPTS_VALUE) {
1224 s->user_specified_pts =
1225 pts = s->user_specified_pts + 1;
1226 av_log(s->avctx, AV_LOG_INFO,
1227 "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n",
1228 pts);
1229 } else {
1230 pts = display_picture_number;
1234 if (pic_arg->linesize[0] != s->linesize ||
1235 pic_arg->linesize[1] != s->uvlinesize ||
1236 pic_arg->linesize[2] != s->uvlinesize)
1237 direct = 0;
1238 if ((s->width & 15) || (s->height & 15))
1239 direct = 0;
1240 if (((intptr_t)(pic_arg->data[0])) & (STRIDE_ALIGN-1))
1241 direct = 0;
1242 if (s->linesize & (STRIDE_ALIGN-1))
1243 direct = 0;
1245 ff_dlog(s->avctx, "%d %d %"PTRDIFF_SPECIFIER" %"PTRDIFF_SPECIFIER"\n", pic_arg->linesize[0],
1246 pic_arg->linesize[1], s->linesize, s->uvlinesize);
1248 pic = av_refstruct_pool_get(s->picture_pool);
1249 if (!pic)
1250 return AVERROR(ENOMEM);
1252 if (direct) {
1253 if ((ret = av_frame_ref(pic->f, pic_arg)) < 0)
1254 goto fail;
1255 pic->shared = 1;
1256 } else {
1257 ret = prepare_picture(s, pic->f, pic_arg);
1258 if (ret < 0)
1259 goto fail;
1261 for (int i = 0; i < 3; i++) {
1262 ptrdiff_t src_stride = pic_arg->linesize[i];
1263 ptrdiff_t dst_stride = i ? s->uvlinesize : s->linesize;
1264 int h_shift = i ? s->chroma_x_shift : 0;
1265 int v_shift = i ? s->chroma_y_shift : 0;
1266 int w = AV_CEIL_RSHIFT(s->width , h_shift);
1267 int h = AV_CEIL_RSHIFT(s->height, v_shift);
1268 const uint8_t *src = pic_arg->data[i];
1269 uint8_t *dst = pic->f->data[i];
1270 int vpad = 16;
1272 if ( s->codec_id == AV_CODEC_ID_MPEG2VIDEO
1273 && !s->progressive_sequence
1274 && FFALIGN(s->height, 32) - s->height > 16)
1275 vpad = 32;
1277 if (!s->avctx->rc_buffer_size)
1278 dst += INPLACE_OFFSET;
1280 if (src_stride == dst_stride)
1281 memcpy(dst, src, src_stride * h - src_stride + w);
1282 else {
1283 int h2 = h;
1284 uint8_t *dst2 = dst;
1285 while (h2--) {
1286 memcpy(dst2, src, w);
1287 dst2 += dst_stride;
1288 src += src_stride;
1291 if ((s->width & 15) || (s->height & (vpad-1))) {
1292 s->mpvencdsp.draw_edges(dst, dst_stride,
1293 w, h,
1294 16 >> h_shift,
1295 vpad >> v_shift,
1296 EDGE_BOTTOM);
1299 emms_c();
1302 pic->display_picture_number = display_picture_number;
1303 pic->f->pts = pts; // we set this here to avoid modifying pic_arg
1304 } else if (!s->reordered_input_picture[1]) {
1305 /* Flushing: When the above check is true, the encoder is about to run
1306 * out of frames to encode. Check if there are input_pictures left;
1307 * if so, ensure s->input_picture[0] contains the first picture.
1308 * A flush_offset != 1 will only happen if we did not receive enough
1309 * input frames. */
1310 for (flush_offset = 0; flush_offset < encoding_delay + 1; flush_offset++)
1311 if (s->input_picture[flush_offset])
1312 break;
1314 encoding_delay -= flush_offset - 1;
1317 /* shift buffer entries */
1318 for (int i = flush_offset; i <= MAX_B_FRAMES; i++)
1319 s->input_picture[i - flush_offset] = s->input_picture[i];
1320 for (int i = MAX_B_FRAMES + 1 - flush_offset; i <= MAX_B_FRAMES; i++)
1321 s->input_picture[i] = NULL;
1323 s->input_picture[encoding_delay] = pic;
1325 return 0;
1326 fail:
1327 av_refstruct_unref(&pic);
1328 return ret;
1331 static int skip_check(MpegEncContext *s, const MPVPicture *p, const MPVPicture *ref)
1333 int x, y, plane;
1334 int score = 0;
1335 int64_t score64 = 0;
1337 for (plane = 0; plane < 3; plane++) {
1338 const int stride = p->f->linesize[plane];
1339 const int bw = plane ? 1 : 2;
1340 for (y = 0; y < s->mb_height * bw; y++) {
1341 for (x = 0; x < s->mb_width * bw; x++) {
1342 int off = p->shared ? 0 : 16;
1343 const uint8_t *dptr = p->f->data[plane] + 8 * (x + y * stride) + off;
1344 const uint8_t *rptr = ref->f->data[plane] + 8 * (x + y * stride);
1345 int v = s->frame_skip_cmp_fn(s, dptr, rptr, stride, 8);
1347 switch (FFABS(s->frame_skip_exp)) {
1348 case 0: score = FFMAX(score, v); break;
1349 case 1: score += FFABS(v); break;
1350 case 2: score64 += v * (int64_t)v; break;
1351 case 3: score64 += FFABS(v * (int64_t)v * v); break;
1352 case 4: score64 += (v * (int64_t)v) * (v * (int64_t)v); break;
1357 emms_c();
1359 if (score)
1360 score64 = score;
1361 if (s->frame_skip_exp < 0)
1362 score64 = pow(score64 / (double)(s->mb_width * s->mb_height),
1363 -1.0/s->frame_skip_exp);
1365 if (score64 < s->frame_skip_threshold)
1366 return 1;
1367 if (score64 < ((s->frame_skip_factor * (int64_t) s->lambda) >> 8))
1368 return 1;
1369 return 0;
1370 }
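/* encode_frame(): helper for estimate_best_b_count(); sends one frame to a
 * scratch encoder context, drains all resulting packets and returns the
 * total size in bytes (or a negative error code). */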
1372 static int encode_frame(AVCodecContext *c, const AVFrame *frame, AVPacket *pkt)
1373 {
1374 int ret;
1375 int size = 0;
1377 ret = avcodec_send_frame(c, frame);
1378 if (ret < 0)
1379 return ret;
1381 do {
1382 ret = avcodec_receive_packet(c, pkt);
1383 if (ret >= 0) {
1384 size += pkt->size;
1385 av_packet_unref(pkt);
1386 } else if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
1387 return ret;
1388 } while (ret >= 0);
1390 return size;
1391 }
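/* estimate_best_b_count(): implements b_frame_strategy 2.  The queued input
 * pictures are downscaled by brd_scale and re-encoded with every candidate
 * number of consecutive B-frames; the candidate with the smallest
 * bits * lambda2 + reconstruction error cost is returned. */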
1393 static int estimate_best_b_count(MpegEncContext *s)
1394 {
1395 AVPacket *pkt;
1396 const int scale = s->brd_scale;
1397 int width = s->width >> scale;
1398 int height = s->height >> scale;
1399 int i, j, out_size, p_lambda, b_lambda, lambda2;
1400 int64_t best_rd = INT64_MAX;
1401 int best_b_count = -1;
1402 int ret = 0;
1404 av_assert0(scale >= 0 && scale <= 3);
1406 pkt = av_packet_alloc();
1407 if (!pkt)
1408 return AVERROR(ENOMEM);
1410 //emms_c();
1411 p_lambda = s->last_lambda_for[AV_PICTURE_TYPE_P];
1412 //p_lambda * FFABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
1413 b_lambda = s->last_lambda_for[AV_PICTURE_TYPE_B];
1414 if (!b_lambda) // FIXME we should do this somewhere else
1415 b_lambda = p_lambda;
1416 lambda2 = (b_lambda * b_lambda + (1 << FF_LAMBDA_SHIFT) / 2) >>
1417 FF_LAMBDA_SHIFT;
1419 for (i = 0; i < s->max_b_frames + 2; i++) {
1420 const MPVPicture *pre_input_ptr = i ? s->input_picture[i - 1] :
1421 s->next_pic.ptr;
1423 if (pre_input_ptr) {
1424 const uint8_t *data[4];
1425 memcpy(data, pre_input_ptr->f->data, sizeof(data));
1427 if (!pre_input_ptr->shared && i) {
1428 data[0] += INPLACE_OFFSET;
1429 data[1] += INPLACE_OFFSET;
1430 data[2] += INPLACE_OFFSET;
1433 s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[0],
1434 s->tmp_frames[i]->linesize[0],
1435 data[0],
1436 pre_input_ptr->f->linesize[0],
1437 width, height);
1438 s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[1],
1439 s->tmp_frames[i]->linesize[1],
1440 data[1],
1441 pre_input_ptr->f->linesize[1],
1442 width >> 1, height >> 1);
1443 s->mpvencdsp.shrink[scale](s->tmp_frames[i]->data[2],
1444 s->tmp_frames[i]->linesize[2],
1445 data[2],
1446 pre_input_ptr->f->linesize[2],
1447 width >> 1, height >> 1);
1451 for (j = 0; j < s->max_b_frames + 1; j++) {
1452 AVCodecContext *c;
1453 int64_t rd = 0;
1455 if (!s->input_picture[j])
1456 break;
1458 c = avcodec_alloc_context3(NULL);
1459 if (!c) {
1460 ret = AVERROR(ENOMEM);
1461 goto fail;
1464 c->width = width;
1465 c->height = height;
1466 c->flags = AV_CODEC_FLAG_QSCALE | AV_CODEC_FLAG_PSNR;
1467 c->flags |= s->avctx->flags & AV_CODEC_FLAG_QPEL;
1468 c->mb_decision = s->avctx->mb_decision;
1469 c->me_cmp = s->avctx->me_cmp;
1470 c->mb_cmp = s->avctx->mb_cmp;
1471 c->me_sub_cmp = s->avctx->me_sub_cmp;
1472 c->pix_fmt = AV_PIX_FMT_YUV420P;
1473 c->time_base = s->avctx->time_base;
1474 c->max_b_frames = s->max_b_frames;
1476 ret = avcodec_open2(c, s->avctx->codec, NULL);
1477 if (ret < 0)
1478 goto fail;
1481 s->tmp_frames[0]->pict_type = AV_PICTURE_TYPE_I;
1482 s->tmp_frames[0]->quality = 1 * FF_QP2LAMBDA;
1484 out_size = encode_frame(c, s->tmp_frames[0], pkt);
1485 if (out_size < 0) {
1486 ret = out_size;
1487 goto fail;
1490 //rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
1492 for (i = 0; i < s->max_b_frames + 1; i++) {
1493 int is_p = i % (j + 1) == j || i == s->max_b_frames;
1495 s->tmp_frames[i + 1]->pict_type = is_p ?
1496 AV_PICTURE_TYPE_P : AV_PICTURE_TYPE_B;
1497 s->tmp_frames[i + 1]->quality = is_p ? p_lambda : b_lambda;
1499 out_size = encode_frame(c, s->tmp_frames[i + 1], pkt);
1500 if (out_size < 0) {
1501 ret = out_size;
1502 goto fail;
1505 rd += (out_size * (uint64_t)lambda2) >> (FF_LAMBDA_SHIFT - 3);
1508 /* get the delayed frames */
1509 out_size = encode_frame(c, NULL, pkt);
1510 if (out_size < 0) {
1511 ret = out_size;
1512 goto fail;
1514 rd += (out_size * (uint64_t)lambda2) >> (FF_LAMBDA_SHIFT - 3);
1516 rd += c->error[0] + c->error[1] + c->error[2];
1518 if (rd < best_rd) {
1519 best_rd = rd;
1520 best_b_count = j;
1523 fail:
1524 avcodec_free_context(&c);
1525 av_packet_unref(pkt);
1526 if (ret < 0) {
1527 best_b_count = ret;
1528 break;
1532 av_packet_free(&pkt);
1534 return best_b_count;
1535 }
1537 /**
1538 * Determines whether an input picture is discarded or not
1539 * and if not determines the length of the next chain of B frames
1540 * and moves these pictures (including the P frame) into
1541 * reordered_input_picture.
1542 * input_picture[0] is always NULL when exiting this function, even on error;
1543 * reordered_input_picture[0] is always NULL when exiting this function on error.
1544 */
1545 static int set_bframe_chain_length(MpegEncContext *s)
1546 {
1547 /* Either nothing to do or can't do anything */
1548 if (s->reordered_input_picture[0] || !s->input_picture[0])
1549 return 0;
1551 /* set next picture type & ordering */
1552 if (s->frame_skip_threshold || s->frame_skip_factor) {
1553 if (s->picture_in_gop_number < s->gop_size &&
1554 s->next_pic.ptr &&
1555 skip_check(s, s->input_picture[0], s->next_pic.ptr)) {
1556 // FIXME check that the gop check above is +-1 correct
1557 av_refstruct_unref(&s->input_picture[0]);
1559 ff_vbv_update(s, 0);
1561 return 0;
1565 if (/*s->picture_in_gop_number >= s->gop_size ||*/
1566 !s->next_pic.ptr || s->intra_only) {
1567 s->reordered_input_picture[0] = s->input_picture[0];
1568 s->input_picture[0] = NULL;
1569 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_I;
1570 s->reordered_input_picture[0]->coded_picture_number =
1571 s->coded_picture_number++;
1572 } else {
1573 int b_frames = 0;
1575 if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
1576 for (int i = 0; i < s->max_b_frames + 1; i++) {
1577 int pict_num = s->input_picture[0]->display_picture_number + i;
1579 if (pict_num >= s->rc_context.num_entries)
1580 break;
1581 if (!s->input_picture[i]) {
1582 s->rc_context.entry[pict_num - 1].new_pict_type = AV_PICTURE_TYPE_P;
1583 break;
1586 s->input_picture[i]->f->pict_type =
1587 s->rc_context.entry[pict_num].new_pict_type;
1591 if (s->b_frame_strategy == 0) {
1592 b_frames = s->max_b_frames;
1593 while (b_frames && !s->input_picture[b_frames])
1594 b_frames--;
1595 } else if (s->b_frame_strategy == 1) {
1596 int i;
1597 for (i = 1; i < s->max_b_frames + 1; i++) {
1598 if (s->input_picture[i] &&
1599 s->input_picture[i]->b_frame_score == 0) {
1600 s->input_picture[i]->b_frame_score =
1601 get_intra_count(s,
1602 s->input_picture[i ]->f->data[0],
1603 s->input_picture[i - 1]->f->data[0],
1604 s->linesize) + 1;
1607 for (i = 0; i < s->max_b_frames + 1; i++) {
1608 if (!s->input_picture[i] ||
1609 s->input_picture[i]->b_frame_score - 1 >
1610 s->mb_num / s->b_sensitivity)
1611 break;
1614 b_frames = FFMAX(0, i - 1);
1616 /* reset scores */
1617 for (i = 0; i < b_frames + 1; i++) {
1618 s->input_picture[i]->b_frame_score = 0;
1620 } else if (s->b_frame_strategy == 2) {
1621 b_frames = estimate_best_b_count(s);
1622 if (b_frames < 0) {
1623 av_refstruct_unref(&s->input_picture[0]);
1624 return b_frames;
1628 emms_c();
1630 for (int i = b_frames - 1; i >= 0; i--) {
1631 int type = s->input_picture[i]->f->pict_type;
1632 if (type && type != AV_PICTURE_TYPE_B)
1633 b_frames = i;
1635 if (s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_B &&
1636 b_frames == s->max_b_frames) {
1637 av_log(s->avctx, AV_LOG_ERROR,
1638 "warning, too many B-frames in a row\n");
1641 if (s->picture_in_gop_number + b_frames >= s->gop_size) {
1642 if ((s->mpv_flags & FF_MPV_FLAG_STRICT_GOP) &&
1643 s->gop_size > s->picture_in_gop_number) {
1644 b_frames = s->gop_size - s->picture_in_gop_number - 1;
1645 } else {
1646 if (s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP)
1647 b_frames = 0;
1648 s->input_picture[b_frames]->f->pict_type = AV_PICTURE_TYPE_I;
1652 if ((s->avctx->flags & AV_CODEC_FLAG_CLOSED_GOP) && b_frames &&
1653 s->input_picture[b_frames]->f->pict_type == AV_PICTURE_TYPE_I)
1654 b_frames--;
1656 s->reordered_input_picture[0] = s->input_picture[b_frames];
1657 s->input_picture[b_frames] = NULL;
1658 if (s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_I)
1659 s->reordered_input_picture[0]->f->pict_type = AV_PICTURE_TYPE_P;
1660 s->reordered_input_picture[0]->coded_picture_number =
1661 s->coded_picture_number++;
1662 for (int i = 0; i < b_frames; i++) {
1663 s->reordered_input_picture[i + 1] = s->input_picture[i];
1664 s->input_picture[i] = NULL;
1665 s->reordered_input_picture[i + 1]->f->pict_type =
1666 AV_PICTURE_TYPE_B;
1667 s->reordered_input_picture[i + 1]->coded_picture_number =
1668 s->coded_picture_number++;
1672 return 0;
1675 static int select_input_picture(MpegEncContext *s)
1677 int ret;
1679 av_assert1(!s->reordered_input_picture[0]);
1681 for (int i = 1; i <= MAX_B_FRAMES; i++)
1682 s->reordered_input_picture[i - 1] = s->reordered_input_picture[i];
1683 s->reordered_input_picture[MAX_B_FRAMES] = NULL;
1685 ret = set_bframe_chain_length(s);
1686 av_assert1(!s->input_picture[0]);
1687 if (ret < 0)
1688 return ret;
1690 av_frame_unref(s->new_pic);
1692 if (s->reordered_input_picture[0]) {
1693 s->reordered_input_picture[0]->reference =
1694 s->reordered_input_picture[0]->f->pict_type != AV_PICTURE_TYPE_B;
1696 if (s->reordered_input_picture[0]->shared || s->avctx->rc_buffer_size) {
1697 // input is a shared pix, so we can't modify it -> allocate a new
1698 // one & ensure that the shared one is reuseable
1699 av_frame_move_ref(s->new_pic, s->reordered_input_picture[0]->f);
1701 ret = prepare_picture(s, s->reordered_input_picture[0]->f, s->new_pic);
1702 if (ret < 0)
1703 goto fail;
1704 } else {
1705 // input is not a shared pix -> reuse buffer for current_pix
1706 ret = av_frame_ref(s->new_pic, s->reordered_input_picture[0]->f);
1707 if (ret < 0)
1708 goto fail;
1709 for (int i = 0; i < MPV_MAX_PLANES; i++) {
1710 if (s->new_pic->data[i])
1711 s->new_pic->data[i] += INPLACE_OFFSET;
1714 s->cur_pic.ptr = s->reordered_input_picture[0];
1715 s->reordered_input_picture[0] = NULL;
1716 av_assert1(s->mb_width == s->buffer_pools.alloc_mb_width);
1717 av_assert1(s->mb_height == s->buffer_pools.alloc_mb_height);
1718 av_assert1(s->mb_stride == s->buffer_pools.alloc_mb_stride);
1719 ret = ff_mpv_alloc_pic_accessories(s->avctx, &s->cur_pic,
1720 &s->sc, &s->buffer_pools, s->mb_height);
1721 if (ret < 0) {
1722 ff_mpv_unref_picture(&s->cur_pic);
1723 return ret;
1725 s->picture_number = s->cur_pic.ptr->display_picture_number;
1728 return 0;
1729 fail:
1730 av_refstruct_unref(&s->reordered_input_picture[0]);
1731 return ret;
1734 static void frame_end(MpegEncContext *s)
1736 if (s->unrestricted_mv &&
1737 s->cur_pic.reference &&
1738 !s->intra_only) {
1739 int hshift = s->chroma_x_shift;
1740 int vshift = s->chroma_y_shift;
1741 s->mpvencdsp.draw_edges(s->cur_pic.data[0],
1742 s->cur_pic.linesize[0],
1743 s->h_edge_pos, s->v_edge_pos,
1744 EDGE_WIDTH, EDGE_WIDTH,
1745 EDGE_TOP | EDGE_BOTTOM);
1746 s->mpvencdsp.draw_edges(s->cur_pic.data[1],
1747 s->cur_pic.linesize[1],
1748 s->h_edge_pos >> hshift,
1749 s->v_edge_pos >> vshift,
1750 EDGE_WIDTH >> hshift,
1751 EDGE_WIDTH >> vshift,
1752 EDGE_TOP | EDGE_BOTTOM);
1753 s->mpvencdsp.draw_edges(s->cur_pic.data[2],
1754 s->cur_pic.linesize[2],
1755 s->h_edge_pos >> hshift,
1756 s->v_edge_pos >> vshift,
1757 EDGE_WIDTH >> hshift,
1758 EDGE_WIDTH >> vshift,
1759 EDGE_TOP | EDGE_BOTTOM);
1762 emms_c();
1764 s->last_pict_type = s->pict_type;
1765 s->last_lambda_for [s->pict_type] = s->cur_pic.ptr->f->quality;
1766 if (s->pict_type!= AV_PICTURE_TYPE_B)
1767 s->last_non_b_pict_type = s->pict_type;
1768 }
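/* update_noise_reduction(): refresh the per-coefficient DCT offsets used for
 * noise reduction.  The running error sums are halved once enough blocks have
 * been accumulated, and each offset is roughly
 * noise_reduction * dct_count / dct_error_sum for that coefficient. */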
1770 static void update_noise_reduction(MpegEncContext *s)
1771 {
1772 int intra, i;
1774 for (intra = 0; intra < 2; intra++) {
1775 if (s->dct_count[intra] > (1 << 16)) {
1776 for (i = 0; i < 64; i++) {
1777 s->dct_error_sum[intra][i] >>= 1;
1779 s->dct_count[intra] >>= 1;
1782 for (i = 0; i < 64; i++) {
1783 s->dct_offset[intra][i] = (s->noise_reduction *
1784 s->dct_count[intra] +
1785 s->dct_error_sum[intra][i] / 2) /
1786 (s->dct_error_sum[intra][i] + 1);
1791 static void frame_start(MpegEncContext *s)
1793 s->cur_pic.ptr->f->pict_type = s->pict_type;
1795 if (s->pict_type != AV_PICTURE_TYPE_B) {
1796 ff_mpv_replace_picture(&s->last_pic, &s->next_pic);
1797 ff_mpv_replace_picture(&s->next_pic, &s->cur_pic);
1800 if (s->dct_error_sum) {
1801 av_assert2(s->noise_reduction && s->encoding);
1802 update_noise_reduction(s);
1806 int ff_mpv_encode_picture(AVCodecContext *avctx, AVPacket *pkt,
1807 const AVFrame *pic_arg, int *got_packet)
1809 MpegEncContext *s = avctx->priv_data;
1810 int stuffing_count, ret;
1811 int context_count = s->slice_context_count;
1813 ff_mpv_unref_picture(&s->cur_pic);
1815 s->vbv_ignore_qmax = 0;
1817 s->picture_in_gop_number++;
1819 if (load_input_picture(s, pic_arg) < 0)
1820 return -1;
1822 if (select_input_picture(s) < 0) {
1823 return -1;
1826 /* output? */
1827 if (s->new_pic->data[0]) {
1828 int growing_buffer = context_count == 1 && !s->data_partitioning;
1829 size_t pkt_size = 10000 + s->mb_width * s->mb_height *
1830 (growing_buffer ? 64 : (MAX_MB_BYTES + 100));
1831 if (CONFIG_MJPEG_ENCODER && avctx->codec_id == AV_CODEC_ID_MJPEG) {
1832 ret = ff_mjpeg_add_icc_profile_size(avctx, s->new_pic, &pkt_size);
1833 if (ret < 0)
1834 return ret;
1836 if ((ret = ff_alloc_packet(avctx, pkt, pkt_size)) < 0)
1837 return ret;
1838 pkt->size = avctx->internal->byte_buffer_size - AV_INPUT_BUFFER_PADDING_SIZE;
1839 if (s->mb_info) {
1840 s->mb_info_ptr = av_packet_new_side_data(pkt,
1841 AV_PKT_DATA_H263_MB_INFO,
1842 s->mb_width*s->mb_height*12);
1843 if (!s->mb_info_ptr)
1844 return AVERROR(ENOMEM);
1845 s->prev_mb_info = s->last_mb_info = s->mb_info_size = 0;
1848 s->pict_type = s->new_pic->pict_type;
1849 //emms_c();
1850 frame_start(s);
1851 vbv_retry:
1852 ret = encode_picture(s, pkt);
1853 if (growing_buffer) {
1854 av_assert0(s->pb.buf == avctx->internal->byte_buffer);
1855 pkt->data = s->pb.buf;
1856 pkt->size = avctx->internal->byte_buffer_size;
1858 if (ret < 0)
1859 return -1;
1861 frame_end(s);
1863 if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) && s->out_format == FMT_MJPEG)
1864 ff_mjpeg_encode_picture_trailer(&s->pb, s->header_bits);
1866 if (avctx->rc_buffer_size) {
1867 RateControlContext *rcc = &s->rc_context;
1868 int max_size = FFMAX(rcc->buffer_index * avctx->rc_max_available_vbv_use, rcc->buffer_index - 500);
1869 int hq = (avctx->mb_decision == FF_MB_DECISION_RD || avctx->trellis);
1870 int min_step = hq ? 1 : (1<<(FF_LAMBDA_SHIFT + 7))/139;
1872 if (put_bits_count(&s->pb) > max_size &&
1873 s->lambda < s->lmax) {
1874 s->next_lambda = FFMAX(s->lambda + min_step, s->lambda *
1875 (s->qscale + 1) / s->qscale);
1876 if (s->adaptive_quant) {
1877 int i;
1878 for (i = 0; i < s->mb_height * s->mb_stride; i++)
1879 s->lambda_table[i] =
1880 FFMAX(s->lambda_table[i] + min_step,
1881 s->lambda_table[i] * (s->qscale + 1) /
1882 s->qscale);
1884 s->mb_skipped = 0; // done in frame_start()
1885 // done in encode_picture() so we must undo it
1886 if (s->pict_type == AV_PICTURE_TYPE_P) {
1887 s->no_rounding ^= s->flipflop_rounding;
1889 if (s->pict_type != AV_PICTURE_TYPE_B) {
1890 s->time_base = s->last_time_base;
1891 s->last_non_b_time = s->time - s->pp_time;
1893 s->vbv_ignore_qmax = 1;
1894 av_log(avctx, AV_LOG_VERBOSE, "reencoding frame due to VBV\n");
1895 goto vbv_retry;
1898 av_assert0(avctx->rc_max_rate);
1901 if (avctx->flags & AV_CODEC_FLAG_PASS1)
1902 ff_write_pass1_stats(s);
1904 for (int i = 0; i < MPV_MAX_PLANES; i++)
1905 avctx->error[i] += s->encoding_error[i];
1906 ff_side_data_set_encoder_stats(pkt, s->cur_pic.ptr->f->quality,
1907 s->encoding_error,
1908 (avctx->flags&AV_CODEC_FLAG_PSNR) ? MPV_MAX_PLANES : 0,
1909 s->pict_type);
1911 if (avctx->flags & AV_CODEC_FLAG_PASS1)
1912 assert(put_bits_count(&s->pb) == s->header_bits + s->mv_bits +
1913 s->misc_bits + s->i_tex_bits +
1914 s->p_tex_bits);
1915 flush_put_bits(&s->pb);
1916 s->frame_bits = put_bits_count(&s->pb);
1918 stuffing_count = ff_vbv_update(s, s->frame_bits);
1919 s->stuffing_bits = 8*stuffing_count;
1920 if (stuffing_count) {
1921 if (put_bytes_left(&s->pb, 0) < stuffing_count + 50) {
1922 av_log(avctx, AV_LOG_ERROR, "stuffing too large\n");
1923 return -1;
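/* Pad the frame up to the size requested by rate control: MPEG-1/2 use
 * plain zero stuffing bytes, MPEG-4 emits a start code followed by 0xFF
 * filler bytes; other codecs cannot be padded here. */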
1926 switch (s->codec_id) {
1927 case AV_CODEC_ID_MPEG1VIDEO:
1928 case AV_CODEC_ID_MPEG2VIDEO:
1929 while (stuffing_count--) {
1930 put_bits(&s->pb, 8, 0);
1932 break;
1933 case AV_CODEC_ID_MPEG4:
1934 put_bits(&s->pb, 16, 0);
1935 put_bits(&s->pb, 16, 0x1C3);
1936 stuffing_count -= 4;
1937 while (stuffing_count--) {
1938 put_bits(&s->pb, 8, 0xFF);
1940 break;
1941 default:
1942 av_log(avctx, AV_LOG_ERROR, "vbv buffer overflow\n");
1943 s->stuffing_bits = 0;
1945 flush_put_bits(&s->pb);
1946 s->frame_bits = put_bits_count(&s->pb);
1949 /* update MPEG-1/2 vbv_delay for CBR */
1950 if (avctx->rc_max_rate &&
1951 avctx->rc_min_rate == avctx->rc_max_rate &&
1952 s->out_format == FMT_MPEG1 &&
1953 90000LL * (avctx->rc_buffer_size - 1) <=
1954 avctx->rc_max_rate * 0xFFFFLL) {
1955 AVCPBProperties *props;
1956 size_t props_size;
1958 int vbv_delay, min_delay;
1959 double inbits = avctx->rc_max_rate *
1960 av_q2d(avctx->time_base);
1961 int minbits = s->frame_bits - 8 *
1962 (s->vbv_delay_pos - 1);
1963 double bits = s->rc_context.buffer_index + minbits - inbits;
1964 uint8_t *const vbv_delay_ptr = s->pb.buf + s->vbv_delay_pos;
1966 if (bits < 0)
1967 av_log(avctx, AV_LOG_ERROR,
1968 "Internal error, negative bits\n");
1970 av_assert1(s->repeat_first_field == 0);
1972 vbv_delay = bits * 90000 / avctx->rc_max_rate;
1973 min_delay = (minbits * 90000LL + avctx->rc_max_rate - 1) /
1974 avctx->rc_max_rate;
1976 vbv_delay = FFMAX(vbv_delay, min_delay);
1978 av_assert0(vbv_delay < 0xFFFF);
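/* Splice the 16-bit vbv_delay into the already written picture header:
 * 3 bits go into the first byte at s->vbv_delay_pos, 8 into the second
 * and the remaining 5 into the top of the third. */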
1980 vbv_delay_ptr[0] &= 0xF8;
1981 vbv_delay_ptr[0] |= vbv_delay >> 13;
1982 vbv_delay_ptr[1] = vbv_delay >> 5;
1983 vbv_delay_ptr[2] &= 0x07;
1984 vbv_delay_ptr[2] |= vbv_delay << 3;
1986 props = av_cpb_properties_alloc(&props_size);
1987 if (!props)
1988 return AVERROR(ENOMEM);
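/* AVCPBProperties expects vbv_delay in 27 MHz ticks, while the value
 * computed above is in 90 kHz units, hence the factor of 300. */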
1989 props->vbv_delay = vbv_delay * 300;
1991 ret = av_packet_add_side_data(pkt, AV_PKT_DATA_CPB_PROPERTIES,
1992 (uint8_t*)props, props_size);
1993 if (ret < 0) {
1994 av_freep(&props);
1995 return ret;
1998 s->total_bits += s->frame_bits;
2000 pkt->pts = s->cur_pic.ptr->f->pts;
2001 pkt->duration = s->cur_pic.ptr->f->duration;
2002 if (!s->low_delay && s->pict_type != AV_PICTURE_TYPE_B) {
2003 if (!s->cur_pic.ptr->coded_picture_number)
2004 pkt->dts = pkt->pts - s->dts_delta;
2005 else
2006 pkt->dts = s->reordered_pts;
2007 s->reordered_pts = pkt->pts;
2008 } else
2009 pkt->dts = pkt->pts;
2011 // the no-delay case is handled in generic code
2012 if (avctx->codec->capabilities & AV_CODEC_CAP_DELAY) {
2013 ret = ff_encode_reordered_opaque(avctx, pkt, s->cur_pic.ptr->f);
2014 if (ret < 0)
2015 return ret;
2018 if (s->cur_pic.ptr->f->flags & AV_FRAME_FLAG_KEY)
2019 pkt->flags |= AV_PKT_FLAG_KEY;
2020 if (s->mb_info)
2021 av_packet_shrink_side_data(pkt, AV_PKT_DATA_H263_MB_INFO, s->mb_info_size);
2022 } else {
2023 s->frame_bits = 0;
2026 ff_mpv_unref_picture(&s->cur_pic);
2028 av_assert1((s->frame_bits & 7) == 0);
2030 pkt->size = s->frame_bits / 8;
2031 *got_packet = !!pkt->size;
2032 return 0;
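/* Zero out blocks that contain only a few isolated +-1 coefficients:
 * each +-1 level is scored by tab[] according to the zero run preceding
 * it in scan order, blocks with any larger level are left untouched, and
 * if the total score stays below the threshold the whole block is
 * cleared. A negative threshold also allows the DC coefficient to be
 * eliminated. */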
2035 static inline void dct_single_coeff_elimination(MpegEncContext *s,
2036 int n, int threshold)
2038 static const char tab[64] = {
2039 3, 2, 2, 1, 1, 1, 1, 1,
2040 1, 1, 1, 1, 1, 1, 1, 1,
2041 1, 1, 1, 1, 1, 1, 1, 1,
2042 0, 0, 0, 0, 0, 0, 0, 0,
2043 0, 0, 0, 0, 0, 0, 0, 0,
2044 0, 0, 0, 0, 0, 0, 0, 0,
2045 0, 0, 0, 0, 0, 0, 0, 0,
2046 0, 0, 0, 0, 0, 0, 0, 0
2048 int score = 0;
2049 int run = 0;
2050 int i;
2051 int16_t *block = s->block[n];
2052 const int last_index = s->block_last_index[n];
2053 int skip_dc;
2055 if (threshold < 0) {
2056 skip_dc = 0;
2057 threshold = -threshold;
2058 } else
2059 skip_dc = 1;
2061     /* Are all the coefficients we could set to zero already zero? */
2062 if (last_index <= skip_dc - 1)
2063 return;
2065 for (i = 0; i <= last_index; i++) {
2066 const int j = s->intra_scantable.permutated[i];
2067 const int level = FFABS(block[j]);
2068 if (level == 1) {
2069 if (skip_dc && i == 0)
2070 continue;
2071 score += tab[run];
2072 run = 0;
2073 } else if (level > 1) {
2074 return;
2075 } else {
2076 run++;
2079 if (score >= threshold)
2080 return;
2081 for (i = skip_dc; i <= last_index; i++) {
2082 const int j = s->intra_scantable.permutated[i];
2083 block[j] = 0;
2085 if (block[0])
2086 s->block_last_index[n] = 0;
2087 else
2088 s->block_last_index[n] = -1;
2091 static inline void clip_coeffs(MpegEncContext *s, int16_t *block,
2092 int last_index)
2094 int i;
2095 const int maxlevel = s->max_qcoeff;
2096 const int minlevel = s->min_qcoeff;
2097 int overflow = 0;
2099 if (s->mb_intra) {
2100 i = 1; // skip clipping of intra dc
2101 } else
2102 i = 0;
2104 for (; i <= last_index; i++) {
2105 const int j = s->intra_scantable.permutated[i];
2106 int level = block[j];
2108 if (level > maxlevel) {
2109 level = maxlevel;
2110 overflow++;
2111 } else if (level < minlevel) {
2112 level = minlevel;
2113 overflow++;
2116 block[j] = level;
2119 if (overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
2120 av_log(s->avctx, AV_LOG_INFO,
2121 "warning, clipping %d dct coefficients to %d..%d\n",
2122 overflow, minlevel, maxlevel);
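/* Per-coefficient weights for the noise-shaping quantizer: each entry is
 * roughly 36 times the standard deviation of the corresponding source
 * pixel's 3x3 neighbourhood; the result feeds dct_quantize_refine(). */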
2125 static void get_visual_weight(int16_t *weight, const uint8_t *ptr, int stride)
2127 int x, y;
2128 // FIXME optimize
2129 for (y = 0; y < 8; y++) {
2130 for (x = 0; x < 8; x++) {
2131 int x2, y2;
2132 int sum = 0;
2133 int sqr = 0;
2134 int count = 0;
2136 for (y2 = FFMAX(y - 1, 0); y2 < FFMIN(8, y + 2); y2++) {
2137 for (x2= FFMAX(x - 1, 0); x2 < FFMIN(8, x + 2); x2++) {
2138 int v = ptr[x2 + y2 * stride];
2139 sum += v;
2140 sqr += v * v;
2141 count++;
2144 weight[x + 8 * y]= (36 * ff_sqrt(count * sqr - sum * sum)) / count;
2149 static av_always_inline void encode_mb_internal(MpegEncContext *s,
2150 int motion_x, int motion_y,
2151 int mb_block_height,
2152 int mb_block_width,
2153 int mb_block_count,
2154 int chroma_x_shift,
2155 int chroma_y_shift,
2156 int chroma_format)
2158 /* Interlaced DCT is only possible with MPEG-2 and MPEG-4
2159 * and neither of these encoders currently supports 444. */
2160 #define INTERLACED_DCT(s) ((chroma_format == CHROMA_420 || chroma_format == CHROMA_422) && \
2161 (s)->avctx->flags & AV_CODEC_FLAG_INTERLACED_DCT)
2162 int16_t weight[12][64];
2163 int16_t orig[12][64];
2164 const int mb_x = s->mb_x;
2165 const int mb_y = s->mb_y;
2166 int i;
2167 int skip_dct[12];
2168 int dct_offset = s->linesize * 8; // default for progressive frames
2169 int uv_dct_offset = s->uvlinesize * 8;
2170 const uint8_t *ptr_y, *ptr_cb, *ptr_cr;
2171 ptrdiff_t wrap_y, wrap_c;
2173 for (i = 0; i < mb_block_count; i++)
2174 skip_dct[i] = s->skipdct;
2176 if (s->adaptive_quant) {
2177 const int last_qp = s->qscale;
2178 const int mb_xy = mb_x + mb_y * s->mb_stride;
2180 s->lambda = s->lambda_table[mb_xy];
2181 s->lambda2 = (s->lambda * s->lambda + FF_LAMBDA_SCALE / 2) >>
2182 FF_LAMBDA_SHIFT;
2184 if (!(s->mpv_flags & FF_MPV_FLAG_QP_RD)) {
2185 s->dquant = s->cur_pic.qscale_table[mb_xy] - last_qp;
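/* The H.263 syntax only allows a small per-MB quantizer delta, and the
 * MPEG-4 cases cleared below (odd deltas or direct mode in B-frames,
 * 4MV macroblocks) presumably cannot carry a quantizer change at all,
 * so clamp or drop dquant accordingly. */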
2187 if (s->out_format == FMT_H263) {
2188 s->dquant = av_clip(s->dquant, -2, 2);
2190 if (s->codec_id == AV_CODEC_ID_MPEG4) {
2191 if (!s->mb_intra) {
2192 if (s->pict_type == AV_PICTURE_TYPE_B) {
2193 if (s->dquant & 1 || s->mv_dir & MV_DIRECT)
2194 s->dquant = 0;
2196 if (s->mv_type == MV_TYPE_8X8)
2197 s->dquant = 0;
2202 ff_set_qscale(s, last_qp + s->dquant);
2203 } else if (s->mpv_flags & FF_MPV_FLAG_QP_RD)
2204 ff_set_qscale(s, s->qscale + s->dquant);
2206 wrap_y = s->linesize;
2207 wrap_c = s->uvlinesize;
2208 ptr_y = s->new_pic->data[0] +
2209 (mb_y * 16 * wrap_y) + mb_x * 16;
2210 ptr_cb = s->new_pic->data[1] +
2211 (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2212 ptr_cr = s->new_pic->data[2] +
2213 (mb_y * mb_block_height * wrap_c) + mb_x * mb_block_width;
2215 if((mb_x * 16 + 16 > s->width || mb_y * 16 + 16 > s->height) && s->codec_id != AV_CODEC_ID_AMV){
2216 uint8_t *ebuf = s->sc.edge_emu_buffer + 38 * wrap_y;
2217 int cw = (s->width + chroma_x_shift) >> chroma_x_shift;
2218 int ch = (s->height + chroma_y_shift) >> chroma_y_shift;
2219 s->vdsp.emulated_edge_mc(ebuf, ptr_y,
2220 wrap_y, wrap_y,
2221 16, 16, mb_x * 16, mb_y * 16,
2222 s->width, s->height);
2223 ptr_y = ebuf;
2224 s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y, ptr_cb,
2225 wrap_c, wrap_c,
2226 mb_block_width, mb_block_height,
2227 mb_x * mb_block_width, mb_y * mb_block_height,
2228 cw, ch);
2229 ptr_cb = ebuf + 16 * wrap_y;
2230 s->vdsp.emulated_edge_mc(ebuf + 16 * wrap_y + 16, ptr_cr,
2231 wrap_c, wrap_c,
2232 mb_block_width, mb_block_height,
2233 mb_x * mb_block_width, mb_y * mb_block_height,
2234 cw, ch);
2235 ptr_cr = ebuf + 16 * wrap_y + 16;
2238 if (s->mb_intra) {
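/* Frame vs. field DCT decision: compare the ildct_cmp cost of the two
 * 8-line halves at frame stride against the two fields at doubled
 * stride; the -400 offset biases the decision towards progressive. */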
2239 if (INTERLACED_DCT(s)) {
2240 int progressive_score, interlaced_score;
2242 s->interlaced_dct = 0;
2243 progressive_score = s->ildct_cmp[1](s, ptr_y, NULL, wrap_y, 8) +
2244 s->ildct_cmp[1](s, ptr_y + wrap_y * 8,
2245 NULL, wrap_y, 8) - 400;
2247 if (progressive_score > 0) {
2248 interlaced_score = s->ildct_cmp[1](s, ptr_y,
2249 NULL, wrap_y * 2, 8) +
2250 s->ildct_cmp[1](s, ptr_y + wrap_y,
2251 NULL, wrap_y * 2, 8);
2252 if (progressive_score > interlaced_score) {
2253 s->interlaced_dct = 1;
2255 dct_offset = wrap_y;
2256 uv_dct_offset = wrap_c;
2257 wrap_y <<= 1;
2258 if (chroma_format == CHROMA_422 ||
2259 chroma_format == CHROMA_444)
2260 wrap_c <<= 1;
2265 s->pdsp.get_pixels(s->block[0], ptr_y, wrap_y);
2266 s->pdsp.get_pixels(s->block[1], ptr_y + 8, wrap_y);
2267 s->pdsp.get_pixels(s->block[2], ptr_y + dct_offset, wrap_y);
2268 s->pdsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
2270 if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2271 skip_dct[4] = 1;
2272 skip_dct[5] = 1;
2273 } else {
2274 s->pdsp.get_pixels(s->block[4], ptr_cb, wrap_c);
2275 s->pdsp.get_pixels(s->block[5], ptr_cr, wrap_c);
2276 if (chroma_format == CHROMA_422) {
2277 s->pdsp.get_pixels(s->block[6], ptr_cb + uv_dct_offset, wrap_c);
2278 s->pdsp.get_pixels(s->block[7], ptr_cr + uv_dct_offset, wrap_c);
2279 } else if (chroma_format == CHROMA_444) {
2280 s->pdsp.get_pixels(s->block[ 6], ptr_cb + 8, wrap_c);
2281 s->pdsp.get_pixels(s->block[ 7], ptr_cr + 8, wrap_c);
2282 s->pdsp.get_pixels(s->block[ 8], ptr_cb + uv_dct_offset, wrap_c);
2283 s->pdsp.get_pixels(s->block[ 9], ptr_cr + uv_dct_offset, wrap_c);
2284 s->pdsp.get_pixels(s->block[10], ptr_cb + uv_dct_offset + 8, wrap_c);
2285 s->pdsp.get_pixels(s->block[11], ptr_cr + uv_dct_offset + 8, wrap_c);
2288 } else {
2289 op_pixels_func (*op_pix)[4];
2290 qpel_mc_func (*op_qpix)[16];
2291 uint8_t *dest_y, *dest_cb, *dest_cr;
2293 dest_y = s->dest[0];
2294 dest_cb = s->dest[1];
2295 dest_cr = s->dest[2];
2297 if ((!s->no_rounding) || s->pict_type == AV_PICTURE_TYPE_B) {
2298 op_pix = s->hdsp.put_pixels_tab;
2299 op_qpix = s->qdsp.put_qpel_pixels_tab;
2300 } else {
2301 op_pix = s->hdsp.put_no_rnd_pixels_tab;
2302 op_qpix = s->qdsp.put_no_rnd_qpel_pixels_tab;
2305 if (s->mv_dir & MV_DIR_FORWARD) {
2306 ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 0,
2307 s->last_pic.data,
2308 op_pix, op_qpix);
2309 op_pix = s->hdsp.avg_pixels_tab;
2310 op_qpix = s->qdsp.avg_qpel_pixels_tab;
2312 if (s->mv_dir & MV_DIR_BACKWARD) {
2313 ff_mpv_motion(s, dest_y, dest_cb, dest_cr, 1,
2314 s->next_pic.data,
2315 op_pix, op_qpix);
2318 if (INTERLACED_DCT(s)) {
2319 int progressive_score, interlaced_score;
2321 s->interlaced_dct = 0;
2322 progressive_score = s->ildct_cmp[0](s, dest_y, ptr_y, wrap_y, 8) +
2323 s->ildct_cmp[0](s, dest_y + wrap_y * 8,
2324 ptr_y + wrap_y * 8,
2325 wrap_y, 8) - 400;
2327 if (s->avctx->ildct_cmp == FF_CMP_VSSE)
2328 progressive_score -= 400;
2330 if (progressive_score > 0) {
2331 interlaced_score = s->ildct_cmp[0](s, dest_y, ptr_y,
2332 wrap_y * 2, 8) +
2333 s->ildct_cmp[0](s, dest_y + wrap_y,
2334 ptr_y + wrap_y,
2335 wrap_y * 2, 8);
2337 if (progressive_score > interlaced_score) {
2338 s->interlaced_dct = 1;
2340 dct_offset = wrap_y;
2341 uv_dct_offset = wrap_c;
2342 wrap_y <<= 1;
2343 if (chroma_format == CHROMA_422)
2344 wrap_c <<= 1;
2349 s->pdsp.diff_pixels(s->block[0], ptr_y, dest_y, wrap_y);
2350 s->pdsp.diff_pixels(s->block[1], ptr_y + 8, dest_y + 8, wrap_y);
2351 s->pdsp.diff_pixels(s->block[2], ptr_y + dct_offset,
2352 dest_y + dct_offset, wrap_y);
2353 s->pdsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8,
2354 dest_y + dct_offset + 8, wrap_y);
2356 if (s->avctx->flags & AV_CODEC_FLAG_GRAY) {
2357 skip_dct[4] = 1;
2358 skip_dct[5] = 1;
2359 } else {
2360 s->pdsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
2361 s->pdsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
2362 if (!chroma_y_shift) { /* 422 */
2363 s->pdsp.diff_pixels(s->block[6], ptr_cb + uv_dct_offset,
2364 dest_cb + uv_dct_offset, wrap_c);
2365 s->pdsp.diff_pixels(s->block[7], ptr_cr + uv_dct_offset,
2366 dest_cr + uv_dct_offset, wrap_c);
2369 /* pre quantization */
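/* Skip the DCT of blocks whose motion-compensated residual is tiny
 * (SAD below 20*qscale); such blocks are assumed to quantize to
 * nothing anyway. */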
2370 if (s->mc_mb_var[s->mb_stride * mb_y + mb_x] < 2 * s->qscale * s->qscale) {
2371 // FIXME optimize
2372 if (s->sad_cmp[1](NULL, ptr_y, dest_y, wrap_y, 8) < 20 * s->qscale)
2373 skip_dct[0] = 1;
2374 if (s->sad_cmp[1](NULL, ptr_y + 8, dest_y + 8, wrap_y, 8) < 20 * s->qscale)
2375 skip_dct[1] = 1;
2376 if (s->sad_cmp[1](NULL, ptr_y + dct_offset, dest_y + dct_offset,
2377 wrap_y, 8) < 20 * s->qscale)
2378 skip_dct[2] = 1;
2379 if (s->sad_cmp[1](NULL, ptr_y + dct_offset + 8, dest_y + dct_offset + 8,
2380 wrap_y, 8) < 20 * s->qscale)
2381 skip_dct[3] = 1;
2382 if (s->sad_cmp[1](NULL, ptr_cb, dest_cb, wrap_c, 8) < 20 * s->qscale)
2383 skip_dct[4] = 1;
2384 if (s->sad_cmp[1](NULL, ptr_cr, dest_cr, wrap_c, 8) < 20 * s->qscale)
2385 skip_dct[5] = 1;
2386 if (!chroma_y_shift) { /* 422 */
2387 if (s->sad_cmp[1](NULL, ptr_cb + uv_dct_offset,
2388 dest_cb + uv_dct_offset,
2389 wrap_c, 8) < 20 * s->qscale)
2390 skip_dct[6] = 1;
2391 if (s->sad_cmp[1](NULL, ptr_cr + uv_dct_offset,
2392 dest_cr + uv_dct_offset,
2393 wrap_c, 8) < 20 * s->qscale)
2394 skip_dct[7] = 1;
2399 if (s->quantizer_noise_shaping) {
2400 if (!skip_dct[0])
2401 get_visual_weight(weight[0], ptr_y , wrap_y);
2402 if (!skip_dct[1])
2403 get_visual_weight(weight[1], ptr_y + 8, wrap_y);
2404 if (!skip_dct[2])
2405 get_visual_weight(weight[2], ptr_y + dct_offset , wrap_y);
2406 if (!skip_dct[3])
2407 get_visual_weight(weight[3], ptr_y + dct_offset + 8, wrap_y);
2408 if (!skip_dct[4])
2409 get_visual_weight(weight[4], ptr_cb , wrap_c);
2410 if (!skip_dct[5])
2411 get_visual_weight(weight[5], ptr_cr , wrap_c);
2412 if (!chroma_y_shift) { /* 422 */
2413 if (!skip_dct[6])
2414 get_visual_weight(weight[6], ptr_cb + uv_dct_offset,
2415 wrap_c);
2416 if (!skip_dct[7])
2417 get_visual_weight(weight[7], ptr_cr + uv_dct_offset,
2418 wrap_c);
2420 memcpy(orig[0], s->block[0], sizeof(int16_t) * 64 * mb_block_count);
2423 /* DCT & quantize */
2424 av_assert2(s->out_format != FMT_MJPEG || s->qscale == 8);
2426 for (i = 0; i < mb_block_count; i++) {
2427 if (!skip_dct[i]) {
2428 int overflow;
2429 s->block_last_index[i] = s->dct_quantize(s, s->block[i], i, s->qscale, &overflow);
2430             // FIXME we could decide to change the quantizer instead of
2431             // clipping
2432             // JS: I don't think that would be a good idea, it could lower
2433             // quality instead of improving it. Only INTRADC clipping
2434             // deserves changes in the quantizer.
2435 if (overflow)
2436 clip_coeffs(s, s->block[i], s->block_last_index[i]);
2437 } else
2438 s->block_last_index[i] = -1;
2440 if (s->quantizer_noise_shaping) {
2441 for (i = 0; i < mb_block_count; i++) {
2442 if (!skip_dct[i]) {
2443 s->block_last_index[i] =
2444 dct_quantize_refine(s, s->block[i], weight[i],
2445 orig[i], i, s->qscale);
2450 if (s->luma_elim_threshold && !s->mb_intra)
2451 for (i = 0; i < 4; i++)
2452 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
2453 if (s->chroma_elim_threshold && !s->mb_intra)
2454 for (i = 4; i < mb_block_count; i++)
2455 dct_single_coeff_elimination(s, i, s->chroma_elim_threshold);
2457 if (s->mpv_flags & FF_MPV_FLAG_CBP_RD) {
2458 for (i = 0; i < mb_block_count; i++) {
2459 if (s->block_last_index[i] == -1)
2460 s->coded_score[i] = INT_MAX / 256;
2465 if ((s->avctx->flags & AV_CODEC_FLAG_GRAY) && s->mb_intra) {
2466 s->block_last_index[4] =
2467 s->block_last_index[5] = 0;
2468 s->block[4][0] =
2469 s->block[5][0] = (1024 + s->c_dc_scale / 2) / s->c_dc_scale;
2470 if (!chroma_y_shift) { /* 422 / 444 */
2471 for (i=6; i<12; i++) {
2472 s->block_last_index[i] = 0;
2473 s->block[i][0] = s->block[4][0];
2478     // FIXME: the non-C dct_quantize implementations return an incorrect block_last_index when the alternate scan is used
2479 if (s->alternate_scan && s->dct_quantize != dct_quantize_c) {
2480 for (i = 0; i < mb_block_count; i++) {
2481 int j;
2482 if (s->block_last_index[i] > 0) {
2483 for (j = 63; j > 0; j--) {
2484 if (s->block[i][s->intra_scantable.permutated[j]])
2485 break;
2487 s->block_last_index[i] = j;
2492 /* huffman encode */
2493     switch(s->codec_id){ //FIXME a function pointer could be slightly faster
2494 case AV_CODEC_ID_MPEG1VIDEO:
2495 case AV_CODEC_ID_MPEG2VIDEO:
2496 if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
2497 ff_mpeg1_encode_mb(s, s->block, motion_x, motion_y);
2498 break;
2499 case AV_CODEC_ID_MPEG4:
2500 if (CONFIG_MPEG4_ENCODER)
2501 ff_mpeg4_encode_mb(s, s->block, motion_x, motion_y);
2502 break;
2503 case AV_CODEC_ID_MSMPEG4V2:
2504 case AV_CODEC_ID_MSMPEG4V3:
2505 case AV_CODEC_ID_WMV1:
2506 if (CONFIG_MSMPEG4ENC)
2507 ff_msmpeg4_encode_mb(s, s->block, motion_x, motion_y);
2508 break;
2509 case AV_CODEC_ID_WMV2:
2510 if (CONFIG_WMV2_ENCODER)
2511 ff_wmv2_encode_mb(s, s->block, motion_x, motion_y);
2512 break;
2513 case AV_CODEC_ID_H261:
2514 if (CONFIG_H261_ENCODER)
2515 ff_h261_encode_mb(s, s->block, motion_x, motion_y);
2516 break;
2517 case AV_CODEC_ID_H263:
2518 case AV_CODEC_ID_H263P:
2519 case AV_CODEC_ID_FLV1:
2520 case AV_CODEC_ID_RV10:
2521 case AV_CODEC_ID_RV20:
2522 if (CONFIG_H263_ENCODER)
2523 ff_h263_encode_mb(s, s->block, motion_x, motion_y);
2524 break;
2525 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
2526 case AV_CODEC_ID_MJPEG:
2527 case AV_CODEC_ID_AMV:
2528 ff_mjpeg_encode_mb(s, s->block);
2529 break;
2530 #endif
2531 case AV_CODEC_ID_SPEEDHQ:
2532 if (CONFIG_SPEEDHQ_ENCODER)
2533 ff_speedhq_encode_mb(s, s->block);
2534 break;
2535 default:
2536 av_assert1(0);
2540 static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
2542 if (s->chroma_format == CHROMA_420)
2543 encode_mb_internal(s, motion_x, motion_y, 8, 8, 6, 1, 1, CHROMA_420);
2544 else if (s->chroma_format == CHROMA_422)
2545 encode_mb_internal(s, motion_x, motion_y, 16, 8, 8, 1, 0, CHROMA_422);
2546 else
2547 encode_mb_internal(s, motion_x, motion_y, 16, 16, 12, 0, 0, CHROMA_444);
2550 static inline void copy_context_before_encode(MpegEncContext *d,
2551 const MpegEncContext *s)
2553 int i;
2555 memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2557 /* MPEG-1 */
2558 d->mb_skip_run= s->mb_skip_run;
2559 for(i=0; i<3; i++)
2560 d->last_dc[i] = s->last_dc[i];
2562 /* statistics */
2563 d->mv_bits= s->mv_bits;
2564 d->i_tex_bits= s->i_tex_bits;
2565 d->p_tex_bits= s->p_tex_bits;
2566 d->i_count= s->i_count;
2567 d->misc_bits= s->misc_bits;
2568 d->last_bits= 0;
2570 d->mb_skipped= 0;
2571 d->qscale= s->qscale;
2572 d->dquant= s->dquant;
2574 d->esc3_level_length= s->esc3_level_length;
2577 static inline void copy_context_after_encode(MpegEncContext *d,
2578 const MpegEncContext *s)
2580 int i;
2582 memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
2583 memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster than a loop?
2585 /* MPEG-1 */
2586 d->mb_skip_run= s->mb_skip_run;
2587 for(i=0; i<3; i++)
2588 d->last_dc[i] = s->last_dc[i];
2590 /* statistics */
2591 d->mv_bits= s->mv_bits;
2592 d->i_tex_bits= s->i_tex_bits;
2593 d->p_tex_bits= s->p_tex_bits;
2594 d->i_count= s->i_count;
2595 d->misc_bits= s->misc_bits;
2597 d->mb_intra= s->mb_intra;
2598 d->mb_skipped= s->mb_skipped;
2599 d->mv_type= s->mv_type;
2600 d->mv_dir= s->mv_dir;
2601 d->pb= s->pb;
2602 if(s->data_partitioning){
2603 d->pb2= s->pb2;
2604 d->tex_pb= s->tex_pb;
2606 d->block= s->block;
2607 for(i=0; i<8; i++)
2608 d->block_last_index[i]= s->block_last_index[i];
2609 d->interlaced_dct= s->interlaced_dct;
2610 d->qscale= s->qscale;
2612 d->esc3_level_length= s->esc3_level_length;
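/* Trial-encode one macroblock candidate into the bit buffer selected by
 * *next_block; under FF_MB_DECISION_RD the block is also reconstructed so
 * the SSE can be added to the rate cost. If the score beats *dmin the
 * candidate is kept and the double buffer is flipped. */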
2615 static void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best,
2616 PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
2617 int *dmin, int *next_block, int motion_x, int motion_y)
2619 int score;
2620 uint8_t *dest_backup[3];
2622 copy_context_before_encode(s, backup);
2624 s->block= s->blocks[*next_block];
2625 s->pb= pb[*next_block];
2626 if(s->data_partitioning){
2627 s->pb2 = pb2 [*next_block];
2628 s->tex_pb= tex_pb[*next_block];
2631 if(*next_block){
2632 memcpy(dest_backup, s->dest, sizeof(s->dest));
2633 s->dest[0] = s->sc.rd_scratchpad;
2634 s->dest[1] = s->sc.rd_scratchpad + 16*s->linesize;
2635 s->dest[2] = s->sc.rd_scratchpad + 16*s->linesize + 8;
2636 av_assert0(s->linesize >= 32); //FIXME
2639 encode_mb(s, motion_x, motion_y);
2641 score= put_bits_count(&s->pb);
2642 if(s->data_partitioning){
2643 score+= put_bits_count(&s->pb2);
2644 score+= put_bits_count(&s->tex_pb);
2647 if(s->avctx->mb_decision == FF_MB_DECISION_RD){
2648 mpv_reconstruct_mb(s, s->block);
2650 score *= s->lambda2;
2651 score += sse_mb(s) << FF_LAMBDA_SHIFT;
2654 if(*next_block){
2655 memcpy(s->dest, dest_backup, sizeof(s->dest));
2658 if(score<*dmin){
2659 *dmin= score;
2660 *next_block^=1;
2662 copy_context_after_encode(best, s);
2666 static int sse(MpegEncContext *s, const uint8_t *src1, const uint8_t *src2, int w, int h, int stride){
2667 const uint32_t *sq = ff_square_tab + 256;
2668 int acc=0;
2669 int x,y;
2671 if(w==16 && h==16)
2672 return s->sse_cmp[0](NULL, src1, src2, stride, 16);
2673 else if(w==8 && h==8)
2674 return s->sse_cmp[1](NULL, src1, src2, stride, 8);
2676 for(y=0; y<h; y++){
2677 for(x=0; x<w; x++){
2678 acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
2682 av_assert2(acc>=0);
2684 return acc;
2687 static int sse_mb(MpegEncContext *s){
2688 int w= 16;
2689 int h= 16;
2690 int chroma_mb_w = w >> s->chroma_x_shift;
2691 int chroma_mb_h = h >> s->chroma_y_shift;
2693 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
2694 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
2696 if(w==16 && h==16)
2697 return s->n_sse_cmp[0](s, s->new_pic->data[0] + s->mb_x * 16 + s->mb_y * s->linesize * 16,
2698 s->dest[0], s->linesize, 16) +
2699 s->n_sse_cmp[1](s, s->new_pic->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
2700 s->dest[1], s->uvlinesize, chroma_mb_h) +
2701 s->n_sse_cmp[1](s, s->new_pic->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
2702 s->dest[2], s->uvlinesize, chroma_mb_h);
2703 else
2704 return sse(s, s->new_pic->data[0] + s->mb_x * 16 + s->mb_y * s->linesize * 16,
2705 s->dest[0], w, h, s->linesize) +
2706 sse(s, s->new_pic->data[1] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
2707 s->dest[1], w >> s->chroma_x_shift, h >> s->chroma_y_shift, s->uvlinesize) +
2708 sse(s, s->new_pic->data[2] + s->mb_x * chroma_mb_w + s->mb_y * s->uvlinesize * chroma_mb_h,
2709 s->dest[2], w >> s->chroma_x_shift, h >> s->chroma_y_shift, s->uvlinesize);
2712 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
2713 MpegEncContext *s= *(void**)arg;
2716 s->me.pre_pass=1;
2717 s->me.dia_size= s->avctx->pre_dia_size;
2718 s->first_slice_line=1;
2719 for(s->mb_y= s->end_mb_y-1; s->mb_y >= s->start_mb_y; s->mb_y--) {
2720 for(s->mb_x=s->mb_width-1; s->mb_x >=0 ;s->mb_x--) {
2721 ff_pre_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2723 s->first_slice_line=0;
2726 s->me.pre_pass=0;
2728 return 0;
2731 static int estimate_motion_thread(AVCodecContext *c, void *arg){
2732 MpegEncContext *s= *(void**)arg;
2734 s->me.dia_size= s->avctx->dia_size;
2735 s->first_slice_line=1;
2736 for(s->mb_y= s->start_mb_y; s->mb_y < s->end_mb_y; s->mb_y++) {
2737 s->mb_x=0; //for block init below
2738 ff_init_block_index(s);
2739 for(s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
2740 s->block_index[0]+=2;
2741 s->block_index[1]+=2;
2742 s->block_index[2]+=2;
2743 s->block_index[3]+=2;
2745 /* compute motion vector & mb_type and store in context */
2746 if(s->pict_type==AV_PICTURE_TYPE_B)
2747 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
2748 else
2749 ff_estimate_p_frame_motion(s, s->mb_x, s->mb_y);
2751 s->first_slice_line=0;
2753 return 0;
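/* Compute the variance and mean of every 16x16 luma block of the input
 * picture; these feed adaptive quantization and rate control. */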
2756 static int mb_var_thread(AVCodecContext *c, void *arg){
2757 MpegEncContext *s= *(void**)arg;
2758 int mb_x, mb_y;
2760 for(mb_y=s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
2761 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2762 int xx = mb_x * 16;
2763 int yy = mb_y * 16;
2764 const uint8_t *pix = s->new_pic->data[0] + (yy * s->linesize) + xx;
2765 int varc;
2766 int sum = s->mpvencdsp.pix_sum(pix, s->linesize);
2768 varc = (s->mpvencdsp.pix_norm1(pix, s->linesize) -
2769 (((unsigned) sum * sum) >> 8) + 500 + 128) >> 8;
2771 s->mb_var [s->mb_stride * mb_y + mb_x] = varc;
2772 s->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
2773 s->me.mb_var_sum_temp += varc;
2776 return 0;
2779 static void write_slice_end(MpegEncContext *s){
2780 if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4){
2781 if(s->partitioned_frame){
2782 ff_mpeg4_merge_partitions(s);
2785 ff_mpeg4_stuffing(&s->pb);
2786 } else if ((CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER) &&
2787 s->out_format == FMT_MJPEG) {
2788 ff_mjpeg_encode_stuffing(s);
2789 } else if (CONFIG_SPEEDHQ_ENCODER && s->out_format == FMT_SPEEDHQ) {
2790 ff_speedhq_end_slice(s);
2793 flush_put_bits(&s->pb);
2795 if ((s->avctx->flags & AV_CODEC_FLAG_PASS1) && !s->partitioned_frame)
2796 s->misc_bits+= get_bits_diff(s);
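/* Append one 12-byte AV_PKT_DATA_H263_MB_INFO record: bit offset of the
 * macroblock, quantizer, GOB number, macroblock address and the motion
 * vector predictors. */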
2799 static void write_mb_info(MpegEncContext *s)
2801 uint8_t *ptr = s->mb_info_ptr + s->mb_info_size - 12;
2802 int offset = put_bits_count(&s->pb);
2803 int mba = s->mb_x + s->mb_width * (s->mb_y % s->gob_index);
2804 int gobn = s->mb_y / s->gob_index;
2805 int pred_x, pred_y;
2806 if (CONFIG_H263_ENCODER)
2807 ff_h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
2808 bytestream_put_le32(&ptr, offset);
2809 bytestream_put_byte(&ptr, s->qscale);
2810 bytestream_put_byte(&ptr, gobn);
2811 bytestream_put_le16(&ptr, mba);
2812 bytestream_put_byte(&ptr, pred_x); /* hmv1 */
2813 bytestream_put_byte(&ptr, pred_y); /* vmv1 */
2814 /* 4MV not implemented */
2815 bytestream_put_byte(&ptr, 0); /* hmv2 */
2816 bytestream_put_byte(&ptr, 0); /* vmv2 */
2819 static void update_mb_info(MpegEncContext *s, int startcode)
2821 if (!s->mb_info)
2822 return;
2823 if (put_bytes_count(&s->pb, 0) - s->prev_mb_info >= s->mb_info) {
2824 s->mb_info_size += 12;
2825 s->prev_mb_info = s->last_mb_info;
2827 if (startcode) {
2828 s->prev_mb_info = put_bytes_count(&s->pb, 0);
2829         /* This might have incremented mb_info_size above, and we return without
2830          * actually writing any info into that slot yet. But in that case this
2831          * function will be called again right after the start code has been
2832          * written, and the mb info will be written then. */
2833 return;
2836 s->last_mb_info = put_bytes_count(&s->pb, 0);
2837 if (!s->mb_info_size)
2838 s->mb_info_size += 12;
2839 write_mb_info(s);
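/* Grow the shared output buffer if fewer than threshold bytes are left:
 * only possible when a single slice context writes into the codec's
 * internal byte buffer; the PutBitContext is rebased and ptr_lastgob is
 * re-anchored into the new buffer. */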
2842 int ff_mpv_reallocate_putbitbuffer(MpegEncContext *s, size_t threshold, size_t size_increase)
2844 if (put_bytes_left(&s->pb, 0) < threshold
2845 && s->slice_context_count == 1
2846 && s->pb.buf == s->avctx->internal->byte_buffer) {
2847 int lastgob_pos = s->ptr_lastgob - s->pb.buf;
2849 uint8_t *new_buffer = NULL;
2850 int new_buffer_size = 0;
2852 if ((s->avctx->internal->byte_buffer_size + size_increase) >= INT_MAX/8) {
2853 av_log(s->avctx, AV_LOG_ERROR, "Cannot reallocate putbit buffer\n");
2854 return AVERROR(ENOMEM);
2857 emms_c();
2859 av_fast_padded_malloc(&new_buffer, &new_buffer_size,
2860 s->avctx->internal->byte_buffer_size + size_increase);
2861 if (!new_buffer)
2862 return AVERROR(ENOMEM);
2864 memcpy(new_buffer, s->avctx->internal->byte_buffer, s->avctx->internal->byte_buffer_size);
2865 av_free(s->avctx->internal->byte_buffer);
2866 s->avctx->internal->byte_buffer = new_buffer;
2867 s->avctx->internal->byte_buffer_size = new_buffer_size;
2868 rebase_put_bits(&s->pb, new_buffer, new_buffer_size);
2869 s->ptr_lastgob = s->pb.buf + lastgob_pos;
2871 if (put_bytes_left(&s->pb, 0) < threshold)
2872 return AVERROR(EINVAL);
2873 return 0;
2876 static int encode_thread(AVCodecContext *c, void *arg){
2877 MpegEncContext *s= *(void**)arg;
2878 int mb_x, mb_y, mb_y_order;
2879 int chr_h= 16>>s->chroma_y_shift;
2880 int i, j;
2881 MpegEncContext best_s = { 0 }, backup_s;
2882 uint8_t bit_buf[2][MAX_MB_BYTES];
2883 uint8_t bit_buf2[2][MAX_MB_BYTES];
2884 uint8_t bit_buf_tex[2][MAX_MB_BYTES];
2885 PutBitContext pb[2], pb2[2], tex_pb[2];
2887 for(i=0; i<2; i++){
2888 init_put_bits(&pb [i], bit_buf [i], MAX_MB_BYTES);
2889 init_put_bits(&pb2 [i], bit_buf2 [i], MAX_MB_BYTES);
2890 init_put_bits(&tex_pb[i], bit_buf_tex[i], MAX_MB_BYTES);
2893 s->last_bits= put_bits_count(&s->pb);
2894 s->mv_bits=0;
2895 s->misc_bits=0;
2896 s->i_tex_bits=0;
2897 s->p_tex_bits=0;
2898 s->i_count=0;
2900 for(i=0; i<3; i++){
2901 /* init last dc values */
2902 /* note: quant matrix value (8) is implied here */
2903 s->last_dc[i] = 128 << s->intra_dc_precision;
2905 s->encoding_error[i] = 0;
2907 if(s->codec_id==AV_CODEC_ID_AMV){
2908 s->last_dc[0] = 128*8/13;
2909 s->last_dc[1] = 128*8/14;
2910 s->last_dc[2] = 128*8/14;
2912 s->mb_skip_run = 0;
2913 memset(s->last_mv, 0, sizeof(s->last_mv));
2915 s->last_mv_dir = 0;
2917 switch(s->codec_id){
2918 case AV_CODEC_ID_H263:
2919 case AV_CODEC_ID_H263P:
2920 case AV_CODEC_ID_FLV1:
2921 if (CONFIG_H263_ENCODER)
2922 s->gob_index = H263_GOB_HEIGHT(s->height);
2923 break;
2924 case AV_CODEC_ID_MPEG4:
2925 if(CONFIG_MPEG4_ENCODER && s->partitioned_frame)
2926 ff_mpeg4_init_partitions(s);
2927 break;
2930 s->resync_mb_x=0;
2931 s->resync_mb_y=0;
2932 s->first_slice_line = 1;
2933 s->ptr_lastgob = s->pb.buf;
2934 for (mb_y_order = s->start_mb_y; mb_y_order < s->end_mb_y; mb_y_order++) {
2935 if (CONFIG_SPEEDHQ_ENCODER && s->codec_id == AV_CODEC_ID_SPEEDHQ) {
2936 int first_in_slice;
2937 mb_y = ff_speedhq_mb_y_order_to_mb(mb_y_order, s->mb_height, &first_in_slice);
2938 if (first_in_slice && mb_y_order != s->start_mb_y)
2939 ff_speedhq_end_slice(s);
2940 s->last_dc[0] = s->last_dc[1] = s->last_dc[2] = 1024 << s->intra_dc_precision;
2941 } else {
2942 mb_y = mb_y_order;
2944 s->mb_x=0;
2945 s->mb_y= mb_y;
2947 ff_set_qscale(s, s->qscale);
2948 ff_init_block_index(s);
2950 for(mb_x=0; mb_x < s->mb_width; mb_x++) {
2951 int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
2952 int mb_type= s->mb_type[xy];
2953 // int d;
2954 int dmin= INT_MAX;
2955 int dir;
2956 int size_increase = s->avctx->internal->byte_buffer_size/4
2957 + s->mb_width*MAX_MB_BYTES;
2959 ff_mpv_reallocate_putbitbuffer(s, MAX_MB_BYTES, size_increase);
2960 if (put_bytes_left(&s->pb, 0) < MAX_MB_BYTES){
2961 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2962 return -1;
2964 if(s->data_partitioning){
2965 if (put_bytes_left(&s->pb2, 0) < MAX_MB_BYTES ||
2966 put_bytes_left(&s->tex_pb, 0) < MAX_MB_BYTES) {
2967 av_log(s->avctx, AV_LOG_ERROR, "encoded partitioned frame too large\n");
2968 return -1;
2972 s->mb_x = mb_x;
2973 s->mb_y = mb_y; // moved into loop, can get changed by H.261
2974 ff_update_block_index(s, 8, 0, s->chroma_x_shift);
2976 if(CONFIG_H261_ENCODER && s->codec_id == AV_CODEC_ID_H261){
2977 ff_h261_reorder_mb_index(s);
2978 xy= s->mb_y*s->mb_stride + s->mb_x;
2979 mb_type= s->mb_type[xy];
2982 /* write gob / video packet header */
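/* A new GOB / slice / video packet is started once the current one has
 * reached rtp_payload_size bytes, subject to the codec-specific alignment
 * rules below; write_slice_end() closes the previous packet first. */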
2983 if(s->rtp_mode){
2984 int current_packet_size, is_gob_start;
2986 current_packet_size = put_bytes_count(&s->pb, 1)
2987 - (s->ptr_lastgob - s->pb.buf);
2989 is_gob_start = s->rtp_payload_size &&
2990 current_packet_size >= s->rtp_payload_size &&
2991 mb_y + mb_x > 0;
2993 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
2995 switch(s->codec_id){
2996 case AV_CODEC_ID_H263:
2997 case AV_CODEC_ID_H263P:
2998 if(!s->h263_slice_structured)
2999 if(s->mb_x || s->mb_y%s->gob_index) is_gob_start=0;
3000 break;
3001 case AV_CODEC_ID_MPEG2VIDEO:
3002 if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
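/* fall through: the MPEG-1 skip-run check below applies to MPEG-2 too */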
3003 case AV_CODEC_ID_MPEG1VIDEO:
3004 if(s->mb_skip_run) is_gob_start=0;
3005 break;
3006 case AV_CODEC_ID_MJPEG:
3007 if(s->mb_x==0 && s->mb_y!=0) is_gob_start=1;
3008 break;
3011 if(is_gob_start){
3012 if(s->start_mb_y != mb_y || mb_x!=0){
3013 write_slice_end(s);
3015 if(CONFIG_MPEG4_ENCODER && s->codec_id==AV_CODEC_ID_MPEG4 && s->partitioned_frame){
3016 ff_mpeg4_init_partitions(s);
3020 av_assert2((put_bits_count(&s->pb)&7) == 0);
3021 current_packet_size= put_bits_ptr(&s->pb) - s->ptr_lastgob;
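/* With the error_rate option, periodically discard the bits written
 * since the last GOB, presumably to simulate lost packets for error
 * resilience testing. */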
3023 if (s->error_rate && s->resync_mb_x + s->resync_mb_y > 0) {
3024 int r = put_bytes_count(&s->pb, 0) + s->picture_number + 16 + s->mb_x + s->mb_y;
3025 int d = 100 / s->error_rate;
3026 if(r % d == 0){
3027 current_packet_size=0;
3028 s->pb.buf_ptr= s->ptr_lastgob;
3029 av_assert1(put_bits_ptr(&s->pb) == s->ptr_lastgob);
3033 switch(s->codec_id){
3034 case AV_CODEC_ID_MPEG4:
3035 if (CONFIG_MPEG4_ENCODER) {
3036 ff_mpeg4_encode_video_packet_header(s);
3037 ff_mpeg4_clean_buffers(s);
3039 break;
3040 case AV_CODEC_ID_MPEG1VIDEO:
3041 case AV_CODEC_ID_MPEG2VIDEO:
3042 if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER) {
3043 ff_mpeg1_encode_slice_header(s);
3044 ff_mpeg1_clean_buffers(s);
3046 break;
3047 case AV_CODEC_ID_H263:
3048 case AV_CODEC_ID_H263P:
3049 if (CONFIG_H263_ENCODER) {
3050 update_mb_info(s, 1);
3051 ff_h263_encode_gob_header(s, mb_y);
3053 break;
3056 if (s->avctx->flags & AV_CODEC_FLAG_PASS1) {
3057 int bits= put_bits_count(&s->pb);
3058 s->misc_bits+= bits - s->last_bits;
3059 s->last_bits= bits;
3062 s->ptr_lastgob += current_packet_size;
3063 s->first_slice_line=1;
3064 s->resync_mb_x=mb_x;
3065 s->resync_mb_y=mb_y;
3069 if( (s->resync_mb_x == s->mb_x)
3070 && s->resync_mb_y+1 == s->mb_y){
3071 s->first_slice_line=0;
3074 s->mb_skipped=0;
3075 s->dquant=0; //only for QP_RD
3077 update_mb_info(s, 0);
3079 if (mb_type & (mb_type-1) || (s->mpv_flags & FF_MPV_FLAG_QP_RD)) { // more than 1 MB type possible or FF_MPV_FLAG_QP_RD
3080 int next_block=0;
3081 int pb_bits_count, pb2_bits_count, tex_pb_bits_count;
3083 copy_context_before_encode(&backup_s, s);
3084 backup_s.pb= s->pb;
3085 best_s.data_partitioning= s->data_partitioning;
3086 best_s.partitioned_frame= s->partitioned_frame;
3087 if(s->data_partitioning){
3088 backup_s.pb2= s->pb2;
3089 backup_s.tex_pb= s->tex_pb;
3092 if(mb_type&CANDIDATE_MB_TYPE_INTER){
3093 s->mv_dir = MV_DIR_FORWARD;
3094 s->mv_type = MV_TYPE_16X16;
3095 s->mb_intra= 0;
3096 s->mv[0][0][0] = s->p_mv_table[xy][0];
3097 s->mv[0][0][1] = s->p_mv_table[xy][1];
3098 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3099 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3101 if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
3102 s->mv_dir = MV_DIR_FORWARD;
3103 s->mv_type = MV_TYPE_FIELD;
3104 s->mb_intra= 0;
3105 for(i=0; i<2; i++){
3106 j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3107 s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3108 s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3110 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3111 &dmin, &next_block, 0, 0);
3113 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
3114 s->mv_dir = MV_DIR_FORWARD;
3115 s->mv_type = MV_TYPE_16X16;
3116 s->mb_intra= 0;
3117 s->mv[0][0][0] = 0;
3118 s->mv[0][0][1] = 0;
3119 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3120 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3122 if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
3123 s->mv_dir = MV_DIR_FORWARD;
3124 s->mv_type = MV_TYPE_8X8;
3125 s->mb_intra= 0;
3126 for(i=0; i<4; i++){
3127 s->mv[0][i][0] = s->cur_pic.motion_val[0][s->block_index[i]][0];
3128 s->mv[0][i][1] = s->cur_pic.motion_val[0][s->block_index[i]][1];
3130 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3131 &dmin, &next_block, 0, 0);
3133 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
3134 s->mv_dir = MV_DIR_FORWARD;
3135 s->mv_type = MV_TYPE_16X16;
3136 s->mb_intra= 0;
3137 s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3138 s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3139 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3140 &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
3142 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
3143 s->mv_dir = MV_DIR_BACKWARD;
3144 s->mv_type = MV_TYPE_16X16;
3145 s->mb_intra= 0;
3146 s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3147 s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3148 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3149 &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
3151 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
3152 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3153 s->mv_type = MV_TYPE_16X16;
3154 s->mb_intra= 0;
3155 s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3156 s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3157 s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3158 s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3159 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3160 &dmin, &next_block, 0, 0);
3162 if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
3163 s->mv_dir = MV_DIR_FORWARD;
3164 s->mv_type = MV_TYPE_FIELD;
3165 s->mb_intra= 0;
3166 for(i=0; i<2; i++){
3167 j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3168 s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3169 s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3171 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3172 &dmin, &next_block, 0, 0);
3174 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
3175 s->mv_dir = MV_DIR_BACKWARD;
3176 s->mv_type = MV_TYPE_FIELD;
3177 s->mb_intra= 0;
3178 for(i=0; i<2; i++){
3179 j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3180 s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3181 s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3183 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3184 &dmin, &next_block, 0, 0);
3186 if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
3187 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3188 s->mv_type = MV_TYPE_FIELD;
3189 s->mb_intra= 0;
3190 for(dir=0; dir<2; dir++){
3191 for(i=0; i<2; i++){
3192 j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3193 s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3194 s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3197 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3198 &dmin, &next_block, 0, 0);
3200 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
3201 s->mv_dir = 0;
3202 s->mv_type = MV_TYPE_16X16;
3203 s->mb_intra= 1;
3204 s->mv[0][0][0] = 0;
3205 s->mv[0][0][1] = 0;
3206 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3207 &dmin, &next_block, 0, 0);
3208 if(s->h263_pred || s->h263_aic){
3209 if(best_s.mb_intra)
3210 s->mbintra_table[mb_x + mb_y*s->mb_stride]=1;
3211 else
3212 ff_clean_intra_table_entries(s); //old mode?
3216 if ((s->mpv_flags & FF_MPV_FLAG_QP_RD) && dmin < INT_MAX) {
3217 if(best_s.mv_type==MV_TYPE_16X16){ //FIXME move 4mv after QPRD
3218 const int last_qp= backup_s.qscale;
3219 int qpi, qp, dc[6];
3220 int16_t ac[6][16];
3221 const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
3222 static const int dquant_tab[4]={-1,1,-2,2};
3223 int storecoefs = s->mb_intra && s->dc_val[0];
3225 av_assert2(backup_s.dquant == 0);
3227 //FIXME intra
3228 s->mv_dir= best_s.mv_dir;
3229 s->mv_type = MV_TYPE_16X16;
3230 s->mb_intra= best_s.mb_intra;
3231 s->mv[0][0][0] = best_s.mv[0][0][0];
3232 s->mv[0][0][1] = best_s.mv[0][0][1];
3233 s->mv[1][0][0] = best_s.mv[1][0][0];
3234 s->mv[1][0][1] = best_s.mv[1][0][1];
3236 qpi = s->pict_type == AV_PICTURE_TYPE_B ? 2 : 0;
3237 for(; qpi<4; qpi++){
3238 int dquant= dquant_tab[qpi];
3239 qp= last_qp + dquant;
3240 if(qp < s->avctx->qmin || qp > s->avctx->qmax)
3241 continue;
3242 backup_s.dquant= dquant;
3243 if(storecoefs){
3244 for(i=0; i<6; i++){
3245 dc[i]= s->dc_val[0][ s->block_index[i] ];
3246 memcpy(ac[i], s->ac_val[0][s->block_index[i]], sizeof(int16_t)*16);
3250 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3251 &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
3252 if(best_s.qscale != qp){
3253 if(storecoefs){
3254 for(i=0; i<6; i++){
3255 s->dc_val[0][ s->block_index[i] ]= dc[i];
3256 memcpy(s->ac_val[0][s->block_index[i]], ac[i], sizeof(int16_t)*16);
3263 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT){
3264 int mx= s->b_direct_mv_table[xy][0];
3265 int my= s->b_direct_mv_table[xy][1];
3267 backup_s.dquant = 0;
3268 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3269 s->mb_intra= 0;
3270 ff_mpeg4_set_direct_mv(s, mx, my);
3271 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3272 &dmin, &next_block, mx, my);
3274 if(CONFIG_MPEG4_ENCODER && mb_type&CANDIDATE_MB_TYPE_DIRECT0){
3275 backup_s.dquant = 0;
3276 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
3277 s->mb_intra= 0;
3278 ff_mpeg4_set_direct_mv(s, 0, 0);
3279 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3280 &dmin, &next_block, 0, 0);
3282 if (!best_s.mb_intra && s->mpv_flags & FF_MPV_FLAG_SKIP_RD) {
3283 int coded=0;
3284 for(i=0; i<6; i++)
3285 coded |= s->block_last_index[i];
3286 if(coded){
3287 int mx,my;
3288 memcpy(s->mv, best_s.mv, sizeof(s->mv));
3289 if(CONFIG_MPEG4_ENCODER && best_s.mv_dir & MV_DIRECT){
3290 mx=my=0; //FIXME find the one we actually used
3291 ff_mpeg4_set_direct_mv(s, mx, my);
3292 }else if(best_s.mv_dir&MV_DIR_BACKWARD){
3293 mx= s->mv[1][0][0];
3294 my= s->mv[1][0][1];
3295 }else{
3296 mx= s->mv[0][0][0];
3297 my= s->mv[0][0][1];
3300 s->mv_dir= best_s.mv_dir;
3301 s->mv_type = best_s.mv_type;
3302 s->mb_intra= 0;
3303 /* s->mv[0][0][0] = best_s.mv[0][0][0];
3304 s->mv[0][0][1] = best_s.mv[0][0][1];
3305 s->mv[1][0][0] = best_s.mv[1][0][0];
3306 s->mv[1][0][1] = best_s.mv[1][0][1];*/
3307 backup_s.dquant= 0;
3308 s->skipdct=1;
3309 encode_mb_hq(s, &backup_s, &best_s, pb, pb2, tex_pb,
3310 &dmin, &next_block, mx, my);
3311 s->skipdct=0;
3315 s->cur_pic.qscale_table[xy] = best_s.qscale;
3317 copy_context_after_encode(s, &best_s);
3319 pb_bits_count= put_bits_count(&s->pb);
3320 flush_put_bits(&s->pb);
3321 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
3322 s->pb= backup_s.pb;
3324 if(s->data_partitioning){
3325 pb2_bits_count= put_bits_count(&s->pb2);
3326 flush_put_bits(&s->pb2);
3327 ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
3328 s->pb2= backup_s.pb2;
3330 tex_pb_bits_count= put_bits_count(&s->tex_pb);
3331 flush_put_bits(&s->tex_pb);
3332 ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
3333 s->tex_pb= backup_s.tex_pb;
3335 s->last_bits= put_bits_count(&s->pb);
3337 if (CONFIG_H263_ENCODER &&
3338 s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3339 ff_h263_update_mb(s);
3341 if(next_block==0){ //FIXME 16 vs linesize16
3342 s->hdsp.put_pixels_tab[0][0](s->dest[0], s->sc.rd_scratchpad , s->linesize ,16);
3343 s->hdsp.put_pixels_tab[1][0](s->dest[1], s->sc.rd_scratchpad + 16*s->linesize , s->uvlinesize, 8);
3344 s->hdsp.put_pixels_tab[1][0](s->dest[2], s->sc.rd_scratchpad + 16*s->linesize + 8, s->uvlinesize, 8);
3347 if(s->avctx->mb_decision == FF_MB_DECISION_BITS)
3348 mpv_reconstruct_mb(s, s->block);
3349 } else {
3350 int motion_x = 0, motion_y = 0;
3351 s->mv_type=MV_TYPE_16X16;
3352 // only one MB-Type possible
3354 switch(mb_type){
3355 case CANDIDATE_MB_TYPE_INTRA:
3356 s->mv_dir = 0;
3357 s->mb_intra= 1;
3358 motion_x= s->mv[0][0][0] = 0;
3359 motion_y= s->mv[0][0][1] = 0;
3360 break;
3361 case CANDIDATE_MB_TYPE_INTER:
3362 s->mv_dir = MV_DIR_FORWARD;
3363 s->mb_intra= 0;
3364 motion_x= s->mv[0][0][0] = s->p_mv_table[xy][0];
3365 motion_y= s->mv[0][0][1] = s->p_mv_table[xy][1];
3366 break;
3367 case CANDIDATE_MB_TYPE_INTER_I:
3368 s->mv_dir = MV_DIR_FORWARD;
3369 s->mv_type = MV_TYPE_FIELD;
3370 s->mb_intra= 0;
3371 for(i=0; i<2; i++){
3372 j= s->field_select[0][i] = s->p_field_select_table[i][xy];
3373 s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
3374 s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
3376 break;
3377 case CANDIDATE_MB_TYPE_INTER4V:
3378 s->mv_dir = MV_DIR_FORWARD;
3379 s->mv_type = MV_TYPE_8X8;
3380 s->mb_intra= 0;
3381 for(i=0; i<4; i++){
3382 s->mv[0][i][0] = s->cur_pic.motion_val[0][s->block_index[i]][0];
3383 s->mv[0][i][1] = s->cur_pic.motion_val[0][s->block_index[i]][1];
3385 break;
3386 case CANDIDATE_MB_TYPE_DIRECT:
3387 if (CONFIG_MPEG4_ENCODER) {
3388 s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3389 s->mb_intra= 0;
3390 motion_x=s->b_direct_mv_table[xy][0];
3391 motion_y=s->b_direct_mv_table[xy][1];
3392 ff_mpeg4_set_direct_mv(s, motion_x, motion_y);
3394 break;
3395 case CANDIDATE_MB_TYPE_DIRECT0:
3396 if (CONFIG_MPEG4_ENCODER) {
3397 s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD|MV_DIRECT;
3398 s->mb_intra= 0;
3399 ff_mpeg4_set_direct_mv(s, 0, 0);
3401 break;
3402 case CANDIDATE_MB_TYPE_BIDIR:
3403 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3404 s->mb_intra= 0;
3405 s->mv[0][0][0] = s->b_bidir_forw_mv_table[xy][0];
3406 s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
3407 s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
3408 s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
3409 break;
3410 case CANDIDATE_MB_TYPE_BACKWARD:
3411 s->mv_dir = MV_DIR_BACKWARD;
3412 s->mb_intra= 0;
3413 motion_x= s->mv[1][0][0] = s->b_back_mv_table[xy][0];
3414 motion_y= s->mv[1][0][1] = s->b_back_mv_table[xy][1];
3415 break;
3416 case CANDIDATE_MB_TYPE_FORWARD:
3417 s->mv_dir = MV_DIR_FORWARD;
3418 s->mb_intra= 0;
3419 motion_x= s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
3420 motion_y= s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
3421 break;
3422 case CANDIDATE_MB_TYPE_FORWARD_I:
3423 s->mv_dir = MV_DIR_FORWARD;
3424 s->mv_type = MV_TYPE_FIELD;
3425 s->mb_intra= 0;
3426 for(i=0; i<2; i++){
3427 j= s->field_select[0][i] = s->b_field_select_table[0][i][xy];
3428 s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
3429 s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
3431 break;
3432 case CANDIDATE_MB_TYPE_BACKWARD_I:
3433 s->mv_dir = MV_DIR_BACKWARD;
3434 s->mv_type = MV_TYPE_FIELD;
3435 s->mb_intra= 0;
3436 for(i=0; i<2; i++){
3437 j= s->field_select[1][i] = s->b_field_select_table[1][i][xy];
3438 s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
3439 s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
3441 break;
3442 case CANDIDATE_MB_TYPE_BIDIR_I:
3443 s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
3444 s->mv_type = MV_TYPE_FIELD;
3445 s->mb_intra= 0;
3446 for(dir=0; dir<2; dir++){
3447 for(i=0; i<2; i++){
3448 j= s->field_select[dir][i] = s->b_field_select_table[dir][i][xy];
3449 s->mv[dir][i][0] = s->b_field_mv_table[dir][i][j][xy][0];
3450 s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
3453 break;
3454 default:
3455 av_log(s->avctx, AV_LOG_ERROR, "illegal MB type\n");
3458 encode_mb(s, motion_x, motion_y);
3460 // RAL: Update last macroblock type
3461 s->last_mv_dir = s->mv_dir;
3463 if (CONFIG_H263_ENCODER &&
3464 s->out_format == FMT_H263 && s->pict_type!=AV_PICTURE_TYPE_B)
3465 ff_h263_update_mb(s);
3467 mpv_reconstruct_mb(s, s->block);
3470             /* zero the P MV table entry of intra MBs in I/P/S frames; B-frame direct mode reads these co-located MVs */
3471 if(s->mb_intra /* && I,P,S_TYPE */){
3472 s->p_mv_table[xy][0]=0;
3473 s->p_mv_table[xy][1]=0;
3476 if (s->avctx->flags & AV_CODEC_FLAG_PSNR) {
3477 int w= 16;
3478 int h= 16;
3480 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
3481 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
3483 s->encoding_error[0] += sse(
3484 s, s->new_pic->data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
3485 s->dest[0], w, h, s->linesize);
3486 s->encoding_error[1] += sse(
3487 s, s->new_pic->data[1] + s->mb_x*8 + s->mb_y*s->uvlinesize*chr_h,
3488 s->dest[1], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3489 s->encoding_error[2] += sse(
3490 s, s->new_pic->data[2] + s->mb_x*8 + s->mb_y*s->uvlinesize*chr_h,
3491 s->dest[2], w>>1, h>>s->chroma_y_shift, s->uvlinesize);
3493 if(s->loop_filter){
3494 if(CONFIG_H263_ENCODER && s->out_format == FMT_H263)
3495 ff_h263_loop_filter(s);
3497 ff_dlog(s->avctx, "MB %d %d bits\n",
3498 s->mb_x + s->mb_y * s->mb_stride, put_bits_count(&s->pb));
3502 #if CONFIG_MSMPEG4ENC
3503     // not pretty, but the extension header must be written before the flush, so it has to be here
3504 if (s->msmpeg4_version != MSMP4_UNUSED && s->msmpeg4_version < MSMP4_WMV1 &&
3505 s->pict_type == AV_PICTURE_TYPE_I)
3506 ff_msmpeg4_encode_ext_header(s);
3507 #endif
3509 write_slice_end(s);
3511 return 0;
3514 #define MERGE(field) dst->field += src->field; src->field=0
3515 static void merge_context_after_me(MpegEncContext *dst, MpegEncContext *src){
3516 MERGE(me.scene_change_score);
3517 MERGE(me.mc_mb_var_sum_temp);
3518 MERGE(me.mb_var_sum_temp);
3521 static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src){
3522 int i;
3524 MERGE(dct_count[0]); //note, the other dct vars are not part of the context
3525 MERGE(dct_count[1]);
3526 MERGE(mv_bits);
3527 MERGE(i_tex_bits);
3528 MERGE(p_tex_bits);
3529 MERGE(i_count);
3530 MERGE(misc_bits);
3531 MERGE(encoding_error[0]);
3532 MERGE(encoding_error[1]);
3533 MERGE(encoding_error[2]);
3535 if (dst->noise_reduction){
3536 for(i=0; i<64; i++){
3537 MERGE(dct_error_sum[0][i]);
3538 MERGE(dct_error_sum[1][i]);
3542 av_assert1(put_bits_count(&src->pb) % 8 ==0);
3543 av_assert1(put_bits_count(&dst->pb) % 8 ==0);
3544 ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
3545 flush_put_bits(&dst->pb);
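/* Choose the frame-level quantizer: a lambda forced by a VBV re-encode,
 * the rate-control estimate, or the user-fixed qscale; with adaptive
 * quantization the per-MB qscale/lambda tables are set up here as well. */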
3548 static int estimate_qp(MpegEncContext *s, int dry_run){
3549 if (s->next_lambda){
3550 s->cur_pic.ptr->f->quality = s->next_lambda;
3551 if(!dry_run) s->next_lambda= 0;
3552 } else if (!s->fixed_qscale) {
3553 int quality = ff_rate_estimate_qscale(s, dry_run);
3554 s->cur_pic.ptr->f->quality = quality;
3555 if (s->cur_pic.ptr->f->quality < 0)
3556 return -1;
3559 if(s->adaptive_quant){
3560 init_qscale_tab(s);
3562 switch(s->codec_id){
3563 case AV_CODEC_ID_MPEG4:
3564 if (CONFIG_MPEG4_ENCODER)
3565 ff_clean_mpeg4_qscales(s);
3566 break;
3567 case AV_CODEC_ID_H263:
3568 case AV_CODEC_ID_H263P:
3569 case AV_CODEC_ID_FLV1:
3570 if (CONFIG_H263_ENCODER)
3571 ff_clean_h263_qscales(s);
3572 break;
3575 s->lambda= s->lambda_table[0];
3576 //FIXME broken
3577 }else
3578 s->lambda = s->cur_pic.ptr->f->quality;
3579 update_qscale(s);
3580 return 0;
3583 /* must be called before writing the header */
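/* pp_time is the distance between the two reference frames surrounding a
 * B-frame, pb_time the distance from the previous reference to the current
 * B-frame (these correspond to the TRD/TRB values used by MPEG-4 direct
 * mode). */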
3584 static void set_frame_distances(MpegEncContext * s){
3585 av_assert1(s->cur_pic.ptr->f->pts != AV_NOPTS_VALUE);
3586 s->time = s->cur_pic.ptr->f->pts * s->avctx->time_base.num;
3588 if(s->pict_type==AV_PICTURE_TYPE_B){
3589 s->pb_time= s->pp_time - (s->last_non_b_time - s->time);
3590 av_assert1(s->pb_time > 0 && s->pb_time < s->pp_time);
3591 }else{
3592 s->pp_time= s->time - s->last_non_b_time;
3593 s->last_non_b_time= s->time;
3594 av_assert1(s->picture_number==0 || s->pp_time > 0);
3598 static int encode_picture(MpegEncContext *s, const AVPacket *pkt)
3600 int i, ret;
3601 int bits;
3602 int context_count = s->slice_context_count;
3604 /* Reset the average MB variance */
3605 s->me.mb_var_sum_temp =
3606 s->me.mc_mb_var_sum_temp = 0;
3608 /* we need to initialize some time vars before we can encode B-frames */
3609 // RAL: Condition added for MPEG1VIDEO
3610 if (s->out_format == FMT_MPEG1 || (s->h263_pred && s->msmpeg4_version == MSMP4_UNUSED))
3611 set_frame_distances(s);
3612 if(CONFIG_MPEG4_ENCODER && s->codec_id == AV_CODEC_ID_MPEG4)
3613 ff_set_mpeg4_time(s);
3615 s->me.scene_change_score=0;
3617 // s->lambda= s->cur_pic.ptr->quality; //FIXME qscale / ... stuff for ME rate distortion
3619 if(s->pict_type==AV_PICTURE_TYPE_I){
3620 s->no_rounding = s->msmpeg4_version >= MSMP4_V3;
3621 }else if(s->pict_type!=AV_PICTURE_TYPE_B){
3622 s->no_rounding ^= s->flipflop_rounding;
3625 if (s->avctx->flags & AV_CODEC_FLAG_PASS2) {
3626 if (estimate_qp(s,1) < 0)
3627 return -1;
3628 ff_get_2pass_fcode(s);
3629 } else if (!(s->avctx->flags & AV_CODEC_FLAG_QSCALE)) {
3630 if(s->pict_type==AV_PICTURE_TYPE_B)
3631 s->lambda= s->last_lambda_for[s->pict_type];
3632 else
3633 s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
3634 update_qscale(s);
3637 if (s->out_format != FMT_MJPEG) {
3638 if(s->q_chroma_intra_matrix != s->q_intra_matrix ) av_freep(&s->q_chroma_intra_matrix);
3639 if(s->q_chroma_intra_matrix16 != s->q_intra_matrix16) av_freep(&s->q_chroma_intra_matrix16);
3640 s->q_chroma_intra_matrix = s->q_intra_matrix;
3641 s->q_chroma_intra_matrix16 = s->q_intra_matrix16;
3644 ff_me_init_pic(s);
3646 s->mb_intra=0; //for the rate distortion & bit compare functions
3647 for (int i = 0; i < context_count; i++) {
3648 MpegEncContext *const slice = s->thread_context[i];
3649 uint8_t *start, *end;
3650 int h;
3652 if (i) {
3653 ret = ff_update_duplicate_context(slice, s);
3654 if (ret < 0)
3655 return ret;
3657 slice->me.temp = slice->me.scratchpad = slice->sc.scratchpad_buf;
3659 h = s->mb_height;
3660 start = pkt->data + (size_t)(((int64_t) pkt->size) * slice->start_mb_y / h);
3661 end = pkt->data + (size_t)(((int64_t) pkt->size) * slice-> end_mb_y / h);
3663 init_put_bits(&s->thread_context[i]->pb, start, end - start);
3666 /* Estimate motion for every MB */
3667 if(s->pict_type != AV_PICTURE_TYPE_I){
3668 s->lambda = (s->lambda * s->me_penalty_compensation + 128) >> 8;
3669 s->lambda2 = (s->lambda2 * (int64_t) s->me_penalty_compensation + 128) >> 8;
3670 if (s->pict_type != AV_PICTURE_TYPE_B) {
3671 if ((s->me_pre && s->last_non_b_pict_type == AV_PICTURE_TYPE_I) ||
3672 s->me_pre == 2) {
3673 s->avctx->execute(s->avctx, pre_estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3677 s->avctx->execute(s->avctx, estimate_motion_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3678 }else /* if(s->pict_type == AV_PICTURE_TYPE_I) */{
3679 /* I-Frame */
3680 for(i=0; i<s->mb_stride*s->mb_height; i++)
3681 s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3683 if(!s->fixed_qscale){
3684 /* finding spatial complexity for I-frame rate control */
3685 s->avctx->execute(s->avctx, mb_var_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3688 for(i=1; i<context_count; i++){
3689 merge_context_after_me(s, s->thread_context[i]);
3691 s->mc_mb_var_sum = s->me.mc_mb_var_sum_temp;
3692 s->mb_var_sum = s->me. mb_var_sum_temp;
3693 emms_c();
3695 if (s->me.scene_change_score > s->scenechange_threshold &&
3696 s->pict_type == AV_PICTURE_TYPE_P) {
3697 s->pict_type= AV_PICTURE_TYPE_I;
3698 for(i=0; i<s->mb_stride*s->mb_height; i++)
3699 s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
3700 if (s->msmpeg4_version >= MSMP4_V3)
3701 s->no_rounding=1;
3702 ff_dlog(s, "Scene change detected, encoding as I Frame %"PRId64" %"PRId64"\n",
3703 s->mb_var_sum, s->mc_mb_var_sum);
3706 if(!s->umvplus){
3707 if(s->pict_type==AV_PICTURE_TYPE_P || s->pict_type==AV_PICTURE_TYPE_S) {
3708 s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
3710 if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3711 int a,b;
3712 a= ff_get_best_fcode(s, s->p_field_mv_table[0][0], CANDIDATE_MB_TYPE_INTER_I); //FIXME field_select
3713 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
3714 s->f_code= FFMAX3(s->f_code, a, b);
3717 ff_fix_long_p_mvs(s, s->intra_penalty ? CANDIDATE_MB_TYPE_INTER : CANDIDATE_MB_TYPE_INTRA);
3718 ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, !!s->intra_penalty);
3719 if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3720 int j;
3721 for(i=0; i<2; i++){
3722 for(j=0; j<2; j++)
3723 ff_fix_long_mvs(s, s->p_field_select_table[i], j,
3724 s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, !!s->intra_penalty);
3727 } else if (s->pict_type == AV_PICTURE_TYPE_B) {
3728 int a, b;
3730 a = ff_get_best_fcode(s, s->b_forw_mv_table, CANDIDATE_MB_TYPE_FORWARD);
3731 b = ff_get_best_fcode(s, s->b_bidir_forw_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3732 s->f_code = FFMAX(a, b);
3734 a = ff_get_best_fcode(s, s->b_back_mv_table, CANDIDATE_MB_TYPE_BACKWARD);
3735 b = ff_get_best_fcode(s, s->b_bidir_back_mv_table, CANDIDATE_MB_TYPE_BIDIR);
3736 s->b_code = FFMAX(a, b);
3738 ff_fix_long_mvs(s, NULL, 0, s->b_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_FORWARD, 1);
3739 ff_fix_long_mvs(s, NULL, 0, s->b_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BACKWARD, 1);
3740 ff_fix_long_mvs(s, NULL, 0, s->b_bidir_forw_mv_table, s->f_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3741 ff_fix_long_mvs(s, NULL, 0, s->b_bidir_back_mv_table, s->b_code, CANDIDATE_MB_TYPE_BIDIR, 1);
3742 if (s->avctx->flags & AV_CODEC_FLAG_INTERLACED_ME) {
3743 int dir, j;
3744 for(dir=0; dir<2; dir++){
3745 for(i=0; i<2; i++){
3746 for(j=0; j<2; j++){
3747 int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
3748 : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
3749 ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
3750 s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
3758 if (estimate_qp(s, 0) < 0)
3759 return -1;
3761 if (s->qscale < 3 && s->max_qcoeff <= 128 &&
3762 s->pict_type == AV_PICTURE_TYPE_I &&
3763 !(s->avctx->flags & AV_CODEC_FLAG_QSCALE))
3764 s->qscale= 3; //reduce clipping problems
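    /* MJPEG/AMV write the quantization matrices into the bitstream, so the
     * current qscale is folded into the intra matrices here (AMV uses fixed
     * sp5x tables) and s->qscale is then pinned to 8 for the actual coding. */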
3766 if (s->out_format == FMT_MJPEG) {
3767 const uint16_t * luma_matrix = ff_mpeg1_default_intra_matrix;
3768 const uint16_t *chroma_matrix = ff_mpeg1_default_intra_matrix;
3770 ret = ff_check_codec_matrices(s->avctx, FF_MATRIX_TYPE_INTRA | FF_MATRIX_TYPE_CHROMA_INTRA, (7 + s->qscale) / s->qscale, 65535);
3771 if (ret < 0)
3772 return ret;
3774 if (s->avctx->intra_matrix) {
3775 chroma_matrix =
3776 luma_matrix = s->avctx->intra_matrix;
3778 if (s->avctx->chroma_intra_matrix)
3779 chroma_matrix = s->avctx->chroma_intra_matrix;
3781 /* for MJPEG, qscale is folded directly into the quantization matrix */
3782 for(i=1;i<64;i++){
3783 int j = s->idsp.idct_permutation[i];
3785 s->chroma_intra_matrix[j] = av_clip_uint8((chroma_matrix[i] * s->qscale) >> 3);
3786 s-> intra_matrix[j] = av_clip_uint8(( luma_matrix[i] * s->qscale) >> 3);
3788 s->y_dc_scale_table=
3789 s->c_dc_scale_table = ff_mpeg12_dc_scale_table[s->intra_dc_precision];
3790 s->chroma_intra_matrix[0] =
3791 s->intra_matrix[0] = ff_mpeg12_dc_scale_table[s->intra_dc_precision][8];
3792 ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3793 s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3794 ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3795 s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3796 s->qscale= 8;
3798 if (s->codec_id == AV_CODEC_ID_AMV) {
3799 static const uint8_t y[32] = {13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13};
3800 static const uint8_t c[32] = {14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14};
3801 for (int i = 1; i < 64; i++) {
3802 int j = s->idsp.idct_permutation[ff_zigzag_direct[i]];
3804 s->intra_matrix[j] = sp5x_qscale_five_quant_table[0][i];
3805 s->chroma_intra_matrix[j] = sp5x_qscale_five_quant_table[1][i];
3807 s->y_dc_scale_table = y;
3808 s->c_dc_scale_table = c;
3809 s->intra_matrix[0] = 13;
3810 s->chroma_intra_matrix[0] = 14;
3811 ff_convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16,
3812 s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
3813 ff_convert_matrix(s, s->q_chroma_intra_matrix, s->q_chroma_intra_matrix16,
3814 s->chroma_intra_matrix, s->intra_quant_bias, 8, 8, 1);
3815 s->qscale = 8;
3819 if (s->pict_type == AV_PICTURE_TYPE_I) {
3820 s->cur_pic.ptr->f->flags |= AV_FRAME_FLAG_KEY;
3821 } else {
3822 s->cur_pic.ptr->f->flags &= ~AV_FRAME_FLAG_KEY;
3824 s->cur_pic.ptr->f->pict_type = s->pict_type;
3826 if (s->cur_pic.ptr->f->flags & AV_FRAME_FLAG_KEY)
3827 s->picture_in_gop_number=0;
3829 s->mb_x = s->mb_y = 0;
3830 s->last_bits= put_bits_count(&s->pb);
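    /* Write the codec-specific picture header and record the number of bits
     * it consumed in s->header_bits. */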
3831 switch(s->out_format) {
3832 #if CONFIG_MJPEG_ENCODER || CONFIG_AMV_ENCODER
3833 case FMT_MJPEG:
3834 ff_mjpeg_amv_encode_picture_header(s);
3835 break;
3836 #endif
3837 case FMT_SPEEDHQ:
3838 if (CONFIG_SPEEDHQ_ENCODER)
3839 ff_speedhq_encode_picture_header(s);
3840 break;
3841 case FMT_H261:
3842 if (CONFIG_H261_ENCODER)
3843 ff_h261_encode_picture_header(s);
3844 break;
3845 case FMT_H263:
3846 if (CONFIG_WMV2_ENCODER && s->codec_id == AV_CODEC_ID_WMV2)
3847 ff_wmv2_encode_picture_header(s);
3848 #if CONFIG_MSMPEG4ENC
3849 else if (s->msmpeg4_version != MSMP4_UNUSED)
3850 ff_msmpeg4_encode_picture_header(s);
3851 #endif
3852 else if (CONFIG_MPEG4_ENCODER && s->h263_pred) {
3853 ret = ff_mpeg4_encode_picture_header(s);
3854 if (ret < 0)
3855 return ret;
3856 } else if (CONFIG_RV10_ENCODER && s->codec_id == AV_CODEC_ID_RV10) {
3857 ret = ff_rv10_encode_picture_header(s);
3858 if (ret < 0)
3859 return ret;
3861 else if (CONFIG_RV20_ENCODER && s->codec_id == AV_CODEC_ID_RV20)
3862 ff_rv20_encode_picture_header(s);
3863 else if (CONFIG_FLV_ENCODER && s->codec_id == AV_CODEC_ID_FLV1)
3864 ff_flv_encode_picture_header(s);
3865 else if (CONFIG_H263_ENCODER)
3866 ff_h263_encode_picture_header(s);
3867 break;
3868 case FMT_MPEG1:
3869 if (CONFIG_MPEG1VIDEO_ENCODER || CONFIG_MPEG2VIDEO_ENCODER)
3870 ff_mpeg1_encode_picture_header(s);
3871 break;
3872 default:
3873 av_assert0(0);
3875 bits= put_bits_count(&s->pb);
3876 s->header_bits= bits - s->last_bits;
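    /* Propagate the post-ME state to the slice contexts, encode all slices in
     * parallel, then merge each slice's bitstream and statistics back into
     * the main context. */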
3878 for(i=1; i<context_count; i++){
3879 update_duplicate_context_after_me(s->thread_context[i], s);
3881 s->avctx->execute(s->avctx, encode_thread, &s->thread_context[0], NULL, context_count, sizeof(void*));
3882 for(i=1; i<context_count; i++){
3883 if (s->pb.buf_end == s->thread_context[i]->pb.buf)
3884 set_put_bits_buffer_size(&s->pb, FFMIN(s->thread_context[i]->pb.buf_end - s->pb.buf, INT_MAX/8-BUF_BITS));
3885 merge_context_after_encode(s, s->thread_context[i]);
3887 emms_c();
3888 return 0;
3891 static void denoise_dct_c(MpegEncContext *s, int16_t *block){
3892 const int intra= s->mb_intra;
3893 int i;
3895 s->dct_count[intra]++;
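    /* Adaptive denoising: accumulate the magnitude of every DCT coefficient
     * in dct_error_sum[] and shrink each level towards zero by the current
     * dct_offset[], clamping so the level never changes sign. */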
3897 for(i=0; i<64; i++){
3898 int level= block[i];
3900 if(level){
3901 if(level>0){
3902 s->dct_error_sum[intra][i] += level;
3903 level -= s->dct_offset[intra][i];
3904 if(level<0) level=0;
3905 }else{
3906 s->dct_error_sum[intra][i] -= level;
3907 level += s->dct_offset[intra][i];
3908 if(level>0) level=0;
3910 block[i]= level;
3915 static int dct_quantize_trellis_c(MpegEncContext *s,
3916 int16_t *block, int n,
3917 int qscale, int *overflow){
3918 const int *qmat;
3919 const uint16_t *matrix;
3920 const uint8_t *scantable;
3921 const uint8_t *perm_scantable;
3922 int max=0;
3923 unsigned int threshold1, threshold2;
3924 int bias=0;
3925 int run_tab[65];
3926 int level_tab[65];
3927 int score_tab[65];
3928 int survivor[65];
3929 int survivor_count;
3930 int last_run=0;
3931 int last_level=0;
3932 int last_score= 0;
3933 int last_i;
3934 int coeff[2][64];
3935 int coeff_count[64];
3936 int qmul, qadd, start_i, last_non_zero, i, dc;
3937 const int esc_length= s->ac_esc_length;
3938 const uint8_t *length, *last_length;
3939 const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
3940 int mpeg2_qscale;
3942 s->fdsp.fdct(block);
3944 if(s->dct_error_sum)
3945 s->denoise_dct(s, block);
3946 qmul= qscale*16;
3947 qadd= ((qscale-1)|1)*8;
3949 if (s->q_scale_type) mpeg2_qscale = ff_mpeg2_non_linear_qscale[qscale];
3950 else mpeg2_qscale = qscale << 1;
3952 if (s->mb_intra) {
3953 int q;
3954 scantable= s->intra_scantable.scantable;
3955 perm_scantable= s->intra_scantable.permutated;
3956 if (!s->h263_aic) {
3957 if (n < 4)
3958 q = s->y_dc_scale;
3959 else
3960 q = s->c_dc_scale;
3961 q = q << 3;
3962 } else{
3963 /* For AIC we skip quant/dequant of INTRADC */
3964 q = 1 << 3;
3965 qadd=0;
3968 /* note: block[0] is assumed to be positive */
3969 block[0] = (block[0] + (q >> 1)) / q;
3970 start_i = 1;
3971 last_non_zero = 0;
3972 qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
3973 matrix = n < 4 ? s->intra_matrix : s->chroma_intra_matrix;
3974 if(s->mpeg_quant || s->out_format == FMT_MPEG1 || s->out_format == FMT_MJPEG)
3975 bias= 1<<(QMAT_SHIFT-1);
3977 if (n > 3 && s->intra_chroma_ac_vlc_length) {
3978 length = s->intra_chroma_ac_vlc_length;
3979 last_length= s->intra_chroma_ac_vlc_last_length;
3980 } else {
3981 length = s->intra_ac_vlc_length;
3982 last_length= s->intra_ac_vlc_last_length;
3984 } else {
3985 scantable= s->inter_scantable.scantable;
3986 perm_scantable= s->inter_scantable.permutated;
3987 start_i = 0;
3988 last_non_zero = -1;
3989 qmat = s->q_inter_matrix[qscale];
3990 matrix = s->inter_matrix;
3991 length = s->inter_ac_vlc_length;
3992 last_length= s->inter_ac_vlc_last_length;
3994 last_i= start_i;
3996 threshold1= (1<<QMAT_SHIFT) - bias - 1;
3997 threshold2= (threshold1<<1);
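    /* The unsigned compare (uint64_t)(level + threshold1) > threshold2 is a
     * branch-free test for |level| > threshold1, i.e. the coefficient will
     * quantize to a nonzero value. */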
3999 for(i=63; i>=start_i; i--) {
4000 const int j = scantable[i];
4001 int64_t level = (int64_t)block[j] * qmat[j];
4003 if(((uint64_t)(level+threshold1))>threshold2){
4004 last_non_zero = i;
4005 break;
4009 for(i=start_i; i<=last_non_zero; i++) {
4010 const int j = scantable[i];
4011 int64_t level = (int64_t)block[j] * qmat[j];
4013 // if( bias+level >= (1<<(QMAT_SHIFT - 3))
4014 // || bias-level >= (1<<(QMAT_SHIFT - 3))){
4015 if(((uint64_t)(level+threshold1))>threshold2){
4016 if(level>0){
4017 level= (bias + level)>>QMAT_SHIFT;
4018 coeff[0][i]= level;
4019 coeff[1][i]= level-1;
4020 // coeff[2][k]= level-2;
4021 }else{
4022 level= (bias - level)>>QMAT_SHIFT;
4023 coeff[0][i]= -level;
4024 coeff[1][i]= -level+1;
4025 // coeff[2][k]= -level+2;
4027 coeff_count[i]= FFMIN(level, 2);
4028 av_assert2(coeff_count[i]);
4029 max |=level;
4030 }else{
4031 coeff[0][i]= (level>>31)|1;
4032 coeff_count[i]= 1;
4036 *overflow= s->max_qcoeff < max; //overflow might have happened
4038 if(last_non_zero < start_i){
4039 memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4040 return last_non_zero;
4043 score_tab[start_i]= 0;
4044 survivor[0]= start_i;
4045 survivor_count= 1;
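    /* Trellis search over the scan positions: for every coefficient try its
     * candidate levels against each surviving predecessor (i.e. each possible
     * zero-run), scoring distortion plus lambda times the VLC length;
     * score_tab[] holds the best cost of coding up to a position, and last_*
     * record the best position/level/run at which to terminate the block
     * (for non-H.263 formats this is selected in a separate pass after the
     * loop). */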
4047 for(i=start_i; i<=last_non_zero; i++){
4048 int level_index, j, zero_distortion;
4049 int dct_coeff= FFABS(block[ scantable[i] ]);
4050 int best_score=256*256*256*120;
4052 if (s->fdsp.fdct == ff_fdct_ifast)
4053 dct_coeff= (dct_coeff*ff_inv_aanscales[ scantable[i] ]) >> 12;
4054 zero_distortion= dct_coeff*dct_coeff;
4056 for(level_index=0; level_index < coeff_count[i]; level_index++){
4057 int distortion;
4058 int level= coeff[level_index][i];
4059 const int alevel= FFABS(level);
4060 int unquant_coeff;
4062 av_assert2(level);
4064 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4065 unquant_coeff= alevel*qmul + qadd;
4066 } else if(s->out_format == FMT_MJPEG) {
4067 j = s->idsp.idct_permutation[scantable[i]];
4068 unquant_coeff = alevel * matrix[j] * 8;
4069 }else{ // MPEG-1
4070 j = s->idsp.idct_permutation[scantable[i]]; // FIXME: optimize
4071 if(s->mb_intra){
4072 unquant_coeff = (int)( alevel * mpeg2_qscale * matrix[j]) >> 4;
4073 unquant_coeff = (unquant_coeff - 1) | 1;
4074 }else{
4075 unquant_coeff = ((( alevel << 1) + 1) * mpeg2_qscale * ((int) matrix[j])) >> 5;
4076 unquant_coeff = (unquant_coeff - 1) | 1;
4078 unquant_coeff<<= 3;
4081 distortion= (unquant_coeff - dct_coeff) * (unquant_coeff - dct_coeff) - zero_distortion;
4082 level+=64;
4083 if((level&(~127)) == 0){
4084 for(j=survivor_count-1; j>=0; j--){
4085 int run= i - survivor[j];
4086 int score= distortion + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4087 score += score_tab[i-run];
4089 if(score < best_score){
4090 best_score= score;
4091 run_tab[i+1]= run;
4092 level_tab[i+1]= level-64;
4096 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4097 for(j=survivor_count-1; j>=0; j--){
4098 int run= i - survivor[j];
4099 int score= distortion + last_length[UNI_AC_ENC_INDEX(run, level)]*lambda;
4100 score += score_tab[i-run];
4101 if(score < last_score){
4102 last_score= score;
4103 last_run= run;
4104 last_level= level-64;
4105 last_i= i+1;
4109 }else{
4110 distortion += esc_length*lambda;
4111 for(j=survivor_count-1; j>=0; j--){
4112 int run= i - survivor[j];
4113 int score= distortion + score_tab[i-run];
4115 if(score < best_score){
4116 best_score= score;
4117 run_tab[i+1]= run;
4118 level_tab[i+1]= level-64;
4122 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4123 for(j=survivor_count-1; j>=0; j--){
4124 int run= i - survivor[j];
4125 int score= distortion + score_tab[i-run];
4126 if(score < last_score){
4127 last_score= score;
4128 last_run= run;
4129 last_level= level-64;
4130 last_i= i+1;
4137 score_tab[i+1]= best_score;
4139 // Note: MPEG-4 has a VLC code that is 1 bit shorter than another one with a shorter run and the same level
4140 if(last_non_zero <= 27){
4141 for(; survivor_count; survivor_count--){
4142 if(score_tab[ survivor[survivor_count-1] ] <= best_score)
4143 break;
4145 }else{
4146 for(; survivor_count; survivor_count--){
4147 if(score_tab[ survivor[survivor_count-1] ] <= best_score + lambda)
4148 break;
4152 survivor[ survivor_count++ ]= i+1;
4155 if(s->out_format != FMT_H263 && s->out_format != FMT_H261){
4156 last_score= 256*256*256*120;
4157 for(i= survivor[0]; i<=last_non_zero + 1; i++){
4158 int score= score_tab[i];
4159 if (i)
4160 score += lambda * 2; // FIXME more exact?
4162 if(score < last_score){
4163 last_score= score;
4164 last_i= i;
4165 last_level= level_tab[i];
4166 last_run= run_tab[i];
4171 s->coded_score[n] = last_score;
4173 dc= FFABS(block[0]);
4174 last_non_zero= last_i - 1;
4175 memset(block + start_i, 0, (64-start_i)*sizeof(int16_t));
4177 if(last_non_zero < start_i)
4178 return last_non_zero;
4180 if(last_non_zero == 0 && start_i == 0){
4181 int best_level= 0;
4182 int best_score= dc * dc;
4184 for(i=0; i<coeff_count[0]; i++){
4185 int level= coeff[i][0];
4186 int alevel= FFABS(level);
4187 int unquant_coeff, score, distortion;
4189 if(s->out_format == FMT_H263 || s->out_format == FMT_H261){
4190 unquant_coeff= (alevel*qmul + qadd)>>3;
4191 } else{ // MPEG-1
4192 unquant_coeff = ((( alevel << 1) + 1) * mpeg2_qscale * ((int) matrix[0])) >> 5;
4193 unquant_coeff = (unquant_coeff - 1) | 1;
4195 unquant_coeff = (unquant_coeff + 4) >> 3;
4196 unquant_coeff<<= 3 + 3;
4198 distortion= (unquant_coeff - dc) * (unquant_coeff - dc);
4199 level+=64;
4200 if((level&(~127)) == 0) score= distortion + last_length[UNI_AC_ENC_INDEX(0, level)]*lambda;
4201 else score= distortion + esc_length*lambda;
4203 if(score < best_score){
4204 best_score= score;
4205 best_level= level - 64;
4208 block[0]= best_level;
4209 s->coded_score[n] = best_score - dc*dc;
4210 if(best_level == 0) return -1;
4211 else return last_non_zero;
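    /* Backtrack from the chosen last coefficient through run_tab[] and
     * level_tab[] and store the selected levels at their permuted scan
     * positions in block[]. */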
4214 i= last_i;
4215 av_assert2(last_level);
4217 block[ perm_scantable[last_non_zero] ]= last_level;
4218 i -= last_run + 1;
4220 for(; i>start_i; i -= run_tab[i] + 1){
4221 block[ perm_scantable[i-1] ]= level_tab[i];
4224 return last_non_zero;
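/* basis[] caches the 64 8x8 DCT basis functions, scaled by
 * 0.25 * (1 << BASIS_SHIFT) and stored in IDCT-permuted coefficient order;
 * dct_quantize_refine() uses it with try_8x8basis()/add_8x8basis() to
 * measure how a single-coefficient change affects the spatial-domain error. */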
4227 static int16_t basis[64][64];
4229 static void build_basis(uint8_t *perm){
4230 int i, j, x, y;
4231 emms_c();
4232 for(i=0; i<8; i++){
4233 for(j=0; j<8; j++){
4234 for(y=0; y<8; y++){
4235 for(x=0; x<8; x++){
4236 double s= 0.25*(1<<BASIS_SHIFT);
4237 int index= 8*i + j;
4238 int perm_index= perm[index];
4239 if(i==0) s*= sqrt(0.5);
4240 if(j==0) s*= sqrt(0.5);
4241 basis[perm_index][8*x + y]= lrintf(s * cos((M_PI/8.0)*i*(x+0.5)) * cos((M_PI/8.0)*j*(y+0.5)));
4248 static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
4249 int16_t *block, int16_t *weight, int16_t *orig,
4250 int n, int qscale){
4251 int16_t rem[64];
4252 LOCAL_ALIGNED_16(int16_t, d1, [64]);
4253 const uint8_t *scantable;
4254 const uint8_t *perm_scantable;
4255 // unsigned int threshold1, threshold2;
4256 // int bias=0;
4257 int run_tab[65];
4258 int prev_run=0;
4259 int prev_level=0;
4260 int qmul, qadd, start_i, last_non_zero, i, dc;
4261 const uint8_t *length;
4262 const uint8_t *last_length;
4263 int lambda;
4264 int rle_index, run, q = 1, sum; //q is only used when s->mb_intra is true
4266 if(basis[0][0] == 0)
4267 build_basis(s->idsp.idct_permutation);
4269 qmul= qscale*2;
4270 qadd= (qscale-1)|1;
4271 if (s->mb_intra) {
4272 scantable= s->intra_scantable.scantable;
4273 perm_scantable= s->intra_scantable.permutated;
4274 if (!s->h263_aic) {
4275 if (n < 4)
4276 q = s->y_dc_scale;
4277 else
4278 q = s->c_dc_scale;
4279 } else{
4280 /* For AIC we skip quant/dequant of INTRADC */
4281 q = 1;
4282 qadd=0;
4284 q <<= RECON_SHIFT-3;
4285 /* note: block[0] is assumed to be positive */
4286 dc= block[0]*q;
4287 // block[0] = (block[0] + (q >> 1)) / q;
4288 start_i = 1;
4289 // if(s->mpeg_quant || s->out_format == FMT_MPEG1)
4290 // bias= 1<<(QMAT_SHIFT-1);
4291 if (n > 3 && s->intra_chroma_ac_vlc_length) {
4292 length = s->intra_chroma_ac_vlc_length;
4293 last_length= s->intra_chroma_ac_vlc_last_length;
4294 } else {
4295 length = s->intra_ac_vlc_length;
4296 last_length= s->intra_ac_vlc_last_length;
4298 } else {
4299 scantable= s->inter_scantable.scantable;
4300 perm_scantable= s->inter_scantable.permutated;
4301 dc= 0;
4302 start_i = 0;
4303 length = s->inter_ac_vlc_length;
4304 last_length= s->inter_ac_vlc_last_length;
4306 last_non_zero = s->block_last_index[n];
4308 dc += (1<<(RECON_SHIFT-1));
4309 for(i=0; i<64; i++){
4310 rem[i] = dc - (orig[i] << RECON_SHIFT); // FIXME use orig directly instead of copying to rem[]
4313 sum=0;
4314 for(i=0; i<64; i++){
4315 int one= 36;
4316 int qns=4;
4317 int w;
4319 w= FFABS(weight[i]) + qns*one;
4320 w= 15 + (48*qns*one + w/2)/w; // 16 .. 63
4322 weight[i] = w;
4323 // w=weight[i] = (63*qns + (w/2)) / w;
4325 av_assert2(w>0);
4326 av_assert2(w<(1<<6));
4327 sum += w*w;
4329 lambda= sum*(uint64_t)s->lambda2 >> (FF_LAMBDA_SHIFT - 6 + 6 + 6 + 6);
4331 run=0;
4332 rle_index=0;
4333 for(i=start_i; i<=last_non_zero; i++){
4334 int j= perm_scantable[i];
4335 const int level= block[j];
4336 int coeff;
4338 if(level){
4339 if(level<0) coeff= qmul*level - qadd;
4340 else coeff= qmul*level + qadd;
4341 run_tab[rle_index++]=run;
4342 run=0;
4344 s->mpvencdsp.add_8x8basis(rem, basis[j], coeff);
4345 }else{
4346 run++;
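    /* Greedy refinement loop: on each pass, try changing every coefficient
     * (for intra blocks also the DC) by +-1, scoring the rate delta from the
     * VLC length tables plus the change in weighted reconstruction error
     * (try_8x8basis); apply the single best change, update the residual with
     * add_8x8basis() and the run-length bookkeeping, and stop once no
     * candidate change scores better than leaving the block as is. */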
4350 for(;;){
4351 int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0);
4352 int best_coeff=0;
4353 int best_change=0;
4354 int run2, best_unquant_change=0, analyze_gradient;
4355 analyze_gradient = last_non_zero > 2 || s->quantizer_noise_shaping >= 3;
4357 if(analyze_gradient){
4358 for(i=0; i<64; i++){
4359 int w= weight[i];
4361 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
4363 s->fdsp.fdct(d1);
4366 if(start_i){
4367 const int level= block[0];
4368 int change, old_coeff;
4370 av_assert2(s->mb_intra);
4372 old_coeff= q*level;
4374 for(change=-1; change<=1; change+=2){
4375 int new_level= level + change;
4376 int score, new_coeff;
4378 new_coeff= q*new_level;
4379 if(new_coeff >= 2048 || new_coeff < 0)
4380 continue;
4382 score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0],
4383 new_coeff - old_coeff);
4384 if(score<best_score){
4385 best_score= score;
4386 best_coeff= 0;
4387 best_change= change;
4388 best_unquant_change= new_coeff - old_coeff;
4393 run=0;
4394 rle_index=0;
4395 run2= run_tab[rle_index++];
4396 prev_level=0;
4397 prev_run=0;
4399 for(i=start_i; i<64; i++){
4400 int j= perm_scantable[i];
4401 const int level= block[j];
4402 int change, old_coeff;
4404 if(s->quantizer_noise_shaping < 3 && i > last_non_zero + 1)
4405 break;
4407 if(level){
4408 if(level<0) old_coeff= qmul*level - qadd;
4409 else old_coeff= qmul*level + qadd;
4410 run2= run_tab[rle_index++]; //FIXME ! maybe after last
4411 }else{
4412 old_coeff=0;
4413 run2--;
4414 av_assert2(run2>=0 || i >= last_non_zero );
4417 for(change=-1; change<=1; change+=2){
4418 int new_level= level + change;
4419 int score, new_coeff, unquant_change;
4421 score=0;
4422 if(s->quantizer_noise_shaping < 2 && FFABS(new_level) > FFABS(level))
4423 continue;
4425 if(new_level){
4426 if(new_level<0) new_coeff= qmul*new_level - qadd;
4427 else new_coeff= qmul*new_level + qadd;
4428 if(new_coeff >= 2048 || new_coeff <= -2048)
4429 continue;
4430 //FIXME check for overflow
4432 if(level){
4433 if(level < 63 && level > -63){
4434 if(i < last_non_zero)
4435 score += length[UNI_AC_ENC_INDEX(run, new_level+64)]
4436 - length[UNI_AC_ENC_INDEX(run, level+64)];
4437 else
4438 score += last_length[UNI_AC_ENC_INDEX(run, new_level+64)]
4439 - last_length[UNI_AC_ENC_INDEX(run, level+64)];
4441 }else{
4442 av_assert2(FFABS(new_level)==1);
4444 if(analyze_gradient){
4445 int g= d1[ scantable[i] ];
4446 if(g && (g^new_level) >= 0)
4447 continue;
4450 if(i < last_non_zero){
4451 int next_i= i + run2 + 1;
4452 int next_level= block[ perm_scantable[next_i] ] + 64;
4454 if(next_level&(~127))
4455 next_level= 0;
4457 if(next_i < last_non_zero)
4458 score += length[UNI_AC_ENC_INDEX(run, 65)]
4459 + length[UNI_AC_ENC_INDEX(run2, next_level)]
4460 - length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4461 else
4462 score += length[UNI_AC_ENC_INDEX(run, 65)]
4463 + last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4464 - last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)];
4465 }else{
4466 score += last_length[UNI_AC_ENC_INDEX(run, 65)];
4467 if(prev_level){
4468 score += length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4469 - last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4473 }else{
4474 new_coeff=0;
4475 av_assert2(FFABS(level)==1);
4477 if(i < last_non_zero){
4478 int next_i= i + run2 + 1;
4479 int next_level= block[ perm_scantable[next_i] ] + 64;
4481 if(next_level&(~127))
4482 next_level= 0;
4484 if(next_i < last_non_zero)
4485 score += length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4486 - length[UNI_AC_ENC_INDEX(run2, next_level)]
4487 - length[UNI_AC_ENC_INDEX(run, 65)];
4488 else
4489 score += last_length[UNI_AC_ENC_INDEX(run + run2 + 1, next_level)]
4490 - last_length[UNI_AC_ENC_INDEX(run2, next_level)]
4491 - length[UNI_AC_ENC_INDEX(run, 65)];
4492 }else{
4493 score += -last_length[UNI_AC_ENC_INDEX(run, 65)];
4494 if(prev_level){
4495 score += last_length[UNI_AC_ENC_INDEX(prev_run, prev_level)]
4496 - length[UNI_AC_ENC_INDEX(prev_run, prev_level)];
4501 score *= lambda;
4503 unquant_change= new_coeff - old_coeff;
4504 av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0);
4506 score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j],
4507 unquant_change);
4508 if(score<best_score){
4509 best_score= score;
4510 best_coeff= i;
4511 best_change= change;
4512 best_unquant_change= unquant_change;
4515 if(level){
4516 prev_level= level + 64;
4517 if(prev_level&(~127))
4518 prev_level= 0;
4519 prev_run= run;
4520 run=0;
4521 }else{
4522 run++;
4526 if(best_change){
4527 int j= perm_scantable[ best_coeff ];
4529 block[j] += best_change;
4531 if(best_coeff > last_non_zero){
4532 last_non_zero= best_coeff;
4533 av_assert2(block[j]);
4534 }else{
4535 for(; last_non_zero>=start_i; last_non_zero--){
4536 if(block[perm_scantable[last_non_zero]])
4537 break;
4541 run=0;
4542 rle_index=0;
4543 for(i=start_i; i<=last_non_zero; i++){
4544 int j= perm_scantable[i];
4545 const int level= block[j];
4547 if(level){
4548 run_tab[rle_index++]=run;
4549 run=0;
4550 }else{
4551 run++;
4555 s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change);
4556 }else{
4557 break;
4561 return last_non_zero;
4565 * Permute an 8x8 block according to the given permutation vector.
4566 * @param block the block which will be permuted according to
4567 * the given permutation vector
4568 * @param permutation the permutation vector
4569 * @param last the last non-zero coefficient in scantable order, used to
4570 * speed the permutation up
4571 * @param scantable the scantable used; this is only used to speed the
4572 * permutation up, the block is not (inverse) permuted
4573 * to scantable order!
4575 void ff_block_permute(int16_t *block, const uint8_t *permutation,
4576 const uint8_t *scantable, int last)
4578 int i;
4579 int16_t temp[64];
4581 if (last <= 0)
4582 return;
4583 //FIXME it is ok but not clean and might fail for some permutations
4584 // if (permutation[1] == 1)
4585 // return;
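    /* Gather the coefficients up to 'last' into temp[] first, then scatter
     * them to their permuted positions; the temporary avoids clobbering a
     * coefficient whose slot is also another coefficient's destination. */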
4587 for (i = 0; i <= last; i++) {
4588 const int j = scantable[i];
4589 temp[j] = block[j];
4590 block[j] = 0;
4593 for (i = 0; i <= last; i++) {
4594 const int j = scantable[i];
4595 const int perm_j = permutation[j];
4596 block[perm_j] = temp[j];
4600 static int dct_quantize_c(MpegEncContext *s,
4601 int16_t *block, int n,
4602 int qscale, int *overflow)
4604 int i, last_non_zero, q, start_i;
4605 const int *qmat;
4606 const uint8_t *scantable;
4607 int bias;
4608 int max=0;
4609 unsigned int threshold1, threshold2;
4611 s->fdsp.fdct(block);
4613 if(s->dct_error_sum)
4614 s->denoise_dct(s, block);
4616 if (s->mb_intra) {
4617 scantable= s->intra_scantable.scantable;
4618 if (!s->h263_aic) {
4619 if (n < 4)
4620 q = s->y_dc_scale;
4621 else
4622 q = s->c_dc_scale;
4623 q = q << 3;
4624 } else
4625 /* For AIC we skip quant/dequant of INTRADC */
4626 q = 1 << 3;
4628 /* note: block[0] is assumed to be positive */
4629 block[0] = (block[0] + (q >> 1)) / q;
4630 start_i = 1;
4631 last_non_zero = 0;
4632 qmat = n < 4 ? s->q_intra_matrix[qscale] : s->q_chroma_intra_matrix[qscale];
4633 bias= s->intra_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4634 } else {
4635 scantable= s->inter_scantable.scantable;
4636 start_i = 0;
4637 last_non_zero = -1;
4638 qmat = s->q_inter_matrix[qscale];
4639 bias= s->inter_quant_bias*(1<<(QMAT_SHIFT - QUANT_BIAS_SHIFT));
4641 threshold1= (1<<QMAT_SHIFT) - bias - 1;
4642 threshold2= (threshold1<<1);
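    /* Two passes over the block: first scan backwards from the highest
     * frequency to find the last coefficient that quantizes to a nonzero
     * value (zeroing everything above it), then quantize the remaining
     * coefficients forward with the rounding bias. The unsigned compare
     * against threshold2 is the same |level| > threshold1 trick as in the
     * trellis quantizer. */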
4643 for(i=63;i>=start_i;i--) {
4644 const int j = scantable[i];
4645 int64_t level = (int64_t)block[j] * qmat[j];
4647 if(((uint64_t)(level+threshold1))>threshold2){
4648 last_non_zero = i;
4649 break;
4650 }else{
4651 block[j]=0;
4654 for(i=start_i; i<=last_non_zero; i++) {
4655 const int j = scantable[i];
4656 int64_t level = (int64_t)block[j] * qmat[j];
4658 // if( bias+level >= (1<<QMAT_SHIFT)
4659 // || bias-level >= (1<<QMAT_SHIFT)){
4660 if(((uint64_t)(level+threshold1))>threshold2){
4661 if(level>0){
4662 level= (bias + level)>>QMAT_SHIFT;
4663 block[j]= level;
4664 }else{
4665 level= (bias - level)>>QMAT_SHIFT;
4666 block[j]= -level;
4668 max |=level;
4669 }else{
4670 block[j]=0;
4673 *overflow= s->max_qcoeff < max; //overflow might have happened
4675 /* permute the coefficients into the order the IDCT expects; only the nonzero elements need permuting */
4676 if (s->idsp.perm_type != FF_IDCT_PERM_NONE)
4677 ff_block_permute(block, s->idsp.idct_permutation,
4678 scantable, last_non_zero);
4680 return last_non_zero;