vp8/encoder/encodeframe.c

   1 /*
   2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
   3  *
   4  *  Use of this source code is governed by a BSD-style license
   5  *  that can be found in the LICENSE file in the root of the source
   6  *  tree. An additional intellectual property rights grant can be found
   7  *  in the file PATENTS.  All contributing project authors may
   8  *  be found in the AUTHORS file in the root of the source tree.
   9  */
  10
  11
  12 #include "vpx_ports/config.h"
  13 #include "encodemb.h"
  14 #include "encodemv.h"
  15 #include "common.h"
  16 #include "onyx_int.h"
  17 #include "extend.h"
  18 #include "entropymode.h"
  19 #include "quant_common.h"
  20 #include "segmentation.h"
  21 #include "setupintrarecon.h"
  22 #include "encodeintra.h"
  23 #include "reconinter.h"
  24 #include "rdopt.h"
  25 #include "pickinter.h"
  26 #include "findnearmv.h"
  27 #include "reconintra.h"
  28 #include <stdio.h>
  29 #include <limits.h>
  30 #include "subpixel.h"
  31 #include "vpx_ports/vpx_timer.h"
  32
  33 #if CONFIG_RUNTIME_CPU_DETECT
  34 #define RTCD(x)     &cpi->common.rtcd.x
  35 #define IF_RTCD(x)  (x)
  36 #else
  37 #define RTCD(x)     NULL
  38 #define IF_RTCD(x)  NULL
  39 #endif
  40 extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
  41
  42 extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
  43 extern void vp8_auto_select_speed(VP8_COMP *cpi);
  44 extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
  45                                       MACROBLOCK *x,
  46                                       MB_ROW_COMP *mbr_ei,
  47                                       int mb_row,
  48                                       int count);
  49 void vp8_build_block_offsets(MACROBLOCK *x);
  50 void vp8_setup_block_ptrs(MACROBLOCK *x);
  51 int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
  52 int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
  53
  54 #ifdef MODE_STATS
  55 unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  56 unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
  57 unsigned int inter_b_modes[15]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  58 unsigned int y_modes[5]   = {0, 0, 0, 0, 0};
  59 unsigned int uv_modes[4]  = {0, 0, 0, 0};
  60 unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
  61 #endif
  62
  63 static const int qrounding_factors[129] =
  64 {
  65     48, 48, 48, 48, 48, 48, 48, 48,
  66     48, 48, 48, 48, 48, 48, 48, 48,
  67     48, 48, 48, 48, 48, 48, 48, 48,
  68     48, 48, 48, 48, 48, 48, 48, 48,
  69     48, 48, 48, 48, 48, 48, 48, 48,
  70     48, 48, 48, 48, 48, 48, 48, 48,
  71     48, 48, 48, 48, 48, 48, 48, 48,
  72     48, 48, 48, 48, 48, 48, 48, 48,
  73     48, 48, 48, 48, 48, 48, 48, 48,
  74     48, 48, 48, 48, 48, 48, 48, 48,
  75     48, 48, 48, 48, 48, 48, 48, 48,
  76     48, 48, 48, 48, 48, 48, 48, 48,
  77     48, 48, 48, 48, 48, 48, 48, 48,
  78     48, 48, 48, 48, 48, 48, 48, 48,
  79     48, 48, 48, 48, 48, 48, 48, 48,
  80     48, 48, 48, 48, 48, 48, 48, 48,
  81     48
  82 };
  83
  84 static const int qzbin_factors[129] =
  85 {
  86     84, 84, 84, 84, 84, 84, 84, 84,
  87     84, 84, 84, 84, 84, 84, 84, 84,
  88     84, 84, 84, 84, 84, 84, 84, 84,
  89     84, 84, 84, 84, 84, 84, 84, 84,
  90     84, 84, 84, 84, 84, 84, 84, 84,
  91     84, 84, 84, 84, 84, 84, 84, 84,
  92     80, 80, 80, 80, 80, 80, 80, 80,
  93     80, 80, 80, 80, 80, 80, 80, 80,
  94     80, 80, 80, 80, 80, 80, 80, 80,
  95     80, 80, 80, 80, 80, 80, 80, 80,
  96     80, 80, 80, 80, 80, 80, 80, 80,
  97     80, 80, 80, 80, 80, 80, 80, 80,
  98     80, 80, 80, 80, 80, 80, 80, 80,
  99     80, 80, 80, 80, 80, 80, 80, 80,
 100     80, 80, 80, 80, 80, 80, 80, 80,
 101     80, 80, 80, 80, 80, 80, 80, 80,
 102     80
 103 };
 104
 105 static const int qrounding_factors_y2[129] =
 106 {
 107     48, 48, 48, 48, 48, 48, 48, 48,
 108     48, 48, 48, 48, 48, 48, 48, 48,
 109     48, 48, 48, 48, 48, 48, 48, 48,
 110     48, 48, 48, 48, 48, 48, 48, 48,
 111     48, 48, 48, 48, 48, 48, 48, 48,
 112     48, 48, 48, 48, 48, 48, 48, 48,
 113     48, 48, 48, 48, 48, 48, 48, 48,
 114     48, 48, 48, 48, 48, 48, 48, 48,
 115     48, 48, 48, 48, 48, 48, 48, 48,
 116     48, 48, 48, 48, 48, 48, 48, 48,
 117     48, 48, 48, 48, 48, 48, 48, 48,
 118     48, 48, 48, 48, 48, 48, 48, 48,
 119     48, 48, 48, 48, 48, 48, 48, 48,
 120     48, 48, 48, 48, 48, 48, 48, 48,
 121     48, 48, 48, 48, 48, 48, 48, 48,
 122     48, 48, 48, 48, 48, 48, 48, 48,
 123     48
 124 };
 125
 126 static const int qzbin_factors_y2[129] =
 127 {
 128     84, 84, 84, 84, 84, 84, 84, 84,
 129     84, 84, 84, 84, 84, 84, 84, 84,
 130     84, 84, 84, 84, 84, 84, 84, 84,
 131     84, 84, 84, 84, 84, 84, 84, 84,
 132     84, 84, 84, 84, 84, 84, 84, 84,
 133     84, 84, 84, 84, 84, 84, 84, 84,
 134     80, 80, 80, 80, 80, 80, 80, 80,
 135     80, 80, 80, 80, 80, 80, 80, 80,
 136     80, 80, 80, 80, 80, 80, 80, 80,
 137     80, 80, 80, 80, 80, 80, 80, 80,
 138     80, 80, 80, 80, 80, 80, 80, 80,
 139     80, 80, 80, 80, 80, 80, 80, 80,
 140     80, 80, 80, 80, 80, 80, 80, 80,
 141     80, 80, 80, 80, 80, 80, 80, 80,
 142     80, 80, 80, 80, 80, 80, 80, 80,
 143     80, 80, 80, 80, 80, 80, 80, 80,
 144     80
 145 };
 146
 147 #define EXACT_QUANT
 148 #ifdef EXACT_QUANT
 149 static void vp8cx_invert_quant(int improved_quant, short *quant,
 150                                short *shift, short d)
 151 {
 152     if(improved_quant)
 153     {
 154         unsigned t;
 155         int l;
 156         t = d;
 157         for(l = 0; t > 1; l++)
 158             t>>=1;
 159         t = 1 + (1<<(16+l))/d;
 160         *quant = (short)(t - (1<<16));
 161         *shift = l;
 162     }
 163     else
 164     {
 165         *quant = (1 << 16) / d;
 166         *shift = 0;
 167     }
 168 }
 169
 170 void vp8cx_init_quantizer(VP8_COMP *cpi)
 171 {
 172     int i;
 173     int quant_val;
 174     int Q;
 175
 176     int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
 177
 178     for (Q = 0; Q < QINDEX_RANGE; Q++)
 179     {
 180         // dc values
 181         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
 182         cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
 183         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
 184                            cpi->Y1quant_shift[Q] + 0, quant_val);
 185         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 186         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 187         cpi->common.Y1dequant[Q][0] = quant_val;
 188         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 189
 190         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
 191         cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
 192         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
 193                            cpi->Y2quant_shift[Q] + 0, quant_val);
 194         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 195         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 196         cpi->common.Y2dequant[Q][0] = quant_val;
 197         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 198
 199         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
 200         cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
 201         vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
 202                            cpi->UVquant_shift[Q] + 0, quant_val);
 203         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
 204         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 205         cpi->common.UVdequant[Q][0] = quant_val;
 206         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 207
 208         // all the ac values = ;
 209         for (i = 1; i < 16; i++)
 210         {
 211             int rc = vp8_default_zig_zag1d[i];
 212
 213             quant_val = vp8_ac_yquant(Q);
 214             cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
 215             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
 216                                cpi->Y1quant_shift[Q] + rc, quant_val);
 217             cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 218             cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 219             cpi->common.Y1dequant[Q][rc] = quant_val;
 220             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 221
 222             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
 223             cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
 224             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
 225                                cpi->Y2quant_shift[Q] + rc, quant_val);
 226             cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 227             cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 228             cpi->common.Y2dequant[Q][rc] = quant_val;
 229             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 230
 231             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
 232             cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
 233             vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
 234                                cpi->UVquant_shift[Q] + rc, quant_val);
 235             cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 236             cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 237             cpi->common.UVdequant[Q][rc] = quant_val;
 238             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 239         }
 240     }
 241 }
 242 #else
 243 void vp8cx_init_quantizer(VP8_COMP *cpi)
 244 {
 245     int i;
 246     int quant_val;
 247     int Q;
 248
 249     int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
 250
 251     for (Q = 0; Q < QINDEX_RANGE; Q++)
 252     {
 253         // dc values
 254         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
 255         cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
 256         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 257         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 258         cpi->common.Y1dequant[Q][0] = quant_val;
 259         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 260
 261         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
 262         cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
 263         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 264         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 265         cpi->common.Y2dequant[Q][0] = quant_val;
 266         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 267
 268         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
 269         cpi->UVquant[Q][0] = (1 << 16) / quant_val;
 270         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
 271         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
 272         cpi->common.UVdequant[Q][0] = quant_val;
 273         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 274
 275         // all the ac values = ;
 276         for (i = 1; i < 16; i++)
 277         {
 278             int rc = vp8_default_zig_zag1d[i];
 279
 280             quant_val = vp8_ac_yquant(Q);
 281             cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
 282             cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 283             cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 284             cpi->common.Y1dequant[Q][rc] = quant_val;
 285             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 286
 287             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
 288             cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
 289             cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
 290             cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
 291             cpi->common.Y2dequant[Q][rc] = quant_val;
 292             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 293
 294             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
 295             cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
 296             cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
 297             cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
 298             cpi->common.UVdequant[Q][rc] = quant_val;
 299             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 300         }
 301     }
 302 }
 303 #endif
 304 void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
 305 {
 306     int i;
 307     int QIndex;
 308     MACROBLOCKD *xd = &x->e_mbd;
 309     int zbin_extra;
 310
 311     // Select the baseline MB Q index.
 312     if (xd->segmentation_enabled)
 313     {
 314         // Abs Value
 315         if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
 316
 317             QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
 318         // Delta Value
 319         else
 320         {
 321             QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
 322             QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
 323         }
 324     }
 325     else
 326         QIndex = cpi->common.base_qindex;
 327
 328     // Y
 329     zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 330
 331     for (i = 0; i < 16; i++)
 332     {
 333         x->block[i].quant = cpi->Y1quant[QIndex];
 334         x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
 335         x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
 336         x->block[i].zbin = cpi->Y1zbin[QIndex];
 337         x->block[i].round = cpi->Y1round[QIndex];
 338         x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
 339         x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
 340         x->block[i].zbin_extra = (short)zbin_extra;
 341     }
 342
 343     // UV
 344     zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 345
 346     for (i = 16; i < 24; i++)
 347     {
 348         x->block[i].quant = cpi->UVquant[QIndex];
 349         x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
 350         x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
 351         x->block[i].zbin = cpi->UVzbin[QIndex];
 352         x->block[i].round = cpi->UVround[QIndex];
 353         x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
 354         x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
 355         x->block[i].zbin_extra = (short)zbin_extra;
 356     }
 357
 358     // Y2
 359     zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
 360     x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
 361     x->block[24].quant = cpi->Y2quant[QIndex];
 362     x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
 363     x->block[24].zbin = cpi->Y2zbin[QIndex];
 364     x->block[24].round = cpi->Y2round[QIndex];
 365     x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
 366     x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
 367     x->block[24].zbin_extra = (short)zbin_extra;
 368
 369     /* save this macroblock QIndex for vp8_update_zbin_extra() */
 370     x->q_index = QIndex;
 371 }
 372 void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x)
 373 {
 374     int i;
 375     int QIndex = x->q_index;
 376     int zbin_extra;
 377
 378     // Y
 379     zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 380     for (i = 0; i < 16; i++)
 381     {
 382         x->block[i].zbin_extra = (short)zbin_extra;
 383     }
 384
 385     // UV
 386     zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 387     for (i = 16; i < 24; i++)
 388     {
 389         x->block[i].zbin_extra = (short)zbin_extra;
 390     }
 391
 392     // Y2
 393     zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
 394     x->block[24].zbin_extra = (short)zbin_extra;
 395 }
 396
 397 void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
 398 {
 399     // Clear Zbin mode boost for default case
 400     cpi->zbin_mode_boost = 0;
 401
 402     // MB level quantizer setup
 403     vp8cx_mb_init_quantizer(cpi, &cpi->mb);
 404 }
 405
 406
 407 /* activity_avg must be positive, or flat regions could get a zero weight
 408  *  (infinite lambda), which confounds analysis.
 409  * This also avoids the need for divide by zero checks in
 410  *  vp8_activity_masking().
 411  */
 412 #define VP8_ACTIVITY_AVG_MIN (64)
 413
 414 /* This is used as a reference when computing the source variance for the
 415  *  purposes of activity masking.
 416  * Eventually this should be replaced by custom no-reference routines,
 417  *  which will be faster.
 418  */
 419 static const unsigned char VP8_VAR_OFFS[16]=
 420 {
 421     128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128
 422 };
 423
 424 unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
 425 {
 426     unsigned int act;
 427     unsigned int sse;
 428     int sum;
 429     unsigned int a;
 430     unsigned int b;
 431     /* TODO: This could also be done over smaller areas (8x8), but that would
 432      *  require extensive changes elsewhere, as lambda is assumed to be fixed
 433      *  over an entire MB in most of the code.
 434      * Another option is to compute four 8x8 variances, and pick a single
 435      *  lambda using a non-linear combination (e.g., the smallest, or second
 436      *  smallest, etc.).
 437      */
 438     VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
 439      x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
 440     /* This requires a full 32 bits of precision. */
 441     act = (sse<<8) - sum*sum;
 442     /* Drop 4 to give us some headroom to work with. */
 443     act = (act + 8) >> 4;
 444     /* If the region is flat, lower the activity some more. */
 445     if (act < 8<<12)
 446         act = act < 5<<12 ? act : 5<<12;
 447     /* TODO: For non-flat regions, edge regions should receive less masking
 448      *  than textured regions, but identifying edge regions quickly and
 449      *  reliably enough is still a subject of experimentation.
 450      * This will be most noticable near edges with a complex shape (e.g.,
 451      *  text), but the 4x4 transform size should make this less of a problem
 452      *  than it would be for an 8x8 transform.
 453      */
 454     /* Apply the masking to the RD multiplier. */
 455     a = act + 4*cpi->activity_avg;
 456     b = 4*act + cpi->activity_avg;
 457     x->rdmult = (unsigned int)(((INT64)x->rdmult*b + (a>>1))/a);
 458     return act;
 459 }
 460
 461
 462
 463 static
 464 void encode_mb_row(VP8_COMP *cpi,
 465                    VP8_COMMON *cm,
 466                    int mb_row,
 467                    MACROBLOCK  *x,
 468                    MACROBLOCKD *xd,
 469                    TOKENEXTRA **tp,
 470                    int *segment_counts,
 471                    int *totalrate)
 472 {
 473     INT64 activity_sum = 0;
 474     int i;
 475     int recon_yoffset, recon_uvoffset;
 476     int mb_col;
 477     int ref_fb_idx = cm->lst_fb_idx;
 478     int dst_fb_idx = cm->new_fb_idx;
 479     int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
 480     int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
 481     int seg_map_index = (mb_row * cpi->common.mb_cols);
 482
 483 #if CONFIG_MULTITHREAD
 484     const int nsync = cpi->mt_sync_range;
 485     const int rightmost_col = cm->mb_cols - 1;
 486     volatile const int *last_row_current_mb_col;
 487
 488     if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
 489         last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];
 490     else
 491         last_row_current_mb_col = &rightmost_col;
 492 #endif
 493
 494     // reset above block coeffs
 495     xd->above_context = cm->above_context;
 496
 497     xd->up_available = (mb_row != 0);
 498     recon_yoffset = (mb_row * recon_y_stride * 16);
 499     recon_uvoffset = (mb_row * recon_uv_stride * 8);
 500
 501     cpi->tplist[mb_row].start = *tp;
 502     //printf("Main mb_row = %d\n", mb_row);
 503
 504     // Distance of Mb to the top & bottom edges, specified in 1/8th pel
 505     // units as they are always compared to values that are in 1/8th pel units
 506     xd->mb_to_top_edge = -((mb_row * 16) << 3);
 507     xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
 508
 509     // Set up limit values for vertical motion vector components
 510     // to prevent them extending beyond the UMV borders
 511     x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
 512     x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
 513                         + (VP8BORDERINPIXELS - 16);
 514
 515     // for each macroblock col in image
 516     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
 517     {
 518         // Distance of Mb to the left & right edges, specified in
 519         // 1/8th pel units as they are always compared to values
 520         // that are in 1/8th pel units
 521         xd->mb_to_left_edge = -((mb_col * 16) << 3);
 522         xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
 523
 524         // Set up limit values for horizontal motion vector components
 525         // to prevent them extending beyond the UMV borders
 526         x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
 527         x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
 528                             + (VP8BORDERINPIXELS - 16);
 529
 530         xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
 531         xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
 532         xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 533         xd->left_available = (mb_col != 0);
 534
 535         x->rddiv = cpi->RDDIV;
 536         x->rdmult = cpi->RDMULT;
 537
 538 #if CONFIG_MULTITHREAD
 539         if ((cpi->b_multi_threaded != 0) && (mb_row != 0))
 540         {
 541             if ((mb_col & (nsync - 1)) == 0)
 542             {
 543                 while (mb_col > (*last_row_current_mb_col - nsync)
 544                         && (*last_row_current_mb_col) != (cm->mb_cols - 1))
 545                 {
 546                     x86_pause_hint();
 547                     thread_sleep(0);
 548                 }
 549             }
 550         }
 551 #endif
 552
 553         if(cpi->oxcf.tuning == VP8_TUNE_SSIM)
 554             activity_sum += vp8_activity_masking(cpi, x);
 555
 556         // Is segmentation enabled
 557         // MB level adjutment to quantizer
 558         if (xd->segmentation_enabled)
 559         {
 560             // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
 561             if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
 562                 xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
 563             else
 564                 xd->mode_info_context->mbmi.segment_id = 0;
 565
 566             vp8cx_mb_init_quantizer(cpi, x);
 567         }
 568         else
 569             xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default
 570
 571         x->active_ptr = cpi->active_map + seg_map_index + mb_col;
 572
 573         if (cm->frame_type == KEY_FRAME)
 574         {
 575             *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
 576 #ifdef MODE_STATS
 577             y_modes[xd->mbmi.mode] ++;
 578 #endif
 579         }
 580         else
 581         {
 582             *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);
 583
 584 #ifdef MODE_STATS
 585             inter_y_modes[xd->mbmi.mode] ++;
 586
 587             if (xd->mbmi.mode == SPLITMV)
 588             {
 589                 int b;
 590
 591                 for (b = 0; b < xd->mbmi.partition_count; b++)
 592                 {
 593                     inter_b_modes[x->partition->bmi[b].mode] ++;
 594                 }
 595             }
 596
 597 #endif
 598
 599             // Count of last ref frame 0,0 useage
 600             if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
 601                 cpi->inter_zz_count ++;
 602
 603             // Special case code for cyclic refresh
 604             // If cyclic update enabled then copy xd->mbmi.segment_id; (which may have been updated based on mode
 605             // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
 606             if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
 607             {
 608                 cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
 609
 610                 // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
 611                 // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
 612                 // else mark it as dirty (1).
 613                 if (xd->mode_info_context->mbmi.segment_id)
 614                     cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
 615                 else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
 616                 {
 617                     if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
 618                         cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
 619                 }
 620                 else
 621                     cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;
 622
 623             }
 624         }
 625
 626         cpi->tplist[mb_row].stop = *tp;
 627
 628         x->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
 629
 630         for (i = 0; i < 16; i++)
 631             vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
 632
 633         // adjust to the next column of macroblocks
 634         x->src.y_buffer += 16;
 635         x->src.u_buffer += 8;
 636         x->src.v_buffer += 8;
 637
 638         recon_yoffset += 16;
 639         recon_uvoffset += 8;
 640
 641         // Keep track of segment useage
 642         segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
 643
 644         // skip to next mb
 645         xd->mode_info_context++;
 646         x->partition_info++;
 647
 648         xd->above_context++;
 649 #if CONFIG_MULTITHREAD
 650         if (cpi->b_multi_threaded != 0)
 651         {
 652             cpi->mt_current_mb_col[mb_row] = mb_col;
 653         }
 654 #endif
 655     }
 656
 657     //extend the recon for intra prediction
 658     vp8_extend_mb_row(
 659         &cm->yv12_fb[dst_fb_idx],
 660         xd->dst.y_buffer + 16,
 661         xd->dst.u_buffer + 8,
 662         xd->dst.v_buffer + 8);
 663
 664     // this is to account for the border
 665     xd->mode_info_context++;
 666     x->partition_info++;
 667     x->activity_sum += activity_sum;
 668
 669 #if CONFIG_MULTITHREAD
 670     if ((cpi->b_multi_threaded != 0) && (mb_row == cm->mb_rows - 1))
 671     {
 672         sem_post(&cpi->h_event_end_encoding); /* signal frame encoding end */
 673     }
 674 #endif
 675 }
 676
 677 void vp8_encode_frame(VP8_COMP *cpi)
 678 {
 679     int mb_row;
 680     MACROBLOCK *const x = & cpi->mb;
 681     VP8_COMMON *const cm = & cpi->common;
 682     MACROBLOCKD *const xd = & x->e_mbd;
 683
 684     TOKENEXTRA *tp = cpi->tok;
 685     int segment_counts[MAX_MB_SEGMENTS];
 686     int totalrate;
 687
 688     // Functions setup for all frame types so we can use MC in AltRef
 689     if (cm->mcomp_filter_type == SIXTAP)
 690     {
 691         xd->subpixel_predict        = SUBPIX_INVOKE(
 692                                         &cpi->common.rtcd.subpix, sixtap4x4);
 693         xd->subpixel_predict8x4     = SUBPIX_INVOKE(
 694                                         &cpi->common.rtcd.subpix, sixtap8x4);
 695         xd->subpixel_predict8x8     = SUBPIX_INVOKE(
 696                                         &cpi->common.rtcd.subpix, sixtap8x8);
 697         xd->subpixel_predict16x16   = SUBPIX_INVOKE(
 698                                         &cpi->common.rtcd.subpix, sixtap16x16);
 699     }
 700     else
 701     {
 702         xd->subpixel_predict        = SUBPIX_INVOKE(
 703                                         &cpi->common.rtcd.subpix, bilinear4x4);
 704         xd->subpixel_predict8x4     = SUBPIX_INVOKE(
 705                                         &cpi->common.rtcd.subpix, bilinear8x4);
 706         xd->subpixel_predict8x8     = SUBPIX_INVOKE(
 707                                         &cpi->common.rtcd.subpix, bilinear8x8);
 708         xd->subpixel_predict16x16   = SUBPIX_INVOKE(
 709                                       &cpi->common.rtcd.subpix, bilinear16x16);
 710     }
 711
 712     x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure
 713
 714     x->vector_range = 32;
 715
 716     // Count of MBs using the alternate Q if any
 717     cpi->alt_qcount = 0;
 718
 719     // Reset frame count of inter 0,0 motion vector useage.
 720     cpi->inter_zz_count = 0;
 721
 722     vpx_memset(segment_counts, 0, sizeof(segment_counts));
 723
 724     cpi->prediction_error = 0;
 725     cpi->intra_error = 0;
 726     cpi->skip_true_count = 0;
 727     cpi->skip_false_count = 0;
 728
 729 #if 0
 730     // Experimental code
 731     cpi->frame_distortion = 0;
 732     cpi->last_mb_distortion = 0;
 733 #endif
 734
 735     totalrate = 0;
 736
 737     x->partition_info = x->pi;
 738
 739     xd->mode_info_context = cm->mi;
 740     xd->mode_info_stride = cm->mode_info_stride;
 741
 742     xd->frame_type = cm->frame_type;
 743
 744     xd->frames_since_golden = cm->frames_since_golden;
 745     xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
 746     vp8_zero(cpi->MVcount);
 747     // vp8_zero( Contexts)
 748     vp8_zero(cpi->coef_counts);
 749
 750     // reset intra mode contexts
 751     if (cm->frame_type == KEY_FRAME)
 752         vp8_init_mbmode_probs(cm);
 753
 754
 755     vp8cx_frame_init_quantizer(cpi);
 756
 757     if (cpi->compressor_speed == 2)
 758     {
 759         if (cpi->oxcf.cpu_used < 0)
 760             cpi->Speed = -(cpi->oxcf.cpu_used);
 761         else
 762             vp8_auto_select_speed(cpi);
 763     }
 764
 765     vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
 766     vp8cx_initialize_me_consts(cpi, cm->base_qindex);
 767
 768     // Copy data over into macro block data sturctures.
 769
 770     x->src = * cpi->Source;
 771     xd->pre = cm->yv12_fb[cm->lst_fb_idx];
 772     xd->dst = cm->yv12_fb[cm->new_fb_idx];
 773
 774     // set up frame new frame for intra coded blocks
 775
 776     vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
 777
 778     vp8_build_block_offsets(x);
 779
 780     vp8_setup_block_dptrs(&x->e_mbd);
 781
 782     vp8_setup_block_ptrs(x);
 783
 784     x->activity_sum = 0;
 785
 786     xd->mode_info_context->mbmi.mode = DC_PRED;
 787     xd->mode_info_context->mbmi.uv_mode = DC_PRED;
 788
 789     xd->left_context = &cm->left_context;
 790
 791     vp8_zero(cpi->count_mb_ref_frame_usage)
 792     vp8_zero(cpi->ymode_count)
 793     vp8_zero(cpi->uv_mode_count)
 794
 795     x->mvc = cm->fc.mvc;
 796
 797     vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
 798
 799     {
 800         struct vpx_usec_timer  emr_timer;
 801         vpx_usec_timer_start(&emr_timer);
 802
 803 #if CONFIG_MULTITHREAD
 804         if (cpi->b_multi_threaded)
 805         {
 806             int i;
 807
 808             vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1,  cpi->encoding_thread_count);
 809
 810             for (i = 0; i < cm->mb_rows; i++)
 811                 cpi->mt_current_mb_col[i] = 0;
 812
 813             for (i = 0; i < cpi->encoding_thread_count; i++)
 814             {
 815                 sem_post(&cpi->h_event_start_encoding[i]);
 816             }
 817
 818             for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
 819             {
 820                 vp8_zero(cm->left_context)
 821
 822                 tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);
 823
 824                 encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 825
 826                 // adjust to the next row of mbs
 827                 x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
 828                 x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 829                 x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 830
 831                 xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
 832                 x->partition_info  += xd->mode_info_stride * cpi->encoding_thread_count;
 833
 834             }
 835
 836             sem_wait(&cpi->h_event_end_encoding); /* wait for other threads to finish */
 837
 838             cpi->tok_count = 0;
 839
 840             for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
 841             {
 842                 cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
 843             }
 844
 845             if (xd->segmentation_enabled)
 846             {
 847                 int i, j;
 848
 849                 if (xd->segmentation_enabled)
 850                 {
 851
 852                     for (i = 0; i < cpi->encoding_thread_count; i++)
 853                     {
 854                         for (j = 0; j < 4; j++)
 855                             segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
 856                     }
 857                 }
 858             }
 859
 860             for (i = 0; i < cpi->encoding_thread_count; i++)
 861             {
 862                 totalrate += cpi->mb_row_ei[i].totalrate;
 863             }
 864
 865             for (i = 0; i < cpi->encoding_thread_count; i++)
 866             {
 867                 x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
 868             }
 869
 870         }
 871         else
 872 #endif
 873         {
 874             // for each macroblock row in image
 875             for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
 876             {
 877
 878                 vp8_zero(cm->left_context)
 879
 880                 encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
 881
 882                 // adjust to the next row of mbs
 883                 x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
 884                 x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 885                 x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 886             }
 887
 888             cpi->tok_count = tp - cpi->tok;
 889
 890         }
 891
 892         vpx_usec_timer_mark(&emr_timer);
 893         cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
 894
 895     }
 896
 897
 898     // Work out the segment probabilites if segmentation is enabled
 899     if (xd->segmentation_enabled)
 900     {
 901         int tot_count;
 902         int i;
 903
 904         // Set to defaults
 905         vpx_memset(xd->mb_segment_tree_probs, 255 , sizeof(xd->mb_segment_tree_probs));
 906
 907         tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];
 908
 909         if (tot_count)
 910         {
 911             xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;
 912
 913             tot_count = segment_counts[0] + segment_counts[1];
 914
 915             if (tot_count > 0)
 916             {
 917                 xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
 918             }
 919
 920             tot_count = segment_counts[2] + segment_counts[3];
 921
 922             if (tot_count > 0)
 923                 xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;
 924
 925             // Zero probabilities not allowed
 926             for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
 927             {
 928                 if (xd->mb_segment_tree_probs[i] == 0)
 929                     xd->mb_segment_tree_probs[i] = 1;
 930             }
 931         }
 932     }
 933
 934     // 256 rate units to the bit
 935     cpi->projected_frame_size = totalrate >> 8;   // projected_frame_size in units of BYTES
 936
 937     // Make a note of the percentage MBs coded Intra.
 938     if (cm->frame_type == KEY_FRAME)
 939     {
 940         cpi->this_frame_percent_intra = 100;
 941     }
 942     else
 943     {
 944         int tot_modes;
 945
 946         tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
 947                     + cpi->count_mb_ref_frame_usage[LAST_FRAME]
 948                     + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
 949                     + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];
 950
 951         if (tot_modes)
 952             cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
 953
 954     }
 955
 956 #if 0
 957     {
 958         int cnt = 0;
 959         int flag[2] = {0, 0};
 960
 961         for (cnt = 0; cnt < MVPcount; cnt++)
 962         {
 963             if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
 964             {
 965                 flag[0] = 1;
 966                 vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
 967                 break;
 968             }
 969         }
 970
 971         for (cnt = 0; cnt < MVPcount; cnt++)
 972         {
 973             if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
 974             {
 975                 flag[1] = 1;
 976                 vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
 977                 break;
 978             }
 979         }
 980
 981         if (flag[0] || flag[1])
 982             vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
 983     }
 984 #endif
 985
 986     // Adjust the projected reference frame useage probability numbers to reflect
 987     // what we have just seen. This may be usefull when we make multiple itterations
 988     // of the recode loop rather than continuing to use values from the previous frame.
 989     if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
 990     {
 991         const int *const rfct = cpi->count_mb_ref_frame_usage;
 992         const int rf_intra = rfct[INTRA_FRAME];
 993         const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
 994
 995         if ((rf_intra + rf_inter) > 0)
 996         {
 997             cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);
 998
 999             if (cpi->prob_intra_coded < 1)
1000                 cpi->prob_intra_coded = 1;
1001
1002             if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
1003             {
1004                 cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;
1005
1006                 if (cpi->prob_last_coded < 1)
1007                     cpi->prob_last_coded = 1;
1008
1009                 cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
1010                                      ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;
1011
1012                 if (cpi->prob_gf_coded < 1)
1013                     cpi->prob_gf_coded = 1;
1014             }
1015         }
1016     }
1017
1018 #if 0
1019     // Keep record of the total distortion this time around for future use
1020     cpi->last_frame_distortion = cpi->frame_distortion;
1021 #endif
1022
1023     /* Update the average activity for the next frame.
1024      * This is feed-forward for now; it could also be saved in two-pass, or
1025      *  done during lookahead when that is eventually added.
1026      */
1027     cpi->activity_avg = (unsigned int )(x->activity_sum/cpi->common.MBs);
1028     if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
1029         cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
1030
1031 }
1032 void vp8_setup_block_ptrs(MACROBLOCK *x)
1033 {
1034     int r, c;
1035     int i;
1036
1037     for (r = 0; r < 4; r++)
1038     {
1039         for (c = 0; c < 4; c++)
1040         {
1041             x->block[r*4+c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
1042         }
1043     }
1044
1045     for (r = 0; r < 2; r++)
1046     {
1047         for (c = 0; c < 2; c++)
1048         {
1049             x->block[16 + r*2+c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
1050         }
1051     }
1052
1053
1054     for (r = 0; r < 2; r++)
1055     {
1056         for (c = 0; c < 2; c++)
1057         {
1058             x->block[20 + r*2+c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
1059         }
1060     }
1061
1062     x->block[24].src_diff = x->src_diff + 384;
1063
1064
1065     for (i = 0; i < 25; i++)
1066     {
1067         x->block[i].coeff = x->coeff + i * 16;
1068     }
1069 }
1070
1071 void vp8_build_block_offsets(MACROBLOCK *x)
1072 {
1073     int block = 0;
1074     int br, bc;
1075
1076     vp8_build_block_doffsets(&x->e_mbd);
1077
1078     // y blocks
1079     for (br = 0; br < 4; br++)
1080     {
1081         for (bc = 0; bc < 4; bc++)
1082         {
1083             BLOCK *this_block = &x->block[block];
1084             this_block->base_src = &x->src.y_buffer;
1085             this_block->src_stride = x->src.y_stride;
1086             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1087             ++block;
1088         }
1089     }
1090
1091     // u blocks
1092     for (br = 0; br < 2; br++)
1093     {
1094         for (bc = 0; bc < 2; bc++)
1095         {
1096             BLOCK *this_block = &x->block[block];
1097             this_block->base_src = &x->src.u_buffer;
1098             this_block->src_stride = x->src.uv_stride;
1099             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1100             ++block;
1101         }
1102     }
1103
1104     // v blocks
1105     for (br = 0; br < 2; br++)
1106     {
1107         for (bc = 0; bc < 2; bc++)
1108         {
1109             BLOCK *this_block = &x->block[block];
1110             this_block->base_src = &x->src.v_buffer;
1111             this_block->src_stride = x->src.uv_stride;
1112             this_block->src = 4 * br * this_block->src_stride + 4 * bc;
1113             ++block;
1114         }
1115     }
1116 }
1117
1118 static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
1119 {
1120     const MACROBLOCKD *xd = & x->e_mbd;
1121     const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
1122     const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
1123
1124 #ifdef MODE_STATS
1125     const int is_key = cpi->common.frame_type == KEY_FRAME;
1126
1127     ++ (is_key ? uv_modes : inter_uv_modes)[uvm];
1128
1129     if (m == B_PRED)
1130     {
1131         unsigned int *const bct = is_key ? b_modes : inter_b_modes;
1132
1133         int b = 0;
1134
1135         do
1136         {
1137             ++ bct[xd->block[b].bmi.mode];
1138         }
1139         while (++b < 16);
1140     }
1141
1142 #endif
1143
1144     ++cpi->ymode_count[m];
1145     ++cpi->uv_mode_count[uvm];
1146
1147 }
1148 int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
1149 {
1150     int Error4x4, Error16x16, error_uv;
1151     int rate4x4, rate16x16, rateuv;
1152     int dist4x4, dist16x16, distuv;
1153     int rate = 0;
1154     int rate4x4_tokenonly = 0;
1155     int rate16x16_tokenonly = 0;
1156     int rateuv_tokenonly = 0;
1157
1158     x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
1159
1160 #if !(CONFIG_REALTIME_ONLY)
1161     if (cpi->sf.RD && cpi->compressor_speed != 2)
1162     {
1163         error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
1164         rate += rateuv;
1165
1166         Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);
1167
1168         Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4, Error16x16);
1169
1170         rate += (Error4x4 < Error16x16) ? rate4x4 : rate16x16;
1171     }
1172     else
1173 #endif
1174     {
1175         int rate2, best_distortion;
1176         MB_PREDICTION_MODE mode, best_mode = DC_PRED;
1177         int this_rd;
1178         Error16x16 = INT_MAX;
1179
1180         vp8_pick_intra_mbuv_mode(x);
1181
1182         for (mode = DC_PRED; mode <= TM_PRED; mode ++)
1183         {
1184             int distortion2;
1185
1186             x->e_mbd.mode_info_context->mbmi.mode = mode;
1187             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
1188             distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
1189             rate2  = x->mbmode_cost[x->e_mbd.frame_type][mode];
1190             this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
1191
1192             if (Error16x16 > this_rd)
1193             {
1194                 Error16x16 = this_rd;
1195                 best_mode = mode;
1196                 best_distortion = distortion2;
1197             }
1198         }
1199         x->e_mbd.mode_info_context->mbmi.mode = best_mode;
1200
1201         Error4x4 = vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &best_distortion);
1202     }
1203
1204     if (Error4x4 < Error16x16)
1205     {
1206         x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
1207         vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1208     }
1209     else
1210     {
1211         vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1212     }
1213
1214     vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1215     sum_intra_stats(cpi, x);
1216     vp8_tokenize_mb(cpi, &x->e_mbd, t);
1217
1218     return rate;
1219 }
1220 #ifdef SPEEDSTATS
1221 extern int cnt_pm;
1222 #endif
1223
1224 extern void vp8_fix_contexts(MACROBLOCKD *x);
1225
1226 int vp8cx_encode_inter_macroblock
1227 (
1228     VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
1229     int recon_yoffset, int recon_uvoffset
1230 )
1231 {
1232     MACROBLOCKD *const xd = &x->e_mbd;
1233     int inter_error;
1234     int intra_error = 0;
1235     int rate;
1236     int distortion;
1237
1238     x->skip = 0;
1239
1240     if (xd->segmentation_enabled)
1241         x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
1242     else
1243         x->encode_breakout = cpi->oxcf.encode_breakout;
1244
1245 #if !(CONFIG_REALTIME_ONLY)
1246
1247     if (cpi->sf.RD)
1248     {
1249         int zbin_mode_boost_enabled = cpi->zbin_mode_boost_enabled;
1250
1251         /* Are we using the fast quantizer for the mode selection? */
1252         if(cpi->sf.use_fastquant_for_pick)
1253         {
1254             cpi->mb.quantize_b      = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
1255
1256             /* the fast quantizer does not use zbin_extra, so
1257              * do not recalculate */
1258             cpi->zbin_mode_boost_enabled = 0;
1259         }
1260         inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
1261
1262         /* switch back to the regular quantizer for the encode */
1263         if (cpi->sf.improved_quant)
1264         {
1265             cpi->mb.quantize_b    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
1266         }
1267
1268         /* restore cpi->zbin_mode_boost_enabled */
1269         cpi->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
1270
1271     }
1272     else
1273 #endif
1274         inter_error = vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);
1275
1276
1277     cpi->prediction_error += inter_error;
1278     cpi->intra_error += intra_error;
1279
1280 #if 0
1281     // Experimental RD code
1282     cpi->frame_distortion += distortion;
1283     cpi->last_mb_distortion = distortion;
1284 #endif
1285
1286     // MB level adjutment to quantizer setup
1287     if (xd->segmentation_enabled)
1288     {
1289         // If cyclic update enabled
1290         if (cpi->cyclic_refresh_mode_enabled)
1291         {
1292             // Clear segment_id back to 0 if not coded (last frame 0,0)
1293             if ((xd->mode_info_context->mbmi.segment_id == 1) &&
1294                 ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
1295             {
1296                 xd->mode_info_context->mbmi.segment_id = 0;
1297
1298                 /* segment_id changed, so update */
1299                 vp8cx_mb_init_quantizer(cpi, x);
1300             }
1301         }
1302     }
1303
1304     {
1305         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
1306         if (cpi->zbin_mode_boost_enabled)
1307         {
1308             if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
1309                  cpi->zbin_mode_boost = 0;
1310             else
1311             {
1312                 if (xd->mode_info_context->mbmi.mode == ZEROMV)
1313                 {
1314                     if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
1315                         cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
1316                     else
1317                         cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
1318                 }
1319                 else if (xd->mode_info_context->mbmi.mode == SPLITMV)
1320                     cpi->zbin_mode_boost = 0;
1321                 else
1322                     cpi->zbin_mode_boost = MV_ZBIN_BOOST;
1323             }
1324         }
1325         else
1326             cpi->zbin_mode_boost = 0;
1327
1328         vp8_update_zbin_extra(cpi, x);
1329     }
1330
1331     cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
1332
1333     if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
1334     {
1335         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
1336
1337         if (xd->mode_info_context->mbmi.mode == B_PRED)
1338         {
1339             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
1340         }
1341         else
1342         {
1343             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
1344         }
1345
1346         sum_intra_stats(cpi, x);
1347     }
1348     else
1349     {
1350         MV best_ref_mv;
1351         MV nearest, nearby;
1352         int mdcounts[4];
1353         int ref_fb_idx;
1354
1355         vp8_find_near_mvs(xd, xd->mode_info_context,
1356                           &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
1357
1358         vp8_build_uvmvs(xd, cpi->common.full_pixel);
1359
1360         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
1361             ref_fb_idx = cpi->common.lst_fb_idx;
1362         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
1363             ref_fb_idx = cpi->common.gld_fb_idx;
1364         else
1365             ref_fb_idx = cpi->common.alt_fb_idx;
1366
1367         xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
1368         xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
1369         xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
1370
1371         if (xd->mode_info_context->mbmi.mode == SPLITMV)
1372         {
1373             int i;
1374
1375             for (i = 0; i < 16; i++)
1376             {
1377                 if (xd->block[i].bmi.mode == NEW4X4)
1378                 {
1379                     cpi->MVcount[0][mv_max+((xd->block[i].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
1380                     cpi->MVcount[1][mv_max+((xd->block[i].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
1381                 }
1382             }
1383         }
1384         else if (xd->mode_info_context->mbmi.mode == NEWMV)
1385         {
1386             cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
1387             cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
1388         }
1389
1390         if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
1391         {
1392             vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
1393
1394             // Clear mb_skip_coeff if mb_no_coeff_skip is not set
1395             if (!cpi->common.mb_no_coeff_skip)
1396                 xd->mode_info_context->mbmi.mb_skip_coeff = 0;
1397
1398         }
1399         else
1400             vp8_stuff_inter16x16(x);
1401     }
1402
1403     if (!x->skip)
1404         vp8_tokenize_mb(cpi, xd, t);
1405     else
1406     {
1407         if (cpi->common.mb_no_coeff_skip)
1408         {
1409             if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
1410                 xd->mode_info_context->mbmi.dc_diff = 0;
1411             else
1412                 xd->mode_info_context->mbmi.dc_diff = 1;
1413
1414             xd->mode_info_context->mbmi.mb_skip_coeff = 1;
1415             cpi->skip_true_count ++;
1416             vp8_fix_contexts(xd);
1417         }
1418         else
1419         {
1420             vp8_stuff_mb(cpi, xd, t);
1421             xd->mode_info_context->mbmi.mb_skip_coeff = 0;
1422             cpi->skip_false_count ++;
1423         }
1424     }
1425
1426     return rate;
1427 }