/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "vpx_ports/config.h"
#include "encodemb.h"
#include "encodemv.h"
#include "common.h"
#include "onyx_int.h"
#include "extend.h"
#include "entropymode.h"
#include "quant_common.h"
#include "segmentation.h"
#include "setupintrarecon.h"
#include "encodeintra.h"
#include "reconinter.h"
#include "rdopt.h"
#include "pickinter.h"
#include "findnearmv.h"
#include "reconintra.h"
#include <stdio.h>
#include <limits.h>
#include "subpixel.h"
#include "vpx_ports/vpx_timer.h"
#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD(x)     &cpi->common.rtcd.x
#define IF_RTCD(x)  (x)
#else
#define RTCD(x)     NULL
#define IF_RTCD(x)  NULL
#endif
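/* With CONFIG_RUNTIME_CPU_DETECT the optimized function tables hang off
 * cpi->common.rtcd and are passed through to callees via these macros; in a
 * static build the context argument is simply NULL and the various *_INVOKE
 * macros resolve to the default C/asm functions at compile time.  (This is
 * the usual libvpx RTCD convention; the INVOKE macros live in the individual
 * module headers.) */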
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t);

extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
extern void vp8_auto_select_speed(VP8_COMP *cpi);
extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
                                      MACROBLOCK *x,
                                      MB_ROW_COMP *mbr_ei,
                                      int mb_row,
                                      int count);
void vp8_build_block_offsets(MACROBLOCK *x);
void vp8_setup_block_ptrs(MACROBLOCK *x);
int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
#ifdef MODE_STATS
unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
unsigned int inter_b_modes[15] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int y_modes[5]  = {0, 0, 0, 0, 0};
unsigned int uv_modes[4] = {0, 0, 0, 0};
unsigned int b_modes[14] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#endif
static const int qrounding_factors[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48,
};
static const int qzbin_factors[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80,
};
static const int qrounding_factors_y2[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48,
};
static const int qzbin_factors_y2[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80,
};
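/* The zbin and rounding factors above are applied below as
 * (factor * quant_val) >> 7, i.e. they are expressed in 1/128ths of the
 * quantizer step: a zbin factor of 84 gives a zero bin of roughly
 * 0.66 * Q (80 -> 0.625 * Q at the higher Q indices), and a rounding factor
 * of 48 rounds at 0.375 * Q. */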
#define EXACT_QUANT
#ifdef EXACT_QUANT
static void vp8cx_invert_quant(int improved_quant, short *quant,
                               short *shift, short d)
{
    if (improved_quant)
    {
        unsigned t;
        int l;
        t = d;

        for (l = 0; t > 1; l++)
            t >>= 1;

        t = 1 + (1 << (16 + l)) / d;
        *quant = (short)(t - (1 << 16));
        *shift = l;
    }
    else
    {
        *quant = (1 << 16) / d;
        *shift = 0;
    }
}
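/* In the improved_quant case the reciprocal of d is stored in split form:
 * with l = floor(log2(d)), *quant = (1 << (16 + l)) / d + 1 - (1 << 16) and
 * *shift = l, so a division by d can later be carried out as
 * (((x * quant) >> 16) + x) >> shift using only a 16-bit multiplier.
 * (Derived from the arithmetic above; the consumer is expected to be the
 * exact-quantizer path in quantize.c.) */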
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
                           cpi->Y1quant_shift[Q] + 0, quant_val);
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
                           cpi->Y2quant_shift[Q] + 0, quant_val);
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
                           cpi->UVquant_shift[Q] + 0, quant_val);
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
                               cpi->Y1quant_shift[Q] + rc, quant_val);
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
                               cpi->Y2quant_shift[Q] + rc, quant_val);
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
                               cpi->UVquant_shift[Q] + rc, quant_val);
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
#else
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant[Q][0] = (1 << 16) / quant_val;
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
#endif
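/* The difference between the two builds above: with EXACT_QUANT every
 * divisor is additionally inverted into a (quant, quant_shift) pair via
 * vp8cx_invert_quant(), while the *quant_fast tables keep the plain
 * (1 << 16) / quant_val reciprocal (intended for the fast quantizer path).
 * Without EXACT_QUANT only the plain reciprocal tables are built. */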
void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
{
    int i;
    int QIndex;
    MACROBLOCKD *xd = &x->e_mbd;
    int zbin_extra;

    // Select the baseline MB Q index.
    if (xd->segmentation_enabled)
    {
        // Abs Value
        if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
        // Delta Value
        else
        {
            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
        }
    }
    else
        QIndex = cpi->common.base_qindex;

    // Y
    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 0; i < 16; i++)
    {
        x->block[i].quant = cpi->Y1quant[QIndex];
        x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
        x->block[i].zbin = cpi->Y1zbin[QIndex];
        x->block[i].round = cpi->Y1round[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // UV
    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 16; i < 24; i++)
    {
        x->block[i].quant = cpi->UVquant[QIndex];
        x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
        x->block[i].zbin = cpi->UVzbin[QIndex];
        x->block[i].round = cpi->UVround[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // Y2
    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
    x->block[24].quant = cpi->Y2quant[QIndex];
    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
    x->block[24].zbin = cpi->Y2zbin[QIndex];
    x->block[24].round = cpi->Y2round[QIndex];
    x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
    x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
    x->block[24].zbin_extra = (short)zbin_extra;
}
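/* zbin_extra above widens each block's zero bin by
 * (zbin_over_quant + zbin_mode_boost) / 128 of the first AC dequant step for
 * the selected QIndex (with half the zbin_over_quant contribution for the
 * Y2 block), on top of the per-Q zbin tables. */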
void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
{
    // Clear Zbin mode boost for default case
    cpi->zbin_mode_boost = 0;

    // vp8cx_init_quantizer() is first called in vp8_create_compressor(); the
    // tables only need to be rebuilt here if any of the delta_q values are non-zero.
    if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
    {
        vp8cx_init_quantizer(cpi);
    }

    // MB level quantizer setup
    vp8cx_mb_init_quantizer(cpi, &cpi->mb);
}
/* activity_avg must be positive, or flat regions could get a zero weight
 *  (infinite lambda), which confounds analysis.
 *  This also avoids the need for divide by zero checks in
 *  vp8_activity_masking().
 */
#define VP8_ACTIVITY_AVG_MIN (64)

/* This is used as a reference when computing the source variance for the
 *  purposes of activity masking.
 *  Eventually this should be replaced by custom no-reference routines,
 *  which will be faster.
 */
static const unsigned char VP8_VAR_OFFS[16] =
{
    128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};
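/* Because VP8_VAR_OFFS is a flat block of 128s (passed below with a stride
 * of 0), get16x16var() returns the source block's raw moments about 128:
 * sse is the sum of squared deviations and sum the signed sum of deviations,
 * from which vp8_activity_masking() forms a variance-like activity measure. */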
unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
    unsigned int act;
    unsigned int sse;
    int sum;
    unsigned int a;
    unsigned int b;
    /* TODO: This could also be done over smaller areas (8x8), but that would
     *  require extensive changes elsewhere, as lambda is assumed to be fixed
     *  over an entire MB in most of the code.
     *  Another option is to compute four 8x8 variances, and pick a single
     *  lambda using a non-linear combination (e.g., the smallest, or second
     *  smallest, etc.).
     */
    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
     x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
    /* This requires a full 32 bits of precision. */
    act = (sse << 8) - sum * sum;
    /* Drop 4 to give us some headroom to work with. */
    act = (act + 8) >> 4;
    /* If the region is flat, lower the activity some more. */
    if (act < 8 << 12)
        act = act < 5 << 12 ? act : 5 << 12;
    /* TODO: For non-flat regions, edge regions should receive less masking
     *  than textured regions, but identifying edge regions quickly and
     *  reliably enough is still a subject of experimentation.
     *  This will be most noticeable near edges with a complex shape (e.g.,
     *  text), but the 4x4 transform size should make this less of a problem
     *  than it would be for an 8x8 transform.
     */
    /* Apply the masking to the RD multiplier. */
    a = act + 4 * cpi->activity_avg;
    b = 4 * act + cpi->activity_avg;
    x->rdmult = (unsigned int)(((INT64)x->rdmult * b + (a >> 1)) / a);

    return act;
}
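/* The scale factor applied to rdmult is b/a = (4*act + avg)/(act + 4*avg),
 * which is bounded to the range (1/4, 4): macroblocks busier than the frame
 * average get a larger rdmult (rate is weighted more heavily, so they are
 * coded more coarsely), while flat macroblocks get a smaller rdmult and
 * hence more bits.  The +(a>>1) term rounds the division to nearest. */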
static
void encode_mb_row(VP8_COMP *cpi,
                   VP8_COMMON *cm,
                   int mb_row,
                   MACROBLOCK  *x,
                   MACROBLOCKD *xd,
                   TOKENEXTRA **tp,
                   int *segment_counts,
                   int *totalrate)
{
    INT64 activity_sum = 0;
    int i;
    int recon_yoffset, recon_uvoffset;
    int mb_col;
    int ref_fb_idx = cm->lst_fb_idx;
    int dst_fb_idx = cm->new_fb_idx;
    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
    int seg_map_index = (mb_row * cpi->common.mb_cols);

    // reset above block coeffs
    xd->above_context = cm->above_context;

    xd->up_available = (mb_row != 0);
    recon_yoffset = (mb_row * recon_y_stride * 16);
    recon_uvoffset = (mb_row * recon_uv_stride * 8);

    cpi->tplist[mb_row].start = *tp;
    //printf("Main mb_row = %d\n", mb_row);

    // Distance of Mb to the top & bottom edges, specified in 1/8th pel
    // units as they are always compared to values that are in 1/8th pel units
    xd->mb_to_top_edge = -((mb_row * 16) << 3);
    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;

    // Set up limit values for vertical motion vector components
    // to prevent them extending beyond the UMV borders
    x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
                    + (VP8BORDERINPIXELS - 16);
    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    {
        // Distance of Mb to the left & right edges, specified in
        // 1/8th pel units as they are always compared to values
        // that are in 1/8th pel units
        xd->mb_to_left_edge = -((mb_col * 16) << 3);
        xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;

        // Set up limit values for horizontal motion vector components
        // to prevent them extending beyond the UMV borders
        x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                        + (VP8BORDERINPIXELS - 16);

        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
        xd->left_available = (mb_col != 0);

        x->rddiv = cpi->RDDIV;
        x->rdmult = cpi->RDMULT;

        if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
            activity_sum += vp8_activity_masking(cpi, x);

        // Is segmentation enabled
        // MB level adjustment to quantizer
        if (xd->segmentation_enabled)
        {
            // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
            if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
            else
                xd->mode_info_context->mbmi.segment_id = 0;

            vp8cx_mb_init_quantizer(cpi, x);
        }
        else
            xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default

        x->active_ptr = cpi->active_map + seg_map_index + mb_col;
        if (cm->frame_type == KEY_FRAME)
        {
            *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
#ifdef MODE_STATS
            y_modes[xd->mbmi.mode] ++;
#endif
        }
        else
        {
            *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);

#ifdef MODE_STATS
            inter_y_modes[xd->mbmi.mode] ++;

            if (xd->mbmi.mode == SPLITMV)
            {
                int b;

                for (b = 0; b < xd->mbmi.partition_count; b++)
                {
                    inter_b_modes[x->partition->bmi[b].mode] ++;
                }
            }
#endif

            // Count of last ref frame 0,0 usage
            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                cpi->inter_zz_count ++;

            // Special case code for cyclic refresh
            // If cyclic update enabled then copy xd->mbmi.segment_id (which may have been updated based on mode
            // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
            if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
            {
                cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;

                // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh);
                // else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0);
                // else mark it as dirty (1).
                if (xd->mode_info_context->mbmi.segment_id)
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
                else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                {
                    if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
                        cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
                }
                else
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;
            }
        }
        cpi->tplist[mb_row].stop = *tp;

        x->gf_active_ptr++;      // Increment pointer into gf usage flags structure for next mb

        for (i = 0; i < 16; i++)
            vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));

        // adjust to the next column of macroblocks
        x->src.y_buffer += 16;
        x->src.u_buffer += 8;
        x->src.v_buffer += 8;

        recon_yoffset += 16;
        recon_uvoffset += 8;

        // Keep track of segment usage
        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;

        // skip to next mb
        xd->mode_info_context++;
        x->partition_info++;

        xd->above_context++;
        cpi->current_mb_col_main = mb_col;
    }

    //extend the recon for intra prediction
    vp8_extend_mb_row(
        &cm->yv12_fb[dst_fb_idx],
        xd->dst.y_buffer + 16,
        xd->dst.u_buffer + 8,
        xd->dst.v_buffer + 8);

    // this is to account for the border
    xd->mode_info_context++;
    x->partition_info++;
    x->activity_sum += activity_sum;
}
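/* The extra mode_info_context / partition_info increment at the end of the
 * row above steps over the one border element each row of the mode info
 * array carries (mode_info_stride is mb_cols + 1), keeping the pointers
 * aligned with the start of the next macroblock row. */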
void vp8_encode_frame(VP8_COMP *cpi)
{
    int mb_row;
    MACROBLOCK *const x = &cpi->mb;
    VP8_COMMON *const cm = &cpi->common;
    MACROBLOCKD *const xd = &x->e_mbd;

    TOKENEXTRA *tp = cpi->tok;
    int segment_counts[MAX_MB_SEGMENTS];
    int totalrate;

    // Functions setup for all frame types so we can use MC in AltRef
    if (cm->mcomp_filter_type == SIXTAP)
    {
        xd->subpixel_predict      = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap4x4);
        xd->subpixel_predict8x4   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap8x4);
        xd->subpixel_predict8x8   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap8x8);
        xd->subpixel_predict16x16 = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap16x16);
    }
    else
    {
        xd->subpixel_predict      = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear4x4);
        xd->subpixel_predict8x4   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear8x4);
        xd->subpixel_predict8x8   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear8x8);
        xd->subpixel_predict16x16 = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear16x16);
    }
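    /* SIXTAP selects the normative six-tap interpolation filters; the
     * bilinear set is the cheaper alternative used when the frame is coded
     * with bilinear filtering (e.g. the low-complexity / full-pixel
     * profiles).  The same four pointers cover every block size needed by
     * inter prediction. */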
    x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure

    x->vector_range = 32;

    // Count of MBs using the alternate Q if any
    cpi->alt_qcount = 0;

    // Reset frame count of inter 0,0 motion vector usage.
    cpi->inter_zz_count = 0;

    vpx_memset(segment_counts, 0, sizeof(segment_counts));

    cpi->prediction_error = 0;
    cpi->intra_error = 0;
    cpi->skip_true_count = 0;
    cpi->skip_false_count = 0;

#if 0
    // Experimental code
    cpi->frame_distortion = 0;
    cpi->last_mb_distortion = 0;
#endif

    totalrate = 0;

    x->partition_info = x->pi;

    xd->mode_info_context = cm->mi;
    xd->mode_info_stride = cm->mode_info_stride;

    xd->frame_type = cm->frame_type;

    xd->frames_since_golden = cm->frames_since_golden;
    xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
    vp8_zero(cpi->MVcount);
    // vp8_zero( Contexts)
    vp8_zero(cpi->coef_counts);

    // reset intra mode contexts
    if (cm->frame_type == KEY_FRAME)
        vp8_init_mbmode_probs(cm);

    vp8cx_frame_init_quantizer(cpi);

    if (cpi->compressor_speed == 2)
    {
        if (cpi->oxcf.cpu_used < 0)
            cpi->Speed = -(cpi->oxcf.cpu_used);
        else
            vp8_auto_select_speed(cpi);
    }

    vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
    vp8cx_initialize_me_consts(cpi, cm->base_qindex);

    // Copy data over into macro block data structures.
    x->src = *cpi->Source;
    xd->pre = cm->yv12_fb[cm->lst_fb_idx];
    xd->dst = cm->yv12_fb[cm->new_fb_idx];

    // set up the new frame for intra coded blocks
    vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);

    vp8_build_block_offsets(x);

    vp8_setup_block_dptrs(&x->e_mbd);

    vp8_setup_block_ptrs(x);

    x->activity_sum = 0;

    xd->mode_info_context->mbmi.mode = DC_PRED;
    xd->mode_info_context->mbmi.uv_mode = DC_PRED;

    xd->left_context = &cm->left_context;

    vp8_zero(cpi->count_mb_ref_frame_usage)
    vp8_zero(cpi->ymode_count)
    vp8_zero(cpi->uv_mode_count)

    x->mvc = cm->fc.mvc;

    vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
    {
        struct vpx_usec_timer emr_timer;
        vpx_usec_timer_start(&emr_timer);

        if (!cpi->b_multi_threaded)
        {
            // for each macroblock row in image
            for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
            {
                vp8_zero(cm->left_context)

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
            }

            cpi->tok_count = tp - cpi->tok;
        }
        else
        {
#if CONFIG_MULTITHREAD
            int i;

            vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
            {
                cpi->current_mb_col_main = -1;

                for (i = 0; i < cpi->encoding_thread_count; i++)
                {
                    if ((mb_row + i + 1) >= cm->mb_rows)
                        break;

                    cpi->mb_row_ei[i].mb_row = mb_row + i + 1;
                    cpi->mb_row_ei[i].tp = cpi->tok + (mb_row + i + 1) * (cm->mb_cols * 16 * 24);
                    cpi->mb_row_ei[i].current_mb_col = -1;
                    //SetEvent(cpi->h_event_mbrencoding[i]);
                    sem_post(&cpi->h_event_mbrencoding[i]);
                }
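                /* Each worker row gets its own slice of the shared token
                 * buffer: the offset of (mb_cols * 16 * 24) TOKENEXTRA
                 * entries per row matches a budget of 16 coefficients for
                 * each of the 24 Y/U/V 4x4 blocks in a macroblock, so rows
                 * can tokenize independently and the totals are summed from
                 * tplist[] afterwards. */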
                vp8_zero(cm->left_context)

                tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;

                xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
                x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;

                if (mb_row < cm->mb_rows - 1)
                    //WaitForSingleObject(cpi->h_event_main, INFINITE);
                    sem_wait(&cpi->h_event_main);
            }
            /*
            for( ;mb_row<cm->mb_rows; mb_row ++)
            {
                vp8_zero( cm->left_context)

                tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
            }
            */
            cpi->tok_count = 0;

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
            {
                cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
            }

            if (xd->segmentation_enabled)
            {
                int i, j;

                if (xd->segmentation_enabled)
                {
                    for (i = 0; i < cpi->encoding_thread_count; i++)
                    {
                        for (j = 0; j < 4; j++)
                            segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
                    }
                }
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                totalrate += cpi->mb_row_ei[i].totalrate;
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
            }

#endif
        }
        vpx_usec_timer_mark(&emr_timer);
        cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
    }
    // Work out the segment probabilities if segmentation is enabled
    if (xd->segmentation_enabled)
    {
        int tot_count;
        int i;

        // Set to defaults
        vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));

        tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];

        if (tot_count)
        {
            xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;

            tot_count = segment_counts[0] + segment_counts[1];

            if (tot_count > 0)
            {
                xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
            }

            tot_count = segment_counts[2] + segment_counts[3];

            if (tot_count > 0)
                xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;

            // Zero probabilities not allowed
            for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
            {
                if (xd->mb_segment_tree_probs[i] == 0)
                    xd->mb_segment_tree_probs[i] = 1;
            }
        }
    }
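    /* The three tree probabilities above encode the segment id as a binary
     * tree: probs[0] is the chance of taking the {0,1} branch versus {2,3},
     * probs[1] picks segment 0 within {0,1} and probs[2] picks segment 2
     * within {2,3}.  Each is scaled to 0..255 and clamped away from zero,
     * since a zero probability could not code the corresponding segment. */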
    // 256 rate units to the bit
    cpi->projected_frame_size = totalrate >> 8;   // projected_frame_size in units of bits

    // Make a note of the percentage MBs coded Intra.
    if (cm->frame_type == KEY_FRAME)
    {
        cpi->this_frame_percent_intra = 100;
    }
    else
    {
        int tot_modes;

        tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
                    + cpi->count_mb_ref_frame_usage[LAST_FRAME]
                    + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
                    + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];

        if (tot_modes)
            cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
    }
#if 0
    {
        int cnt = 0;
        int flag[2] = {0, 0};

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
            {
                flag[0] = 1;
                vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
                break;
            }
        }

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
            {
                flag[1] = 1;
                vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
                break;
            }
        }

        if (flag[0] || flag[1])
            vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
    }
#endif
    // Adjust the projected reference frame usage probability numbers to reflect
    // what we have just seen. This may be useful when we make multiple iterations
    // of the recode loop rather than continuing to use values from the previous frame.
    if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
    {
        const int *const rfct = cpi->count_mb_ref_frame_usage;
        const int rf_intra = rfct[INTRA_FRAME];
        const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];

        if ((rf_intra + rf_inter) > 0)
        {
            cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);

            if (cpi->prob_intra_coded < 1)
                cpi->prob_intra_coded = 1;

            if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
            {
                cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;

                if (cpi->prob_last_coded < 1)
                    cpi->prob_last_coded = 1;

                cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
                                     ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;

                if (cpi->prob_gf_coded < 1)
                    cpi->prob_gf_coded = 1;
            }
        }
    }
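    /* prob_intra_coded, prob_last_coded and prob_gf_coded are the per-frame
     * probabilities later written to the frame header and used to code each
     * macroblock's reference frame choice; refreshing them from the counts
     * just gathered mainly helps when this frame goes through the recode
     * loop again, as the comment above notes. */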
#if 0
    // Keep record of the total distortion this time around for future use
    cpi->last_frame_distortion = cpi->frame_distortion;
#endif

    /* Update the average activity for the next frame.
     * This is feed-forward for now; it could also be saved in two-pass, or
     * done during lookahead when that is eventually added.
     */
    cpi->activity_avg = (unsigned int)(x->activity_sum / cpi->common.MBs);
    if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
        cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
}
void vp8_setup_block_ptrs(MACROBLOCK *x)
{
    int r, c;
    int i;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            x->block[r*4+c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
        }
    }

    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[16 + r*2+c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
        }
    }

    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[20 + r*2+c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
        }
    }

    x->block[24].src_diff = x->src_diff + 384;

    for (i = 0; i < 25; i++)
    {
        x->block[i].coeff = x->coeff + i * 16;
    }
}
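/* The src_diff buffer is laid out as 256 luma residuals (the 16x16 block
 * addressed as sixteen 4x4 sub-blocks), followed by 64 U samples at offset
 * 256, 64 V samples at offset 320 and the 16 second-order (Y2) values at
 * offset 384; the loops above wire each BLOCK's src_diff pointer to its 4x4
 * tile and give every block a 16-entry slot in the coeff buffer. */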
void vp8_build_block_offsets(MACROBLOCK *x)
{
    int block = 0;
    int br, bc;

    vp8_build_block_doffsets(&x->e_mbd);

    // y blocks
    for (br = 0; br < 4; br++)
    {
        for (bc = 0; bc < 4; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.y_buffer;
            this_block->src_stride = x->src.y_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // u blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.u_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // v blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.v_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }
}
static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
{
    const MACROBLOCKD *xd = &x->e_mbd;
    const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
    const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;

#ifdef MODE_STATS
    const int is_key = cpi->common.frame_type == KEY_FRAME;

    ++ (is_key ? uv_modes : inter_uv_modes)[uvm];

    if (m == B_PRED)
    {
        unsigned int *const bct = is_key ? b_modes : inter_b_modes;

        int b = 0;

        do
        {
            ++ bct[xd->block[b].bmi.mode];
        }
        while (++b < 16);
    }

#endif

    ++cpi->ymode_count[m];
    ++cpi->uv_mode_count[uvm];
}
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
{
    int Error4x4, Error16x16, error_uv;
    B_PREDICTION_MODE intra_bmodes[16];
    int rate4x4, rate16x16, rateuv;
    int dist4x4, dist16x16, distuv;
    int rate = 0;
    int rate4x4_tokenonly = 0;
    int rate16x16_tokenonly = 0;
    int rateuv_tokenonly = 0;
    int i;

    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

#if !(CONFIG_REALTIME_ONLY)

    if (cpi->sf.RD || cpi->compressor_speed != 2)
    {
        Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4);

        //save the b modes for possible later use
        for (i = 0; i < 16; i++)
            intra_bmodes[i] = x->e_mbd.block[i].bmi.mode;

        Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);

        error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);

        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
        rate += rateuv;

        if (Error4x4 < Error16x16)
        {
            rate += rate4x4;
            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;

            // get back the intra block modes
            for (i = 0; i < 16; i++)
                x->e_mbd.block[i].bmi.mode = intra_bmodes[i];

            vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
            cpi->prediction_error += Error4x4;
#if 0
            // Experimental RD code
            cpi->frame_distortion += dist4x4;
#endif
        }
        else
        {
            vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
            rate += rate16x16;

#if 0
            // Experimental RD code
            cpi->prediction_error += Error16x16;
            cpi->frame_distortion += dist16x16;
#endif
        }

        sum_intra_stats(cpi, x);

        vp8_tokenize_mb(cpi, &x->e_mbd, t);
    }
    else
#endif
    {
        int rate2, distortion2;
        MB_PREDICTION_MODE mode, best_mode = DC_PRED;
        int this_rd;
        Error16x16 = INT_MAX;

        for (mode = DC_PRED; mode <= TM_PRED; mode ++)
        {
            x->e_mbd.mode_info_context->mbmi.mode = mode;
            vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
            distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
            rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode];
            this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);

            if (Error16x16 > this_rd)
            {
                Error16x16 = this_rd;
                best_mode = mode;
            }
        }

        vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &distortion2);

        if (distortion2 == INT_MAX)
            Error4x4 = INT_MAX;
        else
            Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);

        if (Error4x4 < Error16x16)
        {
            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
            vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
            cpi->prediction_error += Error4x4;
        }
        else
        {
            x->e_mbd.mode_info_context->mbmi.mode = best_mode;
            vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
            cpi->prediction_error += Error16x16;
        }

        vp8_pick_intra_mbuv_mode(x);
        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
        sum_intra_stats(cpi, x);
        vp8_tokenize_mb(cpi, &x->e_mbd, t);
    }

    return rate;
}
#ifdef SPEEDSTATS
extern int cnt_pm;
#endif

extern void vp8_fix_contexts(MACROBLOCKD *x);

int vp8cx_encode_inter_macroblock
(
    VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
    int recon_yoffset, int recon_uvoffset
)
{
    MACROBLOCKD *const xd = &x->e_mbd;
    int inter_error;
    int intra_error = 0;
    int rate;
    int distortion;

    x->skip = 0;

    if (xd->segmentation_enabled)
        x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
    else
        x->encode_breakout = cpi->oxcf.encode_breakout;

#if !(CONFIG_REALTIME_ONLY)

    if (cpi->sf.RD)
    {
        /* Are we using the fast quantizer for the mode selection? */
        if (cpi->sf.use_fastquant_for_pick)
            cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);

        inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);

        /* switch back to the regular quantizer for the encode */
        if (cpi->sf.improved_quant)
        {
            cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
        }
    }
    else
#endif
        inter_error = vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);

    cpi->prediction_error += inter_error;
    cpi->intra_error += intra_error;

#if 0
    // Experimental RD code
    cpi->frame_distortion += distortion;
    cpi->last_mb_distortion = distortion;
#endif
    // MB level adjustment to quantizer setup
    if (xd->segmentation_enabled || cpi->zbin_mode_boost_enabled)
    {
        // If cyclic update enabled
        if (cpi->cyclic_refresh_mode_enabled)
        {
            // Clear segment_id back to 0 if not coded (last frame 0,0)
            if ((xd->mode_info_context->mbmi.segment_id == 1) &&
                ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
            {
                xd->mode_info_context->mbmi.segment_id = 0;
            }
        }

        // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to suppress noise
        if (cpi->zbin_mode_boost_enabled)
        {
            if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
                cpi->zbin_mode_boost = 0;
            else
            {
                if (xd->mode_info_context->mbmi.mode == ZEROMV)
                {
                    if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
                        cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
                    else
                        cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
                }
                else if (xd->mode_info_context->mbmi.mode == SPLITMV)
                    cpi->zbin_mode_boost = 0;
                else
                    cpi->zbin_mode_boost = MV_ZBIN_BOOST;
            }
        }
        else
            cpi->zbin_mode_boost = 0;

        vp8cx_mb_init_quantizer(cpi, x);
    }
    cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;

    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);

        if (xd->mode_info_context->mbmi.mode == B_PRED)
        {
            vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
        }
        else
        {
            vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
        }

        sum_intra_stats(cpi, x);
    }
    else
    {
        MV best_ref_mv;
        MV nearest, nearby;
        int mdcounts[4];
        int ref_fb_idx;

        vp8_find_near_mvs(xd, xd->mode_info_context,
                          &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);

        vp8_build_uvmvs(xd, cpi->common.full_pixel);

        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
            ref_fb_idx = cpi->common.lst_fb_idx;
        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
            ref_fb_idx = cpi->common.gld_fb_idx;
        else
            ref_fb_idx = cpi->common.alt_fb_idx;

        xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
        xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
        xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

        if (xd->mode_info_context->mbmi.mode == SPLITMV)
        {
            int i;

            for (i = 0; i < 16; i++)
            {
                if (xd->block[i].bmi.mode == NEW4X4)
                {
                    cpi->MVcount[0][mv_max+((xd->block[i].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
                    cpi->MVcount[1][mv_max+((xd->block[i].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
                }
            }
        }
        else if (xd->mode_info_context->mbmi.mode == NEWMV)
        {
            cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
            cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
        }

        if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
        {
            vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);

            // Clear mb_skip_coeff if mb_no_coeff_skip is not set
            if (!cpi->common.mb_no_coeff_skip)
                xd->mode_info_context->mbmi.mb_skip_coeff = 0;
        }
        else
            vp8_stuff_inter16x16(x);
    }

    if (!x->skip)
        vp8_tokenize_mb(cpi, xd, t);
    else
    {
        if (cpi->common.mb_no_coeff_skip)
        {
            if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
                xd->mode_info_context->mbmi.dc_diff = 0;
            else
                xd->mode_info_context->mbmi.dc_diff = 1;

            xd->mode_info_context->mbmi.mb_skip_coeff = 1;
            cpi->skip_true_count ++;
            vp8_fix_contexts(xd);
        }
        else
        {
            vp8_stuff_mb(cpi, xd, t);
            xd->mode_info_context->mbmi.mb_skip_coeff = 0;
            cpi->skip_false_count ++;
        }
    }

    return rate;
}