/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "vpx_ports/config.h"
#include "encodemb.h"
#include "encodemv.h"
#include "common.h"
#include "onyx_int.h"
#include "extend.h"
#include "entropymode.h"
#include "quant_common.h"
#include "segmentation.h"
#include "setupintrarecon.h"
#include "encodeintra.h"
#include "reconinter.h"
#include "rdopt.h"
#include "pickinter.h"
#include "findnearmv.h"
#include "reconintra.h"
#include <stdio.h>
#include <limits.h>
#include "subpixel.h"
#include "vpx_ports/vpx_timer.h"
#if CONFIG_RUNTIME_CPU_DETECT
#define RTCD(x)     &cpi->common.rtcd.x
#define IF_RTCD(x)  (x)
#else
#define RTCD(x)     NULL
#define IF_RTCD(x)  NULL
#endif
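/* With CONFIG_RUNTIME_CPU_DETECT the optimized function tables hang off
 * cpi->common.rtcd and are passed through to callees via these macros; in a
 * static build the context argument is simply NULL and the various *_INVOKE
 * macros resolve to the default C/asm functions at compile time.  (This is
 * the usual libvpx RTCD convention; the INVOKE macros live in the individual
 * module headers.) */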
extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t);

extern void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex);
extern void vp8_auto_select_speed(VP8_COMP *cpi);
extern void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
                                      MACROBLOCK *x,
                                      MB_ROW_COMP *mbr_ei,
                                      int mb_row,
                                      int count);
void vp8_build_block_offsets(MACROBLOCK *x);
void vp8_setup_block_ptrs(MACROBLOCK *x);
int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset);
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
#ifdef MODE_STATS
unsigned int inter_y_modes[10] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int inter_uv_modes[4] = {0, 0, 0, 0};
unsigned int inter_b_modes[15] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
unsigned int y_modes[5]  = {0, 0, 0, 0, 0};
unsigned int uv_modes[4] = {0, 0, 0, 0};
unsigned int b_modes[14] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
#endif
static const int qrounding_factors[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48,
};
static const int qzbin_factors[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80,
};
static const int qrounding_factors_y2[129] =
{
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48, 48, 48, 48, 48, 48, 48, 48,
    48,
};
static const int qzbin_factors_y2[129] =
{
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    84, 84, 84, 84, 84, 84, 84, 84,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80, 80, 80, 80, 80, 80, 80, 80,
    80,
};
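/* The zbin and rounding factors above are applied below as
 * (factor * quant_val) >> 7, i.e. they are expressed in 1/128ths of the
 * quantizer step: a zbin factor of 84 gives a zero bin of roughly
 * 0.66 * Q (80 -> 0.625 * Q at the higher Q indices), and a rounding factor
 * of 48 rounds at 0.375 * Q. */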
#define EXACT_QUANT
#ifdef EXACT_QUANT
static void vp8cx_invert_quant(int improved_quant, short *quant,
                               short *shift, short d)
{
    if (improved_quant)
    {
        unsigned t;
        int l;
        t = d;

        for (l = 0; t > 1; l++)
            t >>= 1;

        t = 1 + (1 << (16 + l)) / d;
        *quant = (short)(t - (1 << 16));
        *shift = l;
    }
    else
    {
        *quant = (1 << 16) / d;
        *shift = 0;
    }
}
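/* In the improved_quant case the reciprocal of d is stored in split form:
 * with l = floor(log2(d)), *quant = (1 << (16 + l)) / d + 1 - (1 << 16) and
 * *shift = l, so a division by d can later be carried out as
 * (((x * quant) >> 16) + x) >> shift using only a 16-bit multiplier.
 * (Derived from the arithmetic above; the consumer is expected to be the
 * exact-quantizer path in quantize.c.) */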
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
                           cpi->Y1quant_shift[Q] + 0, quant_val);
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
                           cpi->Y2quant_shift[Q] + 0, quant_val);
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val;
        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
                           cpi->UVquant_shift[Q] + 0, quant_val);
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
                               cpi->Y1quant_shift[Q] + rc, quant_val);
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
                               cpi->Y2quant_shift[Q] + rc, quant_val);
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant_fast[Q][rc] = (1 << 16) / quant_val;
            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
                               cpi->UVquant_shift[Q] + rc, quant_val);
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
#else
void vp8cx_init_quantizer(VP8_COMP *cpi)
{
    int i;
    int quant_val;
    int Q;

    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};

    for (Q = 0; Q < QINDEX_RANGE; Q++)
    {
        // dc values
        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
        cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.Y1dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
        cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
        cpi->common.Y2dequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
        cpi->UVquant[Q][0] = (1 << 16) / quant_val;
        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
        cpi->common.UVdequant[Q][0] = quant_val;
        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;

        // all the ac values
        for (i = 1; i < 16; i++)
        {
            int rc = vp8_default_zig_zag1d[i];

            quant_val = vp8_ac_yquant(Q);
            cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.Y1dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
            cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
            cpi->common.Y2dequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;

            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
            cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
            cpi->common.UVdequant[Q][rc] = quant_val;
            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
        }
    }
}
#endif
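/* The difference between the two builds above: with EXACT_QUANT every
 * divisor is additionally inverted into a (quant, quant_shift) pair via
 * vp8cx_invert_quant(), while the *quant_fast tables keep the plain
 * (1 << 16) / quant_val reciprocal (intended for the fast quantizer path).
 * Without EXACT_QUANT only the plain reciprocal tables are built. */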
void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
{
    int i;
    int QIndex;
    MACROBLOCKD *xd = &x->e_mbd;
    int zbin_extra;

    // Select the baseline MB Q index.
    if (xd->segmentation_enabled)
    {
        // Abs Value
        if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
        // Delta Value
        else
        {
            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
        }
    }
    else
        QIndex = cpi->common.base_qindex;

    // Y
    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 0; i < 16; i++)
    {
        x->block[i].quant = cpi->Y1quant[QIndex];
        x->block[i].quant_fast = cpi->Y1quant_fast[QIndex];
        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
        x->block[i].zbin = cpi->Y1zbin[QIndex];
        x->block[i].round = cpi->Y1round[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // UV
    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;

    for (i = 16; i < 24; i++)
    {
        x->block[i].quant = cpi->UVquant[QIndex];
        x->block[i].quant_fast = cpi->UVquant_fast[QIndex];
        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
        x->block[i].zbin = cpi->UVzbin[QIndex];
        x->block[i].round = cpi->UVround[QIndex];
        x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
        x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex];
        x->block[i].zbin_extra = (short)zbin_extra;
    }

    // Y2
    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
    x->block[24].quant_fast = cpi->Y2quant_fast[QIndex];
    x->block[24].quant = cpi->Y2quant[QIndex];
    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
    x->block[24].zbin = cpi->Y2zbin[QIndex];
    x->block[24].round = cpi->Y2round[QIndex];
    x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
    x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex];
    x->block[24].zbin_extra = (short)zbin_extra;
}
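/* zbin_extra above widens each block's zero bin by
 * (zbin_over_quant + zbin_mode_boost) / 128 of the first AC dequant step for
 * the selected QIndex (with half the zbin_over_quant contribution for the
 * Y2 block), on top of the per-Q zbin tables. */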
void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
{
    // Clear Zbin mode boost for default case
    cpi->zbin_mode_boost = 0;

    // vp8cx_init_quantizer() is first called in vp8_create_compressor(); the
    // tables only need to be rebuilt here if any of the delta_q values are non-zero.
    if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
    {
        vp8cx_init_quantizer(cpi);
    }

    // MB level quantizer setup
    vp8cx_mb_init_quantizer(cpi, &cpi->mb);
}
/* activity_avg must be positive, or flat regions could get a zero weight
 *  (infinite lambda), which confounds analysis.
 *  This also avoids the need for divide by zero checks in
 *  vp8_activity_masking().
 */
#define VP8_ACTIVITY_AVG_MIN (64)

/* This is used as a reference when computing the source variance for the
 *  purposes of activity masking.
 *  Eventually this should be replaced by custom no-reference routines,
 *  which will be faster.
 */
static const unsigned char VP8_VAR_OFFS[16] =
{
    128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128
};
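/* Because VP8_VAR_OFFS is a flat block of 128s (passed below with a stride
 * of 0), get16x16var() returns the source block's raw moments about 128:
 * sse is the sum of squared deviations and sum the signed sum of deviations,
 * from which vp8_activity_masking() forms a variance-like activity measure. */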
unsigned int vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x)
{
    unsigned int act;
    unsigned int sse;
    int sum;
    unsigned int a;
    unsigned int b;
    /* TODO: This could also be done over smaller areas (8x8), but that would
     *  require extensive changes elsewhere, as lambda is assumed to be fixed
     *  over an entire MB in most of the code.
     *  Another option is to compute four 8x8 variances, and pick a single
     *  lambda using a non-linear combination (e.g., the smallest, or second
     *  smallest, etc.).
     */
    VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16var)(x->src.y_buffer,
     x->src.y_stride, VP8_VAR_OFFS, 0, &sse, &sum);
    /* This requires a full 32 bits of precision. */
    act = (sse << 8) - sum * sum;
    /* Drop 4 to give us some headroom to work with. */
    act = (act + 8) >> 4;
    /* If the region is flat, lower the activity some more. */
    if (act < 8 << 12)
        act = act < 5 << 12 ? act : 5 << 12;
    /* TODO: For non-flat regions, edge regions should receive less masking
     *  than textured regions, but identifying edge regions quickly and
     *  reliably enough is still a subject of experimentation.
     *  This will be most noticeable near edges with a complex shape (e.g.,
     *  text), but the 4x4 transform size should make this less of a problem
     *  than it would be for an 8x8 transform.
     */
    /* Apply the masking to the RD multiplier. */
    a = act + 4 * cpi->activity_avg;
    b = 4 * act + cpi->activity_avg;
    x->rdmult = (unsigned int)(((INT64)x->rdmult * b + (a >> 1)) / a);

    return act;
}
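/* The scale factor applied to rdmult is b/a = (4*act + avg)/(act + 4*avg),
 * which is bounded to the range (1/4, 4): macroblocks busier than the frame
 * average get a larger rdmult (rate is weighted more heavily, so they are
 * coded more coarsely), while flat macroblocks get a smaller rdmult and
 * hence more bits.  The +(a>>1) term rounds the division to nearest. */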
static
void encode_mb_row(VP8_COMP *cpi,
                   VP8_COMMON *cm,
                   int mb_row,
                   MACROBLOCK  *x,
                   MACROBLOCKD *xd,
                   TOKENEXTRA **tp,
                   int *segment_counts,
                   int *totalrate)
{
    INT64 activity_sum = 0;
    int i;
    int recon_yoffset, recon_uvoffset;
    int mb_col;
    int ref_fb_idx = cm->lst_fb_idx;
    int dst_fb_idx = cm->new_fb_idx;
    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
    int seg_map_index = (mb_row * cpi->common.mb_cols);

    // reset above block coeffs
    xd->above_context = cm->above_context;

    xd->up_available = (mb_row != 0);
    recon_yoffset = (mb_row * recon_y_stride * 16);
    recon_uvoffset = (mb_row * recon_uv_stride * 8);

    cpi->tplist[mb_row].start = *tp;
    //printf("Main mb_row = %d\n", mb_row);

    // Distance of Mb to the top & bottom edges, specified in 1/8th pel
    // units as they are always compared to values that are in 1/8th pel units
    xd->mb_to_top_edge = -((mb_row * 16) << 3);
    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;

    // Set up limit values for vertical motion vector components
    // to prevent them extending beyond the UMV borders
    x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
                    + (VP8BORDERINPIXELS - 16);
    // for each macroblock col in image
    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
    {
        // Distance of Mb to the left & right edges, specified in
        // 1/8th pel units as they are always compared to values
        // that are in 1/8th pel units
        xd->mb_to_left_edge = -((mb_col * 16) << 3);
        xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;

        // Set up limit values for horizontal motion vector components
        // to prevent them extending beyond the UMV borders
        x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                        + (VP8BORDERINPIXELS - 16);

        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
        xd->left_available = (mb_col != 0);

        x->rddiv = cpi->RDDIV;
        x->rdmult = cpi->RDMULT;

        if (cpi->oxcf.tuning == VP8_TUNE_SSIM)
            activity_sum += vp8_activity_masking(cpi, x);

        // Is segmentation enabled
        // MB level adjustment to quantizer
        if (xd->segmentation_enabled)
        {
            // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
            if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
            else
                xd->mode_info_context->mbmi.segment_id = 0;

            vp8cx_mb_init_quantizer(cpi, x);
        }
        else
            xd->mode_info_context->mbmi.segment_id = 0; // Set to Segment 0 by default

        x->active_ptr = cpi->active_map + seg_map_index + mb_col;
        if (cm->frame_type == KEY_FRAME)
        {
            *totalrate += vp8cx_encode_intra_macro_block(cpi, x, tp);
#ifdef MODE_STATS
            y_modes[xd->mbmi.mode] ++;
#endif
        }
        else
        {
            *totalrate += vp8cx_encode_inter_macroblock(cpi, x, tp, recon_yoffset, recon_uvoffset);

#ifdef MODE_STATS
            inter_y_modes[xd->mbmi.mode] ++;

            if (xd->mbmi.mode == SPLITMV)
            {
                int b;

                for (b = 0; b < xd->mbmi.partition_count; b++)
                {
                    inter_b_modes[x->partition->bmi[b].mode] ++;
                }
            }
#endif

            // Count of last ref frame 0,0 usage
            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                cpi->inter_zz_count ++;

            // Special case code for cyclic refresh
            // If cyclic update enabled then copy xd->mbmi.segment_id (which may have been updated based on mode
            // during vp8cx_encode_inter_macroblock()) back into the global segmentation map
            if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
            {
                cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;

                // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh);
                // else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0);
                // else mark it as dirty (1).
                if (xd->mode_info_context->mbmi.segment_id)
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
                else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                {
                    if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
                        cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
                }
                else
                    cpi->cyclic_refresh_map[seg_map_index+mb_col] = 1;
            }
        }
        cpi->tplist[mb_row].stop = *tp;

        x->gf_active_ptr++;      // Increment pointer into gf usage flags structure for next mb

        for (i = 0; i < 16; i++)
            vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));

        // adjust to the next column of macroblocks
        x->src.y_buffer += 16;
        x->src.u_buffer += 8;
        x->src.v_buffer += 8;

        recon_yoffset += 16;
        recon_uvoffset += 8;

        // Keep track of segment usage
        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;

        // skip to next mb
        xd->mode_info_context++;
        x->partition_info++;

        xd->above_context++;
        cpi->current_mb_col_main = mb_col;
    }

    //extend the recon for intra prediction
    vp8_extend_mb_row(
        &cm->yv12_fb[dst_fb_idx],
        xd->dst.y_buffer + 16,
        xd->dst.u_buffer + 8,
        xd->dst.v_buffer + 8);

    // this is to account for the border
    xd->mode_info_context++;
    x->partition_info++;
    x->activity_sum += activity_sum;
}
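/* The extra mode_info_context / partition_info increment at the end of the
 * row above steps over the one border element each row of the mode info
 * array carries (mode_info_stride is mb_cols + 1), keeping the pointers
 * aligned with the start of the next macroblock row. */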
void vp8_encode_frame(VP8_COMP *cpi)
{
    int mb_row;
    MACROBLOCK *const x = &cpi->mb;
    VP8_COMMON *const cm = &cpi->common;
    MACROBLOCKD *const xd = &x->e_mbd;

    TOKENEXTRA *tp = cpi->tok;
    int segment_counts[MAX_MB_SEGMENTS];
    int totalrate;

    // Functions setup for all frame types so we can use MC in AltRef
    if (cm->mcomp_filter_type == SIXTAP)
    {
        xd->subpixel_predict      = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap4x4);
        xd->subpixel_predict8x4   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap8x4);
        xd->subpixel_predict8x8   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap8x8);
        xd->subpixel_predict16x16 = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, sixtap16x16);
    }
    else
    {
        xd->subpixel_predict      = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear4x4);
        xd->subpixel_predict8x4   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear8x4);
        xd->subpixel_predict8x8   = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear8x8);
        xd->subpixel_predict16x16 = SUBPIX_INVOKE(
                                        &cpi->common.rtcd.subpix, bilinear16x16);
    }
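    /* SIXTAP selects the normative six-tap interpolation filters; the
     * bilinear set is the cheaper alternative used when the frame is coded
     * with bilinear filtering (e.g. the low-complexity / full-pixel
     * profiles).  The same four pointers cover every block size needed by
     * inter prediction. */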
    x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure

    x->vector_range = 32;

    // Count of MBs using the alternate Q if any
    cpi->alt_qcount = 0;

    // Reset frame count of inter 0,0 motion vector usage.
    cpi->inter_zz_count = 0;

    vpx_memset(segment_counts, 0, sizeof(segment_counts));

    cpi->prediction_error = 0;
    cpi->intra_error = 0;
    cpi->skip_true_count = 0;
    cpi->skip_false_count = 0;

#if 0
    // Experimental code
    cpi->frame_distortion = 0;
    cpi->last_mb_distortion = 0;
#endif

    totalrate = 0;

    x->partition_info = x->pi;

    xd->mode_info_context = cm->mi;
    xd->mode_info_stride = cm->mode_info_stride;

    xd->frame_type = cm->frame_type;

    xd->frames_since_golden = cm->frames_since_golden;
    xd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
    vp8_zero(cpi->MVcount);
    // vp8_zero( Contexts)
    vp8_zero(cpi->coef_counts);

    // reset intra mode contexts
    if (cm->frame_type == KEY_FRAME)
        vp8_init_mbmode_probs(cm);

    vp8cx_frame_init_quantizer(cpi);

    if (cpi->compressor_speed == 2)
    {
        if (cpi->oxcf.cpu_used < 0)
            cpi->Speed = -(cpi->oxcf.cpu_used);
        else
            vp8_auto_select_speed(cpi);
    }

    vp8_initialize_rd_consts(cpi, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q));
    vp8cx_initialize_me_consts(cpi, cm->base_qindex);

    // Copy data over into macro block data structures.
    x->src = *cpi->Source;
    xd->pre = cm->yv12_fb[cm->lst_fb_idx];
    xd->dst = cm->yv12_fb[cm->new_fb_idx];

    // set up the new frame for intra coded blocks
    vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);

    vp8_build_block_offsets(x);

    vp8_setup_block_dptrs(&x->e_mbd);

    vp8_setup_block_ptrs(x);

    x->activity_sum = 0;

    xd->mode_info_context->mbmi.mode = DC_PRED;
    xd->mode_info_context->mbmi.uv_mode = DC_PRED;

    xd->left_context = &cm->left_context;

    vp8_zero(cpi->count_mb_ref_frame_usage)
    vp8_zero(cpi->ymode_count)
    vp8_zero(cpi->uv_mode_count)

    x->mvc = cm->fc.mvc;

    vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
    {
        struct vpx_usec_timer emr_timer;
        vpx_usec_timer_start(&emr_timer);

        if (!cpi->b_multi_threaded)
        {
            // for each macroblock row in image
            for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
            {
                vp8_zero(cm->left_context)

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
            }

            cpi->tok_count = tp - cpi->tok;
        }
        else
        {
#if CONFIG_MULTITHREAD
            int i;

            vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, 1, cpi->encoding_thread_count);

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1))
            {
                cpi->current_mb_col_main = -1;

                for (i = 0; i < cpi->encoding_thread_count; i++)
                {
                    if ((mb_row + i + 1) >= cm->mb_rows)
                        break;

                    cpi->mb_row_ei[i].mb_row = mb_row + i + 1;
                    cpi->mb_row_ei[i].tp = cpi->tok + (mb_row + i + 1) * (cm->mb_cols * 16 * 24);
                    cpi->mb_row_ei[i].current_mb_col = -1;
                    //SetEvent(cpi->h_event_mbrencoding[i]);
                    sem_post(&cpi->h_event_mbrencoding[i]);
                }
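                /* Each worker row gets its own slice of the shared token
                 * buffer: the offset of (mb_cols * 16 * 24) TOKENEXTRA
                 * entries per row matches a budget of 16 coefficients for
                 * each of the 24 Y/U/V 4x4 blocks in a macroblock, so rows
                 * can tokenize independently and the totals are summed from
                 * tplist[] afterwards. */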
                vp8_zero(cm->left_context)

                tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);

                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;

                xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
                x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;

                if (mb_row < cm->mb_rows - 1)
                    //WaitForSingleObject(cpi->h_event_main, INFINITE);
                    sem_wait(&cpi->h_event_main);
            }
            /*
            for( ;mb_row<cm->mb_rows; mb_row ++)
            {
                vp8_zero( cm->left_context)

                tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24);

                encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate);
                // adjust to the next row of mbs
                x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols;
                x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
                x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
            }
            */
            cpi->tok_count = 0;

            for (mb_row = 0; mb_row < cm->mb_rows; mb_row ++)
            {
                cpi->tok_count += cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start;
            }

            if (xd->segmentation_enabled)
            {
                int i, j;

                if (xd->segmentation_enabled)
                {
                    for (i = 0; i < cpi->encoding_thread_count; i++)
                    {
                        for (j = 0; j < 4; j++)
                            segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j];
                    }
                }
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                totalrate += cpi->mb_row_ei[i].totalrate;
            }

            for (i = 0; i < cpi->encoding_thread_count; i++)
            {
                x->activity_sum += cpi->mb_row_ei[i].mb.activity_sum;
            }

#endif
        }
        vpx_usec_timer_mark(&emr_timer);
        cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer);
    }
    // Work out the segment probabilities if segmentation is enabled
    if (xd->segmentation_enabled)
    {
        int tot_count;
        int i;

        // Set to defaults
        vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));

        tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3];

        if (tot_count)
        {
            xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count;

            tot_count = segment_counts[0] + segment_counts[1];

            if (tot_count > 0)
            {
                xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count;
            }

            tot_count = segment_counts[2] + segment_counts[3];

            if (tot_count > 0)
                xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count;

            // Zero probabilities not allowed
            for (i = 0; i < MB_FEATURE_TREE_PROBS; i ++)
            {
                if (xd->mb_segment_tree_probs[i] == 0)
                    xd->mb_segment_tree_probs[i] = 1;
            }
        }
    }
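    /* The three tree probabilities above encode the segment id as a binary
     * tree: probs[0] is the chance of taking the {0,1} branch versus {2,3},
     * probs[1] picks segment 0 within {0,1} and probs[2] picks segment 2
     * within {2,3}.  Each is scaled to 0..255 and clamped away from zero,
     * since a zero probability could not code the corresponding segment. */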
    // 256 rate units to the bit
    cpi->projected_frame_size = totalrate >> 8;   // projected_frame_size in units of bits

    // Make a note of the percentage MBs coded Intra.
    if (cm->frame_type == KEY_FRAME)
    {
        cpi->this_frame_percent_intra = 100;
    }
    else
    {
        int tot_modes;

        tot_modes = cpi->count_mb_ref_frame_usage[INTRA_FRAME]
                    + cpi->count_mb_ref_frame_usage[LAST_FRAME]
                    + cpi->count_mb_ref_frame_usage[GOLDEN_FRAME]
                    + cpi->count_mb_ref_frame_usage[ALTREF_FRAME];

        if (tot_modes)
            cpi->this_frame_percent_intra = cpi->count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes;
    }
#if 0
    {
        int cnt = 0;
        int flag[2] = {0, 0};

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[0][cnt] != cm->fc.mvc[0][cnt])
            {
                flag[0] = 1;
                vpx_memcpy(cm->fc.pre_mvc[0], cm->fc.mvc[0], MVPcount);
                break;
            }
        }

        for (cnt = 0; cnt < MVPcount; cnt++)
        {
            if (cm->fc.pre_mvc[1][cnt] != cm->fc.mvc[1][cnt])
            {
                flag[1] = 1;
                vpx_memcpy(cm->fc.pre_mvc[1], cm->fc.mvc[1], MVPcount);
                break;
            }
        }

        if (flag[0] || flag[1])
            vp8_build_component_cost_table(cpi->mb.mvcost, cpi->mb.mvsadcost, (const MV_CONTEXT *) cm->fc.mvc, flag);
    }
#endif
    // Adjust the projected reference frame usage probability numbers to reflect
    // what we have just seen. This may be useful when we make multiple iterations
    // of the recode loop rather than continuing to use values from the previous frame.
    if ((cm->frame_type != KEY_FRAME) && !cm->refresh_alt_ref_frame && !cm->refresh_golden_frame)
    {
        const int *const rfct = cpi->count_mb_ref_frame_usage;
        const int rf_intra = rfct[INTRA_FRAME];
        const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];

        if ((rf_intra + rf_inter) > 0)
        {
            cpi->prob_intra_coded = (rf_intra * 255) / (rf_intra + rf_inter);

            if (cpi->prob_intra_coded < 1)
                cpi->prob_intra_coded = 1;

            if ((cm->frames_since_golden > 0) || cpi->source_alt_ref_active)
            {
                cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;

                if (cpi->prob_last_coded < 1)
                    cpi->prob_last_coded = 1;

                cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
                                     ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128;

                if (cpi->prob_gf_coded < 1)
                    cpi->prob_gf_coded = 1;
            }
        }
    }
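    /* prob_intra_coded, prob_last_coded and prob_gf_coded are the per-frame
     * probabilities later written to the frame header and used to code each
     * macroblock's reference frame choice; refreshing them from the counts
     * just gathered mainly helps when this frame goes through the recode
     * loop again, as the comment above notes. */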
#if 0
    // Keep record of the total distortion this time around for future use
    cpi->last_frame_distortion = cpi->frame_distortion;
#endif

    /* Update the average activity for the next frame.
     * This is feed-forward for now; it could also be saved in two-pass, or
     * done during lookahead when that is eventually added.
     */
    cpi->activity_avg = (unsigned int)(x->activity_sum / cpi->common.MBs);
    if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN)
        cpi->activity_avg = VP8_ACTIVITY_AVG_MIN;
}
void vp8_setup_block_ptrs(MACROBLOCK *x)
{
    int r, c;
    int i;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            x->block[r*4+c].src_diff = x->src_diff + r * 4 * 16 + c * 4;
        }
    }

    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[16 + r*2+c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4;
        }
    }

    for (r = 0; r < 2; r++)
    {
        for (c = 0; c < 2; c++)
        {
            x->block[20 + r*2+c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4;
        }
    }

    x->block[24].src_diff = x->src_diff + 384;

    for (i = 0; i < 25; i++)
    {
        x->block[i].coeff = x->coeff + i * 16;
    }
}
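/* The src_diff buffer is laid out as 256 luma residuals (the 16x16 block
 * addressed as sixteen 4x4 sub-blocks), followed by 64 U samples at offset
 * 256, 64 V samples at offset 320 and the 16 second-order (Y2) values at
 * offset 384; the loops above wire each BLOCK's src_diff pointer to its 4x4
 * tile and give every block a 16-entry slot in the coeff buffer. */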
void vp8_build_block_offsets(MACROBLOCK *x)
{
    int block = 0;
    int br, bc;

    vp8_build_block_doffsets(&x->e_mbd);

    // y blocks
    for (br = 0; br < 4; br++)
    {
        for (bc = 0; bc < 4; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.y_buffer;
            this_block->src_stride = x->src.y_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // u blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.u_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }

    // v blocks
    for (br = 0; br < 2; br++)
    {
        for (bc = 0; bc < 2; bc++)
        {
            BLOCK *this_block = &x->block[block];
            this_block->base_src = &x->src.v_buffer;
            this_block->src_stride = x->src.uv_stride;
            this_block->src = 4 * br * this_block->src_stride + 4 * bc;
            ++block;
        }
    }
}
static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
{
    const MACROBLOCKD *xd = &x->e_mbd;
    const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
    const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;

#ifdef MODE_STATS
    const int is_key = cpi->common.frame_type == KEY_FRAME;

    ++ (is_key ? uv_modes : inter_uv_modes)[uvm];

    if (m == B_PRED)
    {
        unsigned int *const bct = is_key ? b_modes : inter_b_modes;

        int b = 0;

        do
        {
            ++ bct[xd->block[b].bmi.mode];
        }
        while (++b < 16);
    }

#endif

    ++cpi->ymode_count[m];
    ++cpi->uv_mode_count[uvm];
}
int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
{
    int Error4x4, Error16x16, error_uv;
    B_PREDICTION_MODE intra_bmodes[16];
    int rate4x4, rate16x16, rateuv;
    int dist4x4, dist16x16, distuv;
    int rate = 0;
    int rate4x4_tokenonly = 0;
    int rate16x16_tokenonly = 0;
    int rateuv_tokenonly = 0;
    int i;

    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;

#if !(CONFIG_REALTIME_ONLY)

    if (cpi->sf.RD || cpi->compressor_speed != 2)
    {
        Error4x4 = vp8_rd_pick_intra4x4mby_modes(cpi, x, &rate4x4, &rate4x4_tokenonly, &dist4x4);

        //save the b modes for possible later use
        for (i = 0; i < 16; i++)
            intra_bmodes[i] = x->e_mbd.block[i].bmi.mode;

        Error16x16 = vp8_rd_pick_intra16x16mby_mode(cpi, x, &rate16x16, &rate16x16_tokenonly, &dist16x16);

        error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);

        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
        rate += rateuv;

        if (Error4x4 < Error16x16)
        {
            rate += rate4x4;
            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;

            // get back the intra block modes
            for (i = 0; i < 16; i++)
                x->e_mbd.block[i].bmi.mode = intra_bmodes[i];

            vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
            cpi->prediction_error += Error4x4;
#if 0
            // Experimental RD code
            cpi->frame_distortion += dist4x4;
#endif
        }
        else
        {
            vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
            rate += rate16x16;

#if 0
            // Experimental RD code
            cpi->prediction_error += Error16x16;
            cpi->frame_distortion += dist16x16;
#endif
        }

        sum_intra_stats(cpi, x);

        vp8_tokenize_mb(cpi, &x->e_mbd, t);
    }
    else
#endif
    {
        int rate2, distortion2;
        MB_PREDICTION_MODE mode, best_mode = DC_PRED;
        int this_rd;
        Error16x16 = INT_MAX;

        for (mode = DC_PRED; mode <= TM_PRED; mode ++)
        {
            x->e_mbd.mode_info_context->mbmi.mode = mode;
            vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
            distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
            rate2 = x->mbmode_cost[x->e_mbd.frame_type][mode];
            this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);

            if (Error16x16 > this_rd)
            {
                Error16x16 = this_rd;
                best_mode = mode;
            }
        }

        vp8_pick_intra4x4mby_modes(IF_RTCD(&cpi->rtcd), x, &rate2, &distortion2);

        if (distortion2 == INT_MAX)
            Error4x4 = INT_MAX;
        else
            Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);

        if (Error4x4 < Error16x16)
        {
            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
            vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
            cpi->prediction_error += Error4x4;
        }
        else
        {
            x->e_mbd.mode_info_context->mbmi.mode = best_mode;
            vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
            cpi->prediction_error += Error16x16;
        }

        vp8_pick_intra_mbuv_mode(x);
        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
        sum_intra_stats(cpi, x);
        vp8_tokenize_mb(cpi, &x->e_mbd, t);
    }

    return rate;
}
#ifdef SPEEDSTATS
extern int cnt_pm;
#endif

extern void vp8_fix_contexts(MACROBLOCKD *x);

int vp8cx_encode_inter_macroblock
(
    VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
    int recon_yoffset, int recon_uvoffset
)
{
    MACROBLOCKD *const xd = &x->e_mbd;
    int inter_error;
    int intra_error = 0;
    int rate;
    int distortion;

    x->skip = 0;

    if (xd->segmentation_enabled)
        x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
    else
        x->encode_breakout = cpi->oxcf.encode_breakout;

#if !(CONFIG_REALTIME_ONLY)

    if (cpi->sf.RD)
    {
        /* Are we using the fast quantizer for the mode selection? */
        if (cpi->sf.use_fastquant_for_pick)
            cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);

        inter_error = vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);

        /* switch back to the regular quantizer for the encode */
        if (cpi->sf.improved_quant)
        {
            cpi->mb.quantize_b = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
        }
    }
    else
#endif
        inter_error = vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate, &distortion, &intra_error);

    cpi->prediction_error += inter_error;
    cpi->intra_error += intra_error;

#if 0
    // Experimental RD code
    cpi->frame_distortion += distortion;
    cpi->last_mb_distortion = distortion;
#endif
    // MB level adjustment to quantizer setup
    if (xd->segmentation_enabled || cpi->zbin_mode_boost_enabled)
    {
        // If cyclic update enabled
        if (cpi->cyclic_refresh_mode_enabled)
        {
            // Clear segment_id back to 0 if not coded (last frame 0,0)
            if ((xd->mode_info_context->mbmi.segment_id == 1) &&
                ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
            {
                xd->mode_info_context->mbmi.segment_id = 0;
            }
        }

        // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to suppress noise
        if (cpi->zbin_mode_boost_enabled)
        {
            if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
                cpi->zbin_mode_boost = 0;
            else
            {
                if (xd->mode_info_context->mbmi.mode == ZEROMV)
                {
                    if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
                        cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
                    else
                        cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
                }
                else if (xd->mode_info_context->mbmi.mode == SPLITMV)
                    cpi->zbin_mode_boost = 0;
                else
                    cpi->zbin_mode_boost = MV_ZBIN_BOOST;
            }
        }
        else
            cpi->zbin_mode_boost = 0;

        vp8cx_mb_init_quantizer(cpi, x);
    }
    cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;

    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
    {
        vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);

        if (xd->mode_info_context->mbmi.mode == B_PRED)
        {
            vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
        }
        else
        {
            vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
        }

        sum_intra_stats(cpi, x);
    }
    else
    {
        MV best_ref_mv;
        MV nearest, nearby;
        int mdcounts[4];
        int ref_fb_idx;

        vp8_find_near_mvs(xd, xd->mode_info_context,
                          &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);

        vp8_build_uvmvs(xd, cpi->common.full_pixel);

        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
            ref_fb_idx = cpi->common.lst_fb_idx;
        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
            ref_fb_idx = cpi->common.gld_fb_idx;
        else
            ref_fb_idx = cpi->common.alt_fb_idx;

        xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
        xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
        xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;

        if (xd->mode_info_context->mbmi.mode == SPLITMV)
        {
            int i;

            for (i = 0; i < 16; i++)
            {
                if (xd->block[i].bmi.mode == NEW4X4)
                {
                    cpi->MVcount[0][mv_max+((xd->block[i].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
                    cpi->MVcount[1][mv_max+((xd->block[i].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
                }
            }
        }
        else if (xd->mode_info_context->mbmi.mode == NEWMV)
        {
            cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
            cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
        }

        if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
        {
            vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);

            // Clear mb_skip_coeff if mb_no_coeff_skip is not set
            if (!cpi->common.mb_no_coeff_skip)
                xd->mode_info_context->mbmi.mb_skip_coeff = 0;
        }
        else
            vp8_stuff_inter16x16(x);
    }

    if (!x->skip)
        vp8_tokenize_mb(cpi, xd, t);
    else
    {
        if (cpi->common.mb_no_coeff_skip)
        {
            if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
                xd->mode_info_context->mbmi.dc_diff = 0;
            else
                xd->mode_info_context->mbmi.dc_diff = 1;

            xd->mode_info_context->mbmi.mb_skip_coeff = 1;
            cpi->skip_true_count ++;
            vp8_fix_contexts(xd);
        }
        else
        {
            vp8_stuff_mb(cpi, xd, t);
            xd->mode_info_context->mbmi.mb_skip_coeff = 0;
            cpi->skip_false_count ++;
        }
    }

    return rate;
}