Removed unused vp8_recon_intra4x4mb function
[libvpx.git] / vp8 / encoder / encodemb.c
blobefcea745ba5781d930ab3ecb8ad85b48f07c4391
1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
12 #include "vpx_ports/config.h"
13 #include "encodemb.h"
14 #include "reconinter.h"
15 #include "quantize.h"
16 #include "tokenize.h"
17 #include "invtrans.h"
18 #include "recon.h"
19 #include "reconintra.h"
20 #include "dct.h"
21 #include "vpx_mem/vpx_mem.h"
23 #if CONFIG_RUNTIME_CPU_DETECT
24 #define IF_RTCD(x) (x)
25 #else
26 #define IF_RTCD(x) NULL
27 #endif
28 void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
30 unsigned char *src_ptr = (*(be->base_src) + be->src);
31 short *diff_ptr = be->src_diff;
32 unsigned char *pred_ptr = bd->predictor;
33 int src_stride = be->src_stride;
35 int r, c;
37 for (r = 0; r < 4; r++)
39 for (c = 0; c < 4; c++)
41 diff_ptr[c] = src_ptr[c] - pred_ptr[c];
44 diff_ptr += pitch;
45 pred_ptr += pitch;
46 src_ptr += src_stride;
/* Subtract one 8x8 prediction plane from its source plane, writing the
 * 16-bit residual.  diff/pred advance 8 per row; src advances by stride.
 */
static void vp8_subtract_8x8_plane(short *diff, unsigned char *src,
                                   unsigned char *pred, int stride)
{
    int r, c;

    for (r = 0; r < 8; r++)
    {
        for (c = 0; c < 8; c++)
            diff[c] = src[c] - pred[c];

        diff += 8;
        pred += 8;
        src += stride;
    }
}

/* Compute the chroma residuals for a macroblock.  The U residual lives at
 * diff + 256 (pred + 256) and the V residual at diff + 320 (pred + 320),
 * matching the macroblock buffer layout used elsewhere in this file.
 * The original open-coded two byte-identical loops; they are now one helper.
 */
void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
{
    vp8_subtract_8x8_plane(diff + 256, usrc, pred + 256, stride);
    vp8_subtract_8x8_plane(diff + 320, vsrc, pred + 320, stride);
}
/* Compute the 16x16 luma residual: source minus prediction.
 * diff and pred are densely packed (16 per row); src uses the given stride.
 */
void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride)
{
    int row, col;

    for (row = 0; row < 16; row++)
    {
        for (col = 0; col < 16; col++)
            diff[row * 16 + col] =
                src[row * stride + col] - pred[row * 16 + col];
    }
}
101 static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
103 ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
104 ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
107 void vp8_build_dcblock(MACROBLOCK *x)
109 short *src_diff_ptr = &x->src_diff[384];
110 int i;
112 for (i = 0; i < 16; i++)
114 src_diff_ptr[i] = x->coeff[i * 16];
118 void vp8_transform_mbuv(MACROBLOCK *x)
120 int i;
122 for (i = 16; i < 24; i += 2)
124 x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
125 &x->block[i].coeff[0], 16);
130 void vp8_transform_intra_mby(MACROBLOCK *x)
132 int i;
134 for (i = 0; i < 16; i += 2)
136 x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
137 &x->block[i].coeff[0], 32);
140 // build dc block from 16 y dc values
141 vp8_build_dcblock(x);
143 // do 2nd order transform on the dc block
144 x->short_walsh4x4(&x->block[24].src_diff[0],
145 &x->block[24].coeff[0], 8);
150 void vp8_transform_mb(MACROBLOCK *x)
152 int i;
154 for (i = 0; i < 16; i += 2)
156 x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
157 &x->block[i].coeff[0], 32);
160 // build dc block from 16 y dc values
161 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
162 vp8_build_dcblock(x);
164 for (i = 16; i < 24; i += 2)
166 x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
167 &x->block[i].coeff[0], 16);
170 // do 2nd order transform on the dc block
171 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
172 x->short_walsh4x4(&x->block[24].src_diff[0],
173 &x->block[24].coeff[0], 8);
177 void vp8_transform_mby(MACROBLOCK *x)
179 int i;
181 for (i = 0; i < 16; i += 2)
183 x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
184 &x->block[i].coeff[0], 32);
187 // build dc block from 16 y dc values
188 if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
190 vp8_build_dcblock(x);
191 x->short_walsh4x4(&x->block[24].src_diff[0],
192 &x->block[24].coeff[0], 8);
197 void vp8_stuff_inter16x16(MACROBLOCK *x)
199 vp8_build_inter_predictors_mb_s(&x->e_mbd);
201 // recon = copy from predictors to destination
203 BLOCKD *b = &x->e_mbd.block[0];
204 unsigned char *pred_ptr = b->predictor;
205 unsigned char *dst_ptr = *(b->base_dst) + b->dst;
206 int stride = b->dst_stride;
208 int i;
209 for(i=0;i<16;i++)
210 vpx_memcpy(dst_ptr+i*stride,pred_ptr+16*i,16);
212 b = &x->e_mbd.block[16];
213 pred_ptr = b->predictor;
214 dst_ptr = *(b->base_dst) + b->dst;
215 stride = b->dst_stride;
217 for(i=0;i<8;i++)
218 vpx_memcpy(dst_ptr+i*stride,pred_ptr+8*i,8);
220 b = &x->e_mbd.block[20];
221 pred_ptr = b->predictor;
222 dst_ptr = *(b->base_dst) + b->dst;
223 stride = b->dst_stride;
225 for(i=0;i<8;i++)
226 vpx_memcpy(dst_ptr+i*stride,pred_ptr+8*i,8);
231 #if !(CONFIG_REALTIME_ONLY)
232 #define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
233 #define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
235 typedef struct vp8_token_state vp8_token_state;
237 struct vp8_token_state{
238 int rate;
239 int error;
240 signed char next;
241 signed char token;
242 short qc;
245 // TODO: experiments to find optimal multiple numbers
246 #define Y1_RD_MULT 4
247 #define UV_RD_MULT 2
248 #define Y2_RD_MULT 16
250 static const int plane_rd_mult[4]=
252 Y1_RD_MULT,
253 Y2_RD_MULT,
254 UV_RD_MULT,
255 Y1_RD_MULT
258 void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
259 ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
260 const VP8_ENCODER_RTCD *rtcd)
262 BLOCK *b;
263 BLOCKD *d;
264 vp8_token_state tokens[17][2];
265 unsigned best_mask[2];
266 const short *dequant_ptr;
267 const short *coeff_ptr;
268 short *qcoeff_ptr;
269 short *dqcoeff_ptr;
270 int eob;
271 int i0;
272 int rc;
273 int x;
274 int sz;
275 int next;
276 int rdmult;
277 int rddiv;
278 int final_eob;
279 int rd_cost0;
280 int rd_cost1;
281 int rate0;
282 int rate1;
283 int error0;
284 int error1;
285 int t0;
286 int t1;
287 int best;
288 int band;
289 int pt;
290 int i;
291 int err_mult = plane_rd_mult[type];
293 b = &mb->block[ib];
294 d = &mb->e_mbd.block[ib];
296 /* Enable this to test the effect of RDO as a replacement for the dynamic
297 * zero bin instead of an augmentation of it.
299 #if 0
300 vp8_strict_quantize_b(b, d);
301 #endif
303 dequant_ptr = d->dequant;
304 coeff_ptr = b->coeff;
305 qcoeff_ptr = d->qcoeff;
306 dqcoeff_ptr = d->dqcoeff;
307 i0 = !type;
308 eob = d->eob;
310 /* Now set up a Viterbi trellis to evaluate alternative roundings. */
311 rdmult = mb->rdmult * err_mult;
312 if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME)
313 rdmult = (rdmult * 9)>>4;
315 rddiv = mb->rddiv;
316 best_mask[0] = best_mask[1] = 0;
317 /* Initialize the sentinel node of the trellis. */
318 tokens[eob][0].rate = 0;
319 tokens[eob][0].error = 0;
320 tokens[eob][0].next = 16;
321 tokens[eob][0].token = DCT_EOB_TOKEN;
322 tokens[eob][0].qc = 0;
323 *(tokens[eob] + 1) = *(tokens[eob] + 0);
324 next = eob;
325 for (i = eob; i-- > i0;)
327 int base_bits;
328 int d2;
329 int dx;
331 rc = vp8_default_zig_zag1d[i];
332 x = qcoeff_ptr[rc];
333 /* Only add a trellis state for non-zero coefficients. */
334 if (x)
336 int shortcut=0;
337 error0 = tokens[next][0].error;
338 error1 = tokens[next][1].error;
339 /* Evaluate the first possibility for this state. */
340 rate0 = tokens[next][0].rate;
341 rate1 = tokens[next][1].rate;
342 t0 = (vp8_dct_value_tokens_ptr + x)->Token;
343 /* Consider both possible successor states. */
344 if (next < 16)
346 band = vp8_coef_bands[i + 1];
347 pt = vp8_prev_token_class[t0];
348 rate0 +=
349 mb->token_costs[type][band][pt][tokens[next][0].token];
350 rate1 +=
351 mb->token_costs[type][band][pt][tokens[next][1].token];
353 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
354 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
355 if (rd_cost0 == rd_cost1)
357 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
358 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
360 /* And pick the best. */
361 best = rd_cost1 < rd_cost0;
362 base_bits = *(vp8_dct_value_cost_ptr + x);
363 dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
364 d2 = dx*dx;
365 tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
366 tokens[i][0].error = d2 + (best ? error1 : error0);
367 tokens[i][0].next = next;
368 tokens[i][0].token = t0;
369 tokens[i][0].qc = x;
370 best_mask[0] |= best << i;
371 /* Evaluate the second possibility for this state. */
372 rate0 = tokens[next][0].rate;
373 rate1 = tokens[next][1].rate;
375 if((abs(x)*dequant_ptr[rc]>abs(coeff_ptr[rc])) &&
376 (abs(x)*dequant_ptr[rc]<abs(coeff_ptr[rc])+dequant_ptr[rc]))
377 shortcut = 1;
378 else
379 shortcut = 0;
381 if(shortcut)
383 sz = -(x < 0);
384 x -= 2*sz + 1;
387 /* Consider both possible successor states. */
388 if (!x)
390 /* If we reduced this coefficient to zero, check to see if
391 * we need to move the EOB back here.
393 t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
394 DCT_EOB_TOKEN : ZERO_TOKEN;
395 t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
396 DCT_EOB_TOKEN : ZERO_TOKEN;
398 else
400 t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token;
402 if (next < 16)
404 band = vp8_coef_bands[i + 1];
405 if(t0!=DCT_EOB_TOKEN)
407 pt = vp8_prev_token_class[t0];
408 rate0 += mb->token_costs[type][band][pt][
409 tokens[next][0].token];
411 if(t1!=DCT_EOB_TOKEN)
413 pt = vp8_prev_token_class[t1];
414 rate1 += mb->token_costs[type][band][pt][
415 tokens[next][1].token];
419 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
420 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
421 if (rd_cost0 == rd_cost1)
423 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
424 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
426 /* And pick the best. */
427 best = rd_cost1 < rd_cost0;
428 base_bits = *(vp8_dct_value_cost_ptr + x);
430 if(shortcut)
432 dx -= (dequant_ptr[rc] + sz) ^ sz;
433 d2 = dx*dx;
435 tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
436 tokens[i][1].error = d2 + (best ? error1 : error0);
437 tokens[i][1].next = next;
438 tokens[i][1].token =best?t1:t0;
439 tokens[i][1].qc = x;
440 best_mask[1] |= best << i;
441 /* Finally, make this the new head of the trellis. */
442 next = i;
444 /* There's no choice to make for a zero coefficient, so we don't
445 * add a new trellis node, but we do need to update the costs.
447 else
449 band = vp8_coef_bands[i + 1];
450 t0 = tokens[next][0].token;
451 t1 = tokens[next][1].token;
452 /* Update the cost of each path if we're past the EOB token. */
453 if (t0 != DCT_EOB_TOKEN)
455 tokens[next][0].rate += mb->token_costs[type][band][0][t0];
456 tokens[next][0].token = ZERO_TOKEN;
458 if (t1 != DCT_EOB_TOKEN)
460 tokens[next][1].rate += mb->token_costs[type][band][0][t1];
461 tokens[next][1].token = ZERO_TOKEN;
463 /* Don't update next, because we didn't add a new node. */
467 /* Now pick the best path through the whole trellis. */
468 band = vp8_coef_bands[i + 1];
469 VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
470 rate0 = tokens[next][0].rate;
471 rate1 = tokens[next][1].rate;
472 error0 = tokens[next][0].error;
473 error1 = tokens[next][1].error;
474 t0 = tokens[next][0].token;
475 t1 = tokens[next][1].token;
476 rate0 += mb->token_costs[type][band][pt][t0];
477 rate1 += mb->token_costs[type][band][pt][t1];
478 rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
479 rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
480 if (rd_cost0 == rd_cost1)
482 rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
483 rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
485 best = rd_cost1 < rd_cost0;
486 final_eob = i0 - 1;
487 for (i = next; i < eob; i = next)
489 x = tokens[i][best].qc;
490 if (x)
491 final_eob = i;
492 rc = vp8_default_zig_zag1d[i];
493 qcoeff_ptr[rc] = x;
494 dqcoeff_ptr[rc] = x * dequant_ptr[rc];
495 next = tokens[i][best].next;
496 best = (best_mask[best] >> i) & 1;
498 final_eob++;
500 d->eob = final_eob;
501 *a = *l = (d->eob != !type);
504 void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
506 int b;
507 int type;
508 int has_2nd_order;
509 ENTROPY_CONTEXT_PLANES t_above, t_left;
510 ENTROPY_CONTEXT *ta;
511 ENTROPY_CONTEXT *tl;
513 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
514 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
516 ta = (ENTROPY_CONTEXT *)&t_above;
517 tl = (ENTROPY_CONTEXT *)&t_left;
519 has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
520 && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
521 type = has_2nd_order ? 0 : 3;
523 for (b = 0; b < 16; b++)
525 vp8_optimize_b(x, b, type,
526 ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
529 for (b = 16; b < 20; b++)
531 vp8_optimize_b(x, b, vp8_block2type[b],
532 ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
535 for (b = 20; b < 24; b++)
537 vp8_optimize_b(x, b, vp8_block2type[b],
538 ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
542 if (has_2nd_order)
544 b=24;
545 vp8_optimize_b(x, b, vp8_block2type[b],
546 ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
551 void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
553 int b;
554 int type;
555 int has_2nd_order;
557 ENTROPY_CONTEXT_PLANES t_above, t_left;
558 ENTROPY_CONTEXT *ta;
559 ENTROPY_CONTEXT *tl;
561 if (!x->e_mbd.above_context)
562 return;
564 if (!x->e_mbd.left_context)
565 return;
567 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
568 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
570 ta = (ENTROPY_CONTEXT *)&t_above;
571 tl = (ENTROPY_CONTEXT *)&t_left;
573 has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
574 && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
575 type = has_2nd_order ? 0 : 3;
577 for (b = 0; b < 16; b++)
579 vp8_optimize_b(x, b, type,
580 ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
584 if (has_2nd_order)
586 b=24;
587 vp8_optimize_b(x, b, vp8_block2type[b],
588 ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
592 void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
594 int b;
595 ENTROPY_CONTEXT_PLANES t_above, t_left;
596 ENTROPY_CONTEXT *ta;
597 ENTROPY_CONTEXT *tl;
599 if (!x->e_mbd.above_context)
600 return;
602 if (!x->e_mbd.left_context)
603 return;
605 vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
606 vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
608 ta = (ENTROPY_CONTEXT *)&t_above;
609 tl = (ENTROPY_CONTEXT *)&t_left;
611 for (b = 16; b < 20; b++)
613 vp8_optimize_b(x, b, vp8_block2type[b],
614 ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
617 for (b = 20; b < 24; b++)
619 vp8_optimize_b(x, b, vp8_block2type[b],
620 ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
624 #endif
626 void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
628 vp8_build_inter_predictors_mb(&x->e_mbd);
630 vp8_subtract_mb(rtcd, x);
632 vp8_transform_mb(x);
634 vp8_quantize_mb(x);
636 #if !(CONFIG_REALTIME_ONLY)
637 if (x->optimize)
638 vp8_optimize_mb(x, rtcd);
639 #endif
641 vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
643 RECON_INVOKE(&rtcd->common->recon, recon_mb)
644 (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
648 /* this funciton is used by first pass only */
649 void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
651 vp8_build_inter_predictors_mby(&x->e_mbd);
653 ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
655 vp8_transform_mby(x);
657 vp8_quantize_mby(x);
659 vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
661 RECON_INVOKE(&rtcd->common->recon, recon_mby)
662 (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
666 void vp8_encode_inter16x16uv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
668 vp8_build_inter_predictors_mbuv(&x->e_mbd);
670 ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
672 vp8_transform_mbuv(x);
674 vp8_quantize_mbuv(x);
676 vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
678 vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
682 void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
684 vp8_build_inter_predictors_mbuv(&x->e_mbd);
685 ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
687 vp8_transform_mbuv(x);
689 vp8_quantize_mbuv(x);