/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "vpx_ports/config.h"
#include "encodemb.h"
#include "reconinter.h"
#include "quantize.h"
#include "tokenize.h"
#include "invtrans.h"
#include "recon.h"
#include "reconintra.h"
#include "dct.h"
#include "vpx_mem/vpx_mem.h"
#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x) (x)
#else
#define IF_RTCD(x) NULL
#endif
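
/* With runtime CPU detection enabled, IF_RTCD passes the RTCD function
 * tables through so the *_INVOKE macros dispatch to whichever optimized
 * routines were detected at startup; without it the argument collapses to
 * NULL and the invoke macros bind directly to the plain C versions at
 * compile time.
 */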
void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
{
    unsigned char *src_ptr = (*(be->base_src) + be->src);
    short *diff_ptr = be->src_diff;
    unsigned char *pred_ptr = bd->predictor;
    int src_stride = be->src_stride;
    int r, c;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            diff_ptr[c] = src_ptr[c] - pred_ptr[c];
        }

        diff_ptr += pitch;
        pred_ptr += pitch;
        src_ptr  += src_stride;
    }
}
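
/* The macroblock diff buffer holds 25 blocks of shorts: Y in entries
 * 0-255, U in 256-319, V in 320-383, and the second-order (Y2) block at
 * 384-399. The 16x16 predictor buffer uses the same 0/256/320 plane
 * offsets but stores 8-bit samples, which is why udiff/upred below start
 * 256 entries in.
 */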
void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
{
    short *udiff = diff + 256;
    short *vdiff = diff + 320;
    unsigned char *upred = pred + 256;
    unsigned char *vpred = pred + 320;
    int r, c;

    for (r = 0; r < 8; r++)
    {
        for (c = 0; c < 8; c++)
        {
            udiff[c] = usrc[c] - upred[c];
        }

        udiff += 8;
        upred += 8;
        usrc  += stride;
    }

    for (r = 0; r < 8; r++)
    {
        for (c = 0; c < 8; c++)
        {
            vdiff[c] = vsrc[c] - vpred[c];
        }

        vdiff += 8;
        vpred += 8;
        vsrc  += stride;
    }
}
void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride)
{
    int r, c;

    for (r = 0; r < 16; r++)
    {
        for (c = 0; c < 16; c++)
        {
            diff[c] = src[c] - pred[c];
        }

        diff += 16;
        pred += 16;
        src  += stride;
    }
}
static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
}
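
/* Gather the DC term of each luma 4x4 DCT (coeff[i * 16] picks entry 0 of
 * block i) into block 24's src_diff, where it is coded separately as the
 * second-order Y2 block.
 */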
void vp8_build_dcblock(MACROBLOCK *x)
{
    short *src_diff_ptr = &x->src_diff[384];
    int i;

    for (i = 0; i < 16; i++)
    {
        src_diff_ptr[i] = x->coeff[i * 16];
    }
}
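
/* The last argument to vp8_short_fdct8x4 is the source pitch in bytes:
 * luma diff rows are 16 shorts (32 bytes) wide, chroma rows 8 shorts (16
 * bytes). Each call transforms two horizontally adjacent 4x4 blocks, so
 * the loops below advance by 2.
 */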
void vp8_transform_mbuv(MACROBLOCK *x)
{
    int i;

    for (i = 16; i < 24; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
                             &x->block[i].coeff[0], 16);
    }
}
void vp8_transform_intra_mby(MACROBLOCK *x)
{
    int i;

    for (i = 0; i < 16; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
                             &x->block[i].coeff[0], 32);
    }

    // build dc block from 16 y dc values
    vp8_build_dcblock(x);

    // do 2nd order transform on the dc block
    x->short_walsh4x4(&x->block[24].src_diff[0],
                      &x->block[24].coeff[0], 8);
}
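
/* In SPLITMV mode each luma subblock carries its own motion vector, so the
 * 16 DC terms are not expected to correlate and the second-order block is
 * skipped; the SPLITMV checks below implement that.
 */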
void vp8_transform_mb(MACROBLOCK *x)
{
    int i;

    for (i = 0; i < 16; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
                             &x->block[i].coeff[0], 32);
    }

    // build dc block from 16 y dc values
    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
        vp8_build_dcblock(x);

    for (i = 16; i < 24; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
                             &x->block[i].coeff[0], 16);
    }

    // do 2nd order transform on the dc block
    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
        x->short_walsh4x4(&x->block[24].src_diff[0],
                          &x->block[24].coeff[0], 8);
}
void vp8_transform_mby(MACROBLOCK *x)
{
    int i;

    for (i = 0; i < 16; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
                             &x->block[i].coeff[0], 32);
    }

    // build dc block from 16 y dc values
    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
    {
        vp8_build_dcblock(x);
        x->short_walsh4x4(&x->block[24].src_diff[0],
                          &x->block[24].coeff[0], 8);
    }
}
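
/* "Stuffing" a macroblock means coding it with no residual at all: the
 * inter predictor is copied verbatim into the reconstruction buffer,
 * 16 rows of 16 pixels for Y and 8 rows of 8 for each chroma plane.
 */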
void vp8_stuff_inter16x16(MACROBLOCK *x)
{
    vp8_build_inter_predictors_mb_s(&x->e_mbd);

    // recon = copy from predictors to destination
    {
        BLOCKD *b = &x->e_mbd.block[0];
        unsigned char *pred_ptr = b->predictor;
        unsigned char *dst_ptr = *(b->base_dst) + b->dst;
        int stride = b->dst_stride;
        int i;

        // Y: 16 rows of 16 pixels
        for (i = 0; i < 16; i++)
            vpx_memcpy(dst_ptr + i * stride, pred_ptr + 16 * i, 16);

        // U: 8 rows of 8 pixels
        b = &x->e_mbd.block[16];
        pred_ptr = b->predictor;
        dst_ptr = *(b->base_dst) + b->dst;
        stride = b->dst_stride;

        for (i = 0; i < 8; i++)
            vpx_memcpy(dst_ptr + i * stride, pred_ptr + 8 * i, 8);

        // V: 8 rows of 8 pixels
        b = &x->e_mbd.block[20];
        pred_ptr = b->predictor;
        dst_ptr = *(b->base_dst) + b->dst;
        stride = b->dst_stride;

        for (i = 0; i < 8; i++)
            vpx_memcpy(dst_ptr + i * stride, pred_ptr + 8 * i, 8);
    }
}
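
/* Everything from here to the matching #endif is the trellis coefficient
 * optimizer, which is compiled out of realtime-only builds.
 */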
#if !(CONFIG_REALTIME_ONLY)
#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
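
/* RDCOST folds rate into the distortion domain with an 8-bit fixed-point
 * rate multiplier: for example, RDCOST(512, 1, 10, 7) is
 * ((128 + 10*512) >> 8) + 1*7 = 20 + 7 = 27. RDTRUNC recovers the low
 * eight bits discarded by that shift, so equal integer costs can be
 * tie-broken on the truncated fraction.
 */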
typedef struct vp8_token_state vp8_token_state;

struct vp8_token_state
{
    int         rate;
    int         error;
    signed char next;
    signed char token;
    short       qc;
};
// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4  /* value assumed from upstream defaults */
#define UV_RD_MULT 2  /* value assumed from upstream defaults */
#define Y2_RD_MULT 16

static const int plane_rd_mult[4] =
{
    Y1_RD_MULT,
    Y2_RD_MULT,
    UV_RD_MULT,
    Y1_RD_MULT
};
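
/* plane_rd_mult is indexed by entropy plane type: 0 = luma coded without
 * its DC (the Y2 block carries it), 1 = Y2, 2 = chroma, 3 = luma with DC.
 * Y2 coefficients seed the DC of all 16 luma blocks, hence the much larger
 * multiplier.
 */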
void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
                    ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                    const VP8_ENCODER_RTCD *rtcd)
{
    BLOCK *b;
    BLOCKD *d;
    vp8_token_state tokens[17][2];
    unsigned best_mask[2];
    const short *dequant_ptr;
    const short *coeff_ptr;
    short *qcoeff_ptr;
    short *dqcoeff_ptr;
    int eob;
    int i0;
    int rc;
    int x;
    int sz;
    int next;
    int rdmult;
    int rddiv;
    int final_eob;
    int rd_cost0;
    int rd_cost1;
    int rate0;
    int rate1;
    int error0;
    int error1;
    int t0;
    int t1;
    int best;
    int band;
    int pt;
    int i;
    int err_mult = plane_rd_mult[type];

    b = &mb->block[ib];
    d = &mb->e_mbd.block[ib];

    /* Enable this to test the effect of RDO as a replacement for the dynamic
     * zero bin instead of an augmentation of it.
     */
#if 0
    vp8_strict_quantize_b(b, d);
#endif
    dequant_ptr = d->dequant;
    coeff_ptr = b->coeff;
    qcoeff_ptr = d->qcoeff;
    dqcoeff_ptr = d->dqcoeff;
    i0 = !type;
    eob = d->eob;

    /* Now set up a Viterbi trellis to evaluate alternative roundings. */
    rdmult = mb->rdmult * err_mult;
    if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)
        rdmult = (rdmult * 9) >> 4;
    rddiv = mb->rddiv;

    best_mask[0] = best_mask[1] = 0;
    /* Initialize the sentinel node of the trellis. */
    tokens[eob][0].rate = 0;
    tokens[eob][0].error = 0;
    tokens[eob][0].next = 16;
    tokens[eob][0].token = DCT_EOB_TOKEN;
    tokens[eob][0].qc = 0;
    *(tokens[eob] + 1) = *(tokens[eob] + 0);
    next = eob;
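
    /* Trellis layout: for each coefficient position i, tokens[i][0] tracks
     * the best cost of keeping the quantizer's output value and
     * tokens[i][1] the best cost with the magnitude lowered by one.
     * best_mask remembers which successor alternative each node selected
     * so the winning path can be replayed forward after the backward pass
     * below.
     */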
    for (i = eob; i-- > i0;)
    {
        int base_bits;
        int d2;
        int dx;

        rc = vp8_default_zig_zag1d[i];
        x = qcoeff_ptr[rc];

        /* Only add a trellis state for non-zero coefficients. */
        if (x)
        {
            int shortcut = 0;

            error0 = tokens[next][0].error;
            error1 = tokens[next][1].error;
            /* Evaluate the first possibility for this state. */
            rate0 = tokens[next][0].rate;
            rate1 = tokens[next][1].rate;
            t0 = (vp8_dct_value_tokens_ptr + x)->Token;

            /* Consider both possible successor states. */
            if (next < 16)
            {
                band = vp8_coef_bands[i + 1];
                pt = vp8_prev_token_class[t0];
                rate0 +=
                    mb->token_costs[type][band][pt][tokens[next][0].token];
                rate1 +=
                    mb->token_costs[type][band][pt][tokens[next][1].token];
            }

            rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
            rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);

            if (rd_cost0 == rd_cost1)
            {
                rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
                rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
            }

            /* And pick the best. */
            best = rd_cost1 < rd_cost0;
            base_bits = *(vp8_dct_value_cost_ptr + x);
            dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
            d2 = dx * dx;
            tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
            tokens[i][0].error = d2 + (best ? error1 : error0);
            tokens[i][0].next = next;
            tokens[i][0].token = t0;
            tokens[i][0].qc = x;
            best_mask[0] |= best << i;
            /* Evaluate the second possibility for this state. */
            rate0 = tokens[next][0].rate;
            rate1 = tokens[next][1].rate;

            /* Only try lowering the magnitude when the dequantized value
             * overshoots the original coefficient by less than one
             * quantizer step.
             */
            if ((abs(x) * dequant_ptr[rc] > abs(coeff_ptr[rc])) &&
                (abs(x) * dequant_ptr[rc] < abs(coeff_ptr[rc]) + dequant_ptr[rc]))
                shortcut = 1;
            else
                shortcut = 0;

            if (shortcut)
            {
                sz = -(x < 0);
                x -= 2 * sz + 1;
            }

            /* Consider both possible successor states. */
            if (!x)
            {
                /* If we reduced this coefficient to zero, check to see if
                 * we need to move the EOB back here.
                 */
                t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
                     DCT_EOB_TOKEN : ZERO_TOKEN;
                t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
                     DCT_EOB_TOKEN : ZERO_TOKEN;
            }
            else
            {
                t0 = t1 = (vp8_dct_value_tokens_ptr + x)->Token;
            }

            if (next < 16)
            {
                band = vp8_coef_bands[i + 1];

                if (t0 != DCT_EOB_TOKEN)
                {
                    pt = vp8_prev_token_class[t0];
                    rate0 += mb->token_costs[type][band][pt][
                             tokens[next][0].token];
                }

                if (t1 != DCT_EOB_TOKEN)
                {
                    pt = vp8_prev_token_class[t1];
                    rate1 += mb->token_costs[type][band][pt][
                             tokens[next][1].token];
                }
            }

            rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
            rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);

            if (rd_cost0 == rd_cost1)
            {
                rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
                rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
            }

            /* And pick the best. */
            best = rd_cost1 < rd_cost0;
            base_bits = *(vp8_dct_value_cost_ptr + x);

            if (shortcut)
            {
                dx -= (dequant_ptr[rc] + sz) ^ sz;
                d2 = dx * dx;
            }

            tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
            tokens[i][1].error = d2 + (best ? error1 : error0);
            tokens[i][1].next = next;
            tokens[i][1].token = best ? t1 : t0;
            tokens[i][1].qc = x;
            best_mask[1] |= best << i;
            /* Finally, make this the new head of the trellis. */
            next = i;
        }
        /* There's no choice to make for a zero coefficient, so we don't
         * add a new trellis node, but we do need to update the costs.
         */
        else
        {
            band = vp8_coef_bands[i + 1];
            t0 = tokens[next][0].token;
            t1 = tokens[next][1].token;

            /* Update the cost of each path if we're past the EOB token. */
            if (t0 != DCT_EOB_TOKEN)
            {
                tokens[next][0].rate += mb->token_costs[type][band][0][t0];
                tokens[next][0].token = ZERO_TOKEN;
            }

            if (t1 != DCT_EOB_TOKEN)
            {
                tokens[next][1].rate += mb->token_costs[type][band][0][t1];
                tokens[next][1].token = ZERO_TOKEN;
            }

            /* Don't update next, because we didn't add a new node. */
        }
    }
    /* Now pick the best path through the whole trellis. */
    band = vp8_coef_bands[i + 1];
    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
    rate0 = tokens[next][0].rate;
    rate1 = tokens[next][1].rate;
    error0 = tokens[next][0].error;
    error1 = tokens[next][1].error;
    t0 = tokens[next][0].token;
    t1 = tokens[next][1].token;
    rate0 += mb->token_costs[type][band][pt][t0];
    rate1 += mb->token_costs[type][band][pt][t1];
    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);

    if (rd_cost0 == rd_cost1)
    {
        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
    }

    best = rd_cost1 < rd_cost0;

    /* Replay the winning path forward, writing back the chosen levels. */
    final_eob = i0 - 1;

    for (i = next; i < eob; i = next)
    {
        x = tokens[i][best].qc;

        if (x)
            final_eob = i;

        rc = vp8_default_zig_zag1d[i];
        qcoeff_ptr[rc] = x;
        dqcoeff_ptr[rc] = x * dequant_ptr[rc];
        next = tokens[i][best].next;
        best = (best_mask[best] >> i) & 1;
    }

    final_eob++;

    d->eob = final_eob;
    /* An empty block ends exactly at i0 (== !type), so this flag is 1 when
     * any coefficient survived; it becomes the above/left entropy context.
     */
    *a = *l = (d->eob != !type);
}
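
/* vp8_block2above and vp8_block2left map a block index (0-24) to the
 * entropy-context slot shared with the block's upper or left neighbour;
 * each vp8_optimize_b call below receives exactly the context bytes it
 * should read and update.
 */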
void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
    int b;
    int type;
    int has_2nd_order;
    ENTROPY_CONTEXT_PLANES t_above, t_left;
    ENTROPY_CONTEXT *ta;
    ENTROPY_CONTEXT *tl;
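
    /* Operate on stack copies of the contexts: vp8_optimize_b advances
     * them block by block, but the macroblock's real above/left contexts
     * must be left for the tokenizer to update.
     */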
    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;

    has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
                     && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
    type = has_2nd_order ? 0 : 3;

    for (b = 0; b < 16; b++)
    {
        vp8_optimize_b(x, b, type,
                       ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }

    for (b = 16; b < 20; b++)
    {
        vp8_optimize_b(x, b, vp8_block2type[b],
                       ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }

    for (b = 20; b < 24; b++)
    {
        vp8_optimize_b(x, b, vp8_block2type[b],
                       ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }

    if (has_2nd_order)
    {
        b = 24;
        vp8_optimize_b(x, b, vp8_block2type[b],
                       ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }
}
void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
    int b;
    int type;
    int has_2nd_order;
    ENTROPY_CONTEXT_PLANES t_above, t_left;
    ENTROPY_CONTEXT *ta;
    ENTROPY_CONTEXT *tl;

    if (!x->e_mbd.above_context)
        return;

    if (!x->e_mbd.left_context)
        return;

    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;

    has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
                     && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
    type = has_2nd_order ? 0 : 3;

    for (b = 0; b < 16; b++)
    {
        vp8_optimize_b(x, b, type,
                       ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }

    if (has_2nd_order)
    {
        b = 24;
        vp8_optimize_b(x, b, vp8_block2type[b],
                       ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }
}
void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
    int b;
    ENTROPY_CONTEXT_PLANES t_above, t_left;
    ENTROPY_CONTEXT *ta;
    ENTROPY_CONTEXT *tl;

    if (!x->e_mbd.above_context)
        return;

    if (!x->e_mbd.left_context)
        return;

    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;

    for (b = 16; b < 20; b++)
    {
        vp8_optimize_b(x, b, vp8_block2type[b],
                       ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }

    for (b = 20; b < 24; b++)
    {
        vp8_optimize_b(x, b, vp8_block2type[b],
                       ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }
}
#endif
void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    vp8_build_inter_predictors_mb(&x->e_mbd);

    vp8_subtract_mb(rtcd, x);

    vp8_transform_mb(x);

    vp8_quantize_mb(x);

#if !(CONFIG_REALTIME_ONLY)
    if (x->optimize)
        vp8_optimize_mb(x, rtcd);
#endif

    vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);

    RECON_INVOKE(&rtcd->common->recon, recon_mb)
    (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}
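
/* The remaining helpers run the same predict/subtract/transform/quantize
 * pipeline on a single plane set rather than the whole macroblock.
 */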
/* this function is used by first pass only */
void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    vp8_build_inter_predictors_mby(&x->e_mbd);

    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);

    vp8_transform_mby(x);

    vp8_quantize_mby(x);

    vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);

    RECON_INVOKE(&rtcd->common->recon, recon_mby)
    (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}
void vp8_encode_inter16x16uv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    vp8_build_inter_predictors_mbuv(&x->e_mbd);

    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);

    vp8_transform_mbuv(x);

    vp8_quantize_mbuv(x);

    vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);

    vp8_recon_intra_mbuv(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}
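
/* The "uvrd" variant below stops after quantization: the RD mode search
 * only needs the quantized coefficients for rate/distortion estimates, so
 * inverse transform and reconstruction are skipped.
 */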
void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    vp8_build_inter_predictors_mbuv(&x->e_mbd);
    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);

    vp8_transform_mbuv(x);

    vp8_quantize_mbuv(x);
}