/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_ports/config.h"
#include "encodemb.h"
#include "vp8/common/reconinter.h"
#include "quantize.h"
#include "tokenize.h"
#include "vp8/common/invtrans.h"
#include "vp8/common/recon.h"
#include "vp8/common/reconintra.h"
#include "dct.h"
#include "vpx_mem/vpx_mem.h"
#include "rdopt.h"

#if CONFIG_RUNTIME_CPU_DETECT
#define IF_RTCD(x) (x)
#else
#define IF_RTCD(x) NULL
#endif
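
/* When runtime CPU detection is disabled, the INVOKE macros resolve to
 * direct function calls at compile time and never dereference their context
 * argument, so IF_RTCD() can safely yield NULL in that configuration.
 */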

void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch)
{
    unsigned char *src_ptr = (*(be->base_src) + be->src);
    short *diff_ptr = be->src_diff;
    unsigned char *pred_ptr = bd->predictor;
    int src_stride = be->src_stride;

    int r, c;

    for (r = 0; r < 4; r++)
    {
        for (c = 0; c < 4; c++)
        {
            diff_ptr[c] = src_ptr[c] - pred_ptr[c];
        }

        diff_ptr += pitch;
        pred_ptr += pitch;
        src_ptr  += src_stride;
    }
}
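
/* The macroblock residual buffer (x->src_diff) is laid out as 400 shorts:
 * the 16x16 luma samples at offset 0, the 8x8 U plane at offset 256, the
 * 8x8 V plane at offset 320, and the 4x4 second-order (Y2) DC block at
 * offset 384. The +256/+320 offsets below follow that layout.
 */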

void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
{
    short *udiff = diff + 256;
    short *vdiff = diff + 320;
    unsigned char *upred = pred + 256;
    unsigned char *vpred = pred + 320;

    int r, c;

    for (r = 0; r < 8; r++)
    {
        for (c = 0; c < 8; c++)
        {
            udiff[c] = usrc[c] - upred[c];
        }

        udiff += 8;
        upred += 8;
        usrc  += stride;
    }

    for (r = 0; r < 8; r++)
    {
        for (c = 0; c < 8; c++)
        {
            vdiff[c] = vsrc[c] - vpred[c];
        }

        vdiff += 8;
        vpred += 8;
        vsrc  += stride;
    }
}

void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride)
{
    int r, c;

    for (r = 0; r < 16; r++)
    {
        for (c = 0; c < 16; c++)
        {
            diff[c] = src[c] - pred[c];
        }

        diff += 16;
        pred += 16;
        src  += stride;
    }
}

static void vp8_subtract_mb(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer,
        x->e_mbd.predictor, x->src.y_stride);
    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer,
        x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
}
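
/* Collect the DC coefficient of each of the 16 luma blocks into the Y2
 * block's residual. Each 4x4 block owns 16 coefficients, so the DC terms
 * sit at coeff[0], coeff[16], ..., coeff[240], i.e. at i * 16.
 */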
static void build_dcblock(MACROBLOCK *x)
{
    short *src_diff_ptr = &x->src_diff[384];
    int i;

    for (i = 0; i < 16; i++)
    {
        src_diff_ptr[i] = x->coeff[i * 16];
    }
}
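
/* The pitch argument to the FDCT is in bytes: 16 for the 8-short-wide
 * chroma rows and 32 for the 16-short-wide luma rows. Each fdct8x4 call
 * transforms a pair of horizontally adjacent 4x4 blocks.
 */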
void vp8_transform_mbuv(MACROBLOCK *x)
{
    int i;

    for (i = 16; i < 24; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
            &x->block[i].coeff[0], 16);
    }
}

void vp8_transform_intra_mby(MACROBLOCK *x)
{
    int i;

    for (i = 0; i < 16; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
            &x->block[i].coeff[0], 32);
    }

    // build dc block from 16 y dc values
    build_dcblock(x);

    // do 2nd order transform on the dc block
    x->short_walsh4x4(&x->block[24].src_diff[0],
        &x->block[24].coeff[0], 8);
}

static void transform_mb(MACROBLOCK *x)
{
    int i;

    for (i = 0; i < 16; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
            &x->block[i].coeff[0], 32);
    }

    // build dc block from 16 y dc values
    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
        build_dcblock(x);

    for (i = 16; i < 24; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
            &x->block[i].coeff[0], 16);
    }

    // do 2nd order transform on the dc block
    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
        x->short_walsh4x4(&x->block[24].src_diff[0],
            &x->block[24].coeff[0], 8);
}

static void transform_mby(MACROBLOCK *x)
{
    int i;

    for (i = 0; i < 16; i += 2)
    {
        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
            &x->block[i].coeff[0], 32);
    }

    // build dc block from 16 y dc values
    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
    {
        build_dcblock(x);

        // do 2nd order transform on the dc block
        x->short_walsh4x4(&x->block[24].src_diff[0],
            &x->block[24].coeff[0], 8);
    }
}
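
/* Used for skipped macroblocks: vp8_build_inter_predictors_mb_s() writes the
 * prediction directly into the reconstruction buffer, so no residual is
 * transformed or coded. The commented-out block below is the equivalent
 * explicit copy from the predictor buffer to the destination.
 */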
void vp8_stuff_inter16x16(MACROBLOCK *x)
{
    vp8_build_inter_predictors_mb_s(&x->e_mbd);
    /*
        // recon = copy from predictors to destination
        {
            BLOCKD *b = &x->e_mbd.block[0];
            unsigned char *pred_ptr = b->predictor;
            unsigned char *dst_ptr = *(b->base_dst) + b->dst;
            int stride = b->dst_stride;
            int i;

            for(i=0;i<16;i++)
                vpx_memcpy(dst_ptr+i*stride,pred_ptr+16*i,16);

            b = &x->e_mbd.block[16];
            pred_ptr = b->predictor;
            dst_ptr = *(b->base_dst) + b->dst;
            stride = b->dst_stride;

            for(i=0;i<8;i++)
                vpx_memcpy(dst_ptr+i*stride,pred_ptr+8*i,8);

            b = &x->e_mbd.block[20];
            pred_ptr = b->predictor;
            dst_ptr = *(b->base_dst) + b->dst;
            stride = b->dst_stride;

            for(i=0;i<8;i++)
                vpx_memcpy(dst_ptr+i*stride,pred_ptr+8*i,8);
        }
    */
}

#if !(CONFIG_REALTIME_ONLY)
#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )

typedef struct vp8_token_state vp8_token_state;

struct vp8_token_state{
  int           rate;
  int           error;
  signed char   next;
  signed char   token;
  short         qc;
};

// TODO: experiments to find optimal multiple numbers
#define Y1_RD_MULT 4
#define UV_RD_MULT 2
#define Y2_RD_MULT 16

static const int plane_rd_mult[4]=
{
    Y1_RD_MULT,
    Y2_RD_MULT,
    UV_RD_MULT,
    Y1_RD_MULT
};
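
/* Trellis (Viterbi) coefficient optimization. For each coefficient position
 * in zig-zag order, two candidate states are tracked: tokens[i][0] keeps the
 * quantizer's output value, while tokens[i][1] has its magnitude reduced by
 * one (possibly to zero). Each state stores the rate and distortion of the
 * best path from the EOB back to that position; best_mask records which
 * predecessor won so the winning path can be replayed forward afterwards.
 */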
static void optimize_b(MACROBLOCK *mb, int ib, int type,
                       ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                       const VP8_ENCODER_RTCD *rtcd)
{
    BLOCK *b;
    BLOCKD *d;
    vp8_token_state tokens[17][2];
    unsigned best_mask[2];
    const short *dequant_ptr;
    const short *coeff_ptr;
    short *qcoeff_ptr;
    short *dqcoeff_ptr;
    int eob;
    int i0;
    int rc;
    int x;
    int sz;
    int next;
    int rdmult;
    int rddiv;
    int final_eob;
    int rd_cost0;
    int rd_cost1;
    int rate0;
    int rate1;
    int error0;
    int error1;
    int t0;
    int t1;
    int best;
    int band;
    int pt;
    int i;
    int err_mult = plane_rd_mult[type];

    b = &mb->block[ib];
    d = &mb->e_mbd.block[ib];

    /* Enable this to test the effect of RDO as a replacement for the dynamic
     *  zero bin instead of an augmentation of it.
     */
#if 0
    vp8_strict_quantize_b(b, d);
#endif

    dequant_ptr = d->dequant;
    coeff_ptr = b->coeff;
    qcoeff_ptr = d->qcoeff;
    dqcoeff_ptr = d->dqcoeff;
    i0 = !type;
    eob = d->eob;
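
    /* i0 = !type skips coefficient 0 for luma blocks whose DC is carried by
     * the Y2 block (PLANE_TYPE_Y_NO_DC == 0); all other plane types start
     * the trellis at coefficient 0.
     */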
    /* Now set up a Viterbi trellis to evaluate alternative roundings. */
    rdmult = mb->rdmult * err_mult;
    if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME)
        rdmult = (rdmult * 9)>>4;
    rddiv = mb->rddiv;
    best_mask[0] = best_mask[1] = 0;
    /* Initialize the sentinel node of the trellis. */
    tokens[eob][0].rate = 0;
    tokens[eob][0].error = 0;
    tokens[eob][0].next = 16;
    tokens[eob][0].token = DCT_EOB_TOKEN;
    tokens[eob][0].qc = 0;
    *(tokens[eob] + 1) = *(tokens[eob] + 0);
    next = eob;
    for (i = eob; i-- > i0;)
    {
        int base_bits;
        int d2;
        int dx;

        rc = vp8_default_zig_zag1d[i];
        x = qcoeff_ptr[rc];
        /* Only add a trellis state for non-zero coefficients. */
        if (x)
        {
            int shortcut=0;
            error0 = tokens[next][0].error;
            error1 = tokens[next][1].error;
            /* Evaluate the first possibility for this state. */
            rate0 = tokens[next][0].rate;
            rate1 = tokens[next][1].rate;
            t0 = (vp8_dct_value_tokens_ptr + x)->Token;
            /* Consider both possible successor states. */
            if (next < 16)
            {
                band = vp8_coef_bands[i + 1];
                pt = vp8_prev_token_class[t0];
                rate0 +=
                    mb->token_costs[type][band][pt][tokens[next][0].token];
                rate1 +=
                    mb->token_costs[type][band][pt][tokens[next][1].token];
            }
            rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
            rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
            if (rd_cost0 == rd_cost1)
            {
                rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
                rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
            }
            /* And pick the best. */
            best = rd_cost1 < rd_cost0;
            base_bits = *(vp8_dct_value_cost_ptr + x);
            dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
            d2 = dx*dx;
            tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
            tokens[i][0].error = d2 + (best ? error1 : error0);
            tokens[i][0].next = next;
            tokens[i][0].token = t0;
            tokens[i][0].qc = x;
            best_mask[0] |= best << i;
            /* Evaluate the second possibility for this state. */
            rate0 = tokens[next][0].rate;
            rate1 = tokens[next][1].rate;

            if((abs(x)*dequant_ptr[rc]>abs(coeff_ptr[rc])) &&
               (abs(x)*dequant_ptr[rc]<abs(coeff_ptr[rc])+dequant_ptr[rc]))
                shortcut = 1;
            else
                shortcut = 0;

            if(shortcut)
            {
                sz = -(x < 0);
                x -= 2*sz + 1;
            }

            /* Consider both possible successor states. */
            if (!x)
            {
                /* If we reduced this coefficient to zero, check to see if
                 *  we need to move the EOB back here.
                 */
                t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
                    DCT_EOB_TOKEN : ZERO_TOKEN;
                t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
                    DCT_EOB_TOKEN : ZERO_TOKEN;
            }
            else
            {
                t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token;
            }
            if (next < 16)
            {
                band = vp8_coef_bands[i + 1];
                if(t0!=DCT_EOB_TOKEN)
                {
                    pt = vp8_prev_token_class[t0];
                    rate0 += mb->token_costs[type][band][pt][
                        tokens[next][0].token];
                }
                if(t1!=DCT_EOB_TOKEN)
                {
                    pt = vp8_prev_token_class[t1];
                    rate1 += mb->token_costs[type][band][pt][
                        tokens[next][1].token];
                }
            }

            rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
            rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
            if (rd_cost0 == rd_cost1)
            {
                rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
                rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
            }
            /* And pick the best. */
            best = rd_cost1 < rd_cost0;
            base_bits = *(vp8_dct_value_cost_ptr + x);

            if(shortcut)
            {
                dx -= (dequant_ptr[rc] + sz) ^ sz;
                d2 = dx*dx;
            }
            tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
            tokens[i][1].error = d2 + (best ? error1 : error0);
            tokens[i][1].next = next;
            tokens[i][1].token =best?t1:t0;
            tokens[i][1].qc = x;
            best_mask[1] |= best << i;
            /* Finally, make this the new head of the trellis. */
            next = i;
        }
        /* There's no choice to make for a zero coefficient, so we don't
         *  add a new trellis node, but we do need to update the costs.
         */
        else
        {
            band = vp8_coef_bands[i + 1];
            t0 = tokens[next][0].token;
            t1 = tokens[next][1].token;
            /* Update the cost of each path if we're past the EOB token. */
            if (t0 != DCT_EOB_TOKEN)
            {
                tokens[next][0].rate += mb->token_costs[type][band][0][t0];
                tokens[next][0].token = ZERO_TOKEN;
            }
            if (t1 != DCT_EOB_TOKEN)
            {
                tokens[next][1].rate += mb->token_costs[type][band][0][t1];
                tokens[next][1].token = ZERO_TOKEN;
            }
            /* Don't update next, because we didn't add a new node. */
        }
    }

    /* Now pick the best path through the whole trellis. */
    band = vp8_coef_bands[i + 1];
    VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
    rate0 = tokens[next][0].rate;
    rate1 = tokens[next][1].rate;
    error0 = tokens[next][0].error;
    error1 = tokens[next][1].error;
    t0 = tokens[next][0].token;
    t1 = tokens[next][1].token;
    rate0 += mb->token_costs[type][band][pt][t0];
    rate1 += mb->token_costs[type][band][pt][t1];
    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
    if (rd_cost0 == rd_cost1)
    {
        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
    }
    best = rd_cost1 < rd_cost0;
    final_eob = i0 - 1;
    for (i = next; i < eob; i = next)
    {
        x = tokens[i][best].qc;
        if (x)
            final_eob = i;
        rc = vp8_default_zig_zag1d[i];
        qcoeff_ptr[rc] = x;
        dqcoeff_ptr[rc] = x * dequant_ptr[rc];
        next = tokens[i][best].next;
        best = (best_mask[best] >> i) & 1;
    }
    final_eob++;

    d->eob = final_eob;
    *a = *l = (d->eob != !type);
}
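
/* The whole-macroblock wrappers below run the trellis over each block with
 * the entropy contexts the tokenizer will later see. The above/left contexts
 * are copied to locals because optimize_b() updates them in place as it
 * walks the blocks.
 */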
static void optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
    int b;
    int type;
    int has_2nd_order;
    ENTROPY_CONTEXT_PLANES t_above, t_left;
    ENTROPY_CONTEXT *ta;
    ENTROPY_CONTEXT *tl;

    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;

    has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
    type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;

    for (b = 0; b < 16; b++)
    {
        optimize_b(x, b, type,
            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }

    for (b = 16; b < 24; b++)
    {
        optimize_b(x, b, PLANE_TYPE_UV,
            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }

    if (has_2nd_order)
    {
        b=24;
        optimize_b(x, b, PLANE_TYPE_Y2,
            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }
}
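
/* Y-only and UV-only variants, intended for paths that encode only part of
 * the macroblock (e.g. RD mode search). They bail out early if the entropy
 * context pointers are not yet set up.
 */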
void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
    int b;
    int type;
    int has_2nd_order;
    ENTROPY_CONTEXT_PLANES t_above, t_left;
    ENTROPY_CONTEXT *ta;
    ENTROPY_CONTEXT *tl;

    if (!x->e_mbd.above_context)
        return;

    if (!x->e_mbd.left_context)
        return;

    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;

    has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
    type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;

    for (b = 0; b < 16; b++)
    {
        optimize_b(x, b, type,
            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }

    if (has_2nd_order)
    {
        b=24;
        optimize_b(x, b, PLANE_TYPE_Y2,
            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }
}

void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
{
    int b;
    ENTROPY_CONTEXT_PLANES t_above, t_left;
    ENTROPY_CONTEXT *ta;
    ENTROPY_CONTEXT *tl;

    if (!x->e_mbd.above_context)
        return;

    if (!x->e_mbd.left_context)
        return;

    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

    ta = (ENTROPY_CONTEXT *)&t_above;
    tl = (ENTROPY_CONTEXT *)&t_left;

    for (b = 16; b < 24; b++)
    {
        optimize_b(x, b, PLANE_TYPE_UV,
            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
    }
}
#endif
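
/* Full inter macroblock encode: predict, subtract, transform, quantize,
 * optionally trellis-optimize the coefficients, then inverse transform and
 * reconstruct so this macroblock can serve as a prediction reference.
 */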
void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    vp8_build_inter_predictors_mb(&x->e_mbd);

    vp8_subtract_mb(rtcd, x);

    transform_mb(x);

    vp8_quantize_mb(x);

#if !(CONFIG_REALTIME_ONLY)
    if (x->optimize)
        optimize_mb(x, rtcd);
#endif

    vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);

    RECON_INVOKE(&rtcd->common->recon, recon_mb)
        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}

/* this function is used by first pass only */
void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    vp8_build_inter_predictors_mby(&x->e_mbd);

    ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer,
        x->e_mbd.predictor, x->src.y_stride);

    transform_mby(x);

    vp8_quantize_mby(x);

    vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);

    RECON_INVOKE(&rtcd->common->recon, recon_mby)
        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
}

void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
{
    vp8_build_inter_predictors_mbuv(&x->e_mbd);
    ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer,
        x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);

    vp8_transform_mbuv(x);

    vp8_quantize_mbuv(x);
}