// Imported from chromium-blink-merge.git: third_party/libwebp/enc/quant.c
// (blob e1d202b5a3b500758645b7a089369e4710b3eeb0)
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Quantization
//
// Author: Skal (pascal.massimino@gmail.com)
14 #include <assert.h>
15 #include <math.h>
16 #include <stdlib.h> // for abs()
18 #include "./vp8enci.h"
19 #include "./cost.h"
21 #define DO_TRELLIS_I4 1
22 #define DO_TRELLIS_I16 1 // not a huge gain, but ok at low bitrate.
23 #define DO_TRELLIS_UV 0 // disable trellis for UV. Risky. Not worth.
24 #define USE_TDISTO 1
26 #define MID_ALPHA 64 // neutral value for susceptibility
27 #define MIN_ALPHA 30 // lowest usable value for susceptibility
28 #define MAX_ALPHA 100 // higher meaningful value for susceptibility
30 #define SNS_TO_DQ 0.9 // Scaling constant between the sns value and the QP
31 // power-law modulation. Must be strictly less than 1.
33 #define I4_PENALTY 4000 // Rate-penalty for quick i4/i16 decision
35 // number of non-zero coeffs below which we consider the block very flat
36 // (and apply a penalty to complex predictions)
37 #define FLATNESS_LIMIT_I16 10 // I16 mode
38 #define FLATNESS_LIMIT_I4 3 // I4 mode
39 #define FLATNESS_LIMIT_UV 2 // UV mode
40 #define FLATNESS_PENALTY 140 // roughly ~1bit per block
42 #define MULT_8B(a, b) (((a) * (b) + 128) >> 8)
44 // #define DEBUG_BLOCK
//------------------------------------------------------------------------------

#if defined(DEBUG_BLOCK)

#include <stdio.h>
#include <stdlib.h>

// Debug-only helper: dumps the source samples, the reconstructed samples,
// their absolute delta, and all quantized levels for the macroblock the
// iterator 'it' currently points at.
static void PrintBlockInfo(const VP8EncIterator* const it,
                           const VP8ModeScore* const rd) {
  int i, j;
  const int is_i16 = (it->mb_->type_ == 1);
  printf("SOURCE / OUTPUT / ABS DELTA\n");
  for (j = 0; j < 24; ++j) {
    if (j == 16) printf("\n");   // newline before the U/V block
    for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_in_[i + j * BPS]);
    printf("    ");
    for (i = 0; i < 16; ++i) printf("%3d ", it->yuv_out_[i + j * BPS]);
    printf("    ");
    for (i = 0; i < 16; ++i) {
      printf("%1d ", abs(it->yuv_out_[i + j * BPS] - it->yuv_in_[i + j * BPS]));
    }
    printf("\n");
  }
  printf("\nD:%d SD:%d R:%d H:%d nz:0x%x score:%d\n",
         (int)rd->D, (int)rd->SD, (int)rd->R, (int)rd->H, (int)rd->nz,
         (int)rd->score);
  if (is_i16) {
    printf("Mode: %d\n", rd->mode_i16);
    printf("y_dc_levels:");
    for (i = 0; i < 16; ++i) printf("%3d ", rd->y_dc_levels[i]);
    printf("\n");
  } else {
    printf("Modes[16]: ");
    for (i = 0; i < 16; ++i) printf("%d ", rd->modes_i4[i]);
    printf("\n");
  }
  printf("y_ac_levels:\n");
  for (j = 0; j < 16; ++j) {
    // in i16 mode, the DC coefficients live in y_dc_levels, so skip index 0.
    for (i = is_i16 ? 1 : 0; i < 16; ++i) {
      printf("%4d ", rd->y_ac_levels[j][i]);
    }
    printf("\n");
  }
  printf("\n");
  printf("uv_levels (mode=%d):\n", rd->mode_uv);
  for (j = 0; j < 8; ++j) {
    for (i = 0; i < 16; ++i) {
      printf("%4d ", rd->uv_levels[j][i]);
    }
    printf("\n");
  }
}

#endif   // DEBUG_BLOCK
//------------------------------------------------------------------------------

#ifndef WEBP_INLINE
#define WEBP_INLINE inline   // fallback in case ./vp8enci.h did not define it
#endif

// Clamps 'v' to the closed interval [m, M].
static WEBP_INLINE int clip(int v, int m, int M) {
  return v < m ? m : v > M ? M : v;
}
// Coefficient scan order (zigzag), mapping scan position -> raster index.
static const uint8_t kZigzag[16] = {
  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};

// DC quantizer step, indexed by the 7-bit quantizer index.
static const uint8_t kDcTable[128] = {
  4,     5,   6,   7,   8,   9,  10,  10,
  11,   12,  13,  14,  15,  16,  17,  17,
  18,   19,  20,  20,  21,  21,  22,  22,
  23,   23,  24,  25,  25,  26,  27,  28,
  29,   30,  31,  32,  33,  34,  35,  36,
  37,   37,  38,  39,  40,  41,  42,  43,
  44,   45,  46,  46,  47,  48,  49,  50,
  51,   52,  53,  54,  55,  56,  57,  58,
  59,   60,  61,  62,  63,  64,  65,  66,
  67,   68,  69,  70,  71,  72,  73,  74,
  75,   76,  76,  77,  78,  79,  80,  81,
  82,   83,  84,  85,  86,  87,  88,  89,
  91,   93,  95,  96,  98, 100, 101, 102,
  104, 106, 108, 110, 112, 114, 116, 118,
  122, 124, 126, 128, 130, 132, 134, 136,
  138, 140, 143, 145, 148, 151, 154, 157
};

// AC quantizer step, indexed by the 7-bit quantizer index.
static const uint16_t kAcTable[128] = {
  4,     5,   6,   7,   8,   9,  10,  11,
  12,   13,  14,  15,  16,  17,  18,  19,
  20,   21,  22,  23,  24,  25,  26,  27,
  28,   29,  30,  31,  32,  33,  34,  35,
  36,   37,  38,  39,  40,  41,  42,  43,
  44,   45,  46,  47,  48,  49,  50,  51,
  52,   53,  54,  55,  56,  57,  58,  60,
  62,   64,  66,  68,  70,  72,  74,  76,
  78,   80,  82,  84,  86,  88,  90,  92,
  94,   96,  98, 100, 102, 104, 106, 108,
  110, 112, 114, 116, 119, 122, 125, 128,
  131, 134, 137, 140, 143, 146, 149, 152,
  155, 158, 161, 164, 167, 170, 173, 177,
  181, 185, 189, 193, 197, 201, 205, 209,
  213, 217, 221, 225, 229, 234, 239, 245,
  249, 254, 259, 264, 269, 274, 279, 284
};

// AC quantizer step for the Y2 (luma-DC / WHT) plane.
static const uint16_t kAcTable2[128] = {
  8,     8,   9,  10,  12,  13,  15,  17,
  18,   20,  21,  23,  24,  26,  27,  29,
  31,   32,  34,  35,  37,  38,  40,  41,
  43,   44,  46,  48,  49,  51,  52,  54,
  55,   57,  58,  60,  62,  63,  65,  66,
  68,   69,  71,  72,  74,  75,  77,  79,
  80,   82,  83,  85,  86,  88,  89,  93,
  96,   99, 102, 105, 108, 111, 114, 117,
  120, 124, 127, 130, 133, 136, 139, 142,
  145, 148, 151, 155, 158, 161, 164, 167,
  170, 173, 176, 179, 184, 189, 193, 198,
  203, 207, 212, 217, 221, 226, 230, 235,
  240, 244, 249, 254, 258, 263, 268, 274,
  280, 286, 292, 299, 305, 311, 317, 323,
  330, 336, 342, 348, 354, 362, 370, 379,
  385, 393, 401, 409, 416, 424, 432, 440
};

static const uint8_t kBiasMatrices[3][2] = {  // [luma-ac,luma-dc,chroma][dc,ac]
  { 96, 110 }, { 96, 108 }, { 110, 115 }
};

// Sharpening by (slightly) raising the hi-frequency coeffs.
// Hack-ish but helpful for mid-bitrate range. Use with care.
#define SHARPEN_BITS 11  // number of descaling bits for sharpening bias
static const uint8_t kFreqSharpening[16] = {
  0,  30, 60, 90,
  30, 60, 90, 90,
  60, 90, 90, 90,
  90, 90, 90, 90
};
182 //------------------------------------------------------------------------------
183 // Initialize quantization parameters in VP8Matrix
185 // Returns the average quantizer
186 static int ExpandMatrix(VP8Matrix* const m, int type) {
187 int i, sum;
188 for (i = 0; i < 2; ++i) {
189 const int is_ac_coeff = (i > 0);
190 const int bias = kBiasMatrices[type][is_ac_coeff];
191 m->iq_[i] = (1 << QFIX) / m->q_[i];
192 m->bias_[i] = BIAS(bias);
193 // zthresh_ is the exact value such that QUANTDIV(coeff, iQ, B) is:
194 // * zero if coeff <= zthresh
195 // * non-zero if coeff > zthresh
196 m->zthresh_[i] = ((1 << QFIX) - 1 - m->bias_[i]) / m->iq_[i];
198 for (i = 2; i < 16; ++i) {
199 m->q_[i] = m->q_[1];
200 m->iq_[i] = m->iq_[1];
201 m->bias_[i] = m->bias_[1];
202 m->zthresh_[i] = m->zthresh_[1];
204 for (sum = 0, i = 0; i < 16; ++i) {
205 if (type == 0) { // we only use sharpening for AC luma coeffs
206 m->sharpen_[i] = (kFreqSharpening[i] * m->q_[i]) >> SHARPEN_BITS;
207 } else {
208 m->sharpen_[i] = 0;
210 sum += m->q_[i];
212 return (sum + 8) >> 4;
215 static void SetupMatrices(VP8Encoder* enc) {
216 int i;
217 const int tlambda_scale =
218 (enc->method_ >= 4) ? enc->config_->sns_strength
219 : 0;
220 const int num_segments = enc->segment_hdr_.num_segments_;
221 for (i = 0; i < num_segments; ++i) {
222 VP8SegmentInfo* const m = &enc->dqm_[i];
223 const int q = m->quant_;
224 int q4, q16, quv;
225 m->y1_.q_[0] = kDcTable[clip(q + enc->dq_y1_dc_, 0, 127)];
226 m->y1_.q_[1] = kAcTable[clip(q, 0, 127)];
228 m->y2_.q_[0] = kDcTable[ clip(q + enc->dq_y2_dc_, 0, 127)] * 2;
229 m->y2_.q_[1] = kAcTable2[clip(q + enc->dq_y2_ac_, 0, 127)];
231 m->uv_.q_[0] = kDcTable[clip(q + enc->dq_uv_dc_, 0, 117)];
232 m->uv_.q_[1] = kAcTable[clip(q + enc->dq_uv_ac_, 0, 127)];
234 q4 = ExpandMatrix(&m->y1_, 0);
235 q16 = ExpandMatrix(&m->y2_, 1);
236 quv = ExpandMatrix(&m->uv_, 2);
238 m->lambda_i4_ = (3 * q4 * q4) >> 7;
239 m->lambda_i16_ = (3 * q16 * q16);
240 m->lambda_uv_ = (3 * quv * quv) >> 6;
241 m->lambda_mode_ = (1 * q4 * q4) >> 7;
242 m->lambda_trellis_i4_ = (7 * q4 * q4) >> 3;
243 m->lambda_trellis_i16_ = (q16 * q16) >> 2;
244 m->lambda_trellis_uv_ = (quv *quv) << 1;
245 m->tlambda_ = (tlambda_scale * q4) >> 5;
247 m->min_disto_ = 10 * m->y1_.q_[0]; // quantization-aware min disto
248 m->max_edge_ = 0;
252 //------------------------------------------------------------------------------
253 // Initialize filtering parameters
255 // Very small filter-strength values have close to no visual effect. So we can
256 // save a little decoding-CPU by turning filtering off for these.
257 #define FSTRENGTH_CUTOFF 2
259 static void SetupFilterStrength(VP8Encoder* const enc) {
260 int i;
261 // level0 is in [0..500]. Using '-f 50' as filter_strength is mid-filtering.
262 const int level0 = 5 * enc->config_->filter_strength;
263 for (i = 0; i < NUM_MB_SEGMENTS; ++i) {
264 VP8SegmentInfo* const m = &enc->dqm_[i];
265 // We focus on the quantization of AC coeffs.
266 const int qstep = kAcTable[clip(m->quant_, 0, 127)] >> 2;
267 const int base_strength =
268 VP8FilterStrengthFromDelta(enc->filter_hdr_.sharpness_, qstep);
269 // Segments with lower complexity ('beta') will be less filtered.
270 const int f = base_strength * level0 / (256 + m->beta_);
271 m->fstrength_ = (f < FSTRENGTH_CUTOFF) ? 0 : (f > 63) ? 63 : f;
273 // We record the initial strength (mainly for the case of 1-segment only).
274 enc->filter_hdr_.level_ = enc->dqm_[0].fstrength_;
275 enc->filter_hdr_.simple_ = (enc->config_->filter_type == 0);
276 enc->filter_hdr_.sharpness_ = enc->config_->filter_sharpness;
279 //------------------------------------------------------------------------------
281 // Note: if you change the values below, remember that the max range
282 // allowed by the syntax for DQ_UV is [-16,16].
283 #define MAX_DQ_UV (6)
284 #define MIN_DQ_UV (-4)
286 // We want to emulate jpeg-like behaviour where the expected "good" quality
287 // is around q=75. Internally, our "good" middle is around c=50. So we
288 // map accordingly using linear piece-wise function
289 static double QualityToCompression(double c) {
290 const double linear_c = (c < 0.75) ? c * (2. / 3.) : 2. * c - 1.;
291 // The file size roughly scales as pow(quantizer, 3.). Actually, the
292 // exponent is somewhere between 2.8 and 3.2, but we're mostly interested
293 // in the mid-quant range. So we scale the compressibility inversely to
294 // this power-law: quant ~= compression ^ 1/3. This law holds well for
295 // low quant. Finer modeling for high-quant would make use of kAcTable[]
296 // more explicitly.
297 const double v = pow(linear_c, 1 / 3.);
298 return v;
301 static double QualityToJPEGCompression(double c, double alpha) {
302 // We map the complexity 'alpha' and quality setting 'c' to a compression
303 // exponent empirically matched to the compression curve of libjpeg6b.
304 // On average, the WebP output size will be roughly similar to that of a
305 // JPEG file compressed with same quality factor.
306 const double amin = 0.30;
307 const double amax = 0.85;
308 const double exp_min = 0.4;
309 const double exp_max = 0.9;
310 const double slope = (exp_min - exp_max) / (amax - amin);
311 // Linearly interpolate 'expn' from exp_min to exp_max
312 // in the [amin, amax] range.
313 const double expn = (alpha > amax) ? exp_min
314 : (alpha < amin) ? exp_max
315 : exp_max + slope * (alpha - amin);
316 const double v = pow(c, expn);
317 return v;
320 static int SegmentsAreEquivalent(const VP8SegmentInfo* const S1,
321 const VP8SegmentInfo* const S2) {
322 return (S1->quant_ == S2->quant_) && (S1->fstrength_ == S2->fstrength_);
325 static void SimplifySegments(VP8Encoder* const enc) {
326 int map[NUM_MB_SEGMENTS] = { 0, 1, 2, 3 };
327 const int num_segments = enc->segment_hdr_.num_segments_;
328 int num_final_segments = 1;
329 int s1, s2;
330 for (s1 = 1; s1 < num_segments; ++s1) { // find similar segments
331 const VP8SegmentInfo* const S1 = &enc->dqm_[s1];
332 int found = 0;
333 // check if we already have similar segment
334 for (s2 = 0; s2 < num_final_segments; ++s2) {
335 const VP8SegmentInfo* const S2 = &enc->dqm_[s2];
336 if (SegmentsAreEquivalent(S1, S2)) {
337 found = 1;
338 break;
341 map[s1] = s2;
342 if (!found) {
343 if (num_final_segments != s1) {
344 enc->dqm_[num_final_segments] = enc->dqm_[s1];
346 ++num_final_segments;
349 if (num_final_segments < num_segments) { // Remap
350 int i = enc->mb_w_ * enc->mb_h_;
351 while (i-- > 0) enc->mb_info_[i].segment_ = map[enc->mb_info_[i].segment_];
352 enc->segment_hdr_.num_segments_ = num_final_segments;
353 // Replicate the trailing segment infos (it's mostly cosmetics)
354 for (i = num_final_segments; i < num_segments; ++i) {
355 enc->dqm_[i] = enc->dqm_[num_final_segments - 1];
360 void VP8SetSegmentParams(VP8Encoder* const enc, float quality) {
361 int i;
362 int dq_uv_ac, dq_uv_dc;
363 const int num_segments = enc->segment_hdr_.num_segments_;
364 const double amp = SNS_TO_DQ * enc->config_->sns_strength / 100. / 128.;
365 const double Q = quality / 100.;
366 const double c_base = enc->config_->emulate_jpeg_size ?
367 QualityToJPEGCompression(Q, enc->alpha_ / 255.) :
368 QualityToCompression(Q);
369 for (i = 0; i < num_segments; ++i) {
370 // We modulate the base coefficient to accommodate for the quantization
371 // susceptibility and allow denser segments to be quantized more.
372 const double expn = 1. - amp * enc->dqm_[i].alpha_;
373 const double c = pow(c_base, expn);
374 const int q = (int)(127. * (1. - c));
375 assert(expn > 0.);
376 enc->dqm_[i].quant_ = clip(q, 0, 127);
379 // purely indicative in the bitstream (except for the 1-segment case)
380 enc->base_quant_ = enc->dqm_[0].quant_;
382 // fill-in values for the unused segments (required by the syntax)
383 for (i = num_segments; i < NUM_MB_SEGMENTS; ++i) {
384 enc->dqm_[i].quant_ = enc->base_quant_;
387 // uv_alpha_ is normally spread around ~60. The useful range is
388 // typically ~30 (quite bad) to ~100 (ok to decimate UV more).
389 // We map it to the safe maximal range of MAX/MIN_DQ_UV for dq_uv.
390 dq_uv_ac = (enc->uv_alpha_ - MID_ALPHA) * (MAX_DQ_UV - MIN_DQ_UV)
391 / (MAX_ALPHA - MIN_ALPHA);
392 // we rescale by the user-defined strength of adaptation
393 dq_uv_ac = dq_uv_ac * enc->config_->sns_strength / 100;
394 // and make it safe.
395 dq_uv_ac = clip(dq_uv_ac, MIN_DQ_UV, MAX_DQ_UV);
396 // We also boost the dc-uv-quant a little, based on sns-strength, since
397 // U/V channels are quite more reactive to high quants (flat DC-blocks
398 // tend to appear, and are displeasant).
399 dq_uv_dc = -4 * enc->config_->sns_strength / 100;
400 dq_uv_dc = clip(dq_uv_dc, -15, 15); // 4bit-signed max allowed
402 enc->dq_y1_dc_ = 0; // TODO(skal): dq-lum
403 enc->dq_y2_dc_ = 0;
404 enc->dq_y2_ac_ = 0;
405 enc->dq_uv_dc_ = dq_uv_dc;
406 enc->dq_uv_ac_ = dq_uv_ac;
408 SetupFilterStrength(enc); // initialize segments' filtering, eventually
410 if (num_segments > 1) SimplifySegments(enc);
412 SetupMatrices(enc); // finalize quantization matrices
415 //------------------------------------------------------------------------------
416 // Form the predictions in cache
418 // Must be ordered using {DC_PRED, TM_PRED, V_PRED, H_PRED} as index
419 const int VP8I16ModeOffsets[4] = { I16DC16, I16TM16, I16VE16, I16HE16 };
420 const int VP8UVModeOffsets[4] = { C8DC8, C8TM8, C8VE8, C8HE8 };
422 // Must be indexed using {B_DC_PRED -> B_HU_PRED} as index
423 const int VP8I4ModeOffsets[NUM_BMODES] = {
424 I4DC4, I4TM4, I4VE4, I4HE4, I4RD4, I4VR4, I4LD4, I4VL4, I4HD4, I4HU4
427 void VP8MakeLuma16Preds(const VP8EncIterator* const it) {
428 const uint8_t* const left = it->x_ ? it->y_left_ : NULL;
429 const uint8_t* const top = it->y_ ? it->y_top_ : NULL;
430 VP8EncPredLuma16(it->yuv_p_, left, top);
433 void VP8MakeChroma8Preds(const VP8EncIterator* const it) {
434 const uint8_t* const left = it->x_ ? it->u_left_ : NULL;
435 const uint8_t* const top = it->y_ ? it->uv_top_ : NULL;
436 VP8EncPredChroma8(it->yuv_p_, left, top);
439 void VP8MakeIntra4Preds(const VP8EncIterator* const it) {
440 VP8EncPredLuma4(it->yuv_p_, it->i4_top_);
443 //------------------------------------------------------------------------------
444 // Quantize
446 // Layout:
447 // +----+
448 // |YYYY| 0
449 // |YYYY| 4
450 // |YYYY| 8
451 // |YYYY| 12
452 // +----+
453 // |UUVV| 16
454 // |UUVV| 20
455 // +----+
457 const int VP8Scan[16 + 4 + 4] = {
458 // Luma
459 0 + 0 * BPS, 4 + 0 * BPS, 8 + 0 * BPS, 12 + 0 * BPS,
460 0 + 4 * BPS, 4 + 4 * BPS, 8 + 4 * BPS, 12 + 4 * BPS,
461 0 + 8 * BPS, 4 + 8 * BPS, 8 + 8 * BPS, 12 + 8 * BPS,
462 0 + 12 * BPS, 4 + 12 * BPS, 8 + 12 * BPS, 12 + 12 * BPS,
464 0 + 0 * BPS, 4 + 0 * BPS, 0 + 4 * BPS, 4 + 4 * BPS, // U
465 8 + 0 * BPS, 12 + 0 * BPS, 8 + 4 * BPS, 12 + 4 * BPS // V
468 //------------------------------------------------------------------------------
469 // Distortion measurement
471 static const uint16_t kWeightY[16] = {
472 38, 32, 20, 9, 32, 28, 17, 7, 20, 17, 10, 4, 9, 7, 4, 2
475 static const uint16_t kWeightTrellis[16] = {
476 #if USE_TDISTO == 0
477 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
478 #else
479 30, 27, 19, 11,
480 27, 24, 17, 10,
481 19, 17, 12, 8,
482 11, 10, 8, 6
483 #endif
486 // Init/Copy the common fields in score.
487 static void InitScore(VP8ModeScore* const rd) {
488 rd->D = 0;
489 rd->SD = 0;
490 rd->R = 0;
491 rd->H = 0;
492 rd->nz = 0;
493 rd->score = MAX_COST;
496 static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
497 dst->D = src->D;
498 dst->SD = src->SD;
499 dst->R = src->R;
500 dst->H = src->H;
501 dst->nz = src->nz; // note that nz is not accumulated, but just copied.
502 dst->score = src->score;
505 static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) {
506 dst->D += src->D;
507 dst->SD += src->SD;
508 dst->R += src->R;
509 dst->H += src->H;
510 dst->nz |= src->nz; // here, new nz bits are accumulated.
511 dst->score += src->score;
514 //------------------------------------------------------------------------------
515 // Performs trellis-optimized quantization.
517 // Trellis
519 typedef struct {
520 int prev; // best previous
521 int level; // level
522 int sign; // sign of coeff_i
523 score_t cost; // bit cost
524 score_t error; // distortion = sum of (|coeff_i| - level_i * Q_i)^2
525 int ctx; // context (only depends on 'level'. Could be spared.)
526 } Node;
528 // If a coefficient was quantized to a value Q (using a neutral bias),
529 // we test all alternate possibilities between [Q-MIN_DELTA, Q+MAX_DELTA]
530 // We don't test negative values though.
531 #define MIN_DELTA 0 // how much lower level to try
532 #define MAX_DELTA 1 // how much higher
533 #define NUM_NODES (MIN_DELTA + 1 + MAX_DELTA)
534 #define NODE(n, l) (nodes[(n) + 1][(l) + MIN_DELTA])
536 static WEBP_INLINE void SetRDScore(int lambda, VP8ModeScore* const rd) {
537 // TODO: incorporate the "* 256" in the tables?
538 rd->score = (rd->R + rd->H) * lambda + 256 * (rd->D + rd->SD);
541 static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
542 score_t distortion) {
543 return rate * lambda + 256 * distortion;
546 static int TrellisQuantizeBlock(const VP8EncIterator* const it,
547 int16_t in[16], int16_t out[16],
548 int ctx0, int coeff_type,
549 const VP8Matrix* const mtx,
550 int lambda) {
551 ProbaArray* const last_costs = it->enc_->proba_.coeffs_[coeff_type];
552 CostArray* const costs = it->enc_->proba_.level_cost_[coeff_type];
553 const int first = (coeff_type == 0) ? 1 : 0;
554 Node nodes[17][NUM_NODES];
555 int best_path[3] = {-1, -1, -1}; // store best-last/best-level/best-previous
556 score_t best_score;
557 int best_node;
558 int last = first - 1;
559 int n, m, p, nz;
562 score_t cost;
563 score_t max_error;
564 const int thresh = mtx->q_[1] * mtx->q_[1] / 4;
565 const int last_proba = last_costs[VP8EncBands[first]][ctx0][0];
567 // compute maximal distortion.
568 max_error = 0;
569 for (n = first; n < 16; ++n) {
570 const int j = kZigzag[n];
571 const int err = in[j] * in[j];
572 max_error += kWeightTrellis[j] * err;
573 if (err > thresh) last = n;
575 // we don't need to go inspect up to n = 16 coeffs. We can just go up
576 // to last + 1 (inclusive) without losing much.
577 if (last < 15) ++last;
579 // compute 'skip' score. This is the max score one can do.
580 cost = VP8BitCost(0, last_proba);
581 best_score = RDScoreTrellis(lambda, cost, max_error);
583 // initialize source node.
584 n = first - 1;
585 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
586 NODE(n, m).cost = 0;
587 NODE(n, m).error = max_error;
588 NODE(n, m).ctx = ctx0;
592 // traverse trellis.
593 for (n = first; n <= last; ++n) {
594 const int j = kZigzag[n];
595 const int Q = mtx->q_[j];
596 const int iQ = mtx->iq_[j];
597 const int B = BIAS(0x00); // neutral bias
598 // note: it's important to take sign of the _original_ coeff,
599 // so we don't have to consider level < 0 afterward.
600 const int sign = (in[j] < 0);
601 const int coeff0 = (sign ? -in[j] : in[j]) + mtx->sharpen_[j];
602 int level0 = QUANTDIV(coeff0, iQ, B);
603 if (level0 > MAX_LEVEL) level0 = MAX_LEVEL;
605 // test all alternate level values around level0.
606 for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
607 Node* const cur = &NODE(n, m);
608 int delta_error, new_error;
609 score_t cur_score = MAX_COST;
610 int level = level0 + m;
611 int last_proba;
613 cur->sign = sign;
614 cur->level = level;
615 cur->ctx = (level == 0) ? 0 : (level == 1) ? 1 : 2;
616 if (level > MAX_LEVEL || level < 0) { // node is dead?
617 cur->cost = MAX_COST;
618 continue;
620 last_proba = last_costs[VP8EncBands[n + 1]][cur->ctx][0];
622 // Compute delta_error = how much coding this level will
623 // subtract as distortion to max_error
624 new_error = coeff0 - level * Q;
625 delta_error =
626 kWeightTrellis[j] * (coeff0 * coeff0 - new_error * new_error);
628 // Inspect all possible non-dead predecessors. Retain only the best one.
629 for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
630 const Node* const prev = &NODE(n - 1, p);
631 const int prev_ctx = prev->ctx;
632 const uint16_t* const tcost = costs[VP8EncBands[n]][prev_ctx];
633 const score_t total_error = prev->error - delta_error;
634 score_t cost, base_cost, score;
636 if (prev->cost >= MAX_COST) { // dead node?
637 continue;
640 // Base cost of both terminal/non-terminal
641 base_cost = prev->cost + VP8LevelCost(tcost, level);
643 // Examine node assuming it's a non-terminal one.
644 cost = base_cost;
645 if (level && n < 15) {
646 cost += VP8BitCost(1, last_proba);
648 score = RDScoreTrellis(lambda, cost, total_error);
649 if (score < cur_score) {
650 cur_score = score;
651 cur->cost = cost;
652 cur->error = total_error;
653 cur->prev = p;
656 // Now, record best terminal node (and thus best entry in the graph).
657 if (level) {
658 cost = base_cost;
659 if (n < 15) cost += VP8BitCost(0, last_proba);
660 score = RDScoreTrellis(lambda, cost, total_error);
661 if (score < best_score) {
662 best_score = score;
663 best_path[0] = n; // best eob position
664 best_path[1] = m; // best level
665 best_path[2] = p; // best predecessor
672 // Fresh start
673 memset(in + first, 0, (16 - first) * sizeof(*in));
674 memset(out + first, 0, (16 - first) * sizeof(*out));
675 if (best_path[0] == -1) {
676 return 0; // skip!
679 // Unwind the best path.
680 // Note: best-prev on terminal node is not necessarily equal to the
681 // best_prev for non-terminal. So we patch best_path[2] in.
682 n = best_path[0];
683 best_node = best_path[1];
684 NODE(n, best_node).prev = best_path[2]; // force best-prev for terminal
685 nz = 0;
687 for (; n >= first; --n) {
688 const Node* const node = &NODE(n, best_node);
689 const int j = kZigzag[n];
690 out[n] = node->sign ? -node->level : node->level;
691 nz |= (node->level != 0);
692 in[j] = out[n] * mtx->q_[j];
693 best_node = node->prev;
695 return nz;
698 #undef NODE
700 //------------------------------------------------------------------------------
701 // Performs: difference, transform, quantize, back-transform, add
702 // all at once. Output is the reconstructed block in *yuv_out, and the
703 // quantized levels in *levels.
705 static int ReconstructIntra16(VP8EncIterator* const it,
706 VP8ModeScore* const rd,
707 uint8_t* const yuv_out,
708 int mode) {
709 VP8Encoder* const enc = it->enc_;
710 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
711 const uint8_t* const src = it->yuv_in_ + Y_OFF;
712 VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
713 int nz = 0;
714 int n;
715 int16_t tmp[16][16], dc_tmp[16];
717 for (n = 0; n < 16; ++n) {
718 VP8FTransform(src + VP8Scan[n], ref + VP8Scan[n], tmp[n]);
720 VP8FTransformWHT(tmp[0], dc_tmp);
721 nz |= VP8EncQuantizeBlockWHT(dc_tmp, rd->y_dc_levels, &dqm->y2_) << 24;
723 if (DO_TRELLIS_I16 && it->do_trellis_) {
724 int x, y;
725 VP8IteratorNzToBytes(it);
726 for (y = 0, n = 0; y < 4; ++y) {
727 for (x = 0; x < 4; ++x, ++n) {
728 const int ctx = it->top_nz_[x] + it->left_nz_[y];
729 const int non_zero =
730 TrellisQuantizeBlock(it, tmp[n], rd->y_ac_levels[n], ctx, 0,
731 &dqm->y1_, dqm->lambda_trellis_i16_);
732 it->top_nz_[x] = it->left_nz_[y] = non_zero;
733 nz |= non_zero << n;
736 } else {
737 for (n = 0; n < 16; ++n) {
738 nz |= VP8EncQuantizeBlock(tmp[n], rd->y_ac_levels[n], 1, &dqm->y1_) << n;
742 // Transform back
743 VP8ITransformWHT(dc_tmp, tmp[0]);
744 for (n = 0; n < 16; n += 2) {
745 VP8ITransform(ref + VP8Scan[n], tmp[n], yuv_out + VP8Scan[n], 1);
748 return nz;
751 static int ReconstructIntra4(VP8EncIterator* const it,
752 int16_t levels[16],
753 const uint8_t* const src,
754 uint8_t* const yuv_out,
755 int mode) {
756 const VP8Encoder* const enc = it->enc_;
757 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
758 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
759 int nz = 0;
760 int16_t tmp[16];
762 VP8FTransform(src, ref, tmp);
763 if (DO_TRELLIS_I4 && it->do_trellis_) {
764 const int x = it->i4_ & 3, y = it->i4_ >> 2;
765 const int ctx = it->top_nz_[x] + it->left_nz_[y];
766 nz = TrellisQuantizeBlock(it, tmp, levels, ctx, 3, &dqm->y1_,
767 dqm->lambda_trellis_i4_);
768 } else {
769 nz = VP8EncQuantizeBlock(tmp, levels, 0, &dqm->y1_);
771 VP8ITransform(ref, tmp, yuv_out, 0);
772 return nz;
775 static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
776 uint8_t* const yuv_out, int mode) {
777 const VP8Encoder* const enc = it->enc_;
778 const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode];
779 const uint8_t* const src = it->yuv_in_ + U_OFF;
780 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
781 int nz = 0;
782 int n;
783 int16_t tmp[8][16];
785 for (n = 0; n < 8; ++n) {
786 VP8FTransform(src + VP8Scan[16 + n], ref + VP8Scan[16 + n], tmp[n]);
788 if (DO_TRELLIS_UV && it->do_trellis_) {
789 int ch, x, y;
790 for (ch = 0, n = 0; ch <= 2; ch += 2) {
791 for (y = 0; y < 2; ++y) {
792 for (x = 0; x < 2; ++x, ++n) {
793 const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
794 const int non_zero =
795 TrellisQuantizeBlock(it, tmp[n], rd->uv_levels[n], ctx, 2,
796 &dqm->uv_, dqm->lambda_trellis_uv_);
797 it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
798 nz |= non_zero << n;
802 } else {
803 for (n = 0; n < 8; ++n) {
804 nz |= VP8EncQuantizeBlock(tmp[n], rd->uv_levels[n], 0, &dqm->uv_) << n;
808 for (n = 0; n < 8; n += 2) {
809 VP8ITransform(ref + VP8Scan[16 + n], tmp[n], yuv_out + VP8Scan[16 + n], 1);
811 return (nz << 16);
814 //------------------------------------------------------------------------------
815 // RD-opt decision. Reconstruct each modes, evalue distortion and bit-cost.
816 // Pick the mode is lower RD-cost = Rate + lambda * Distortion.
818 static void StoreMaxDelta(VP8SegmentInfo* const dqm, const int16_t DCs[16]) {
819 // We look at the first three AC coefficients to determine what is the average
820 // delta between each sub-4x4 block.
821 const int v0 = abs(DCs[1]);
822 const int v1 = abs(DCs[4]);
823 const int v2 = abs(DCs[5]);
824 int max_v = (v0 > v1) ? v1 : v0;
825 max_v = (v2 > max_v) ? v2 : max_v;
826 if (max_v > dqm->max_edge_) dqm->max_edge_ = max_v;
829 static void SwapPtr(uint8_t** a, uint8_t** b) {
830 uint8_t* const tmp = *a;
831 *a = *b;
832 *b = tmp;
835 static void SwapOut(VP8EncIterator* const it) {
836 SwapPtr(&it->yuv_out_, &it->yuv_out2_);
839 static score_t IsFlat(const int16_t* levels, int num_blocks, score_t thresh) {
840 score_t score = 0;
841 while (num_blocks-- > 0) { // TODO(skal): refine positional scoring?
842 int i;
843 for (i = 1; i < 16; ++i) { // omit DC, we're only interested in AC
844 score += (levels[i] != 0);
845 if (score > thresh) return 0;
847 levels += 16;
849 return 1;
852 static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* const rd) {
853 const int kNumBlocks = 16;
854 VP8Encoder* const enc = it->enc_;
855 VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
856 const int lambda = dqm->lambda_i16_;
857 const int tlambda = dqm->tlambda_;
858 const uint8_t* const src = it->yuv_in_ + Y_OFF;
859 VP8ModeScore rd16;
860 int mode;
862 rd->mode_i16 = -1;
863 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
864 uint8_t* const tmp_dst = it->yuv_out2_ + Y_OFF; // scratch buffer
865 int nz;
867 // Reconstruct
868 nz = ReconstructIntra16(it, &rd16, tmp_dst, mode);
870 // Measure RD-score
871 rd16.D = VP8SSE16x16(src, tmp_dst);
872 rd16.SD = tlambda ? MULT_8B(tlambda, VP8TDisto16x16(src, tmp_dst, kWeightY))
873 : 0;
874 rd16.H = VP8FixedCostsI16[mode];
875 rd16.R = VP8GetCostLuma16(it, &rd16);
876 if (mode > 0 &&
877 IsFlat(rd16.y_ac_levels[0], kNumBlocks, FLATNESS_LIMIT_I16)) {
878 // penalty to avoid flat area to be mispredicted by complex mode
879 rd16.R += FLATNESS_PENALTY * kNumBlocks;
882 // Since we always examine Intra16 first, we can overwrite *rd directly.
883 SetRDScore(lambda, &rd16);
884 if (mode == 0 || rd16.score < rd->score) {
885 CopyScore(rd, &rd16);
886 rd->mode_i16 = mode;
887 rd->nz = nz;
888 memcpy(rd->y_ac_levels, rd16.y_ac_levels, sizeof(rd16.y_ac_levels));
889 memcpy(rd->y_dc_levels, rd16.y_dc_levels, sizeof(rd16.y_dc_levels));
890 SwapOut(it);
893 SetRDScore(dqm->lambda_mode_, rd); // finalize score for mode decision.
894 VP8SetIntra16Mode(it, rd->mode_i16);
896 // we have a blocky macroblock (only DCs are non-zero) with fairly high
897 // distortion, record max delta so we can later adjust the minimal filtering
898 // strength needed to smooth these blocks out.
899 if ((rd->nz & 0xffff) == 0 && rd->D > dqm->min_disto_) {
900 StoreMaxDelta(dqm, rd->y_dc_levels);
904 //------------------------------------------------------------------------------
906 // return the cost array corresponding to the surrounding prediction modes.
907 static const uint16_t* GetCostModeI4(VP8EncIterator* const it,
908 const uint8_t modes[16]) {
909 const int preds_w = it->enc_->preds_w_;
910 const int x = (it->i4_ & 3), y = it->i4_ >> 2;
911 const int left = (x == 0) ? it->preds_[y * preds_w - 1] : modes[it->i4_ - 1];
912 const int top = (y == 0) ? it->preds_[-preds_w + x] : modes[it->i4_ - 4];
913 return VP8FixedCostsI4[top][left];
916 static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) {
917 const VP8Encoder* const enc = it->enc_;
918 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
919 const int lambda = dqm->lambda_i4_;
920 const int tlambda = dqm->tlambda_;
921 const uint8_t* const src0 = it->yuv_in_ + Y_OFF;
922 uint8_t* const best_blocks = it->yuv_out2_ + Y_OFF;
923 int total_header_bits = 0;
924 VP8ModeScore rd_best;
926 if (enc->max_i4_header_bits_ == 0) {
927 return 0;
930 InitScore(&rd_best);
931 rd_best.H = 211; // '211' is the value of VP8BitCost(0, 145)
932 SetRDScore(dqm->lambda_mode_, &rd_best);
933 VP8IteratorStartI4(it);
934 do {
935 const int kNumBlocks = 1;
936 VP8ModeScore rd_i4;
937 int mode;
938 int best_mode = -1;
939 const uint8_t* const src = src0 + VP8Scan[it->i4_];
940 const uint16_t* const mode_costs = GetCostModeI4(it, rd->modes_i4);
941 uint8_t* best_block = best_blocks + VP8Scan[it->i4_];
942 uint8_t* tmp_dst = it->yuv_p_ + I4TMP; // scratch buffer.
944 InitScore(&rd_i4);
945 VP8MakeIntra4Preds(it);
946 for (mode = 0; mode < NUM_BMODES; ++mode) {
947 VP8ModeScore rd_tmp;
948 int16_t tmp_levels[16];
950 // Reconstruct
951 rd_tmp.nz =
952 ReconstructIntra4(it, tmp_levels, src, tmp_dst, mode) << it->i4_;
954 // Compute RD-score
955 rd_tmp.D = VP8SSE4x4(src, tmp_dst);
956 rd_tmp.SD =
957 tlambda ? MULT_8B(tlambda, VP8TDisto4x4(src, tmp_dst, kWeightY))
958 : 0;
959 rd_tmp.H = mode_costs[mode];
960 rd_tmp.R = VP8GetCostLuma4(it, tmp_levels);
961 if (mode > 0 && IsFlat(tmp_levels, kNumBlocks, FLATNESS_LIMIT_I4)) {
962 rd_tmp.R += FLATNESS_PENALTY * kNumBlocks;
965 SetRDScore(lambda, &rd_tmp);
966 if (best_mode < 0 || rd_tmp.score < rd_i4.score) {
967 CopyScore(&rd_i4, &rd_tmp);
968 best_mode = mode;
969 SwapPtr(&tmp_dst, &best_block);
970 memcpy(rd_best.y_ac_levels[it->i4_], tmp_levels, sizeof(tmp_levels));
973 SetRDScore(dqm->lambda_mode_, &rd_i4);
974 AddScore(&rd_best, &rd_i4);
975 if (rd_best.score >= rd->score) {
976 return 0;
978 total_header_bits += (int)rd_i4.H; // <- equal to mode_costs[best_mode];
979 if (total_header_bits > enc->max_i4_header_bits_) {
980 return 0;
982 // Copy selected samples if not in the right place already.
983 if (best_block != best_blocks + VP8Scan[it->i4_]) {
984 VP8Copy4x4(best_block, best_blocks + VP8Scan[it->i4_]);
986 rd->modes_i4[it->i4_] = best_mode;
987 it->top_nz_[it->i4_ & 3] = it->left_nz_[it->i4_ >> 2] = (rd_i4.nz ? 1 : 0);
988 } while (VP8IteratorRotateI4(it, best_blocks));
990 // finalize state
991 CopyScore(rd, &rd_best);
992 VP8SetIntra4Mode(it, rd->modes_i4);
993 SwapOut(it);
994 memcpy(rd->y_ac_levels, rd_best.y_ac_levels, sizeof(rd->y_ac_levels));
995 return 1; // select intra4x4 over intra16x16
998 //------------------------------------------------------------------------------
1000 static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) {
1001 const int kNumBlocks = 8;
1002 const VP8Encoder* const enc = it->enc_;
1003 const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_];
1004 const int lambda = dqm->lambda_uv_;
1005 const uint8_t* const src = it->yuv_in_ + U_OFF;
1006 uint8_t* const tmp_dst = it->yuv_out2_ + U_OFF; // scratch buffer
1007 uint8_t* const dst0 = it->yuv_out_ + U_OFF;
1008 VP8ModeScore rd_best;
1009 int mode;
1011 rd->mode_uv = -1;
1012 InitScore(&rd_best);
1013 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1014 VP8ModeScore rd_uv;
1016 // Reconstruct
1017 rd_uv.nz = ReconstructUV(it, &rd_uv, tmp_dst, mode);
1019 // Compute RD-score
1020 rd_uv.D = VP8SSE16x8(src, tmp_dst);
1021 rd_uv.SD = 0; // TODO: should we call TDisto? it tends to flatten areas.
1022 rd_uv.H = VP8FixedCostsUV[mode];
1023 rd_uv.R = VP8GetCostUV(it, &rd_uv);
1024 if (mode > 0 && IsFlat(rd_uv.uv_levels[0], kNumBlocks, FLATNESS_LIMIT_UV)) {
1025 rd_uv.R += FLATNESS_PENALTY * kNumBlocks;
1028 SetRDScore(lambda, &rd_uv);
1029 if (mode == 0 || rd_uv.score < rd_best.score) {
1030 CopyScore(&rd_best, &rd_uv);
1031 rd->mode_uv = mode;
1032 memcpy(rd->uv_levels, rd_uv.uv_levels, sizeof(rd->uv_levels));
1033 memcpy(dst0, tmp_dst, UV_SIZE); // TODO: SwapUVOut() ?
1036 VP8SetIntraUVMode(it, rd->mode_uv);
1037 AddScore(rd, &rd_best);
1040 //------------------------------------------------------------------------------
1041 // Final reconstruction and quantization.
1043 static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) {
1044 const VP8Encoder* const enc = it->enc_;
1045 const int is_i16 = (it->mb_->type_ == 1);
1046 int nz = 0;
1048 if (is_i16) {
1049 nz = ReconstructIntra16(it, rd, it->yuv_out_ + Y_OFF, it->preds_[0]);
1050 } else {
1051 VP8IteratorStartI4(it);
1052 do {
1053 const int mode =
1054 it->preds_[(it->i4_ & 3) + (it->i4_ >> 2) * enc->preds_w_];
1055 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
1056 uint8_t* const dst = it->yuv_out_ + Y_OFF + VP8Scan[it->i4_];
1057 VP8MakeIntra4Preds(it);
1058 nz |= ReconstructIntra4(it, rd->y_ac_levels[it->i4_],
1059 src, dst, mode) << it->i4_;
1060 } while (VP8IteratorRotateI4(it, it->yuv_out_ + Y_OFF));
1063 nz |= ReconstructUV(it, rd, it->yuv_out_ + U_OFF, it->mb_->uv_mode_);
1064 rd->nz = nz;
1067 // Refine intra16/intra4 sub-modes based on distortion only (not rate).
1068 static void DistoRefine(VP8EncIterator* const it, int try_both_i4_i16) {
1069 const int is_i16 = (it->mb_->type_ == 1);
1070 score_t best_score = MAX_COST;
1072 if (try_both_i4_i16 || is_i16) {
1073 int mode;
1074 int best_mode = -1;
1075 for (mode = 0; mode < NUM_PRED_MODES; ++mode) {
1076 const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode];
1077 const uint8_t* const src = it->yuv_in_ + Y_OFF;
1078 const score_t score = VP8SSE16x16(src, ref);
1079 if (score < best_score) {
1080 best_mode = mode;
1081 best_score = score;
1084 VP8SetIntra16Mode(it, best_mode);
1086 if (try_both_i4_i16 || !is_i16) {
1087 uint8_t modes_i4[16];
1088 // We don't evaluate the rate here, but just account for it through a
1089 // constant penalty (i4 mode usually needs more bits compared to i16).
1090 score_t score_i4 = (score_t)I4_PENALTY;
1092 VP8IteratorStartI4(it);
1093 do {
1094 int mode;
1095 int best_sub_mode = -1;
1096 score_t best_sub_score = MAX_COST;
1097 const uint8_t* const src = it->yuv_in_ + Y_OFF + VP8Scan[it->i4_];
1099 // TODO(skal): we don't really need the prediction pixels here,
1100 // but just the distortion against 'src'.
1101 VP8MakeIntra4Preds(it);
1102 for (mode = 0; mode < NUM_BMODES; ++mode) {
1103 const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode];
1104 const score_t score = VP8SSE4x4(src, ref);
1105 if (score < best_sub_score) {
1106 best_sub_mode = mode;
1107 best_sub_score = score;
1110 modes_i4[it->i4_] = best_sub_mode;
1111 score_i4 += best_sub_score;
1112 if (score_i4 >= best_score) break;
1113 } while (VP8IteratorRotateI4(it, it->yuv_in_ + Y_OFF));
1114 if (score_i4 < best_score) {
1115 VP8SetIntra4Mode(it, modes_i4);
1120 //------------------------------------------------------------------------------
1121 // Entry point
1123 int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd,
1124 VP8RDLevel rd_opt) {
1125 int is_skipped;
1126 const int method = it->enc_->method_;
1128 InitScore(rd);
1130 // We can perform predictions for Luma16x16 and Chroma8x8 already.
1131 // Luma4x4 predictions needs to be done as-we-go.
1132 VP8MakeLuma16Preds(it);
1133 VP8MakeChroma8Preds(it);
1135 if (rd_opt > RD_OPT_NONE) {
1136 it->do_trellis_ = (rd_opt >= RD_OPT_TRELLIS_ALL);
1137 PickBestIntra16(it, rd);
1138 if (method >= 2) {
1139 PickBestIntra4(it, rd);
1141 PickBestUV(it, rd);
1142 if (rd_opt == RD_OPT_TRELLIS) { // finish off with trellis-optim now
1143 it->do_trellis_ = 1;
1144 SimpleQuantize(it, rd);
1146 } else {
1147 // For method == 2, pick the best intra4/intra16 based on SSE (~tad slower).
1148 // For method <= 1, we refine intra4 or intra16 (but don't re-examine mode).
1149 DistoRefine(it, (method >= 2));
1150 SimpleQuantize(it, rd);
1152 is_skipped = (rd->nz == 0);
1153 VP8SetSkip(it, is_skipped);
1154 return is_skipped;