1 // Copyright 2011 Google Inc. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
10 // WebP encoder: internal header.
12 // Author: Skal (pascal.massimino@gmail.com)
14 #ifndef WEBP_ENC_VP8ENCI_H_
15 #define WEBP_ENC_VP8ENCI_H_
17 #include <string.h> // for memcpy()
18 #include "../webp/encode.h"
19 #include "../dsp/dsp.h"
20 #include "../utils/bit_writer.h"
21 #include "../utils/thread.h"
27 //------------------------------------------------------------------------------
28 // Various defines and enums
// Encoder version, split as major / minor / revision.
#define ENC_MAJ_VERSION 0
#define ENC_MIN_VERSION 4
#define ENC_REV_VERSION 0
35 // intra prediction modes
36 enum { B_DC_PRED
= 0, // 4x4 modes
46 NUM_BMODES
= B_HU_PRED
+ 1 - B_DC_PRED
, // = 10
49 DC_PRED
= B_DC_PRED
, V_PRED
= B_VE_PRED
,
50 H_PRED
= B_HE_PRED
, TM_PRED
= B_TM_PRED
,
54 enum { NUM_MB_SEGMENTS
= 4,
55 MAX_NUM_PARTITIONS
= 8,
56 NUM_TYPES
= 4, // 0: i16-AC, 1: i16-DC, 2:chroma-AC, 3:i4-AC
60 MAX_LF_LEVELS
= 64, // Maximum loop filter level
61 MAX_VARIABLE_LEVEL
= 67, // last (inclusive) level with variable cost
62 MAX_LEVEL
= 2047 // max level (note: max codable is 2047 + 67)
65 typedef enum { // Rate-distortion optimization levels
66 RD_OPT_NONE
= 0, // no rd-opt
67 RD_OPT_BASIC
= 1, // basic scoring (no trellis)
68 RD_OPT_TRELLIS
= 2, // perform trellis-quant on the final decision only
69 RD_OPT_TRELLIS_ALL
= 3 // trellis-quant for every scoring (much slower)
72 // YUV-cache parameters. Cache is 16-pixels wide.
73 // The original or reconstructed samples can be accessed using VP8Scan[]
74 // The predicted blocks can be accessed using offsets to yuv_p_ and
75 // the arrays VP8*ModeOffsets[];
76 // +----+ YUV Samples area. See VP8Scan[] for accessing the blocks.
77 // Y_OFF |YYYY| <- original samples ('yuv_in_')
81 // U_OFF |UUVV| V_OFF (=U_OFF + 8)
84 // Y_OFF |YYYY| <- compressed/decoded samples ('yuv_out_')
85 // |YYYY| There are two buffers like this ('yuv_out_'/'yuv_out2_')
91 // +----+ Prediction area ('yuv_p_', size = PRED_SIZE)
92 // I16DC16 |YYYY| Intra16 predictions (16x16 block each)
108 // +----+ Chroma U/V predictions (16x8 block each)
117 // +----+ Intra 4x4 predictions (4x4 block each)
118 // |YYYY| I4DC4 I4TM4 I4VE4 I4HE4
119 // |YYYY| I4RD4 I4VR4 I4LD4 I4VL4
120 // |YY..| I4HD4 I4HU4 I4TMP
// Sizes and offsets inside the YUV cache. All of them are expressed in
// samples, with BPS samples per cache row.
#define BPS 16                           // this is the common stride
#define Y_SIZE (16 * BPS)                // luma area: 16 rows
#define UV_SIZE (8 * BPS)                // chroma area: 8 rows (U and V share rows)
#define YUV_SIZE (UV_SIZE + Y_SIZE)      // one full luma + chroma buffer
#define PRED_SIZE ((6 * 16 + 12) * BPS)  // size of the prediction area

#define U_OFF (Y_SIZE)     // U samples start right after the luma area
#define V_OFF (8 + U_OFF)  // V samples sit 8 columns to the right of U
// Rounds the address held in PTR up to the next multiple of (ALIGN_CST + 1)
// and yields it as a uintptr_t. ALIGN_CST must be of the form 2^k - 1 for the
// mask arithmetic to be meaningful.
// PTR is converted to an integer *before* the offset is added, so the bump is
// always ALIGN_CST bytes whatever the pointee type is (pointer arithmetic
// would scale it by sizeof(*PTR)). The mask is widened to uintptr_t before
// being complemented so that no high address bit can be cleared by accident.
#define DO_ALIGN(PTR) (((uintptr_t)(PTR) + ALIGN_CST) & ~(uintptr_t)ALIGN_CST)
133 extern const int VP8Scan
[16 + 4 + 4]; // in quant.c
134 extern const int VP8UVModeOffsets
[4]; // in analyze.c
135 extern const int VP8I16ModeOffsets
[4];
136 extern const int VP8I4ModeOffsets
[NUM_BMODES
];
// Layout of prediction blocks: offsets of each predicted block inside the
// prediction area, using BPS as row stride.

// intra 16x16: one 16-row band per mode
#define I16DC16 (0 * 16 * BPS)
#define I16TM16 (1 * 16 * BPS)
#define I16VE16 (2 * 16 * BPS)
#define I16HE16 (3 * 16 * BPS)
// chroma 8x8, two U/V blocks side by side (hence: 16x8 each)
#define C8DC8 (4 * 16 * BPS)
#define C8TM8 (C8DC8 + 8 * BPS)
#define C8VE8 (5 * 16 * BPS)
#define C8HE8 (C8VE8 + 8 * BPS)
// intra 4x4: three rows of 4x4 blocks, four blocks per row
#define I4DC4 (6 * 16 * BPS)
#define I4TM4 (I4DC4 + 4)
#define I4VE4 (I4DC4 + 8)
#define I4HE4 (I4DC4 + 12)
#define I4RD4 (I4DC4 + 4 * BPS)
#define I4VR4 (I4RD4 + 4)
#define I4LD4 (I4RD4 + 8)
#define I4VL4 (I4RD4 + 12)
#define I4HD4 (I4DC4 + 8 * BPS)
#define I4HU4 (I4HD4 + 4)
#define I4TMP (I4HD4 + 8)
// 64-bit signed type used for scores, rate and distortion.
typedef int64_t score_t;
// Maximum cost value, expressed as a score_t.
#define MAX_COST ((score_t)0x7fffffffffffffLL)
// Shifts 'b' left by (QFIX - 8) bits.
#define BIAS(b) ((b) << (QFIX - 8))
167 // Fun fact: this is the _only_ line where we're actually being lossy and
169 static WEBP_INLINE
int QUANTDIV(int n
, int iQ
, int B
) {
170 return (n
* iQ
+ B
) >> QFIX
;
// Size parameter of the histogram used by CollectHistogram.
#define MAX_COEFF_THRESH 31
// Forward declaration; the struct body follows.
typedef struct VP8Histogram VP8Histogram;
176 struct VP8Histogram
{
177 // TODO(skal): we only need to store the max_value and last_non_zero actually.
178 int distribution
[MAX_COEFF_THRESH
+ 1];
181 // Uncomment the following to remove token-buffer code:
182 // #define DISABLE_TOKEN_BUFFER
184 //------------------------------------------------------------------------------
187 typedef uint32_t proba_t
; // 16b + 16b
188 typedef uint8_t ProbaArray
[NUM_CTX
][NUM_PROBAS
];
189 typedef proba_t StatsArray
[NUM_CTX
][NUM_PROBAS
];
190 typedef uint16_t CostArray
[NUM_CTX
][MAX_VARIABLE_LEVEL
+ 1];
191 typedef double LFStats
[NUM_MB_SEGMENTS
][MAX_LF_LEVELS
]; // filter stats
193 typedef struct VP8Encoder VP8Encoder
;
197 int num_segments_
; // Actual number of segments. 1 segment only = unused.
198 int update_map_
; // whether to update the segment map or not.
199 // must be 0 if there's only 1 segment.
200 int size_
; // bit-cost for transmitting the segment map
203 // Struct collecting all frame-persistent probabilities.
205 uint8_t segments_
[3]; // probabilities for segment tree
206 uint8_t skip_proba_
; // final probability of being skipped.
207 ProbaArray coeffs_
[NUM_TYPES
][NUM_BANDS
]; // 924 bytes
208 StatsArray stats_
[NUM_TYPES
][NUM_BANDS
]; // 4224 bytes
209 CostArray level_cost_
[NUM_TYPES
][NUM_BANDS
]; // 11.4k
210 int dirty_
; // if true, need to call VP8CalculateLevelCosts()
211 int use_skip_proba_
; // Note: we always use skip_proba for now.
212 int nb_skip_
; // number of skipped blocks
215 // Filter parameters. Not actually used in the code (we don't perform
216 // the in-loop filtering), but filled from user's config
218 int simple_
; // filtering type: 0=complex, 1=simple
219 int level_
; // base filter level [0..63]
220 int sharpness_
; // [0..7]
221 int i4x4_lf_delta_
; // delta filter level for i4x4 relative to i16x16
224 //------------------------------------------------------------------------------
// Information about the macroblocks.
229 unsigned int type_
:2; // 0=i4x4, 1=i16x16
230 unsigned int uv_mode_
:2;
231 unsigned int skip_
:1;
232 unsigned int segment_
:2;
233 uint8_t alpha_
; // quantization-susceptibility
236 typedef struct VP8Matrix
{
237 uint16_t q_
[16]; // quantizer steps
238 uint16_t iq_
[16]; // reciprocals, fixed point.
239 uint16_t bias_
[16]; // rounding bias
240 uint16_t zthresh_
[16]; // value under which a coefficient is zeroed
241 uint16_t sharpen_
[16]; // frequency boosters for slight sharpening
245 VP8Matrix y1_
, y2_
, uv_
; // quantization matrices
246 int alpha_
; // quant-susceptibility, range [-127,127]. Zero is neutral.
247 // Lower values indicate a lower risk of blurriness.
248 int beta_
; // filter-susceptibility, range [0,255].
249 int quant_
; // final segment quantizer.
250 int fstrength_
; // final in-loop filtering strength
251 int max_edge_
; // max edge delta (for filtering strength)
252 int min_disto_
; // minimum distortion required to trigger filtering record
254 int lambda_i16_
, lambda_i4_
, lambda_uv_
;
255 int lambda_mode_
, lambda_trellis_
, tlambda_
;
256 int lambda_trellis_i16_
, lambda_trellis_i4_
, lambda_trellis_uv_
;
259 // Handy transient struct to accumulate score and info during RD-optimization
260 // and mode evaluation.
262 score_t D
, SD
; // Distortion, spectral distortion
263 score_t H
, R
, score
; // header bits, rate, score.
264 int16_t y_dc_levels
[16]; // Quantized levels for luma-DC, luma-AC, chroma.
265 int16_t y_ac_levels
[16][16];
266 int16_t uv_levels
[4 + 4][16];
267 int mode_i16
; // mode number for intra16 prediction
268 uint8_t modes_i4
[16]; // mode numbers for intra4 predictions
269 int mode_uv
; // mode number of chroma prediction
270 uint32_t nz
; // non-zero blocks
273 // Iterator structure to iterate through macroblocks, pointing to the
274 // right neighbouring data (samples, predictions, contexts, ...)
276 int x_
, y_
; // current macroblock
277 int y_stride_
, uv_stride_
; // respective strides
278 uint8_t* yuv_in_
; // input samples
279 uint8_t* yuv_out_
; // output samples
280 uint8_t* yuv_out2_
; // secondary buffer swapped with yuv_out_.
281 uint8_t* yuv_p_
; // scratch buffer for prediction
282 VP8Encoder
* enc_
; // back-pointer
283 VP8MBInfo
* mb_
; // current macroblock
284 VP8BitWriter
* bw_
; // current bit-writer
285 uint8_t* preds_
; // intra mode predictors (4x4 blocks)
286 uint32_t* nz_
; // non-zero pattern
287 uint8_t i4_boundary_
[37]; // 32+5 boundary samples needed by intra4x4
288 uint8_t* i4_top_
; // pointer to the current top boundary sample
289 int i4_
; // current intra4x4 mode being tested
290 int top_nz_
[9]; // top-non-zero context.
291 int left_nz_
[9]; // left-non-zero. left_nz[8] is independent.
292 uint64_t bit_count_
[4][3]; // bit counters for coded levels.
293 uint64_t luma_bits_
; // macroblock bit-cost for luma
294 uint64_t uv_bits_
; // macroblock bit-cost for chroma
295 LFStats
* lf_stats_
; // filter stats (borrowed from enc_)
296 int do_trellis_
; // if true, perform extra level optimisation
297 int count_down_
; // number of mb still to be processed
298 int count_down0_
; // starting counter value (for progress)
299 int percent0_
; // saved initial progress percent
301 uint8_t* y_left_
; // left luma samples (addressable from index -1 to 15).
302 uint8_t* u_left_
; // left u samples (addressable from index -1 to 7)
303 uint8_t* v_left_
; // left v samples (addressable from index -1 to 7)
305 uint8_t* y_top_
; // top luma samples at position 'x_'
306 uint8_t* uv_top_
; // top u/v samples at position 'x_', packed as 16 bytes
308 // memory for storing y/u/v_left_ and yuv_in_/out_*
309 uint8_t yuv_left_mem_
[17 + 16 + 16 + 8 + ALIGN_CST
]; // memory for *_left_
310 uint8_t yuv_mem_
[3 * YUV_SIZE
+ PRED_SIZE
+ ALIGN_CST
]; // memory for yuv_*
314 // must be called first
315 void VP8IteratorInit(VP8Encoder
* const enc
, VP8EncIterator
* const it
);
317 void VP8IteratorReset(VP8EncIterator
* const it
);
318 // reset iterator position to row 'y'
319 void VP8IteratorSetRow(VP8EncIterator
* const it
, int y
);
320 // set count down (=number of iterations to go)
321 void VP8IteratorSetCountDown(VP8EncIterator
* const it
, int count_down
);
322 // return true if iteration is finished
323 int VP8IteratorIsDone(const VP8EncIterator
* const it
);
324 // Import uncompressed samples from source.
325 // If tmp_32 is not NULL, import boundary samples too.
326 // tmp_32 is a 32-bytes scratch buffer that must be aligned in memory.
327 void VP8IteratorImport(VP8EncIterator
* const it
, uint8_t* tmp_32
);
328 // export decimated samples
329 void VP8IteratorExport(const VP8EncIterator
* const it
);
330 // go to next macroblock. Returns false if not finished.
331 int VP8IteratorNext(VP8EncIterator
* const it
);
332 // save the yuv_out_ boundary values to top_/left_ arrays for next iterations.
333 void VP8IteratorSaveBoundary(VP8EncIterator
* const it
);
334 // Report progression based on macroblock rows. Return 0 for user-abort request.
335 int VP8IteratorProgress(const VP8EncIterator
* const it
,
336 int final_delta_percent
);
337 // Intra4x4 iterations
338 void VP8IteratorStartI4(VP8EncIterator
* const it
);
339 // returns true if not done.
340 int VP8IteratorRotateI4(VP8EncIterator
* const it
,
341 const uint8_t* const yuv_out
);
343 // Non-zero context setup/teardown
344 void VP8IteratorNzToBytes(VP8EncIterator
* const it
);
345 void VP8IteratorBytesToNz(VP8EncIterator
* const it
);
347 // Helper functions to set mode properties
348 void VP8SetIntra16Mode(const VP8EncIterator
* const it
, int mode
);
349 void VP8SetIntra4Mode(const VP8EncIterator
* const it
, const uint8_t* modes
);
350 void VP8SetIntraUVMode(const VP8EncIterator
* const it
, int mode
);
351 void VP8SetSkip(const VP8EncIterator
* const it
, int skip
);
352 void VP8SetSegment(const VP8EncIterator
* const it
, int segment
);
354 //------------------------------------------------------------------------------
355 // Paginated token buffer
// Opaque token page type; struct details in token.c
typedef struct VP8Tokens VP8Tokens;
360 #if !defined(DISABLE_TOKEN_BUFFER)
361 VP8Tokens
* pages_
; // first page
362 VP8Tokens
** last_page_
; // last page
363 uint16_t* tokens_
; // set to (*last_page_)->tokens_
364 int left_
; // how many free tokens left before the page is full.
366 int error_
; // true in case of malloc error
369 void VP8TBufferInit(VP8TBuffer
* const b
); // initialize an empty buffer
370 void VP8TBufferClear(VP8TBuffer
* const b
); // de-allocate pages memory
372 #if !defined(DISABLE_TOKEN_BUFFER)
374 // Finalizes bitstream when probabilities are known.
375 // Deletes the allocated token memory if final_pass is true.
376 int VP8EmitTokens(VP8TBuffer
* const b
, VP8BitWriter
* const bw
,
377 const uint8_t* const probas
, int final_pass
);
379 // record the coding of coefficients without knowing the probabilities yet
380 int VP8RecordCoeffTokens(int ctx
, int coeff_type
, int first
, int last
,
381 const int16_t* const coeffs
,
382 VP8TBuffer
* const tokens
);
384 // Estimate the final coded size given a set of 'probas'.
385 size_t VP8EstimateTokenSize(VP8TBuffer
* const b
, const uint8_t* const probas
);
388 void VP8TokenToStats(const VP8TBuffer
* const b
, proba_t
* const stats
);
390 #endif // !DISABLE_TOKEN_BUFFER
392 //------------------------------------------------------------------------------
396 const WebPConfig
* config_
; // user configuration and parameters
397 WebPPicture
* pic_
; // input / output picture
400 VP8FilterHeader filter_hdr_
; // filtering information
401 VP8SegmentHeader segment_hdr_
; // segment information
403 int profile_
; // VP8's profile, deduced from Config.
405 // dimension, in macroblock units.
407 int preds_w_
; // stride of the *preds_ prediction plane (=4*mb_w + 1)
409 // number of partitions (1, 2, 4 or 8 = MAX_NUM_PARTITIONS)
// per-partition boolean encoders (bit writers).
413 VP8BitWriter bw_
; // part0
414 VP8BitWriter parts_
[MAX_NUM_PARTITIONS
]; // token partitions
415 VP8TBuffer tokens_
; // token buffer
417 int percent_
; // for progress
421 uint8_t* alpha_data_
; // non-NULL if transparency is present
422 uint32_t alpha_data_size_
;
423 WebPWorker alpha_worker_
;
427 VP8BitWriter layer_bw_
;
428 uint8_t* layer_data_
;
429 size_t layer_data_size_
;
431 // quantization info (one set of DC/AC dequant factor per segment)
432 VP8SegmentInfo dqm_
[NUM_MB_SEGMENTS
];
433 int base_quant_
; // nominal quantizer value. Only used
434 // for relative coding of segments' quant.
435 int alpha_
; // global susceptibility (<=> complexity)
436 int uv_alpha_
; // U/V quantization susceptibility
437 // global offset of quantizers, shared by all segments
439 int dq_y2_dc_
, dq_y2_ac_
;
440 int dq_uv_dc_
, dq_uv_ac_
;
442 // probabilities and statistics
444 uint64_t sse_
[4]; // sum of Y/U/V/A squared errors for all macroblocks
445 uint64_t sse_count_
; // pixel count for the sse_[] stats
447 int residual_bytes_
[3][4];
450 // quality/speed settings
451 int method_
; // 0=fastest, 6=best/slowest.
452 VP8RDLevel rd_opt_level_
; // Deduced from method_.
453 int max_i4_header_bits_
; // partition #0 safeness factor
454 int thread_level_
; // derived from config->thread_level
455 int do_search_
; // derived from config->target_XXX
456 int use_tokens_
; // if true, use token buffer
459 VP8MBInfo
* mb_info_
; // contextual macroblock infos (mb_w_ + 1)
460 uint8_t* preds_
; // predictions modes: (4*mb_w+1) * (4*mb_h+1)
461 uint32_t* nz_
; // non-zero bit context: mb_w+1
462 uint8_t *y_top_
; // top luma samples.
463 uint8_t *uv_top_
; // top u/v samples.
464 // U and V are packed into 16 bytes (8 U + 8 V)
465 LFStats
*lf_stats_
; // autofilter stats (if NULL, autofilter is off)
468 //------------------------------------------------------------------------------
469 // internal functions. Not public.
472 extern const uint8_t VP8CoeffsProba0
[NUM_TYPES
][NUM_BANDS
][NUM_CTX
][NUM_PROBAS
];
474 VP8CoeffsUpdateProba
[NUM_TYPES
][NUM_BANDS
][NUM_CTX
][NUM_PROBAS
];
475 // Reset the token probabilities to their initial (default) values
476 void VP8DefaultProbas(VP8Encoder
* const enc
);
477 // Write the token probabilities
478 void VP8WriteProbas(VP8BitWriter
* const bw
, const VP8Proba
* const probas
);
479 // Writes the partition #0 modes (that is: all intra modes)
480 void VP8CodeIntraModes(VP8Encoder
* const enc
);
483 // Generates the final bitstream by coding the partition0 and headers,
484 // and appending an assembly of all the pre-coded token partitions.
485 // Return true if everything is ok.
486 int VP8EncWrite(VP8Encoder
* const enc
);
487 // Release memory allocated for bit-writing in VP8EncLoop & seq.
488 void VP8EncFreeBitWriters(VP8Encoder
* const enc
);
// Constant byte tables defined elsewhere in the encoder.
extern const uint8_t VP8EncBands[16 + 1];
extern const uint8_t VP8Cat3[];
extern const uint8_t VP8Cat4[];
extern const uint8_t VP8Cat5[];
extern const uint8_t VP8Cat6[];
497 // Form all the four Intra16x16 predictions in the yuv_p_ cache
498 void VP8MakeLuma16Preds(const VP8EncIterator
* const it
);
499 // Form all the four Chroma8x8 predictions in the yuv_p_ cache
500 void VP8MakeChroma8Preds(const VP8EncIterator
* const it
);
501 // Form all the ten Intra4x4 predictions in the yuv_p_ cache
502 // for the 4x4 block it->i4_
503 void VP8MakeIntra4Preds(const VP8EncIterator
* const it
);
505 int VP8GetCostLuma16(VP8EncIterator
* const it
, const VP8ModeScore
* const rd
);
506 int VP8GetCostLuma4(VP8EncIterator
* const it
, const int16_t levels
[16]);
507 int VP8GetCostUV(VP8EncIterator
* const it
, const VP8ModeScore
* const rd
);
509 int VP8EncLoop(VP8Encoder
* const enc
);
510 int VP8EncTokenLoop(VP8Encoder
* const enc
);
513 // Assign an error code to a picture. Return false for convenience.
514 int WebPEncodingSetError(const WebPPicture
* const pic
, WebPEncodingError error
);
515 int WebPReportProgress(const WebPPicture
* const pic
,
516 int percent
, int* const percent_store
);
519 // Main analysis loop. Decides the segmentations and complexity.
520 // Assigns a first guess for Intra16 and uvmode_ prediction modes.
521 int VP8EncAnalyze(VP8Encoder
* const enc
);
524 // Sets up segment's quantization values, base_quant_ and filter strengths.
525 void VP8SetSegmentParams(VP8Encoder
* const enc
, float quality
);
526 // Pick best modes and fills the levels. Returns true if skipped.
527 int VP8Decimate(VP8EncIterator
* const it
, VP8ModeScore
* const rd
,
531 void VP8EncInitAlpha(VP8Encoder
* const enc
); // initialize alpha compression
532 int VP8EncStartAlpha(VP8Encoder
* const enc
); // start alpha coding process
533 int VP8EncFinishAlpha(VP8Encoder
* const enc
); // finalize compressed data
534 int VP8EncDeleteAlpha(VP8Encoder
* const enc
); // delete compressed data
537 void VP8EncInitLayer(VP8Encoder
* const enc
); // init everything
538 void VP8EncCodeLayerBlock(VP8EncIterator
* it
); // code one more macroblock
539 int VP8EncFinishLayer(VP8Encoder
* const enc
); // finalize coding
540 void VP8EncDeleteLayer(VP8Encoder
* enc
); // reclaim memory
546 double w
, xm
, ym
, xxm
, xym
, yym
;
548 void VP8SSIMAddStats(const DistoStats
* const src
, DistoStats
* const dst
);
549 void VP8SSIMAccumulatePlane(const uint8_t* src1
, int stride1
,
550 const uint8_t* src2
, int stride2
,
551 int W
, int H
, DistoStats
* const stats
);
552 double VP8SSIMGet(const DistoStats
* const stats
);
553 double VP8SSIMGetSquaredError(const DistoStats
* const stats
);
556 void VP8InitFilter(VP8EncIterator
* const it
);
557 void VP8StoreFilterStats(VP8EncIterator
* const it
);
558 void VP8AdjustFilterStrength(VP8EncIterator
* const it
);
560 // returns the approximate filtering strength needed to smooth a edge
561 // step of 'delta', given a sharpness parameter 'sharpness'.
562 int VP8FilterStrengthFromDelta(int sharpness
, int delta
);
564 //------------------------------------------------------------------------------
570 #endif /* WEBP_ENC_VP8ENCI_H_ */