1 // Copyright 2011 Google Inc. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
10 // Speed-critical encoding functions.
12 // Author: Skal (pascal.massimino@gmail.com)
15 #include <stdlib.h> // for abs()
18 #include "../enc/vp8enci.h"
20 static WEBP_INLINE
uint8_t clip_8b(int v
) {
21 return (!(v
& ~0xff)) ? v
: (v
< 0) ? 0 : 255;
24 static WEBP_INLINE
int clip_max(int v
, int max
) {
25 return (v
> max
) ? max
: v
;
28 //------------------------------------------------------------------------------
29 // Compute susceptibility based on DCT-coeff histograms:
30 // the higher, the "easier" the macroblock is to compress.
32 const int VP8DspScan
[16 + 4 + 4] = {
34 0 + 0 * BPS
, 4 + 0 * BPS
, 8 + 0 * BPS
, 12 + 0 * BPS
,
35 0 + 4 * BPS
, 4 + 4 * BPS
, 8 + 4 * BPS
, 12 + 4 * BPS
,
36 0 + 8 * BPS
, 4 + 8 * BPS
, 8 + 8 * BPS
, 12 + 8 * BPS
,
37 0 + 12 * BPS
, 4 + 12 * BPS
, 8 + 12 * BPS
, 12 + 12 * BPS
,
39 0 + 0 * BPS
, 4 + 0 * BPS
, 0 + 4 * BPS
, 4 + 4 * BPS
, // U
40 8 + 0 * BPS
, 12 + 0 * BPS
, 8 + 4 * BPS
, 12 + 4 * BPS
// V
43 static void CollectHistogram(const uint8_t* ref
, const uint8_t* pred
,
44 int start_block
, int end_block
,
45 VP8Histogram
* const histo
) {
47 for (j
= start_block
; j
< end_block
; ++j
) {
51 VP8FTransform(ref
+ VP8DspScan
[j
], pred
+ VP8DspScan
[j
], out
);
53 // Convert coefficients to bin.
54 for (k
= 0; k
< 16; ++k
) {
55 const int v
= abs(out
[k
]) >> 3; // TODO(skal): add rounding?
56 const int clipped_value
= clip_max(v
, MAX_COEFF_THRESH
);
57 histo
->distribution
[clipped_value
]++;
62 //------------------------------------------------------------------------------
63 // run-time tables (~4k)
static uint8_t clip1[255 + 510 + 1];    // clips [-255,510] to [0,255]

// We declare this variable 'volatile' to prevent instruction reordering
// and make sure it's set to true _last_ (so as to be thread-safe)
// NOTE(review): 'volatile' by itself gives no inter-thread ordering guarantee
// in ISO C; presumably initialization happens before any worker thread is
// started -- confirm against callers.
static volatile int tables_ok = 0;

// Fills clip1[] so that clip1[255 + i] == clip_8b(i) for every i in
// [-255, 510]. The work is done only while 'tables_ok' is still zero; the
// flag is raised after the last table entry has been written.
static void InitTables(void) {
  if (!tables_ok) {
    int i;
    for (i = -255; i <= 255 + 255; ++i) {
      clip1[255 + i] = clip_8b(i);
    }
    tables_ok = 1;
  }
}
82 //------------------------------------------------------------------------------
83 // Transforms (Paragraph 14.4)
85 #define STORE(x, y, v) \
86 dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))
88 static const int kC1
= 20091 + (1 << 16);
89 static const int kC2
= 35468;
90 #define MUL(a, b) (((a) * (b)) >> 16)
92 static WEBP_INLINE
void ITransformOne(const uint8_t* ref
, const int16_t* in
,
97 for (i
= 0; i
< 4; ++i
) { // vertical pass
98 const int a
= in
[0] + in
[8];
99 const int b
= in
[0] - in
[8];
100 const int c
= MUL(in
[4], kC2
) - MUL(in
[12], kC1
);
101 const int d
= MUL(in
[4], kC1
) + MUL(in
[12], kC2
);
111 for (i
= 0; i
< 4; ++i
) { // horizontal pass
112 const int dc
= tmp
[0] + 4;
113 const int a
= dc
+ tmp
[8];
114 const int b
= dc
- tmp
[8];
115 const int c
= MUL(tmp
[4], kC2
) - MUL(tmp
[12], kC1
);
116 const int d
= MUL(tmp
[4], kC1
) + MUL(tmp
[12], kC2
);
// Inverse-transforms one 4x4 block ('in[0..15]' applied at 'ref'/'dst') and,
// when 'do_two' is non-zero, also the horizontally adjacent block
// ('in[16..31]' applied at 'ref + 4'/'dst + 4').
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
                       int do_two) {
  ITransformOne(ref, in, dst);
  if (do_two) {
    ITransformOne(ref + 4, in + 16, dst + 4);
  }
}
133 static void FTransform(const uint8_t* src
, const uint8_t* ref
, int16_t* out
) {
136 for (i
= 0; i
< 4; ++i
, src
+= BPS
, ref
+= BPS
) {
137 const int d0
= src
[0] - ref
[0]; // 9bit dynamic range ([-255,255])
138 const int d1
= src
[1] - ref
[1];
139 const int d2
= src
[2] - ref
[2];
140 const int d3
= src
[3] - ref
[3];
141 const int a0
= (d0
+ d3
); // 10b [-510,510]
142 const int a1
= (d1
+ d2
);
143 const int a2
= (d1
- d2
);
144 const int a3
= (d0
- d3
);
145 tmp
[0 + i
* 4] = (a0
+ a1
) * 8; // 14b [-8160,8160]
146 tmp
[1 + i
* 4] = (a2
* 2217 + a3
* 5352 + 1812) >> 9; // [-7536,7542]
147 tmp
[2 + i
* 4] = (a0
- a1
) * 8;
148 tmp
[3 + i
* 4] = (a3
* 2217 - a2
* 5352 + 937) >> 9;
150 for (i
= 0; i
< 4; ++i
) {
151 const int a0
= (tmp
[0 + i
] + tmp
[12 + i
]); // 15b
152 const int a1
= (tmp
[4 + i
] + tmp
[ 8 + i
]);
153 const int a2
= (tmp
[4 + i
] - tmp
[ 8 + i
]);
154 const int a3
= (tmp
[0 + i
] - tmp
[12 + i
]);
155 out
[0 + i
] = (a0
+ a1
+ 7) >> 4; // 12b
156 out
[4 + i
] = ((a2
* 2217 + a3
* 5352 + 12000) >> 16) + (a3
!= 0);
157 out
[8 + i
] = (a0
- a1
+ 7) >> 4;
158 out
[12+ i
] = ((a3
* 2217 - a2
* 5352 + 51000) >> 16);
// Inverse Walsh-Hadamard transform.
// Reads the 16 coefficients in[0..15] and writes 16 results, one every 16
// int16_t of 'out' (out[0], out[16], ..., out[240]); the other entries of
// 'out' are left untouched. Each result is rounded (+3) and divided by 8.
static void ITransformWHT(const int16_t* in, int16_t* out) {
  int tmp[16];
  int i;
  // First pass: butterflies along the columns of the 4x4 input.
  for (i = 0; i < 4; ++i) {
    const int a0 = in[0 + i] + in[12 + i];
    const int a1 = in[4 + i] + in[ 8 + i];
    const int a2 = in[4 + i] - in[ 8 + i];
    const int a3 = in[0 + i] - in[12 + i];
    tmp[0  + i] = a0 + a1;
    tmp[8  + i] = a0 - a1;
    tmp[4  + i] = a3 + a2;
    tmp[12 + i] = a3 - a2;
  }
  // Second pass: butterflies along the rows; each row yields four outputs
  // spaced 16 entries apart, and consecutive rows are 64 entries apart.
  for (i = 0; i < 4; ++i) {
    const int dc = tmp[0 + i * 4] + 3;    // w/ rounder
    const int a0 = dc             + tmp[3 + i * 4];
    const int a1 = tmp[1 + i * 4] + tmp[2 + i * 4];
    const int a2 = tmp[1 + i * 4] - tmp[2 + i * 4];
    const int a3 = dc             - tmp[3 + i * 4];
    out[ 0] = (a0 + a1) >> 3;
    out[16] = (a3 + a2) >> 3;
    out[32] = (a0 - a1) >> 3;
    out[48] = (a3 - a2) >> 3;
    out += 64;
  }
}
// Forward Walsh-Hadamard transform.
// Reads 16 input values spaced 16 int16_t apart (in[0], in[16], ...,
// in[240]) and writes the 16 transformed coefficients to out[0..15].
static void FTransformWHT(const int16_t* in, int16_t* out) {
  // input is 12b signed
  int tmp[16];
  int i;
  // First pass: each iteration consumes four inputs (64 entries of 'in').
  for (i = 0; i < 4; ++i, in += 64) {
    const int a0 = (in[0 * 16] + in[2 * 16]);  // 13b
    const int a1 = (in[1 * 16] + in[3 * 16]);
    const int a2 = (in[1 * 16] - in[3 * 16]);
    const int a3 = (in[0 * 16] - in[2 * 16]);
    tmp[0 + i * 4] = a0 + a1;   // 14b
    tmp[1 + i * 4] = a3 + a2;
    tmp[2 + i * 4] = a3 - a2;
    tmp[3 + i * 4] = a0 - a1;
  }
  // Second pass: butterflies along the columns, then halve the result.
  for (i = 0; i < 4; ++i) {
    const int a0 = (tmp[0 + i] + tmp[8 + i]);  // 15b
    const int a1 = (tmp[4 + i] + tmp[12+ i]);
    const int a2 = (tmp[4 + i] - tmp[12+ i]);
    const int a3 = (tmp[0 + i] - tmp[8 + i]);
    const int b0 = a0 + a1;    // 16b
    const int b1 = a3 + a2;
    const int b2 = a3 - a2;
    const int b3 = a0 - a1;
    out[ 0 + i] = b0 >> 1;     // 15b
    out[ 4 + i] = b1 >> 1;
    out[ 8 + i] = b2 >> 1;
    out[12 + i] = b3 >> 1;
  }
}
222 //------------------------------------------------------------------------------
225 #define DST(x, y) dst[(x) + (y) * BPS]
227 static WEBP_INLINE
void Fill(uint8_t* dst
, int value
, int size
) {
229 for (j
= 0; j
< size
; ++j
) {
230 memset(dst
+ j
* BPS
, value
, size
);
234 static WEBP_INLINE
void VerticalPred(uint8_t* dst
,
235 const uint8_t* top
, int size
) {
238 for (j
= 0; j
< size
; ++j
) memcpy(dst
+ j
* BPS
, top
, size
);
240 Fill(dst
, 127, size
);
244 static WEBP_INLINE
void HorizontalPred(uint8_t* dst
,
245 const uint8_t* left
, int size
) {
248 for (j
= 0; j
< size
; ++j
) {
249 memset(dst
+ j
* BPS
, left
[j
], size
);
252 Fill(dst
, 129, size
);
256 static WEBP_INLINE
void TrueMotion(uint8_t* dst
, const uint8_t* left
,
257 const uint8_t* top
, int size
) {
261 const uint8_t* const clip
= clip1
+ 255 - left
[-1];
262 for (y
= 0; y
< size
; ++y
) {
263 const uint8_t* const clip_table
= clip
+ left
[y
];
265 for (x
= 0; x
< size
; ++x
) {
266 dst
[x
] = clip_table
[top
[x
]];
271 HorizontalPred(dst
, left
, size
);
274 // true motion without left samples (hence: with default 129 value)
275 // is equivalent to VE prediction where you just copy the top samples.
276 // Note that if top samples are not available, the default value is
277 // then 129, and not 127 as in the VerticalPred case.
279 VerticalPred(dst
, top
, size
);
281 Fill(dst
, 129, size
);
286 static WEBP_INLINE
void DCMode(uint8_t* dst
, const uint8_t* left
,
288 int size
, int round
, int shift
) {
292 for (j
= 0; j
< size
; ++j
) DC
+= top
[j
];
293 if (left
) { // top and left present
294 for (j
= 0; j
< size
; ++j
) DC
+= left
[j
];
295 } else { // top, but no left
298 DC
= (DC
+ round
) >> shift
;
299 } else if (left
) { // left but no top
300 for (j
= 0; j
< size
; ++j
) DC
+= left
[j
];
302 DC
= (DC
+ round
) >> shift
;
303 } else { // no top, no left, nothing.
309 //------------------------------------------------------------------------------
310 // Chroma 8x8 prediction (paragraph 12.2)
312 static void IntraChromaPreds(uint8_t* dst
, const uint8_t* left
,
313 const uint8_t* top
) {
315 DCMode(C8DC8
+ dst
, left
, top
, 8, 8, 4);
316 VerticalPred(C8VE8
+ dst
, top
, 8);
317 HorizontalPred(C8HE8
+ dst
, left
, 8);
318 TrueMotion(C8TM8
+ dst
, left
, top
, 8);
322 if (left
) left
+= 16;
323 DCMode(C8DC8
+ dst
, left
, top
, 8, 8, 4);
324 VerticalPred(C8VE8
+ dst
, top
, 8);
325 HorizontalPred(C8HE8
+ dst
, left
, 8);
326 TrueMotion(C8TM8
+ dst
, left
, top
, 8);
329 //------------------------------------------------------------------------------
330 // luma 16x16 prediction (paragraph 12.3)
332 static void Intra16Preds(uint8_t* dst
,
333 const uint8_t* left
, const uint8_t* top
) {
334 DCMode(I16DC16
+ dst
, left
, top
, 16, 16, 5);
335 VerticalPred(I16VE16
+ dst
, top
, 16);
336 HorizontalPred(I16HE16
+ dst
, left
, 16);
337 TrueMotion(I16TM16
+ dst
, left
, top
, 16);
340 //------------------------------------------------------------------------------
341 // luma 4x4 prediction
343 #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
344 #define AVG2(a, b) (((a) + (b) + 1) >> 1)
346 static void VE4(uint8_t* dst
, const uint8_t* top
) { // vertical
347 const uint8_t vals
[4] = {
348 AVG3(top
[-1], top
[0], top
[1]),
349 AVG3(top
[ 0], top
[1], top
[2]),
350 AVG3(top
[ 1], top
[2], top
[3]),
351 AVG3(top
[ 2], top
[3], top
[4])
354 for (i
= 0; i
< 4; ++i
) {
355 memcpy(dst
+ i
* BPS
, vals
, 4);
359 static void HE4(uint8_t* dst
, const uint8_t* top
) { // horizontal
360 const int X
= top
[-1];
361 const int I
= top
[-2];
362 const int J
= top
[-3];
363 const int K
= top
[-4];
364 const int L
= top
[-5];
365 *(uint32_t*)(dst
+ 0 * BPS
) = 0x01010101U
* AVG3(X
, I
, J
);
366 *(uint32_t*)(dst
+ 1 * BPS
) = 0x01010101U
* AVG3(I
, J
, K
);
367 *(uint32_t*)(dst
+ 2 * BPS
) = 0x01010101U
* AVG3(J
, K
, L
);
368 *(uint32_t*)(dst
+ 3 * BPS
) = 0x01010101U
* AVG3(K
, L
, L
);
// DC 4x4 prediction: fills 'dst' with the rounded average of the eight
// samples top[0..3] and top[-5..-2].
static void DC4(uint8_t* dst, const uint8_t* top) {
  uint32_t dc = 4;   // rounding term for the division by 8 below
  int i;
  for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
  Fill(dst, dc >> 3, 4);
}
378 static void RD4(uint8_t* dst
, const uint8_t* top
) {
379 const int X
= top
[-1];
380 const int I
= top
[-2];
381 const int J
= top
[-3];
382 const int K
= top
[-4];
383 const int L
= top
[-5];
384 const int A
= top
[0];
385 const int B
= top
[1];
386 const int C
= top
[2];
387 const int D
= top
[3];
388 DST(0, 3) = AVG3(J
, K
, L
);
389 DST(0, 2) = DST(1, 3) = AVG3(I
, J
, K
);
390 DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X
, I
, J
);
391 DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A
, X
, I
);
392 DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B
, A
, X
);
393 DST(2, 0) = DST(3, 1) = AVG3(C
, B
, A
);
394 DST(3, 0) = AVG3(D
, C
, B
);
397 static void LD4(uint8_t* dst
, const uint8_t* top
) {
398 const int A
= top
[0];
399 const int B
= top
[1];
400 const int C
= top
[2];
401 const int D
= top
[3];
402 const int E
= top
[4];
403 const int F
= top
[5];
404 const int G
= top
[6];
405 const int H
= top
[7];
406 DST(0, 0) = AVG3(A
, B
, C
);
407 DST(1, 0) = DST(0, 1) = AVG3(B
, C
, D
);
408 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C
, D
, E
);
409 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D
, E
, F
);
410 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E
, F
, G
);
411 DST(3, 2) = DST(2, 3) = AVG3(F
, G
, H
);
412 DST(3, 3) = AVG3(G
, H
, H
);
415 static void VR4(uint8_t* dst
, const uint8_t* top
) {
416 const int X
= top
[-1];
417 const int I
= top
[-2];
418 const int J
= top
[-3];
419 const int K
= top
[-4];
420 const int A
= top
[0];
421 const int B
= top
[1];
422 const int C
= top
[2];
423 const int D
= top
[3];
424 DST(0, 0) = DST(1, 2) = AVG2(X
, A
);
425 DST(1, 0) = DST(2, 2) = AVG2(A
, B
);
426 DST(2, 0) = DST(3, 2) = AVG2(B
, C
);
427 DST(3, 0) = AVG2(C
, D
);
429 DST(0, 3) = AVG3(K
, J
, I
);
430 DST(0, 2) = AVG3(J
, I
, X
);
431 DST(0, 1) = DST(1, 3) = AVG3(I
, X
, A
);
432 DST(1, 1) = DST(2, 3) = AVG3(X
, A
, B
);
433 DST(2, 1) = DST(3, 3) = AVG3(A
, B
, C
);
434 DST(3, 1) = AVG3(B
, C
, D
);
437 static void VL4(uint8_t* dst
, const uint8_t* top
) {
438 const int A
= top
[0];
439 const int B
= top
[1];
440 const int C
= top
[2];
441 const int D
= top
[3];
442 const int E
= top
[4];
443 const int F
= top
[5];
444 const int G
= top
[6];
445 const int H
= top
[7];
446 DST(0, 0) = AVG2(A
, B
);
447 DST(1, 0) = DST(0, 2) = AVG2(B
, C
);
448 DST(2, 0) = DST(1, 2) = AVG2(C
, D
);
449 DST(3, 0) = DST(2, 2) = AVG2(D
, E
);
451 DST(0, 1) = AVG3(A
, B
, C
);
452 DST(1, 1) = DST(0, 3) = AVG3(B
, C
, D
);
453 DST(2, 1) = DST(1, 3) = AVG3(C
, D
, E
);
454 DST(3, 1) = DST(2, 3) = AVG3(D
, E
, F
);
455 DST(3, 2) = AVG3(E
, F
, G
);
456 DST(3, 3) = AVG3(F
, G
, H
);
459 static void HU4(uint8_t* dst
, const uint8_t* top
) {
460 const int I
= top
[-2];
461 const int J
= top
[-3];
462 const int K
= top
[-4];
463 const int L
= top
[-5];
464 DST(0, 0) = AVG2(I
, J
);
465 DST(2, 0) = DST(0, 1) = AVG2(J
, K
);
466 DST(2, 1) = DST(0, 2) = AVG2(K
, L
);
467 DST(1, 0) = AVG3(I
, J
, K
);
468 DST(3, 0) = DST(1, 1) = AVG3(J
, K
, L
);
469 DST(3, 1) = DST(1, 2) = AVG3(K
, L
, L
);
470 DST(3, 2) = DST(2, 2) =
471 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L
;
474 static void HD4(uint8_t* dst
, const uint8_t* top
) {
475 const int X
= top
[-1];
476 const int I
= top
[-2];
477 const int J
= top
[-3];
478 const int K
= top
[-4];
479 const int L
= top
[-5];
480 const int A
= top
[0];
481 const int B
= top
[1];
482 const int C
= top
[2];
484 DST(0, 0) = DST(2, 1) = AVG2(I
, X
);
485 DST(0, 1) = DST(2, 2) = AVG2(J
, I
);
486 DST(0, 2) = DST(2, 3) = AVG2(K
, J
);
487 DST(0, 3) = AVG2(L
, K
);
489 DST(3, 0) = AVG3(A
, B
, C
);
490 DST(2, 0) = AVG3(X
, A
, B
);
491 DST(1, 0) = DST(3, 1) = AVG3(I
, X
, A
);
492 DST(1, 1) = DST(3, 2) = AVG3(J
, I
, X
);
493 DST(1, 2) = DST(3, 3) = AVG3(K
, J
, I
);
494 DST(1, 3) = AVG3(L
, K
, J
);
497 static void TM4(uint8_t* dst
, const uint8_t* top
) {
499 const uint8_t* const clip
= clip1
+ 255 - top
[-1];
500 for (y
= 0; y
< 4; ++y
) {
501 const uint8_t* const clip_table
= clip
+ top
[-2 - y
];
502 for (x
= 0; x
< 4; ++x
) {
503 dst
[x
] = clip_table
[top
[x
]];
513 // Left samples are top[-5 .. -2], top_left is top[-1], top are
514 // located at top[0..3], and top right is top[4..7]
515 static void Intra4Preds(uint8_t* dst
, const uint8_t* top
) {
516 DC4(I4DC4
+ dst
, top
);
517 TM4(I4TM4
+ dst
, top
);
518 VE4(I4VE4
+ dst
, top
);
519 HE4(I4HE4
+ dst
, top
);
520 RD4(I4RD4
+ dst
, top
);
521 VR4(I4VR4
+ dst
, top
);
522 LD4(I4LD4
+ dst
, top
);
523 VL4(I4VL4
+ dst
, top
);
524 HD4(I4HD4
+ dst
, top
);
525 HU4(I4HU4
+ dst
, top
);
528 //------------------------------------------------------------------------------
531 static WEBP_INLINE
int GetSSE(const uint8_t* a
, const uint8_t* b
,
535 for (y
= 0; y
< h
; ++y
) {
536 for (x
= 0; x
< w
; ++x
) {
537 const int diff
= (int)a
[x
] - b
[x
];
538 count
+= diff
* diff
;
// Sum of squared differences over a block 16 samples wide and 16 rows high.
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 16, 16);
}
// Sum of squared differences over a block 16 samples wide and 8 rows high.
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 16, 8);
}
// Sum of squared differences over a block 8 samples wide and 8 rows high.
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 8, 8);
}
// Sum of squared differences over a block 4 samples wide and 4 rows high.
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 4, 4);
}
559 //------------------------------------------------------------------------------
560 // Texture distortion
562 // We try to match the spectral content (weighted) between source and
563 // reconstructed samples.
565 // Hadamard transform
566 // Returns the weighted sum of the absolute value of transformed coefficients.
567 static int TTransform(const uint8_t* in
, const uint16_t* w
) {
572 for (i
= 0; i
< 4; ++i
, in
+= BPS
) {
573 const int a0
= in
[0] + in
[2];
574 const int a1
= in
[1] + in
[3];
575 const int a2
= in
[1] - in
[3];
576 const int a3
= in
[0] - in
[2];
577 tmp
[0 + i
* 4] = a0
+ a1
;
578 tmp
[1 + i
* 4] = a3
+ a2
;
579 tmp
[2 + i
* 4] = a3
- a2
;
580 tmp
[3 + i
* 4] = a0
- a1
;
583 for (i
= 0; i
< 4; ++i
, ++w
) {
584 const int a0
= tmp
[0 + i
] + tmp
[8 + i
];
585 const int a1
= tmp
[4 + i
] + tmp
[12+ i
];
586 const int a2
= tmp
[4 + i
] - tmp
[12+ i
];
587 const int a3
= tmp
[0 + i
] - tmp
[8 + i
];
588 const int b0
= a0
+ a1
;
589 const int b1
= a3
+ a2
;
590 const int b2
= a3
- a2
;
591 const int b3
= a0
- a1
;
593 sum
+= w
[ 0] * abs(b0
);
594 sum
+= w
[ 4] * abs(b1
);
595 sum
+= w
[ 8] * abs(b2
);
596 sum
+= w
[12] * abs(b3
);
// Texture distortion between the 4x4 blocks 'a' and 'b': the absolute
// difference of their weighted Hadamard sums (see TTransform), scaled down
// by 32.
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
                    const uint16_t* const w) {
  const int sum1 = TTransform(a, w);
  const int sum2 = TTransform(b, w);
  return abs(sum2 - sum1) >> 5;
}
608 static int Disto16x16(const uint8_t* const a
, const uint8_t* const b
,
609 const uint16_t* const w
) {
612 for (y
= 0; y
< 16 * BPS
; y
+= 4 * BPS
) {
613 for (x
= 0; x
< 16; x
+= 4) {
614 D
+= Disto4x4(a
+ x
+ y
, b
+ x
+ y
, w
);
620 //------------------------------------------------------------------------------
// Maps a position n in zigzag scan order to the raster index (4 * row +
// column) of the corresponding coefficient in a 4x4 block.
static const uint8_t kZigzag[16] = {
  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
628 // Simple quantization
629 static int QuantizeBlock(int16_t in
[16], int16_t out
[16],
630 int n
, const VP8Matrix
* const mtx
) {
632 for (; n
< 16; ++n
) {
633 const int j
= kZigzag
[n
];
634 const int sign
= (in
[j
] < 0);
635 const int coeff
= (sign
? -in
[j
] : in
[j
]) + mtx
->sharpen_
[j
];
636 if (coeff
> mtx
->zthresh_
[j
]) {
637 const int Q
= mtx
->q_
[j
];
638 const int iQ
= mtx
->iq_
[j
];
639 const int B
= mtx
->bias_
[j
];
640 out
[n
] = QUANTDIV(coeff
, iQ
, B
);
641 if (out
[n
] > MAX_LEVEL
) out
[n
] = MAX_LEVEL
;
642 if (sign
) out
[n
] = -out
[n
];
644 if (out
[n
]) last
= n
;
653 static int QuantizeBlockWHT(int16_t in
[16], int16_t out
[16],
654 const VP8Matrix
* const mtx
) {
656 for (n
= 0; n
< 16; ++n
) {
657 const int j
= kZigzag
[n
];
658 const int sign
= (in
[j
] < 0);
659 const int coeff
= sign
? -in
[j
] : in
[j
];
660 assert(mtx
->sharpen_
[j
] == 0);
661 if (coeff
> mtx
->zthresh_
[j
]) {
662 const int Q
= mtx
->q_
[j
];
663 const int iQ
= mtx
->iq_
[j
];
664 const int B
= mtx
->bias_
[j
];
665 out
[n
] = QUANTDIV(coeff
, iQ
, B
);
666 if (out
[n
] > MAX_LEVEL
) out
[n
] = MAX_LEVEL
;
667 if (sign
) out
[n
] = -out
[n
];
669 if (out
[n
]) last
= n
;
678 //------------------------------------------------------------------------------
681 static WEBP_INLINE
void Copy(const uint8_t* src
, uint8_t* dst
, int size
) {
683 for (y
= 0; y
< size
; ++y
) {
684 memcpy(dst
, src
, size
);
// Copies one 4x4 block of samples from 'src' to 'dst'.
static void Copy4x4(const uint8_t* src, uint8_t* dst) {
  Copy(src, dst, 4);
}
692 //------------------------------------------------------------------------------
695 // Speed-critical function pointers. We have to initialize them to the default
696 // implementations within VP8EncDspInit().
697 VP8CHisto VP8CollectHistogram
;
698 VP8Idct VP8ITransform
;
699 VP8Fdct VP8FTransform
;
700 VP8WHT VP8ITransformWHT
;
701 VP8WHT VP8FTransformWHT
;
702 VP8Intra4Preds VP8EncPredLuma4
;
703 VP8IntraPreds VP8EncPredLuma16
;
704 VP8IntraPreds VP8EncPredChroma8
;
705 VP8Metric VP8SSE16x16
;
707 VP8Metric VP8SSE16x8
;
709 VP8WMetric VP8TDisto4x4
;
710 VP8WMetric VP8TDisto16x16
;
711 VP8QuantizeBlock VP8EncQuantizeBlock
;
712 VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT
;
713 VP8BlockCopy VP8Copy4x4
;
715 extern void VP8EncDspInitSSE2(void);
716 extern void VP8EncDspInitNEON(void);
718 void VP8EncDspInit(void) {
721 // default C implementations
722 VP8CollectHistogram
= CollectHistogram
;
723 VP8ITransform
= ITransform
;
724 VP8FTransform
= FTransform
;
725 VP8ITransformWHT
= ITransformWHT
;
726 VP8FTransformWHT
= FTransformWHT
;
727 VP8EncPredLuma4
= Intra4Preds
;
728 VP8EncPredLuma16
= Intra16Preds
;
729 VP8EncPredChroma8
= IntraChromaPreds
;
730 VP8SSE16x16
= SSE16x16
;
732 VP8SSE16x8
= SSE16x8
;
734 VP8TDisto4x4
= Disto4x4
;
735 VP8TDisto16x16
= Disto16x16
;
736 VP8EncQuantizeBlock
= QuantizeBlock
;
737 VP8EncQuantizeBlockWHT
= QuantizeBlockWHT
;
738 VP8Copy4x4
= Copy4x4
;
740 // If defined, use CPUInfo() to overwrite some pointers with faster versions.
742 #if defined(WEBP_USE_SSE2)
743 if (VP8GetCPUInfo(kSSE2
)) {
746 #elif defined(WEBP_USE_NEON)
747 if (VP8GetCPUInfo(kNEON
)) {