// Copyright 2011 Google Inc. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the COPYING file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.
// -----------------------------------------------------------------------------
//
// Speed-critical encoding functions.
//
// Author: Skal (pascal.massimino@gmail.com)
#include <assert.h>
#include <stdlib.h>  // for abs()
#include <string.h>  // for memcpy(), memset()

#include "../enc/vp8enci.h"
20 static WEBP_INLINE
uint8_t clip_8b(int v
) {
21 return (!(v
& ~0xff)) ? v
: (v
< 0) ? 0 : 255;
24 static WEBP_INLINE
int clip_max(int v
, int max
) {
25 return (v
> max
) ? max
: v
;
//------------------------------------------------------------------------------
// Compute susceptibility based on DCT-coeff histograms:
// the higher, the "easier" the macroblock is to compress.
32 const int VP8DspScan
[16 + 4 + 4] = {
34 0 + 0 * BPS
, 4 + 0 * BPS
, 8 + 0 * BPS
, 12 + 0 * BPS
,
35 0 + 4 * BPS
, 4 + 4 * BPS
, 8 + 4 * BPS
, 12 + 4 * BPS
,
36 0 + 8 * BPS
, 4 + 8 * BPS
, 8 + 8 * BPS
, 12 + 8 * BPS
,
37 0 + 12 * BPS
, 4 + 12 * BPS
, 8 + 12 * BPS
, 12 + 12 * BPS
,
39 0 + 0 * BPS
, 4 + 0 * BPS
, 0 + 4 * BPS
, 4 + 4 * BPS
, // U
40 8 + 0 * BPS
, 12 + 0 * BPS
, 8 + 4 * BPS
, 12 + 4 * BPS
// V
43 static void CollectHistogram(const uint8_t* ref
, const uint8_t* pred
,
44 int start_block
, int end_block
,
45 VP8Histogram
* const histo
) {
47 for (j
= start_block
; j
< end_block
; ++j
) {
51 VP8FTransform(ref
+ VP8DspScan
[j
], pred
+ VP8DspScan
[j
], out
);
53 // Convert coefficients to bin.
54 for (k
= 0; k
< 16; ++k
) {
55 const int v
= abs(out
[k
]) >> 3; // TODO(skal): add rounding?
56 const int clipped_value
= clip_max(v
, MAX_COEFF_THRESH
);
57 histo
->distribution
[clipped_value
]++;
//------------------------------------------------------------------------------
// run-time tables (~4k)
// Lookup table indexed by (255 + v): clips v in [-255,510] to [0,255].
static uint8_t clip1[255 + 510 + 1];

// We declare this variable 'volatile' to prevent instruction reordering
// and make sure it's set to true _last_ (so as to be thread-safe)
static volatile int tables_ok = 0;
71 static void InitTables(void) {
74 for (i
= -255; i
<= 255 + 255; ++i
) {
75 clip1
[255 + i
] = clip_8b(i
);
//------------------------------------------------------------------------------
// Transforms (Paragraph 14.4)
// Adds the reconstructed residual 'v' (scaled down by 8) to the reference
// sample at (x, y) and stores the clamped result into 'dst'. Expects 'dst'
// and 'ref' to be in scope at the point of use.
#define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(ref[(x) + (y) * BPS] + ((v) >> 3))

// 16.16 fixed-point multipliers used by the inverse transform.
static const int kC1 = 20091 + (1 << 16);
static const int kC2 = 35468;
// Fixed-point multiply: drops the 16 fractional bits of the product.
#define MUL(a, b) (((a) * (b)) >> 16)
92 static WEBP_INLINE
void ITransformOne(const uint8_t* ref
, const int16_t* in
,
97 for (i
= 0; i
< 4; ++i
) { // vertical pass
98 const int a
= in
[0] + in
[8];
99 const int b
= in
[0] - in
[8];
100 const int c
= MUL(in
[4], kC2
) - MUL(in
[12], kC1
);
101 const int d
= MUL(in
[4], kC1
) + MUL(in
[12], kC2
);
111 for (i
= 0; i
< 4; ++i
) { // horizontal pass
112 const int dc
= tmp
[0] + 4;
113 const int a
= dc
+ tmp
[8];
114 const int b
= dc
- tmp
[8];
115 const int c
= MUL(tmp
[4], kC2
) - MUL(tmp
[12], kC1
);
116 const int d
= MUL(tmp
[4], kC1
) + MUL(tmp
[12], kC2
);
// Inverse-transforms one 4x4 block, and optionally the block immediately to
// its right (coefficients at in + 16, samples at ref + 4 / dst + 4).
// NOTE(review): the last parameter and the condition guarding the second
// call were missing from the damaged listing; 'do_two' is restored here and
// should be confirmed against the VP8Idct prototype.
static void ITransform(const uint8_t* ref, const int16_t* in, uint8_t* dst,
                       int do_two) {
  ITransformOne(ref, in, dst);
  if (do_two) {
    ITransformOne(ref + 4, in + 16, dst + 4);
  }
}
133 static void FTransform(const uint8_t* src
, const uint8_t* ref
, int16_t* out
) {
136 for (i
= 0; i
< 4; ++i
, src
+= BPS
, ref
+= BPS
) {
137 const int d0
= src
[0] - ref
[0]; // 9bit dynamic range ([-255,255])
138 const int d1
= src
[1] - ref
[1];
139 const int d2
= src
[2] - ref
[2];
140 const int d3
= src
[3] - ref
[3];
141 const int a0
= (d0
+ d3
); // 10b [-510,510]
142 const int a1
= (d1
+ d2
);
143 const int a2
= (d1
- d2
);
144 const int a3
= (d0
- d3
);
145 tmp
[0 + i
* 4] = (a0
+ a1
) * 8; // 14b [-8160,8160]
146 tmp
[1 + i
* 4] = (a2
* 2217 + a3
* 5352 + 1812) >> 9; // [-7536,7542]
147 tmp
[2 + i
* 4] = (a0
- a1
) * 8;
148 tmp
[3 + i
* 4] = (a3
* 2217 - a2
* 5352 + 937) >> 9;
150 for (i
= 0; i
< 4; ++i
) {
151 const int a0
= (tmp
[0 + i
] + tmp
[12 + i
]); // 15b
152 const int a1
= (tmp
[4 + i
] + tmp
[ 8 + i
]);
153 const int a2
= (tmp
[4 + i
] - tmp
[ 8 + i
]);
154 const int a3
= (tmp
[0 + i
] - tmp
[12 + i
]);
155 out
[0 + i
] = (a0
+ a1
+ 7) >> 4; // 12b
156 out
[4 + i
] = ((a2
* 2217 + a3
* 5352 + 12000) >> 16) + (a3
!= 0);
157 out
[8 + i
] = (a0
- a1
+ 7) >> 4;
158 out
[12+ i
] = ((a3
* 2217 - a2
* 5352 + 51000) >> 16);
// Forward 4x4 Walsh-Hadamard transform. The 16 inputs are read from 'in'
// with a stride of 16 elements (in[0], in[16], ..., in[240]); the 16 outputs
// are written contiguously to 'out'.
static void FTransformWHT(const int16_t* in, int16_t* out) {
  // input is 12b signed
  int32_t tmp[16];
  int i;
  // First pass: each iteration consumes four inputs (64 elements apart per
  // iteration) and fills one row of tmp[].
  for (i = 0; i < 4; ++i, in += 64) {
    const int a0 = (in[0 * 16] + in[2 * 16]);  // 13b
    const int a1 = (in[1 * 16] + in[3 * 16]);
    const int a2 = (in[1 * 16] - in[3 * 16]);
    const int a3 = (in[0 * 16] - in[2 * 16]);
    tmp[0 + i * 4] = a0 + a1;   // 14b
    tmp[1 + i * 4] = a3 + a2;
    tmp[2 + i * 4] = a3 - a2;
    tmp[3 + i * 4] = a0 - a1;
  }
  // Second pass: one column of tmp[] per iteration.
  for (i = 0; i < 4; ++i) {
    const int a0 = (tmp[0 + i] + tmp[8 + i]);  // 15b
    const int a1 = (tmp[4 + i] + tmp[12+ i]);
    const int a2 = (tmp[4 + i] - tmp[12+ i]);
    const int a3 = (tmp[0 + i] - tmp[8 + i]);
    const int b0 = a0 + a1;    // 16b
    const int b1 = a3 + a2;
    const int b2 = a3 - a2;
    const int b3 = a0 - a1;
    out[ 0 + i] = b0 >> 1;     // 15b
    out[ 4 + i] = b1 >> 1;
    out[ 8 + i] = b2 >> 1;
    out[12 + i] = b3 >> 1;
  }
}
//------------------------------------------------------------------------------
// Sample at column x, row y of the block pointed to by 'dst' (stride BPS).
#define DST(x, y) dst[(x) + (y) * BPS]
200 static WEBP_INLINE
void Fill(uint8_t* dst
, int value
, int size
) {
202 for (j
= 0; j
< size
; ++j
) {
203 memset(dst
+ j
* BPS
, value
, size
);
207 static WEBP_INLINE
void VerticalPred(uint8_t* dst
,
208 const uint8_t* top
, int size
) {
211 for (j
= 0; j
< size
; ++j
) memcpy(dst
+ j
* BPS
, top
, size
);
213 Fill(dst
, 127, size
);
217 static WEBP_INLINE
void HorizontalPred(uint8_t* dst
,
218 const uint8_t* left
, int size
) {
221 for (j
= 0; j
< size
; ++j
) {
222 memset(dst
+ j
* BPS
, left
[j
], size
);
225 Fill(dst
, 129, size
);
229 static WEBP_INLINE
void TrueMotion(uint8_t* dst
, const uint8_t* left
,
230 const uint8_t* top
, int size
) {
234 const uint8_t* const clip
= clip1
+ 255 - left
[-1];
235 for (y
= 0; y
< size
; ++y
) {
236 const uint8_t* const clip_table
= clip
+ left
[y
];
238 for (x
= 0; x
< size
; ++x
) {
239 dst
[x
] = clip_table
[top
[x
]];
244 HorizontalPred(dst
, left
, size
);
247 // true motion without left samples (hence: with default 129 value)
248 // is equivalent to VE prediction where you just copy the top samples.
249 // Note that if top samples are not available, the default value is
250 // then 129, and not 127 as in the VerticalPred case.
252 VerticalPred(dst
, top
, size
);
254 Fill(dst
, 129, size
);
259 static WEBP_INLINE
void DCMode(uint8_t* dst
, const uint8_t* left
,
261 int size
, int round
, int shift
) {
265 for (j
= 0; j
< size
; ++j
) DC
+= top
[j
];
266 if (left
) { // top and left present
267 for (j
= 0; j
< size
; ++j
) DC
+= left
[j
];
268 } else { // top, but no left
271 DC
= (DC
+ round
) >> shift
;
272 } else if (left
) { // left but no top
273 for (j
= 0; j
< size
; ++j
) DC
+= left
[j
];
275 DC
= (DC
+ round
) >> shift
;
276 } else { // no top, no left, nothing.
//------------------------------------------------------------------------------
// Chroma 8x8 prediction (paragraph 12.2)
285 static void IntraChromaPreds(uint8_t* dst
, const uint8_t* left
,
286 const uint8_t* top
) {
288 DCMode(C8DC8
+ dst
, left
, top
, 8, 8, 4);
289 VerticalPred(C8VE8
+ dst
, top
, 8);
290 HorizontalPred(C8HE8
+ dst
, left
, 8);
291 TrueMotion(C8TM8
+ dst
, left
, top
, 8);
295 if (left
) left
+= 16;
296 DCMode(C8DC8
+ dst
, left
, top
, 8, 8, 4);
297 VerticalPred(C8VE8
+ dst
, top
, 8);
298 HorizontalPred(C8HE8
+ dst
, left
, 8);
299 TrueMotion(C8TM8
+ dst
, left
, top
, 8);
//------------------------------------------------------------------------------
// luma 16x16 prediction (paragraph 12.3)
305 static void Intra16Preds(uint8_t* dst
,
306 const uint8_t* left
, const uint8_t* top
) {
307 DCMode(I16DC16
+ dst
, left
, top
, 16, 16, 5);
308 VerticalPred(I16VE16
+ dst
, top
, 16);
309 HorizontalPred(I16HE16
+ dst
, left
, 16);
310 TrueMotion(I16TM16
+ dst
, left
, top
, 16);
//------------------------------------------------------------------------------
// luma 4x4 prediction
// Rounded 3-tap (1,2,1)/4 and 2-tap (1,1)/2 averages.
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
319 static void VE4(uint8_t* dst
, const uint8_t* top
) { // vertical
320 const uint8_t vals
[4] = {
321 AVG3(top
[-1], top
[0], top
[1]),
322 AVG3(top
[ 0], top
[1], top
[2]),
323 AVG3(top
[ 1], top
[2], top
[3]),
324 AVG3(top
[ 2], top
[3], top
[4])
327 for (i
= 0; i
< 4; ++i
) {
328 memcpy(dst
+ i
* BPS
, vals
, 4);
332 static void HE4(uint8_t* dst
, const uint8_t* top
) { // horizontal
333 const int X
= top
[-1];
334 const int I
= top
[-2];
335 const int J
= top
[-3];
336 const int K
= top
[-4];
337 const int L
= top
[-5];
338 *(uint32_t*)(dst
+ 0 * BPS
) = 0x01010101U
* AVG3(X
, I
, J
);
339 *(uint32_t*)(dst
+ 1 * BPS
) = 0x01010101U
* AVG3(I
, J
, K
);
340 *(uint32_t*)(dst
+ 2 * BPS
) = 0x01010101U
* AVG3(J
, K
, L
);
341 *(uint32_t*)(dst
+ 3 * BPS
) = 0x01010101U
* AVG3(K
, L
, L
);
// 4x4 DC prediction: fills the block with the mean of the eight samples
// top[0..3] and top[-5..-2].
// NOTE(review): the accumulator declaration was missing from the damaged
// listing; it is restored with an initial value of 4 (rounding term for the
// '>> 3' below). Confirm against the upstream file.
static void DC4(uint8_t* dst, const uint8_t* top) {
  uint32_t dc = 4;
  int i;
  for (i = 0; i < 4; ++i) dc += top[i] + top[-5 + i];
  Fill(dst, dc >> 3, 4);
}
351 static void RD4(uint8_t* dst
, const uint8_t* top
) {
352 const int X
= top
[-1];
353 const int I
= top
[-2];
354 const int J
= top
[-3];
355 const int K
= top
[-4];
356 const int L
= top
[-5];
357 const int A
= top
[0];
358 const int B
= top
[1];
359 const int C
= top
[2];
360 const int D
= top
[3];
361 DST(0, 3) = AVG3(J
, K
, L
);
362 DST(0, 2) = DST(1, 3) = AVG3(I
, J
, K
);
363 DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X
, I
, J
);
364 DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A
, X
, I
);
365 DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B
, A
, X
);
366 DST(2, 0) = DST(3, 1) = AVG3(C
, B
, A
);
367 DST(3, 0) = AVG3(D
, C
, B
);
370 static void LD4(uint8_t* dst
, const uint8_t* top
) {
371 const int A
= top
[0];
372 const int B
= top
[1];
373 const int C
= top
[2];
374 const int D
= top
[3];
375 const int E
= top
[4];
376 const int F
= top
[5];
377 const int G
= top
[6];
378 const int H
= top
[7];
379 DST(0, 0) = AVG3(A
, B
, C
);
380 DST(1, 0) = DST(0, 1) = AVG3(B
, C
, D
);
381 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C
, D
, E
);
382 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D
, E
, F
);
383 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E
, F
, G
);
384 DST(3, 2) = DST(2, 3) = AVG3(F
, G
, H
);
385 DST(3, 3) = AVG3(G
, H
, H
);
388 static void VR4(uint8_t* dst
, const uint8_t* top
) {
389 const int X
= top
[-1];
390 const int I
= top
[-2];
391 const int J
= top
[-3];
392 const int K
= top
[-4];
393 const int A
= top
[0];
394 const int B
= top
[1];
395 const int C
= top
[2];
396 const int D
= top
[3];
397 DST(0, 0) = DST(1, 2) = AVG2(X
, A
);
398 DST(1, 0) = DST(2, 2) = AVG2(A
, B
);
399 DST(2, 0) = DST(3, 2) = AVG2(B
, C
);
400 DST(3, 0) = AVG2(C
, D
);
402 DST(0, 3) = AVG3(K
, J
, I
);
403 DST(0, 2) = AVG3(J
, I
, X
);
404 DST(0, 1) = DST(1, 3) = AVG3(I
, X
, A
);
405 DST(1, 1) = DST(2, 3) = AVG3(X
, A
, B
);
406 DST(2, 1) = DST(3, 3) = AVG3(A
, B
, C
);
407 DST(3, 1) = AVG3(B
, C
, D
);
410 static void VL4(uint8_t* dst
, const uint8_t* top
) {
411 const int A
= top
[0];
412 const int B
= top
[1];
413 const int C
= top
[2];
414 const int D
= top
[3];
415 const int E
= top
[4];
416 const int F
= top
[5];
417 const int G
= top
[6];
418 const int H
= top
[7];
419 DST(0, 0) = AVG2(A
, B
);
420 DST(1, 0) = DST(0, 2) = AVG2(B
, C
);
421 DST(2, 0) = DST(1, 2) = AVG2(C
, D
);
422 DST(3, 0) = DST(2, 2) = AVG2(D
, E
);
424 DST(0, 1) = AVG3(A
, B
, C
);
425 DST(1, 1) = DST(0, 3) = AVG3(B
, C
, D
);
426 DST(2, 1) = DST(1, 3) = AVG3(C
, D
, E
);
427 DST(3, 1) = DST(2, 3) = AVG3(D
, E
, F
);
428 DST(3, 2) = AVG3(E
, F
, G
);
429 DST(3, 3) = AVG3(F
, G
, H
);
432 static void HU4(uint8_t* dst
, const uint8_t* top
) {
433 const int I
= top
[-2];
434 const int J
= top
[-3];
435 const int K
= top
[-4];
436 const int L
= top
[-5];
437 DST(0, 0) = AVG2(I
, J
);
438 DST(2, 0) = DST(0, 1) = AVG2(J
, K
);
439 DST(2, 1) = DST(0, 2) = AVG2(K
, L
);
440 DST(1, 0) = AVG3(I
, J
, K
);
441 DST(3, 0) = DST(1, 1) = AVG3(J
, K
, L
);
442 DST(3, 1) = DST(1, 2) = AVG3(K
, L
, L
);
443 DST(3, 2) = DST(2, 2) =
444 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L
;
447 static void HD4(uint8_t* dst
, const uint8_t* top
) {
448 const int X
= top
[-1];
449 const int I
= top
[-2];
450 const int J
= top
[-3];
451 const int K
= top
[-4];
452 const int L
= top
[-5];
453 const int A
= top
[0];
454 const int B
= top
[1];
455 const int C
= top
[2];
457 DST(0, 0) = DST(2, 1) = AVG2(I
, X
);
458 DST(0, 1) = DST(2, 2) = AVG2(J
, I
);
459 DST(0, 2) = DST(2, 3) = AVG2(K
, J
);
460 DST(0, 3) = AVG2(L
, K
);
462 DST(3, 0) = AVG3(A
, B
, C
);
463 DST(2, 0) = AVG3(X
, A
, B
);
464 DST(1, 0) = DST(3, 1) = AVG3(I
, X
, A
);
465 DST(1, 1) = DST(3, 2) = AVG3(J
, I
, X
);
466 DST(1, 2) = DST(3, 3) = AVG3(K
, J
, I
);
467 DST(1, 3) = AVG3(L
, K
, J
);
470 static void TM4(uint8_t* dst
, const uint8_t* top
) {
472 const uint8_t* const clip
= clip1
+ 255 - top
[-1];
473 for (y
= 0; y
< 4; ++y
) {
474 const uint8_t* const clip_table
= clip
+ top
[-2 - y
];
475 for (x
= 0; x
< 4; ++x
) {
476 dst
[x
] = clip_table
[top
[x
]];
486 // Left samples are top[-5 .. -2], top_left is top[-1], top are
487 // located at top[0..3], and top right is top[4..7]
488 static void Intra4Preds(uint8_t* dst
, const uint8_t* top
) {
489 DC4(I4DC4
+ dst
, top
);
490 TM4(I4TM4
+ dst
, top
);
491 VE4(I4VE4
+ dst
, top
);
492 HE4(I4HE4
+ dst
, top
);
493 RD4(I4RD4
+ dst
, top
);
494 VR4(I4VR4
+ dst
, top
);
495 LD4(I4LD4
+ dst
, top
);
496 VL4(I4VL4
+ dst
, top
);
497 HD4(I4HD4
+ dst
, top
);
498 HU4(I4HU4
+ dst
, top
);
//------------------------------------------------------------------------------
504 static WEBP_INLINE
int GetSSE(const uint8_t* a
, const uint8_t* b
,
508 for (y
= 0; y
< h
; ++y
) {
509 for (x
= 0; x
< w
; ++x
) {
510 const int diff
= (int)a
[x
] - b
[x
];
511 count
+= diff
* diff
;
// Sum of squared differences over a 16x16 block.
static int SSE16x16(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 16, 16);
}
// Sum of squared differences over a block of width 16 and height 8.
static int SSE16x8(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 16, 8);
}
// Sum of squared differences over an 8x8 block.
static int SSE8x8(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 8, 8);
}
// Sum of squared differences over a 4x4 block.
static int SSE4x4(const uint8_t* a, const uint8_t* b) {
  return GetSSE(a, b, 4, 4);
}
//------------------------------------------------------------------------------
// Texture distortion
//
// We try to match the spectral content (weighted) between source and
// reconstructed samples.
538 // Hadamard transform
539 // Returns the weighted sum of the absolute value of transformed coefficients.
540 static int TTransform(const uint8_t* in
, const uint16_t* w
) {
545 for (i
= 0; i
< 4; ++i
, in
+= BPS
) {
546 const int a0
= in
[0] + in
[2];
547 const int a1
= in
[1] + in
[3];
548 const int a2
= in
[1] - in
[3];
549 const int a3
= in
[0] - in
[2];
550 tmp
[0 + i
* 4] = a0
+ a1
;
551 tmp
[1 + i
* 4] = a3
+ a2
;
552 tmp
[2 + i
* 4] = a3
- a2
;
553 tmp
[3 + i
* 4] = a0
- a1
;
556 for (i
= 0; i
< 4; ++i
, ++w
) {
557 const int a0
= tmp
[0 + i
] + tmp
[8 + i
];
558 const int a1
= tmp
[4 + i
] + tmp
[12+ i
];
559 const int a2
= tmp
[4 + i
] - tmp
[12+ i
];
560 const int a3
= tmp
[0 + i
] - tmp
[8 + i
];
561 const int b0
= a0
+ a1
;
562 const int b1
= a3
+ a2
;
563 const int b2
= a3
- a2
;
564 const int b3
= a0
- a1
;
566 sum
+= w
[ 0] * abs(b0
);
567 sum
+= w
[ 4] * abs(b1
);
568 sum
+= w
[ 8] * abs(b2
);
569 sum
+= w
[12] * abs(b3
);
// Texture distortion between two 4x4 blocks: absolute difference of their
// weighted Hadamard sums, scaled down by 32.
static int Disto4x4(const uint8_t* const a, const uint8_t* const b,
                    const uint16_t* const w) {
  const int sum1 = TTransform(a, w);
  const int sum2 = TTransform(b, w);
  return abs(sum2 - sum1) >> 5;
}
581 static int Disto16x16(const uint8_t* const a
, const uint8_t* const b
,
582 const uint16_t* const w
) {
585 for (y
= 0; y
< 16 * BPS
; y
+= 4 * BPS
) {
586 for (x
= 0; x
< 16; x
+= 4) {
587 D
+= Disto4x4(a
+ x
+ y
, b
+ x
+ y
, w
);
//------------------------------------------------------------------------------
// Zigzag scan order: kZigzag[n] is the raster index (within a 4x4 block) of
// the n-th coefficient in scan order.
static const uint8_t kZigzag[16] = {
  0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
601 // Simple quantization
602 static int QuantizeBlock(int16_t in
[16], int16_t out
[16],
603 const VP8Matrix
* const mtx
) {
606 for (n
= 0; n
< 16; ++n
) {
607 const int j
= kZigzag
[n
];
608 const int sign
= (in
[j
] < 0);
609 const uint32_t coeff
= (sign
? -in
[j
] : in
[j
]) + mtx
->sharpen_
[j
];
610 if (coeff
> mtx
->zthresh_
[j
]) {
611 const uint32_t Q
= mtx
->q_
[j
];
612 const uint32_t iQ
= mtx
->iq_
[j
];
613 const uint32_t B
= mtx
->bias_
[j
];
614 int level
= QUANTDIV(coeff
, iQ
, B
);
615 if (level
> MAX_LEVEL
) level
= MAX_LEVEL
;
616 if (sign
) level
= -level
;
628 static int QuantizeBlockWHT(int16_t in
[16], int16_t out
[16],
629 const VP8Matrix
* const mtx
) {
631 for (n
= 0; n
< 16; ++n
) {
632 const int j
= kZigzag
[n
];
633 const int sign
= (in
[j
] < 0);
634 const uint32_t coeff
= sign
? -in
[j
] : in
[j
];
635 assert(mtx
->sharpen_
[j
] == 0);
636 if (coeff
> mtx
->zthresh_
[j
]) {
637 const uint32_t Q
= mtx
->q_
[j
];
638 const uint32_t iQ
= mtx
->iq_
[j
];
639 const uint32_t B
= mtx
->bias_
[j
];
640 int level
= QUANTDIV(coeff
, iQ
, B
);
641 if (level
> MAX_LEVEL
) level
= MAX_LEVEL
;
642 if (sign
) level
= -level
;
//------------------------------------------------------------------------------
657 static WEBP_INLINE
void Copy(const uint8_t* src
, uint8_t* dst
, int size
) {
659 for (y
= 0; y
< size
; ++y
) {
660 memcpy(dst
, src
, size
);
// Copies a 4x4 block from 'src' to 'dst'.
static void Copy4x4(const uint8_t* src, uint8_t* dst) { Copy(src, dst, 4); }
//------------------------------------------------------------------------------
671 // Speed-critical function pointers. We have to initialize them to the default
672 // implementations within VP8EncDspInit().
673 VP8CHisto VP8CollectHistogram
;
674 VP8Idct VP8ITransform
;
675 VP8Fdct VP8FTransform
;
676 VP8WHT VP8FTransformWHT
;
677 VP8Intra4Preds VP8EncPredLuma4
;
678 VP8IntraPreds VP8EncPredLuma16
;
679 VP8IntraPreds VP8EncPredChroma8
;
680 VP8Metric VP8SSE16x16
;
682 VP8Metric VP8SSE16x8
;
684 VP8WMetric VP8TDisto4x4
;
685 VP8WMetric VP8TDisto16x16
;
686 VP8QuantizeBlock VP8EncQuantizeBlock
;
687 VP8QuantizeBlockWHT VP8EncQuantizeBlockWHT
;
688 VP8BlockCopy VP8Copy4x4
;
690 extern void VP8EncDspInitSSE2(void);
691 extern void VP8EncDspInitAVX2(void);
692 extern void VP8EncDspInitNEON(void);
693 extern void VP8EncDspInitMIPS32(void);
695 static volatile VP8CPUInfo enc_last_cpuinfo_used
=
696 (VP8CPUInfo
)&enc_last_cpuinfo_used
;
698 void VP8EncDspInit(void) {
699 if (enc_last_cpuinfo_used
== VP8GetCPUInfo
) return;
701 VP8DspInit(); // common inverse transforms
704 // default C implementations
705 VP8CollectHistogram
= CollectHistogram
;
706 VP8ITransform
= ITransform
;
707 VP8FTransform
= FTransform
;
708 VP8FTransformWHT
= FTransformWHT
;
709 VP8EncPredLuma4
= Intra4Preds
;
710 VP8EncPredLuma16
= Intra16Preds
;
711 VP8EncPredChroma8
= IntraChromaPreds
;
712 VP8SSE16x16
= SSE16x16
;
714 VP8SSE16x8
= SSE16x8
;
716 VP8TDisto4x4
= Disto4x4
;
717 VP8TDisto16x16
= Disto16x16
;
718 VP8EncQuantizeBlock
= QuantizeBlock
;
719 VP8EncQuantizeBlockWHT
= QuantizeBlockWHT
;
720 VP8Copy4x4
= Copy4x4
;
722 // If defined, use CPUInfo() to overwrite some pointers with faster versions.
723 if (VP8GetCPUInfo
!= NULL
) {
724 #if defined(WEBP_USE_SSE2)
725 if (VP8GetCPUInfo(kSSE2
)) {
729 #if defined(WEBP_USE_AVX2)
730 if (VP8GetCPUInfo(kAVX2
)) {
734 #if defined(WEBP_USE_NEON)
735 if (VP8GetCPUInfo(kNEON
)) {
739 #if defined(WEBP_USE_MIPS32)
740 if (VP8GetCPUInfo(kMIPS32
)) {
741 VP8EncDspInitMIPS32();
745 enc_last_cpuinfo_used
= VP8GetCPUInfo
;