1 // Copyright 2010 Google Inc. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
10 // Speed-critical decoding functions.
12 // Author: Skal (pascal.massimino@gmail.com)
15 #include "../dec/vp8i.h"
17 //------------------------------------------------------------------------------
19 static WEBP_INLINE
uint8_t clip_8b(int v
) {
20 return (!(v
& ~0xff)) ? v
: (v
< 0) ? 0 : 255;
23 //------------------------------------------------------------------------------
24 // Transforms (Paragraph 14.4)
// Adds the residual (v >> 3) to the pixel at column x, row y of 'dst' and
// saturates the sum to 8 bits. x and y are parenthesized so that expression
// arguments address the intended pixel.
#define STORE(x, y, v) \
  dst[(x) + (y) * BPS] = clip_8b(dst[(x) + (y) * BPS] + ((v) >> 3))

// Updates the four pixels of row y: columns 0..3 receive dc + d, dc + c,
// dc - c and dc - d respectively. 'dc' is evaluated once.
#define STORE2(y, dc, d, c) do {    \
  const int DC = (dc);              \
  STORE(0, y, DC + (d));            \
  STORE(1, y, DC + (c));            \
  STORE(2, y, DC - (c));            \
  STORE(3, y, DC - (d));            \
} while (0)
// Transform multipliers with 16 fractional bits (see MUL below):
// kC2 / 65536 is about 0.5412 and kC1 / 65536 is about 1.3066.
static const int kC2 = 35468;
static const int kC1 = (1 << 16) + 20091;
// Product of 'a' and 'b' with the 16 fractional bits shifted out.
#define MUL(a, b) (((a) * (b)) >> 16)
// Two-pass transform of one block of coefficients 'in'.
// Visible structure: a "vertical pass" loop (4 iterations) combines in[0],
// in[4], in[8] and in[12] into tmp[0..3] using the kC1/kC2 multipliers; a
// "horizontal pass" loop (4 iterations) then reads tmp[0], tmp[4], tmp[8] and
// tmp[12], adding a +4 bias to the tmp[0] term.
// NOTE(review): 'tmp' and 'i' are used without a visible declaration, neither
// loop is closed, no pointer is advanced between iterations, and the a/b/c/d
// values of the horizontal pass are never stored to 'dst'. Lines appear to be
// missing from this copy (the embedded numbering skips 42-44, 54-56, 63-64
// and 71-79) -- restore them from the upstream file before editing.
41 static void TransformOne(const int16_t* in
, uint8_t* dst
) {
45 for (i
= 0; i
< 4; ++i
) { // vertical pass
46 const int a
= in
[0] + in
[8]; // [-4096, 4094]
47 const int b
= in
[0] - in
[8]; // [-4095, 4095]
48 const int c
= MUL(in
[4], kC2
) - MUL(in
[12], kC1
); // [-3783, 3783]
49 const int d
= MUL(in
[4], kC1
) + MUL(in
[12], kC2
); // [-3785, 3781]
50 tmp
[0] = a
+ d
; // [-7881, 7875]
51 tmp
[1] = b
+ c
; // [-7878, 7878]
52 tmp
[2] = b
- c
; // [-7878, 7878]
53 tmp
[3] = a
- d
; // [-7877, 7879]
57 // Each pass is expanding the dynamic range by ~3.85 (upper bound).
58 // The exact value is (2. + (kC1 + kC2) / 65536).
59 // After the second pass, maximum interval is [-3794, 3794], assuming
60 // an input in [-2048, 2047] interval. We then need to add a dst value
61 // in the [0, 255] range.
62 // In the worst case scenario, the input to clip_8b() can be as large as
65 for (i
= 0; i
< 4; ++i
) { // horizontal pass
66 const int dc
= tmp
[0] + 4;
67 const int a
= dc
+ tmp
[8];
68 const int b
= dc
- tmp
[8];
69 const int c
= MUL(tmp
[4], kC2
) - MUL(tmp
[12], kC1
);
70 const int d
= MUL(tmp
[4], kC1
) + MUL(tmp
[12], kC2
);
80 // Simplified transform when only in[0], in[1] and in[4] are non-zero
81 static void TransformAC3(const int16_t* in
, uint8_t* dst
) {
82 const int a
= in
[0] + 4;
83 const int c4
= MUL(in
[4], kC2
);
84 const int d4
= MUL(in
[4], kC1
);
85 const int c1
= MUL(in
[1], kC2
);
86 const int d1
= MUL(in
[1], kC1
);
87 STORE2(0, a
+ d4
, d1
, c1
);
88 STORE2(1, a
+ c4
, d1
, c1
);
89 STORE2(2, a
- c4
, d1
, c1
);
90 STORE2(3, a
- d4
, d1
, c1
);
// Runs TransformOne on the block at 'in'/'dst' and on the next block
// (16 coefficients further in 'in', 4 pixels to the right in 'dst').
// NOTE(review): 'do_two' is never referenced in the visible lines and the
// closing braces are absent; a guard around the second call appears to have
// been lost from this copy (embedded lines 97, 99-100) -- confirm against the
// upstream file.
95 static void TransformTwo(const int16_t* in
, uint8_t* dst
, int do_two
) {
96 TransformOne(in
, dst
);
98 TransformOne(in
+ 16, dst
+ 4);
102 static void TransformUV(const int16_t* in
, uint8_t* dst
) {
103 VP8Transform(in
+ 0 * 16, dst
, 1);
104 VP8Transform(in
+ 2 * 16, dst
+ 4 * BPS
, 1);
// DC-only transform: derives a single value DC = in[0] + 4 and iterates over
// a 4x4 grid (j outer, i inner).
// NOTE(review): the declarations of 'i' and 'j', the inner loop body and all
// closing braces are missing from this copy (embedded lines 109, 112-115);
// presumably each pixel is updated from DC -- confirm against the upstream
// file.
107 static void TransformDC(const int16_t *in
, uint8_t* dst
) {
108 const int DC
= in
[0] + 4;
110 for (j
= 0; j
< 4; ++j
) {
111 for (i
= 0; i
< 4; ++i
) {
117 static void TransformDCUV(const int16_t* in
, uint8_t* dst
) {
118 if (in
[0 * 16]) VP8TransformDC(in
+ 0 * 16, dst
);
119 if (in
[1 * 16]) VP8TransformDC(in
+ 1 * 16, dst
+ 4);
120 if (in
[2 * 16]) VP8TransformDC(in
+ 2 * 16, dst
+ 4 * BPS
);
121 if (in
[3 * 16]) VP8TransformDC(in
+ 3 * 16, dst
+ 4 * BPS
+ 4);
126 //------------------------------------------------------------------------------
// Two-pass add/subtract (butterfly) transform from 'in' to 'out'.
// First loop: for each column i, combines in[i], in[4 + i], in[8 + i] and
// in[12 + i] into tmp[i], tmp[4 + i], tmp[8 + i] and tmp[12 + i].
// Second loop: for each row i of tmp, adds a rounder of 3 to the first term,
// recombines the four values and writes them, shifted right by 3, to out[0],
// out[16], out[32] and out[48].
// NOTE(review): 'tmp' and 'i' have no visible declaration, the loops are not
// closed, and 'out' is not advanced between iterations of the second loop, so
// as shown every iteration would overwrite the same four outputs. Lines
// appear to be missing from this copy (embedded lines 130-131, 141, 152-154)
// -- confirm against the upstream file.
129 static void TransformWHT(const int16_t* in
, int16_t* out
) {
132 for (i
= 0; i
< 4; ++i
) {
133 const int a0
= in
[0 + i
] + in
[12 + i
];
134 const int a1
= in
[4 + i
] + in
[ 8 + i
];
135 const int a2
= in
[4 + i
] - in
[ 8 + i
];
136 const int a3
= in
[0 + i
] - in
[12 + i
];
137 tmp
[0 + i
] = a0
+ a1
;
138 tmp
[8 + i
] = a0
- a1
;
139 tmp
[4 + i
] = a3
+ a2
;
140 tmp
[12 + i
] = a3
- a2
;
142 for (i
= 0; i
< 4; ++i
) {
143 const int dc
= tmp
[0 + i
* 4] + 3; // w/ rounder
144 const int a0
= dc
+ tmp
[3 + i
* 4];
145 const int a1
= tmp
[1 + i
* 4] + tmp
[2 + i
* 4];
146 const int a2
= tmp
[1 + i
* 4] - tmp
[2 + i
* 4];
147 const int a3
= dc
- tmp
[3 + i
* 4];
148 out
[ 0] = (a0
+ a1
) >> 3;
149 out
[16] = (a3
+ a2
) >> 3;
150 out
[32] = (a0
- a1
) >> 3;
151 out
[48] = (a3
- a2
) >> 3;
// Entry point for the WHT transform; VP8DspInit() points it at TransformWHT.
void (*VP8TransformWHT)(const int16_t* in, int16_t* out);
158 //------------------------------------------------------------------------------
// Pixel at column x, row y of the block that 'dst' points to (row pitch BPS).
#define DST(x, y) dst[(x) + (y) * BPS]
// Predictor over a size x size area, shared by TM4, TM8uv and TM16.
// For each row, 'clip' is VP8kclip1 offset by (dst[-1] - top[-1]), i.e. the
// left sample of the row minus the top-left sample; each pixel is then looked
// up as clip[top[x]], where 'top' is the row above the block.
// NOTE(review): 'x' and 'y' have no visible declaration, the loops are not
// closed, and 'dst' is not advanced per row in the visible lines (embedded
// lines 166, 169, 172-175 are missing) -- confirm against the upstream file.
163 static WEBP_INLINE
void TrueMotion(uint8_t *dst
, int size
) {
164 const uint8_t* top
= dst
- BPS
;
165 const uint8_t* const clip0
= VP8kclip1
- top
[-1];
167 for (y
= 0; y
< size
; ++y
) {
168 const uint8_t* const clip
= clip0
+ dst
[-1];
170 for (x
= 0; x
< size
; ++x
) {
171 dst
[x
] = clip
[top
[x
]];
// TrueMotion prediction for a 4x4 block.
static void TM4(uint8_t *dst) {
  TrueMotion(dst, 4);
}
// TrueMotion prediction for an 8x8 block.
static void TM8uv(uint8_t *dst) {
  TrueMotion(dst, 8);
}
// TrueMotion prediction for a 16x16 block.
static void TM16(uint8_t *dst) {
  TrueMotion(dst, 16);
}
180 //------------------------------------------------------------------------------
183 static void VE16(uint8_t *dst
) { // vertical
185 for (j
= 0; j
< 16; ++j
) {
186 memcpy(dst
+ j
* BPS
, dst
- BPS
, 16);
// Horizontal prediction: fills 16 bytes at 'dst' with the sample immediately
// to its left (dst[-1]), inside a loop that counts j down from 16.
// NOTE(review): the declaration of 'j', the per-row advance of 'dst' and the
// closing braces are missing from this copy (embedded lines 191, 194-196);
// as shown, every iteration would rewrite the same row -- confirm against the
// upstream file.
190 static void HE16(uint8_t *dst
) { // horizontal
192 for (j
= 16; j
> 0; --j
) {
193 memset(dst
, dst
[-1], 16);
198 static WEBP_INLINE
void Put16(int v
, uint8_t* dst
) {
200 for (j
= 0; j
< 16; ++j
) {
201 memset(dst
+ j
* BPS
, v
, 16);
// DC prediction for a 16x16 block: accumulates the 16 left-column samples
// (dst[-1 + j * BPS]) and the 16 samples of the row above (dst[j - BPS])
// into 'DC'.
// NOTE(review): the declaration and initial value of 'DC', the declaration of
// 'j', the final normalisation/fill and the closing braces are missing from
// this copy (embedded lines 206-207, 210-212) -- confirm against the upstream
// file.
205 static void DC16(uint8_t *dst
) { // DC
208 for (j
= 0; j
< 16; ++j
) {
209 DC
+= dst
[-1 + j
* BPS
] + dst
[j
- BPS
];
// DC prediction for a 16x16 block when the top samples are not available:
// accumulates only the 16 left-column samples (dst[-1 + j * BPS]) into 'DC'.
// NOTE(review): the declaration and initial value of 'DC', the declaration of
// 'j', the final normalisation/fill and the closing braces are missing from
// this copy (embedded lines 215-216, 219-221) -- confirm against the upstream
// file.
214 static void DC16NoTop(uint8_t *dst
) { // DC with top samples not available
217 for (j
= 0; j
< 16; ++j
) {
218 DC
+= dst
[-1 + j
* BPS
];
// DC prediction for a 16x16 block when the left samples are not available.
// NOTE(review): only the loop header (i from 0 to 15) is visible; the
// declarations, the loop body, the final fill and the closing braces are
// missing from this copy (embedded lines 224-225, 227-230). Presumably the
// loop sums the row above the block -- confirm against the upstream file.
223 static void DC16NoLeft(uint8_t *dst
) { // DC with left samples not available
226 for (i
= 0; i
< 16; ++i
) {
// DC prediction for a 16x16 block when neither the top nor the left samples
// are available.
// NOTE(review): no body statement and no closing brace are visible in this
// copy (embedded lines 233-234 are missing) -- confirm against the upstream
// file.
232 static void DC16NoTopLeft(uint8_t *dst
) { // DC with no top and left samples
236 //------------------------------------------------------------------------------
// Rounded 3-tap average with weights 1:2:1.
#define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2)
// Rounded average of two values.
#define AVG2(a, b) (((a) + (b) + 1) >> 1)
242 static void VE4(uint8_t *dst
) { // vertical
243 const uint8_t* top
= dst
- BPS
;
244 const uint8_t vals
[4] = {
245 AVG3(top
[-1], top
[0], top
[1]),
246 AVG3(top
[ 0], top
[1], top
[2]),
247 AVG3(top
[ 1], top
[2], top
[3]),
248 AVG3(top
[ 2], top
[3], top
[4])
251 for (i
= 0; i
< 4; ++i
) {
252 memcpy(dst
+ i
* BPS
, vals
, sizeof(vals
));
256 static void HE4(uint8_t *dst
) { // horizontal
257 const int A
= dst
[-1 - BPS
];
258 const int B
= dst
[-1];
259 const int C
= dst
[-1 + BPS
];
260 const int D
= dst
[-1 + 2 * BPS
];
261 const int E
= dst
[-1 + 3 * BPS
];
262 *(uint32_t*)(dst
+ 0 * BPS
) = 0x01010101U
* AVG3(A
, B
, C
);
263 *(uint32_t*)(dst
+ 1 * BPS
) = 0x01010101U
* AVG3(B
, C
, D
);
264 *(uint32_t*)(dst
+ 2 * BPS
) = 0x01010101U
* AVG3(C
, D
, E
);
265 *(uint32_t*)(dst
+ 3 * BPS
) = 0x01010101U
* AVG3(D
, E
, E
);
// DC prediction for a 4x4 block: accumulates the four samples above the block
// (dst[i - BPS]) and the four samples to its left (dst[-1 + i * BPS]) into
// 'dc', then fills each of the four rows with 'dc'.
// NOTE(review): the declaration and initial value of 'dc', the declaration of
// 'i', any normalisation of the sum before the fill, and the closing brace are
// missing from this copy (embedded lines 269-270, 272, 274) -- confirm
// against the upstream file.
268 static void DC4(uint8_t *dst
) { // DC
271 for (i
= 0; i
< 4; ++i
) dc
+= dst
[i
- BPS
] + dst
[-1 + i
* BPS
];
273 for (i
= 0; i
< 4; ++i
) memset(dst
+ i
* BPS
, dc
, 4);
276 static void RD4(uint8_t *dst
) { // Down-right
277 const int I
= dst
[-1 + 0 * BPS
];
278 const int J
= dst
[-1 + 1 * BPS
];
279 const int K
= dst
[-1 + 2 * BPS
];
280 const int L
= dst
[-1 + 3 * BPS
];
281 const int X
= dst
[-1 - BPS
];
282 const int A
= dst
[0 - BPS
];
283 const int B
= dst
[1 - BPS
];
284 const int C
= dst
[2 - BPS
];
285 const int D
= dst
[3 - BPS
];
286 DST(0, 3) = AVG3(J
, K
, L
);
287 DST(0, 2) = DST(1, 3) = AVG3(I
, J
, K
);
288 DST(0, 1) = DST(1, 2) = DST(2, 3) = AVG3(X
, I
, J
);
289 DST(0, 0) = DST(1, 1) = DST(2, 2) = DST(3, 3) = AVG3(A
, X
, I
);
290 DST(1, 0) = DST(2, 1) = DST(3, 2) = AVG3(B
, A
, X
);
291 DST(2, 0) = DST(3, 1) = AVG3(C
, B
, A
);
292 DST(3, 0) = AVG3(D
, C
, B
);
295 static void LD4(uint8_t *dst
) { // Down-Left
296 const int A
= dst
[0 - BPS
];
297 const int B
= dst
[1 - BPS
];
298 const int C
= dst
[2 - BPS
];
299 const int D
= dst
[3 - BPS
];
300 const int E
= dst
[4 - BPS
];
301 const int F
= dst
[5 - BPS
];
302 const int G
= dst
[6 - BPS
];
303 const int H
= dst
[7 - BPS
];
304 DST(0, 0) = AVG3(A
, B
, C
);
305 DST(1, 0) = DST(0, 1) = AVG3(B
, C
, D
);
306 DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C
, D
, E
);
307 DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D
, E
, F
);
308 DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E
, F
, G
);
309 DST(3, 2) = DST(2, 3) = AVG3(F
, G
, H
);
310 DST(3, 3) = AVG3(G
, H
, H
);
313 static void VR4(uint8_t *dst
) { // Vertical-Right
314 const int I
= dst
[-1 + 0 * BPS
];
315 const int J
= dst
[-1 + 1 * BPS
];
316 const int K
= dst
[-1 + 2 * BPS
];
317 const int X
= dst
[-1 - BPS
];
318 const int A
= dst
[0 - BPS
];
319 const int B
= dst
[1 - BPS
];
320 const int C
= dst
[2 - BPS
];
321 const int D
= dst
[3 - BPS
];
322 DST(0, 0) = DST(1, 2) = AVG2(X
, A
);
323 DST(1, 0) = DST(2, 2) = AVG2(A
, B
);
324 DST(2, 0) = DST(3, 2) = AVG2(B
, C
);
325 DST(3, 0) = AVG2(C
, D
);
327 DST(0, 3) = AVG3(K
, J
, I
);
328 DST(0, 2) = AVG3(J
, I
, X
);
329 DST(0, 1) = DST(1, 3) = AVG3(I
, X
, A
);
330 DST(1, 1) = DST(2, 3) = AVG3(X
, A
, B
);
331 DST(2, 1) = DST(3, 3) = AVG3(A
, B
, C
);
332 DST(3, 1) = AVG3(B
, C
, D
);
335 static void VL4(uint8_t *dst
) { // Vertical-Left
336 const int A
= dst
[0 - BPS
];
337 const int B
= dst
[1 - BPS
];
338 const int C
= dst
[2 - BPS
];
339 const int D
= dst
[3 - BPS
];
340 const int E
= dst
[4 - BPS
];
341 const int F
= dst
[5 - BPS
];
342 const int G
= dst
[6 - BPS
];
343 const int H
= dst
[7 - BPS
];
344 DST(0, 0) = AVG2(A
, B
);
345 DST(1, 0) = DST(0, 2) = AVG2(B
, C
);
346 DST(2, 0) = DST(1, 2) = AVG2(C
, D
);
347 DST(3, 0) = DST(2, 2) = AVG2(D
, E
);
349 DST(0, 1) = AVG3(A
, B
, C
);
350 DST(1, 1) = DST(0, 3) = AVG3(B
, C
, D
);
351 DST(2, 1) = DST(1, 3) = AVG3(C
, D
, E
);
352 DST(3, 1) = DST(2, 3) = AVG3(D
, E
, F
);
353 DST(3, 2) = AVG3(E
, F
, G
);
354 DST(3, 3) = AVG3(F
, G
, H
);
357 static void HU4(uint8_t *dst
) { // Horizontal-Up
358 const int I
= dst
[-1 + 0 * BPS
];
359 const int J
= dst
[-1 + 1 * BPS
];
360 const int K
= dst
[-1 + 2 * BPS
];
361 const int L
= dst
[-1 + 3 * BPS
];
362 DST(0, 0) = AVG2(I
, J
);
363 DST(2, 0) = DST(0, 1) = AVG2(J
, K
);
364 DST(2, 1) = DST(0, 2) = AVG2(K
, L
);
365 DST(1, 0) = AVG3(I
, J
, K
);
366 DST(3, 0) = DST(1, 1) = AVG3(J
, K
, L
);
367 DST(3, 1) = DST(1, 2) = AVG3(K
, L
, L
);
368 DST(3, 2) = DST(2, 2) =
369 DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L
;
372 static void HD4(uint8_t *dst
) { // Horizontal-Down
373 const int I
= dst
[-1 + 0 * BPS
];
374 const int J
= dst
[-1 + 1 * BPS
];
375 const int K
= dst
[-1 + 2 * BPS
];
376 const int L
= dst
[-1 + 3 * BPS
];
377 const int X
= dst
[-1 - BPS
];
378 const int A
= dst
[0 - BPS
];
379 const int B
= dst
[1 - BPS
];
380 const int C
= dst
[2 - BPS
];
382 DST(0, 0) = DST(2, 1) = AVG2(I
, X
);
383 DST(0, 1) = DST(2, 2) = AVG2(J
, I
);
384 DST(0, 2) = DST(2, 3) = AVG2(K
, J
);
385 DST(0, 3) = AVG2(L
, K
);
387 DST(3, 0) = AVG3(A
, B
, C
);
388 DST(2, 0) = AVG3(X
, A
, B
);
389 DST(1, 0) = DST(3, 1) = AVG3(I
, X
, A
);
390 DST(1, 1) = DST(3, 2) = AVG3(J
, I
, X
);
391 DST(1, 2) = DST(3, 3) = AVG3(K
, J
, I
);
392 DST(1, 3) = AVG3(L
, K
, J
);
399 //------------------------------------------------------------------------------
402 static void VE8uv(uint8_t *dst
) { // vertical
404 for (j
= 0; j
< 8; ++j
) {
405 memcpy(dst
+ j
* BPS
, dst
- BPS
, 8);
// Horizontal prediction for an 8x8 block: fills 8 bytes at 'dst' with the
// sample immediately to its left (dst[-1]), inside an 8-iteration loop.
// NOTE(review): the declaration of 'j', the per-row advance of 'dst' and the
// closing braces are missing from this copy (embedded lines 410, 413-415);
// as shown, every iteration would rewrite the same row -- confirm against the
// upstream file.
409 static void HE8uv(uint8_t *dst
) { // horizontal
411 for (j
= 0; j
< 8; ++j
) {
412 memset(dst
, dst
[-1], 8);
417 // helper for chroma-DC predictions
418 static WEBP_INLINE
void Put8x8uv(uint8_t value
, uint8_t* dst
) {
420 for (j
= 0; j
< 8; ++j
) {
421 memset(dst
+ j
* BPS
, value
, 8);
// DC prediction for an 8x8 block: accumulates the 8 samples above the block
// (dst[i - BPS]) and the 8 samples to its left (dst[-1 + i * BPS]) into
// 'dc0', then fills the block with dc0 >> 4 via Put8x8uv.
// NOTE(review): the declaration and initial value of 'dc0', the declaration
// of 'i' and the closing braces are missing from this copy (embedded lines
// 426-427, 430, 432) -- confirm against the upstream file.
425 static void DC8uv(uint8_t *dst
) { // DC
428 for (i
= 0; i
< 8; ++i
) {
429 dc0
+= dst
[i
- BPS
] + dst
[-1 + i
* BPS
];
431 Put8x8uv(dc0
>> 4, dst
);
// DC prediction for an 8x8 block when the left samples are not available:
// fills the block with dc0 >> 3 via Put8x8uv after an 8-iteration loop.
// NOTE(review): the declaration and initial value of 'dc0', the declaration
// of 'i', the loop body and the closing braces are missing from this copy
// (embedded lines 435-436, 438-439, 441). Presumably the loop sums the row
// above the block -- confirm against the upstream file.
434 static void DC8uvNoLeft(uint8_t *dst
) { // DC with no left samples
437 for (i
= 0; i
< 8; ++i
) {
440 Put8x8uv(dc0
>> 3, dst
);
// DC prediction for an 8x8 block when the top samples are not available:
// accumulates the 8 left-column samples (dst[-1 + i * BPS]) into 'dc0', then
// fills the block with dc0 >> 3 via Put8x8uv.
// NOTE(review): the declaration and initial value of 'dc0', the declaration
// of 'i' and the closing braces are missing from this copy (embedded lines
// 444-445, 448, 450) -- confirm against the upstream file.
443 static void DC8uvNoTop(uint8_t *dst
) { // DC with no top samples
446 for (i
= 0; i
< 8; ++i
) {
447 dc0
+= dst
[-1 + i
* BPS
];
449 Put8x8uv(dc0
>> 3, dst
);
// DC prediction for an 8x8 block when neither top nor left samples are
// available.
// NOTE(review): no body statement and no closing brace are visible in this
// copy (embedded lines 453-454 are missing) -- confirm against the upstream
// file.
452 static void DC8uvNoTopLeft(uint8_t *dst
) { // DC with nothing
456 //------------------------------------------------------------------------------
457 // default C implementations
459 const VP8PredFunc VP8PredLuma4
[NUM_BMODES
] = {
460 DC4
, TM4
, VE4
, HE4
, RD4
, VR4
, LD4
, VL4
, HD4
, HU4
463 const VP8PredFunc VP8PredLuma16
[NUM_B_DC_MODES
] = {
464 DC16
, TM16
, VE16
, HE16
,
465 DC16NoTop
, DC16NoLeft
, DC16NoTopLeft
468 const VP8PredFunc VP8PredChroma8
[NUM_B_DC_MODES
] = {
469 DC8uv
, TM8uv
, VE8uv
, HE8uv
,
470 DC8uvNoTop
, DC8uvNoLeft
, DC8uvNoTopLeft
473 //------------------------------------------------------------------------------
474 // Edge filtering functions
476 // 4 pixels in, 2 pixels out
477 static WEBP_INLINE
void do_filter2(uint8_t* p
, int step
) {
478 const int p1
= p
[-2*step
], p0
= p
[-step
], q0
= p
[0], q1
= p
[step
];
479 const int a
= 3 * (q0
- p0
) + VP8ksclip1
[p1
- q1
]; // in [-893,892]
480 const int a1
= VP8ksclip2
[(a
+ 4) >> 3]; // in [-16,15]
481 const int a2
= VP8ksclip2
[(a
+ 3) >> 3];
482 p
[-step
] = VP8kclip1
[p0
+ a2
];
483 p
[ 0] = VP8kclip1
[q0
- a1
];
486 // 4 pixels in, 4 pixels out
487 static WEBP_INLINE
void do_filter4(uint8_t* p
, int step
) {
488 const int p1
= p
[-2*step
], p0
= p
[-step
], q0
= p
[0], q1
= p
[step
];
489 const int a
= 3 * (q0
- p0
);
490 const int a1
= VP8ksclip2
[(a
+ 4) >> 3];
491 const int a2
= VP8ksclip2
[(a
+ 3) >> 3];
492 const int a3
= (a1
+ 1) >> 1;
493 p
[-2*step
] = VP8kclip1
[p1
+ a3
];
494 p
[- step
] = VP8kclip1
[p0
+ a2
];
495 p
[ 0] = VP8kclip1
[q0
- a1
];
496 p
[ step
] = VP8kclip1
[q1
- a3
];
499 // 6 pixels in, 6 pixels out
500 static WEBP_INLINE
void do_filter6(uint8_t* p
, int step
) {
501 const int p2
= p
[-3*step
], p1
= p
[-2*step
], p0
= p
[-step
];
502 const int q0
= p
[0], q1
= p
[step
], q2
= p
[2*step
];
503 const int a
= VP8ksclip1
[3 * (q0
- p0
) + VP8ksclip1
[p1
- q1
]];
504 // a is in [-128,127], a1 in [-27,27], a2 in [-18,18] and a3 in [-9,9]
505 const int a1
= (27 * a
+ 63) >> 7; // eq. to ((3 * a + 7) * 9) >> 7
506 const int a2
= (18 * a
+ 63) >> 7; // eq. to ((2 * a + 7) * 9) >> 7
507 const int a3
= (9 * a
+ 63) >> 7; // eq. to ((1 * a + 7) * 9) >> 7
508 p
[-3*step
] = VP8kclip1
[p2
+ a3
];
509 p
[-2*step
] = VP8kclip1
[p1
+ a2
];
510 p
[- step
] = VP8kclip1
[p0
+ a1
];
511 p
[ 0] = VP8kclip1
[q0
- a1
];
512 p
[ step
] = VP8kclip1
[q1
- a2
];
513 p
[ 2*step
] = VP8kclip1
[q2
- a3
];
516 static WEBP_INLINE
int hev(const uint8_t* p
, int step
, int thresh
) {
517 const int p1
= p
[-2*step
], p0
= p
[-step
], q0
= p
[0], q1
= p
[step
];
518 return (VP8kabs0
[p1
- p0
] > thresh
) || (VP8kabs0
[q1
- q0
] > thresh
);
521 static WEBP_INLINE
int needs_filter(const uint8_t* p
, int step
, int t
) {
522 const int p1
= p
[-2 * step
], p0
= p
[-step
], q0
= p
[0], q1
= p
[step
];
523 return ((4 * VP8kabs0
[p0
- q0
] + VP8kabs0
[p1
- q1
]) <= t
);
526 static WEBP_INLINE
int needs_filter2(const uint8_t* p
,
527 int step
, int t
, int it
) {
528 const int p3
= p
[-4 * step
], p2
= p
[-3 * step
], p1
= p
[-2 * step
];
529 const int p0
= p
[-step
], q0
= p
[0];
530 const int q1
= p
[step
], q2
= p
[2 * step
], q3
= p
[3 * step
];
531 if ((4 * VP8kabs0
[p0
- q0
] + VP8kabs0
[p1
- q1
]) > t
) return 0;
532 return VP8kabs0
[p3
- p2
] <= it
&& VP8kabs0
[p2
- p1
] <= it
&&
533 VP8kabs0
[p1
- p0
] <= it
&& VP8kabs0
[q3
- q2
] <= it
&&
534 VP8kabs0
[q2
- q1
] <= it
&& VP8kabs0
[q1
- q0
] <= it
;
537 //------------------------------------------------------------------------------
538 // Simple In-loop filtering (Paragraph 15.2)
// Simple filter across a horizontal edge: visits 16 consecutive columns
// starting at 'p' and filters (across rows, using 'stride' as the sample
// spacing) each column that passes needs_filter() with the threshold
// 2 * thresh + 1.
static void SimpleVFilter16(uint8_t* p, int stride, int thresh) {
  const int limit = 2 * thresh + 1;
  int col;
  for (col = 0; col < 16; ++col) {
    uint8_t* const pix = p + col;
    if (!needs_filter(pix, stride, limit)) {
      continue;
    }
    do_filter2(pix, stride);
  }
}
// Simple filter across a vertical edge: visits 16 consecutive rows starting
// at 'p' and filters (across columns, with a sample spacing of 1) each row
// that passes needs_filter() with the threshold 2 * thresh + 1.
static void SimpleHFilter16(uint8_t* p, int stride, int thresh) {
  const int limit = 2 * thresh + 1;
  int row;
  for (row = 0; row < 16; ++row) {
    uint8_t* const pix = p + row * stride;
    if (!needs_filter(pix, 1, limit)) {
      continue;
    }
    do_filter2(pix, 1);
  }
}
// Calls SimpleVFilter16 three times (k counts down from 3).
// NOTE(review): the declaration of 'k', any advance of 'p' between calls and
// the closing braces are missing from this copy (embedded lines 561, 563,
// 565-566); as shown, all three calls would filter the same edge. Presumably
// 'p' is moved to the next inner edge before each call -- confirm against the
// upstream file.
560 static void SimpleVFilter16i(uint8_t* p
, int stride
, int thresh
) {
562 for (k
= 3; k
> 0; --k
) {
564 SimpleVFilter16(p
, stride
, thresh
);
// Calls SimpleHFilter16 three times (k counts down from 3).
// NOTE(review): the declaration of 'k', any advance of 'p' between calls and
// the closing braces are missing from this copy (embedded lines 569, 571,
// 573-574); as shown, all three calls would filter the same edge. Presumably
// 'p' is moved to the next inner edge before each call -- confirm against the
// upstream file.
568 static void SimpleHFilter16i(uint8_t* p
, int stride
, int thresh
) {
570 for (k
= 3; k
> 0; --k
) {
572 SimpleHFilter16(p
, stride
, thresh
);
576 //------------------------------------------------------------------------------
577 // Complex In-loop filtering (Paragraph 15.3)
// Edge filter built from needs_filter2(), hev(), do_filter2() and
// do_filter6(). In the visible lines, an edge position that passes
// needs_filter2() (threshold 2 * thresh + 1, inner threshold 'ithresh') is
// tested with hev() against 'hev_thresh'; do_filter2() and do_filter6() are
// the two filters applied, both with sample spacing 'hstride'.
// NOTE(review): 'size' and 'vstride' are never referenced in the visible
// lines, and the loop header, the else branch separating the two filter calls
// and all closing braces are missing from this copy (embedded lines 583, 587,
// 589-593). Presumably the body runs 'size' times, advancing 'p' by 'vstride'
// -- confirm against the upstream file.
579 static WEBP_INLINE
void FilterLoop26(uint8_t* p
,
580 int hstride
, int vstride
, int size
,
581 int thresh
, int ithresh
, int hev_thresh
) {
582 const int thresh2
= 2 * thresh
+ 1;
584 if (needs_filter2(p
, hstride
, thresh2
, ithresh
)) {
585 if (hev(p
, hstride
, hev_thresh
)) {
586 do_filter2(p
, hstride
);
588 do_filter6(p
, hstride
);
// Edge filter built from needs_filter2(), hev(), do_filter2() and
// do_filter4(). In the visible lines, an edge position that passes
// needs_filter2() (threshold 2 * thresh + 1, inner threshold 'ithresh') is
// tested with hev() against 'hev_thresh'; do_filter2() and do_filter4() are
// the two filters applied, both with sample spacing 'hstride'.
// NOTE(review): 'size' and 'vstride' are never referenced in the visible
// lines, and the loop header, the else branch separating the two filter calls
// and all closing braces are missing from this copy (embedded lines 599, 603,
// 605-609). Presumably the body runs 'size' times, advancing 'p' by 'vstride'
// -- confirm against the upstream file.
595 static WEBP_INLINE
void FilterLoop24(uint8_t* p
,
596 int hstride
, int vstride
, int size
,
597 int thresh
, int ithresh
, int hev_thresh
) {
598 const int thresh2
= 2 * thresh
+ 1;
600 if (needs_filter2(p
, hstride
, thresh2
, ithresh
)) {
601 if (hev(p
, hstride
, hev_thresh
)) {
602 do_filter2(p
, hstride
);
604 do_filter4(p
, hstride
);
// On macroblock edges: 16-pixel luma filter using FilterLoop26, with 'stride'
// as the spacing between the samples being filtered and 1 as the step from
// one filtered position to the next.
static void VFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  const int sample_step = stride;
  const int position_step = 1;
  FilterLoop26(p, sample_step, position_step, 16,
               thresh, ithresh, hev_thresh);
}
// On macroblock edges: 16-pixel luma filter using FilterLoop26, with 1 as the
// spacing between the samples being filtered and 'stride' as the step from
// one filtered position to the next.
static void HFilter16(uint8_t* p, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  const int sample_step = 1;
  const int position_step = stride;
  FilterLoop26(p, sample_step, position_step, 16,
               thresh, ithresh, hev_thresh);
}
622 // on three inner edges
// Calls FilterLoop24 three times over 16 pixels (k counts down from 3), with
// 'stride' as the sample spacing and 1 as the step between positions.
// NOTE(review): the declaration of 'k', any advance of 'p' between calls and
// the closing braces are missing from this copy (embedded lines 625, 627,
// 629-630); as shown, all three calls would filter the same edge -- confirm
// against the upstream file.
623 static void VFilter16i(uint8_t* p
, int stride
,
624 int thresh
, int ithresh
, int hev_thresh
) {
626 for (k
= 3; k
> 0; --k
) {
628 FilterLoop24(p
, stride
, 1, 16, thresh
, ithresh
, hev_thresh
);
// Calls FilterLoop24 three times over 16 pixels (k counts down from 3), with
// 1 as the sample spacing and 'stride' as the step between positions.
// NOTE(review): the declaration of 'k', any advance of 'p' between calls and
// the closing braces are missing from this copy (embedded lines 634, 636,
// 638-639); as shown, all three calls would filter the same edge -- confirm
// against the upstream file.
632 static void HFilter16i(uint8_t* p
, int stride
,
633 int thresh
, int ithresh
, int hev_thresh
) {
635 for (k
= 3; k
> 0; --k
) {
637 FilterLoop24(p
, 1, stride
, 16, thresh
, ithresh
, hev_thresh
);
// 8-pixels wide variant, for chroma filtering: applies FilterLoop26 to the
// 'u' plane and then to the 'v' plane, with 'stride' as the sample spacing
// and 1 as the step between positions.
static void VFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  uint8_t* const planes[2] = { u, v };
  int n;
  for (n = 0; n < 2; ++n) {
    FilterLoop26(planes[n], stride, 1, 8, thresh, ithresh, hev_thresh);
  }
}
// 8-pixels wide chroma filter: applies FilterLoop26 to the 'u' plane and then
// to the 'v' plane, with 1 as the sample spacing and 'stride' as the step
// between positions.
static void HFilter8(uint8_t* u, uint8_t* v, int stride,
                     int thresh, int ithresh, int hev_thresh) {
  uint8_t* const planes[2] = { u, v };
  int n;
  for (n = 0; n < 2; ++n) {
    FilterLoop26(planes[n], 1, stride, 8, thresh, ithresh, hev_thresh);
  }
}
// 8-pixels wide chroma filter for the edge four rows below the start of each
// plane: applies FilterLoop24 to 'u' and then to 'v', with 'stride' as the
// sample spacing and 1 as the step between positions.
static void VFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  uint8_t* const u_edge = u + 4 * stride;
  uint8_t* const v_edge = v + 4 * stride;
  FilterLoop24(u_edge, stride, 1, 8, thresh, ithresh, hev_thresh);
  FilterLoop24(v_edge, stride, 1, 8, thresh, ithresh, hev_thresh);
}
// 8-pixels wide chroma filter for the edge four columns right of the start of
// each plane: applies FilterLoop24 to 'u' and then to 'v', with 1 as the
// sample spacing and 'stride' as the step between positions.
static void HFilter8i(uint8_t* u, uint8_t* v, int stride,
                      int thresh, int ithresh, int hev_thresh) {
  uint8_t* const u_edge = u + 4;
  uint8_t* const v_edge = v + 4;
  FilterLoop24(u_edge, 1, stride, 8, thresh, ithresh, hev_thresh);
  FilterLoop24(v_edge, 1, stride, 8, thresh, ithresh, hev_thresh);
}
666 //------------------------------------------------------------------------------
668 VP8DecIdct2 VP8Transform
;
669 VP8DecIdct VP8TransformAC3
;
670 VP8DecIdct VP8TransformUV
;
671 VP8DecIdct VP8TransformDC
;
672 VP8DecIdct VP8TransformDCUV
;
674 VP8LumaFilterFunc VP8VFilter16
;
675 VP8LumaFilterFunc VP8HFilter16
;
676 VP8ChromaFilterFunc VP8VFilter8
;
677 VP8ChromaFilterFunc VP8HFilter8
;
678 VP8LumaFilterFunc VP8VFilter16i
;
679 VP8LumaFilterFunc VP8HFilter16i
;
680 VP8ChromaFilterFunc VP8VFilter8i
;
681 VP8ChromaFilterFunc VP8HFilter8i
;
682 VP8SimpleFilterFunc VP8SimpleVFilter16
;
683 VP8SimpleFilterFunc VP8SimpleHFilter16
;
684 VP8SimpleFilterFunc VP8SimpleVFilter16i
;
685 VP8SimpleFilterFunc VP8SimpleHFilter16i
;
// Platform-specific initializers, defined in other translation units.
extern void VP8DspInitSSE2(void);
extern void VP8DspInitNEON(void);
extern void VP8DspInitMIPS32(void);
691 void VP8DspInit(void) {
694 VP8TransformWHT
= TransformWHT
;
695 VP8Transform
= TransformTwo
;
696 VP8TransformUV
= TransformUV
;
697 VP8TransformDC
= TransformDC
;
698 VP8TransformDCUV
= TransformDCUV
;
699 VP8TransformAC3
= TransformAC3
;
701 VP8VFilter16
= VFilter16
;
702 VP8HFilter16
= HFilter16
;
703 VP8VFilter8
= VFilter8
;
704 VP8HFilter8
= HFilter8
;
705 VP8VFilter16i
= VFilter16i
;
706 VP8HFilter16i
= HFilter16i
;
707 VP8VFilter8i
= VFilter8i
;
708 VP8HFilter8i
= HFilter8i
;
709 VP8SimpleVFilter16
= SimpleVFilter16
;
710 VP8SimpleHFilter16
= SimpleHFilter16
;
711 VP8SimpleVFilter16i
= SimpleVFilter16i
;
712 VP8SimpleHFilter16i
= SimpleHFilter16i
;
714 // If defined, use CPUInfo() to overwrite some pointers with faster versions.
715 if (VP8GetCPUInfo
!= NULL
) {
716 #if defined(WEBP_USE_SSE2)
717 if (VP8GetCPUInfo(kSSE2
)) {
720 #elif defined(WEBP_USE_NEON)
721 if (VP8GetCPUInfo(kNEON
)) {
724 #elif defined(WEBP_USE_MIPS32)
725 if (VP8GetCPUInfo(kMIPS32
)) {