1 // Copyright 2012 Google Inc. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
10 // Image transforms and color space conversion methods for lossless decoder.
12 // Authors: Vikas Arora (vikaas.arora@gmail.com)
13 // Jyrki Alakuijala (jyrki@google.com)
14 // Urvang Joshi (urvang@google.com)
18 // Define the following if target arch is sure to have SSE2
19 // #define WEBP_TARGET_HAS_SSE2
21 #if defined(__cplusplus) || defined(c_plusplus)
25 #if defined(WEBP_TARGET_HAS_SSE2)
26 #include <emmintrin.h>
31 #include "./lossless.h"
32 #include "../dec/vp8li.h"
35 #define MAX_DIFF_COST (1e30f)
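// Lookup tables for small integer arguments: kLog2Table[i] holds log2(i) and
// kSLog2Table[i] holds i * log2(i); index 0 maps to 0 in both tables.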
38 #define APPROX_LOG_MAX 4096
39 #define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
40 const float kLog2Table
[LOG_LOOKUP_IDX_MAX
] = {
41 0.0000000000000000f
, 0.0000000000000000f
,
42 1.0000000000000000f
, 1.5849625007211560f
,
43 2.0000000000000000f
, 2.3219280948873621f
,
44 2.5849625007211560f
, 2.8073549220576041f
,
45 3.0000000000000000f
, 3.1699250014423121f
,
46 3.3219280948873621f
, 3.4594316186372973f
,
47 3.5849625007211560f
, 3.7004397181410921f
,
48 3.8073549220576041f
, 3.9068905956085187f
,
49 4.0000000000000000f
, 4.0874628412503390f
,
50 4.1699250014423121f
, 4.2479275134435852f
,
51 4.3219280948873626f
, 4.3923174227787606f
,
52 4.4594316186372973f
, 4.5235619560570130f
,
53 4.5849625007211560f
, 4.6438561897747243f
,
54 4.7004397181410917f
, 4.7548875021634682f
,
55 4.8073549220576037f
, 4.8579809951275718f
,
56 4.9068905956085187f
, 4.9541963103868749f
,
57 5.0000000000000000f
, 5.0443941193584533f
,
58 5.0874628412503390f
, 5.1292830169449663f
,
59 5.1699250014423121f
, 5.2094533656289501f
,
60 5.2479275134435852f
, 5.2854022188622487f
,
61 5.3219280948873626f
, 5.3575520046180837f
,
62 5.3923174227787606f
, 5.4262647547020979f
,
63 5.4594316186372973f
, 5.4918530963296747f
,
64 5.5235619560570130f
, 5.5545888516776376f
,
65 5.5849625007211560f
, 5.6147098441152083f
,
66 5.6438561897747243f
, 5.6724253419714951f
,
67 5.7004397181410917f
, 5.7279204545631987f
,
68 5.7548875021634682f
, 5.7813597135246599f
,
69 5.8073549220576037f
, 5.8328900141647412f
,
70 5.8579809951275718f
, 5.8826430493618415f
,
71 5.9068905956085187f
, 5.9307373375628866f
,
72 5.9541963103868749f
, 5.9772799234999167f
,
73 6.0000000000000000f
, 6.0223678130284543f
,
74 6.0443941193584533f
, 6.0660891904577720f
,
75 6.0874628412503390f
, 6.1085244567781691f
,
76 6.1292830169449663f
, 6.1497471195046822f
,
77 6.1699250014423121f
, 6.1898245588800175f
,
78 6.2094533656289501f
, 6.2288186904958804f
,
79 6.2479275134435852f
, 6.2667865406949010f
,
80 6.2854022188622487f
, 6.3037807481771030f
,
81 6.3219280948873626f
, 6.3398500028846243f
,
82 6.3575520046180837f
, 6.3750394313469245f
,
83 6.3923174227787606f
, 6.4093909361377017f
,
84 6.4262647547020979f
, 6.4429434958487279f
,
85 6.4594316186372973f
, 6.4757334309663976f
,
86 6.4918530963296747f
, 6.5077946401986963f
,
87 6.5235619560570130f
, 6.5391588111080309f
,
88 6.5545888516776376f
, 6.5698556083309478f
,
89 6.5849625007211560f
, 6.5999128421871278f
,
90 6.6147098441152083f
, 6.6293566200796094f
,
91 6.6438561897747243f
, 6.6582114827517946f
,
92 6.6724253419714951f
, 6.6865005271832185f
,
93 6.7004397181410917f
, 6.7142455176661224f
,
94 6.7279204545631987f
, 6.7414669864011464f
,
95 6.7548875021634682f
, 6.7681843247769259f
,
96 6.7813597135246599f
, 6.7944158663501061f
,
97 6.8073549220576037f
, 6.8201789624151878f
,
98 6.8328900141647412f
, 6.8454900509443747f
,
99 6.8579809951275718f
, 6.8703647195834047f
,
100 6.8826430493618415f
, 6.8948177633079437f
,
101 6.9068905956085187f
, 6.9188632372745946f
,
102 6.9307373375628866f
, 6.9425145053392398f
,
103 6.9541963103868749f
, 6.9657842846620869f
,
104 6.9772799234999167f
, 6.9886846867721654f
,
105 7.0000000000000000f
, 7.0112272554232539f
,
106 7.0223678130284543f
, 7.0334230015374501f
,
107 7.0443941193584533f
, 7.0552824355011898f
,
108 7.0660891904577720f
, 7.0768155970508308f
,
109 7.0874628412503390f
, 7.0980320829605263f
,
110 7.1085244567781691f
, 7.1189410727235076f
,
111 7.1292830169449663f
, 7.1395513523987936f
,
112 7.1497471195046822f
, 7.1598713367783890f
,
113 7.1699250014423121f
, 7.1799090900149344f
,
114 7.1898245588800175f
, 7.1996723448363644f
,
115 7.2094533656289501f
, 7.2191685204621611f
,
116 7.2288186904958804f
, 7.2384047393250785f
,
117 7.2479275134435852f
, 7.2573878426926521f
,
118 7.2667865406949010f
, 7.2761244052742375f
,
119 7.2854022188622487f
, 7.2946207488916270f
,
120 7.3037807481771030f
, 7.3128829552843557f
,
121 7.3219280948873626f
, 7.3309168781146167f
,
122 7.3398500028846243f
, 7.3487281542310771f
,
123 7.3575520046180837f
, 7.3663222142458160f
,
124 7.3750394313469245f
, 7.3837042924740519f
,
125 7.3923174227787606f
, 7.4008794362821843f
,
126 7.4093909361377017f
, 7.4178525148858982f
,
127 7.4262647547020979f
, 7.4346282276367245f
,
128 7.4429434958487279f
, 7.4512111118323289f
,
129 7.4594316186372973f
, 7.4676055500829976f
,
130 7.4757334309663976f
, 7.4838157772642563f
,
131 7.4918530963296747f
, 7.4998458870832056f
,
132 7.5077946401986963f
, 7.5156998382840427f
,
133 7.5235619560570130f
, 7.5313814605163118f
,
134 7.5391588111080309f
, 7.5468944598876364f
,
135 7.5545888516776376f
, 7.5622424242210728f
,
136 7.5698556083309478f
, 7.5774288280357486f
,
137 7.5849625007211560f
, 7.5924570372680806f
,
138 7.5999128421871278f
, 7.6073303137496104f
,
139 7.6147098441152083f
, 7.6220518194563764f
,
140 7.6293566200796094f
, 7.6366246205436487f
,
141 7.6438561897747243f
, 7.6510516911789281f
,
142 7.6582114827517946f
, 7.6653359171851764f
,
143 7.6724253419714951f
, 7.6794800995054464f
,
144 7.6865005271832185f
, 7.6934869574993252f
,
145 7.7004397181410917f
, 7.7073591320808825f
,
146 7.7142455176661224f
, 7.7210991887071855f
,
147 7.7279204545631987f
, 7.7347096202258383f
,
148 7.7414669864011464f
, 7.7481928495894605f
,
149 7.7548875021634682f
, 7.7615512324444795f
,
150 7.7681843247769259f
, 7.7747870596011736f
,
151 7.7813597135246599f
, 7.7879025593914317f
,
152 7.7944158663501061f
, 7.8008998999203047f
,
153 7.8073549220576037f
, 7.8137811912170374f
,
154 7.8201789624151878f
, 7.8265484872909150f
,
155 7.8328900141647412f
, 7.8392037880969436f
,
156 7.8454900509443747f
, 7.8517490414160571f
,
157 7.8579809951275718f
, 7.8641861446542797f
,
158 7.8703647195834047f
, 7.8765169465649993f
,
159 7.8826430493618415f
, 7.8887432488982591f
,
160 7.8948177633079437f
, 7.9008668079807486f
,
161 7.9068905956085187f
, 7.9128893362299619f
,
162 7.9188632372745946f
, 7.9248125036057812f
,
163 7.9307373375628866f
, 7.9366379390025709f
,
164 7.9425145053392398f
, 7.9483672315846778f
,
165 7.9541963103868749f
, 7.9600019320680805f
,
166 7.9657842846620869f
, 7.9715435539507719f
,
167 7.9772799234999167f
, 7.9829935746943103f
,
168 7.9886846867721654f
, 7.9943534368588577f
171 const float kSLog2Table
[LOG_LOOKUP_IDX_MAX
] = {
172 0.00000000f
, 0.00000000f
, 2.00000000f
, 4.75488750f
,
173 8.00000000f
, 11.60964047f
, 15.50977500f
, 19.65148445f
,
174 24.00000000f
, 28.52932501f
, 33.21928095f
, 38.05374781f
,
175 43.01955001f
, 48.10571634f
, 53.30296891f
, 58.60335893f
,
176 64.00000000f
, 69.48686830f
, 75.05865003f
, 80.71062276f
,
177 86.43856190f
, 92.23866588f
, 98.10749561f
, 104.04192499f
,
178 110.03910002f
, 116.09640474f
, 122.21143267f
, 128.38196256f
,
179 134.60593782f
, 140.88144886f
, 147.20671787f
, 153.58008562f
,
180 160.00000000f
, 166.46500594f
, 172.97373660f
, 179.52490559f
,
181 186.11730005f
, 192.74977453f
, 199.42124551f
, 206.13068654f
,
182 212.87712380f
, 219.65963219f
, 226.47733176f
, 233.32938445f
,
183 240.21499122f
, 247.13338933f
, 254.08384998f
, 261.06567603f
,
184 268.07820003f
, 275.12078236f
, 282.19280949f
, 289.29369244f
,
185 296.42286534f
, 303.57978409f
, 310.76392512f
, 317.97478424f
,
186 325.21187564f
, 332.47473081f
, 339.76289772f
, 347.07593991f
,
187 354.41343574f
, 361.77497759f
, 369.16017124f
, 376.56863518f
,
188 384.00000000f
, 391.45390785f
, 398.93001188f
, 406.42797576f
,
189 413.94747321f
, 421.48818752f
, 429.04981119f
, 436.63204548f
,
190 444.23460010f
, 451.85719280f
, 459.49954906f
, 467.16140179f
,
191 474.84249102f
, 482.54256363f
, 490.26137307f
, 497.99867911f
,
192 505.75424759f
, 513.52785023f
, 521.31926438f
, 529.12827280f
,
193 536.95466351f
, 544.79822957f
, 552.65876890f
, 560.53608414f
,
194 568.42998244f
, 576.34027536f
, 584.26677867f
, 592.20931226f
,
195 600.16769996f
, 608.14176943f
, 616.13135206f
, 624.13628279f
,
196 632.15640007f
, 640.19154569f
, 648.24156472f
, 656.30630539f
,
197 664.38561898f
, 672.47935976f
, 680.58738488f
, 688.70955430f
,
198 696.84573069f
, 704.99577935f
, 713.15956818f
, 721.33696754f
,
199 729.52785023f
, 737.73209140f
, 745.94956849f
, 754.18016116f
,
200 762.42375127f
, 770.68022275f
, 778.94946161f
, 787.23135586f
,
201 795.52579543f
, 803.83267219f
, 812.15187982f
, 820.48331383f
,
202 828.82687147f
, 837.18245171f
, 845.54995518f
, 853.92928416f
,
203 862.32034249f
, 870.72303558f
, 879.13727036f
, 887.56295522f
,
204 896.00000000f
, 904.44831595f
, 912.90781569f
, 921.37841320f
,
205 929.86002376f
, 938.35256392f
, 946.85595152f
, 955.37010560f
,
206 963.89494641f
, 972.43039537f
, 980.97637504f
, 989.53280911f
,
207 998.09962237f
, 1006.67674069f
, 1015.26409097f
, 1023.86160116f
,
208 1032.46920021f
, 1041.08681805f
, 1049.71438560f
, 1058.35183469f
,
209 1066.99909811f
, 1075.65610955f
, 1084.32280357f
, 1092.99911564f
,
210 1101.68498204f
, 1110.38033993f
, 1119.08512727f
, 1127.79928282f
,
211 1136.52274614f
, 1145.25545758f
, 1153.99735821f
, 1162.74838989f
,
212 1171.50849518f
, 1180.27761738f
, 1189.05570047f
, 1197.84268914f
,
213 1206.63852876f
, 1215.44316535f
, 1224.25654560f
, 1233.07861684f
,
214 1241.90932703f
, 1250.74862473f
, 1259.59645914f
, 1268.45278005f
,
215 1277.31753781f
, 1286.19068338f
, 1295.07216828f
, 1303.96194457f
,
216 1312.85996488f
, 1321.76618236f
, 1330.68055071f
, 1339.60302413f
,
217 1348.53355734f
, 1357.47210556f
, 1366.41862452f
, 1375.37307041f
,
218 1384.33539991f
, 1393.30557020f
, 1402.28353887f
, 1411.26926400f
,
219 1420.26270412f
, 1429.26381818f
, 1438.27256558f
, 1447.28890615f
,
220 1456.31280014f
, 1465.34420819f
, 1474.38309138f
, 1483.42941118f
,
221 1492.48312945f
, 1501.54420843f
, 1510.61261078f
, 1519.68829949f
,
222 1528.77123795f
, 1537.86138993f
, 1546.95871952f
, 1556.06319119f
,
223 1565.17476976f
, 1574.29342040f
, 1583.41910860f
, 1592.55180020f
,
224 1601.69146137f
, 1610.83805860f
, 1619.99155871f
, 1629.15192882f
,
225 1638.31913637f
, 1647.49314911f
, 1656.67393509f
, 1665.86146266f
,
226 1675.05570047f
, 1684.25661744f
, 1693.46418280f
, 1702.67836605f
,
227 1711.89913698f
, 1721.12646563f
, 1730.36032233f
, 1739.60067768f
,
228 1748.84750254f
, 1758.10076802f
, 1767.36044551f
, 1776.62650662f
,
229 1785.89892323f
, 1795.17766747f
, 1804.46271172f
, 1813.75402857f
,
230 1823.05159087f
, 1832.35537170f
, 1841.66534438f
, 1850.98148244f
,
231 1860.30375965f
, 1869.63214999f
, 1878.96662767f
, 1888.30716711f
,
232 1897.65374295f
, 1907.00633003f
, 1916.36490342f
, 1925.72943838f
,
233 1935.09991037f
, 1944.47629506f
, 1953.85856831f
, 1963.24670620f
,
234 1972.64068498f
, 1982.04048108f
, 1991.44607117f
, 2000.85743204f
,
235 2010.27454072f
, 2019.69737440f
, 2029.12591044f
, 2038.56012640f
238 float VP8LFastSLog2Slow(int v
) {
239 assert(v
>= LOG_LOOKUP_IDX_MAX
);
240 if (v
< APPROX_LOG_MAX
) {
242 const float v_f
= (float)v
;
243 while (v
>= LOG_LOOKUP_IDX_MAX
) {
247 return v_f
* (kLog2Table
[v
] + log_cnt
);
249 return (float)(LOG_2_RECIPROCAL
* v
* log((double)v
));
253 float VP8LFastLog2Slow(int v
) {
254 assert(v
>= LOG_LOOKUP_IDX_MAX
);
255 if (v
< APPROX_LOG_MAX
) {
257 while (v
>= LOG_LOOKUP_IDX_MAX
) {
261 return kLog2Table
[v
] + log_cnt
;
263 return (float)(LOG_2_RECIPROCAL
* log((double)v
));
267 //------------------------------------------------------------------------------
270 // In-place sum of each component with mod 256.
271 static WEBP_INLINE
void AddPixelsEq(uint32_t* a
, uint32_t b
) {
272 const uint32_t alpha_and_green
= (*a
& 0xff00ff00u
) + (b
& 0xff00ff00u
);
273 const uint32_t red_and_blue
= (*a
& 0x00ff00ffu
) + (b
& 0x00ff00ffu
);
274 *a
= (alpha_and_green
& 0xff00ff00u
) | (red_and_blue
& 0x00ff00ffu
);
277 static WEBP_INLINE
uint32_t Average2(uint32_t a0
, uint32_t a1
) {
278 return (((a0
^ a1
) & 0xfefefefeL
) >> 1) + (a0
& a1
);
281 static WEBP_INLINE
uint32_t Average3(uint32_t a0
, uint32_t a1
, uint32_t a2
) {
282 return Average2(Average2(a0
, a2
), a1
);
285 static WEBP_INLINE
uint32_t Average4(uint32_t a0
, uint32_t a1
,
286 uint32_t a2
, uint32_t a3
) {
287 return Average2(Average2(a0
, a1
), Average2(a2
, a3
));
290 #if defined(WEBP_TARGET_HAS_SSE2)
291 static WEBP_INLINE
uint32_t ClampedAddSubtractFull(uint32_t c0
, uint32_t c1
,
293 const __m128i zero
= _mm_setzero_si128();
294 const __m128i C0
= _mm_unpacklo_epi8(_mm_cvtsi32_si128(c0
), zero
);
295 const __m128i C1
= _mm_unpacklo_epi8(_mm_cvtsi32_si128(c1
), zero
);
296 const __m128i C2
= _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2
), zero
);
297 const __m128i V1
= _mm_add_epi16(C0
, C1
);
298 const __m128i V2
= _mm_sub_epi16(V1
, C2
);
299 const __m128i b
= _mm_packus_epi16(V2
, V2
);
300 const uint32_t output
= _mm_cvtsi128_si32(b
);
304 static WEBP_INLINE
uint32_t ClampedAddSubtractHalf(uint32_t c0
, uint32_t c1
,
306 const uint32_t ave
= Average2(c0
, c1
);
307 const __m128i zero
= _mm_setzero_si128();
308 const __m128i A0
= _mm_unpacklo_epi8(_mm_cvtsi32_si128(ave
), zero
);
309 const __m128i B0
= _mm_unpacklo_epi8(_mm_cvtsi32_si128(c2
), zero
);
310 const __m128i A1
= _mm_sub_epi16(A0
, B0
);
311 const __m128i BgtA
= _mm_cmpgt_epi16(B0
, A0
);
312 const __m128i A2
= _mm_sub_epi16(A1
, BgtA
);
313 const __m128i A3
= _mm_srai_epi16(A2
, 1);
314 const __m128i A4
= _mm_add_epi16(A0
, A3
);
315 const __m128i A5
= _mm_packus_epi16(A4
, A4
);
316 const uint32_t output
= _mm_cvtsi128_si32(A5
);
320 static WEBP_INLINE
uint32_t Select(uint32_t a
, uint32_t b
, uint32_t c
) {
322 const __m128i zero
= _mm_setzero_si128();
323 const __m128i A0
= _mm_cvtsi32_si128(a
);
324 const __m128i B0
= _mm_cvtsi32_si128(b
);
325 const __m128i C0
= _mm_cvtsi32_si128(c
);
326 const __m128i AC0
= _mm_subs_epu8(A0
, C0
);
327 const __m128i CA0
= _mm_subs_epu8(C0
, A0
);
328 const __m128i BC0
= _mm_subs_epu8(B0
, C0
);
329 const __m128i CB0
= _mm_subs_epu8(C0
, B0
);
330 const __m128i AC
= _mm_or_si128(AC0
, CA0
);
331 const __m128i BC
= _mm_or_si128(BC0
, CB0
);
332 const __m128i pa
= _mm_unpacklo_epi8(AC
, zero
); // |a - c|
333 const __m128i pb
= _mm_unpacklo_epi8(BC
, zero
); // |b - c|
334 const __m128i diff
= _mm_sub_epi16(pb
, pa
);
337 _mm_storeu_si128((__m128i
*)out
, diff
);
338 pa_minus_pb
= out
[0] + out
[1] + out
[2] + out
[3];
340 return (pa_minus_pb
<= 0) ? a
: b
;
345 static WEBP_INLINE
uint32_t Clip255(uint32_t a
) {
349 // return 0, when a is a negative integer.
350 // return 255, when a is positive.
354 static WEBP_INLINE
int AddSubtractComponentFull(int a
, int b
, int c
) {
355 return Clip255(a
+ b
- c
);
358 static WEBP_INLINE
uint32_t ClampedAddSubtractFull(uint32_t c0
, uint32_t c1
,
360 const int a
= AddSubtractComponentFull(c0
>> 24, c1
>> 24, c2
>> 24);
361 const int r
= AddSubtractComponentFull((c0
>> 16) & 0xff,
364 const int g
= AddSubtractComponentFull((c0
>> 8) & 0xff,
367 const int b
= AddSubtractComponentFull(c0
& 0xff, c1
& 0xff, c2
& 0xff);
368 return (a
<< 24) | (r
<< 16) | (g
<< 8) | b
;
371 static WEBP_INLINE
int AddSubtractComponentHalf(int a
, int b
) {
372 return Clip255(a
+ (a
- b
) / 2);
375 static WEBP_INLINE
uint32_t ClampedAddSubtractHalf(uint32_t c0
, uint32_t c1
,
377 const uint32_t ave
= Average2(c0
, c1
);
378 const int a
= AddSubtractComponentHalf(ave
>> 24, c2
>> 24);
379 const int r
= AddSubtractComponentHalf((ave
>> 16) & 0xff, (c2
>> 16) & 0xff);
380 const int g
= AddSubtractComponentHalf((ave
>> 8) & 0xff, (c2
>> 8) & 0xff);
381 const int b
= AddSubtractComponentHalf((ave
>> 0) & 0xff, (c2
>> 0) & 0xff);
382 return (a
<< 24) | (r
<< 16) | (g
<< 8) | b
;
385 static WEBP_INLINE
int Sub3(int a
, int b
, int c
) {
386 const int pb
= b
- c
;
387 const int pa
= a
- c
;
388 return abs(pb
) - abs(pa
);
391 static WEBP_INLINE
uint32_t Select(uint32_t a
, uint32_t b
, uint32_t c
) {
392 const int pa_minus_pb
=
393 Sub3((a
>> 24) , (b
>> 24) , (c
>> 24) ) +
394 Sub3((a
>> 16) & 0xff, (b
>> 16) & 0xff, (c
>> 16) & 0xff) +
395 Sub3((a
>> 8) & 0xff, (b
>> 8) & 0xff, (c
>> 8) & 0xff) +
396 Sub3((a
) & 0xff, (b
) & 0xff, (c
) & 0xff);
397 return (pa_minus_pb
<= 0) ? a
: b
;
401 //------------------------------------------------------------------------------
404 static uint32_t Predictor0(uint32_t left
, const uint32_t* const top
) {
// Predictor mode 1 (L): the left pixel; 'top' is not dereferenced.
static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
  (void)top;
  return left;
}
// Predictor mode 2 (T): the pixel directly above, top[0].
static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
  (void)left;
  return top[0];
}
// Predictor mode 3 (TR): the pixel above and to the right, top[1].
static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
  (void)left;
  return top[1];
}
// Predictor mode 4 (TL): the pixel above and to the left, top[-1].
static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
  (void)left;
  return top[-1];
}
// Predictor mode 5: Average3 of left, top and top-right.
static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = Average3(left, top[0], top[1]);
  return pred;
}
// Predictor mode 6: average of left and top-left.
static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = Average2(left, top[-1]);
  return pred;
}
// Predictor mode 7: average of left and top.
static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = Average2(left, top[0]);
  return pred;
}
// Predictor mode 8: average of top-left and top; 'left' is unused.
static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = Average2(top[-1], top[0]);
  (void)left;
  return pred;
}
// Predictor mode 9: average of top and top-right; 'left' is unused.
static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = Average2(top[0], top[1]);
  (void)left;
  return pred;
}
// Predictor mode 10: Average4 of left, top-left, top and top-right.
static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
  return pred;
}
// Predictor mode 11: Select() between top and left, using top-left as the
// reference pixel.
static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = Select(top[0], left, top[-1]);
  return pred;
}
// Predictor mode 12: clamped left + top - top-left, per channel.
static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
  return pred;
}
// Predictor mode 13: ClampedAddSubtractHalf() of left, top and top-left.
static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
  const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
  return pred;
}
// Signature shared by all predictors: 'left' is the pixel to the left of the
// current one and 'top' points at the pixel directly above it (predictors
// read top[-1], top[0] and top[1]).
typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);
465 static const PredictorFunc kPredictors
[16] = {
466 Predictor0
, Predictor1
, Predictor2
, Predictor3
,
467 Predictor4
, Predictor5
, Predictor6
, Predictor7
,
468 Predictor8
, Predictor9
, Predictor10
, Predictor11
,
469 Predictor12
, Predictor13
,
470 Predictor0
, Predictor0
// <- padding security sentinels
// TODO(vikasa): Replace 256 etc with defines.
// Heuristic cost term that rewards histograms concentrated around zero.
//   counts:   256-entry histogram of residual values (mod 256).
//   weight_0: weight applied to counts[0].
//   exp_val:  weight of the +/-1 bins; multiplied by 0.6 for each further
//             step away from zero, up to +/-15.
// Returns -0.1 times the weighted sum.
static float PredictionCostSpatial(const int* counts,
                                   int weight_0, double exp_val) {
  const int significant_symbols = 16;
  const double exp_decay_factor = 0.6;
  // Multiply in double so that a large count cannot overflow an int.
  double bits = (double)weight_0 * counts[0];
  int i;
  for (i = 1; i < significant_symbols; ++i) {
    // counts[256 - i] is the bin for the residual -i (mod 256).
    bits += exp_val * (counts[i] + counts[256 - i]);
    exp_val *= exp_decay_factor;
  }
  return (float)(-0.1 * bits);
}
// Compute the combined Shannon entropy for distributions {X} and {X+Y}.
//   X, Y: histograms with 'n' entries each.
// Accumulates VP8LFastSLog2(total) - sum(VP8LFastSLog2(bin)) for X and for
// the element-wise sum X + Y, and returns the two terms added together.
static float CombinedShannonEntropy(const int* const X,
                                    const int* const Y, int n) {
  int i;
  double retval = 0.;
  int sumX = 0, sumXY = 0;
  for (i = 0; i < n; ++i) {
    const int x = X[i];
    const int xy = X[i] + Y[i];
    // Empty bins are skipped.
    if (x != 0) {
      sumX += x;
      retval -= VP8LFastSLog2(x);
    }
    if (xy != 0) {
      sumXY += xy;
      retval -= VP8LFastSLog2(xy);
    }
  }
  retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY);
  return (float)retval;
}
// Cost of a tile's residual histograms given the histograms accumulated so
// far: for each of the four channels, adds PredictionCostSpatial() of the
// tile histogram and CombinedShannonEntropy() of tile vs. accumulated.
static float PredictionCostSpatialHistogram(int accumulated[4][256],
                                            int tile[4][256]) {
  int i;
  double retval = 0;
  for (i = 0; i < 4; ++i) {
    const double kExpValue = 0.94;
    retval += PredictionCostSpatial(tile[i], 1, kExpValue);
    retval += CombinedShannonEntropy(tile[i], accumulated[i], 256);
  }
  return (float)retval;
}
521 static int GetBestPredictorForTile(int width
, int height
,
522 int tile_x
, int tile_y
, int bits
,
523 int accumulated
[4][256],
524 const uint32_t* const argb_scratch
) {
525 const int kNumPredModes
= 14;
526 const int col_start
= tile_x
<< bits
;
527 const int row_start
= tile_y
<< bits
;
528 const int tile_size
= 1 << bits
;
529 const int ymax
= (tile_size
<= height
- row_start
) ?
530 tile_size
: height
- row_start
;
531 const int xmax
= (tile_size
<= width
- col_start
) ?
532 tile_size
: width
- col_start
;
534 float best_diff
= MAX_DIFF_COST
;
538 for (mode
= 0; mode
< kNumPredModes
; ++mode
) {
539 const uint32_t* current_row
= argb_scratch
;
540 const PredictorFunc pred_func
= kPredictors
[mode
];
543 memset(&histo
[0][0], 0, sizeof(histo
));
544 for (y
= 0; y
< ymax
; ++y
) {
546 const int row
= row_start
+ y
;
547 const uint32_t* const upper_row
= current_row
;
548 current_row
= upper_row
+ width
;
549 for (x
= 0; x
< xmax
; ++x
) {
550 const int col
= col_start
+ x
;
552 uint32_t predict_diff
;
554 predict
= (col
== 0) ? ARGB_BLACK
: current_row
[col
- 1]; // Left.
555 } else if (col
== 0) {
556 predict
= upper_row
[col
]; // Top.
558 predict
= pred_func(current_row
[col
- 1], upper_row
+ col
);
560 predict_diff
= VP8LSubPixels(current_row
[col
], predict
);
561 ++histo
[0][predict_diff
>> 24];
562 ++histo
[1][((predict_diff
>> 16) & 0xff)];
563 ++histo
[2][((predict_diff
>> 8) & 0xff)];
564 ++histo
[3][(predict_diff
& 0xff)];
567 cur_diff
= PredictionCostSpatialHistogram(accumulated
, histo
);
568 if (cur_diff
< best_diff
) {
569 best_diff
= cur_diff
;
577 static void CopyTileWithPrediction(int width
, int height
,
578 int tile_x
, int tile_y
, int bits
, int mode
,
579 const uint32_t* const argb_scratch
,
580 uint32_t* const argb
) {
581 const int col_start
= tile_x
<< bits
;
582 const int row_start
= tile_y
<< bits
;
583 const int tile_size
= 1 << bits
;
584 const int ymax
= (tile_size
<= height
- row_start
) ?
585 tile_size
: height
- row_start
;
586 const int xmax
= (tile_size
<= width
- col_start
) ?
587 tile_size
: width
- col_start
;
588 const PredictorFunc pred_func
= kPredictors
[mode
];
589 const uint32_t* current_row
= argb_scratch
;
592 for (y
= 0; y
< ymax
; ++y
) {
594 const int row
= row_start
+ y
;
595 const uint32_t* const upper_row
= current_row
;
596 current_row
= upper_row
+ width
;
597 for (x
= 0; x
< xmax
; ++x
) {
598 const int col
= col_start
+ x
;
599 const int pix
= row
* width
+ col
;
602 predict
= (col
== 0) ? ARGB_BLACK
: current_row
[col
- 1]; // Left.
603 } else if (col
== 0) {
604 predict
= upper_row
[col
]; // Top.
606 predict
= pred_func(current_row
[col
- 1], upper_row
+ col
);
608 argb
[pix
] = VP8LSubPixels(current_row
[col
], predict
);
// Picks a predictor for every tile and replaces 'argb' with the residuals.
//   width, height: image dimensions in pixels.
//   bits:          log2 of the tile edge.
//   argb:          image, overwritten tile by tile with residuals.
//   argb_scratch:  work buffer holding one row (the row above the current
//                  tile row) followed by the current tile rows.
//   image:         receives, per tile, 0xff000000 | (mode << 8).
void VP8LResidualImage(int width, int height, int bits,
                       uint32_t* const argb, uint32_t* const argb_scratch,
                       uint32_t* const image) {
  const int max_tile_size = 1 << bits;
  const int tiles_per_row = VP8LSubSampleSize(width, bits);
  const int tiles_per_col = VP8LSubSampleSize(height, bits);
  uint32_t* const upper_row = argb_scratch;
  uint32_t* const current_tile_rows = argb_scratch + width;
  int tile_y;
  int histo[4][256];
  memset(histo, 0, sizeof(histo));
  for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
    const int tile_y_offset = tile_y * max_tile_size;
    const int this_tile_height =
        (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
    int tile_x;
    if (tile_y > 0) {
      // Keep the last row of the previous tile row as the new 'upper' row.
      memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
             width * sizeof(*upper_row));
    }
    // Copy the source rows before 'argb' is overwritten with residuals.
    memcpy(current_tile_rows, &argb[tile_y_offset * width],
           this_tile_height * width * sizeof(*current_tile_rows));
    for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
      int pred;
      int y;
      const int tile_x_offset = tile_x * max_tile_size;
      int all_x_max = tile_x_offset + max_tile_size;
      if (all_x_max > width) {
        all_x_max = width;
      }
      pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo,
                                     argb_scratch);
      image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
      CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
                             argb_scratch, argb);
      // Fold this tile's residuals into the running histograms.
      for (y = 0; y < max_tile_size; ++y) {
        int ix;
        int all_x;
        int all_y = tile_y_offset + y;
        if (all_y >= height) {
          break;
        }
        ix = all_y * width + tile_x_offset;
        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
          const uint32_t a = argb[ix];
          ++histo[0][a >> 24];
          ++histo[1][((a >> 16) & 0xff)];
          ++histo[2][((a >> 8) & 0xff)];
          ++histo[3][(a & 0xff)];
        }
      }
    }
  }
}
668 // Inverse prediction.
669 static void PredictorInverseTransform(const VP8LTransform
* const transform
,
670 int y_start
, int y_end
, uint32_t* data
) {
671 const int width
= transform
->xsize_
;
672 if (y_start
== 0) { // First Row follows the L (mode=1) mode.
674 const uint32_t pred0
= Predictor0(data
[-1], NULL
);
675 AddPixelsEq(data
, pred0
);
676 for (x
= 1; x
< width
; ++x
) {
677 const uint32_t pred1
= Predictor1(data
[x
- 1], NULL
);
678 AddPixelsEq(data
+ x
, pred1
);
686 const int mask
= (1 << transform
->bits_
) - 1;
687 const int tiles_per_row
= VP8LSubSampleSize(width
, transform
->bits_
);
688 const uint32_t* pred_mode_base
=
689 transform
->data_
+ (y
>> transform
->bits_
) * tiles_per_row
;
693 const uint32_t pred2
= Predictor2(data
[-1], data
- width
);
694 const uint32_t* pred_mode_src
= pred_mode_base
;
695 PredictorFunc pred_func
;
697 // First pixel follows the T (mode=2) mode.
698 AddPixelsEq(data
, pred2
);
701 pred_func
= kPredictors
[((*pred_mode_src
++) >> 8) & 0xf];
702 for (x
= 1; x
< width
; ++x
) {
704 if ((x
& mask
) == 0) { // start of tile. Read predictor function.
705 pred_func
= kPredictors
[((*pred_mode_src
++) >> 8) & 0xf];
707 pred
= pred_func(data
[x
- 1], data
+ x
- width
);
708 AddPixelsEq(data
+ x
, pred
);
712 if ((y
& mask
) == 0) { // Use the same mask, since tiles are squares.
713 pred_mode_base
+= tiles_per_row
;
// Subtracts the green channel from the red and blue channels (mod 256) of
// every pixel, in place. Alpha and green are left unchanged.
//   argb_data: pixels to transform.
//   num_pixs:  number of pixels in argb_data.
void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
  int i = 0;
#if defined(WEBP_TARGET_HAS_SSE2)
  const __m128i mask = _mm_set1_epi32(0x0000ff00);
  // '<=' so that a final full group of four pixels also takes the SIMD path.
  for (; i + 4 <= num_pixs; i += 4) {
    const __m128i in = _mm_loadu_si128((__m128i*)&argb_data[i]);
    const __m128i in_00g0 = _mm_and_si128(in, mask);     // 00g0|00g0|...
    const __m128i in_0g00 = _mm_slli_epi32(in_00g0, 8);  // 0g00|0g00|...
    const __m128i in_000g = _mm_srli_epi32(in_00g0, 8);  // 000g|000g|...
    const __m128i in_0g0g = _mm_or_si128(in_0g00, in_000g);
    const __m128i out = _mm_sub_epi8(in, in_0g0g);
    _mm_storeu_si128((__m128i*)&argb_data[i], out);
  }
  // fallthrough and finish off with plain-C
#endif
  for (; i < num_pixs; ++i) {
    const uint32_t argb = argb_data[i];
    const uint32_t green = (argb >> 8) & 0xff;
    const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
    const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
    argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
  }
}
743 // Add green to blue and red channels (i.e. perform the inverse transform of
744 // 'subtract green').
745 static void AddGreenToBlueAndRed(const VP8LTransform
* const transform
,
746 int y_start
, int y_end
, uint32_t* data
) {
747 const int width
= transform
->xsize_
;
748 const uint32_t* const data_end
= data
+ (y_end
- y_start
) * width
;
749 #if defined(WEBP_TARGET_HAS_SSE2)
750 const __m128i mask
= _mm_set1_epi32(0x0000ff00);
751 for (; data
+ 4 < data_end
; data
+= 4) {
752 const __m128i in
= _mm_loadu_si128((__m128i
*)data
);
753 const __m128i in_00g0
= _mm_and_si128(in
, mask
); // 00g0|00g0|...
754 const __m128i in_0g00
= _mm_slli_epi32(in_00g0
, 8); // 0g00|0g00|...
755 const __m128i in_000g
= _mm_srli_epi32(in_00g0
, 8); // 000g|000g|...
756 const __m128i in_0g0g
= _mm_or_si128(in_0g00
, in_000g
);
757 const __m128i out
= _mm_add_epi8(in
, in_0g0g
);
758 _mm_storeu_si128((__m128i
*)data
, out
);
760 // fallthrough and finish off with plain-C
762 while (data
< data_end
) {
763 const uint32_t argb
= *data
;
764 const uint32_t green
= ((argb
>> 8) & 0xff);
765 uint32_t red_blue
= (argb
& 0x00ff00ffu
);
766 red_blue
+= (green
<< 16) | green
;
767 red_blue
&= 0x00ff00ffu
;
768 *data
++ = (argb
& 0xff00ff00u
) | red_blue
;
// Multipliers of the cross-color transform.
typedef struct {
  // Note: the members are uint8_t, so that any negative values are
  // automatically converted to "mod 256" values.
  uint8_t green_to_red_;
  uint8_t green_to_blue_;
  uint8_t red_to_blue_;
} Multipliers;
780 static WEBP_INLINE
void MultipliersClear(Multipliers
* m
) {
781 m
->green_to_red_
= 0;
782 m
->green_to_blue_
= 0;
786 static WEBP_INLINE
uint32_t ColorTransformDelta(int8_t color_pred
,
788 return (uint32_t)((int)(color_pred
) * color
) >> 5;
791 static WEBP_INLINE
void ColorCodeToMultipliers(uint32_t color_code
,
792 Multipliers
* const m
) {
793 m
->green_to_red_
= (color_code
>> 0) & 0xff;
794 m
->green_to_blue_
= (color_code
>> 8) & 0xff;
795 m
->red_to_blue_
= (color_code
>> 16) & 0xff;
798 static WEBP_INLINE
uint32_t MultipliersToColorCode(Multipliers
* const m
) {
800 ((uint32_t)(m
->red_to_blue_
) << 16) |
801 ((uint32_t)(m
->green_to_blue_
) << 8) |
805 static WEBP_INLINE
uint32_t TransformColor(const Multipliers
* const m
,
806 uint32_t argb
, int inverse
) {
807 const uint32_t green
= argb
>> 8;
808 const uint32_t red
= argb
>> 16;
809 uint32_t new_red
= red
;
810 uint32_t new_blue
= argb
;
813 new_red
+= ColorTransformDelta(m
->green_to_red_
, green
);
815 new_blue
+= ColorTransformDelta(m
->green_to_blue_
, green
);
816 new_blue
+= ColorTransformDelta(m
->red_to_blue_
, new_red
);
819 new_red
-= ColorTransformDelta(m
->green_to_red_
, green
);
821 new_blue
-= ColorTransformDelta(m
->green_to_blue_
, green
);
822 new_blue
-= ColorTransformDelta(m
->red_to_blue_
, red
);
825 return (argb
& 0xff00ff00u
) | (new_red
<< 16) | (new_blue
);
828 static WEBP_INLINE
uint8_t TransformColorRed(uint8_t green_to_red
,
830 const uint32_t green
= argb
>> 8;
831 uint32_t new_red
= argb
>> 16;
832 new_red
-= ColorTransformDelta(green_to_red
, green
);
833 return (new_red
& 0xff);
836 static WEBP_INLINE
uint8_t TransformColorBlue(uint8_t green_to_blue
,
839 const uint32_t green
= argb
>> 8;
840 const uint32_t red
= argb
>> 16;
841 uint8_t new_blue
= argb
;
842 new_blue
-= ColorTransformDelta(green_to_blue
, green
);
843 new_blue
-= ColorTransformDelta(red_to_blue
, red
);
844 return (new_blue
& 0xff);
847 static WEBP_INLINE
int SkipRepeatedPixels(const uint32_t* const argb
,
849 const uint32_t v
= argb
[ix
];
850 if (ix
>= xsize
+ 3) {
851 if (v
== argb
[ix
- xsize
] &&
852 argb
[ix
- 1] == argb
[ix
- xsize
- 1] &&
853 argb
[ix
- 2] == argb
[ix
- xsize
- 2] &&
854 argb
[ix
- 3] == argb
[ix
- xsize
- 3]) {
857 return v
== argb
[ix
- 3] && v
== argb
[ix
- 2] && v
== argb
[ix
- 1];
858 } else if (ix
>= 3) {
859 return v
== argb
[ix
- 3] && v
== argb
[ix
- 2] && v
== argb
[ix
- 1];
// Cost estimate for a candidate channel histogram 'counts', given the
// histogram 'accumulated' gathered from previously processed tiles.
// It is the sum of two terms:
//  - CombinedShannonEntropy(): favors low entropy, locally and globally;
//  - PredictionCostSpatial(): favors small absolute values.
static float PredictionCostCrossColor(const int accumulated[256],
                                      const int counts[256]) {
  static const double kSpatialExpValue = 2.4;
  return CombinedShannonEntropy(counts, accumulated, 256) +
         PredictionCostSpatial(counts, 3, kSpatialExpValue);
}
873 static Multipliers
GetBestColorTransformForTile(
874 int tile_x
, int tile_y
, int bits
,
877 int step
, int xsize
, int ysize
,
878 int* accumulated_red_histo
,
879 int* accumulated_blue_histo
,
880 const uint32_t* const argb
) {
881 float best_diff
= MAX_DIFF_COST
;
883 const int halfstep
= step
/ 2;
884 const int max_tile_size
= 1 << bits
;
885 const int tile_y_offset
= tile_y
* max_tile_size
;
886 const int tile_x_offset
= tile_x
* max_tile_size
;
890 int all_x_max
= tile_x_offset
+ max_tile_size
;
891 int all_y_max
= tile_y_offset
+ max_tile_size
;
893 MultipliersClear(&best_tx
);
894 if (all_x_max
> xsize
) {
897 if (all_y_max
> ysize
) {
901 for (green_to_red
= -64; green_to_red
<= 64; green_to_red
+= halfstep
) {
902 int histo
[256] = { 0 };
905 for (all_y
= tile_y_offset
; all_y
< all_y_max
; ++all_y
) {
906 int ix
= all_y
* xsize
+ tile_x_offset
;
908 for (all_x
= tile_x_offset
; all_x
< all_x_max
; ++all_x
, ++ix
) {
909 if (SkipRepeatedPixels(argb
, ix
, xsize
)) {
912 ++histo
[TransformColorRed(green_to_red
, argb
[ix
])]; // red.
915 cur_diff
= PredictionCostCrossColor(&accumulated_red_histo
[0], &histo
[0]);
916 if ((uint8_t)green_to_red
== prevX
.green_to_red_
) {
917 cur_diff
-= 3; // favor keeping the areas locally similar
919 if ((uint8_t)green_to_red
== prevY
.green_to_red_
) {
920 cur_diff
-= 3; // favor keeping the areas locally similar
922 if (green_to_red
== 0) {
925 if (cur_diff
< best_diff
) {
926 best_diff
= cur_diff
;
927 best_tx
.green_to_red_
= green_to_red
;
930 best_diff
= MAX_DIFF_COST
;
931 for (green_to_blue
= -32; green_to_blue
<= 32; green_to_blue
+= step
) {
932 for (red_to_blue
= -32; red_to_blue
<= 32; red_to_blue
+= step
) {
934 int histo
[256] = { 0 };
935 for (all_y
= tile_y_offset
; all_y
< all_y_max
; ++all_y
) {
937 int ix
= all_y
* xsize
+ tile_x_offset
;
938 for (all_x
= tile_x_offset
; all_x
< all_x_max
; ++all_x
, ++ix
) {
939 if (SkipRepeatedPixels(argb
, ix
, xsize
)) {
942 ++histo
[TransformColorBlue(green_to_blue
, red_to_blue
, argb
[ix
])];
946 PredictionCostCrossColor(&accumulated_blue_histo
[0], &histo
[0]);
947 if ((uint8_t)green_to_blue
== prevX
.green_to_blue_
) {
948 cur_diff
-= 3; // favor keeping the areas locally similar
950 if ((uint8_t)green_to_blue
== prevY
.green_to_blue_
) {
951 cur_diff
-= 3; // favor keeping the areas locally similar
953 if ((uint8_t)red_to_blue
== prevX
.red_to_blue_
) {
954 cur_diff
-= 3; // favor keeping the areas locally similar
956 if ((uint8_t)red_to_blue
== prevY
.red_to_blue_
) {
957 cur_diff
-= 3; // favor keeping the areas locally similar
959 if (green_to_blue
== 0) {
962 if (red_to_blue
== 0) {
965 if (cur_diff
< best_diff
) {
966 best_diff
= cur_diff
;
967 best_tx
.green_to_blue_
= green_to_blue
;
968 best_tx
.red_to_blue_
= red_to_blue
;
975 static void CopyTileWithColorTransform(int xsize
, int ysize
,
976 int tile_x
, int tile_y
, int bits
,
977 Multipliers color_transform
,
978 uint32_t* const argb
) {
980 int xscan
= 1 << bits
;
981 int yscan
= 1 << bits
;
984 if (xscan
> xsize
- tile_x
) {
985 xscan
= xsize
- tile_x
;
987 if (yscan
> ysize
- tile_y
) {
988 yscan
= ysize
- tile_y
;
991 for (y
= tile_y
; y
< yscan
; ++y
) {
992 int ix
= y
* xsize
+ tile_x
;
993 const int end_ix
= ix
+ xscan
;
994 for (; ix
< end_ix
; ++ix
) {
995 argb
[ix
] = TransformColor(&color_transform
, argb
[ix
], 0);
1000 void VP8LColorSpaceTransform(int width
, int height
, int bits
, int step
,
1001 uint32_t* const argb
, uint32_t* image
) {
1002 const int max_tile_size
= 1 << bits
;
1003 int tile_xsize
= VP8LSubSampleSize(width
, bits
);
1004 int tile_ysize
= VP8LSubSampleSize(height
, bits
);
1005 int accumulated_red_histo
[256] = { 0 };
1006 int accumulated_blue_histo
[256] = { 0 };
1011 MultipliersClear(&prevY
);
1012 MultipliersClear(&prevX
);
1013 for (tile_y
= 0; tile_y
< tile_ysize
; ++tile_y
) {
1014 for (tile_x
= 0; tile_x
< tile_xsize
; ++tile_x
) {
1015 Multipliers color_transform
;
1018 const int tile_y_offset
= tile_y
* max_tile_size
;
1019 const int tile_x_offset
= tile_x
* max_tile_size
;
1021 ColorCodeToMultipliers(image
[tile_y
* tile_xsize
+ tile_x
- 1], &prevX
);
1022 ColorCodeToMultipliers(image
[(tile_y
- 1) * tile_xsize
+ tile_x
],
1024 } else if (tile_x
!= 0) {
1025 ColorCodeToMultipliers(image
[tile_y
* tile_xsize
+ tile_x
- 1], &prevX
);
1028 GetBestColorTransformForTile(tile_x
, tile_y
, bits
,
1030 step
, width
, height
,
1031 &accumulated_red_histo
[0],
1032 &accumulated_blue_histo
[0],
1034 image
[tile_y
* tile_xsize
+ tile_x
] =
1035 MultipliersToColorCode(&color_transform
);
1036 CopyTileWithColorTransform(width
, height
, tile_x
, tile_y
, bits
,
1037 color_transform
, argb
);
1039 // Gather accumulated histogram data.
1040 all_x_max
= tile_x_offset
+ max_tile_size
;
1041 if (all_x_max
> width
) {
1044 for (y
= 0; y
< max_tile_size
; ++y
) {
1047 int all_y
= tile_y_offset
+ y
;
1048 if (all_y
>= height
) {
1051 ix
= all_y
* width
+ tile_x_offset
;
1052 for (all_x
= tile_x_offset
; all_x
< all_x_max
; ++all_x
, ++ix
) {
1054 argb
[ix
] == argb
[ix
- 2] &&
1055 argb
[ix
] == argb
[ix
- 1]) {
1056 continue; // repeated pixels are handled by backward references
1058 if (ix
>= width
+ 2 &&
1059 argb
[ix
- 2] == argb
[ix
- width
- 2] &&
1060 argb
[ix
- 1] == argb
[ix
- width
- 1] &&
1061 argb
[ix
] == argb
[ix
- width
]) {
1062 continue; // repeated pixels are handled by backward references
1064 ++accumulated_red_histo
[(argb
[ix
] >> 16) & 0xff];
1065 ++accumulated_blue_histo
[argb
[ix
] & 0xff];
1072 // Color space inverse transform.
1073 static void ColorSpaceInverseTransform(const VP8LTransform
* const transform
,
1074 int y_start
, int y_end
, uint32_t* data
) {
1075 const int width
= transform
->xsize_
;
1076 const int mask
= (1 << transform
->bits_
) - 1;
1077 const int tiles_per_row
= VP8LSubSampleSize(width
, transform
->bits_
);
1079 const uint32_t* pred_row
=
1080 transform
->data_
+ (y
>> transform
->bits_
) * tiles_per_row
;
1083 const uint32_t* pred
= pred_row
;
1084 Multipliers m
= { 0, 0, 0 };
1087 for (x
= 0; x
< width
; ++x
) {
1088 if ((x
& mask
) == 0) ColorCodeToMultipliers(*pred
++, &m
);
1089 data
[x
] = TransformColor(&m
, data
[x
], 1);
1093 if ((y
& mask
) == 0) pred_row
+= tiles_per_row
;;
// Separate out pixels packed together using pixel-bundling.
// We define two methods for ARGB data (uint32_t) and alpha-only data (uint8_t).
// The generated function reads rows [y_start, y_end) from 'src', looks every
// index up in the color map transform->data_ and writes transform->xsize_
// values per row to 'dst'. GET_INDEX extracts the (possibly packed) indices
// from a source element, GET_VALUE converts a color-map entry to TYPE.
#define COLOR_INDEX_INVERSE(FUNC_NAME, TYPE, GET_INDEX, GET_VALUE)             \
void FUNC_NAME(const VP8LTransform* const transform,                           \
               int y_start, int y_end, const TYPE* src, TYPE* dst) {           \
  const int index_bits = 8 >> transform->bits_;                                \
  const int row_width = transform->xsize_;                                     \
  const uint32_t* const palette = transform->data_;                            \
  int x, y;                                                                    \
  if (index_bits >= 8) {                                                       \
    /* No bundling: one index per source element. */                           \
    for (y = y_start; y < y_end; ++y) {                                        \
      for (x = 0; x < row_width; ++x) {                                        \
        *dst++ = GET_VALUE(palette[GET_INDEX(*src++)]);                        \
      }                                                                        \
    }                                                                          \
  } else {                                                                     \
    const int indices_per_elem = 1 << transform->bits_;                        \
    const int reload_mask = indices_per_elem - 1;                              \
    const uint32_t index_mask = (1 << index_bits) - 1;                         \
    for (y = y_start; y < y_end; ++y) {                                        \
      uint32_t packed = 0;                                                     \
      for (x = 0; x < row_width; ++x) {                                        \
        /* A fresh source element is needed once every 'indices_per_elem' */   \
        /* values of x. That count is a power of 2, so a mask test is */       \
        /* enough; no countdown is required. */                                \
        if ((x & reload_mask) == 0) packed = GET_INDEX(*src++);                \
        *dst++ = GET_VALUE(palette[packed & index_mask]);                      \
        packed >>= index_bits;                                                 \
      }                                                                        \
    }                                                                          \
  }                                                                            \
}
1133 static WEBP_INLINE
uint32_t GetARGBIndex(uint32_t idx
) {
1134 return (idx
>> 8) & 0xff;
1137 static WEBP_INLINE
uint8_t GetAlphaIndex(uint8_t idx
) {
1141 static WEBP_INLINE
uint32_t GetARGBValue(uint32_t val
) {
1145 static WEBP_INLINE
uint8_t GetAlphaValue(uint32_t val
) {
1146 return (val
>> 8) & 0xff;
1149 static COLOR_INDEX_INVERSE(ColorIndexInverseTransform
, uint32_t, GetARGBIndex
,
1151 COLOR_INDEX_INVERSE(VP8LColorIndexInverseTransformAlpha
, uint8_t, GetAlphaIndex
,
1154 #undef COLOR_INDEX_INVERSE
1156 void VP8LInverseTransform(const VP8LTransform
* const transform
,
1157 int row_start
, int row_end
,
1158 const uint32_t* const in
, uint32_t* const out
) {
1159 assert(row_start
< row_end
);
1160 assert(row_end
<= transform
->ysize_
);
1161 switch (transform
->type_
) {
1162 case SUBTRACT_GREEN
:
1163 AddGreenToBlueAndRed(transform
, row_start
, row_end
, out
);
1165 case PREDICTOR_TRANSFORM
:
1166 PredictorInverseTransform(transform
, row_start
, row_end
, out
);
1167 if (row_end
!= transform
->ysize_
) {
1168 // The last predicted row in this iteration will be the top-pred row
1169 // for the first row in next iteration.
1170 const int width
= transform
->xsize_
;
1171 memcpy(out
- width
, out
+ (row_end
- row_start
- 1) * width
,
1172 width
* sizeof(*out
));
1175 case CROSS_COLOR_TRANSFORM
:
1176 ColorSpaceInverseTransform(transform
, row_start
, row_end
, out
);
1178 case COLOR_INDEXING_TRANSFORM
:
1179 if (in
== out
&& transform
->bits_
> 0) {
1180 // Move packed pixels to the end of unpacked region, so that unpacking
1181 // can occur seamlessly.
1182 // Also, note that this is the only transform that applies on
1183 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
1184 // transforms work on effective width of xsize_.
1185 const int out_stride
= (row_end
- row_start
) * transform
->xsize_
;
1186 const int in_stride
= (row_end
- row_start
) *
1187 VP8LSubSampleSize(transform
->xsize_
, transform
->bits_
);
1188 uint32_t* const src
= out
+ out_stride
- in_stride
;
1189 memmove(src
, out
, in_stride
* sizeof(*src
));
1190 ColorIndexInverseTransform(transform
, row_start
, row_end
, src
, out
);
1192 ColorIndexInverseTransform(transform
, row_start
, row_end
, in
, out
);
1198 //------------------------------------------------------------------------------
1199 // Color space conversion.
// Returns non-zero when the first byte in memory of a 16-bit value holding 1
// is not 1, i.e. when the least significant byte is not stored first.
static int is_big_endian(void) {
  static const union {
    uint16_t word;
    uint8_t bytes[2];
  } probe = { 1 };
  const int lsb_comes_first = (probe.bytes[0] == 1);
  return !lsb_comes_first;
}
// Writes 'num_pixels' pixels as 3 bytes each, in R, G, B order. Each source
// word carries red in bits 16..23, green in bits 8..15 and blue in bits 0..7;
// the top byte is dropped.
static void ConvertBGRAToRGB(const uint32_t* src,
                             int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i, dst += 3) {
    const uint32_t argb = src[i];
    dst[0] = (argb >> 16) & 0xff;
    dst[1] = (argb >>  8) & 0xff;
    dst[2] = (argb >>  0) & 0xff;
  }
}
// Writes 'num_pixels' pixels as 4 bytes each, in R, G, B, A order. Each
// source word carries alpha in bits 24..31, red in bits 16..23, green in bits
// 8..15 and blue in bits 0..7.
static void ConvertBGRAToRGBA(const uint32_t* src,
                              int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i, dst += 4) {
    const uint32_t argb = src[i];
    dst[0] = (argb >> 16) & 0xff;
    dst[1] = (argb >>  8) & 0xff;
    dst[2] = (argb >>  0) & 0xff;
    dst[3] = (argb >> 24) & 0xff;
  }
}
// Writes 'num_pixels' pixels as 2 bytes each, keeping the top 4 bits of every
// channel: one byte holds red (high nibble) and green (low nibble), the other
// holds blue (high nibble) and alpha (low nibble). The red/green byte comes
// first unless WEBP_SWAP_16BIT_CSP is defined, which swaps the two bytes.
static void ConvertBGRAToRGBA4444(const uint32_t* src,
                                  int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i, dst += 2) {
    const uint32_t argb = src[i];
    const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
    const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
#ifdef WEBP_SWAP_16BIT_CSP
    dst[0] = ba;
    dst[1] = rg;
#else
    dst[0] = rg;
    dst[1] = ba;
#endif
  }
}
// Writes 'num_pixels' pixels as 2 bytes each in 5-6-5 form: one byte holds
// the top 5 bits of red and the top 3 bits of green, the other holds the next
// 3 bits of green and the top 5 bits of blue. The red/green byte comes first
// unless WEBP_SWAP_16BIT_CSP is defined, which swaps the two bytes.
static void ConvertBGRAToRGB565(const uint32_t* src,
                                int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i, dst += 2) {
    const uint32_t argb = src[i];
    const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
    const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
#ifdef WEBP_SWAP_16BIT_CSP
    dst[0] = gb;
    dst[1] = rg;
#else
    dst[0] = rg;
    dst[1] = gb;
#endif
  }
}
// Writes 'num_pixels' pixels as 3 bytes each, in B, G, R order. Each source
// word carries red in bits 16..23, green in bits 8..15 and blue in bits 0..7;
// the top byte is dropped.
static void ConvertBGRAToBGR(const uint32_t* src,
                             int num_pixels, uint8_t* dst) {
  int i;
  for (i = 0; i < num_pixels; ++i, dst += 3) {
    const uint32_t argb = src[i];
    dst[0] = (argb >>  0) & 0xff;
    dst[1] = (argb >>  8) & 0xff;
    dst[2] = (argb >> 16) & 0xff;
  }
}
// Copies 'num_pixels' 32-bit pixels from 'src' to the byte buffer 'dst'.
// When is_big_endian() equals 'swap_on_big_endian' every pixel is written
// with its bytes in swapped order; otherwise the pixels are copied verbatim.
//
// 'dst' is a plain byte pointer with no alignment guarantee, so the swapped
// word is stored with memcpy() rather than through a uint32_t* cast (which
// would be a potentially misaligned, aliasing-violating access).
static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
                       int swap_on_big_endian) {
  if (is_big_endian() == swap_on_big_endian) {
    const uint32_t* const src_end = src + num_pixels;
    while (src < src_end) {
      uint32_t argb = *src++;

#if !defined(__BIG_ENDIAN__)
#if !defined(WEBP_REFERENCE_IMPLEMENTATION)
#if defined(__i386__) || defined(__x86_64__)
      __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
      memcpy(dst, &argb, sizeof(argb));
#elif defined(_MSC_VER)
      argb = _byteswap_ulong(argb);
      memcpy(dst, &argb, sizeof(argb));
#else
      dst[0] = (argb >> 24) & 0xff;
      dst[1] = (argb >> 16) & 0xff;
      dst[2] = (argb >>  8) & 0xff;
      dst[3] = (argb >>  0) & 0xff;
#endif
#else  // WEBP_REFERENCE_IMPLEMENTATION
      dst[0] = (argb >> 24) & 0xff;
      dst[1] = (argb >> 16) & 0xff;
      dst[2] = (argb >>  8) & 0xff;
      dst[3] = (argb >>  0) & 0xff;
#endif
#else  // __BIG_ENDIAN__
      dst[0] = (argb >>  0) & 0xff;
      dst[1] = (argb >>  8) & 0xff;
      dst[2] = (argb >> 16) & 0xff;
      dst[3] = (argb >> 24) & 0xff;
#endif
      dst += sizeof(argb);
    }
  } else {
    memcpy(dst, src, num_pixels * sizeof(*src));
  }
}
1317 void VP8LConvertFromBGRA(const uint32_t* const in_data
, int num_pixels
,
1318 WEBP_CSP_MODE out_colorspace
, uint8_t* const rgba
) {
1319 switch (out_colorspace
) {
1321 ConvertBGRAToRGB(in_data
, num_pixels
, rgba
);
1324 ConvertBGRAToRGBA(in_data
, num_pixels
, rgba
);
1327 ConvertBGRAToRGBA(in_data
, num_pixels
, rgba
);
1328 WebPApplyAlphaMultiply(rgba
, 0, num_pixels
, 1, 0);
1331 ConvertBGRAToBGR(in_data
, num_pixels
, rgba
);
1334 CopyOrSwap(in_data
, num_pixels
, rgba
, 1);
1337 CopyOrSwap(in_data
, num_pixels
, rgba
, 1);
1338 WebPApplyAlphaMultiply(rgba
, 0, num_pixels
, 1, 0);
1341 CopyOrSwap(in_data
, num_pixels
, rgba
, 0);
1344 CopyOrSwap(in_data
, num_pixels
, rgba
, 0);
1345 WebPApplyAlphaMultiply(rgba
, 1, num_pixels
, 1, 0);
1347 case MODE_RGBA_4444
:
1348 ConvertBGRAToRGBA4444(in_data
, num_pixels
, rgba
);
1350 case MODE_rgbA_4444
:
1351 ConvertBGRAToRGBA4444(in_data
, num_pixels
, rgba
);
1352 WebPApplyAlphaMultiply4444(rgba
, num_pixels
, 1, 0);
1355 ConvertBGRAToRGB565(in_data
, num_pixels
, rgba
);
1358 assert(0); // Code flow should not reach here.
// Bundles multiple (1, 2, 4 or 8) pixels into a single pixel.
// (1 << xbits) consecutive entries of 'row' are packed into one output word,
// each taking (1 << (3 - xbits)) bits, starting at bit 8 and growing upwards.
// The top byte of every output word is 0xff. With xbits <= 0 each entry gets
// its own word. 'dst' receives one word per started group of 'width' entries.
void VP8LBundleColorMap(const uint8_t* const row, int width,
                        int xbits, uint32_t* const dst) {
  int x;
  if (xbits <= 0) {
    for (x = 0; x < width; ++x) {
      dst[x] = 0xff000000 | (row[x] << 8);
    }
    return;
  }
  {
    const int bits_per_index = 1 << (3 - xbits);
    const int group_mask = (1 << xbits) - 1;
    uint32_t packed = 0xff000000;
    for (x = 0; x < width; ++x) {
      const int slot = x & group_mask;
      if (slot == 0) {
        packed = 0xff000000;  // first entry of a group: start a fresh word
      }
      packed |= row[x] << (8 + bits_per_index * slot);
      // Stored after every entry, so a trailing partial group is written too.
      dst[x >> xbits] = packed;
    }
  }
}
1383 //------------------------------------------------------------------------------
1385 #if defined(__cplusplus) || defined(c_plusplus)