1 // Copyright 2012 Google Inc. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
10 // Rescaling functions
12 // Author: Skal (pascal.massimino@gmail.com)
16 #include "./rescaler.h"
17 #include "../dsp/dsp.h"
19 //------------------------------------------------------------------------------
20 // Implementations of critical functions ImportRow / ExportRow
22 void (*WebPRescalerImportRow
)(WebPRescaler
* const wrk
,
23 const uint8_t* const src
, int channel
) = NULL
;
24 void (*WebPRescalerExportRow
)(WebPRescaler
* const wrk
, int x_out
) = NULL
;
// Fixed-point multiply with RFIX fractional bits: computes x * y in 64 bits
// ('x' is widened to int64_t before the multiplication), adds half of
// 2^RFIX as a rounding term, then shifts right by RFIX.
// RFIX must be visible wherever the macro is expanded.
#define MULT_FIX(x, y) (((int64_t)(x) * (y) + (1 << (RFIX - 1))) >> RFIX)
// Plain-C horizontal pass for one channel of one source row.
// Reads samples from 'src', writes the horizontally rescaled values for
// 'channel' into wrk->frow, and finally adds wrk->frow into wrk->irow.
// Two code paths are visible: a path that sums source samples and carries a
// fractional remainder in 'sum', and (after the 'else') a bilinear
// interpolation path.
//
// NOTE(review): this listing is extraction-damaged. Each statement is split
// over several lines, every original line starts with its old listing number,
// and listing numbers 33-37, 39, 41-43, 47, 51-52, 56-61, 63-65 and 69-71 are
// absent. The declarations of x_in / x_out / accum / sum, the condition that
// opens the 'else' branch (presumably a test on wrk->x_expand -- confirm),
// the updates of x_in and accum, and the closing braces are therefore not
// visible. Restore from the upstream file before compiling.
29 static void ImportRowC(WebPRescaler
* const wrk
,
30 const uint8_t* const src
, int channel
) {
// Samples of one channel are interleaved: consecutive samples are
// num_channels apart, and the row holds dst_width * num_channels values.
31 const int x_stride
= wrk
->num_channels
;
32 const int x_out_max
= wrk
->dst_width
* wrk
->num_channels
;
// One iteration per output sample of this channel.
38 for (x_out
= channel
; x_out
< x_out_max
; x_out
+= x_stride
) {
// Runs while accum is positive, decreasing it by x_sub each time
// (loop body not visible in this listing).
40 for (; accum
> 0; accum
-= wrk
->x_sub
) {
44 { // Emit next horizontal pixel.
45 const int32_t base
= src
[x_in
];
// Portion of 'base' weighted by -accum; it is subtracted from this output
// sample and, rescaled by fx_scale, becomes the starting 'sum' of the next.
46 const int32_t frac
= base
* (-accum
);
48 wrk
->frow
[x_out
] = (sum
+ base
) * wrk
->x_sub
- frac
;
49 // fresh fractional start for next pixel
50 sum
= (int)MULT_FIX(frac
, wrk
->fx_scale
);
53 } else { // simple bilinear interpolation
// Both neighbours start at the first sample of the channel.
54 int left
= src
[channel
], right
= src
[channel
];
55 for (x_out
= channel
; x_out
< x_out_max
; x_out
+= x_stride
) {
// Weighted mix of 'left' and 'right'; accum is the weight given to
// (left - right), x_add the weight given to 'right'.
62 wrk
->frow
[x_out
] = right
* wrk
->x_add
+ (left
- right
) * accum
;
66 // Accumulate the contribution of the new row.
67 for (x_out
= channel
; x_out
< x_out_max
; x_out
+= x_stride
) {
68 wrk
->irow
[x_out
] += wrk
->frow
[x_out
];
72 static void ExportRowC(WebPRescaler
* const wrk
, int x_out
) {
73 if (wrk
->y_accum
<= 0) {
74 uint8_t* const dst
= wrk
->dst
;
75 int32_t* const irow
= wrk
->irow
;
76 const int32_t* const frow
= wrk
->frow
;
77 const int yscale
= wrk
->fy_scale
* (-wrk
->y_accum
);
78 const int x_out_max
= wrk
->dst_width
* wrk
->num_channels
;
79 for (; x_out
< x_out_max
; ++x_out
) {
80 const int frac
= (int)MULT_FIX(frow
[x_out
], yscale
);
81 const int v
= (int)MULT_FIX(irow
[x_out
] - frac
, wrk
->fxy_scale
);
82 dst
[x_out
] = (!(v
& ~0xff)) ? v
: (v
< 0) ? 0 : 255;
83 irow
[x_out
] = frac
; // new fractional start
85 wrk
->y_accum
+= wrk
->y_add
;
86 wrk
->dst
+= wrk
->dst_stride
;
90 //------------------------------------------------------------------------------
93 #if defined(WEBP_USE_MIPS32)
// MIPS32 inline-assembly counterpart of ImportRowC: horizontal pass for one
// channel of one source row, storing results to frow and adding them to irow.
// Two assembly sequences are visible, one under 'if (!wrk->x_expand)' and a
// second one after it (presumably the 'else' branch -- confirm).
//
// NOTE(review): this listing is extraction-damaged. Listing numbers 106-107,
// 110, 112, 115, 117, 120, 126, 138, 146, 155-157, 161, 167, 181 and 188-191
// are absent, so the declarations of accum / accum1 / sum / base / frac, the
// '__asm__' openers, the local labels (1:, 2:, 3:) and some instructions
// (including whatever reads HI/LO back after 'madd') are not visible.
//
// NOTE(review): both sequences execute 'subu %[loop_c], %[loop_c], ...' while
// loop_c is listed among the input-only operands ("r"). GCC's extended-asm
// rules do not allow an input-only operand to be modified; it should be a
// read-write ("+r") operand -- confirm and fix upstream.
95 static void ImportRowMIPS(WebPRescaler
* const wrk
,
96 const uint8_t* const src
, int channel
) {
97 const int x_stride
= wrk
->num_channels
;
98 const int x_out_max
= wrk
->dst_width
* wrk
->num_channels
;
// Struct fields are copied into locals so they can be passed as asm operands.
99 const int fx_scale
= wrk
->fx_scale
;
100 const int x_add
= wrk
->x_add
;
101 const int x_sub
= wrk
->x_sub
;
// Row and source pointers pre-offset to the first sample of 'channel'.
102 int* frow
= wrk
->frow
+ channel
;
103 int* irow
= wrk
->irow
+ channel
;
104 const uint8_t* src1
= src
+ channel
;
105 int temp1
, temp2
, temp3
;
// x_stride scaled by 4: the amount the asm adds to the frow/irow pointers
// per output sample.
108 const int x_stride1
= x_stride
<< 2;
// Counts down by x_stride per output sample; the loops repeat while > 0.
109 int loop_c
= x_out_max
- channel
;
111 if (!wrk
->x_expand
) {
// temp1 * temp2 = 0x8000 * 0x10000 = 1 << 31. The 'mult' further down loads
// that product into HI/LO before 'madd' adds (frac << 2) * fx_scale to it.
113 "li %[temp1], 0x8000 \n\t"
114 "li %[temp2], 0x10000 \n\t"
116 "li %[accum], 0 \n\t"
118 "addu %[accum], %[accum], %[x_add] \n\t"
119 "blez %[accum], 3f \n\t"
// Inner loop: add source bytes into sum while accum stays positive.
121 "lbu %[temp3], 0(%[src1]) \n\t"
122 "subu %[accum], %[accum], %[x_sub] \n\t"
123 "addu %[src1], %[src1], %[x_stride] \n\t"
124 "addu %[sum], %[sum], %[temp3] \n\t"
125 "bgtz %[accum], 2b \n\t"
// Emit one output sample: frac = base * (-accum),
// frow value = (sum + base) * x_sub - frac, then irow += frow value.
127 "lbu %[base], 0(%[src1]) \n\t"
128 "addu %[src1], %[src1], %[x_stride] \n\t"
129 "negu %[accum1], %[accum] \n\t"
130 "mul %[frac], %[base], %[accum1] \n\t"
131 "addu %[temp3], %[sum], %[base] \n\t"
132 "mul %[temp3], %[temp3], %[x_sub] \n\t"
133 "lw %[base], 0(%[irow]) \n\t"
134 "subu %[loop_c], %[loop_c], %[x_stride] \n\t"
135 "sll %[accum1], %[frac], 2 \n\t"
136 "mult %[temp1], %[temp2] \n\t"
137 "madd %[accum1], %[fx_scale] \n\t"
139 "subu %[temp3], %[temp3], %[frac] \n\t"
140 "sw %[temp3], 0(%[frow]) \n\t"
141 "add %[base], %[base], %[temp3] \n\t"
142 "sw %[base], 0(%[irow]) \n\t"
143 "addu %[irow], %[irow], %[x_stride1] \n\t"
144 "addu %[frow], %[frow], %[x_stride1] \n\t"
145 "bgtz %[loop_c], 1b \n\t"
// Operand lists: outputs / read-write pointers, then inputs, then clobbers.
147 : [accum
] "=&r" (accum
), [src1
] "+r" (src1
), [temp3
] "=&r" (temp3
),
148 [sum
] "=&r" (sum
), [base
] "=&r" (base
), [frac
] "=&r" (frac
),
149 [frow
] "+r" (frow
), [irow
] "+r" (irow
), [accum1
] "=&r" (accum1
),
150 [temp2
] "=&r" (temp2
), [temp1
] "=&r" (temp1
)
151 : [x_stride
] "r" (x_stride
), [fx_scale
] "r" (fx_scale
),
152 [x_sub
] "r" (x_sub
), [x_add
] "r" (x_add
),
153 [loop_c
] "r" (loop_c
), [x_stride1
] "r" (x_stride1
)
154 : "memory", "hi", "lo"
// Second sequence: temp1 / temp2 hold two neighbouring source bytes, both
// starting at the first sample. The stored value is
// temp1 * x_add + (temp2 - temp1) * accum, the same form as the bilinear
// expression in ImportRowC.
158 "lbu %[temp1], 0(%[src1]) \n\t"
159 "move %[temp2], %[temp1] \n\t"
160 "li %[accum], 0 \n\t"
162 "bgez %[accum], 2f \n\t"
163 "move %[temp2], %[temp1] \n\t"
164 "addu %[src1], %[x_stride] \n\t"
165 "lbu %[temp1], 0(%[src1]) \n\t"
166 "addu %[accum], %[x_add] \n\t"
168 "subu %[temp3], %[temp2], %[temp1] \n\t"
169 "mul %[temp3], %[temp3], %[accum] \n\t"
170 "mul %[base], %[temp1], %[x_add] \n\t"
171 "subu %[accum], %[accum], %[x_sub] \n\t"
172 "lw %[frac], 0(%[irow]) \n\t"
173 "subu %[loop_c], %[loop_c], %[x_stride] \n\t"
174 "addu %[temp3], %[base], %[temp3] \n\t"
175 "sw %[temp3], 0(%[frow]) \n\t"
176 "addu %[frow], %[x_stride1] \n\t"
177 "addu %[frac], %[temp3] \n\t"
178 "sw %[frac], 0(%[irow]) \n\t"
179 "addu %[irow], %[x_stride1] \n\t"
180 "bgtz %[loop_c], 1b \n\t"
182 : [src1
] "+r" (src1
), [accum
] "=&r" (accum
), [temp1
] "=&r" (temp1
),
183 [temp2
] "=&r" (temp2
), [temp3
] "=&r" (temp3
), [base
] "=&r" (base
),
184 [frac
] "=&r" (frac
), [frow
] "+r" (frow
), [irow
] "+r" (irow
)
185 : [x_stride
] "r" (x_stride
), [x_add
] "r" (x_add
), [x_sub
] "r" (x_sub
),
186 [x_stride1
] "r" (x_stride1
), [loop_c
] "r" (loop_c
)
187 : "memory", "hi", "lo"
// MIPS32 inline-assembly counterpart of ExportRowC.
// Does nothing unless wrk->y_accum <= 0. When the upper bits of
// wrk->fxy_scale are zero, the assembly loop converts the row; the call to
// ExportRowC() at the end is presumably the fallback for the other case
// (the 'else' itself is not visible in this listing -- confirm).
//
// NOTE(review): this listing is extraction-damaged. Listing numbers 208-209,
// 215, 221, 229, 236, 239, 246, 249 and 251-253 are absent, so the '__asm__'
// opener, the local labels, the instructions that read HI/LO back into
// temp1 / temp5, and the closing braces are not visible.
//
// NOTE(review): the guard '(fxy_scale >> 32) == 0' still admits values with
// bit 31 set, which become negative after the '(int)' cast feeding 'madd' --
// confirm such values cannot occur.
//
// NOTE(review): the visible assembly path starts from dst / irow / frow
// unmodified and never reads 'x_out'; only the ExportRowC() call uses it.
// The only caller visible in this file passes 0.
192 static void ExportRowMIPS(WebPRescaler
* const wrk
, int x_out
) {
193 if (wrk
->y_accum
<= 0) {
194 uint8_t* const dst
= wrk
->dst
;
195 int32_t* const irow
= wrk
->irow
;
196 const int32_t* const frow
= wrk
->frow
;
197 const int yscale
= wrk
->fy_scale
* (-wrk
->y_accum
);
198 const int x_out_max
= wrk
->dst_width
* wrk
->num_channels
;
199 // if wrk->fxy_scale can fit into 32 bits use optimized code,
200 // otherwise use C code
201 if ((wrk
->fxy_scale
>> 32) == 0) {
202 int temp0
, temp1
, temp3
, temp4
, temp5
, temp6
, temp7
, loop_end
;
// Low 32 bits of fxy_scale, used as the second 'madd' multiplier.
203 const int temp2
= (int)(wrk
->fxy_scale
);
// Row length scaled by 4; added to frow_t to form the loop end pointer.
204 const int temp8
= x_out_max
<< 2;
// Mutable copies of the pointers, advanced by the assembly loop.
205 uint8_t* dst_t
= (uint8_t*)dst
;
206 int32_t* irow_t
= (int32_t*)irow
;
207 const int32_t* frow_t
= (const int32_t*)frow
;
// Constants: temp6 = -256 (mask of the bits above 0xff), temp7 = 255,
// temp3 * temp4 = 0x10000 * 0x8000 = 1 << 31 (loaded into HI/LO by 'mult'
// before each 'madd').
210 "addiu %[temp6], $zero, -256 \n\t"
211 "addiu %[temp7], $zero, 255 \n\t"
212 "li %[temp3], 0x10000 \n\t"
213 "li %[temp4], 0x8000 \n\t"
214 "addu %[loop_end], %[frow_t], %[temp8] \n\t"
// First multiply-accumulate: (frow value << 2) * yscale on top of 1 << 31.
216 "lw %[temp0], 0(%[frow_t]) \n\t"
217 "mult %[temp3], %[temp4] \n\t"
218 "addiu %[frow_t], %[frow_t], 4 \n\t"
219 "sll %[temp0], %[temp0], 2 \n\t"
220 "madd %[temp0], %[yscale] \n\t"
// Second multiply-accumulate: ((irow value - temp1) << 2) * temp2.
222 "lw %[temp0], 0(%[irow_t]) \n\t"
223 "addiu %[dst_t], %[dst_t], 1 \n\t"
224 "addiu %[irow_t], %[irow_t], 4 \n\t"
225 "subu %[temp0], %[temp0], %[temp1] \n\t"
226 "mult %[temp3], %[temp4] \n\t"
227 "sll %[temp0], %[temp0], 2 \n\t"
228 "madd %[temp0], %[temp2] \n\t"
// Store temp1 back to irow, then clamp temp5: if any bit above 0xff is set,
// temp5 becomes 0 when negative and 255 otherwise (same clamp as ExportRowC).
230 "sw %[temp1], -4(%[irow_t]) \n\t"
231 "and %[temp0], %[temp5], %[temp6] \n\t"
232 "slti %[temp1], %[temp5], 0 \n\t"
233 "beqz %[temp0], 2f \n\t"
234 "xor %[temp5], %[temp5], %[temp5] \n\t"
235 "movz %[temp5], %[temp7], %[temp1] \n\t"
237 "sb %[temp5], -1(%[dst_t]) \n\t"
238 "bne %[frow_t], %[loop_end], 1b \n\t"
240 : [temp0
]"=&r"(temp0
), [temp1
]"=&r"(temp1
), [temp3
]"=&r"(temp3
),
241 [temp4
]"=&r"(temp4
), [temp5
]"=&r"(temp5
), [temp6
]"=&r"(temp6
),
242 [temp7
]"=&r"(temp7
), [frow_t
]"+r"(frow_t
), [irow_t
]"+r"(irow_t
),
243 [dst_t
]"+r"(dst_t
), [loop_end
]"=&r"(loop_end
)
244 : [temp2
]"r"(temp2
), [yscale
]"r"(yscale
), [temp8
]"r"(temp8
)
245 : "memory", "hi", "lo"
// Same row bookkeeping as ExportRowC: bump y_accum and move to the next
// destination row.
247 wrk
->y_accum
+= wrk
->y_add
;
248 wrk
->dst
+= wrk
->dst_stride
;
250 ExportRowC(wrk
, x_out
);
254 #endif // WEBP_USE_MIPS32
256 //------------------------------------------------------------------------------
// Initializes the rescaler state 'wrk' from the source/destination
// dimensions, the destination stride, the channel count, the horizontal and
// vertical step pairs (x_add, x_sub) / (y_add, y_sub) and the work buffer
// 'work'. It also selects the ImportRow / ExportRow implementations when
// WebPRescalerImportRow is still NULL.
//
// NOTE(review): this listing is extraction-damaged. Listing numbers 267,
// 274-275, 281, 283 and 292-300 are absent, so some field assignments
// (nothing visible stores 'dst', 'y_add', 'y_sub' or wrk->irow) and the
// closing '#endif' / braces are not visible.
258 void WebPRescalerInit(WebPRescaler
* const wrk
, int src_width
, int src_height
,
259 uint8_t* const dst
, int dst_width
, int dst_height
,
260 int dst_stride
, int num_channels
, int x_add
, int x_sub
,
261 int y_add
, int y_sub
, int32_t* const work
) {
// Horizontal expansion is used when the source is narrower than the output.
262 wrk
->x_expand
= (src_width
< dst_width
);
263 wrk
->src_width
= src_width
;
264 wrk
->src_height
= src_height
;
265 wrk
->dst_width
= dst_width
;
266 wrk
->dst_height
= dst_height
;
268 wrk
->dst_stride
= dst_stride
;
269 wrk
->num_channels
= num_channels
;
270 // for 'x_expand', we use bilinear interpolation
// When expanding, the roles of the two steps are swapped and each is
// reduced by one; otherwise x_add stores (x_add - x_sub).
271 wrk
->x_add
= wrk
->x_expand
? (x_sub
- 1) : x_add
- x_sub
;
272 wrk
->x_sub
= wrk
->x_expand
? (x_add
- 1) : x_sub
;
273 wrk
->y_accum
= y_add
;
// Fixed-point reciprocals with RFIX fractional bits.
// NOTE(review): x_sub and y_sub are used as divisors without a visible
// zero check -- confirm callers never pass 0.
276 wrk
->fx_scale
= (1 << RFIX
) / x_sub
;
277 wrk
->fy_scale
= (1 << RFIX
) / y_sub
;
// Combined scale: (dst_height << RFIX) divided by x_sub * src_height when
// expanding, or by x_add * src_height otherwise.
// NOTE(review): the divisor product is computed in 'int' before the 64-bit
// division and could overflow for large dimensions -- confirm.
278 wrk
->fxy_scale
= wrk
->x_expand
?
279 ((int64_t)dst_height
<< RFIX
) / (x_sub
* src_height
) :
280 ((int64_t)dst_height
<< RFIX
) / (x_add
* src_height
);
// frow starts num_channels * dst_width entries into the work buffer.
282 wrk
->frow
= work
+ num_channels
* dst_width
;
// First-use dispatch: default to the C kernels, then switch to the MIPS32
// kernels if a CPU-info callback is installed and reports kMIPS32.
284 if (WebPRescalerImportRow
== NULL
) {
285 WebPRescalerImportRow
= ImportRowC
;
286 WebPRescalerExportRow
= ExportRowC
;
287 if (VP8GetCPUInfo
!= NULL
) {
288 #if defined(WEBP_USE_MIPS32)
289 if (VP8GetCPUInfo(kMIPS32
)) {
290 WebPRescalerImportRow
= ImportRowMIPS
;
291 WebPRescalerExportRow
= ExportRowMIPS
;
301 //------------------------------------------------------------------------------
304 int WebPRescaleNeededLines(const WebPRescaler
* const wrk
, int max_num_lines
) {
305 const int num_lines
= (wrk
->y_accum
+ wrk
->y_sub
- 1) / wrk
->y_sub
;
306 return (num_lines
> max_num_lines
) ? max_num_lines
: num_lines
;
// Feeds source rows to the rescaler: while fewer than 'num_lines' rows have
// been imported and wrk->y_accum is positive, runs WebPRescalerImportRow on
// every channel of the current row and then decreases y_accum by y_sub.
// Returns total_imported.
//
// NOTE(review): this listing is extraction-damaged. Listing numbers 313,
// 316-318 and 320 are absent, so the declaration of 'channel', any statement
// that advances 'src' by 'src_stride' or increments total_imported, and the
// closing braces are not visible -- restore from the upstream file.
309 int WebPRescalerImport(WebPRescaler
* const wrk
, int num_lines
,
310 const uint8_t* src
, int src_stride
) {
311 int total_imported
= 0;
312 while (total_imported
< num_lines
&& wrk
->y_accum
> 0) {
// Each channel of the interleaved row is processed separately.
314 for (channel
= 0; channel
< wrk
->num_channels
; ++channel
) {
315 WebPRescalerImportRow(wrk
, src
, channel
);
319 wrk
->y_accum
-= wrk
->y_sub
;
321 return total_imported
;
// Exports rows for as long as WebPRescalerHasPendingOutput() reports output
// is pending, calling WebPRescalerExportRow with a starting index of 0 each
// time. Returns total_exported.
//
// NOTE(review): this listing is extraction-damaged. Listing numbers 328-329
// and 331 are absent, so any increment of total_exported and the closing
// braces are not visible -- restore from the upstream file.
324 int WebPRescalerExport(WebPRescaler
* const rescaler
) {
325 int total_exported
= 0;
326 while (WebPRescalerHasPendingOutput(rescaler
)) {
327 WebPRescalerExportRow(rescaler
, 0);
330 return total_exported
;
333 //------------------------------------------------------------------------------