/*
 *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license and patent
 *  grant that can be found in the LICENSE file in the root of the source
 *  tree. All contributing project authors may be found in the AUTHORS
 *  file in the root of the source tree.
 */
/****************************************************************************
 *
 *   Module Title :     gen_scalers.c
 *
 *   Description  :     Generic image scaling functions.
 *
 ***************************************************************************/
19 /****************************************************************************
21 ****************************************************************************/
22 #include "vpx_scale/vpxscale.h"
24 /****************************************************************************
26 ****************************************************************************/
/****************************************************************************
 *
 *  ROUTINE       : horizontal_line_4_5_scale_c64
 *
 *  INPUTS        : const unsigned char *source : Pointer to source data.
 *                  unsigned int source_width   : Width of the source line in
 *                                                pixels (loop bound).
 *                  unsigned char *dest         : Pointer to destination data.
 *                  unsigned int dest_width     : Width of destination (NOT USED).
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Copies horizontal line of pixels from source to
 *                  destination scaling up by 4 to 5.
 *
 *  SPECIAL NOTES : Every group of 4 source pixels produces 5 destination
 *                  pixels. The first pixel of a group is taken from the
 *                  least significant byte of the loaded word.
 *                  The final group is always processed, so 4 source bytes
 *                  are read and 5 destination bytes are written even when
 *                  source_width is smaller than 4.
 *
 ****************************************************************************/
void horizontal_line_4_5_scale_c64
(
    const unsigned char *source,
    unsigned int source_width,
    unsigned char *dest,
    unsigned int dest_width
)
{
    unsigned int i;
    unsigned int ba, cb, dc, ed;
    unsigned int src_current, src_next;
    unsigned char *restrict des = dest;
    unsigned int *restrict src = (unsigned int *)source;

    // Constants that are to be used for the filtering.  For
    // best speed we are going to want to right shift by 16.
    // In the generic version they were shift by 8, so put
    // an extra 8 in now so that 16 will come out later.
    const unsigned int const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    (void) dest_width;

    // 5 points are needed to filter to give 5 output points.
    // A load can pull up 4 at a time, and one needs to be
    // "borrowed" from the next set of data.  So instead of
    // loading those 5 points each time, "steal" a point from
    // the next set and only load up 4 each time through.
    src_current = _mem4(src);

    // "i + 4 < source_width" is the wrap-free form of
    // "i < source_width - 4" (the subtraction is unsigned).
    for (i = 0; i + 4 < source_width; i += 4)
    {
        // Advance first, then load: src_next has to be the word that
        // FOLLOWS src_current.  A post-increment here would fetch the
        // current word a second time.
        src_next = _mem4(++src);

        // Reorder the data so that it is ready for the
        // dot product: each value holds two neighbouring pixels,
        // the later pixel in the upper half word.
        ba = _unpklu4(src_current);
        cb = _unpkhu4(_rotl(src_current, 8));
        dc = _unpkhu4(src_current);
        ed = _unpkhu4(_shrmb(src_next, src_current));

        // Use the dot product with round and shift.
        des[0] = src_current & 0xff;
        des[1] = _dotprsu2(ba, const_205_51);
        des[2] = _dotprsu2(cb, const_154_102);
        des[3] = _dotprsu2(dc, const_102_154);
        des[4] = _dotprsu2(ed, const_51_205);

        // NOTE(review): output advance restored (5 pixels written per pass).
        des += 5;

        // Reuse loaded values next time around.
        src_current = src_next;
    }

    // Filter the last set of points.  Normally a point from the next set
    // would be used, but there is no next set, so just fill.
    ba = _unpklu4(src_current);
    cb = _unpkhu4(_rotl(src_current, 8));
    dc = _unpkhu4(src_current);

    des[0] = src_current & 0xff;
    des[1] = _dotprsu2(ba, const_205_51);
    des[2] = _dotprsu2(cb, const_154_102);
    des[3] = _dotprsu2(dc, const_102_154);
    // Fill with the LAST pixel of the group (most significant byte), which
    // is the neighbour of the missing sample; the low byte is the first
    // pixel of the group.
    des[4] = (src_current >> 24) & 0xff;
}
/****************************************************************************
 *
 *  ROUTINE       : vertical_band_4_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales vertical band of pixels by scale 4 to 5. The
 *                  height of the band scaled is 4-pixels.
 *
 *  SPECIAL NOTES : The routine uses the first line of the band below
 *                  the current band (row 5 * dest_pitch).
 *                  Rows 1..4 of the band are rewritten in place; row 0 is
 *                  left untouched.
 *                  Loads run one column ahead of the stores, so the column
 *                  at index dest_width is read (never written).
 *
 ****************************************************************************/
void vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    unsigned int i;
    unsigned int a, b, c, d, e;
    unsigned int ba, cb, dc, ed;
    unsigned char *restrict src = dest;
    unsigned char *restrict des = dest;

    const unsigned int const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // Force a loop unroll here so that there is not such a
    // dependency: the first column is loaded before the loop.
    a = src[0];
    b = src[dest_pitch];
    c = src[dest_pitch * 2];
    d = src[dest_pitch * 3];
    e = src[dest_pitch * 5];
    src++;

    for (i = 0; i < dest_width; i++)
    {
        // NOTE(review): pair packing and pointer advances restored; the
        // later row goes in the upper half word, matching the layout of
        // the filter constants -- confirm against the reference build.
        ba = _pack2(b, a);
        cb = _pack2(c, b);
        dc = _pack2(d, c);
        ed = _pack2(e, d);

        // Fetch the next column before storing the current one.
        a = src[0];
        b = src[dest_pitch];
        c = src[dest_pitch * 2];
        d = src[dest_pitch * 3];
        e = src[dest_pitch * 5];
        src++;

        des[dest_pitch]     = _dotprsu2(ba, const_205_51);
        des[dest_pitch * 2] = _dotprsu2(cb, const_154_102);
        des[dest_pitch * 3] = _dotprsu2(dc, const_102_154);
        des[dest_pitch * 4] = _dotprsu2(ed, const_51_205);

        des++;
    }
}
/****************************************************************************
 *
 *  ROUTINE       : last_vertical_band_4_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales last vertical band of pixels by scale 4 to 5. The
 *                  height of the band scaled is 4-pixels.
 *
 *  SPECIAL NOTES : The routine does not have available the first line of
 *                  the band below the current band, since this is the
 *                  last band. Row 4 is therefore filled with the value
 *                  read from row 3.
 *                  Loads run one column ahead of the stores, so the column
 *                  at index dest_width is read (never written).
 *
 ****************************************************************************/
void last_vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    unsigned int i;
    unsigned int a, b, c, d;
    unsigned int ba, cb, dc;
    unsigned char *restrict src = dest;
    unsigned char *restrict des = dest;

    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // First column is loaded ahead of the loop.
    a = src[0];
    b = src[dest_pitch];
    c = src[dest_pitch * 2];
    d = src[dest_pitch * 3];
    src++;

    for (i = 0; i < dest_width; ++i)
    {
        // NOTE(review): pair packing and pointer advances restored; the
        // later row goes in the upper half word -- confirm against the
        // reference build.
        ba = _pack2(b, a);
        cb = _pack2(c, b);
        dc = _pack2(d, c);

        // Row 3 of the CURRENT column is still needed for the fill below,
        // so store it before d is overwritten by the next column.
        des[dest_pitch * 4] = (unsigned char) d;

        // Fetch the next column before storing the filtered rows.
        a = src[0];
        b = src[dest_pitch];
        c = src[dest_pitch * 2];
        d = src[dest_pitch * 3];
        src++;

        des[dest_pitch]     = _dotprsu2(ba, const_205_51);
        des[dest_pitch * 2] = _dotprsu2(cb, const_154_102);
        des[dest_pitch * 3] = _dotprsu2(dc, const_102_154);

        des++;
    }
}
/****************************************************************************
 *
 *  ROUTINE       : horizontal_line_3_5_scale_c64
 *
 *  INPUTS        : const unsigned char *source : Pointer to source data.
 *                  unsigned int source_width   : Width of the source line in
 *                                                pixels (loop bound).
 *                  unsigned char *dest         : Pointer to destination data.
 *                  unsigned int dest_width     : Width of destination (NOT USED).
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Copies horizontal line of pixels from source to
 *                  destination scaling up by 3 to 5.
 *
 *  SPECIAL NOTES : Every group of 3 source pixels produces 5 destination
 *                  pixels. Each pass loads 4 bytes: the 3 pixels of the
 *                  group plus the first pixel of the next group, so the
 *                  final load touches one byte past the last group.
 *
 ****************************************************************************/
void horizontal_line_3_5_scale_c64
(
    const unsigned char *source,
    unsigned int source_width,
    unsigned char *dest,
    unsigned int dest_width
)
{
    unsigned int i;
    unsigned int ba, cb, dc;
    unsigned int src_current;
    unsigned char *restrict des = dest;
    unsigned char *restrict src = (unsigned char *)source;

    // Filter taps, pre-shifted by 8 so that the 16-bit shift of the
    // dot product yields the same result as an 8-bit shift.
    const unsigned int const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    (void) dest_width;

    // "i + 3 < source_width" is the wrap-free form of
    // "i < source_width - 3" (the subtraction is unsigned).
    for (i = 0; i + 3 < source_width; i += 3)
    {
        src_current = _mem4(src);

        // Reorder the data so that it is ready for the
        // dot product: each value holds two neighbouring pixels,
        // the later pixel in the upper half word.
        ba = _unpklu4(src_current);
        cb = _unpkhu4(_rotl(src_current, 8));
        dc = _unpkhu4(src_current);

        des[0] = src_current & 0xff;
        des[1] = _dotprsu2(ba, const_154_102);
        des[2] = _dotprsu2(cb, const_51_205);
        des[3] = _dotprsu2(cb, const_205_51);
        des[4] = _dotprsu2(dc, const_102_154);

        // NOTE(review): pointer advances restored (3 pixels consumed,
        // 5 produced per pass).
        src += 3;
        des += 5;
    }

    // Last group: there is no following pixel to blend with.
    src_current = _mem4(src);

    ba = _unpklu4(src_current);
    cb = _unpkhu4(_rotl(src_current, 8));

    des[0] = src_current & 0xff;
    des[1] = _dotprsu2(ba, const_154_102);
    des[2] = _dotprsu2(cb, const_51_205);
    des[3] = _dotprsu2(cb, const_205_51);
    // NOTE(review): the fifth output of the last group is filled with the
    // last valid source pixel (byte 2), mirroring the fill used by
    // last_vertical_band_3_5_scale_c64 -- confirm against the reference build.
    des[4] = (src_current >> 16) & 0xff;
}
/****************************************************************************
 *
 *  ROUTINE       : vertical_band_3_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales vertical band of pixels by scale 3 to 5. The
 *                  height of the band scaled is 3-pixels.
 *
 *  SPECIAL NOTES : The routine uses the first line of the band below
 *                  the current band (row 5 * dest_pitch).
 *                  Rows 1..4 of the band are rewritten in place; row 0 is
 *                  left untouched.
 *                  Loads run one column ahead of the stores, so the column
 *                  at index dest_width is read (never written).
 *
 ****************************************************************************/
void vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    unsigned int i;
    unsigned int a, b, c, d;
    unsigned int ba, cb, dc;
    unsigned char *restrict src = dest;
    unsigned char *restrict des = dest;

    const unsigned int const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // First column is loaded ahead of the loop.
    a = src[0];
    b = src[dest_pitch];
    c = src[dest_pitch * 2];
    d = src[dest_pitch * 5];
    src++;

    for (i = 0; i < dest_width; i++)
    {
        // NOTE(review): pair packing and pointer advances restored; the
        // later row goes in the upper half word, matching the layout of
        // the filter constants -- confirm against the reference build.
        ba = _pack2(b, a);
        cb = _pack2(c, b);
        dc = _pack2(d, c);

        // Fetch the next column before storing the current one.
        a = src[0];
        b = src[dest_pitch];
        c = src[dest_pitch * 2];
        d = src[dest_pitch * 5];
        src++;

        des[dest_pitch]     = _dotprsu2(ba, const_154_102);
        des[dest_pitch * 2] = _dotprsu2(cb, const_51_205);
        des[dest_pitch * 3] = _dotprsu2(cb, const_205_51);
        des[dest_pitch * 4] = _dotprsu2(dc, const_102_154);

        des++;
    }
}
/****************************************************************************
 *
 *  ROUTINE       : last_vertical_band_3_5_scale_c64
 *
 *  INPUTS        : unsigned char *dest     : Pointer to destination data.
 *                  unsigned int dest_pitch : Stride of destination data.
 *                  unsigned int dest_width : Width of destination data.
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Scales last vertical band of pixels by scale 3 to 5. The
 *                  height of the band scaled is 3-pixels.
 *
 *  SPECIAL NOTES : The routine does not have available the first line of
 *                  the band below the current band, since this is the
 *                  last band. Row 4 is therefore filled with the value
 *                  read from row 2.
 *                  Loads run one column ahead of the stores, so the column
 *                  at index dest_width is read (never written).
 *
 ****************************************************************************/
void last_vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
{
    unsigned int i;
    unsigned int a, b, c;
    unsigned int ba, cb;
    unsigned char *restrict src = dest;
    unsigned char *restrict des = dest;

    const unsigned int const_51_205  = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    const unsigned int const_205_51  = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    const unsigned int const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);

    // First column is loaded ahead of the loop.
    a = src[0];
    b = src[dest_pitch];
    c = src[dest_pitch * 2];
    src++;

    for (i = 0; i < dest_width; ++i)
    {
        // NOTE(review): pair packing and pointer advances restored; the
        // later row goes in the upper half word -- confirm against the
        // reference build.
        ba = _pack2(b, a);
        cb = _pack2(c, b);

        // Row 2 of the CURRENT column is still needed for the fill, so
        // store it before c is overwritten by the next column.
        des[dest_pitch * 4] = (unsigned char)(c);

        // Fetch the next column before storing the filtered rows.
        a = src[0];
        b = src[dest_pitch];
        c = src[dest_pitch * 2];
        src++;

        des[dest_pitch]     = _dotprsu2(ba, const_154_102);
        des[dest_pitch * 2] = _dotprsu2(cb, const_51_205);
        des[dest_pitch * 3] = _dotprsu2(cb, const_205_51);

        des++;
    }
}
447 /****************************************************************************
449 * ROUTINE : horizontal_line_1_2_scale_c64
451 * INPUTS : const unsigned char *source : Pointer to source data.
452 * unsigned int source_width : Stride of source.
453 * unsigned char *dest : Pointer to destination data.
454 * unsigned int dest_width : Stride of destination (NOT USED).
460 * FUNCTION : Copies horizontal line of pixels from source to
461 * destination scaling up by 1 to 2.
463 * SPECIAL NOTES : source width must be a multiple of 4.
465 ****************************************************************************/
466 void horizontal_line_1_2_scale_c64
468 const unsigned char *source
,
469 unsigned int source_width
,
471 unsigned int dest_width
475 unsigned char *restrict des
= dest
;
476 unsigned char *restrict src
= (unsigned char *)source
;
477 unsigned int src7_4i
, src4_1i
, src3_0i
;
478 unsigned int a4_0i
, ahi
, alo
;
479 double src7_0d
, src3_0d
;
480 const unsigned int k01
= 0x01010101;
482 for (i
= 0; i
< source_width
/ 4; i
+= 1)
484 // Load up the data from src. Here a wide load is
485 // used to get 8 bytes at once, only 5 will be used
486 // for the actual computation.
487 src7_0d
= _memd8(src
);
488 src3_0i
= _lo(src7_0d
);
489 src7_4i
= _hi(src7_0d
);
491 // Need to average between points. Shift byte 5 into
492 // the lower word. This will result in bytes 5-1
493 // averaged with 4-0.
494 src4_1i
= _shrmb(src7_4i
, src3_0i
);
495 a4_0i
= _avgu4(src4_1i
, src3_0i
);
497 // Expand the data out. Could do an unpack, however
498 // all but the multiply units are getting pretty hard
499 // here the multiply unit can take some of the computations.
500 src3_0d
= _mpyu4(src3_0i
, k01
);
502 // The averages need to be unpacked so that they are in 16
503 // bit form and will be able to be interleaved with the
505 ahi
= _unpkhu4(a4_0i
);
506 alo
= _unpklu4(a4_0i
);
511 // Mix the average result in with the orginal data.
512 ahi
= _hi(src3_0d
) | ahi
;
513 alo
= _lo(src3_0d
) | alo
;
515 _memd8(des
) = _itod(ahi
, alo
);
523 /****************************************************************************
525 * ROUTINE : vertical_band_1_2_scale_c64
527 * INPUTS : unsigned char *dest : Pointer to destination data.
528 * unsigned int dest_pitch : Stride of destination data.
529 * unsigned int dest_width : Width of destination data.
535 * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The
536 * height of the band scaled is 1-pixel.
538 * SPECIAL NOTES : The routine uses the first line of the band below
540 * Destination width must be a multiple of 4. Because the
541 * intput must be, therefore the output must be.
543 ****************************************************************************/
545 void vertical_band_1_2_scale_c64(unsigned char *dest
, unsigned int dest_pitch
, unsigned int dest_width
)
549 unsigned int *restrict line_a
= (unsigned int *)dest
;
550 unsigned int *restrict line_b
= (unsigned int *)(dest
+ (dest_pitch
* 2));
551 unsigned int *restrict des
= (unsigned int *)(dest
+ dest_pitch
);
553 for (i
= 0; i
< dest_width
/ 4; i
++)
558 _mem4(des
++) = _avgu4(a
, b
);
562 /****************************************************************************
564 * ROUTINE : last_vertical_band_1_2_scale_c64
566 * INPUTS : unsigned char *dest : Pointer to destination data.
567 * unsigned int dest_pitch : Stride of destination data.
568 * unsigned int dest_width : Width of destination data.
574 * FUNCTION : Scales last vertical band of pixels by scale 1 to 2. The
575 * height of the band scaled is 1-pixel.
577 * SPECIAL NOTES : The routine does not have available the first line of
578 * the band below the current band, since this is the
579 * last band. Again, width must be a multiple of 4.
581 ****************************************************************************/
583 void last_vertical_band_1_2_scale_c64(unsigned char *dest
, unsigned int dest_pitch
, unsigned int dest_width
)
586 unsigned int *restrict src
= (unsigned int *)dest
;
587 unsigned int *restrict des
= (unsigned int *)(dest
+ dest_pitch
);
589 for (i
= 0; i
< dest_width
/ 4; ++i
)
591 _mem4(des
++) = _mem4(src
++);
596 register_generic_scalers(void)
598 vp8_horizontal_line_1_2_scale
= horizontal_line_1_2_scale_c64
;
599 vp8_vertical_band_1_2_scale
= vertical_band_1_2_scale_c64
;
600 vp8_last_vertical_band_1_2_scale
= last_vertical_band_1_2_scale_c64
;
601 vp8_horizontal_line_3_5_scale
= horizontal_line_3_5_scale_c64
;
602 vp8_vertical_band_3_5_scale
= vertical_band_3_5_scale_c64
;
603 vp8_last_vertical_band_3_5_scale
= last_vertical_band_3_5_scale_c64
;
604 vp8_horizontal_line_4_5_scale
= horizontal_line_4_5_scale_c64
;
605 vp8_vertical_band_4_5_scale
= vertical_band_4_5_scale_c64
;
606 vp8_last_vertical_band_4_5_scale
= last_vertical_band_4_5_scale_c64
;