2 * This file is part of the Scale2x project.
4 * Copyright (C) 2001, 2002, 2003, 2004 Andrea Mazzoleni
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 * This file contains a C and MMX implementation of the Scale2x effect.
24 * You can find an high level description of the effect at :
26 * http://scale2x.sourceforge.net/
28 * Alternatively at the previous license terms, you are allowed to use this
29 * code in your program with these conditions:
30 * - the program is not used in commercial activities.
31 * - the whole source code of the program is released with the binary.
32 * - derivative works of the program are allowed.
43 /***************************************************************************/
44 /* Scale2x C implementation */
47 * Define the macro USE_SCALE_RANDOMWRITE to enable
48 * an optimized version which writes memory in random order.
49 * This version is a little faster if you write in system memory.
50 * But it's a lot slower if you write in video memory.
51 * So, enable it only if you are sure to never write directly in video memory.
53 /* #define USE_SCALE_RANDOMWRITE */
55 #ifdef USE_SCALE_RANDOMWRITE
57 static inline void scale2x_8_def_whole(scale2x_uint8
* restrict dst0
, scale2x_uint8
* restrict dst1
, const scale2x_uint8
* restrict src0
, const scale2x_uint8
* restrict src1
, const scale2x_uint8
* restrict src2
, unsigned count
)
62 if (src0
[0] != src2
[0] && src1
[0] != src1
[1]) {
63 dst0
[0] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
64 dst0
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
65 dst1
[0] = src1
[0] == src2
[0] ? src2
[0] : src1
[0];
66 dst1
[1] = src1
[1] == src2
[0] ? src2
[0] : src1
[0];
82 if (src0
[0] != src2
[0] && src1
[-1] != src1
[1]) {
83 dst0
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
84 dst0
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
85 dst1
[0] = src1
[-1] == src2
[0] ? src2
[0] : src1
[0];
86 dst1
[1] = src1
[1] == src2
[0] ? src2
[0] : src1
[0];
103 if (src0
[0] != src2
[0] && src1
[-1] != src1
[0]) {
104 dst0
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
105 dst0
[1] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
106 dst1
[0] = src1
[-1] == src2
[0] ? src2
[0] : src1
[0];
107 dst1
[1] = src1
[0] == src2
[0] ? src2
[0] : src1
[0];
116 #else /* USE_SCALE_RANDOMWRITE */
118 static inline void scale2x_8_def_border(scale2x_uint8
* restrict dst
, const scale2x_uint8
* restrict src0
, const scale2x_uint8
* restrict src1
, const scale2x_uint8
* restrict src2
, unsigned count
)
123 if (src0
[0] != src2
[0] && src1
[0] != src1
[1]) {
124 dst
[0] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
125 dst
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
138 if (src0
[0] != src2
[0] && src1
[-1] != src1
[1]) {
139 dst
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
140 dst
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
154 if (src0
[0] != src2
[0] && src1
[-1] != src1
[0]) {
155 dst
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
156 dst
[1] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
163 static inline void scale2x_8_def_center(scale2x_uint8
* restrict dst
, const scale2x_uint8
* restrict src0
, const scale2x_uint8
* restrict src1
, const scale2x_uint8
* restrict src2
, unsigned count
)
168 if (src0
[0] != src2
[0] && src1
[0] != src1
[1]) {
170 dst
[1] = (src1
[1] == src0
[0] && src1
[0] != src2
[1]) || (src1
[1] == src2
[0] && src1
[0] != src0
[1]) ? src1
[1] : src1
[0];
183 if (src0
[0] != src2
[0] && src1
[-1] != src1
[1]) {
184 dst
[0] = (src1
[-1] == src0
[0] && src1
[0] != src2
[-1]) || (src1
[-1] == src2
[0] && src1
[0] != src0
[-1]) ? src1
[-1] : src1
[0];
185 dst
[1] = (src1
[1] == src0
[0] && src1
[0] != src2
[1]) || (src1
[1] == src2
[0] && src1
[0] != src0
[1]) ? src1
[1] : src1
[0];
199 if (src0
[0] != src2
[0] && src1
[-1] != src1
[0]) {
200 dst
[0] = (src1
[-1] == src0
[0] && src1
[0] != src2
[-1]) || (src1
[-1] == src2
[0] && src1
[0] != src0
[-1]) ? src1
[-1] : src1
[0];
208 #endif /* USE_SCALE_RANDOMWRITE */
210 #ifdef USE_SCALE_RANDOMWRITE
212 static inline void scale2x_16_def_whole(scale2x_uint16
* restrict dst0
, scale2x_uint16
* restrict dst1
, const scale2x_uint16
* restrict src0
, const scale2x_uint16
* restrict src1
, const scale2x_uint16
* restrict src2
, unsigned count
)
217 if (src0
[0] != src2
[0] && src1
[0] != src1
[1]) {
218 dst0
[0] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
219 dst0
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
220 dst1
[0] = src1
[0] == src2
[0] ? src2
[0] : src1
[0];
221 dst1
[1] = src1
[1] == src2
[0] ? src2
[0] : src1
[0];
237 if (src0
[0] != src2
[0] && src1
[-1] != src1
[1]) {
238 dst0
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
239 dst0
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
240 dst1
[0] = src1
[-1] == src2
[0] ? src2
[0] : src1
[0];
241 dst1
[1] = src1
[1] == src2
[0] ? src2
[0] : src1
[0];
258 if (src0
[0] != src2
[0] && src1
[-1] != src1
[0]) {
259 dst0
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
260 dst0
[1] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
261 dst1
[0] = src1
[-1] == src2
[0] ? src2
[0] : src1
[0];
262 dst1
[1] = src1
[0] == src2
[0] ? src2
[0] : src1
[0];
271 #else /* USE_SCALE_RANDOMWRITE */
273 static inline void scale2x_16_def_border(scale2x_uint16
* restrict dst
, const scale2x_uint16
* restrict src0
, const scale2x_uint16
* restrict src1
, const scale2x_uint16
* restrict src2
, unsigned count
)
278 if (src0
[0] != src2
[0] && src1
[0] != src1
[1]) {
279 dst
[0] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
280 dst
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
293 if (src0
[0] != src2
[0] && src1
[-1] != src1
[1]) {
294 dst
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
295 dst
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
309 if (src0
[0] != src2
[0] && src1
[-1] != src1
[0]) {
310 dst
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
311 dst
[1] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
318 static inline void scale2x_16_def_center(scale2x_uint16
* restrict dst
, const scale2x_uint16
* restrict src0
, const scale2x_uint16
* restrict src1
, const scale2x_uint16
* restrict src2
, unsigned count
)
323 if (src0
[0] != src2
[0] && src1
[0] != src1
[1]) {
325 dst
[1] = (src1
[1] == src0
[0] && src1
[0] != src2
[1]) || (src1
[1] == src2
[0] && src1
[0] != src0
[1]) ? src1
[1] : src1
[0];
338 if (src0
[0] != src2
[0] && src1
[-1] != src1
[1]) {
339 dst
[0] = (src1
[-1] == src0
[0] && src1
[0] != src2
[-1]) || (src1
[-1] == src2
[0] && src1
[0] != src0
[-1]) ? src1
[-1] : src1
[0];
340 dst
[1] = (src1
[1] == src0
[0] && src1
[0] != src2
[1]) || (src1
[1] == src2
[0] && src1
[0] != src0
[1]) ? src1
[1] : src1
[0];
354 if (src0
[0] != src2
[0] && src1
[-1] != src1
[0]) {
355 dst
[0] = (src1
[-1] == src0
[0] && src1
[0] != src2
[-1]) || (src1
[-1] == src2
[0] && src1
[0] != src0
[-1]) ? src1
[-1] : src1
[0];
363 #endif /* USE_SCALE_RANDOMWRITE */
365 #ifdef USE_SCALE_RANDOMWRITE
367 static inline void scale2x_32_def_whole(scale2x_uint32
* restrict dst0
, scale2x_uint32
* restrict dst1
, const scale2x_uint32
* restrict src0
, const scale2x_uint32
* restrict src1
, const scale2x_uint32
* restrict src2
, unsigned count
)
372 if (src0
[0] != src2
[0] && src1
[0] != src1
[1]) {
373 dst0
[0] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
374 dst0
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
375 dst1
[0] = src1
[0] == src2
[0] ? src2
[0] : src1
[0];
376 dst1
[1] = src1
[1] == src2
[0] ? src2
[0] : src1
[0];
392 if (src0
[0] != src2
[0] && src1
[-1] != src1
[1]) {
393 dst0
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
394 dst0
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
395 dst1
[0] = src1
[-1] == src2
[0] ? src2
[0] : src1
[0];
396 dst1
[1] = src1
[1] == src2
[0] ? src2
[0] : src1
[0];
413 if (src0
[0] != src2
[0] && src1
[-1] != src1
[0]) {
414 dst0
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
415 dst0
[1] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
416 dst1
[0] = src1
[-1] == src2
[0] ? src2
[0] : src1
[0];
417 dst1
[1] = src1
[0] == src2
[0] ? src2
[0] : src1
[0];
426 #else /* USE_SCALE_RANDOMWRITE */
428 static inline void scale2x_32_def_border(scale2x_uint32
* restrict dst
, const scale2x_uint32
* restrict src0
, const scale2x_uint32
* restrict src1
, const scale2x_uint32
* restrict src2
, unsigned count
)
433 if (src0
[0] != src2
[0] && src1
[0] != src1
[1]) {
434 dst
[0] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
435 dst
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
448 if (src0
[0] != src2
[0] && src1
[-1] != src1
[1]) {
449 dst
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
450 dst
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
464 if (src0
[0] != src2
[0] && src1
[-1] != src1
[0]) {
465 dst
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
466 dst
[1] = src1
[0] == src0
[0] ? src0
[0] : src1
[0];
473 static inline void scale2x_32_def_center(scale2x_uint32
* restrict dst
, const scale2x_uint32
* restrict src0
, const scale2x_uint32
* restrict src1
, const scale2x_uint32
* restrict src2
, unsigned count
)
478 if (src0
[0] != src2
[0] && src1
[0] != src1
[1]) {
480 dst
[1] = (src1
[1] == src0
[0] && src1
[0] != src2
[1]) || (src1
[1] == src2
[0] && src1
[0] != src0
[1]) ? src1
[1] : src1
[0];
493 if (src0
[0] != src2
[0] && src1
[-1] != src1
[1]) {
494 dst
[0] = (src1
[-1] == src0
[0] && src1
[0] != src2
[-1]) || (src1
[-1] == src2
[0] && src1
[0] != src0
[-1]) ? src1
[-1] : src1
[0];
495 dst
[1] = (src1
[1] == src0
[0] && src1
[0] != src2
[1]) || (src1
[1] == src2
[0] && src1
[0] != src0
[1]) ? src1
[1] : src1
[0];
509 if (src0
[0] != src2
[0] && src1
[-1] != src1
[0]) {
510 dst
[0] = (src1
[-1] == src0
[0] && src1
[0] != src2
[-1]) || (src1
[-1] == src2
[0] && src1
[0] != src0
[-1]) ? src1
[-1] : src1
[0];
518 #endif /* USE_SCALE_RANDOMWRITE */
521 * Scale by a factor of 2 a row of pixels of 8 bits.
522 * The function is implemented in C.
523 * The pixels over the left and right borders are assumed of the same color of
524 * the pixels on the border.
525 * Note that the implementation is optimized to write data sequentially to
526 * maximize the bandwidth on video memory.
527 * \param src0 Pointer at the first pixel of the previous row.
528 * \param src1 Pointer at the first pixel of the current row.
529 * \param src2 Pointer at the first pixel of the next row.
530 * \param count Length in pixels of the src0, src1 and src2 rows.
531 * It must be at least 2.
532 * \param dst0 First destination row, double length in pixels.
533 * \param dst1 Second destination row, double length in pixels.
535 void scale2x_8_def(scale2x_uint8
* dst0
, scale2x_uint8
* dst1
, const scale2x_uint8
* src0
, const scale2x_uint8
* src1
, const scale2x_uint8
* src2
, unsigned count
)
537 #ifdef USE_SCALE_RANDOMWRITE
538 scale2x_8_def_whole(dst0
, dst1
, src0
, src1
, src2
, count
);
540 scale2x_8_def_border(dst0
, src0
, src1
, src2
, count
);
541 scale2x_8_def_border(dst1
, src2
, src1
, src0
, count
);
546 * Scale by a factor of 2 a row of pixels of 16 bits.
547 * This function operates like scale2x_8_def() but for 16 bits pixels.
548 * \param src0 Pointer at the first pixel of the previous row.
549 * \param src1 Pointer at the first pixel of the current row.
550 * \param src2 Pointer at the first pixel of the next row.
551 * \param count Length in pixels of the src0, src1 and src2 rows.
552 * It must be at least 2.
553 * \param dst0 First destination row, double length in pixels.
554 * \param dst1 Second destination row, double length in pixels.
556 void scale2x_16_def(scale2x_uint16
* dst0
, scale2x_uint16
* dst1
, const scale2x_uint16
* src0
, const scale2x_uint16
* src1
, const scale2x_uint16
* src2
, unsigned count
)
558 #ifdef USE_SCALE_RANDOMWRITE
559 scale2x_16_def_whole(dst0
, dst1
, src0
, src1
, src2
, count
);
561 scale2x_16_def_border(dst0
, src0
, src1
, src2
, count
);
562 scale2x_16_def_border(dst1
, src2
, src1
, src0
, count
);
567 * Scale by a factor of 2 a row of pixels of 32 bits.
568 * This function operates like scale2x_8_def() but for 32 bits pixels.
569 * \param src0 Pointer at the first pixel of the previous row.
570 * \param src1 Pointer at the first pixel of the current row.
571 * \param src2 Pointer at the first pixel of the next row.
572 * \param count Length in pixels of the src0, src1 and src2 rows.
573 * It must be at least 2.
574 * \param dst0 First destination row, double length in pixels.
575 * \param dst1 Second destination row, double length in pixels.
577 void scale2x_32_def(scale2x_uint32
* dst0
, scale2x_uint32
* dst1
, const scale2x_uint32
* src0
, const scale2x_uint32
* src1
, const scale2x_uint32
* src2
, unsigned count
)
579 #ifdef USE_SCALE_RANDOMWRITE
580 scale2x_32_def_whole(dst0
, dst1
, src0
, src1
, src2
, count
);
582 scale2x_32_def_border(dst0
, src0
, src1
, src2
, count
);
583 scale2x_32_def_border(dst1
, src2
, src1
, src0
, count
);
588 * Scale by a factor of 2x3 a row of pixels of 8 bits.
589 * \note Like scale2x_8_def();
591 void scale2x3_8_def(scale2x_uint8
* dst0
, scale2x_uint8
* dst1
, scale2x_uint8
* dst2
, const scale2x_uint8
* src0
, const scale2x_uint8
* src1
, const scale2x_uint8
* src2
, unsigned count
)
593 #ifdef USE_SCALE_RANDOMWRITE
594 scale2x_8_def_whole(dst0
, dst2
, src0
, src1
, src2
, count
);
595 scale2x_8_def_center(dst1
, src0
, src1
, src2
, count
);
597 scale2x_8_def_border(dst0
, src0
, src1
, src2
, count
);
598 scale2x_8_def_center(dst1
, src0
, src1
, src2
, count
);
599 scale2x_8_def_border(dst2
, src2
, src1
, src0
, count
);
604 * Scale by a factor of 2x3 a row of pixels of 16 bits.
605 * \note Like scale2x_16_def();
607 void scale2x3_16_def(scale2x_uint16
* dst0
, scale2x_uint16
* dst1
, scale2x_uint16
* dst2
, const scale2x_uint16
* src0
, const scale2x_uint16
* src1
, const scale2x_uint16
* src2
, unsigned count
)
609 #ifdef USE_SCALE_RANDOMWRITE
610 scale2x_16_def_whole(dst0
, dst2
, src0
, src1
, src2
, count
);
611 scale2x_16_def_center(dst1
, src0
, src1
, src2
, count
);
613 scale2x_16_def_border(dst0
, src0
, src1
, src2
, count
);
614 scale2x_16_def_center(dst1
, src0
, src1
, src2
, count
);
615 scale2x_16_def_border(dst2
, src2
, src1
, src0
, count
);
620 * Scale by a factor of 2x3 a row of pixels of 32 bits.
621 * \note Like scale2x_32_def();
623 void scale2x3_32_def(scale2x_uint32
* dst0
, scale2x_uint32
* dst1
, scale2x_uint32
* dst2
, const scale2x_uint32
* src0
, const scale2x_uint32
* src1
, const scale2x_uint32
* src2
, unsigned count
)
625 #ifdef USE_SCALE_RANDOMWRITE
626 scale2x_32_def_whole(dst0
, dst2
, src0
, src1
, src2
, count
);
627 scale2x_32_def_center(dst1
, src0
, src1
, src2
, count
);
629 scale2x_32_def_border(dst0
, src0
, src1
, src2
, count
);
630 scale2x_32_def_center(dst1
, src0
, src1
, src2
, count
);
631 scale2x_32_def_border(dst2
, src2
, src1
, src0
, count
);
636 * Scale by a factor of 2x4 a row of pixels of 8 bits.
637 * \note Like scale2x_8_def();
639 void scale2x4_8_def(scale2x_uint8
* dst0
, scale2x_uint8
* dst1
, scale2x_uint8
* dst2
, scale2x_uint8
* dst3
, const scale2x_uint8
* src0
, const scale2x_uint8
* src1
, const scale2x_uint8
* src2
, unsigned count
)
641 #ifdef USE_SCALE_RANDOMWRITE
642 scale2x_8_def_whole(dst0
, dst3
, src0
, src1
, src2
, count
);
643 scale2x_8_def_center(dst1
, src0
, src1
, src2
, count
);
644 scale2x_8_def_center(dst2
, src0
, src1
, src2
, count
);
646 scale2x_8_def_border(dst0
, src0
, src1
, src2
, count
);
647 scale2x_8_def_center(dst1
, src0
, src1
, src2
, count
);
648 scale2x_8_def_center(dst2
, src0
, src1
, src2
, count
);
649 scale2x_8_def_border(dst3
, src2
, src1
, src0
, count
);
654 * Scale by a factor of 2x4 a row of pixels of 16 bits.
655 * \note Like scale2x_16_def();
657 void scale2x4_16_def(scale2x_uint16
* dst0
, scale2x_uint16
* dst1
, scale2x_uint16
* dst2
, scale2x_uint16
* dst3
, const scale2x_uint16
* src0
, const scale2x_uint16
* src1
, const scale2x_uint16
* src2
, unsigned count
)
659 #ifdef USE_SCALE_RANDOMWRITE
660 scale2x_16_def_whole(dst0
, dst3
, src0
, src1
, src2
, count
);
661 scale2x_16_def_center(dst1
, src0
, src1
, src2
, count
);
662 scale2x_16_def_center(dst2
, src0
, src1
, src2
, count
);
664 scale2x_16_def_border(dst0
, src0
, src1
, src2
, count
);
665 scale2x_16_def_center(dst1
, src0
, src1
, src2
, count
);
666 scale2x_16_def_center(dst2
, src0
, src1
, src2
, count
);
667 scale2x_16_def_border(dst3
, src2
, src1
, src0
, count
);
672 * Scale by a factor of 2x4 a row of pixels of 32 bits.
673 * \note Like scale2x_32_def();
675 void scale2x4_32_def(scale2x_uint32
* dst0
, scale2x_uint32
* dst1
, scale2x_uint32
* dst2
, scale2x_uint32
* dst3
, const scale2x_uint32
* src0
, const scale2x_uint32
* src1
, const scale2x_uint32
* src2
, unsigned count
)
677 #ifdef USE_SCALE_RANDOMWRITE
678 scale2x_32_def_whole(dst0
, dst3
, src0
, src1
, src2
, count
);
679 scale2x_32_def_center(dst1
, src0
, src1
, src2
, count
);
680 scale2x_32_def_center(dst2
, src0
, src1
, src2
, count
);
682 scale2x_32_def_border(dst0
, src0
, src1
, src2
, count
);
683 scale2x_32_def_center(dst1
, src0
, src1
, src2
, count
);
684 scale2x_32_def_center(dst2
, src0
, src1
, src2
, count
);
685 scale2x_32_def_border(dst3
, src2
, src1
, src0
, count
);
689 /***************************************************************************/
690 /* Scale2x MMX implementation */
692 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))
695 * Apply the Scale2x effect at a single row.
696 * This function must be called only by the other scale2x functions.
698 * Considering the pixel map :
704 * this functions compute 2 new pixels in substitution of the source pixel E
709 * with these variables :
713 * &current_right -> F
714 * &current_upper -> B
715 * &current_lower -> H
717 * %0 -> current_upper
719 * %2 -> current_lower
723 * %mm0 -> *current_left
724 * %mm1 -> *current_next
729 * %mm6 -> *current_upper
/*
 * MMX kernel for one destination row of the 8-bit Scale2x effect.
 * Processes 8 pixels per iteration with byte-wise pcmpeqb/pandn masking.
 * NOTE(review): this block is a garbled extraction -- the first/central/
 * final asm runs are visibly missing lines (register loads, loop label and
 * jump, pointer advances, clobber list, closing braces). Do not edit the
 * asm without restoring it from the upstream scale2x.c first.
 */
732 static inline void scale2x_8_mmx_border(scale2x_uint8
* dst
, const scale2x_uint8
* src0
, const scale2x_uint8
* src1
, const scale2x_uint8
* src2
, unsigned count
)
735 assert(count
% 8 == 0);
737 /* always do the first and last run */
740 __asm__
__volatile__(
/* --- first run: left border, left neighbour is the pixel itself --- */
742 /* set the current, current_pre, current_next registers */
743 "movq 0(%1), %%mm0\n"
744 "movq 0(%1), %%mm7\n"
745 "movq 8(%1), %%mm1\n"
749 "movq %%mm7, %%mm2\n"
750 "movq %%mm7, %%mm3\n"
759 /* compute the upper-left pixel for dst on %%mm2 */
760 /* compute the upper-right pixel for dst on %%mm4 */
761 "movq %%mm0, %%mm2\n"
762 "movq %%mm1, %%mm4\n"
763 "movq %%mm0, %%mm3\n"
764 "movq %%mm1, %%mm5\n"
765 "pcmpeqb %%mm6, %%mm2\n"
766 "pcmpeqb %%mm6, %%mm4\n"
767 "pcmpeqb (%2), %%mm3\n"
768 "pcmpeqb (%2), %%mm5\n"
769 "pandn %%mm2, %%mm3\n"
770 "pandn %%mm4, %%mm5\n"
771 "movq %%mm0, %%mm2\n"
772 "movq %%mm1, %%mm4\n"
773 "pcmpeqb %%mm1, %%mm2\n"
774 "pcmpeqb %%mm0, %%mm4\n"
775 "pandn %%mm3, %%mm2\n"
776 "pandn %%mm5, %%mm4\n"
777 "movq %%mm2, %%mm3\n"
778 "movq %%mm4, %%mm5\n"
779 "pand %%mm6, %%mm2\n"
780 "pand %%mm6, %%mm4\n"
781 "pandn %%mm7, %%mm3\n"
782 "pandn %%mm7, %%mm5\n"
/* interleave the two half-results into the doubled destination */
787 "movq %%mm2, %%mm3\n"
788 "punpcklbw %%mm4, %%mm2\n"
789 "punpckhbw %%mm4, %%mm3\n"
791 "movq %%mm3, 8(%3)\n"
/* --- central runs: full 8-pixel blocks with real left/right neighbours --- */
805 /* set the current, current_pre, current_next registers */
806 "movq -8(%1), %%mm0\n"
808 "movq 8(%1), %%mm1\n"
811 "movq %%mm7, %%mm2\n"
812 "movq %%mm7, %%mm3\n"
821 /* compute the upper-left pixel for dst on %%mm2 */
822 /* compute the upper-right pixel for dst on %%mm4 */
823 "movq %%mm0, %%mm2\n"
824 "movq %%mm1, %%mm4\n"
825 "movq %%mm0, %%mm3\n"
826 "movq %%mm1, %%mm5\n"
827 "pcmpeqb %%mm6, %%mm2\n"
828 "pcmpeqb %%mm6, %%mm4\n"
829 "pcmpeqb (%2), %%mm3\n"
830 "pcmpeqb (%2), %%mm5\n"
831 "pandn %%mm2, %%mm3\n"
832 "pandn %%mm4, %%mm5\n"
833 "movq %%mm0, %%mm2\n"
834 "movq %%mm1, %%mm4\n"
835 "pcmpeqb %%mm1, %%mm2\n"
836 "pcmpeqb %%mm0, %%mm4\n"
837 "pandn %%mm3, %%mm2\n"
838 "pandn %%mm5, %%mm4\n"
839 "movq %%mm2, %%mm3\n"
840 "movq %%mm4, %%mm5\n"
841 "pand %%mm6, %%mm2\n"
842 "pand %%mm6, %%mm4\n"
843 "pandn %%mm7, %%mm3\n"
844 "pandn %%mm7, %%mm5\n"
849 "movq %%mm2, %%mm3\n"
850 "punpcklbw %%mm4, %%mm2\n"
851 "punpckhbw %%mm4, %%mm3\n"
853 "movq %%mm3, 8(%3)\n"
/* --- final run: right border, right neighbour is the pixel itself --- */
866 /* set the current, current_pre, current_next registers */
869 "movq -8(%1), %%mm0\n"
873 "movq %%mm7, %%mm2\n"
874 "movq %%mm7, %%mm3\n"
883 /* compute the upper-left pixel for dst on %%mm2 */
884 /* compute the upper-right pixel for dst on %%mm4 */
885 "movq %%mm0, %%mm2\n"
886 "movq %%mm1, %%mm4\n"
887 "movq %%mm0, %%mm3\n"
888 "movq %%mm1, %%mm5\n"
889 "pcmpeqb %%mm6, %%mm2\n"
890 "pcmpeqb %%mm6, %%mm4\n"
891 "pcmpeqb (%2), %%mm3\n"
892 "pcmpeqb (%2), %%mm5\n"
893 "pandn %%mm2, %%mm3\n"
894 "pandn %%mm4, %%mm5\n"
895 "movq %%mm0, %%mm2\n"
896 "movq %%mm1, %%mm4\n"
897 "pcmpeqb %%mm1, %%mm2\n"
898 "pcmpeqb %%mm0, %%mm4\n"
899 "pandn %%mm3, %%mm2\n"
900 "pandn %%mm5, %%mm4\n"
901 "movq %%mm2, %%mm3\n"
902 "movq %%mm4, %%mm5\n"
903 "pand %%mm6, %%mm2\n"
904 "pand %%mm6, %%mm4\n"
905 "pandn %%mm7, %%mm3\n"
906 "pandn %%mm7, %%mm5\n"
911 "movq %%mm2, %%mm3\n"
912 "punpcklbw %%mm4, %%mm2\n"
913 "punpckhbw %%mm4, %%mm3\n"
915 "movq %%mm3, 8(%3)\n"
/* read/write operands: the asm advances all five registers */
917 : "+r" (src0
), "+r" (src1
), "+r" (src2
), "+r" (dst
), "+r" (count
)
/*
 * MMX kernel for one destination row of the 16-bit Scale2x effect.
 * Processes 4 pixels per iteration with word-wise pcmpeqw/pandn masking;
 * the $48/$16 shifts extract the 16-bit left/right neighbour words.
 * NOTE(review): this block is a garbled extraction -- the loop label and
 * jump, pointer advances, clobber list and closing braces are missing.
 * Do not edit the asm without restoring it from the upstream scale2x.c.
 */
923 static inline void scale2x_16_mmx_border(scale2x_uint16
* dst
, const scale2x_uint16
* src0
, const scale2x_uint16
* src1
, const scale2x_uint16
* src2
, unsigned count
)
926 assert(count
% 4 == 0);
928 /* always do the first and last run */
931 __asm__
__volatile__(
/* --- first run: left border --- */
933 /* set the current, current_pre, current_next registers */
934 "movq 0(%1), %%mm0\n"
935 "movq 0(%1), %%mm7\n"
936 "movq 8(%1), %%mm1\n"
940 "movq %%mm7, %%mm2\n"
941 "movq %%mm7, %%mm3\n"
950 /* compute the upper-left pixel for dst on %%mm2 */
951 /* compute the upper-right pixel for dst on %%mm4 */
952 "movq %%mm0, %%mm2\n"
953 "movq %%mm1, %%mm4\n"
954 "movq %%mm0, %%mm3\n"
955 "movq %%mm1, %%mm5\n"
956 "pcmpeqw %%mm6, %%mm2\n"
957 "pcmpeqw %%mm6, %%mm4\n"
958 "pcmpeqw (%2), %%mm3\n"
959 "pcmpeqw (%2), %%mm5\n"
960 "pandn %%mm2, %%mm3\n"
961 "pandn %%mm4, %%mm5\n"
962 "movq %%mm0, %%mm2\n"
963 "movq %%mm1, %%mm4\n"
964 "pcmpeqw %%mm1, %%mm2\n"
965 "pcmpeqw %%mm0, %%mm4\n"
966 "pandn %%mm3, %%mm2\n"
967 "pandn %%mm5, %%mm4\n"
968 "movq %%mm2, %%mm3\n"
969 "movq %%mm4, %%mm5\n"
970 "pand %%mm6, %%mm2\n"
971 "pand %%mm6, %%mm4\n"
972 "pandn %%mm7, %%mm3\n"
973 "pandn %%mm7, %%mm5\n"
978 "movq %%mm2, %%mm3\n"
979 "punpcklwd %%mm4, %%mm2\n"
980 "punpckhwd %%mm4, %%mm3\n"
982 "movq %%mm3, 8(%3)\n"
/* --- central runs --- */
996 /* set the current, current_pre, current_next registers */
997 "movq -8(%1), %%mm0\n"
999 "movq 8(%1), %%mm1\n"
1000 "psrlq $48, %%mm0\n"
1001 "psllq $48, %%mm1\n"
1002 "movq %%mm7, %%mm2\n"
1003 "movq %%mm7, %%mm3\n"
1004 "psllq $16, %%mm2\n"
1005 "psrlq $16, %%mm3\n"
1006 "por %%mm2, %%mm0\n"
1007 "por %%mm3, %%mm1\n"
1010 "movq (%0), %%mm6\n"
1012 /* compute the upper-left pixel for dst on %%mm2 */
1013 /* compute the upper-right pixel for dst on %%mm4 */
1014 "movq %%mm0, %%mm2\n"
1015 "movq %%mm1, %%mm4\n"
1016 "movq %%mm0, %%mm3\n"
1017 "movq %%mm1, %%mm5\n"
1018 "pcmpeqw %%mm6, %%mm2\n"
1019 "pcmpeqw %%mm6, %%mm4\n"
1020 "pcmpeqw (%2), %%mm3\n"
1021 "pcmpeqw (%2), %%mm5\n"
1022 "pandn %%mm2, %%mm3\n"
1023 "pandn %%mm4, %%mm5\n"
1024 "movq %%mm0, %%mm2\n"
1025 "movq %%mm1, %%mm4\n"
1026 "pcmpeqw %%mm1, %%mm2\n"
1027 "pcmpeqw %%mm0, %%mm4\n"
1028 "pandn %%mm3, %%mm2\n"
1029 "pandn %%mm5, %%mm4\n"
1030 "movq %%mm2, %%mm3\n"
1031 "movq %%mm4, %%mm5\n"
1032 "pand %%mm6, %%mm2\n"
1033 "pand %%mm6, %%mm4\n"
1034 "pandn %%mm7, %%mm3\n"
1035 "pandn %%mm7, %%mm5\n"
1036 "por %%mm3, %%mm2\n"
1037 "por %%mm5, %%mm4\n"
1040 "movq %%mm2, %%mm3\n"
1041 "punpcklwd %%mm4, %%mm2\n"
1042 "punpckhwd %%mm4, %%mm3\n"
1043 "movq %%mm2, (%3)\n"
1044 "movq %%mm3, 8(%3)\n"
/* --- final run: right border --- */
1057 /* set the current, current_pre, current_next registers */
1058 "movq (%1), %%mm1\n"
1059 "movq (%1), %%mm7\n"
1060 "movq -8(%1), %%mm0\n"
1061 "psrlq $48, %%mm1\n"
1062 "psrlq $48, %%mm0\n"
1063 "psllq $48, %%mm1\n"
1064 "movq %%mm7, %%mm2\n"
1065 "movq %%mm7, %%mm3\n"
1066 "psllq $16, %%mm2\n"
1067 "psrlq $16, %%mm3\n"
1068 "por %%mm2, %%mm0\n"
1069 "por %%mm3, %%mm1\n"
1072 "movq (%0), %%mm6\n"
1074 /* compute the upper-left pixel for dst on %%mm2 */
1075 /* compute the upper-right pixel for dst on %%mm4 */
1076 "movq %%mm0, %%mm2\n"
1077 "movq %%mm1, %%mm4\n"
1078 "movq %%mm0, %%mm3\n"
1079 "movq %%mm1, %%mm5\n"
1080 "pcmpeqw %%mm6, %%mm2\n"
1081 "pcmpeqw %%mm6, %%mm4\n"
1082 "pcmpeqw (%2), %%mm3\n"
1083 "pcmpeqw (%2), %%mm5\n"
1084 "pandn %%mm2, %%mm3\n"
1085 "pandn %%mm4, %%mm5\n"
1086 "movq %%mm0, %%mm2\n"
1087 "movq %%mm1, %%mm4\n"
1088 "pcmpeqw %%mm1, %%mm2\n"
1089 "pcmpeqw %%mm0, %%mm4\n"
1090 "pandn %%mm3, %%mm2\n"
1091 "pandn %%mm5, %%mm4\n"
1092 "movq %%mm2, %%mm3\n"
1093 "movq %%mm4, %%mm5\n"
1094 "pand %%mm6, %%mm2\n"
1095 "pand %%mm6, %%mm4\n"
1096 "pandn %%mm7, %%mm3\n"
1097 "pandn %%mm7, %%mm5\n"
1098 "por %%mm3, %%mm2\n"
1099 "por %%mm5, %%mm4\n"
1102 "movq %%mm2, %%mm3\n"
1103 "punpcklwd %%mm4, %%mm2\n"
1104 "punpckhwd %%mm4, %%mm3\n"
1105 "movq %%mm2, (%3)\n"
1106 "movq %%mm3, 8(%3)\n"
/* read/write operands: the asm advances all five registers */
1108 : "+r" (src0
), "+r" (src1
), "+r" (src2
), "+r" (dst
), "+r" (count
)
/*
 * MMX kernel for one destination row of the 32-bit Scale2x effect.
 * Processes 2 pixels per iteration with dword-wise pcmpeqd/pandn masking;
 * the $32 shifts extract the 32-bit left/right neighbour dwords.
 * NOTE(review): this block is a garbled extraction -- the loop label and
 * jump, pointer advances, clobber list and closing braces are missing.
 * Do not edit the asm without restoring it from the upstream scale2x.c.
 */
1114 static inline void scale2x_32_mmx_border(scale2x_uint32
* dst
, const scale2x_uint32
* src0
, const scale2x_uint32
* src1
, const scale2x_uint32
* src2
, unsigned count
)
1117 assert(count
% 2 == 0);
1119 /* always do the first and last run */
1122 __asm__
__volatile__(
/* --- first run: left border --- */
1124 /* set the current, current_pre, current_next registers */
1125 "movq 0(%1), %%mm0\n"
1126 "movq 0(%1), %%mm7\n"
1127 "movq 8(%1), %%mm1\n"
1128 "psllq $32, %%mm0\n"
1129 "psllq $32, %%mm1\n"
1130 "psrlq $32, %%mm0\n"
1131 "movq %%mm7, %%mm2\n"
1132 "movq %%mm7, %%mm3\n"
1133 "psllq $32, %%mm2\n"
1134 "psrlq $32, %%mm3\n"
1135 "por %%mm2, %%mm0\n"
1136 "por %%mm3, %%mm1\n"
1139 "movq (%0), %%mm6\n"
1141 /* compute the upper-left pixel for dst on %%mm2 */
1142 /* compute the upper-right pixel for dst on %%mm4 */
1143 "movq %%mm0, %%mm2\n"
1144 "movq %%mm1, %%mm4\n"
1145 "movq %%mm0, %%mm3\n"
1146 "movq %%mm1, %%mm5\n"
1147 "pcmpeqd %%mm6, %%mm2\n"
1148 "pcmpeqd %%mm6, %%mm4\n"
1149 "pcmpeqd (%2), %%mm3\n"
1150 "pcmpeqd (%2), %%mm5\n"
1151 "pandn %%mm2, %%mm3\n"
1152 "pandn %%mm4, %%mm5\n"
1153 "movq %%mm0, %%mm2\n"
1154 "movq %%mm1, %%mm4\n"
1155 "pcmpeqd %%mm1, %%mm2\n"
1156 "pcmpeqd %%mm0, %%mm4\n"
1157 "pandn %%mm3, %%mm2\n"
1158 "pandn %%mm5, %%mm4\n"
1159 "movq %%mm2, %%mm3\n"
1160 "movq %%mm4, %%mm5\n"
1161 "pand %%mm6, %%mm2\n"
1162 "pand %%mm6, %%mm4\n"
1163 "pandn %%mm7, %%mm3\n"
1164 "pandn %%mm7, %%mm5\n"
1165 "por %%mm3, %%mm2\n"
1166 "por %%mm5, %%mm4\n"
1169 "movq %%mm2, %%mm3\n"
1170 "punpckldq %%mm4, %%mm2\n"
1171 "punpckhdq %%mm4, %%mm3\n"
1172 "movq %%mm2, (%3)\n"
1173 "movq %%mm3, 8(%3)\n"
/* --- central runs --- */
1187 /* set the current, current_pre, current_next registers */
1188 "movq -8(%1), %%mm0\n"
1189 "movq (%1), %%mm7\n"
1190 "movq 8(%1), %%mm1\n"
1191 "psrlq $32, %%mm0\n"
1192 "psllq $32, %%mm1\n"
1193 "movq %%mm7, %%mm2\n"
1194 "movq %%mm7, %%mm3\n"
1195 "psllq $32, %%mm2\n"
1196 "psrlq $32, %%mm3\n"
1197 "por %%mm2, %%mm0\n"
1198 "por %%mm3, %%mm1\n"
1201 "movq (%0), %%mm6\n"
1203 /* compute the upper-left pixel for dst on %%mm2 */
1204 /* compute the upper-right pixel for dst on %%mm4 */
1205 "movq %%mm0, %%mm2\n"
1206 "movq %%mm1, %%mm4\n"
1207 "movq %%mm0, %%mm3\n"
1208 "movq %%mm1, %%mm5\n"
1209 "pcmpeqd %%mm6, %%mm2\n"
1210 "pcmpeqd %%mm6, %%mm4\n"
1211 "pcmpeqd (%2), %%mm3\n"
1212 "pcmpeqd (%2), %%mm5\n"
1213 "pandn %%mm2, %%mm3\n"
1214 "pandn %%mm4, %%mm5\n"
1215 "movq %%mm0, %%mm2\n"
1216 "movq %%mm1, %%mm4\n"
1217 "pcmpeqd %%mm1, %%mm2\n"
1218 "pcmpeqd %%mm0, %%mm4\n"
1219 "pandn %%mm3, %%mm2\n"
1220 "pandn %%mm5, %%mm4\n"
1221 "movq %%mm2, %%mm3\n"
1222 "movq %%mm4, %%mm5\n"
1223 "pand %%mm6, %%mm2\n"
1224 "pand %%mm6, %%mm4\n"
1225 "pandn %%mm7, %%mm3\n"
1226 "pandn %%mm7, %%mm5\n"
1227 "por %%mm3, %%mm2\n"
1228 "por %%mm5, %%mm4\n"
1231 "movq %%mm2, %%mm3\n"
1232 "punpckldq %%mm4, %%mm2\n"
1233 "punpckhdq %%mm4, %%mm3\n"
1234 "movq %%mm2, (%3)\n"
1235 "movq %%mm3, 8(%3)\n"
/* --- final run: right border --- */
1248 /* set the current, current_pre, current_next registers */
1249 "movq (%1), %%mm1\n"
1250 "movq (%1), %%mm7\n"
1251 "movq -8(%1), %%mm0\n"
1252 "psrlq $32, %%mm1\n"
1253 "psrlq $32, %%mm0\n"
1254 "psllq $32, %%mm1\n"
1255 "movq %%mm7, %%mm2\n"
1256 "movq %%mm7, %%mm3\n"
1257 "psllq $32, %%mm2\n"
1258 "psrlq $32, %%mm3\n"
1259 "por %%mm2, %%mm0\n"
1260 "por %%mm3, %%mm1\n"
1263 "movq (%0), %%mm6\n"
1265 /* compute the upper-left pixel for dst on %%mm2 */
1266 /* compute the upper-right pixel for dst on %%mm4 */
1267 "movq %%mm0, %%mm2\n"
1268 "movq %%mm1, %%mm4\n"
1269 "movq %%mm0, %%mm3\n"
1270 "movq %%mm1, %%mm5\n"
1271 "pcmpeqd %%mm6, %%mm2\n"
1272 "pcmpeqd %%mm6, %%mm4\n"
1273 "pcmpeqd (%2), %%mm3\n"
1274 "pcmpeqd (%2), %%mm5\n"
1275 "pandn %%mm2, %%mm3\n"
1276 "pandn %%mm4, %%mm5\n"
1277 "movq %%mm0, %%mm2\n"
1278 "movq %%mm1, %%mm4\n"
1279 "pcmpeqd %%mm1, %%mm2\n"
1280 "pcmpeqd %%mm0, %%mm4\n"
1281 "pandn %%mm3, %%mm2\n"
1282 "pandn %%mm5, %%mm4\n"
1283 "movq %%mm2, %%mm3\n"
1284 "movq %%mm4, %%mm5\n"
1285 "pand %%mm6, %%mm2\n"
1286 "pand %%mm6, %%mm4\n"
1287 "pandn %%mm7, %%mm3\n"
1288 "pandn %%mm7, %%mm5\n"
1289 "por %%mm3, %%mm2\n"
1290 "por %%mm5, %%mm4\n"
1293 "movq %%mm2, %%mm3\n"
1294 "punpckldq %%mm4, %%mm2\n"
1295 "punpckhdq %%mm4, %%mm3\n"
1296 "movq %%mm2, (%3)\n"
1297 "movq %%mm3, 8(%3)\n"
/* read/write operands: the asm advances all five registers */
1299 : "+r" (src0
), "+r" (src1
), "+r" (src2
), "+r" (dst
), "+r" (count
)
1306 * Scale by a factor of 2 a row of pixels of 8 bits.
1307 * This is a very fast MMX implementation.
1308 * The implementation uses a combination of cmp/and/not operations to
1309 * completly remove the need of conditional jumps. This trick give the
1310 * major speed improvement.
1311 * Also, using the 8 bytes MMX registers more than one pixel are computed
1313 * Before calling this function you must ensure that the currenct CPU supports
1314 * the MMX instruction set. After calling it you must be sure to call the EMMS
1315 * instruction before any floating-point operation.
1316 * The pixels over the left and right borders are assumed of the same color of
1317 * the pixels on the border.
1318 * Note that the implementation is optimized to write data sequentially to
1319 * maximize the bandwidth on video memory.
1320 * \param src0 Pointer at the first pixel of the previous row.
1321 * \param src1 Pointer at the first pixel of the current row.
1322 * \param src2 Pointer at the first pixel of the next row.
1323 * \param count Length in pixels of the src0, src1 and src2 rows. It must
1324 * be at least 16 and a multiple of 8.
1325 * \param dst0 First destination row, double length in pixels.
1326 * \param dst1 Second destination row, double length in pixels.
1328 void scale2x_8_mmx(scale2x_uint8
* dst0
, scale2x_uint8
* dst1
, const scale2x_uint8
* src0
, const scale2x_uint8
* src1
, const scale2x_uint8
* src2
, unsigned count
)
1330 if (count
% 8 != 0 || count
< 16) {
1331 scale2x_8_def(dst0
, dst1
, src0
, src1
, src2
, count
);
1333 scale2x_8_mmx_border(dst0
, src0
, src1
, src2
, count
);
1334 scale2x_8_mmx_border(dst1
, src2
, src1
, src0
, count
);
1339 * Scale by a factor of 2 a row of pixels of 16 bits.
1340 * This function operates like scale2x_8_mmx() but for 16 bits pixels.
1341 * \param src0 Pointer at the first pixel of the previous row.
1342 * \param src1 Pointer at the first pixel of the current row.
1343 * \param src2 Pointer at the first pixel of the next row.
1344 * \param count Length in pixels of the src0, src1 and src2 rows. It must
1345 * be at least 8 and a multiple of 4.
1346 * \param dst0 First destination row, double length in pixels.
1347 * \param dst1 Second destination row, double length in pixels.
1349 void scale2x_16_mmx(scale2x_uint16
* dst0
, scale2x_uint16
* dst1
, const scale2x_uint16
* src0
, const scale2x_uint16
* src1
, const scale2x_uint16
* src2
, unsigned count
)
1351 if (count
% 4 != 0 || count
< 8) {
1352 scale2x_16_def(dst0
, dst1
, src0
, src1
, src2
, count
);
1354 scale2x_16_mmx_border(dst0
, src0
, src1
, src2
, count
);
1355 scale2x_16_mmx_border(dst1
, src2
, src1
, src0
, count
);
1360 * Scale by a factor of 2 a row of pixels of 32 bits.
1361 * This function operates like scale2x_8_mmx() but for 32 bits pixels.
1362 * \param src0 Pointer at the first pixel of the previous row.
1363 * \param src1 Pointer at the first pixel of the current row.
1364 * \param src2 Pointer at the first pixel of the next row.
1365 * \param count Length in pixels of the src0, src1 and src2 rows. It must
1366 * be at least 4 and a multiple of 2.
1367 * \param dst0 First destination row, double length in pixels.
1368 * \param dst1 Second destination row, double length in pixels.
1370 void scale2x_32_mmx(scale2x_uint32
* dst0
, scale2x_uint32
* dst1
, const scale2x_uint32
* src0
, const scale2x_uint32
* src1
, const scale2x_uint32
* src2
, unsigned count
)
1372 if (count
% 2 != 0 || count
< 4) {
1373 scale2x_32_def(dst0
, dst1
, src0
, src1
, src2
, count
);
1375 scale2x_32_mmx_border(dst0
, src0
, src1
, src2
, count
);
1376 scale2x_32_mmx_border(dst1
, src2
, src1
, src0
, count
);
1381 * Scale by a factor of 2x3 a row of pixels of 8 bits.
1382 * This function operates like scale2x_8_mmx() but with an expansion
1383 * factor of 2x3 instead of 2x2.
1385 void scale2x3_8_mmx(scale2x_uint8
* dst0
, scale2x_uint8
* dst1
, scale2x_uint8
* dst2
, const scale2x_uint8
* src0
, const scale2x_uint8
* src1
, const scale2x_uint8
* src2
, unsigned count
)
1387 if (count
% 8 != 0 || count
< 16) {
1388 scale2x3_8_def(dst0
, dst1
, dst2
, src0
, src1
, src2
, count
);
1390 scale2x_8_mmx_border(dst0
, src0
, src1
, src2
, count
);
1391 scale2x_8_def_center(dst1
, src0
, src1
, src2
, count
);
1392 scale2x_8_mmx_border(dst2
, src2
, src1
, src0
, count
);
1397 * Scale by a factor of 2x3 a row of pixels of 16 bits.
1398 * This function operates like scale2x_16_mmx() but with an expansion
1399 * factor of 2x3 instead of 2x2.
1401 void scale2x3_16_mmx(scale2x_uint16
* dst0
, scale2x_uint16
* dst1
, scale2x_uint16
* dst2
, const scale2x_uint16
* src0
, const scale2x_uint16
* src1
, const scale2x_uint16
* src2
, unsigned count
)
1403 if (count
% 4 != 0 || count
< 8) {
1404 scale2x3_16_def(dst0
, dst1
, dst2
, src0
, src1
, src2
, count
);
1406 scale2x_16_mmx_border(dst0
, src0
, src1
, src2
, count
);
1407 scale2x_16_def_center(dst1
, src0
, src1
, src2
, count
);
1408 scale2x_16_mmx_border(dst2
, src2
, src1
, src0
, count
);
1413 * Scale by a factor of 2x3 a row of pixels of 32 bits.
1414 * This function operates like scale2x_32_mmx() but with an expansion
1415 * factor of 2x3 instead of 2x2.
1417 void scale2x3_32_mmx(scale2x_uint32
* dst0
, scale2x_uint32
* dst1
, scale2x_uint32
* dst2
, const scale2x_uint32
* src0
, const scale2x_uint32
* src1
, const scale2x_uint32
* src2
, unsigned count
)
1419 if (count
% 2 != 0 || count
< 4) {
1420 scale2x3_32_def(dst0
, dst1
, dst2
, src0
, src1
, src2
, count
);
1422 scale2x_32_mmx_border(dst0
, src0
, src1
, src2
, count
);
1423 scale2x_32_def_center(dst1
, src0
, src1
, src2
, count
);
1424 scale2x_32_mmx_border(dst2
, src2
, src1
, src0
, count
);
1429 * Scale by a factor of 2x4 a row of pixels of 8 bits.
1430 * This function operates like scale2x_8_mmx() but with an expansion
1431 * factor of 2x4 instead of 2x2.
1433 void scale2x4_8_mmx(scale2x_uint8
* dst0
, scale2x_uint8
* dst1
, scale2x_uint8
* dst2
, scale2x_uint8
* dst3
, const scale2x_uint8
* src0
, const scale2x_uint8
* src1
, const scale2x_uint8
* src2
, unsigned count
)
1435 if (count
% 8 != 0 || count
< 16) {
1436 scale2x4_8_def(dst0
, dst1
, dst2
, dst3
, src0
, src1
, src2
, count
);
1438 scale2x_8_mmx_border(dst0
, src0
, src1
, src2
, count
);
1439 scale2x_8_def_center(dst1
, src0
, src1
, src2
, count
);
1440 scale2x_8_def_center(dst2
, src0
, src1
, src2
, count
);
1441 scale2x_8_mmx_border(dst3
, src2
, src1
, src0
, count
);
1446 * Scale by a factor of 2x4 a row of pixels of 16 bits.
1447 * This function operates like scale2x_16_mmx() but with an expansion
1448 * factor of 2x4 instead of 2x2.
1450 void scale2x4_16_mmx(scale2x_uint16
* dst0
, scale2x_uint16
* dst1
, scale2x_uint16
* dst2
, scale2x_uint16
* dst3
, const scale2x_uint16
* src0
, const scale2x_uint16
* src1
, const scale2x_uint16
* src2
, unsigned count
)
1452 if (count
% 4 != 0 || count
< 8) {
1453 scale2x4_16_def(dst0
, dst1
, dst2
, dst3
, src0
, src1
, src2
, count
);
1455 scale2x_16_mmx_border(dst0
, src0
, src1
, src2
, count
);
1456 scale2x_16_def_center(dst1
, src0
, src1
, src2
, count
);
1457 scale2x_16_def_center(dst2
, src0
, src1
, src2
, count
);
1458 scale2x_16_mmx_border(dst3
, src2
, src1
, src0
, count
);
1463 * Scale by a factor of 2x4 a row of pixels of 32 bits.
1464 * This function operates like scale2x_32_mmx() but with an expansion
1465 * factor of 2x4 instead of 2x2.
1467 void scale2x4_32_mmx(scale2x_uint32
* dst0
, scale2x_uint32
* dst1
, scale2x_uint32
* dst2
, scale2x_uint32
* dst3
, const scale2x_uint32
* src0
, const scale2x_uint32
* src1
, const scale2x_uint32
* src2
, unsigned count
)
1469 if (count
% 2 != 0 || count
< 4) {
1470 scale2x4_32_def(dst0
, dst1
, dst2
, dst3
, src0
, src1
, src2
, count
);
1472 scale2x_32_mmx_border(dst0
, src0
, src1
, src2
, count
);
1473 scale2x_32_def_center(dst1
, src0
, src1
, src2
, count
);
1474 scale2x_32_def_center(dst2
, src0
, src1
, src2
, count
);
1475 scale2x_32_mmx_border(dst3
, src2
, src1
, src0
, count
);