2 * This file is part of the Advance project.
4 * Copyright (C) 1999-2002 Andrea Mazzoleni
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 * In addition, as a special exception, Andrea Mazzoleni
21 * gives permission to link the code of this program with
22 * the MAME library (or with modified versions of MAME that use the
23 * same license as MAME), and distribute linked combinations including
24 * the two. You must obey the GNU General Public License in all
25 * respects for all of the code used other than MAME. If you modify
26 * this file, you may extend this exception to your version of the
27 * file, but you are not obligated to do so. If you do not wish to
28 * do so, delete this exception statement from your version.
32 * Alternatively at the previous license terms, you are allowed to use this
33 * code in your program with these conditions:
34 * - the program is not used in commercial activities.
35 * - the whole source code of the program is released with the binary.
/*
 * Boolean flag with C linkage, defined outside this view.
 * NOTE(review): presumably reports MMX availability and selects between the
 * *_mmx and *_def row kernels below -- confirm against its definition and uses.
 */
41 extern "C" bool cpu_mmx
;
44 static void internal_scale2x_16_def(u16
*dst
, const u16
* src0
, const u16
* src1
, const u16
* src2
, unsigned count
) {
47 if (src1
[1] == src0
[0] && src2
[0] != src0
[0])
59 if (src0
[0] != src2
[0] && src1
[-1] != src1
[1]) {
60 dst
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
61 dst
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
75 if (src1
[-1] == src0
[0] && src2
[0] != src0
[0])
82 static void internal_scale2x_32_def(u32
* dst
,
90 if (src1
[1] == src0
[0] && src2
[0] != src0
[0])
102 if (src0
[0] != src2
[0] && src1
[-1] != src1
[1]) {
103 dst
[0] = src1
[-1] == src0
[0] ? src0
[0] : src1
[0];
104 dst
[1] = src1
[1] == src0
[0] ? src0
[0] : src1
[0];
118 if (src1
[-1] == src0
[0] && src2
[0] != src0
[0])
126 static void internal_scale2x_16_mmx_single(u16
* dst
, const u16
* src0
, const u16
* src1
, const u16
* src2
, unsigned count
) {
127 /* always do the first and last run */
131 __asm__
__volatile__(
133 /* set the current, current_pre, current_next registers */
134 "movq 0(%1), %%mm0\n"
150 /* compute the upper-left pixel for dst on %%mm2 */
151 /* compute the upper-right pixel for dst on %%mm4 */
156 "pcmpeqw %%mm6,%%mm2\n"
157 "pcmpeqw %%mm6,%%mm4\n"
158 "pcmpeqw (%2),%%mm3\n"
159 "pcmpeqw (%2),%%mm5\n"
160 "pandn %%mm2,%%mm3\n"
161 "pandn %%mm4,%%mm5\n"
164 "pcmpeqw %%mm1,%%mm2\n"
165 "pcmpeqw %%mm0,%%mm4\n"
166 "pandn %%mm3,%%mm2\n"
167 "pandn %%mm5,%%mm4\n"
172 "pandn %%mm7,%%mm3\n"
173 "pandn %%mm7,%%mm5\n"
179 "punpcklwd %%mm4,%%mm2\n"
180 "punpckhwd %%mm4,%%mm3\n"
196 /* set the current, current_pre, current_next registers */
197 "movq -8(%1),%%mm0\n"
212 /* compute the upper-left pixel for dst on %%mm2 */
213 /* compute the upper-right pixel for dst on %%mm4 */
218 "pcmpeqw %%mm6,%%mm2\n"
219 "pcmpeqw %%mm6,%%mm4\n"
220 "pcmpeqw (%2),%%mm3\n"
221 "pcmpeqw (%2),%%mm5\n"
222 "pandn %%mm2,%%mm3\n"
223 "pandn %%mm4,%%mm5\n"
226 "pcmpeqw %%mm1,%%mm2\n"
227 "pcmpeqw %%mm0,%%mm4\n"
228 "pandn %%mm3,%%mm2\n"
229 "pandn %%mm5,%%mm4\n"
234 "pandn %%mm7,%%mm3\n"
235 "pandn %%mm7,%%mm5\n"
241 "punpcklwd %%mm4,%%mm2\n"
242 "punpckhwd %%mm4,%%mm3\n"
257 /* set the current, current_pre, current_next registers */
260 "movq -8(%1),%%mm0\n"
274 /* compute the upper-left pixel for dst on %%mm2 */
275 /* compute the upper-right pixel for dst on %%mm4 */
280 "pcmpeqw %%mm6,%%mm2\n"
281 "pcmpeqw %%mm6,%%mm4\n"
282 "pcmpeqw (%2),%%mm3\n"
283 "pcmpeqw (%2),%%mm5\n"
284 "pandn %%mm2,%%mm3\n"
285 "pandn %%mm4,%%mm5\n"
288 "pcmpeqw %%mm1,%%mm2\n"
289 "pcmpeqw %%mm0,%%mm4\n"
290 "pandn %%mm3,%%mm2\n"
291 "pandn %%mm5,%%mm4\n"
296 "pandn %%mm7,%%mm3\n"
297 "pandn %%mm7,%%mm5\n"
303 "punpcklwd %%mm4,%%mm2\n"
304 "punpckhwd %%mm4,%%mm3\n"
309 : "+r" (src0
), "+r" (src1
), "+r" (src2
), "+r" (dst
), "+r" (count
)
322 /* set the current, current_pre, current_next registers */
323 movq mm0
, qword ptr
[ebx
];
324 movq mm7
, qword ptr
[ebx
];
325 movq mm1
, qword ptr
[ebx
+ 8];
337 movq mm6
, qword ptr
[eax
];
339 /* compute the upper-left pixel for dst on %%mm2 */
340 /* compute the upper-right pixel for dst on %%mm4 */
347 pcmpeqw mm3
, qword ptr
[ecx
];
348 pcmpeqw mm5
, qword ptr
[ecx
];
370 movq qword ptr
[edx
], mm2
;
371 movq qword ptr
[edx
+ 8], mm3
;
385 /* set the current, current_pre, current_next registers */
386 movq mm0
, qword ptr
[ebx
-8];
387 movq mm7
, qword ptr
[ebx
];
388 movq mm1
, qword ptr
[ebx
+8];
399 movq mm6
, qword ptr
[eax
];
401 /* compute the upper-left pixel for dst on %%mm2 */
402 /* compute the upper-right pixel for dst on %%mm4 */
409 pcmpeqw mm3
, qword ptr
[ecx
];
410 pcmpeqw mm5
, qword ptr
[ecx
];
432 movq qword ptr
[edx
], mm2
;
433 movq qword ptr
[edx
+8], mm3
;
446 /* set the current, current_pre, current_next registers */
447 movq mm1
, qword ptr
[ebx
];
448 movq mm7
, qword ptr
[ebx
];
449 movq mm0
, qword ptr
[ebx
-8];
461 movq mm6
, qword ptr
[eax
];
463 /* compute the upper-left pixel for dst on %%mm2 */
464 /* compute the upper-right pixel for dst on %%mm4 */
471 pcmpeqw mm3
, qword ptr
[ecx
];
472 pcmpeqw mm5
, qword ptr
[ecx
];
494 movq qword ptr
[edx
], mm2
;
495 movq qword ptr
[edx
+8], mm3
;
508 static void internal_scale2x_32_mmx_single(u32
* dst
, const u32
* src0
, const u32
* src1
, const u32
* src2
, unsigned count
) {
509 /* always do the first and last run */
513 __asm__
__volatile__(
515 /* set the current, current_pre, current_next registers */
532 /* compute the upper-left pixel for dst on %%mm2 */
533 /* compute the upper-right pixel for dst on %%mm4 */
538 "pcmpeqd %%mm6,%%mm2\n"
539 "pcmpeqd %%mm6,%%mm4\n"
540 "pcmpeqd (%2),%%mm3\n"
541 "pcmpeqd (%2),%%mm5\n"
542 "pandn %%mm2,%%mm3\n"
543 "pandn %%mm4,%%mm5\n"
546 "pcmpeqd %%mm1,%%mm2\n"
547 "pcmpeqd %%mm0,%%mm4\n"
548 "pandn %%mm3,%%mm2\n"
549 "pandn %%mm5,%%mm4\n"
554 "pandn %%mm7,%%mm3\n"
555 "pandn %%mm7,%%mm5\n"
561 "punpckldq %%mm4,%%mm2\n"
562 "punpckhdq %%mm4,%%mm3\n"
564 "movq %%mm3, 8(%3)\n"
578 /* set the current, current_pre, current_next registers */
579 "movq -8(%1),%%mm0\n"
594 /* compute the upper-left pixel for dst on %%mm2 */
595 /* compute the upper-right pixel for dst on %%mm4 */
600 "pcmpeqd %%mm6,%%mm2\n"
601 "pcmpeqd %%mm6,%%mm4\n"
602 "pcmpeqd (%2),%%mm3\n"
603 "pcmpeqd (%2),%%mm5\n"
604 "pandn %%mm2,%%mm3\n"
605 "pandn %%mm4,%%mm5\n"
608 "pcmpeqd %%mm1,%%mm2\n"
609 "pcmpeqd %%mm0,%%mm4\n"
610 "pandn %%mm3,%%mm2\n"
611 "pandn %%mm5,%%mm4\n"
616 "pandn %%mm7,%%mm3\n"
617 "pandn %%mm7,%%mm5\n"
623 "punpckldq %%mm4,%%mm2\n"
624 "punpckhdq %%mm4,%%mm3\n"
639 /* set the current, current_pre, current_next registers */
642 "movq -8(%1), %%mm0\n"
656 /* compute the upper-left pixel for dst on %%mm2 */
657 /* compute the upper-right pixel for dst on %%mm4 */
662 "pcmpeqd %%mm6,%%mm2\n"
663 "pcmpeqd %%mm6,%%mm4\n"
664 "pcmpeqd (%2),%%mm3\n"
665 "pcmpeqd (%2),%%mm5\n"
666 "pandn %%mm2,%%mm3\n"
667 "pandn %%mm4,%%mm5\n"
670 "pcmpeqd %%mm1,%%mm2\n"
671 "pcmpeqd %%mm0,%%mm4\n"
672 "pandn %%mm3,%%mm2\n"
673 "pandn %%mm5,%%mm4\n"
678 "pandn %%mm7,%%mm3\n"
679 "pandn %%mm7,%%mm5\n"
685 "punpckldq %%mm4,%%mm2\n"
686 "punpckhdq %%mm4,%%mm3\n"
691 : "+r" (src0
), "+r" (src1
), "+r" (src2
), "+r" (dst
), "+r" (count
)
704 /* set the current, current_pre, current_next registers */
705 movq mm0
,qword ptr
[ebx
];
706 movq mm7
,qword ptr
[ebx
];
707 movq mm1
,qword ptr
[ebx
+ 8];
719 movq mm6
,qword ptr
[eax
];
721 /* compute the upper-left pixel for dst on %%mm2 */
722 /* compute the upper-right pixel for dst on %%mm4 */
729 pcmpeqd mm3
,qword ptr
[ecx
];
730 pcmpeqd mm5
,qword ptr
[ecx
];
752 movq qword ptr
[edx
],mm2
;
753 movq qword ptr
[edx
+8],mm3
;
766 /* set the current, current_pre, current_next registers */
767 movq mm0
,qword ptr
[ebx
-8];
768 movq mm7
,qword ptr
[ebx
];
769 movq mm1
,qword ptr
[ebx
+8];
780 movq mm6
,qword ptr
[eax
];
782 /* compute the upper-left pixel for dst on %%mm2 */
783 /* compute the upper-right pixel for dst on %%mm4 */
790 pcmpeqd mm3
,qword ptr
[ecx
];
791 pcmpeqd mm5
,qword ptr
[ecx
];
813 movq qword ptr
[edx
],mm2
;
814 movq qword ptr
[edx
+8],mm3
;
827 /* set the current, current_pre, current_next registers */
828 movq mm1
,qword ptr
[ebx
];
829 movq mm7
,qword ptr
[ebx
];
830 movq mm0
,qword ptr
[ebx
-8];
842 movq mm6
,qword ptr
[eax
];
844 /* compute the upper-left pixel for dst on %%mm2 */
845 /* compute the upper-right pixel for dst on %%mm4 */
852 pcmpeqd mm3
,qword ptr
[ecx
];
853 pcmpeqd mm5
,qword ptr
[ecx
];
875 movq qword ptr
[edx
],mm2
;
876 movq qword ptr
[edx
+8],mm3
;
/*
 * Invoke the 16-bit MMX single-row kernel twice over the same three source
 * rows, once per destination row.
 *
 * dst0, dst1      - destination pointers, each passed as `dst` to one call.
 * src0, src1, src2 - source row pointers; src1 is the middle argument in
 *                    both calls.
 * count           - forwarded unchanged to both calls.
 *                   NOTE(review): the disabled assert below suggests callers
 *                   are expected to pass at least 2*4 -- confirm.
 */
889 static void internal_scale2x_16_mmx(u16
* dst0
, u16
* dst1
, const u16
* src0
, const u16
* src1
, const u16
* src2
, unsigned count
) {
890 // assert( count >= 2*4 );
891 internal_scale2x_16_mmx_single(dst0
, src0
, src1
, src2
, count
);
/* Second call exchanges src0 and src2 and targets dst1. */
892 internal_scale2x_16_mmx_single(dst1
, src2
, src1
, src0
, count
);
/*
 * Invoke the 32-bit MMX single-row kernel twice over the same three source
 * rows, once per destination row.
 *
 * dst0, dst1      - destination pointers, each passed as `dst` to one call.
 * src0, src1, src2 - source row pointers; src1 is the middle argument in
 *                    both calls.
 * count           - forwarded unchanged to both calls.
 *                   NOTE(review): the disabled assert below suggests callers
 *                   are expected to pass at least 2*2 -- confirm.
 */
895 static void internal_scale2x_32_mmx(u32
* dst0
, u32
* dst1
, const u32
* src0
, const u32
* src1
, const u32
* src2
, unsigned count
) {
896 // assert( count >= 2*2 );
897 internal_scale2x_32_mmx_single(dst0
, src0
, src1
, src2
, count
);
/* Second call exchanges src0 and src2 and targets dst1. */
898 internal_scale2x_32_mmx_single(dst1
, src2
, src1
, src0
, count
);
902 void AdMame2x(u8
*srcPtr
, u32 srcPitch
, u8
* unused
/* deltaPtr */,
903 u8
*dstPtr
, u32 dstPitch
, int width
, int height
)
905 u16
*dst0
= (u16
*)dstPtr
;
906 u16
*dst1
= dst0
+ (dstPitch
>> 1);
908 u16
*src0
= (u16
*)srcPtr
;
909 u16
*src1
= src0
+ (srcPitch
>> 1);
910 u16
*src2
= src1
+ (srcPitch
>> 1);
913 internal_scale2x_16_mmx(dst0
, dst1
, src0
, src0
, src1
, width
);
921 internal_scale2x_16_mmx(dst0
, dst1
, src0
, src1
, src2
, width
);
924 src2
+= srcPitch
>> 1;
929 internal_scale2x_16_mmx(dst0
, dst1
, src0
, src1
, src1
, width
);
932 internal_scale2x_16_def(dst0
, src0
, src0
, src1
, width
);
933 internal_scale2x_16_def(dst1
, src1
, src0
, src0
, width
);
941 internal_scale2x_16_def(dst0
, src0
, src1
, src2
, width
);
942 internal_scale2x_16_def(dst1
, src2
, src1
, src0
, width
);
945 src2
+= srcPitch
>> 1;
950 internal_scale2x_16_def(dst0
, src0
, src1
, src1
, width
);
951 internal_scale2x_16_def(dst1
, src1
, src1
, src0
, width
);
957 void AdMame2x32(u8
*srcPtr
, u32 srcPitch
, u8
* unused
/* deltaPtr */,
958 u8
*dstPtr
, u32 dstPitch
, int width
, int height
)
960 u32
*dst0
= (u32
*)dstPtr
;
961 u32
*dst1
= dst0
+ (dstPitch
>> 2);
963 u32
*src0
= (u32
*)srcPtr
;
964 u32
*src1
= src0
+ (srcPitch
>> 2);
965 u32
*src2
= src1
+ (srcPitch
>> 2);
968 internal_scale2x_32_mmx(dst0
, dst1
, src0
, src0
, src1
, width
);
974 dst0
+= dstPitch
>> 1;
975 dst1
+= dstPitch
>> 1;
976 internal_scale2x_32_mmx(dst0
, dst1
, src0
, src1
, src2
, width
);
979 src2
+= srcPitch
>> 2;
982 dst0
+= dstPitch
>> 1;
983 dst1
+= dstPitch
>> 1;
984 internal_scale2x_32_mmx(dst0
, dst1
, src0
, src1
, src1
, width
);
987 internal_scale2x_32_def(dst0
, src0
, src0
, src1
, width
);
988 internal_scale2x_32_def(dst1
, src1
, src0
, src0
, width
);
994 dst0
+= dstPitch
>> 1;
995 dst1
+= dstPitch
>> 1;
996 internal_scale2x_32_def(dst0
, src0
, src1
, src2
, width
);
997 internal_scale2x_32_def(dst1
, src2
, src1
, src0
, width
);
1000 src2
+= srcPitch
>> 2;
1003 dst0
+= dstPitch
>> 1;
1004 dst1
+= dstPitch
>> 1;
1005 internal_scale2x_32_def(dst0
, src0
, src1
, src1
, width
);
1006 internal_scale2x_32_def(dst1
, src1
, src1
, src0
, width
);