1 // VisualBoyAdvance - Nintendo Gameboy/GameboyAdvance (TM) emulator.
2 // Copyright (C) 1999-2003 Forgotten
3 // Copyright (C) 2004 Forgotten and the VBA development team
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2, or (at your option)
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
// Flag defined in another translation unit, declared here with C linkage.
// It is not read anywhere in the visible lines of this file; presumably it
// selects the *_MMX code paths -- TODO confirm against the dispatch code.
24 extern "C" bool cpu_mmx
;
28 * Thanks to Kawaks' Mr. K for the code
30 Incorporated into vba by Anthony Di Franco
// File-local frame buffers, NULL until allocated. The filters below read and
// write them through the local pointers src1 (frm1), src2 (frm2) and
// src3 (frm3).
33 static u8
*frm1
= NULL
;
34 static u8
*frm2
= NULL
;
35 static u8
*frm3
= NULL
;
// Defined elsewhere; the 16-bit C paths use its complement as the blend mask.
37 extern int RGB_LOW_BITS_MASK
;
// Defined elsewhere; the MMX paths load it into mm7 as one 64-bit mask.
38 extern u32 qRGB_COLOR_MASK
[2];
// Allocate the three frame buffers, zero-filled: 322*242 elements of 4 bytes
// each.
// NOTE(review): the enclosing function header is missing from this extract,
// and the visible lines never check calloc's result for NULL.
42 frm1
= (u8
*)calloc(322*242,4);
44 frm2
= (u8
*)calloc(322*242,4);
46 frm3
= (u8
*)calloc(322*242,4);
// Resets all three frame-buffer pointers to NULL.
// NOTE(review): the lines between the signature and this assignment are
// missing from this extract; presumably they release the buffers -- confirm
// against the full file.
50 void InterframeCleanup()
58 frm1
= frm2
= frm3
= NULL
;
// MMX implementation of the 16-bit "smart" interframe blend.
//
// srcPtr is viewed as u16 pixels (src0); frm1/frm2/frm3 are viewed the same
// way as src1/src2/src3. Each movq moves 64 bits, i.e. four u16 pixels, and
// the compares are word-wise (pcmpeqw), hence count = width / 4.
//
// Going by the inline comments, for every pixel the sequence computes
//   res = !((src1 == src2) | (src3 == src0)) & ((src0 == src2) | (src1 == src3))
// and stores ((src0 & mask) >> 1) + ((src1 & mask) >> 1) where res is set,
// or the unchanged src0 pixel where it is not. The incoming src0 pixels are
// also copied into the src3 buffer.
//
// NOTE(review): several lines of the original sequence (register-to-register
// copies, pointer increments, loop control, the final combine of the two
// masked halves) are missing from this extract, and srcPitch is not used in
// the visible lines.
62 static void SmartIB_MMX(u8
*srcPtr
, u32 srcPitch
, int width
, int height
)
64 u16
*src0
= (u16
*)srcPtr
;
65 u16
*src1
= (u16
*)frm1
;
66 u16
*src2
= (u16
*)frm2
;
67 u16
*src3
= (u16
*)frm3
;
// Number of 4-pixel (64-bit) groups per row.
69 int count
= width
>> 2;
71 for(int i
= 0; i
< height
; i
++) {
// AT&T-syntax (GCC extended asm) form. Operands: %0..%3 = src0..src3,
// %4 = count, %5 = address of qRGB_COLOR_MASK.
75 "movq 0(%5), %%mm7\n" // colorMask
77 "movq 0(%0), %%mm0\n" // src0
78 "movq 0(%1), %%mm1\n" // src1
79 "movq 0(%2), %%mm2\n" // src2
80 "movq 0(%3), %%mm3\n" // src3
81 "movq %%mm0, 0(%3)\n" // src3 = src0
84 "pcmpeqw %%mm2, %%mm5\n" // src1 == src2 (A)
85 "pcmpeqw %%mm3, %%mm4\n" // src3 == src0 (B)
86 "por %%mm5, %%mm4\n" // A | B
88 "pcmpeqw %%mm0, %%mm5\n" // src0 == src2 (C)
89 "pcmpeqw %%mm1, %%mm3\n" // src1 == src3 (D)
90 "por %%mm3, %%mm5\n" // C|D
91 "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
93 "pand %%mm7, %%mm2\n" // color & colorMask
94 "pand %%mm7, %%mm1\n" // src1 & colorMask
95 "psrlw $1, %%mm2\n" // (color & colorMask) >> 1 (E)
96 "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
97 "paddw %%mm2, %%mm1\n" // E+F
98 "pand %%mm4, %%mm1\n" // (E+F) & res
99 "pandn %%mm0, %%mm4\n" // color& !res
102 "movq %%mm4, 0(%0)\n" // src0 = res
// The four pointers are read-write ("+r") operands; count and the mask
// address are read-only inputs.
113 : "+r" (src0
), "+r" (src1
), "+r" (src2
), "+r" (src3
)
114 : "r" (count
), "r" (qRGB_COLOR_MASK
)
// Intel-syntax rendering of the same instruction sequence, with
// eax/ebx/ecx/edx addressing src0/src1/src2/src3. Presumably this is the
// non-GCC build; the preprocessor lines selecting it are not visible.
118 movq mm7
, qword ptr
[qRGB_COLOR_MASK
];
125 movq mm0
, qword ptr
[eax
]; // src0
126 movq mm1
, qword ptr
[ebx
]; // src1
127 movq mm2
, qword ptr
[ecx
]; // src2
128 movq mm3
, qword ptr
[edx
]; // src3
129 movq qword ptr
[edx
], mm0
; // src3 = src0
132 pcmpeqw mm5
, mm2
; // src1 == src2 (A)
133 pcmpeqw mm4
, mm3
; // src3 == src0 (B)
134 por mm4
, mm5
; // A | B
136 pcmpeqw mm5
, mm0
; // src0 == src2 (C)
137 pcmpeqw mm3
, mm1
; // src1 == src3 (D)
139 pandn mm4
, mm5
; // (!(A|B))&(C|D)
141 pand mm2
, mm7
; // color & colorMask
142 pand mm1
, mm7
; // src1 & colorMask
143 psrlw mm2
, 1; // (color & colorMask) >> 1 (E)
144 psrlw mm1
, 1; // (src & colorMask) >> 1 (F)
145 paddw mm1
, mm2
; // E+F
146 pand mm1
, mm4
; // (E+F) & res
147 pandn mm4
, mm0
; // color & !res
150 movq qword ptr
[eax
], mm4
; // src0 = res
// NOTE(review): the statements that perform the swap are missing from this
// extract.
172 /* Swap buffers around */
// 16-bit "smart" interframe blend, public entry point.
//
// The visible lines contain a call that forwards all four arguments to
// SmartIB_MMX, followed by a plain C loop that applies the per-pixel rule one
// u16 at a time.
// NOTE(review): the condition guarding the MMX call, the declaration and
// increment of pos, the assignment target and else-branch of the conditional
// expression, and the buffer-swap statements are missing from this extract.
180 void SmartIB(u8
*srcPtr
, u32 srcPitch
, int width
, int height
)
187 SmartIB_MMX(srcPtr
, srcPitch
, width
, height
);
// Blend mask: complement of RGB_LOW_BITS_MASK, truncated to 16 bits.
192 u16 colorMask
= ~RGB_LOW_BITS_MASK
;
// src0 is the caller's buffer; src1..src3 view frm1..frm3 as u16 pixels.
194 u16
*src0
= (u16
*)srcPtr
;
195 u16
*src1
= (u16
*)frm1
;
196 u16
*src2
= (u16
*)frm2
;
197 u16
*src3
= (u16
*)frm3
;
// srcPitch halved: the row length in u16 elements, assuming srcPitch is a
// byte count -- TODO confirm with callers.
199 int sPitch
= srcPitch
>> 1;
// Visits height * sPitch elements; width is not used by the visible loop.
202 for (int j
= 0; j
< height
; j
++)
203 for (int i
= 0; i
< sPitch
; i
++) {
204 u16 color
= src0
[pos
];
// Blend when src1 != src2 and src3 != color and (color == src2 or
// src1 == src3); the blended value is the sum of the two masked, halved
// pixels (color and src1).
206 (src1
[pos
] != src2
[pos
]) &&
207 (src3
[pos
] != color
) &&
208 ((color
== src2
[pos
]) || (src1
[pos
] == src3
[pos
]))
209 ? (((color
& colorMask
) >> 1) + ((src1
[pos
] & colorMask
) >> 1)) :
211 src3
[pos
] = color
; /* oldest buffer now holds newest frame */
215 /* Swap buffers around */
// MMX implementation of the 32-bit "smart" interframe blend.
//
// Same scheme as the 16-bit variant but on u32 pixels: each movq carries two
// pixels, the compares are doubleword-wise (pcmpeqd), and count = width / 2.
// Per pixel:
//   res = !((src1 == src2) | (src3 == src0)) & ((src0 == src2) | (src1 == src3))
// Where res is set the result is ((src0 & mask) >> 1) + ((src1 & mask) >> 1);
// elsewhere the src0 pixel is kept. The incoming src0 pixels are also copied
// into the src3 buffer.
//
// NOTE(review): pointer increments, loop control and the final combine of the
// two masked halves are missing from this extract, and srcPitch is not used
// in the visible lines.
223 static void SmartIB32_MMX(u8
*srcPtr
, u32 srcPitch
, int width
, int height
)
225 u32
*src0
= (u32
*)srcPtr
;
226 u32
*src1
= (u32
*)frm1
;
227 u32
*src2
= (u32
*)frm2
;
228 u32
*src3
= (u32
*)frm3
;
// Number of 2-pixel (64-bit) groups per row.
230 int count
= width
>> 1;
232 for(int i
= 0; i
< height
; i
++) {
// AT&T-syntax (GCC extended asm) form. Operands: %0..%3 = src0..src3,
// %4 = count, %5 = address of qRGB_COLOR_MASK.
236 "movq 0(%5), %%mm7\n" // colorMask
238 "movq 0(%0), %%mm0\n" // src0
239 "movq 0(%1), %%mm1\n" // src1
240 "movq 0(%2), %%mm2\n" // src2
241 "movq 0(%3), %%mm3\n" // src3
242 "movq %%mm0, 0(%3)\n" // src3 = src0
243 "movq %%mm0, %%mm4\n"
244 "movq %%mm1, %%mm5\n"
245 "pcmpeqd %%mm2, %%mm5\n" // src1 == src2 (A)
246 "pcmpeqd %%mm3, %%mm4\n" // src3 == src0 (B)
247 "por %%mm5, %%mm4\n" // A | B
248 "movq %%mm2, %%mm5\n"
249 "pcmpeqd %%mm0, %%mm5\n" // src0 == src2 (C)
250 "pcmpeqd %%mm1, %%mm3\n" // src1 == src3 (D)
251 "por %%mm3, %%mm5\n" // C|D
252 "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
253 "movq %%mm0, %%mm2\n"
254 "pand %%mm7, %%mm2\n" // color & colorMask
255 "pand %%mm7, %%mm1\n" // src1 & colorMask
256 "psrld $1, %%mm2\n" // (color & colorMask) >> 1 (E)
257 "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
258 "paddd %%mm2, %%mm1\n" // E+F
259 "pand %%mm4, %%mm1\n" // (E+F) & res
260 "pandn %%mm0, %%mm4\n" // color& !res
263 "movq %%mm4, 0(%0)\n" // src0 = res
// The four pointers are read-write ("+r") operands; count and the mask
// address are read-only inputs.
274 : "+r" (src0
), "+r" (src1
), "+r" (src2
), "+r" (src3
)
275 : "r" (count
), "r" (qRGB_COLOR_MASK
)
// Intel-syntax rendering of the same instruction sequence, with
// eax/ebx/ecx/edx addressing src0/src1/src2/src3. Presumably this is the
// non-GCC build; the preprocessor lines selecting it are not visible.
279 movq mm7
, qword ptr
[qRGB_COLOR_MASK
];
286 movq mm0
, qword ptr
[eax
]; // src0
287 movq mm1
, qword ptr
[ebx
]; // src1
288 movq mm2
, qword ptr
[ecx
]; // src2
289 movq mm3
, qword ptr
[edx
]; // src3
290 movq qword ptr
[edx
], mm0
; // src3 = src0
293 pcmpeqd mm5
, mm2
; // src1 == src2 (A)
294 pcmpeqd mm4
, mm3
; // src3 == src0 (B)
295 por mm4
, mm5
; // A | B
297 pcmpeqd mm5
, mm0
; // src0 == src2 (C)
298 pcmpeqd mm3
, mm1
; // src1 == src3 (D)
300 pandn mm4
, mm5
; // (!(A|B))&(C|D)
302 pand mm2
, mm7
; // color & colorMask
303 pand mm1
, mm7
; // src1 & colorMask
304 psrld mm2
, 1; // (color & colorMask) >> 1 (E)
305 psrld mm1
, 1; // (src & colorMask) >> 1 (F)
306 paddd mm1
, mm2
; // E+F
307 pand mm1
, mm4
; // (E+F) & res
308 pandn mm4
, mm0
; // color & !res
311 movq qword ptr
[eax
], mm4
; // src0 = res
// NOTE(review): the statements that perform the swap are missing from this
// extract.
333 /* Swap buffers around */
// 32-bit "smart" interframe blend, public entry point.
//
// The visible lines contain a call that forwards all four arguments to
// SmartIB32_MMX, followed by a plain C loop that applies the per-pixel rule
// one u32 at a time.
// NOTE(review): the condition guarding the MMX call, the declaration and
// increment of pos, the assignment target and else-branch of the conditional
// expression, and the buffer-swap statements are missing from this extract.
341 void SmartIB32(u8
*srcPtr
, u32 srcPitch
, int width
, int height
)
348 SmartIB32_MMX(srcPtr
, srcPitch
, width
, height
);
// src0 is the caller's buffer; src1..src3 view frm1..frm3 as u32 pixels.
353 u32
*src0
= (u32
*)srcPtr
;
354 u32
*src1
= (u32
*)frm1
;
355 u32
*src2
= (u32
*)frm2
;
356 u32
*src3
= (u32
*)frm3
;
// Clears bit 0 of each of the three low bytes (and the whole top byte), so
// the >> 1 below cannot carry a bit from one byte into the next.
358 u32 colorMask
= 0xfefefe;
// srcPitch divided by 4: the row length in u32 elements, assuming srcPitch
// is a byte count -- TODO confirm with callers.
360 int sPitch
= srcPitch
>> 2;
// Visits height * sPitch elements; width is not used by the visible loop.
363 for (int j
= 0; j
< height
; j
++)
364 for (int i
= 0; i
< sPitch
; i
++) {
365 u32 color
= src0
[pos
];
// Blend when src1 != src2 and src3 != color and (color == src2 or
// src1 == src3); the blended value is the sum of the two masked, halved
// pixels (color and src1).
367 (src1
[pos
] != src2
[pos
]) &&
368 (src3
[pos
] != color
) &&
369 ((color
== src2
[pos
]) || (src1
[pos
] == src3
[pos
]))
370 ? (((color
& colorMask
) >> 1) + ((src1
[pos
] & colorMask
) >> 1)) :
372 src3
[pos
] = color
; /* oldest buffer now holds newest frame */
376 /* Swap buffers around */
// MMX implementation of the 16-bit motion-blur filter.
//
// Handles four u16 pixels per movq (count = width / 4). For each group it
// loads the current pixels (src0) and the stored pixels from frm1 (src1),
// writes the unmodified current pixels into frm1, and replaces the current
// pixels with ((src0 & mask) >> 1) + ((src1 & mask) >> 1).
//
// NOTE(review): pointer increments and loop control are missing from this
// extract, and srcPitch is not used in the visible lines.
384 static void MotionBlurIB_MMX(u8
*srcPtr
, u32 srcPitch
, int width
, int height
)
386 u16
*src0
= (u16
*)srcPtr
;
387 u16
*src1
= (u16
*)frm1
;
// Number of 4-pixel (64-bit) groups per row.
389 int count
= width
>> 2;
391 for(int i
= 0; i
< height
; i
++) {
// AT&T-syntax (GCC extended asm) form. Operands: %0 = src0, %1 = src1,
// %2 = count, %3 = address of qRGB_COLOR_MASK.
395 "movq 0(%3), %%mm7\n" // colorMask
397 "movq 0(%0), %%mm0\n" // src0
398 "movq 0(%1), %%mm1\n" // src1
399 "movq %%mm0, 0(%1)\n" // src1 = src0
400 "pand %%mm7, %%mm0\n" // color & colorMask
401 "pand %%mm7, %%mm1\n" // src1 & colorMask
402 "psrlw $1, %%mm0\n" // (color & colorMask) >> 1 (E)
403 "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
404 "paddw %%mm1, %%mm0\n" // E+F
406 "movq %%mm0, 0(%0)\n" // src0 = res
415 : "+r" (src0
), "+r" (src1
)
416 : "r" (count
), "r" (qRGB_COLOR_MASK
)
// Intel-syntax rendering of the same sequence, with eax/ebx addressing
// src0/src1. Presumably the non-GCC build; the selecting preprocessor lines
// are not visible.
420 movq mm7
, qword ptr
[qRGB_COLOR_MASK
];
425 movq mm0
, qword ptr
[eax
]; // src0
426 movq mm1
, qword ptr
[ebx
]; // src1
427 movq qword ptr
[ebx
], mm0
; // src1 = src0
428 pand mm0
, mm7
; // color & colorMask
429 pand mm1
, mm7
; // src1 & colorMask
430 psrlw mm0
, 1; // (color & colorMask) >> 1 (E)
431 psrlw mm1
, 1; // (src & colorMask) >> 1 (F)
432 paddw mm0
, mm1
; // E+F
434 movq qword ptr
[eax
], mm0
; // src0 = res
// 16-bit motion-blur filter, public entry point.
//
// The visible lines contain a call that forwards all four arguments to
// MotionBlurIB_MMX, followed by a plain C loop that computes the average of
// the current pixel and the corresponding frm1 pixel.
// NOTE(review): the condition guarding the MMX call, the declaration and
// increment of pos, the assignment target of the average, and any update of
// frm1 are missing from this extract.
452 void MotionBlurIB(u8
*srcPtr
, u32 srcPitch
, int width
, int height
)
460 MotionBlurIB_MMX(srcPtr
, srcPitch
, width
, height
);
// Blend mask: complement of RGB_LOW_BITS_MASK, truncated to 16 bits.
465 u16 colorMask
= ~RGB_LOW_BITS_MASK
;
467 u16
*src0
= (u16
*)srcPtr
;
468 u16
*src1
= (u16
*)frm1
;
// srcPitch halved: the row length in u16 elements, assuming srcPitch is a
// byte count -- TODO confirm with callers.
470 int sPitch
= srcPitch
>> 1;
// Visits height * sPitch elements; width is not used by the visible loop.
473 for (int j
= 0; j
< height
; j
++)
474 for (int i
= 0; i
< sPitch
; i
++) {
475 u16 color
= src0
[pos
];
// Sum of the two masked, halved pixels (current and frm1).
477 (((color
& colorMask
) >> 1) + ((src1
[pos
] & colorMask
) >> 1));
// MMX implementation of the 32-bit motion-blur filter.
//
// Handles two u32 pixels per movq (count = width / 2). For each group it
// loads the current pixels (src0) and the stored pixels from frm1 (src1),
// writes the unmodified current pixels into frm1, and replaces the current
// pixels with ((src0 & mask) >> 1) + ((src1 & mask) >> 1), using doubleword
// shifts and adds.
//
// NOTE(review): pointer increments and loop control are missing from this
// extract, and srcPitch is not used in the visible lines.
484 static void MotionBlurIB32_MMX(u8
*srcPtr
, u32 srcPitch
, int width
, int height
)
486 u32
*src0
= (u32
*)srcPtr
;
487 u32
*src1
= (u32
*)frm1
;
// Number of 2-pixel (64-bit) groups per row.
489 int count
= width
>> 1;
491 for(int i
= 0; i
< height
; i
++) {
// AT&T-syntax (GCC extended asm) form. Operands: %0 = src0, %1 = src1,
// %2 = count, %3 = address of qRGB_COLOR_MASK.
495 "movq 0(%3), %%mm7\n" // colorMask
497 "movq 0(%0), %%mm0\n" // src0
498 "movq 0(%1), %%mm1\n" // src1
499 "movq %%mm0, 0(%1)\n" // src1 = src0
500 "pand %%mm7, %%mm0\n" // color & colorMask
501 "pand %%mm7, %%mm1\n" // src1 & colorMask
502 "psrld $1, %%mm0\n" // (color & colorMask) >> 1 (E)
503 "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
504 "paddd %%mm1, %%mm0\n" // E+F
506 "movq %%mm0, 0(%0)\n" // src0 = res
515 : "+r" (src0
), "+r" (src1
)
516 : "r" (count
), "r" (qRGB_COLOR_MASK
)
// Intel-syntax rendering of the same sequence, with eax/ebx addressing
// src0/src1. Presumably the non-GCC build; the selecting preprocessor lines
// are not visible.
520 movq mm7
, qword ptr
[qRGB_COLOR_MASK
];
525 movq mm0
, qword ptr
[eax
]; // src0
526 movq mm1
, qword ptr
[ebx
]; // src1
527 movq qword ptr
[ebx
], mm0
; // src1 = src0
528 pand mm0
, mm7
; // color & colorMask
529 pand mm1
, mm7
; // src1 & colorMask
530 psrld mm0
, 1; // (color & colorMask) >> 1 (E)
531 psrld mm1
, 1; // (src & colorMask) >> 1 (F)
532 paddd mm0
, mm1
; // E+F
534 movq qword ptr
[eax
], mm0
; // src0 = res
// 32-bit motion-blur filter, public entry point.
//
// The visible lines contain a call that forwards all four arguments to
// MotionBlurIB32_MMX, followed by a plain C loop that overwrites each current
// pixel with the average of itself and the corresponding frm1 pixel.
// NOTE(review): the condition guarding the MMX call, the declaration and
// increment of pos, any update of frm1, and the end of the function are
// missing from this extract.
552 void MotionBlurIB32(u8
*srcPtr
, u32 srcPitch
, int width
, int height
)
560 MotionBlurIB32_MMX(srcPtr
, srcPitch
, width
, height
);
565 u32
*src0
= (u32
*)srcPtr
;
566 u32
*src1
= (u32
*)frm1
;
// Clears bit 0 of each of the three low bytes (and the whole top byte), so
// the >> 1 below cannot carry a bit from one byte into the next.
568 u32 colorMask
= 0xfefefe;
// srcPitch divided by 4: the row length in u32 elements, assuming srcPitch
// is a byte count -- TODO confirm with callers.
570 int sPitch
= srcPitch
>> 2;
// Visits height * sPitch elements; width is not used by the visible loop.
573 for (int j
= 0; j
< height
; j
++)
574 for (int i
= 0; i
< sPitch
; i
++) {
575 u32 color
= src0
[pos
];
// Sum of the two masked, halved pixels (current and frm1).
576 src0
[pos
] = (((color
& colorMask
) >> 1) +
577 ((src1
[pos
] & colorMask
) >> 1));