rename all cpp into c, as they're 99.9% C
[rofl0r-VisualBoyAdvance.git] / src / interframe.c
blobe4a172277da1b0a5732ce11bf1e14c1d43f09c97
1 // VisualBoyAdvance - Nintendo Gameboy/GameboyAdvance (TM) emulator.
2 // Copyright (C) 1999-2003 Forgotten
3 // Copyright (C) 2004 Forgotten and the VBA development team
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 2, or (at your option)
8 // any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
15 // You should have received a copy of the GNU General Public License
16 // along with this program; if not, write to the Free Software Foundation,
17 // Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
19 #include "System.h"
20 #include <stdlib.h>
21 #include <memory.h>
#ifdef MMX
/*
 * Flag tested by the filter entry points in this file to select the MMX code
 * paths; it is defined outside this file.
 *
 * A linkage specification (extern "C") is only valid C++, so it is emitted
 * only when the file is compiled as C++.
 * NOTE(review): when built as C, `bool` must already be provided by an earlier
 * include (presumably System.h) -- confirm.
 */
#ifdef __cplusplus
extern "C" bool cpu_mmx;
#else
extern bool cpu_mmx;
#endif
#endif
/*
 * Thanks to Kawaks' Mr. K for the code.
 *
 * Incorporated into VBA by Anthony Di Franco.
 */
/*
 * Frame-history buffers used by the interframe filters in this file.
 * They stay NULL until Init() succeeds.
 */
static u8 *frm1 = NULL; /* 1 frame ago */
static u8 *frm2 = NULL; /* 2 frames ago */
static u8 *frm3 = NULL; /* 3 frames ago */

extern int RGB_LOW_BITS_MASK;
extern u32 qRGB_COLOR_MASK[2];

/* Capacity of one history buffer: 322 x 242 elements of 4 bytes each. */
enum {
  INTERFRAME_BUFFER_ELEMS = 322 * 242,
  INTERFRAME_ELEM_BYTES = 4
};

/*
 * Allocates the three zero-filled history buffers.
 *
 * The allocation is all-or-nothing: if any calloc() fails, whatever was
 * obtained is released again and frm1/frm2/frm3 are left untouched, so callers
 * never observe a mix of valid and NULL buffers.
 */
static void Init(void)
{
  u8 *newest = (u8 *)calloc(INTERFRAME_BUFFER_ELEMS, INTERFRAME_ELEM_BYTES);
  u8 *middle = (u8 *)calloc(INTERFRAME_BUFFER_ELEMS, INTERFRAME_ELEM_BYTES);
  u8 *oldest = (u8 *)calloc(INTERFRAME_BUFFER_ELEMS, INTERFRAME_ELEM_BYTES);

  if (newest == NULL || middle == NULL || oldest == NULL) {
    /* free(NULL) is a no-op, so the failed slots need no special casing. */
    free(newest);
    free(middle);
    free(oldest);
    return;
  }

  frm1 = newest;
  frm2 = middle;
  frm3 = oldest;
}
50 void InterframeCleanup()
52 if(frm1)
53 free(frm1);
54 if(frm2)
55 free(frm2);
56 if(frm3)
57 free(frm3);
58 frm1 = frm2 = frm3 = NULL;
#ifdef MMX
/*
 * MMX version of the 16-bit smart interframe blend (called by SmartIB() when
 * cpu_mmx is set).  Works in place on srcPtr, four 16-bit pixels per step.
 *
 * For every pixel, with src0 = current frame and src1/src2/src3 = frm1/frm2/
 * frm3:
 *   A = (src1 == src2), B = (src3 == src0),
 *   C = (src0 == src2), D = (src1 == src3)
 *   if (!(A | B) & (C | D))
 *     out = ((src0 & mask) >> 1) + ((src1 & mask) >> 1)
 *   else
 *     out = src0
 * The unmodified src0 is stored into the frm3 buffer, and the three buffers
 * are rotated once all rows are done.
 *
 * srcPtr   - frame to filter, overwritten with the result
 * srcPitch - not used here; rows are stepped as (width >> 2) * 8 bytes
 *            followed by 4 bytes (the `+= 2` on u16 pointers)
 * width    - pixels per row; width >> 2 quad-pixel groups are processed
 * height   - number of rows
 *
 * NOTE(review): the `addl`/`decl` forms on the pointer operands imply 32-bit
 * registers, so the GCC branch looks 32-bit x86 only -- confirm.
 */
static void SmartIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
{
  u16 *src0 = (u16 *)srcPtr;
  u16 *src1 = (u16 *)frm1;
  u16 *src2 = (u16 *)frm2;
  u16 *src3 = (u16 *)frm3;

  int count = width >> 2;

  // The asm loop decrements before testing, so a zero count would wrap
  // around instead of doing nothing; skip the rows entirely in that case.
  int rows = (count > 0) ? height : 0;

  for(int i = 0; i < rows; i++) {
#ifdef __GNUC__
    // The loop counter is consumed by the asm, so it is passed as a
    // read-write operand on a per-row copy rather than modifying an
    // input-only operand and restoring it with push/pop.
    int n = count;
    asm volatile (
      "movq 0(%5), %%mm7\n" // colorMask
      "0:\n"
      "movq 0(%0), %%mm0\n" // src0
      "movq 0(%1), %%mm1\n" // src1
      "movq 0(%2), %%mm2\n" // src2
      "movq 0(%3), %%mm3\n" // src3
      "movq %%mm0, 0(%3)\n" // src3 = src0
      "movq %%mm0, %%mm4\n"
      "movq %%mm1, %%mm5\n"
      "pcmpeqw %%mm2, %%mm5\n" // src1 == src2 (A)
      "pcmpeqw %%mm3, %%mm4\n" // src3 == src0 (B)
      "por %%mm5, %%mm4\n" // A | B
      "movq %%mm2, %%mm5\n"
      "pcmpeqw %%mm0, %%mm5\n" // src0 == src2 (C)
      "pcmpeqw %%mm1, %%mm3\n" // src1 == src3 (D)
      "por %%mm3, %%mm5\n" // C|D
      "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
      "movq %%mm0, %%mm2\n"
      "pand %%mm7, %%mm2\n" // color & colorMask
      "pand %%mm7, %%mm1\n" // src1 & colorMask
      "psrlw $1, %%mm2\n" // (color & colorMask) >> 1 (E)
      "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
      "paddw %%mm2, %%mm1\n" // E+F
      "pand %%mm4, %%mm1\n" // (E+F) & res
      "pandn %%mm0, %%mm4\n" // color& !res

      "por %%mm1, %%mm4\n"
      "movq %%mm4, 0(%0)\n" // src0 = res

      "addl $8, %0\n"
      "addl $8, %1\n"
      "addl $8, %2\n"
      "addl $8, %3\n"

      "decl %4\n"
      "jnz 0b\n"
      "emms\n"
      : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3), "+r" (n)
      : "r" (qRGB_COLOR_MASK)
      : "memory", "cc" // stores through the pointers; add/dec change flags
    );
#else
    __asm {
      movq mm7, qword ptr [qRGB_COLOR_MASK];
      mov eax, src0;
      mov ebx, src1;
      mov ecx, src2;
      mov edx, src3;
      mov edi, count;
    label0:
      movq mm0, qword ptr [eax]; // src0
      movq mm1, qword ptr [ebx]; // src1
      movq mm2, qword ptr [ecx]; // src2
      movq mm3, qword ptr [edx]; // src3
      movq qword ptr [edx], mm0; // src3 = src0
      movq mm4, mm0;
      movq mm5, mm1;
      pcmpeqw mm5, mm2; // src1 == src2 (A)
      pcmpeqw mm4, mm3; // src3 == src0 (B)
      por mm4, mm5; // A | B
      movq mm5, mm2;
      pcmpeqw mm5, mm0; // src0 == src2 (C)
      pcmpeqw mm3, mm1; // src1 == src3 (D)
      por mm5, mm3; // C|D
      pandn mm4, mm5; // (!(A|B))&(C|D)
      movq mm2, mm0;
      pand mm2, mm7; // color & colorMask
      pand mm1, mm7; // src1 & colorMask
      psrlw mm2, 1; // (color & colorMask) >> 1 (E)
      psrlw mm1, 1; // (src & colorMask) >> 1 (F)
      paddw mm1, mm2; // E+F
      pand mm1, mm4; // (E+F) & res
      pandn mm4, mm0; // color & !res

      por mm4, mm1;
      movq qword ptr [eax], mm4; // src0 = res

      add eax, 8;
      add ebx, 8;
      add ecx, 8;
      add edx, 8;

      dec edi;
      jnz label0;
      mov src0, eax;
      mov src1, ebx;
      mov src2, ecx;
      mov src3, edx;
      emms;
    }
#endif
    // Step over the 4 bytes that follow each row.
    src0+=2;
    src1+=2;
    src2+=2;
    src3+=2;
  }

  /* Swap buffers around */
  u8 *temp = frm1;
  frm1 = frm3;
  frm3 = frm2;
  frm2 = temp;
}
#endif
180 void SmartIB(u8 *srcPtr, u32 srcPitch, int width, int height)
182 if(frm1 == NULL) {
183 Init();
185 #ifdef MMX
186 if(cpu_mmx) {
187 SmartIB_MMX(srcPtr, srcPitch, width, height);
188 return;
190 #endif
192 u16 colorMask = ~RGB_LOW_BITS_MASK;
194 u16 *src0 = (u16 *)srcPtr;
195 u16 *src1 = (u16 *)frm1;
196 u16 *src2 = (u16 *)frm2;
197 u16 *src3 = (u16 *)frm3;
199 int sPitch = srcPitch >> 1;
201 int pos = 0;
202 for (int j = 0; j < height; j++)
203 for (int i = 0; i < sPitch; i++) {
204 u16 color = src0[pos];
205 src0[pos] =
206 (src1[pos] != src2[pos]) &&
207 (src3[pos] != color) &&
208 ((color == src2[pos]) || (src1[pos] == src3[pos]))
209 ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
210 color;
211 src3[pos] = color; /* oldest buffer now holds newest frame */
212 pos++;
215 /* Swap buffers around */
216 u8 *temp = frm1;
217 frm1 = frm3;
218 frm3 = frm2;
219 frm2 = temp;
#ifdef MMX
/*
 * MMX version of the 32-bit smart interframe blend (called by SmartIB32()
 * when cpu_mmx is set).  Works in place on srcPtr, two 32-bit pixels per
 * step.
 *
 * For every pixel, with src0 = current frame and src1/src2/src3 = frm1/frm2/
 * frm3:
 *   A = (src1 == src2), B = (src3 == src0),
 *   C = (src0 == src2), D = (src1 == src3)
 *   if (!(A | B) & (C | D))
 *     out = ((src0 & mask) >> 1) + ((src1 & mask) >> 1)
 *   else
 *     out = src0
 * The unmodified src0 is stored into the frm3 buffer, and the three buffers
 * are rotated once all rows are done.
 *
 * srcPtr   - frame to filter, overwritten with the result
 * srcPitch - not used here; rows are stepped as (width >> 1) * 8 bytes
 *            followed by 4 bytes (the `++` on u32 pointers)
 * width    - pixels per row; width >> 1 pixel pairs are processed
 * height   - number of rows
 *
 * NOTE(review): the `addl`/`decl` forms on the pointer operands imply 32-bit
 * registers, so the GCC branch looks 32-bit x86 only -- confirm.
 */
static void SmartIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
{
  u32 *src0 = (u32 *)srcPtr;
  u32 *src1 = (u32 *)frm1;
  u32 *src2 = (u32 *)frm2;
  u32 *src3 = (u32 *)frm3;

  int count = width >> 1;

  // The asm loop decrements before testing, so a zero count would wrap
  // around instead of doing nothing; skip the rows entirely in that case.
  int rows = (count > 0) ? height : 0;

  for(int i = 0; i < rows; i++) {
#ifdef __GNUC__
    // The loop counter is consumed by the asm, so it is passed as a
    // read-write operand on a per-row copy rather than modifying an
    // input-only operand and restoring it with push/pop.
    int n = count;
    asm volatile (
      "movq 0(%5), %%mm7\n" // colorMask
      "0:\n"
      "movq 0(%0), %%mm0\n" // src0
      "movq 0(%1), %%mm1\n" // src1
      "movq 0(%2), %%mm2\n" // src2
      "movq 0(%3), %%mm3\n" // src3
      "movq %%mm0, 0(%3)\n" // src3 = src0
      "movq %%mm0, %%mm4\n"
      "movq %%mm1, %%mm5\n"
      "pcmpeqd %%mm2, %%mm5\n" // src1 == src2 (A)
      "pcmpeqd %%mm3, %%mm4\n" // src3 == src0 (B)
      "por %%mm5, %%mm4\n" // A | B
      "movq %%mm2, %%mm5\n"
      "pcmpeqd %%mm0, %%mm5\n" // src0 == src2 (C)
      "pcmpeqd %%mm1, %%mm3\n" // src1 == src3 (D)
      "por %%mm3, %%mm5\n" // C|D
      "pandn %%mm5, %%mm4\n" // (!(A|B))&(C|D)
      "movq %%mm0, %%mm2\n"
      "pand %%mm7, %%mm2\n" // color & colorMask
      "pand %%mm7, %%mm1\n" // src1 & colorMask
      "psrld $1, %%mm2\n" // (color & colorMask) >> 1 (E)
      "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
      "paddd %%mm2, %%mm1\n" // E+F
      "pand %%mm4, %%mm1\n" // (E+F) & res
      "pandn %%mm0, %%mm4\n" // color& !res

      "por %%mm1, %%mm4\n"
      "movq %%mm4, 0(%0)\n" // src0 = res

      "addl $8, %0\n"
      "addl $8, %1\n"
      "addl $8, %2\n"
      "addl $8, %3\n"

      "decl %4\n"
      "jnz 0b\n"
      "emms\n"
      : "+r" (src0), "+r" (src1), "+r" (src2), "+r" (src3), "+r" (n)
      : "r" (qRGB_COLOR_MASK)
      : "memory", "cc" // stores through the pointers; add/dec change flags
    );
#else
    __asm {
      movq mm7, qword ptr [qRGB_COLOR_MASK];
      mov eax, src0;
      mov ebx, src1;
      mov ecx, src2;
      mov edx, src3;
      mov edi, count;
    label0:
      movq mm0, qword ptr [eax]; // src0
      movq mm1, qword ptr [ebx]; // src1
      movq mm2, qword ptr [ecx]; // src2
      movq mm3, qword ptr [edx]; // src3
      movq qword ptr [edx], mm0; // src3 = src0
      movq mm4, mm0;
      movq mm5, mm1;
      pcmpeqd mm5, mm2; // src1 == src2 (A)
      pcmpeqd mm4, mm3; // src3 == src0 (B)
      por mm4, mm5; // A | B
      movq mm5, mm2;
      pcmpeqd mm5, mm0; // src0 == src2 (C)
      pcmpeqd mm3, mm1; // src1 == src3 (D)
      por mm5, mm3; // C|D
      pandn mm4, mm5; // (!(A|B))&(C|D)
      movq mm2, mm0;
      pand mm2, mm7; // color & colorMask
      pand mm1, mm7; // src1 & colorMask
      psrld mm2, 1; // (color & colorMask) >> 1 (E)
      psrld mm1, 1; // (src & colorMask) >> 1 (F)
      paddd mm1, mm2; // E+F
      pand mm1, mm4; // (E+F) & res
      pandn mm4, mm0; // color & !res

      por mm4, mm1;
      movq qword ptr [eax], mm4; // src0 = res

      add eax, 8;
      add ebx, 8;
      add ecx, 8;
      add edx, 8;

      dec edi;
      jnz label0;
      mov src0, eax;
      mov src1, ebx;
      mov src2, ecx;
      mov src3, edx;
      emms;
    }
#endif

    // Step over the 4 bytes that follow each row.
    src0++;
    src1++;
    src2++;
    src3++;
  }
  /* Swap buffers around */
  u8 *temp = frm1;
  frm1 = frm3;
  frm3 = frm2;
  frm2 = temp;
}
#endif
341 void SmartIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
343 if(frm1 == NULL) {
344 Init();
346 #ifdef MMX
347 if(cpu_mmx) {
348 SmartIB32_MMX(srcPtr, srcPitch, width, height);
349 return;
351 #endif
353 u32 *src0 = (u32 *)srcPtr;
354 u32 *src1 = (u32 *)frm1;
355 u32 *src2 = (u32 *)frm2;
356 u32 *src3 = (u32 *)frm3;
358 u32 colorMask = 0xfefefe;
360 int sPitch = srcPitch >> 2;
361 int pos = 0;
363 for (int j = 0; j < height; j++)
364 for (int i = 0; i < sPitch; i++) {
365 u32 color = src0[pos];
366 src0[pos] =
367 (src1[pos] != src2[pos]) &&
368 (src3[pos] != color) &&
369 ((color == src2[pos]) || (src1[pos] == src3[pos]))
370 ? (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1)) :
371 color;
372 src3[pos] = color; /* oldest buffer now holds newest frame */
373 pos++;
376 /* Swap buffers around */
377 u8 *temp = frm1;
378 frm1 = frm3;
379 frm3 = frm2;
380 frm2 = temp;
#ifdef MMX
/*
 * MMX version of the 16-bit motion blur (called by MotionBlurIB() when
 * cpu_mmx is set).  Works in place on srcPtr, four 16-bit pixels per step.
 *
 * Every pixel becomes ((src0 & mask) >> 1) + ((src1 & mask) >> 1), where src0
 * is the current frame and src1 the frm1 buffer; the unmodified src0 is then
 * stored into frm1.
 *
 * srcPtr   - frame to filter, overwritten with the result
 * srcPitch - not used here; rows are stepped as (width >> 2) * 8 bytes
 *            followed by 4 bytes (the `+= 2` on u16 pointers)
 * width    - pixels per row; width >> 2 quad-pixel groups are processed
 * height   - number of rows
 *
 * NOTE(review): the `addl`/`decl` forms on the pointer operands imply 32-bit
 * registers, so the GCC branch looks 32-bit x86 only -- confirm.
 */
static void MotionBlurIB_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
{
  u16 *src0 = (u16 *)srcPtr;
  u16 *src1 = (u16 *)frm1;

  int count = width >> 2;

  // The asm loop decrements before testing, so a zero count would wrap
  // around instead of doing nothing; skip the rows entirely in that case.
  int rows = (count > 0) ? height : 0;

  for(int i = 0; i < rows; i++) {
#ifdef __GNUC__
    // The loop counter is consumed by the asm, so it is passed as a
    // read-write operand on a per-row copy rather than modifying an
    // input-only operand and restoring it with push/pop.
    int n = count;
    asm volatile (
      "movq 0(%3), %%mm7\n" // colorMask
      "0:\n"
      "movq 0(%0), %%mm0\n" // src0
      "movq 0(%1), %%mm1\n" // src1
      "movq %%mm0, 0(%1)\n" // src1 = src0
      "pand %%mm7, %%mm0\n" // color & colorMask
      "pand %%mm7, %%mm1\n" // src1 & colorMask
      "psrlw $1, %%mm0\n" // (color & colorMask) >> 1 (E)
      "psrlw $1, %%mm1\n" // (src & colorMask) >> 1 (F)
      "paddw %%mm1, %%mm0\n" // E+F

      "movq %%mm0, 0(%0)\n" // src0 = res

      "addl $8, %0\n"
      "addl $8, %1\n"

      "decl %2\n"
      "jnz 0b\n"
      "emms\n"
      : "+r" (src0), "+r" (src1), "+r" (n)
      : "r" (qRGB_COLOR_MASK)
      : "memory", "cc" // stores through the pointers; add/dec change flags
    );
#else
    __asm {
      movq mm7, qword ptr [qRGB_COLOR_MASK];
      mov eax, src0;
      mov ebx, src1;
      mov edi, count;
    label0:
      movq mm0, qword ptr [eax]; // src0
      movq mm1, qword ptr [ebx]; // src1
      movq qword ptr [ebx], mm0; // src1 = src0
      pand mm0, mm7; // color & colorMask
      pand mm1, mm7; // src1 & colorMask
      psrlw mm0, 1; // (color & colorMask) >> 1 (E)
      psrlw mm1, 1; // (src & colorMask) >> 1 (F)
      paddw mm0, mm1; // E+F

      movq qword ptr [eax], mm0; // src0 = res

      add eax, 8;
      add ebx, 8;

      dec edi;
      jnz label0;
      mov src0, eax;
      mov src1, ebx;
      emms;
    }
#endif
    // Step over the 4 bytes that follow each row.
    src0+=2;
    src1+=2;
  }
}
#endif
452 void MotionBlurIB(u8 *srcPtr, u32 srcPitch, int width, int height)
454 if(frm1 == NULL) {
455 Init();
458 #ifdef MMX
459 if(cpu_mmx) {
460 MotionBlurIB_MMX(srcPtr, srcPitch, width, height);
461 return;
463 #endif
465 u16 colorMask = ~RGB_LOW_BITS_MASK;
467 u16 *src0 = (u16 *)srcPtr;
468 u16 *src1 = (u16 *)frm1;
470 int sPitch = srcPitch >> 1;
472 int pos = 0;
473 for (int j = 0; j < height; j++)
474 for (int i = 0; i < sPitch; i++) {
475 u16 color = src0[pos];
476 src0[pos] =
477 (((color & colorMask) >> 1) + ((src1[pos] & colorMask) >> 1));
478 src1[pos] = color;
479 pos++;
#ifdef MMX
/*
 * MMX version of the 32-bit motion blur (called by MotionBlurIB32() when
 * cpu_mmx is set).  Works in place on srcPtr, two 32-bit pixels per step.
 *
 * Every pixel becomes ((src0 & mask) >> 1) + ((src1 & mask) >> 1), where src0
 * is the current frame and src1 the frm1 buffer; the unmodified src0 is then
 * stored into frm1.
 *
 * srcPtr   - frame to filter, overwritten with the result
 * srcPitch - not used here; rows are stepped as (width >> 1) * 8 bytes
 *            followed by 4 bytes (the `++` on u32 pointers)
 * width    - pixels per row; width >> 1 pixel pairs are processed
 * height   - number of rows
 *
 * NOTE(review): the `addl`/`decl` forms on the pointer operands imply 32-bit
 * registers, so the GCC branch looks 32-bit x86 only -- confirm.
 */
static void MotionBlurIB32_MMX(u8 *srcPtr, u32 srcPitch, int width, int height)
{
  u32 *src0 = (u32 *)srcPtr;
  u32 *src1 = (u32 *)frm1;

  int count = width >> 1;

  // The asm loop decrements before testing, so a zero count would wrap
  // around instead of doing nothing; skip the rows entirely in that case.
  int rows = (count > 0) ? height : 0;

  for(int i = 0; i < rows; i++) {
#ifdef __GNUC__
    // The loop counter is consumed by the asm, so it is passed as a
    // read-write operand on a per-row copy rather than modifying an
    // input-only operand and restoring it with push/pop.
    int n = count;
    asm volatile (
      "movq 0(%3), %%mm7\n" // colorMask
      "0:\n"
      "movq 0(%0), %%mm0\n" // src0
      "movq 0(%1), %%mm1\n" // src1
      "movq %%mm0, 0(%1)\n" // src1 = src0
      "pand %%mm7, %%mm0\n" // color & colorMask
      "pand %%mm7, %%mm1\n" // src1 & colorMask
      "psrld $1, %%mm0\n" // (color & colorMask) >> 1 (E)
      "psrld $1, %%mm1\n" // (src & colorMask) >> 1 (F)
      "paddd %%mm1, %%mm0\n" // E+F

      "movq %%mm0, 0(%0)\n" // src0 = res

      "addl $8, %0\n"
      "addl $8, %1\n"

      "decl %2\n"
      "jnz 0b\n"
      "emms\n"
      : "+r" (src0), "+r" (src1), "+r" (n)
      : "r" (qRGB_COLOR_MASK)
      : "memory", "cc" // stores through the pointers; add/dec change flags
    );
#else
    __asm {
      movq mm7, qword ptr [qRGB_COLOR_MASK];
      mov eax, src0;
      mov ebx, src1;
      mov edi, count;
    label0:
      movq mm0, qword ptr [eax]; // src0
      movq mm1, qword ptr [ebx]; // src1
      movq qword ptr [ebx], mm0; // src1 = src0
      pand mm0, mm7; // color & colorMask
      pand mm1, mm7; // src1 & colorMask
      psrld mm0, 1; // (color & colorMask) >> 1 (E)
      psrld mm1, 1; // (src & colorMask) >> 1 (F)
      paddd mm0, mm1; // E+F

      movq qword ptr [eax], mm0; // src0 = res

      add eax, 8;
      add ebx, 8;

      dec edi;
      jnz label0;
      mov src0, eax;
      mov src1, ebx;
      emms;
    }
#endif
    // Step over the 4 bytes that follow each row.
    src0++;
    src1++;
  }
}
#endif
552 void MotionBlurIB32(u8 *srcPtr, u32 srcPitch, int width, int height)
554 if(frm1 == NULL) {
555 Init();
558 #ifdef MMX
559 if(cpu_mmx) {
560 MotionBlurIB32_MMX(srcPtr, srcPitch, width, height);
561 return;
563 #endif
565 u32 *src0 = (u32 *)srcPtr;
566 u32 *src1 = (u32 *)frm1;
568 u32 colorMask = 0xfefefe;
570 int sPitch = srcPitch >> 2;
571 int pos = 0;
573 for (int j = 0; j < height; j++)
574 for (int i = 0; i < sPitch; i++) {
575 u32 color = src0[pos];
576 src0[pos] = (((color & colorMask) >> 1) +
577 ((src1[pos] & colorMask) >> 1));
578 src1[pos] = color;
579 pos++;