remove unportable,glibc/gcc specific remote profiling support
[rofl0r-VisualBoyAdvance.git] / src / i386 / 2xSaImmx.asm
blobe0f2e268b604fce4b40d75d68e93ab5f9adfe339
1 ;/*---------------------------------------------------------------------*
2 ; * The following (piece of) code, (part of) the 2xSaI engine, *
3 ; * copyright (c) 2001 by Derek Liauw Kie Fa. *
4 ; * Non-Commercial use of the engine is allowed and is encouraged, *
5 ; * provided that appropriate credit be given and that this copyright *
6 ; * notice will not be removed under any circumstance. *
7 ; * You may freely modify this code, but I request *
8 ; * that any improvements to the engine be submitted to me, so *
9 ; * that I can implement these improvements in newer versions of *
10 ; * the engine. *
11 ; * If you need more information, have any comments or suggestions, *
12 ; * you can e-mail me. My e-mail: DerekL666@yahoo.com *
13 ; *---------------------------------------------------------------------*/
14 ; modified by Spacy to compile with yasm [2006-06-20]
16 ;----------------------
17 ; 2xSaI, Super2xSaI, SuperEagle .. FINAL. no versioning anymore..
18 ;----------------------
; Assemble as 32-bit code.
BITS 32

; Exported entry points.  DJGPP builds export the three line routines with
; a second leading underscore and the init routine with a single one.
%ifndef __DJGPP__
GLOBAL _2xSaILine
GLOBAL _2xSaISuperEagleLine
GLOBAL _2xSaISuper2xSaILine
GLOBAL Init_2xSaIMMX
%else
GLOBAL __2xSaILine
GLOBAL __2xSaISuperEagleLine
GLOBAL __2xSaISuper2xSaILine
GLOBAL _Init_2xSaIMMX
%endif

SECTION .text ALIGN = 32
34 %ifdef __DJGPP__
35 ;EXTERN_C void __2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
36 ; uint8 *dstPtr, uint32 dstPitch, uint16 dstSegment);
37 %else
38 ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
39 ; uint8 *dstPtr, uint32 dstPitch);
40 %endif
;-----------------------------------------------------------------------
; Argument offsets relative to ebp once the routines have executed
; "push ebp / mov ebp, esp" (so [ebp+4] is the return address).
; Every argument occupies one dword.  dstSegment is only read by the
; routines when __DJGPP__ is defined.
;-----------------------------------------------------------------------
srcPtr          equ     8
deltaPtr        equ     srcPtr     + 4          ; 12
srcPitch        equ     deltaPtr   + 4          ; 16
width           equ     srcPitch   + 4          ; 20
dstOffset       equ     width      + 4          ; 24
dstPitch        equ     dstOffset  + 4          ; 28
dstSegment      equ     dstPitch   + 4          ; 32

;-----------------------------------------------------------------------
; Byte offsets of the neighbouring pixels inside one source row.
; Pixels are addressed as 16-bit words, so consecutive columns are
; 2 bytes apart: -2 = column to the left, 0 = current column,
; 2 and 4 = the next two columns.  The row itself is selected by the
; base expression used with these offsets (eax, eax+ebx, eax+ebx+ebx, ...).
;-----------------------------------------------------------------------

; row "B" (addressed from eax)
colorB0         equ     -2
colorB1         equ     0
colorB2         equ     2
colorB3         equ     4

; color7..color9 (not referenced in the code visible in this part of the file)
color7          equ     -2
color8          equ     0
color9          equ     2

; row of color4/5/6/S2 (addressed from eax+ebx)
color4          equ     -2
color5          equ     0
color6          equ     2
colorS2         equ     4

; row of color1/2/3/S1 (addressed from eax+ebx+ebx)
color1          equ     -2
color2          equ     0
color3          equ     2
colorS1         equ     4

; row "A" (addressed from eax+3*ebx)
colorA0         equ     -2
colorA1         equ     0
colorA2         equ     2
colorA3         equ     4
;-----------------------------------------------------------------------
; _2xSaISuper2xSaILine  (__2xSaISuper2xSaILine when __DJGPP__ is defined)
;
; Super2xSaI scaler for one line of 16-bit pixels, MMX implementation.
; Every loop iteration handles four adjacent source pixels and writes
; eight pixels to each of two destination rows (edx and edx+dstPitch).
;
; Stack arguments (dwords, offsets defined by the equ table):
;   [ebp+srcPtr]     pointer to the current source row
;   [ebp+deltaPtr]   pointer to the delta buffer used to skip unchanged
;                    pixel groups; it is advanced in place on the stack
;   [ebp+srcPitch]   byte distance between source rows (also used to
;                    step between rows of the delta buffer)
;   [ebp+width]      number of source pixels to process
;   [ebp+dstOffset]  destination pointer
;   [ebp+dstPitch]   byte distance between destination rows
;   [ebp+dstSegment] destination selector, loaded into fs (DJGPP only)
;
; Register roles inside the loop:
;   eax  source pointer minus one pitch (row "B"); +8 per iteration
;   ebx  source pitch
;   edx  destination pointer; +16 per iteration
;   ecx  scratch; the remaining pixel count lives on the stack
;
; All general registers are preserved (pushad/popad).  The MMX registers
; are clobbered and emms is executed before returning.  Under DJGPP fs is
; overwritten and not restored.
;
; Uses data symbols defined elsewhere in the file: colorMask,
; lowPixelMask, ONE, Mask26, Mask35, I56Pixel, I5556Pixel, I5666Pixel,
; I23Pixel, I2223Pixel, I2333Pixel, final1a, final1b, final2a, final2b.
;-----------------------------------------------------------------------
%ifdef __DJGPP__
__2xSaISuper2xSaILine:
%else
NEWSYM _2xSaISuper2xSaILine
%endif
        ; Set up the frame and save every general register
        push    ebp
        mov     ebp, esp
        pushad

        ; Prepare the destination
%ifdef __DJGPP__
        ; Set the selector
        mov     eax, [ebp+dstSegment]
        mov     fs, ax
%endif
        mov     edx, [ebp+dstOffset]            ; edx points to the screen

        ; Prepare the source
        mov     eax, [ebp+srcPtr]               ; eax points to the current row
        mov     ebx, [ebp+srcPitch]             ; ebx contains the source pitch
        mov     ecx, [ebp+width]                ; ecx contains the number of pixels to process
        sub     eax, ebx                        ; eax points to row B, which is the base

        ; Main loop
.Loop:  push    ecx                             ; keep the remaining pixel count on the stack

        ;-----Check Delta------------------
        mov     ecx, [ebp+deltaPtr]

        ; load two quads from each of the four source rows
        movq    mm0, [eax+colorB0]
        movq    mm1, [eax+colorB3]
        movq    mm2, [eax+ebx+color4]
        movq    mm3, [eax+ebx+colorS2]
        movq    mm4, [eax+ebx+ebx+color1]
        movq    mm5, [eax+ebx+ebx+colorS1]
        push    eax
        add     eax, ebx                        ; temporarily step one row down to reach row A
        movq    mm6, [eax+ebx+ebx+colorA0]
        movq    mm7, [eax+ebx+ebx+colorA3]
        pop     eax

        ; compare each quad with the delta buffer
        pcmpeqw mm0, [ecx+2+colorB0]
        pcmpeqw mm1, [ecx+2+colorB3]
        pcmpeqw mm2, [ecx+ebx+2+color4]
        pcmpeqw mm3, [ecx+ebx+2+colorS2]
        pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
        pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
        add     ecx, ebx
        pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
        pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
        sub     ecx, ebx

        ; compose results: mm0 = AND of all eight comparisons
        pand    mm0, mm1
        pand    mm2, mm3
        pand    mm4, mm5
        pand    mm6, mm7
        pand    mm0, mm2
        pand    mm4, mm6
        pxor    mm7, mm7
        pand    mm0, mm4
        movq    mm6, [eax+colorB0]
        pcmpeqw mm7, mm0                        ; did any compare give us a zero ?

        movq    [ecx+2+colorB0], mm6            ; refresh the delta buffer with the row B quad

        packsswb mm7, mm7                       ; squeeze the four word flags into one dword
        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_PROCESS              ; no, so we can skip
        ;End Delta

        ;---------------------------------
        ; I56Pixel = INTERPOLATE(color5, color6):
        ; ((a & colorMask) >> 1) + ((b & colorMask) >> 1) + (a & b & lowPixelMask)
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm2, mm0
        movq    mm3, mm1
        movq    mm4, mm0                        ; mm4 keeps color5
        movq    mm5, mm1                        ; mm5 keeps color6

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3                        ; mm0 contains the interpolated values
        movq    [I56Pixel], mm0
        movq    mm7, mm0

        ;-------------------
        ; I5556Pixel = INTERPOLATE(I56Pixel, color5)
        movq    mm0, mm7
        movq    mm1, mm4                        ; 5,5,5,6
        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3                        ; mm0 contains the interpolated values
        movq    [I5556Pixel], mm0

        ;--------------------
        ; I5666Pixel = INTERPOLATE(I56Pixel, color6)
        movq    mm0, mm7
        movq    mm1, mm5                        ; 6,6,6,5
        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3
        movq    [I5666Pixel], mm0

        ;-------------------------
        ; I23Pixel = INTERPOLATE(color2, color3)
        movq    mm0, [eax+ebx+ebx+color2]
        movq    mm1, [eax+ebx+ebx+color3]
        movq    mm2, mm0
        movq    mm3, mm1
        movq    mm4, mm0                        ; mm4 keeps color2
        movq    mm5, mm1                        ; mm5 keeps color3

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3
        movq    [I23Pixel], mm0
        movq    mm7, mm0

        ;---------------------
        ; I2223Pixel = INTERPOLATE(I23Pixel, color2)
        movq    mm0, mm7
        movq    mm1, mm4                        ; 2,2,2,3
        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3
        movq    [I2223Pixel], mm0

        ;----------------------
        ; I2333Pixel = INTERPOLATE(I23Pixel, color3)
        movq    mm0, mm7
        movq    mm1, mm5                        ; 3,3,3,2
        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3
        movq    [I2333Pixel], mm0

        ;--------------------
        ;////////////////////////////////
        ; Decide which "branch" to take
        ;--------------------------------
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm6, mm0
        movq    mm7, mm1
        pcmpeqw mm0, [eax+ebx+ebx+color3]       ; mm0 = (color5 == color3)
        pcmpeqw mm1, [eax+ebx+ebx+color2]       ; mm1 = (color6 == color2)
        pcmpeqw mm6, mm7                        ; mm6 = (color5 == color6)

        movq    mm2, mm0
        movq    mm3, mm0

        pand    mm0, mm1                        ; color5 == color3 && color6 == color2
        pxor    mm7, mm7

        pcmpeqw mm2, mm7
        pand    mm6, mm0                        ; both diagonals equal and color5 == color6
        pand    mm2, mm1                        ; color5 != color3 && color6 == color2

        pcmpeqw mm1, mm7

        pand    mm1, mm3                        ; color5 == color3 && color6 != color2
        pxor    mm0, mm6                        ; both diagonals equal but color5 != color6: must guess
        por     mm1, mm6
        movq    mm7, mm0
        movq    [Mask26], mm2
        packsswb mm7, mm7
        movq    [Mask35], mm1

        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_GUESS                ; no lane needs the neighbourhood vote

        ;---------------------------------------------
        ; Neighbourhood vote for the undecided lanes.  mm6 holds 1 in every
        ; undecided lane, mm4/mm5 hold the two candidate colours, and mm7
        ; accumulates a signed score from four pairs of neighbours.
        movq    mm6, mm0
        movq    mm4, [eax+ebx+colorA]
        movq    mm5, [eax+ebx+colorB]
        pxor    mm7, mm7
        pand    mm6, [ONE]

        ; neighbour pair 1: colorE, colorG
        movq    mm0, [eax+colorE]
        movq    mm1, [eax+ebx+colorG]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1                        ; matches with mm4 (0..2 per lane)
        paddw   mm2, mm3                        ; matches with mm5 (0..2 per lane)

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        ; neighbour pair 2: colorF, colorK
        movq    mm0, [eax+colorF]
        movq    mm1, [eax+ebx+colorK]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1
        paddw   mm2, mm3

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        ; neighbour pair 3: colorH, colorN (eax stepped one row down)
        push    eax
        add     eax, ebx
        movq    mm0, [eax+ebx+colorH]
        movq    mm1, [eax+ebx+ebx+colorN]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1
        paddw   mm2, mm3

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        ; neighbour pair 4: colorL, colorO
        movq    mm0, [eax+ebx+colorL]
        movq    mm1, [eax+ebx+ebx+colorO]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1
        paddw   mm2, mm3

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        ; fold the vote into the masks: score > 0 -> Mask35, score < 0 -> Mask26
        pop     eax
        movq    mm1, mm7
        pxor    mm0, mm0
        pcmpgtw mm7, mm0                        ; mm7 = (score > 0)
        pcmpgtw mm0, mm1                        ; mm0 = (score < 0)

        por     mm7, [Mask35]
        por     mm0, [Mask26]
        movq    [Mask35], mm7
        movq    [Mask26], mm0

.SKIP_GUESS:
        ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image...

        ; mm0 = INTERPOLATE(color5, color2); it stays live until final2a is stored
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+ebx+color2]
        movq    mm2, mm0
        movq    mm3, mm1
        movq    mm4, mm0
        movq    mm5, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3                        ; mm0 contains the interpolated values
        ;---------------------------

        ; Reference pseudo-code for product1a/product2a (never assembled)
%ifdef dfhsdfhsdahdsfhdsfh
if (color5 == color3 && color2 != color6 && color4 == color5 && color5 != colorA2)
product2a = INTERPOLATE (color2, color5);
else
if (color5 == color1 && color6 == color5 && color4 != color2 && color5 != colorA0)
product2a = INTERPOLATE(color2, color5);
else
product2a = color2;
if (color2 == color6 && color5 != color3 && color1 == color2 && color2 != colorB2)
product1a = INTERPOLATE (color2, color5);
else
if (color4 == color2 && color3 == color2 && color1 != color5 && color2 != colorB0)
product1a = INTERPOLATE(color2, color5);
else
product1a = color5;
%endif

        ; ---- final1a ----
        ; first test: Mask26 && color1 == color2 && color2 != colorB2
        movq    mm7, [Mask26]
        movq    mm6, [eax+colorB2]
        movq    mm5, [eax+ebx+ebx+color2]
        movq    mm4, [eax+ebx+ebx+color1]
        pcmpeqw mm4, mm5
        pcmpeqw mm6, mm5
        pxor    mm5, mm5
        pand    mm7, mm4
        pcmpeqw mm6, mm5
        pand    mm7, mm6

        ; second test: color3 == color2 && color4 == color2 &&
        ;              color1 != color5 && color2 != colorB0
        movq    mm6, [eax+ebx+ebx+color3]
        movq    mm5, [eax+ebx+ebx+color2]
        movq    mm4, [eax+ebx+ebx+color1]
        movq    mm2, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color4]
        movq    mm3, [eax+colorB0]

        pcmpeqw mm2, mm4
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm3, mm5
        pxor    mm5, mm5
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm6, mm1
        pand    mm2, mm3
        pand    mm6, mm2
        por     mm7, mm6

        ; select: interpolated value where a test passed, color5 elsewhere
        movq    mm6, mm7
        pcmpeqw mm6, mm5
        pand    mm7, mm0

        movq    mm1, [eax+ebx+color5]
        pand    mm6, mm1
        por     mm7, mm6
        movq    [final1a], mm7                  ;finished 1a

        ;--------------------------------
        ; ---- final2a ----
        ; first test: Mask35 && color4 == color5 && color5 != colorA2
        movq    mm7, [Mask35]
        push    eax
        add     eax, ebx
        movq    mm6, [eax+ebx+ebx+colorA2]
        pop     eax
        movq    mm5, [eax+ebx+color5]
        movq    mm4, [eax+ebx+color4]
        pcmpeqw mm4, mm5
        pcmpeqw mm6, mm5
        pxor    mm5, mm5
        pand    mm7, mm4
        pcmpeqw mm6, mm5
        pand    mm7, mm6

        ; second test: color6 == color5 && color1 == color5 &&
        ;              color4 != color2 && color5 != colorA0
        movq    mm6, [eax+ebx+color6]
        movq    mm5, [eax+ebx+color5]
        movq    mm4, [eax+ebx+color4]
        movq    mm2, [eax+ebx+ebx+color2]
        movq    mm1, [eax+ebx+ebx+color1]
        push    eax
        add     eax, ebx
        movq    mm3, [eax+ebx+ebx+colorA0]
        pop     eax

        pcmpeqw mm2, mm4
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm3, mm5
        pxor    mm5, mm5
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm6, mm1
        pand    mm2, mm3
        pand    mm6, mm2
        por     mm7, mm6

        ; select: interpolated value where a test passed, color2 elsewhere
        movq    mm6, mm7
        pcmpeqw mm6, mm5
        pand    mm7, mm0

        movq    mm1, [eax+ebx+ebx+color2]
        pand    mm6, mm1
        por     mm7, mm6
        movq    [final2a], mm7                  ;finished 2a

        ;--------------------------------------------
        ; Reference pseudo-code for product2b/product1b (never assembled)
%ifdef dfhsdfhsdahdsfhdsfh
if (color6 == color3 && color3 == colorA1 && color2 != colorA2 && color3 != colorA0)
product2b = Q_INTERPOLATE (color3, color3, color3, color2);
else
if (color5 == color2 && color2 == colorA2 && colorA1 != color3 && color2 != colorA3)
product2b = Q_INTERPOLATE (color2, color2, color2, color3);
else
product2b = INTERPOLATE (color2, color3);
if (color6 == color3 && color6 == colorB1 && color5 != colorB2 && color6 != colorB0)
product1b = Q_INTERPOLATE (color6, color6, color6, color5);
else
if (color5 == color2 && color5 == colorB2 && colorB1 != color6 && color5 != colorB3)
product1b = Q_INTERPOLATE (color6, color5, color5, color5);
else
product1b = INTERPOLATE (color5, color6);
%endif

        ; ---- final2b ----
        push    eax
        add     eax, ebx
        pxor    mm7, mm7
        movq    mm0, [eax+ebx+ebx+colorA0]
        movq    mm1, [eax+ebx+ebx+colorA1]
        movq    mm2, [eax+ebx+ebx+colorA2]
        movq    mm3, [eax+ebx+ebx+colorA3]
        pop     eax
        movq    mm4, [eax+ebx+ebx+color2]
        movq    mm5, [eax+ebx+ebx+color3]
        movq    mm6, [eax+ebx+color6]

        ; first test -> mm0: color6 == color3 && colorA1 == color3 &&
        ;                    color2 != colorA2 && colorA0 != color3
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5                        ; mm1 = (colorA1 == color3), reused below
        pcmpeqw mm4, mm2
        pcmpeqw mm0, mm5
        pcmpeqw mm4, mm7
        pcmpeqw mm0, mm7
        pand    mm0, mm4
        pand    mm6, mm1
        pand    mm0, mm6

        ; second test -> mm1: color5 == color2 && colorA2 == color5 &&
        ;                     colorA1 != color3 && colorA3 != color5
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+ebx+color2]

        pcmpeqw mm5, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm3, mm4
        ; mm1 already holds the (colorA1 == color3) mask, so a single compare
        ; with zero inverts it.  Comparing the mask against the color3 pixel
        ; first, as was done before, gives wrong results (e.g. when color3 is 0).
        pcmpeqw mm1, mm7
        pcmpeqw mm3, mm7
        pand    mm2, mm5
        pand    mm1, mm3
        pand    mm1, mm2

        ; pick: I2333Pixel (first test), I2223Pixel (second test),
        ; color3 under Mask35, color2 under Mask26, I23Pixel otherwise
        movq    mm2, mm0
        movq    mm7, [I2333Pixel]
        movq    mm6, [I2223Pixel]
        movq    mm5, [I23Pixel]
        movq    mm4, [Mask35]
        movq    mm3, [Mask26]

        por     mm2, mm4
        pand    mm4, [eax+ebx+ebx+color3]
        por     mm2, mm3
        pand    mm3, [eax+ebx+ebx+color2]
        por     mm2, mm1
        pand    mm0, mm7
        pand    mm1, mm6
        pxor    mm7, mm7
        pcmpeqw mm2, mm7                        ; lanes where none of the masks is set
        por     mm0, mm1
        por     mm3, mm4
        pand    mm2, mm5
        por     mm0, mm3
        por     mm0, mm2
        movq    [final2b], mm0

        ;-----------------------------------
        ; ---- final1b ----
        pxor    mm7, mm7
        movq    mm0, [eax+colorB0]
        movq    mm1, [eax+colorB1]
        movq    mm2, [eax+colorB2]
        movq    mm3, [eax+colorB3]
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+color6]
        movq    mm6, [eax+ebx+ebx+color3]

        ; first test -> mm0: color3 == color6 && colorB1 == color6 &&
        ;                    color5 != colorB2 && colorB0 != color6
        pcmpeqw mm6, mm5
        pcmpeqw mm1, mm5                        ; mm1 = (colorB1 == color6), reused below
        pcmpeqw mm4, mm2
        pcmpeqw mm0, mm5
        pcmpeqw mm4, mm7
        pcmpeqw mm0, mm7
        pand    mm0, mm4
        pand    mm6, mm1
        pand    mm0, mm6

        ; second test -> mm1: color2 == color5 && colorB2 == color5 &&
        ;                     colorB1 != color6 && colorB3 != color5
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+ebx+color2]

        pcmpeqw mm5, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm3, mm4
        ; mm1 already holds the (colorB1 == color6) mask; invert it with one
        ; compare against zero (same correction as in the final2b block).
        pcmpeqw mm1, mm7
        pcmpeqw mm3, mm7
        pand    mm2, mm5
        pand    mm1, mm3
        pand    mm1, mm2

        ; pick: I5666Pixel (first test), I5556Pixel (second test),
        ; color5 under Mask35, color6 under Mask26, I56Pixel otherwise
        movq    mm2, mm0
        movq    mm7, [I5666Pixel]
        movq    mm6, [I5556Pixel]
        movq    mm5, [I56Pixel]
        movq    mm4, [Mask35]
        movq    mm3, [Mask26]

        por     mm2, mm4
        pand    mm4, [eax+ebx+color5]
        por     mm2, mm3
        pand    mm3, [eax+ebx+color6]
        por     mm2, mm1
        pand    mm0, mm7
        pand    mm1, mm6
        pxor    mm7, mm7
        pcmpeqw mm2, mm7                        ; lanes where none of the masks is set
        por     mm0, mm1
        por     mm3, mm4
        pand    mm2, mm5
        por     mm0, mm3
        por     mm0, mm2
        movq    [final1b], mm0

        ;---------
        ; Interleave the "a" and "b" results: the upper output row is built
        ; from final1a/final1b, the lower one from final2a/final2b
        movq    mm0, [final1a]
        movq    mm4, [final2a]
        movq    mm2, [final1b]
        movq    mm6, [final2b]

        movq    mm1, mm0
        movq    mm5, mm4

        punpcklwd mm0, mm2
        punpckhwd mm1, mm2

        punpcklwd mm4, mm6
        punpckhwd mm5, mm6

        ; Write 16 bytes to each of the two destination rows.  The fs path is
        ; keyed on __DJGPP__, the same symbol that guards the fs selector load
        ; at the top of this routine.
%ifdef __DJGPP__
        movq    [fs:edx], mm0
        movq    [fs:edx+8], mm1
        push    edx
        add     edx, [ebp+dstPitch]
        movq    [fs:edx], mm4
        movq    [fs:edx+8], mm5
        pop     edx
%else
        movq    [es:edx], mm0
        movq    [es:edx+8], mm1
        push    edx
        add     edx, [ebp+dstPitch]
        movq    [es:edx], mm4
        movq    [es:edx+8], mm5
        pop     edx
%endif

.SKIP_PROCESS:
        ; advance delta (+8 bytes), destination (+16 bytes) and source (+8 bytes)
        mov     ecx, [ebp+deltaPtr]
        add     ecx, 8
        mov     [ebp+deltaPtr], ecx
        add     edx, 16
        add     eax, 8

        pop     ecx                             ; remaining pixel count
        sub     ecx, 4                          ; four source pixels handled per iteration
        cmp     ecx, 0
        jg      near .Loop

        ; Restore the registers and leave
        popad
        mov     esp, ebp
        pop     ebp
        emms
        ret                                     ; without this, execution would run on into the next routine
777 ;-------------------------------------------------------------------------
778 ;-------------------------------------------------------------------------
779 ;-------------------------------------------------------------------------
780 ;-------------------------------------------------------------------------
781 ;-------------------------------------------------------------------------
782 ;-------------------------------------------------------------------------
783 ;-------------------------------------------------------------------------
;-----------------------------------------------------------------------
; _2xSaISuperEagleLine  (__2xSaISuperEagleLine when __DJGPP__ is defined)
;
; SuperEagle scaler for one line of 16-bit pixels, MMX implementation.
; Every loop iteration handles four adjacent source pixels and writes
; eight pixels to each of two destination rows (edx and edx+dstPitch).
;
; Stack arguments (dwords, offsets defined by the equ table):
;   [ebp+srcPtr]     pointer to the current source row
;   [ebp+deltaPtr]   pointer to the delta buffer used to skip unchanged
;                    pixel groups; it is advanced in place on the stack
;   [ebp+srcPitch]   byte distance between source rows (also used to
;                    step between rows of the delta buffer)
;   [ebp+width]      number of source pixels to process
;   [ebp+dstOffset]  destination pointer
;   [ebp+dstPitch]   byte distance between destination rows
;   [ebp+dstSegment] destination selector, loaded into fs (DJGPP only)
;
; Register roles inside the loop:
;   eax  source pointer minus one pitch (row "B"); +8 per iteration
;   ebx  source pitch
;   edx  destination pointer; +16 per iteration
;   ecx  scratch; the remaining pixel count lives on the stack
;
; All general registers are preserved (pushad/popad).  The MMX registers
; are clobbered and emms is executed before returning.  Under DJGPP fs is
; overwritten and not restored.
;
; Uses data symbols defined elsewhere in the file: colorMask,
; lowPixelMask, ONE, Mask26, Mask35, Mask26b, Mask35b, I56Pixel,
; I23Pixel, product1a, product1b, product2a, product2b, final1a,
; final1b, final2a, final2b.
;-----------------------------------------------------------------------
%ifdef __DJGPP__
__2xSaISuperEagleLine:
%else
NEWSYM _2xSaISuperEagleLine
%endif
        ; Set up the frame and save every general register
        push    ebp
        mov     ebp, esp
        pushad

        ; Prepare the destination
%ifdef __DJGPP__
        ; Set the selector
        mov     eax, [ebp+dstSegment]
        mov     fs, ax
%endif
        mov     edx, [ebp+dstOffset]            ; edx points to the screen

        ; Prepare the source
        mov     eax, [ebp+srcPtr]               ; current source row
        mov     ebx, [ebp+srcPitch]             ; source pitch
        mov     ecx, [ebp+width]                ; number of pixels to process
        sub     eax, ebx                        ; eax now points to row B (one row up)

        ; Main loop
.Loop:  push    ecx                             ; keep the remaining pixel count on the stack

        ;-----Check Delta------------------
        mov     ecx, [ebp+deltaPtr]

        ; load two quads from each of the four source rows
        movq    mm0, [eax+colorB0]
        movq    mm1, [eax+colorB3]
        movq    mm2, [eax+ebx+color4]
        movq    mm3, [eax+ebx+colorS2]
        movq    mm4, [eax+ebx+ebx+color1]
        movq    mm5, [eax+ebx+ebx+colorS1]
        push    eax
        add     eax, ebx                        ; temporarily step one row down to reach row A
        movq    mm6, [eax+ebx+ebx+colorA0]
        movq    mm7, [eax+ebx+ebx+colorA3]
        pop     eax

        ; compare each quad with the delta buffer
        pcmpeqw mm0, [ecx+2+colorB0]
        pcmpeqw mm1, [ecx+2+colorB3]
        pcmpeqw mm2, [ecx+ebx+2+color4]
        pcmpeqw mm3, [ecx+ebx+2+colorS2]
        pcmpeqw mm4, [ecx+ebx+ebx+2+color1]
        pcmpeqw mm5, [ecx+ebx+ebx+2+colorS1]
        add     ecx, ebx
        pcmpeqw mm6, [ecx+ebx+ebx+2+colorA0]
        pcmpeqw mm7, [ecx+ebx+ebx+2+colorA3]
        sub     ecx, ebx

        ; mm0 = AND of all eight comparisons, mm7 = lanes where one failed
        pand    mm0, mm1
        pand    mm2, mm3
        pand    mm4, mm5
        pand    mm6, mm7
        pand    mm0, mm2
        pand    mm4, mm6
        pxor    mm7, mm7
        pand    mm0, mm4
        movq    mm6, [eax+colorB0]
        pcmpeqw mm7, mm0

        movq    [ecx+2+colorB0], mm6            ; refresh the delta buffer with the row B quad

        packsswb mm7, mm7                       ; squeeze the four word flags into one dword
        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_PROCESS              ; nothing changed: skip this group
        ;End Delta

        ;---------------------------------
        ; I56Pixel = INTERPOLATE(color5, color6):
        ; ((a & colorMask) >> 1) + ((b & colorMask) >> 1) + (a & b & lowPixelMask)
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm2, mm0
        movq    mm3, mm1
        movq    mm4, mm0                        ; mm4 keeps color5
        movq    mm5, mm1                        ; mm5 keeps color6

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3                        ; mm0 contains the interpolated values
        movq    [I56Pixel], mm0
        movq    mm7, mm0

        ;-------------------
        ; product1a = INTERPOLATE(I56Pixel, color5)
        movq    mm0, mm7
        movq    mm1, mm4                        ; 5,5,5,6
        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3                        ; mm0 contains the interpolated values
        movq    [product1a], mm0

        ;--------------------
        ; product1b = INTERPOLATE(I56Pixel, color6)
        movq    mm0, mm7
        movq    mm1, mm5                        ; 6,6,6,5
        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3
        movq    [product1b], mm0

        ;-------------------------
        ; I23Pixel = INTERPOLATE(color2, color3)
        movq    mm0, [eax+ebx+ebx+color2]
        movq    mm1, [eax+ebx+ebx+color3]
        movq    mm2, mm0
        movq    mm3, mm1
        movq    mm4, mm0                        ; mm4 keeps color2
        movq    mm5, mm1                        ; mm5 keeps color3

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3
        movq    [I23Pixel], mm0
        movq    mm7, mm0

        ;---------------------
        ; product2a = INTERPOLATE(I23Pixel, color2)
        movq    mm0, mm7
        movq    mm1, mm4                        ; 2,2,2,3
        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3
        movq    [product2a], mm0

        ;----------------------
        ; product2b = INTERPOLATE(I23Pixel, color3)
        movq    mm0, mm7
        movq    mm1, mm5                        ; 3,3,3,2
        movq    mm2, mm0
        movq    mm3, mm1

        pand    mm0, [colorMask]
        pand    mm1, [colorMask]

        psrlw   mm0, 1
        psrlw   mm1, 1

        pand    mm3, [lowPixelMask]
        paddw   mm0, mm1

        pand    mm3, mm2
        paddw   mm0, mm3
        movq    [product2b], mm0

        ;////////////////////////////////
        ; Decide which "branch" to take
        ;--------------------------------
        ; mm4..mm7 keep color5, color6, color3, color2 until the guess test
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+color6]
        movq    mm6, [eax+ebx+ebx+color3]
        movq    mm7, [eax+ebx+ebx+color2]

        ; Mask35 = (color5 == color3) && (color6 != color2)
        pxor    mm3, mm3
        movq    mm0, mm4
        movq    mm1, mm5

        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pcmpeqw mm1, mm3
        pand    mm0, mm1
        movq    [Mask35], mm0

        ; Mask35b = Mask35 && ((colorS1 == color5 && color4 == color5) ||
        ;                      (colorA2 == color5 && colorB1 == color5))
        movq    mm0, [eax+ebx+ebx+colorS1]
        movq    mm1, [eax+ebx+color4]
        push    eax
        add     eax, ebx
        movq    mm2, [eax+ebx+ebx+colorA2]
        pop     eax
        movq    mm3, [eax+colorB1]
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm4
        pcmpeqw mm3, mm4
        pand    mm0, mm1
        pand    mm2, mm3
        por     mm0, mm2
        pand    mm0, [Mask35]
        movq    [Mask35b], mm0

        ;-----------
        ; Mask26 = (color5 != color3) && (color6 == color2)
        pxor    mm3, mm3
        movq    mm0, mm4
        movq    mm1, mm5

        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pcmpeqw mm0, mm3
        pand    mm0, mm1
        movq    [Mask26], mm0

        ; Mask26b = Mask26 && ((color1 == color6 && colorS2 == color6) ||
        ;                      (colorA1 == color6 && colorB2 == color6))
        movq    mm0, [eax+ebx+ebx+color1]
        movq    mm1, [eax+ebx+colorS2]
        push    eax
        add     eax, ebx
        movq    mm2, [eax+ebx+ebx+colorA1]
        pop     eax
        movq    mm3, [eax+colorB2]
        pcmpeqw mm0, mm5
        pcmpeqw mm1, mm5
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm1
        pand    mm2, mm3
        por     mm0, mm2
        pand    mm0, [Mask26]
        movq    [Mask26b], mm0

        ;--------------------
        ; mm0 = both diagonals equal but color5 != color6: lanes that must guess
        movq    mm0, mm4
        movq    mm1, mm5
        movq    mm2, mm0

        pcmpeqw mm2, mm1
        pcmpeqw mm0, mm6
        pcmpeqw mm1, mm7
        pand    mm0, mm1
        pand    mm2, mm0
        pxor    mm0, mm2
        movq    mm7, mm0

        ;------------------
        packsswb mm7, mm7
        movd    ecx, mm7
        test    ecx, ecx
        jz      near .SKIP_GUESS                ; no lane needs the neighbourhood vote

        ;---------------------------------------------
        ; Map of the pixels:           I|E F|J
        ;                              G|A B|K
        ;                              H|C D|L
        ;                              M|N O|P
        ; mm6 holds 1 in every undecided lane, mm4/mm5 hold color5/color6,
        ; and mm7 accumulates a signed score from four pairs of neighbours.
        movq    mm6, mm0
        movq    mm4, [eax+ebx+color5]
        movq    mm5, [eax+ebx+color6]
        pxor    mm7, mm7
        pand    mm6, [ONE]

        ; neighbour pair 1: colorB1, color4
        movq    mm0, [eax+colorB1]
        movq    mm1, [eax+ebx+color4]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1                        ; matches with color5 (0..2 per lane)
        paddw   mm2, mm3                        ; matches with color6 (0..2 per lane)

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        ; neighbour pair 2: colorB2, colorS2
        movq    mm0, [eax+colorB2]
        movq    mm1, [eax+ebx+colorS2]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1
        paddw   mm2, mm3

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        ; neighbour pair 3: color1, colorA1 (eax stepped one row down)
        push    eax
        add     eax, ebx
        movq    mm0, [eax+ebx+color1]
        movq    mm1, [eax+ebx+ebx+colorA1]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1
        paddw   mm2, mm3

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        ; neighbour pair 4: colorS1, colorA2
        movq    mm0, [eax+ebx+colorS1]
        movq    mm1, [eax+ebx+ebx+colorA2]
        movq    mm2, mm0
        movq    mm3, mm1
        pcmpeqw mm0, mm4
        pcmpeqw mm1, mm4
        pcmpeqw mm2, mm5
        pcmpeqw mm3, mm5
        pand    mm0, mm6
        pand    mm1, mm6
        pand    mm2, mm6
        pand    mm3, mm6
        paddw   mm0, mm1
        paddw   mm2, mm3

        pxor    mm3, mm3
        pcmpgtw mm0, mm6
        pcmpgtw mm2, mm6
        pcmpeqw mm0, mm3
        pcmpeqw mm2, mm3
        pand    mm0, mm6
        pand    mm2, mm6
        paddw   mm7, mm0
        psubw   mm7, mm2

        ; fold the vote into the masks: score > 0 -> Mask35, score < 0 -> Mask26
        pop     eax
        movq    mm1, mm7                        ; mm1 = raw signed score
        pxor    mm0, mm0
        pcmpgtw mm7, mm0                        ; mm7 = (score > 0)
        pcmpgtw mm0, mm1                        ; mm0 = (score < 0)

        por     mm7, [Mask35]
        ; Merge the (score < 0) mask held in mm0.  Merging mm1 instead would
        ; OR the raw score (small signed integers) into Mask26 and leave
        ; stray bits in a value that is used as an all-or-nothing lane mask.
        por     mm0, [Mask26]
        movq    [Mask35], mm7
        movq    [Mask26], mm0

.SKIP_GUESS:
        ;Start the ASSEMBLY !!!

        movq    mm4, [Mask35]
        movq    mm5, [Mask26]
        movq    mm6, [Mask35b]
        movq    mm7, [Mask26b]

        ; mm3 = (color5 == color2 || color6 == color3) && !(Mask35 || Mask26)
        movq    mm0, [eax+ebx+color5]
        movq    mm1, [eax+ebx+color6]
        movq    mm2, [eax+ebx+ebx+color2]
        movq    mm3, [eax+ebx+ebx+color3]
        pcmpeqw mm0, mm2
        pcmpeqw mm1, mm3
        movq    mm2, mm4
        movq    mm3, mm5
        por     mm0, mm1
        por     mm2, mm3
        pand    mm2, mm0
        pxor    mm0, mm2
        movq    mm3, mm0

        ; mm2 = lanes where none of mm3, Mask35, Mask26 is set;
        ; mm4 = Mask35 ^ Mask35b, mm5 = Mask26 ^ Mask26b
        movq    mm2, mm0
        pxor    mm0, mm0
        por     mm2, mm4
        pxor    mm4, mm6
        por     mm2, mm5
        pxor    mm5, mm7
        pcmpeqw mm2, mm0
        ;----------------

        ; final1a = color5   under mm3 | mm4 | mm6
        ;         | I56Pixel under mm5
        ;         | product1b under mm7
        ;         | product1a under mm2
        movq    mm0, [eax+ebx+color5]
        movq    mm1, mm3
        por     mm1, mm4
        por     mm1, mm6
        pand    mm0, mm1
        movq    mm1, mm5
        pand    mm1, [I56Pixel]
        por     mm0, mm1
        movq    mm1, mm7
        pand    mm1, [product1b]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product1a]
        por     mm0, mm1
        movq    [final1a], mm0

        ; final1b = color6   under mm3 | mm5 | mm7
        ;         | I56Pixel under mm4
        ;         | product1a under mm6
        ;         | product1b under mm2
        movq    mm0, [eax+ebx+color6]
        movq    mm1, mm3
        por     mm1, mm5
        por     mm1, mm7
        pand    mm0, mm1
        movq    mm1, mm4
        pand    mm1, [I56Pixel]
        por     mm0, mm1
        movq    mm1, mm6
        pand    mm1, [product1a]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product1b]
        por     mm0, mm1
        movq    [final1b], mm0

        ; final2a = color2   under mm3 | mm5 | mm7
        ;         | I23Pixel under mm4
        ;         | product2b under mm6
        ;         | product2a under mm2
        movq    mm0, [eax+ebx+ebx+color2]
        movq    mm1, mm3
        por     mm1, mm5
        por     mm1, mm7
        pand    mm0, mm1
        movq    mm1, mm4
        pand    mm1, [I23Pixel]
        por     mm0, mm1
        movq    mm1, mm6
        pand    mm1, [product2b]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product2a]
        por     mm0, mm1
        movq    [final2a], mm0

        ; final2b = color3   under mm3 | mm4 | mm6
        ;         | I23Pixel under mm5
        ;         | product2a under mm7
        ;         | product2b under mm2
        movq    mm0, [eax+ebx+ebx+color3]
        movq    mm1, mm3
        por     mm1, mm4
        por     mm1, mm6
        pand    mm0, mm1
        movq    mm1, mm5
        pand    mm1, [I23Pixel]
        por     mm0, mm1
        movq    mm1, mm7
        pand    mm1, [product2a]
        por     mm0, mm1
        movq    mm1, mm2
        pand    mm1, [product2b]
        por     mm0, mm1
        movq    [final2b], mm0

        ; Interleave the "a" and "b" results: the upper output row is built
        ; from final1a/final1b, the lower one from final2a/final2b
        movq    mm0, [final1a]
        movq    mm2, [final1b]
        movq    mm1, mm0
        movq    mm4, [final2a]
        movq    mm6, [final2b]
        movq    mm5, mm4
        punpcklwd mm0, mm2
        punpckhwd mm1, mm2
        punpcklwd mm4, mm6
        punpckhwd mm5, mm6

        ; Write 16 bytes to each of the two destination rows
%ifdef __DJGPP__
        movq    [fs:edx], mm0
        movq    [fs:edx+8], mm1
        push    edx
        add     edx, [ebp+dstPitch]
        movq    [fs:edx], mm4
        movq    [fs:edx+8], mm5
        pop     edx
%else
        movq    [es:edx], mm0
        movq    [es:edx+8], mm1
        push    edx
        add     edx, [ebp+dstPitch]
        movq    [es:edx], mm4
        movq    [es:edx+8], mm5
        pop     edx
%endif

.SKIP_PROCESS:
        ; advance delta (+8 bytes), destination (+16 bytes) and source (+8 bytes)
        mov     ecx, [ebp+deltaPtr]
        add     ecx, 8
        mov     [ebp+deltaPtr], ecx
        add     edx, 16
        add     eax, 8

        pop     ecx                             ; remaining pixel count
        sub     ecx, 4                          ; four source pixels handled per iteration
        cmp     ecx, 0
        jg      near .Loop

        ; Restore the registers and leave
        popad
        mov     esp, ebp
        pop     ebp
        emms
        ret                                     ; without this, execution would run on into the next routine
1341 ;-------------------------------------------------------------------------
1342 ;-------------------------------------------------------------------------
1343 ;-------------------------------------------------------------------------
1344 ;-------------------------------------------------------------------------
1345 ;-------------------------------------------------------------------------
1346 ;-------------------------------------------------------------------------
1347 ;-------------------------------------------------------------------------
;-----------------------------------------------------------------------
; This is 2xSaI
;
; Byte offsets of the 4x4 neighbourhood, one group of names per row.
; Pixels are 16-bit words, so columns are 2 bytes apart (-2, 0, 2, 4).
; Layout of the names:          I|E F|J     (row at eax)
;                               G|A B|K     (row at eax+ebx)
;                               H|C D|L     (row at eax+ebx+ebx)
;                               M|N O|P     (row at eax+3*ebx)
;-----------------------------------------------------------------------
colorI          equ     -2
colorE          equ     0
colorF          equ     2
colorJ          equ     4

colorG          equ     -2
colorA          equ     0
colorB          equ     2
colorK          equ     4

colorH          equ     -2
colorC          equ     0
colorD          equ     2
colorL          equ     4

colorM          equ     -2
colorN          equ     0
colorO          equ     2
colorP          equ     4
1371 %ifdef __DJGPP__
1372 __2xSaILine:
1373 %else
1374 NEWSYM _2xSaILine
1375 %endif
1376 ; Store some stuff
1377 push ebp
1378 mov ebp, esp
1379 pushad
1381 ; Prepare the destination
1382 %ifdef __DJGPP__
1383 ; Set the selector
1384 mov eax, [ebp+dstSegment]
1385 mov fs, ax
1386 %endif
1387 mov edx, [ebp+dstOffset] ; edx points to the screen
1388 ; Prepare the source
1389 ; eax points to colorA
1390 mov eax, [ebp+srcPtr]
1391 mov ebx, [ebp+srcPitch]
1392 mov ecx, [ebp+width]
1393 ; eax now points to colorE
1394 sub eax, ebx
1397 ; Main Loop
1398 .Loop: push ecx
1400 ;-----Check Delta------------------
1401 mov ecx, [ebp+deltaPtr]
1403 movq mm0, [eax+colorI]
1404 movq mm1, [eax+colorJ]
1405 movq mm2, [eax+ebx+colorG]
1406 movq mm3, [eax+ebx+colorK]
1407 movq mm4, [eax+ebx+ebx+colorH]
1408 movq mm5, [eax+ebx+ebx+colorL]
1409 push eax
1410 add eax, ebx
1411 movq mm6, [eax+ebx+ebx+colorM]
1412 movq mm7, [eax+ebx+ebx+colorP]
1413 pop eax
1415 pcmpeqw mm0, [ecx+2+colorI]
1416 pcmpeqw mm1, [ecx+2+colorK]
1417 pcmpeqw mm2, [ecx+ebx+2+colorG]
1418 pcmpeqw mm3, [ecx+ebx+2+colorK]
1419 pcmpeqw mm4, [ecx+ebx+ebx+2+colorH]
1420 pcmpeqw mm5, [ecx+ebx+ebx+2+colorL]
1421 add ecx, ebx
1422 pcmpeqw mm6, [ecx+ebx+ebx+2+colorM]
1423 pcmpeqw mm7, [ecx+ebx+ebx+2+colorP]
1424 sub ecx, ebx
1427 pand mm0, mm1
1428 pand mm2, mm3
1429 pand mm4, mm5
1430 pand mm6, mm7
1431 pand mm0, mm2
1432 pand mm4, mm6
1433 pxor mm7, mm7
1434 pand mm0, mm4
1435 movq mm6, [eax+colorI]
1436 pcmpeqw mm7, mm0
1438 movq [ecx+2+colorI], mm6
1440 packsswb mm7, mm7
1441 movd ecx, mm7
1442 test ecx, ecx
1443 jz near .SKIP_PROCESS
1445 ;End Delta
1447 ;---------------------------------
1451 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)
1452 movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA
1453 movq mm2, [eax+ebx+colorB] ;mm2 and mm3 contain colorB
1455 movq mm1, mm0
1456 movq mm3, mm2
1458 pcmpeqw mm0, [eax+ebx+ebx+colorD]
1459 pcmpeqw mm1, [eax+colorE]
1460 pcmpeqw mm2, [eax+ebx+ebx+colorL]
1461 pcmpeqw mm3, [eax+ebx+ebx+colorC]
1463 pand mm0, mm1
1464 pxor mm1, mm1
1465 pand mm0, mm2
1466 pcmpeqw mm3, mm1
1467 pand mm0, mm3 ;result in mm0
1469 ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)
1470 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA
1471 movq mm6, [eax+ebx+colorB] ;mm6 and mm7 contain colorB
1472 movq mm5, mm4
1473 movq mm7, mm6
1475 pcmpeqw mm4, [eax+ebx+ebx+colorC]
1476 pcmpeqw mm5, [eax+colorF]
1477 pcmpeqw mm6, [eax+colorJ]
1478 pcmpeqw mm7, [eax+colorE]
1480 pand mm4, mm5
1481 pxor mm5, mm5
1482 pand mm4, mm6
1483 pcmpeqw mm7, mm5
1484 pand mm4, mm7 ;result in mm4
1486 por mm0, mm4 ;combine the masks
1487 movq [Mask1], mm0
1489 ;--------------------------------------------
1492 ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)
1493 movq mm0, [eax+ebx+colorB] ;mm0 and mm1 contain colorB
1494 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA
1495 movq mm1, mm0
1496 movq mm3, mm2
1498 pcmpeqw mm0, [eax+ebx+ebx+colorC]
1499 pcmpeqw mm1, [eax+colorF]
1500 pcmpeqw mm2, [eax+ebx+ebx+colorH]
1501 pcmpeqw mm3, [eax+ebx+ebx+colorD]
1503 pand mm0, mm1
1504 pxor mm1, mm1
1505 pand mm0, mm2
1506 pcmpeqw mm3, mm1
1507 pand mm0, mm3 ;result in mm0
1509 ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)
1510 movq mm4, [eax+ebx+colorB] ;mm4 and mm5 contain colorB
1511 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA
1512 movq mm5, mm4
1513 movq mm7, mm6
1515 pcmpeqw mm4, [eax+ebx+ebx+colorD]
1516 pcmpeqw mm5, [eax+colorE]
1517 pcmpeqw mm6, [eax+colorI]
1518 pcmpeqw mm7, [eax+colorF]
1520 pand mm4, mm5
1521 pxor mm5, mm5
1522 pand mm4, mm6
1523 pcmpeqw mm7, mm5
1524 pand mm4, mm7 ;result in mm4
1526 por mm0, mm4 ;combine the masks
1527 movq [Mask2], mm0
1530 ;interpolate colorA and colorB
1531 movq mm0, [eax+ebx+colorA]
1532 movq mm1, [eax+ebx+colorB]
1534 movq mm2, mm0
1535 movq mm3, mm1
1537 pand mm0, [colorMask]
1538 pand mm1, [colorMask]
1540 psrlw mm0, 1
1541 psrlw mm1, 1
1543 pand mm3, [lowPixelMask]
1544 paddw mm0, mm1
1546 pand mm3, mm2
1547 paddw mm0, mm3 ;mm0 contains the interpolated values
1549 ;assemble the pixels
1550 movq mm1, [eax+ebx+colorA]
1551 movq mm2, [eax+ebx+colorB]
1553 movq mm3, [Mask1]
1554 movq mm5, mm1
1555 movq mm4, [Mask2]
1556 movq mm6, mm1
1558 pand mm1, mm3
1559 por mm3, mm4
1560 pxor mm7, mm7
1561 pand mm2, mm4
1563 pcmpeqw mm3, mm7
1564 por mm1, mm2
1565 pand mm0, mm3
1567 por mm0, mm1
1569 punpcklwd mm5, mm0
1570 punpckhwd mm6, mm0
1572 %ifdef __DJGPP__
1573 movq [fs:edx], mm5
1574 movq [fs:edx+8], mm6
1575 %else
1576 movq [es:edx], mm5
1577 movq [es:edx+8], mm6
1578 %endif
1580 ;------------------------------------------------
1581 ; Create the Nextline
1582 ;------------------------------------------------
1583 ;3 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)
1584 movq mm0, [eax+ebx+colorA] ;mm0 and mm1 contain colorA
1585 movq mm2, [eax+ebx+ebx+colorC] ;mm2 and mm3 contain colorC
1586 movq mm1, mm0
1587 movq mm3, mm2
1589 push eax
1590 add eax, ebx
1591 pcmpeqw mm0, [eax+ebx+colorD]
1592 pcmpeqw mm1, [eax+colorG]
1593 pcmpeqw mm2, [eax+ebx+ebx+colorO]
1594 pcmpeqw mm3, [eax+colorB]
1595 pop eax
1597 pand mm0, mm1
1598 pxor mm1, mm1
1599 pand mm0, mm2
1600 pcmpeqw mm3, mm1
1601 pand mm0, mm3 ;result in mm0
1603 ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)
1604 movq mm4, [eax+ebx+colorA] ;mm4 and mm5 contain colorA
1605 movq mm6, [eax+ebx+ebx+colorC] ;mm6 and mm7 contain colorC
1606 movq mm5, mm4
1607 movq mm7, mm6
1609 push eax
1610 add eax, ebx
1611 pcmpeqw mm4, [eax+ebx+colorH]
1612 pcmpeqw mm5, [eax+colorB]
1613 pcmpeqw mm6, [eax+ebx+ebx+colorM]
1614 pcmpeqw mm7, [eax+colorG]
1615 pop eax
1617 pand mm4, mm5
1618 pxor mm5, mm5
1619 pand mm4, mm6
1620 pcmpeqw mm7, mm5
1621 pand mm4, mm7 ;result in mm4
1623 por mm0, mm4 ;combine the masks
1624 movq [Mask1], mm0
1625 ;--------------------------------------------
1628 ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)
1629 movq mm0, [eax+ebx+ebx+colorC] ;mm0 and mm1 contain colorC
1630 movq mm2, [eax+ebx+colorA] ;mm2 and mm3 contain colorA
1631 movq mm1, mm0
1632 movq mm3, mm2
1634 pcmpeqw mm0, [eax+ebx+colorB]
1635 pcmpeqw mm1, [eax+ebx+ebx+colorH]
1636 pcmpeqw mm2, [eax+colorF]
1637 pcmpeqw mm3, [eax+ebx+ebx+colorD]
1639 pand mm0, mm1
1640 pxor mm1, mm1
1641 pand mm0, mm2
1642 pcmpeqw mm3, mm1
1643 pand mm0, mm3 ;result in mm0
1645 ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)
1646 movq mm4, [eax+ebx+ebx+colorC] ;mm4 and mm5 contain colorC
1647 movq mm6, [eax+ebx+colorA] ;mm6 and mm7 contain colorA
1648 movq mm5, mm4
1649 movq mm7, mm6
1651 pcmpeqw mm4, [eax+ebx+ebx+colorD]
1652 pcmpeqw mm5, [eax+ebx+colorG]
1653 pcmpeqw mm6, [eax+colorI]
1654 pcmpeqw mm7, [eax+ebx+ebx+colorH]
1656 pand mm4, mm5
1657 pxor mm5, mm5
1658 pand mm4, mm6
1659 pcmpeqw mm7, mm5
1660 pand mm4, mm7 ;result in mm4
1662 por mm0, mm4 ;combine the masks
1663 movq [Mask2], mm0
1664 ;----------------------------------------------
1666 ;interpolate colorA and colorC
1667 movq mm0, [eax+ebx+colorA]
1668 movq mm1, [eax+ebx+ebx+colorC]
1670 movq mm2, mm0
1671 movq mm3, mm1
1673 pand mm0, [colorMask]
1674 pand mm1, [colorMask]
1676 psrlw mm0, 1
1677 psrlw mm1, 1
1679 pand mm3, [lowPixelMask]
1680 paddw mm0, mm1
1682 pand mm3, mm2
1683 paddw mm0, mm3 ;mm0 contains the interpolated values
1684 ;-------------
1686 ;assemble the pixels
1687 movq mm1, [eax+ebx+colorA]
1688 movq mm2, [eax+ebx+ebx+colorC]
1690 movq mm3, [Mask1]
1691 movq mm4, [Mask2]
1693 pand mm1, mm3
1694 pand mm2, mm4
1696 por mm3, mm4
1697 pxor mm7, mm7
1698 por mm1, mm2
1700 pcmpeqw mm3, mm7
1701 pand mm0, mm3
1702 por mm0, mm1
1703 movq [ACPixel], mm0
1705 ;////////////////////////////////
1706 ; Decide which "branch" to take
1707 ;--------------------------------
1708 movq mm0, [eax+ebx+colorA]
1709 movq mm1, [eax+ebx+colorB]
1710 movq mm6, mm0
1711 movq mm7, mm1
1712 pcmpeqw mm0, [eax+ebx+ebx+colorD]
1713 pcmpeqw mm1, [eax+ebx+ebx+colorC]
1714 pcmpeqw mm6, mm7
1716 movq mm2, mm0
1717 movq mm3, mm0
1719 pand mm0, mm1 ;colorA == colorD && colorB == colorC
1720 pxor mm7, mm7
1722 pcmpeqw mm2, mm7
1723 pand mm6, mm0
1724 pand mm2, mm1 ;colorA != colorD && colorB == colorC
1726 pcmpeqw mm1, mm7
1728 pand mm1, mm3 ;colorA == colorD && colorB != colorC
1729 pxor mm0, mm6
1730 por mm1, mm6
1731 movq mm7, mm0
1732 movq [Mask2], mm2
1733 packsswb mm7, mm7
1734 movq [Mask1], mm1
1736 movd ecx, mm7
1737 test ecx, ecx
1738 jz near .SKIP_GUESS
1739 ;---------------------------------------------
1740 ; Map of the pixels: I|E F|J
1741 ; G|A B|K
1742 ; H|C D|L
1743 ; M|N O|P
1744 movq mm6, mm0
1745 movq mm4, [eax+ebx+colorA]
1746 movq mm5, [eax+ebx+colorB]
1747 pxor mm7, mm7
1748 pand mm6, [ONE]
1750 movq mm0, [eax+colorE]
1751 movq mm1, [eax+ebx+colorG]
1752 movq mm2, mm0
1753 movq mm3, mm1
1754 pcmpeqw mm0, mm4
1755 pcmpeqw mm1, mm4
1756 pcmpeqw mm2, mm5
1757 pcmpeqw mm3, mm5
1758 pand mm0, mm6
1759 pand mm1, mm6
1760 pand mm2, mm6
1761 pand mm3, mm6
1762 paddw mm0, mm1
1763 paddw mm2, mm3
1765 pxor mm3, mm3
1766 pcmpgtw mm0, mm6
1767 pcmpgtw mm2, mm6
1768 pcmpeqw mm0, mm3
1769 pcmpeqw mm2, mm3
1770 pand mm0, mm6
1771 pand mm2, mm6
1772 paddw mm7, mm0
1773 psubw mm7, mm2
1775 movq mm0, [eax+colorF]
1776 movq mm1, [eax+ebx+colorK]
1777 movq mm2, mm0
1778 movq mm3, mm1
1779 pcmpeqw mm0, mm4
1780 pcmpeqw mm1, mm4
1781 pcmpeqw mm2, mm5
1782 pcmpeqw mm3, mm5
1783 pand mm0, mm6
1784 pand mm1, mm6
1785 pand mm2, mm6
1786 pand mm3, mm6
1787 paddw mm0, mm1
1788 paddw mm2, mm3
1790 pxor mm3, mm3
1791 pcmpgtw mm0, mm6
1792 pcmpgtw mm2, mm6
1793 pcmpeqw mm0, mm3
1794 pcmpeqw mm2, mm3
1795 pand mm0, mm6
1796 pand mm2, mm6
1797 paddw mm7, mm0
1798 psubw mm7, mm2
1800 push eax
1801 add eax, ebx
1802 movq mm0, [eax+ebx+colorH]
1803 movq mm1, [eax+ebx+ebx+colorN]
1804 movq mm2, mm0
1805 movq mm3, mm1
1806 pcmpeqw mm0, mm4
1807 pcmpeqw mm1, mm4
1808 pcmpeqw mm2, mm5
1809 pcmpeqw mm3, mm5
1810 pand mm0, mm6
1811 pand mm1, mm6
1812 pand mm2, mm6
1813 pand mm3, mm6
1814 paddw mm0, mm1
1815 paddw mm2, mm3
1817 pxor mm3, mm3
1818 pcmpgtw mm0, mm6
1819 pcmpgtw mm2, mm6
1820 pcmpeqw mm0, mm3
1821 pcmpeqw mm2, mm3
1822 pand mm0, mm6
1823 pand mm2, mm6
1824 paddw mm7, mm0
1825 psubw mm7, mm2
1827 movq mm0, [eax+ebx+colorL]
1828 movq mm1, [eax+ebx+ebx+colorO]
1829 movq mm2, mm0
1830 movq mm3, mm1
1831 pcmpeqw mm0, mm4
1832 pcmpeqw mm1, mm4
1833 pcmpeqw mm2, mm5
1834 pcmpeqw mm3, mm5
1835 pand mm0, mm6
1836 pand mm1, mm6
1837 pand mm2, mm6
1838 pand mm3, mm6
1839 paddw mm0, mm1
1840 paddw mm2, mm3
1842 pxor mm3, mm3
1843 pcmpgtw mm0, mm6
1844 pcmpgtw mm2, mm6
1845 pcmpeqw mm0, mm3
1846 pcmpeqw mm2, mm3
1847 pand mm0, mm6
1848 pand mm2, mm6
1849 paddw mm7, mm0
1850 psubw mm7, mm2
1852 pop eax
1853 movq mm1, mm7
1854 pxor mm0, mm0
1855 pcmpgtw mm7, mm0
1856 pcmpgtw mm0, mm1
1858 por mm7, [Mask1]
1859 por mm1, [Mask2]
1860 movq [Mask1], mm7
1861 movq [Mask2], mm1
1863 .SKIP_GUESS:
1864 ;----------------------------
1865 ;interpolate A, B, C and D
1866 movq mm0, [eax+ebx+colorA]
1867 movq mm1, [eax+ebx+colorB]
1868 movq mm4, mm0
1869 movq mm2, [eax+ebx+ebx+colorC]
1870 movq mm5, mm1
1871 movq mm3, [qcolorMask]
1872 movq mm6, mm2
1873 movq mm7, [qlowpixelMask]
1875 pand mm0, mm3
1876 pand mm1, mm3
1877 pand mm2, mm3
1878 pand mm3, [eax+ebx+ebx+colorD]
1880 psrlw mm0, 2
1881 pand mm4, mm7
1882 psrlw mm1, 2
1883 pand mm5, mm7
1884 psrlw mm2, 2
1885 pand mm6, mm7
1886 psrlw mm3, 2
1887 pand mm7, [eax+ebx+ebx+colorD]
1889 paddw mm0, mm1
1890 paddw mm2, mm3
1892 paddw mm4, mm5
1893 paddw mm6, mm7
1895 paddw mm4, mm6
1896 paddw mm0, mm2
1897 psrlw mm4, 2
1898 pand mm4, [qlowpixelMask]
1899 paddw mm0, mm4 ;mm0 contains the interpolated value of A, B, C and D
1901 ;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
1902 ;assemble the pixels
1903 movq mm1, [Mask1]
1904 movq mm2, [Mask2]
1905 movq mm4, [eax+ebx+colorA]
1906 movq mm5, [eax+ebx+colorB]
1907 pand mm4, mm1
1908 pand mm5, mm2
1910 pxor mm7, mm7
1911 por mm1, mm2
1912 por mm4, mm5
1913 pcmpeqw mm1, mm7
1914 pand mm0, mm1
1915 por mm4, mm0 ;mm4 contains the diagonal pixels
1917 movq mm0, [ACPixel]
1918 movq mm1, mm0
1919 punpcklwd mm0, mm4
1920 punpckhwd mm1, mm4
1922 push edx
1923 add edx, [ebp+dstPitch]
1925 %ifdef __DJGPP__
1926 movq [fs:edx], mm0
1927 movq [fs:edx+8], mm1
1928 %else
1929 movq [es:edx], mm0
1930 movq [es:edx+8], mm1
1931 %endif
1932 pop edx
1934 .SKIP_PROCESS:
1935 mov ecx, [ebp+deltaPtr]
1936 add ecx, 8
1937 mov [ebp+deltaPtr], ecx
1938 add edx, 16
1939 add eax, 8
1941 pop ecx
1942 sub ecx, 4
1943 cmp ecx, 0
1944 jg near .Loop
1946 ; Restore some stuff
1947 popad
1948 mov esp, ebp
1949 pop ebp
1950 emms
1953 ;-------------------------------------------------------------------------
1954 ; Init_2xSaIMMX: install the colour masks that match the caller's pixel format.
1955 ; In:  PixelFormat = first stack argument (read as [ebp+8] once the frame is set up); 555 or 565.
1956 ; Out: eax = 0 when the masks were written, eax = 1 for any other PixelFormat (masks left untouched).
1957 ; Writes both dwords of colorMask, lowPixelMask, qcolorMask and qlowpixelMask.
1958 ; edx is saved and restored; esp and ebp are restored before the return.
1959 ;-------------------------------------------------------------------------
1961 %ifdef __DJGPP__
1962 _Init_2xSaIMMX:
1963 %else
1964 NEWSYM Init_2xSaIMMX
1965 %endif
1966 ; Build a stack frame; edx carries the mask values below, so preserve it
1967 push ebp
1968 mov ebp, esp
1969 push edx
1972 ;MMX detection is disabled: the author noted that this CPUID check doesn't work
1973 ; mov eax,1
1974 ; cpuid
1975 ; test edx, 0x00800000 ;test bit 23
1976 ; jz end2 ;bit not set => no MMX detected
1978 mov eax, [ebp+8] ;PixelFormat
1979 cmp eax, 555
1980 jz Bits555
1981 cmp eax, 565
1982 jz Bits565
1983 end2: ;neither 555 nor 565
1984 mov eax, 1 ;return 1: nothing was changed
1985 jmp end
1986 Bits555: ;masks for 5-5-5 pixels
1987 mov edx, 0x7BDE7BDE ;every colour channel with its lowest bit cleared
1988 mov eax, colorMask ;eax = address of the 8-byte mask
1989 mov [eax], edx ;low dword
1990 mov [eax+4], edx ;high dword
1991 mov edx, 0x04210421 ;lowest bit of every colour channel
1992 mov eax, lowPixelMask
1993 mov [eax], edx
1994 mov [eax+4], edx
1995 mov edx, 0x739C739C ;every colour channel with its two lowest bits cleared
1996 mov eax, qcolorMask
1997 mov [eax], edx
1998 mov [eax+4], edx
1999 mov edx, 0x0C630C63 ;two lowest bits of every colour channel
2000 mov eax, qlowpixelMask
2001 mov [eax], edx
2002 mov [eax+4], edx
2003 xor eax, eax ;return 0: masks installed
2004 jmp end
2005 Bits565: ;masks for 5-6-5 pixels
2006 mov edx, 0xF7DEF7DE ;every colour channel with its lowest bit cleared
2007 mov eax, colorMask
2008 mov [eax], edx
2009 mov [eax+4], edx
2010 mov edx, 0x08210821 ;lowest bit of every colour channel
2011 mov eax, lowPixelMask
2012 mov [eax], edx
2013 mov [eax+4], edx
2014 mov edx, 0xE79CE79C ;every colour channel with its two lowest bits cleared
2015 mov eax, qcolorMask
2016 mov [eax], edx
2017 mov [eax+4], edx
2018 mov edx, 0x18631863 ;two lowest bits of every colour channel
2019 mov eax, qlowpixelMask
2020 mov [eax], edx
2021 mov [eax+4], edx
2022 xor eax, eax ;return 0: masks installed; falls through to the epilogue
2024 end: ;common epilogue, eax already holds the return value
2025 pop edx
2026 mov esp, ebp
2027 pop ebp
2028 ret ;return to the caller, which removes the argument from the stack
2031 ;-------------------------------------------------------------------------
2032 ;-------------------------------------------------------------------------
2033 ;-------------------------------------------------------------------------
2034 ;-------------------------------------------------------------------------
2035 ;-------------------------------------------------------------------------
2036 ;-------------------------------------------------------------------------
2037 ;-------------------------------------------------------------------------
2039 SECTION .data ALIGN = 32
2040 ;8-byte constants, each the same dword stored twice; the four masks start out with the values Init_2xSaIMMX writes for PixelFormat 565
2041 colorMask: times 2 dd 0xF7DEF7DE ;every colour channel with its lowest bit cleared (applied before the 1-bit shift of the two-pixel average)
2042 lowPixelMask: times 2 dd 0x08210821 ;lowest bit of every colour channel
2044 qcolorMask: times 2 dd 0xE79CE79C ;every colour channel with its two lowest bits cleared (applied before the 2-bit shift of the four-pixel average)
2045 qlowpixelMask: times 2 dd 0x18631863 ;two lowest bits of every colour channel
2047 FALSE: times 2 dd 0x00000000 ;all bits clear
2048 TRUE: times 2 dd 0xffffffff ;all bits set
2049 ONE: times 2 dd 0x00010001 ;the value 1 in each 16-bit lane
2052 SECTION .bss ALIGN = 32
2053 ACPixel: resq 1 ;one quadword; holds the chosen A/C pixels until they are interleaved into the second output row
2054 Mask1: resq 1 ;one quadword; per-lane comparison mask saved between steps
2055 Mask2: resq 1 ;one quadword; second per-lane comparison mask
2057 I56Pixel: resq 1 ;this slot and the ones below are one quadword each; presumably scratch for the other scaler routines -- TODO confirm against their code
2058 I23Pixel: resq 1
2059 I5556Pixel: resq 1
2060 I2223Pixel: resq 1
2061 I5666Pixel: resq 1
2062 I2333Pixel: resq 1
2063 Mask26: resq 1
2064 Mask35: resq 1
2065 Mask26b: resq 1
2066 Mask35b: resq 1
2067 product1a: resq 1
2068 product1b: resq 1
2069 product2a: resq 1
2070 product2b: resq 1
2071 final1a: resq 1
2072 final1b: resq 1
2073 final2a: resq 1
2074 final2b: resq 1