1 ;/*---------------------------------------------------------------------*
2 ; * The following (piece of) code, (part of) the 2xSaI engine, *
3 ; * copyright (c) 2001 by Derek Liauw Kie Fa. *
4 ; * Non-Commercial use of the engine is allowed and is encouraged, *
5 ; * provided that appropriate credit be given and that this copyright *
6 ; * notice will not be removed under any circumstance. *
7 ; * You may freely modify this code, but I request *
8 ; * that any improvements to the engine be submitted to me, so *
9 ; * that I can implement these improvements in newer versions of *
11 ; * If you need more information, have any comments or suggestions, *
12 ; * you can e-mail me. My e-mail: DerekL666@yahoo.com *
13 ; *---------------------------------------------------------------------*/
14 ; modified by Spacy to compile with yasm [2006-06-20]
16 ;----------------------
17 ; 2xSaI, Super2xSaI, SuperEagle .. FINAL. no versioning anymore..
18 ;----------------------
23 GLOBAL __2xSaISuperEagleLine
24 GLOBAL __2xSaISuper2xSaILine
28 GLOBAL _2xSaISuperEagleLine
29 GLOBAL _2xSaISuper2xSaILine
32 SECTION .text
ALIGN = 32
35 ;EXTERN_C void __2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
36 ; uint8 *dstPtr, uint32 dstPitch, uint16 dstSegment);
38 ;EXTERN_C void _2xSaILine (uint8 *srcPtr, uint32 srcPitch, uint32 width,
39 ; uint8 *dstPtr, uint32 dstPitch);
;----------------------------------------------------------------------
; __2xSaISuper2xSaILine / _2xSaISuper2xSaILine
; MMX line routine of the Super2xSaI scaler.
; Frame operands read below (their offsets are defined outside this
; excerpt): dstSegment, dstOffset, srcPtr, srcPitch, width, deltaPtr,
; dstPitch.
; Register use visible in this excerpt:
;   eax = srcPtr - srcPitch; the three source rows are addressed as
;         eax, eax+ebx and eax+ebx+ebx
;   ebx = srcPitch
;   ecx = width, then reloaded with deltaPtr for the delta check
;   edx = dstOffset, later advanced by dstPitch
; NOTE(review): the prologue, epilogue, the .SKIP_PROCESS label and most
; of the arithmetic between the loads are not visible in this excerpt.
;----------------------------------------------------------------------
84 __2xSaISuper2xSaILine:
86 NEWSYM _2xSaISuper2xSaILine
93 ; Prepare the destination
96 mov eax, [ebp+dstSegment
]
; NOTE(review): eax is overwritten with srcPtr below, so dstSegment is
; presumably consumed by an instruction not shown here.
99 mov edx, [ebp+dstOffset
] ; edx points to the screen
101 ; eax points to colorA
102 mov eax, [ebp+srcPtr
] ;eax points to colorA
103 mov ebx, [ebp+srcPitch
] ;ebx contains the source pitch
104 mov ecx, [ebp+width] ;ecx contains the number of pixels to process
105 ; eax now points to colorB1
106 sub eax, ebx ;eax points to B1 which is the base
111 ;-----Check Delta------------------
; ecx is re-used from here on as the delta buffer pointer.
112 mov ecx, [ebp+deltaPtr
]
; Load eight quadwords (four 16-bit words each) from the three source rows.
116 movq mm0
, [eax+colorB0
]
117 movq mm1
, [eax+colorB3
]
118 movq mm2
, [eax+ebx+color4
]
119 movq mm3
, [eax+ebx+colorS2
]
120 movq mm4
, [eax+ebx+ebx+color1
]
121 movq mm5
, [eax+ebx+ebx+colorS1
]
124 movq mm6
, [eax+ebx+ebx+colorA0
]
125 movq mm7
, [eax+ebx+ebx+colorA3
]
; Word-wise compare with the delta buffer at the same offsets + 2:
; equal words become 0xFFFF, differing words become 0.
129 pcmpeqw mm0
, [ecx+2+colorB0
]
130 pcmpeqw mm1
, [ecx+2+colorB3
]
131 pcmpeqw mm2
, [ecx+ebx+2+color4
]
132 pcmpeqw mm3
, [ecx+ebx+2+colorS2
]
133 pcmpeqw mm4
, [ecx+ebx+ebx+2+color1
]
134 pcmpeqw mm5
, [ecx+ebx+ebx+2+colorS1
]
136 pcmpeqw mm6
, [ecx+ebx+ebx+2+colorA0
]
137 pcmpeqw mm7
, [ecx+ebx+ebx+2+colorA3
]
; Copy the current B0 quadword of the source into the delta buffer.
150 movq mm6
, [eax+colorB0
]
151 pcmpeqw mm7
, mm0
;did any compare give us a zero ?
153 movq
[ecx+2+colorB0
], mm6
; NOTE(review): MMX instructions do not modify EFLAGS, so the flags
; tested by jz must be set by an instruction not shown in this excerpt.
158 jz near .SKIP_PROCESS
;no, so we can skip
162 ;---------------------------------
; Blend of color5 and color6 using colorMask / lowPixelMask.
; NOTE(review): the shift/combine steps between the visible
; instructions are missing from this excerpt.
163 movq mm0
, [eax+ebx+color5
]
164 movq mm1
, [eax+ebx+color6
]
170 pand mm0
, [colorMask
]
171 pand mm1
, [colorMask
]
176 pand mm3
, [lowPixelMask
]
180 paddw mm0
, mm3
;mm0 contains the interpolated values
186 movq mm1
, mm4
;5,5,5,6
190 pand mm0
, [colorMask
]
191 pand mm1
, [colorMask
]
196 pand mm3
, [lowPixelMask
]
200 paddw mm0
, mm3
;mm0 contains the interpolated values
; Result kept in I5556Pixel for the assembly stage further down.
201 movq
[I5556Pixel
], mm0
202 ;--------------------
205 movq mm1
, mm5
;6,6,6,5
209 pand mm0
, [colorMask
]
210 pand mm1
, [colorMask
]
215 pand mm3
, [lowPixelMask
]
; Result kept in I5666Pixel.
220 movq
[I5666Pixel
], mm0
222 ;-------------------------
223 ;-------------------------
; Same masking sequence for color2 and color3 (third source row).
224 movq mm0
, [eax+ebx+ebx+color2
]
225 movq mm1
, [eax+ebx+ebx+color3
]
231 pand mm0
, [colorMask
]
232 pand mm1
, [colorMask
]
237 pand mm3
, [lowPixelMask
]
245 ;---------------------
247 movq mm1
, mm4
;2,2,2,3
251 pand mm0
, [colorMask
]
252 pand mm1
, [colorMask
]
257 pand mm3
, [lowPixelMask
]
; Result kept in I2223Pixel.
262 movq
[I2223Pixel
], mm0
264 ;----------------------
266 movq mm1
, mm5
;3,3,3,2
270 pand mm0
, [colorMask
]
271 pand mm1
, [colorMask
]
276 pand mm3
, [lowPixelMask
]
; Result kept in I2333Pixel.
281 movq
[I2333Pixel
], mm0
284 ;--------------------
285 ;////////////////////////////////
286 ; Decide which "branch" to take
287 ;--------------------------------
; Per-word equality masks: mm0 = (color5 == color3), mm1 = (color6 == color2).
288 movq mm0
, [eax+ebx+color5
]
289 movq mm1
, [eax+ebx+color6
]
292 pcmpeqw mm0
, [eax+ebx+ebx+color3
]
293 pcmpeqw mm1
, [eax+ebx+ebx+color2
]
299 pand mm0
, mm1
;colorA == colorD && colorB == colorC
304 pand mm2
, mm1
;colorA != colorD && colorB == colorC
308 pand mm1
, mm3
;colorA == colorD && colorB != colorC
320 ;---------------------------------------------
; Neighbourhood loads using the colorA..colorO offset names; the
; computation performed on them is not visible in this excerpt.
322 movq mm4
, [eax+ebx+colorA
]
323 movq mm5
, [eax+ebx+colorB
]
327 movq mm0
, [eax+colorE
]
328 movq mm1
, [eax+ebx+colorG
]
352 movq mm0
, [eax+colorF
]
353 movq mm1
, [eax+ebx+colorK
]
379 movq mm0
, [eax+ebx+colorH
]
380 movq mm1
, [eax+ebx+ebx+colorN
]
404 movq mm0
, [eax+ebx+colorL
]
405 movq mm1
, [eax+ebx+ebx+colorO
]
442 ;Start the ASSEMBLY !!! eh... compose all the results together to form the final image...
445 movq mm0
, [eax+ebx+color5
]
446 movq mm1
, [eax+ebx+ebx+color2
]
452 pand mm0
, [colorMask
]
453 pand mm1
, [colorMask
]
458 pand mm3
, [lowPixelMask
]
462 paddw mm0
, mm3
;mm0 contains the interpolated values
463 ;---------------------------
; The %ifdef below tests a symbol that is presumably never defined, so
; the C-style pseudo-code that follows acts as reference text only.
; NOTE(review): the matching %endif is not visible in this excerpt.
467 %ifdef dfhsdfhsdahdsfhdsfh
469 if
(color5
== color3
&& color2
!= color6
&& color4
== color5
&& color5
!= colorA2
)
470 product2a
= INTERPOLATE
(color2
, color5
);
472 if
(color5
== color1
&& color6
== color5
&& color4
!= color2
&& color5
!= colorA0
)
473 product2a
= INTERPOLATE
(color2
, color5
);
477 if
(color2
== color6
&& color5
!= color3
&& color1
== color2
&& color2
!= colorB2
)
478 product1a
= INTERPOLATE
(color2
, color5
);
480 if
(color4
== color2
&& color3
== color2
&& color1
!= color5
&& color2
!= colorB0
)
481 product1a
= INTERPOLATE
(color2
, color5
);
; Operand loads leading up to the final1a store below.
489 movq mm6
, [eax+colorB2
]
490 movq mm5
, [eax+ebx+ebx+color2
]
491 movq mm4
, [eax+ebx+ebx+color1
]
501 movq mm6
, [eax+ebx+ebx+color3
]
502 movq mm5
, [eax+ebx+ebx+color2
]
503 movq mm4
, [eax+ebx+ebx+color1
]
504 movq mm2
, [eax+ebx+color5
]
505 movq mm1
, [eax+ebx+color4
]
506 movq mm3
, [eax+colorB0
]
525 movq mm1
, [eax+ebx+color5
]
528 movq
[final1a
], mm7
;finished 1a
532 ;--------------------------------
; Operand loads leading up to the final2a store below.
537 movq mm6
, [eax+ebx+ebx+colorA2
]
539 movq mm5
, [eax+ebx+color5
]
540 movq mm4
, [eax+ebx+color4
]
550 movq mm6
, [eax+ebx+color6
]
551 movq mm5
, [eax+ebx+color5
]
552 movq mm4
, [eax+ebx+color4
]
553 movq mm2
, [eax+ebx+ebx+color2
]
554 movq mm1
, [eax+ebx+ebx+color1
]
557 movq mm3
, [eax+ebx+ebx+colorA0
]
577 movq mm1
, [eax+ebx+ebx+color2
]
580 movq
[final2a
], mm7
;finished 2a
583 ;--------------------------------------------
; Second reference-only pseudo-code block (same presumably-undefined
; symbol as above; matching %endif not visible in this excerpt).
586 %ifdef dfhsdfhsdahdsfhdsfh
587 if
(color6
== color3
&& color3
== colorA1
&& color2
!= colorA2
&& color3
!= colorA0
)
588 product2b
= Q_INTERPOLATE
(color3
, color3
, color3
, color2
);
590 if
(color5
== color2
&& color2
== colorA2
&& colorA1
!= color3
&& color2
!= colorA3
)
591 product2b
= Q_INTERPOLATE
(color2
, color2
, color2
, color3
);
593 product2b
= INTERPOLATE
(color2
, color3
);
595 if
(color6
== color3
&& color6
== colorB1
&& color5
!= colorB2
&& color6
!= colorB0
)
596 product1b
= Q_INTERPOLATE
(color6
, color6
, color6
, color5
);
598 if
(color5
== color2
&& color5
== colorB2
&& colorB1
!= color6
&& color5
!= colorB3
)
599 product1b
= Q_INTERPOLATE
(color6
, color5
, color5
, color5
);
601 product1b
= INTERPOLATE
(color5
, color6
);
; Loads of the A0..A3 row entries plus color2/color3/color6.
607 movq mm0
, [eax+ebx+ebx+colorA0
]
608 movq mm1
, [eax+ebx+ebx+colorA1
]
609 movq mm2
, [eax+ebx+ebx+colorA2
]
610 movq mm3
, [eax+ebx+ebx+colorA3
]
612 movq mm4
, [eax+ebx+ebx+color2
]
613 movq mm5
, [eax+ebx+ebx+color3
]
614 movq mm6
, [eax+ebx+color6
]
; NOTE(review): color2 is read at row eax+ebx and color5 at row
; eax+ebx+ebx here, the reverse of every other access in this
; routine - confirm whether this is intended.
626 movq mm4
, [eax+ebx+color2
]
627 movq mm5
, [eax+ebx+ebx+color5
]
628 movq mm6
, [eax+ebx+ebx+color3
]
; Fetch the blends stored earlier in I2333Pixel / I2223Pixel.
641 movq mm7
, [I2333Pixel
]
642 movq mm6
, [I2223Pixel
]
648 pand mm4
, [eax+ebx+ebx+color3
]
650 pand mm3
, [eax+ebx+ebx+color2
]
663 ;-----------------------------------
; Loads of the B0..B3 row entries plus color5/color6/color3.
667 movq mm0
, [eax+colorB0
]
668 movq mm1
, [eax+colorB1
]
669 movq mm2
, [eax+colorB2
]
670 movq mm3
, [eax+colorB3
]
671 movq mm4
, [eax+ebx+color5
]
672 movq mm5
, [eax+ebx+color6
]
673 movq mm6
, [eax+ebx+ebx+color3
]
685 movq mm4
, [eax+ebx+color5
]
686 movq mm5
, [eax+ebx+ebx+color2
]
687 movq mm6
, [eax+ebx+color6
]
; Fetch the blends stored earlier in I5666Pixel / I5556Pixel.
700 movq mm7
, [I5666Pixel
]
701 movq mm6
, [I5556Pixel
]
707 pand mm4
, [eax+ebx+color5
]
709 pand mm3
, [eax+ebx+color6
]
; Advance edx by dstPitch (appears twice; the stores that use edx
; are not visible in this excerpt).
744 add edx, [ebp+dstPitch
]
752 add edx, [ebp+dstPitch
]
; Reload deltaPtr and write it back to the frame; the update applied
; to ecx in between is not visible in this excerpt.
758 mov ecx, [ebp+deltaPtr
]
760 mov [ebp+deltaPtr
], ecx
777 ;-------------------------------------------------------------------------
778 ;-------------------------------------------------------------------------
779 ;-------------------------------------------------------------------------
780 ;-------------------------------------------------------------------------
781 ;-------------------------------------------------------------------------
782 ;-------------------------------------------------------------------------
783 ;-------------------------------------------------------------------------
;----------------------------------------------------------------------
; __2xSaISuperEagleLine / _2xSaISuperEagleLine
; MMX line routine of the SuperEagle scaler.
; Frame operands read below (their offsets are defined outside this
; excerpt): dstSegment, dstOffset, srcPtr, srcPitch, deltaPtr, dstPitch.
; Register use visible in this excerpt:
;   eax = source pointer; rows addressed as eax, eax+ebx, eax+ebx+ebx
;   ebx = srcPitch
;   ecx = deltaPtr
;   edx = dstOffset, advanced by dstPitch between the two output stores
; NOTE(review): the prologue, epilogue, the .SKIP_PROCESS label and most
; of the arithmetic between the loads are not visible in this excerpt.
;----------------------------------------------------------------------
792 __2xSaISuperEagleLine:
794 NEWSYM _2xSaISuperEagleLine
801 ; Prepare the destination
804 mov eax, [ebp+dstSegment
]
; NOTE(review): eax is overwritten with srcPtr below, so dstSegment is
; presumably consumed by an instruction not shown here.
807 mov edx, [ebp+dstOffset
] ; edx points to the screen
809 ; eax points to colorA
810 mov eax, [ebp+srcPtr
]
811 mov ebx, [ebp+srcPitch
]
813 ; eax now points to colorB1
819 ;-----Check Delta------------------
820 mov ecx, [ebp+deltaPtr
]
; Load eight quadwords (four 16-bit words each) from the three source rows.
822 movq mm0
, [eax+colorB0
]
823 movq mm1
, [eax+colorB3
]
824 movq mm2
, [eax+ebx+color4
]
825 movq mm3
, [eax+ebx+colorS2
]
826 movq mm4
, [eax+ebx+ebx+color1
]
827 movq mm5
, [eax+ebx+ebx+colorS1
]
830 movq mm6
, [eax+ebx+ebx+colorA0
]
831 movq mm7
, [eax+ebx+ebx+colorA3
]
; Word-wise compare with the delta buffer at the same offsets + 2:
; equal words become 0xFFFF, differing words become 0.
834 pcmpeqw mm0
, [ecx+2+colorB0
]
835 pcmpeqw mm1
, [ecx+2+colorB3
]
836 pcmpeqw mm2
, [ecx+ebx+2+color4
]
837 pcmpeqw mm3
, [ecx+ebx+2+colorS2
]
838 pcmpeqw mm4
, [ecx+ebx+ebx+2+color1
]
839 pcmpeqw mm5
, [ecx+ebx+ebx+2+colorS1
]
841 pcmpeqw mm6
, [ecx+ebx+ebx+2+colorA0
]
842 pcmpeqw mm7
, [ecx+ebx+ebx+2+colorA3
]
; Copy the current B0 quadword of the source into the delta buffer.
854 movq mm6
, [eax+colorB0
]
857 movq
[ecx+2+colorB0
], mm6
; NOTE(review): MMX instructions do not modify EFLAGS, so the flags
; tested by jz must be set by an instruction not shown in this excerpt.
862 jz near .SKIP_PROCESS
866 ;---------------------------------
; Blend of color5 and color6 using colorMask / lowPixelMask.
; NOTE(review): the shift/combine steps between the visible
; instructions are missing from this excerpt.
867 movq mm0
, [eax+ebx+color5
]
868 movq mm1
, [eax+ebx+color6
]
874 pand mm0
, [colorMask
]
875 pand mm1
, [colorMask
]
880 pand mm3
, [lowPixelMask
]
884 paddw mm0
, mm3
;mm0 contains the interpolated values
890 movq mm1
, mm4
;5,5,5,6
894 pand mm0
, [colorMask
]
895 pand mm1
, [colorMask
]
900 pand mm3
, [lowPixelMask
]
904 paddw mm0
, mm3
;mm0 contains the interpolated values
; Result kept in product1a.
905 movq
[product1a
], mm0
906 ;--------------------
909 movq mm1
, mm5
;6,6,6,5
913 pand mm0
, [colorMask
]
914 pand mm1
, [colorMask
]
919 pand mm3
, [lowPixelMask
]
; Result kept in product1b.
924 movq
[product1b
], mm0
926 ;-------------------------
927 ;-------------------------
; Same masking sequence for color2 and color3 (third source row).
928 movq mm0
, [eax+ebx+ebx+color2
]
929 movq mm1
, [eax+ebx+ebx+color3
]
935 pand mm0
, [colorMask
]
936 pand mm1
, [colorMask
]
941 pand mm3
, [lowPixelMask
]
949 ;---------------------
951 movq mm1
, mm4
;2,2,2,3
955 pand mm0
, [colorMask
]
956 pand mm1
, [colorMask
]
961 pand mm3
, [lowPixelMask
]
; Result kept in product2a.
966 movq
[product2a
], mm0
968 ;----------------------
970 movq mm1
, mm5
;3,3,3,2
974 pand mm0
, [colorMask
]
975 pand mm1
, [colorMask
]
980 pand mm3
, [lowPixelMask
]
; Result kept in product2b.
985 movq
[product2b
], mm0
988 ;////////////////////////////////
989 ; Decide which "branch" to take
990 ;--------------------------------
; mm4..mm7 = color5, color6, color3, color2.
991 movq mm4
, [eax+ebx+color5
]
992 movq mm5
, [eax+ebx+color6
]
993 movq mm6
, [eax+ebx+ebx+color3
]
994 movq mm7
, [eax+ebx+ebx+color2
]
; Surrounding pixels; the tests applied to them are not visible here.
1006 movq mm0
, [eax+ebx+ebx+colorS1
]
1007 movq mm1
, [eax+ebx+color4
]
1010 movq mm2
, [eax+ebx+ebx+colorA2
]
1012 movq mm3
, [eax+colorB1
]
1034 movq mm0
, [eax+ebx+ebx+color1
]
1035 movq mm1
, [eax+ebx+colorS2
]
1038 movq mm2
, [eax+ebx+ebx+colorA1
]
1040 movq mm3
, [eax+colorB2
]
1051 ;--------------------
1070 ;---------------------------------------------
1071 ; Map of the pixels: I|E F|J
1076 movq mm4
, [eax+ebx+color5
]
1077 movq mm5
, [eax+ebx+color6
]
1081 movq mm0
, [eax+colorB1
]
1082 movq mm1
, [eax+ebx+color4
]
1106 movq mm0
, [eax+colorB2
]
1107 movq mm1
, [eax+ebx+colorS2
]
; NOTE(review): color1 and colorS1 are read at row eax+ebx in the two
; pairs below, but at row eax+ebx+ebx everywhere else in this routine
; - confirm whether this is intended.
1133 movq mm0
, [eax+ebx+color1
]
1134 movq mm1
, [eax+ebx+ebx+colorA1
]
1158 movq mm0
, [eax+ebx+colorS1
]
1159 movq mm1
, [eax+ebx+ebx+colorA2
]
1195 ;Start the ASSEMBLY !!!
1202 movq mm0
, [eax+ebx+color5
]
1203 movq mm1
, [eax+ebx+color6
]
1204 movq mm2
, [eax+ebx+ebx+color2
]
1205 movq mm3
, [eax+ebx+ebx+color3
]
; Four groups follow, one per source pixel (color5, color6, color2,
; color3). In each, mm1 is ANDed in turn with the plain blend
; (I56Pixel / I23Pixel) and the two weighted blends stored earlier.
; NOTE(review): the instructions producing mm1 before each pand are
; not visible in this excerpt.
1225 movq mm0
, [eax+ebx+color5
]
1231 pand mm1
, [I56Pixel
]
1234 pand mm1
, [product1b
]
1237 pand mm1
, [product1a
]
1241 movq mm0
, [eax+ebx+color6
]
1247 pand mm1
, [I56Pixel
]
1250 pand mm1
, [product1a
]
1253 pand mm1
, [product1b
]
1257 movq mm0
, [eax+ebx+ebx+color2
]
1263 pand mm1
, [I23Pixel
]
1266 pand mm1
, [product2b
]
1269 pand mm1
, [product2a
]
1273 movq mm0
, [eax+ebx+ebx+color3
]
1279 pand mm1
, [I23Pixel
]
1282 pand mm1
, [product2a
]
1285 pand mm1
, [product2b
]
; Output: mm1 is stored at edx+8, edx is advanced by dstPitch, then mm5
; is stored at edx+8. The sequence appears once with an fs: override
; and once with es:.
; NOTE(review): the branch choosing between the two is not visible
; here; presumably it depends on dstSegment.
1306 movq
[fs:edx+8], mm1
1308 add edx, [ebp+dstPitch
]
1310 movq
[fs:edx+8], mm5
1314 movq
[es:edx+8], mm1
1316 add edx, [ebp+dstPitch
]
1318 movq
[es:edx+8], mm5
; Reload deltaPtr and write it back to the frame; the update applied
; to ecx in between is not visible in this excerpt.
1322 mov ecx, [ebp+deltaPtr
]
1324 mov [ebp+deltaPtr
], ecx
1333 ; Restore some stuff
1341 ;-------------------------------------------------------------------------
1342 ;-------------------------------------------------------------------------
1343 ;-------------------------------------------------------------------------
1344 ;-------------------------------------------------------------------------
1345 ;-------------------------------------------------------------------------
1346 ;-------------------------------------------------------------------------
1347 ;-------------------------------------------------------------------------
;----------------------------------------------------------------------
; Third MMX line routine of this file.
; NOTE(review): its entry label is not visible in this excerpt;
; presumably this is the _2xSaILine / __2xSaILine routine whose C
; prototype is quoted in the comments near the top of the file.
; Frame operands read below (their offsets are defined outside this
; excerpt): dstSegment, dstOffset, srcPtr, srcPitch, width, deltaPtr,
; dstPitch.
; Register use visible in this excerpt:
;   eax = source pointer; rows addressed as eax, eax+ebx, eax+ebx+ebx
;   ebx = srcPitch
;   ecx = width, then reloaded with deltaPtr for the delta check
;   edx = dstOffset, advanced by dstPitch before the second output row
;----------------------------------------------------------------------
1381 ; Prepare the destination
1384 mov eax, [ebp+dstSegment
]
; NOTE(review): eax is overwritten with srcPtr below, so dstSegment is
; presumably consumed by an instruction not shown here.
1387 mov edx, [ebp+dstOffset
] ; edx points to the screen
1388 ; Prepare the source
1389 ; eax points to colorA
1390 mov eax, [ebp+srcPtr
]
1391 mov ebx, [ebp+srcPitch
]
1392 mov ecx, [ebp+width]
1393 ; eax now points to colorE
1400 ;-----Check Delta------------------
; ecx is re-used from here on as the delta buffer pointer.
1401 mov ecx, [ebp+deltaPtr
]
; Load eight quadwords (four 16-bit words each) from the three source rows.
1403 movq mm0
, [eax+colorI
]
1404 movq mm1
, [eax+colorJ
]
1405 movq mm2
, [eax+ebx+colorG
]
1406 movq mm3
, [eax+ebx+colorK
]
1407 movq mm4
, [eax+ebx+ebx+colorH
]
1408 movq mm5
, [eax+ebx+ebx+colorL
]
1411 movq mm6
, [eax+ebx+ebx+colorM
]
1412 movq mm7
, [eax+ebx+ebx+colorP
]
; Word-wise compare with the delta buffer at the same offsets + 2.
; NOTE(review): mm1 was loaded from colorJ but is compared against the
; delta entry at colorK. This is equivalent only if both names equate
; to the same offset (equates not visible here) - confirm.
1415 pcmpeqw mm0
, [ecx+2+colorI
]
1416 pcmpeqw mm1
, [ecx+2+colorK
]
1417 pcmpeqw mm2
, [ecx+ebx+2+colorG
]
1418 pcmpeqw mm3
, [ecx+ebx+2+colorK
]
1419 pcmpeqw mm4
, [ecx+ebx+ebx+2+colorH
]
1420 pcmpeqw mm5
, [ecx+ebx+ebx+2+colorL
]
1422 pcmpeqw mm6
, [ecx+ebx+ebx+2+colorM
]
1423 pcmpeqw mm7
, [ecx+ebx+ebx+2+colorP
]
; Copy the current colorI quadword of the source into the delta buffer.
1435 movq mm6
, [eax+colorI
]
1438 movq
[ecx+2+colorI
], mm6
; NOTE(review): MMX instructions do not modify EFLAGS, so the flags
; tested by jz must be set by an instruction not shown in this excerpt.
1443 jz near .SKIP_PROCESS
1447 ;---------------------------------
; First output row: four edge tests, each built from four per-word
; equality compares; the masks are combined pairwise with por.
1451 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorE) && (colorB == colorL)
1452 movq mm0
, [eax+ebx+colorA
] ;mm0 and mm1 contain colorA
1453 movq mm2
, [eax+ebx+colorB
] ;mm2 and mm3 contain colorB
1458 pcmpeqw mm0
, [eax+ebx+ebx+colorD
]
1459 pcmpeqw mm1
, [eax+colorE
]
1460 pcmpeqw mm2
, [eax+ebx+ebx+colorL
]
1461 pcmpeqw mm3
, [eax+ebx+ebx+colorC
]
1467 pand mm0
, mm3
;result in mm0
1469 ;if ((colorA == colorC) && (colorB != colorE) && (colorA == colorF) && (colorB == colorJ)
1470 movq mm4
, [eax+ebx+colorA
] ;mm4 and mm5 contain colorA
1471 movq mm6
, [eax+ebx+colorB
] ;mm6 and mm7 contain colorB
1475 pcmpeqw mm4
, [eax+ebx+ebx+colorC
]
1476 pcmpeqw mm5
, [eax+colorF
]
1477 pcmpeqw mm6
, [eax+colorJ
]
1478 pcmpeqw mm7
, [eax+colorE
]
1484 pand mm4
, mm7
;result in mm4
1486 por mm0
, mm4
;combine the masks
1489 ;--------------------------------------------
1492 ;if ((colorB == colorC) && (colorA != colorD) && (colorB == colorF) && (colorA == colorH)
1493 movq mm0
, [eax+ebx+colorB
] ;mm0 and mm1 contain colorB
1494 movq mm2
, [eax+ebx+colorA
] ;mm2 and mm3 contain colorA
1498 pcmpeqw mm0
, [eax+ebx+ebx+colorC
]
1499 pcmpeqw mm1
, [eax+colorF
]
1500 pcmpeqw mm2
, [eax+ebx+ebx+colorH
]
1501 pcmpeqw mm3
, [eax+ebx+ebx+colorD
]
1507 pand mm0
, mm3
;result in mm0
1509 ;if ((colorB == colorE) && (colorB == colorD) && (colorA != colorF) && (colorA == colorI)
1510 movq mm4
, [eax+ebx+colorB
] ;mm4 and mm5 contain colorB
1511 movq mm6
, [eax+ebx+colorA
] ;mm6 and mm7 contain colorA
1515 pcmpeqw mm4
, [eax+ebx+ebx+colorD
]
1516 pcmpeqw mm5
, [eax+colorE
]
1517 pcmpeqw mm6
, [eax+colorI
]
1518 pcmpeqw mm7
, [eax+colorF
]
1524 pand mm4
, mm7
;result in mm4
1526 por mm0
, mm4
;combine the masks
1530 ;interpolate colorA and colorB
; NOTE(review): the shift/combine steps between the visible
; instructions are missing from this excerpt.
1531 movq mm0
, [eax+ebx+colorA
]
1532 movq mm1
, [eax+ebx+colorB
]
1537 pand mm0
, [colorMask
]
1538 pand mm1
, [colorMask
]
1543 pand mm3
, [lowPixelMask
]
1547 paddw mm0
, mm3
;mm0 contains the interpolated values
1549 ;assemble the pixels
1550 movq mm1
, [eax+ebx+colorA
]
1551 movq mm2
, [eax+ebx+colorB
]
; Store the first output row at edx+8. The store appears once with an
; fs: override and once with es:.
; NOTE(review): the branch choosing between the two is not visible
; here; presumably it depends on dstSegment.
1574 movq
[fs:edx+8], mm6
1577 movq
[es:edx+8], mm6
1580 ;------------------------------------------------
1581 ; Create the Nextline
1582 ;------------------------------------------------
; NOTE(review): in the first two tests below, colorD/colorH are read at
; row eax+ebx and colorG/colorB at row eax, one row higher than in
; every other access in this routine - confirm whether this is
; intended.
1583 ;3 ;if ((colorA == colorD) && (colorB != colorC) && (colorA == colorG) && (colorC == colorO)
1584 movq mm0
, [eax+ebx+colorA
] ;mm0 and mm1 contain colorA
1585 movq mm2
, [eax+ebx+ebx+colorC
] ;mm2 and mm3 contain colorC
1591 pcmpeqw mm0
, [eax+ebx+colorD
]
1592 pcmpeqw mm1
, [eax+colorG
]
1593 pcmpeqw mm2
, [eax+ebx+ebx+colorO
]
1594 pcmpeqw mm3
, [eax+colorB
]
1601 pand mm0
, mm3
;result in mm0
1603 ;if ((colorA == colorB) && (colorG != colorC) && (colorA == colorH) && (colorC == colorM)
1604 movq mm4
, [eax+ebx+colorA
] ;mm4 and mm5 contain colorA
1605 movq mm6
, [eax+ebx+ebx+colorC
] ;mm6 and mm7 contain colorC
1611 pcmpeqw mm4
, [eax+ebx+colorH
]
1612 pcmpeqw mm5
, [eax+colorB
]
1613 pcmpeqw mm6
, [eax+ebx+ebx+colorM
]
1614 pcmpeqw mm7
, [eax+colorG
]
1621 pand mm4
, mm7
;result in mm4
1623 por mm0
, mm4
;combine the masks
1625 ;--------------------------------------------
1628 ;if ((colorB == colorC) && (colorA != colorD) && (colorC == colorH) && (colorA == colorF)
1629 movq mm0
, [eax+ebx+ebx+colorC
] ;mm0 and mm1 contain colorC
1630 movq mm2
, [eax+ebx+colorA
] ;mm2 and mm3 contain colorA
1634 pcmpeqw mm0
, [eax+ebx+colorB
]
1635 pcmpeqw mm1
, [eax+ebx+ebx+colorH
]
1636 pcmpeqw mm2
, [eax+colorF
]
1637 pcmpeqw mm3
, [eax+ebx+ebx+colorD
]
1643 pand mm0
, mm3
;result in mm0
1645 ;if ((colorC == colorG) && (colorC == colorD) && (colorA != colorH) && (colorA == colorI)
1646 movq mm4
, [eax+ebx+ebx+colorC
] ;mm4 and mm5 contain colorC
1647 movq mm6
, [eax+ebx+colorA
] ;mm6 and mm7 contain colorA
1651 pcmpeqw mm4
, [eax+ebx+ebx+colorD
]
1652 pcmpeqw mm5
, [eax+ebx+colorG
]
1653 pcmpeqw mm6
, [eax+colorI
]
1654 pcmpeqw mm7
, [eax+ebx+ebx+colorH
]
1660 pand mm4
, mm7
;result in mm4
1662 por mm0
, mm4
;combine the masks
1664 ;----------------------------------------------
1666 ;interpolate colorA and colorC
1667 movq mm0
, [eax+ebx+colorA
]
1668 movq mm1
, [eax+ebx+ebx+colorC
]
1673 pand mm0
, [colorMask
]
1674 pand mm1
, [colorMask
]
1679 pand mm3
, [lowPixelMask
]
1683 paddw mm0
, mm3
;mm0 contains the interpolated values
1686 ;assemble the pixels
1687 movq mm1
, [eax+ebx+colorA
]
1688 movq mm2
, [eax+ebx+ebx+colorC
]
1705 ;////////////////////////////////
1706 ; Decide which "branch" to take
1707 ;--------------------------------
; Per-word equality masks: mm0 = (colorA == colorD), mm1 = (colorB == colorC).
1708 movq mm0
, [eax+ebx+colorA
]
1709 movq mm1
, [eax+ebx+colorB
]
1712 pcmpeqw mm0
, [eax+ebx+ebx+colorD
]
1713 pcmpeqw mm1
, [eax+ebx+ebx+colorC
]
1719 pand mm0
, mm1
;colorA == colorD && colorB == colorC
1724 pand mm2
, mm1
;colorA != colorD && colorB == colorC
1728 pand mm1
, mm3
;colorA == colorD && colorB != colorC
1739 ;---------------------------------------------
1740 ; Map of the pixels: I|E F|J
1745 movq mm4
, [eax+ebx+colorA
]
1746 movq mm5
, [eax+ebx+colorB
]
1750 movq mm0
, [eax+colorE
]
1751 movq mm1
, [eax+ebx+colorG
]
1775 movq mm0
, [eax+colorF
]
1776 movq mm1
, [eax+ebx+colorK
]
; NOTE(review): colorH and colorL are read at row eax+ebx in the two
; pairs below, but at row eax+ebx+ebx in the delta check and edge
; tests above - confirm whether this is intended.
1802 movq mm0
, [eax+ebx+colorH
]
1803 movq mm1
, [eax+ebx+ebx+colorN
]
1827 movq mm0
, [eax+ebx+colorL
]
1828 movq mm1
, [eax+ebx+ebx+colorO
]
1864 ;----------------------------
1865 ;interpolate A, B, C and D
; Uses the q* masks; colorD is ANDed straight from memory into the
; mask registers mm3 (qcolorMask) and mm7 (qlowpixelMask).
1866 movq mm0
, [eax+ebx+colorA
]
1867 movq mm1
, [eax+ebx+colorB
]
1869 movq mm2
, [eax+ebx+ebx+colorC
]
1871 movq mm3
, [qcolorMask
]
1873 movq mm7
, [qlowpixelMask
]
1878 pand mm3
, [eax+ebx+ebx+colorD
]
1887 pand mm7
, [eax+ebx+ebx+colorD
]
1898 pand mm4
, [qlowpixelMask
]
1899 paddw mm0
, mm4
;mm0 contains the interpolated value of A, B, C and D
1901 ;\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
1902 ;assemble the pixels
1905 movq mm4
, [eax+ebx+colorA
]
1906 movq mm5
, [eax+ebx+colorB
]
1915 por mm4
, mm0
;mm4 contains the diagonal pixels
; Advance edx by dstPitch and store the second output row at edx+8
; (fs: and es: variants, as for the first row).
1923 add edx, [ebp+dstPitch
]
1927 movq
[fs:edx+8], mm1
1930 movq
[es:edx+8], mm1
; Reload deltaPtr and write it back to the frame; the update applied
; to ecx in between is not visible in this excerpt.
1935 mov ecx, [ebp+deltaPtr
]
1937 mov [ebp+deltaPtr
], ecx
1946 ; Restore some stuff
1953 ;-------------------------------------------------------------------------
1954 ;-------------------------------------------------------------------------
1955 ;-------------------------------------------------------------------------
1956 ;-------------------------------------------------------------------------
1957 ;-------------------------------------------------------------------------
1958 ;-------------------------------------------------------------------------
1959 ;-------------------------------------------------------------------------
;----------------------------------------------------------------------
; Init_2xSaIMMX
; Reads the value at [ebp+8] (commented as PixelFormat) and loads the
; addresses of lowPixelMask and qlowpixelMask, each twice.
; NOTE(review): the instructions that test the pixel format and write
; through those addresses are not visible in this excerpt; presumably
; two format-specific paths patch the mask tables in the data section.
;----------------------------------------------------------------------
1964 NEWSYM Init_2xSaIMMX
1972 ;MMX detection (commented out below) is disabled; original note: "Damn thing doesn't work"
1975 ; test edx, 0x00800000 ;test bit 23
1976 ; jz end2 ;bit not set => no MMX detected
1978 mov eax, [ebp+8] ;PixelFormat
; A bare label as source operand yields its address (NASM/yasm syntax).
1992 mov eax, lowPixelMask
2000 mov eax, qlowpixelMask
; Second pair of address loads (second path, see note above).
2011 mov eax, lowPixelMask
2019 mov eax, qlowpixelMask
2031 ;-------------------------------------------------------------------------
2032 ;-------------------------------------------------------------------------
2033 ;-------------------------------------------------------------------------
2034 ;-------------------------------------------------------------------------
2035 ;-------------------------------------------------------------------------
2036 ;-------------------------------------------------------------------------
2037 ;-------------------------------------------------------------------------
;----------------------------------------------------------------------
; Mask and constant tables. Each entry is two dwords, i.e. one 64-bit
; MMX operand holding the same 16-bit pattern four times.
; The initial values fit a 5-6-5 bit layout (field LSBs at bits 0, 5
; and 11). Init_2xSaIMMX takes the addresses of lowPixelMask and
; qlowpixelMask, so those entries are presumably rewritten at run time.
;----------------------------------------------------------------------
2039 SECTION .data
ALIGN = 32
; 0xF7DE: every bit set except bits 0, 5 and 11 of each word.
2041 colorMask
dd 0xF7DEF7DE, 0xF7DEF7DE
; 0x0821: only bits 0, 5 and 11 of each word (complement of colorMask).
2042 lowPixelMask
dd 0x08210821, 0x08210821
; 0xE79C: every bit set except bits 0-1, 5-6 and 11-12 of each word.
2044 qcolorMask
dd 0xE79CE79C, 0xE79CE79C
; 0x1863: only bits 0-1, 5-6 and 11-12 of each word (complement of qcolorMask).
2045 qlowpixelMask
dd 0x18631863, 0x18631863
; All-zero quadword.
2047 FALSE
dd 0x00000000, 0x00000000
; All-ones quadword.
2048 TRUE
dd 0xffffffff, 0xffffffff
; 0x0001 in each of the four words.
2049 ONE
dd 0x00010001, 0x00010001
2052 SECTION .bss
ALIGN = 32