Replace Tmem_nasm.asm with C++ code. Patch by pyro.
[Glide64.git] / TexLoad8b.h
blob9b7230caa39f06d084b1170eca76722e47444a20
/*
 * Glide64 - Glide video plugin for Nintendo 64 emulators.
 * Copyright (c) 2002 Dave2001
 * Copyright (c) 2008 Günther <guenther.emu@freenet.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
//****************************************************************
//
// Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)
// Project started on December 29th, 2001
//
// To modify Glide64:
// * Write your name and (optional) email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
// * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
//
// Official Glide64 development channel: #Glide64 on EFnet
//
// Original author: Dave2001 (Dave2999@hotmail.com)
// Other authors: Gonetz, Gugaman
//
//****************************************************************
37 DWORD Load8bCI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
39 if (wid_64 < 1) wid_64 = 1;
40 if (height < 1) height = 1;
41 int ext = (real_width - (wid_64 << 3)) << 1;
42 unsigned short * pal = rdp.pal_8;
44 if (rdp.tlut_mode == 2)
46 #ifndef GCC
47 __asm {
48 mov ebx,dword ptr [pal]
50 mov esi,dword ptr [src]
51 mov edi,dword ptr [dst]
53 mov ecx,dword ptr [height]
54 y_loop:
55 push ecx
57 mov ecx,dword ptr [wid_64]
58 x_loop:
59 push ecx
61 mov eax,dword ptr [esi] // read all 4 pixels
62 bswap eax
63 add esi,4
64 mov edx,eax
66 // 1st dword output {
67 shr eax,15
68 and eax,0x1FE
69 mov cx,word ptr [ebx+eax]
70 ror cx,1
71 shl ecx,16
73 mov eax,edx
74 shr eax,23
75 and eax,0x1FE
76 mov cx,word ptr [ebx+eax]
77 ror cx,1
79 mov dword ptr [edi],ecx
80 add edi,4
81 // }
83 // 2nd dword output {
84 mov eax,edx
85 shl eax,1
86 and eax,0x1FE
87 mov cx,word ptr [ebx+eax]
88 ror cx,1
89 shl ecx,16
91 shr edx,7
92 and edx,0x1FE
93 mov cx,word ptr [ebx+edx]
94 ror cx,1
96 mov dword ptr [edi],ecx
97 add edi,4
98 // }
100 // * copy
101 mov eax,dword ptr [esi] // read all 4 pixels
102 bswap eax
103 add esi,4
104 mov edx,eax
106 // 1st dword output {
107 shr eax,15
108 and eax,0x1FE
109 mov cx,word ptr [ebx+eax]
110 ror cx,1
111 shl ecx,16
113 mov eax,edx
114 shr eax,23
115 and eax,0x1FE
116 mov cx,word ptr [ebx+eax]
117 ror cx,1
119 mov dword ptr [edi],ecx
120 add edi,4
121 // }
123 // 2nd dword output {
124 mov eax,edx
125 shl eax,1
126 and eax,0x1FE
127 mov cx,word ptr [ebx+eax]
128 ror cx,1
129 shl ecx,16
131 shr edx,7
132 and edx,0x1FE
133 mov cx,word ptr [ebx+edx]
134 ror cx,1
136 mov dword ptr [edi],ecx
137 add edi,4
138 // }
139 // *
141 pop ecx
143 dec ecx
144 jnz x_loop
146 pop ecx
147 dec ecx
148 jz end_y_loop
149 push ecx
151 add esi,dword ptr [line]
152 add edi,dword ptr [ext]
154 mov ecx,dword ptr [wid_64]
155 x_loop_2:
156 push ecx
158 mov eax,dword ptr [esi+4] // read all 4 pixels
159 bswap eax
160 mov edx,eax
162 // 1st dword output {
163 shr eax,15
164 and eax,0x1FE
165 mov cx,word ptr [ebx+eax]
166 ror cx,1
167 shl ecx,16
169 mov eax,edx
170 shr eax,23
171 and eax,0x1FE
172 mov cx,word ptr [ebx+eax]
173 ror cx,1
175 mov dword ptr [edi],ecx
176 add edi,4
177 // }
179 // 2nd dword output {
180 mov eax,edx
181 shl eax,1
182 and eax,0x1FE
183 mov cx,word ptr [ebx+eax]
184 ror cx,1
185 shl ecx,16
187 shr edx,7
188 and edx,0x1FE
189 mov cx,word ptr [ebx+edx]
190 ror cx,1
192 mov dword ptr [edi],ecx
193 add edi,4
194 // }
196 // * copy
197 mov eax,dword ptr [esi] // read all 4 pixels
198 bswap eax
199 add esi,8
200 mov edx,eax
202 // 1st dword output {
203 shr eax,15
204 and eax,0x1FE
205 mov cx,word ptr [ebx+eax]
206 ror cx,1
207 shl ecx,16
209 mov eax,edx
210 shr eax,23
211 and eax,0x1FE
212 mov cx,word ptr [ebx+eax]
213 ror cx,1
215 mov dword ptr [edi],ecx
216 add edi,4
217 // }
219 // 2nd dword output {
220 mov eax,edx
221 shl eax,1
222 and eax,0x1FE
223 mov cx,word ptr [ebx+eax]
224 ror cx,1
225 shl ecx,16
227 shr edx,7
228 and edx,0x1FE
229 mov cx,word ptr [ebx+edx]
230 ror cx,1
232 mov dword ptr [edi],ecx
233 add edi,4
234 // }
235 // *
237 pop ecx
239 dec ecx
240 jnz x_loop_2
242 add esi,dword ptr [line]
243 add edi,dword ptr [ext]
245 pop ecx
246 dec ecx
247 jnz y_loop
249 end_y_loop:
251 #else // _WIN32
252 //printf("Load8bCI1\n");
253 long lTempX, lTempY, lHeight = (long) height;
254 intptr_t fake_eax, fake_edx;
255 asm volatile (
256 "y_loop4: \n"
257 "mov %[c], %[tempy] \n"
259 "mov %[wid_64], %%ecx \n"
260 "x_loop4: \n"
261 "mov %[c], %[tempx] \n"
263 "mov (%[src]), %%eax \n" // read all 4 pixels
264 "bswap %%eax \n"
265 "add $4, %[src] \n"
266 "mov %%eax, %%edx \n"
268 // 1st dword output {
269 "shr $15, %%eax \n"
270 "and $0x1FE, %%eax \n"
271 "mov (%[pal],%[a]), %%cx \n"
272 "ror $1, %%cx \n"
273 "shl $16, %%ecx \n"
275 "mov %%edx, %%eax \n"
276 "shr $23, %%eax \n"
277 "and $0x1FE, %%eax \n"
278 "mov (%[pal],%[a]), %%cx \n"
279 "ror $1, %%cx \n"
281 "mov %%ecx, (%[dst]) \n"
282 "add $4, %[dst] \n"
283 // }
285 // 2nd dword output {
286 "mov %%edx, %%eax \n"
287 "shl $1, %%eax \n"
288 "and $0x1FE, %%eax \n"
289 "mov (%[pal],%[a]), %%cx \n"
290 "ror $1, %%cx \n"
291 "shl $16, %%ecx \n"
293 "shr $7, %%edx \n"
294 "and $0x1FE, %%edx \n"
295 "mov (%[pal],%[d]), %%cx \n"
296 "ror $1, %%cx \n"
298 "mov %%ecx, (%[dst]) \n"
299 "add $4, %[dst] \n"
300 // }
302 // * copy
303 "mov (%[src]), %%eax \n" // read all 4 pixels
304 "bswap %%eax \n"
305 "add $4, %[src] \n"
306 "mov %%eax, %%edx \n"
308 // 1st dword output {
309 "shr $15, %%eax \n"
310 "and $0x1FE, %%eax \n"
311 "mov (%[pal],%[a]), %%cx \n"
312 "ror $1, %%cx \n"
313 "shl $16, %%ecx \n"
315 "mov %%edx, %%eax \n"
316 "shr $23, %%eax \n"
317 "and $0x1FE, %%eax \n"
318 "mov (%[pal],%[a]), %%cx \n"
319 "ror $1, %%cx \n"
321 "mov %%ecx, (%[dst]) \n"
322 "add $4, %[dst] \n"
323 // }
325 // 2nd dword output {
326 "mov %%edx, %%eax \n"
327 "shl $1, %%eax \n"
328 "and $0x1FE, %%eax \n"
329 "mov (%[pal],%[a]), %%cx \n"
330 "ror $1, %%cx \n"
331 "shl $16, %%ecx \n"
333 "shr $7, %%edx \n"
334 "and $0x1FE, %%edx \n"
335 "mov (%[pal],%[d]), %%cx \n"
336 "ror $1, %%cx \n"
338 "mov %%ecx, (%[dst]) \n"
339 "add $4, %[dst] \n"
340 // }
341 // *
343 "mov %[tempx], %[c] \n"
345 "dec %%ecx \n"
346 "jnz x_loop4 \n"
348 "mov %[tempy], %[c] \n"
349 "dec %%ecx \n"
350 "jz end_y_loop4 \n"
351 "mov %[c], %[tempy] \n"
353 "add %[line], %[src] \n"
354 "add %[ext], %[dst] \n"
356 "mov %[wid_64], %%ecx \n"
357 "x_loop_24: \n"
358 "mov %[c], %[tempx] \n"
360 "mov 4(%[src]), %%eax \n" // read all 4 pixels
361 "bswap %%eax \n"
362 "mov %%eax, %%edx \n"
364 // 1st dword output {
365 "shr $15, %%eax \n"
366 "and $0x1FE, %%eax \n"
367 "mov (%[pal],%[a]), %%cx \n"
368 "ror $1, %%cx \n"
369 "shl $16, %%ecx \n"
371 "mov %%edx, %%eax \n"
372 "shr $23, %%eax \n"
373 "and $0x1FE, %%eax \n"
374 "mov (%[pal],%[a]), %%cx \n"
375 "ror $1, %%cx \n"
377 "mov %%ecx, (%[dst]) \n"
378 "add $4, %[dst] \n"
379 // }
381 // 2nd dword output {
382 "mov %%edx, %%eax \n"
383 "shl $1, %%eax \n"
384 "and $0x1FE, %%eax \n"
385 "mov (%[pal],%[a]), %%cx \n"
386 "ror $1, %%cx \n"
387 "shl $16, %%ecx \n"
389 "shr $7, %%edx \n"
390 "and $0x1FE, %%edx \n"
391 "mov (%[pal],%[d]), %%cx \n"
392 "ror $1, %%cx \n"
394 "mov %%ecx, (%[dst]) \n"
395 "add $4, %[dst] \n"
396 // }
398 // * copy
399 "mov (%[src]), %%eax \n" // read all 4 pixels
400 "bswap %%eax \n"
401 "add $8, %[src] \n"
402 "mov %%eax, %%edx \n"
404 // 1st dword output {
405 "shr $15, %%eax \n"
406 "and $0x1FE, %%eax \n"
407 "mov (%[pal],%[a]), %%cx \n"
408 "ror $1, %%cx \n"
409 "shl $16, %%ecx \n"
411 "mov %%edx, %%eax \n"
412 "shr $23, %%eax \n"
413 "and $0x1FE, %%eax \n"
414 "mov (%[pal],%[a]), %%cx \n"
415 "ror $1, %%cx \n"
417 "mov %%ecx, (%[dst]) \n"
418 "add $4, %[dst] \n"
419 // }
421 // 2nd dword output {
422 "mov %%edx, %%eax \n"
423 "shl $1, %%eax \n"
424 "and $0x1FE, %%eax \n"
425 "mov (%[pal],%[a]), %%cx \n"
426 "ror $1, %%cx \n"
427 "shl $16, %%ecx \n"
429 "shr $7, %%edx \n"
430 "and $0x1FE, %%edx \n"
431 "mov (%[pal],%[d]), %%cx \n"
432 "ror $1, %%cx \n"
434 "mov %%ecx, (%[dst]) \n"
435 "add $4, %[dst] \n"
436 // }
437 // *
439 "mov %[tempx], %[c] \n"
440 "dec %%ecx \n"
441 "jnz x_loop_24 \n"
443 "add %[line], %[src] \n"
444 "add %[ext], %[dst] \n"
446 "mov %[tempy], %[c] \n"
447 "dec %%ecx \n"
448 "jnz y_loop4 \n"
450 "end_y_loop4: \n"
451 : [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [a] "=&a" (fake_eax), [d] "=&d" (fake_edx), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight)
452 : [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
453 : "memory", "cc"
455 #endif // _WIN32
456 return (1 << 16) | GR_TEXFMT_ARGB_1555;
458 else
460 #ifndef GCC
461 __asm {
462 mov ebx,dword ptr [pal]
464 mov esi,dword ptr [src]
465 mov edi,dword ptr [dst]
467 mov ecx,dword ptr [height]
468 ia_y_loop:
469 push ecx
471 mov ecx,dword ptr [wid_64]
472 ia_x_loop:
473 push ecx
475 mov eax,dword ptr [esi] // read all 4 pixels
476 bswap eax
477 add esi,4
478 mov edx,eax
480 // 1st dword output {
481 shr eax,15
482 and eax,0x1FE
483 mov cx,word ptr [ebx+eax]
484 ror cx,8
485 shl ecx,16
487 mov eax,edx
488 shr eax,23
489 and eax,0x1FE
490 mov cx,word ptr [ebx+eax]
491 ror cx,8
493 mov dword ptr [edi],ecx
494 add edi,4
495 // }
497 // 2nd dword output {
498 mov eax,edx
499 shl eax,1
500 and eax,0x1FE
501 mov cx,word ptr [ebx+eax]
502 ror cx,8
503 shl ecx,16
505 shr edx,7
506 and edx,0x1FE
507 mov cx,word ptr [ebx+edx]
508 ror cx,8
510 mov dword ptr [edi],ecx
511 add edi,4
512 // }
514 // * copy
515 mov eax,dword ptr [esi] // read all 4 pixels
516 bswap eax
517 add esi,4
518 mov edx,eax
520 // 1st dword output {
521 shr eax,15
522 and eax,0x1FE
523 mov cx,word ptr [ebx+eax]
524 ror cx,8
525 shl ecx,16
527 mov eax,edx
528 shr eax,23
529 and eax,0x1FE
530 mov cx,word ptr [ebx+eax]
531 ror cx,8
533 mov dword ptr [edi],ecx
534 add edi,4
535 // }
537 // 2nd dword output {
538 mov eax,edx
539 shl eax,1
540 and eax,0x1FE
541 mov cx,word ptr [ebx+eax]
542 ror cx,8
543 shl ecx,16
545 shr edx,7
546 and edx,0x1FE
547 mov cx,word ptr [ebx+edx]
548 ror cx,8
550 mov dword ptr [edi],ecx
551 add edi,4
552 // }
553 // *
555 pop ecx
557 dec ecx
558 jnz ia_x_loop
560 pop ecx
561 dec ecx
562 jz ia_end_y_loop
563 push ecx
565 add esi,dword ptr [line]
566 add edi,dword ptr [ext]
568 mov ecx,dword ptr [wid_64]
569 ia_x_loop_2:
570 push ecx
572 mov eax,dword ptr [esi+4] // read all 4 pixels
573 bswap eax
574 mov edx,eax
576 // 1st dword output {
577 shr eax,15
578 and eax,0x1FE
579 mov cx,word ptr [ebx+eax]
580 ror cx,8
581 shl ecx,16
583 mov eax,edx
584 shr eax,23
585 and eax,0x1FE
586 mov cx,word ptr [ebx+eax]
587 ror cx,8
589 mov dword ptr [edi],ecx
590 add edi,4
591 // }
593 // 2nd dword output {
594 mov eax,edx
595 shl eax,1
596 and eax,0x1FE
597 mov cx,word ptr [ebx+eax]
598 ror cx,8
599 shl ecx,16
601 shr edx,7
602 and edx,0x1FE
603 mov cx,word ptr [ebx+edx]
604 ror cx,8
606 mov dword ptr [edi],ecx
607 add edi,4
608 // }
610 // * copy
611 mov eax,dword ptr [esi] // read all 4 pixels
612 bswap eax
613 add esi,8
614 mov edx,eax
616 // 1st dword output {
617 shr eax,15
618 and eax,0x1FE
619 mov cx,word ptr [ebx+eax]
620 ror cx,8
621 shl ecx,16
623 mov eax,edx
624 shr eax,23
625 and eax,0x1FE
626 mov cx,word ptr [ebx+eax]
627 ror cx,8
629 mov dword ptr [edi],ecx
630 add edi,4
631 // }
633 // 2nd dword output {
634 mov eax,edx
635 shl eax,1
636 and eax,0x1FE
637 mov cx,word ptr [ebx+eax]
638 ror cx,8
639 shl ecx,16
641 shr edx,7
642 and edx,0x1FE
643 mov cx,word ptr [ebx+edx]
644 ror cx,8
646 mov dword ptr [edi],ecx
647 add edi,4
648 // }
649 // *
651 pop ecx
653 dec ecx
654 jnz ia_x_loop_2
656 add esi,dword ptr [line]
657 add edi,dword ptr [ext]
659 pop ecx
660 dec ecx
661 jnz ia_y_loop
663 ia_end_y_loop:
665 #else // _WIN32
666 //printf("Load8bCI1\n");
667 long lTempX, lTempY, lHeight = (long) height;
668 intptr_t fake_eax, fake_edx;
669 asm volatile (
670 "ia_y_loop2: \n"
671 "mov %[c], %[tempy] \n"
673 "mov %[wid_64], %%ecx \n"
674 "ia_x_loop2: \n"
675 "mov %[c], %[tempx] \n"
677 "mov (%[src]), %%eax \n" // read all 4 pixels
678 "bswap %%eax \n"
679 "add $4, %[src] \n"
680 "mov %%eax, %%edx \n"
682 // 1st dword output {
683 "shr $15, %%eax \n"
684 "and $0x1FE, %%eax \n"
685 "mov (%[pal],%[a]), %%cx \n"
686 "ror $8, %%cx \n"
687 "shl $16, %%ecx \n"
689 "mov %%edx, %%eax \n"
690 "shr $23, %%eax \n"
691 "and $0x1FE, %%eax \n"
692 "mov (%[pal],%[a]), %%cx \n"
693 "ror $8, %%cx \n"
695 "mov %%ecx, (%[dst]) \n"
696 "add $4, %[dst] \n"
697 // }
699 // 2nd dword output {
700 "mov %%edx, %%eax \n"
701 "shl $1, %%eax \n"
702 "and $0x1FE, %%eax \n"
703 "mov (%[pal],%[a]), %%cx \n"
704 "ror $8, %%cx \n"
705 "shl $16, %%ecx \n"
707 "shr $7, %%edx \n"
708 "and $0x1FE, %%edx \n"
709 "mov (%[pal],%[d]), %%cx \n"
710 "ror $8, %%cx \n"
712 "mov %%ecx, (%[dst]) \n"
713 "add $4, %[dst] \n"
714 // }
716 // * copy
717 "mov (%[src]), %%eax \n" // read all 4 pixels
718 "bswap %%eax \n"
719 "add $4, %[src] \n"
720 "mov %%eax, %%edx \n"
722 // 1st dword output {
723 "shr $15, %%eax \n"
724 "and $0x1FE, %%eax \n"
725 "mov (%[pal],%[a]), %%cx \n"
726 "ror $8, %%cx \n"
727 "shl $16, %%ecx \n"
729 "mov %%edx, %%eax \n"
730 "shr $23, %%eax \n"
731 "and $0x1FE, %%eax \n"
732 "mov (%[pal],%[a]), %%cx \n"
733 "ror $8, %%cx \n"
735 "mov %%ecx, (%[dst]) \n"
736 "add $4, %[dst] \n"
737 // }
739 // 2nd dword output {
740 "mov %%edx, %%eax \n"
741 "shl $1, %%eax \n"
742 "and $0x1FE, %%eax \n"
743 "mov (%[pal],%[a]), %%cx \n"
744 "ror $8, %%cx \n"
745 "shl $16, %%ecx \n"
747 "shr $7, %%edx \n"
748 "and $0x1FE, %%edx \n"
749 "mov (%[pal],%[d]), %%cx \n"
750 "ror $8, %%cx \n"
752 "mov %%ecx, (%[dst]) \n"
753 "add $4, %[dst] \n"
754 // }
755 // *
757 "mov %[tempx], %[c] \n"
758 "dec %%ecx \n"
759 "jnz ia_x_loop2 \n"
761 "mov %[tempy], %[c] \n"
762 "dec %%ecx \n"
763 "jz ia_end_y_loop2 \n"
764 "mov %[c], %[tempy] \n"
766 "add %[line], %[src] \n"
767 "add %[ext], %[dst] \n"
769 "mov %[wid_64], %%ecx \n"
770 "ia_x_loop_22: \n"
771 "mov %[c], %[tempx] \n"
773 "mov 4(%[src]), %%eax \n" // read all 4 pixels
774 "bswap %%eax \n"
775 "mov %%eax, %%edx \n"
777 // 1st dword output {
778 "shr $15, %%eax \n"
779 "and $0x1FE, %%eax \n"
780 "mov (%[pal],%[a]), %%cx \n"
781 "ror $8, %%cx \n"
782 "shl $16, %%ecx \n"
784 "mov %%edx, %%eax \n"
785 "shr $23, %%eax \n"
786 "and $0x1FE, %%eax \n"
787 "mov (%[pal],%[a]), %%cx \n"
788 "ror $8, %%cx \n"
790 "mov %%ecx, (%[dst]) \n"
791 "add $4, %[dst] \n"
792 // }
794 // 2nd dword output {
795 "mov %%edx, %%eax \n"
796 "shl $1, %%eax \n"
797 "and $0x1FE, %%eax \n"
798 "mov (%[pal],%[a]), %%cx \n"
799 "ror $8, %%cx \n"
800 "shl $16, %%ecx \n"
802 "shr $7, %%edx \n"
803 "and $0x1FE, %%edx \n"
804 "mov (%[pal],%[d]), %%cx \n"
805 "ror $8, %%cx \n"
807 "mov %%ecx, (%[dst]) \n"
808 "add $4, %[dst] \n"
809 // }
811 // * copy
812 "mov (%[src]), %%eax \n" // read all 4 pixels
813 "bswap %%eax \n"
814 "add $8, %[src] \n"
815 "mov %%eax, %%edx \n"
817 // 1st dword output {
818 "shr $15, %%eax \n"
819 "and $0x1FE, %%eax \n"
820 "mov (%[pal],%[a]), %%cx \n"
821 "ror $8, %%cx \n"
822 "shl $16, %%ecx \n"
824 "mov %%edx, %%eax \n"
825 "shr $23, %%eax \n"
826 "and $0x1FE, %%eax \n"
827 "mov (%[pal],%[a]), %%cx \n"
828 "ror $8, %%cx \n"
830 "mov %%ecx, (%[dst]) \n"
831 "add $4, %[dst] \n"
832 // }
834 // 2nd dword output {
835 "mov %%edx, %%eax \n"
836 "shl $1, %%eax \n"
837 "and $0x1FE, %%eax \n"
838 "mov (%[pal],%[a]), %%cx \n"
839 "ror $8, %%cx \n"
840 "shl $16, %%ecx \n"
842 "shr $7, %%edx \n"
843 "and $0x1FE, %%edx \n"
844 "mov (%[pal],%[d]), %%cx \n"
845 "ror $8, %%cx \n"
847 "mov %%ecx, (%[dst]) \n"
848 "add $4, %[dst] \n"
849 // }
850 // *
852 "mov %[tempx], %[c] \n"
853 "dec %%ecx \n"
854 "jnz ia_x_loop_22 \n"
856 "add %[line], %[src] \n"
857 "add %[ext], %[dst] \n"
859 "mov %[tempy], %[c] \n"
860 "dec %%ecx \n"
861 "jnz ia_y_loop2 \n"
863 "ia_end_y_loop2: \n"
864 : [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [a] "=&a" (fake_eax), [d] "=&d" (fake_edx), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight)
865 : [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
866 : "memory", "cc"
868 #endif // _WIN32
869 return (1 << 16) | GR_TEXFMT_ALPHA_INTENSITY_88;
872 return 0;
//****************************************************************
// Size: 1, Format: 3
//
// ** by Gugaman **
880 DWORD Load8bIA (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
882 if (rdp.tlut_mode != 0)
883 return Load8bCI (dst, src, wid_64, height, line, real_width, tile);
885 if (wid_64 < 1) wid_64 = 1;
886 if (height < 1) height = 1;
887 int ext = (real_width - (wid_64 << 3));
888 #ifndef GCC
889 __asm {
890 mov esi,dword ptr [src]
891 mov edi,dword ptr [dst]
893 mov ecx,dword ptr [height]
894 y_loop:
895 push ecx
897 mov ecx,dword ptr [wid_64]
898 x_loop:
899 mov eax,dword ptr [esi] // read all 4 pixels
900 add esi,4
902 xor ebx,ebx
903 mov edx,eax
904 shr eax,4//all alpha
905 and eax,0x0F0F0F0F
906 or ebx,eax
907 mov eax,edx//intensity
908 shl eax,4
909 and eax,0xF0F0F0F0
910 or ebx,eax
912 mov dword ptr [edi],ebx // save dword
913 add edi,4
915 mov eax,dword ptr [esi] // read all 4 pixels
916 add esi,4
918 xor ebx,ebx
919 mov edx,eax
920 shr eax,4//all alpha
921 and eax,0x0F0F0F0F
922 or ebx,eax
923 mov eax,edx//intensity
924 shl eax,4
925 and eax,0xF0F0F0F0
926 or ebx,eax
928 mov dword ptr [edi],ebx // save dword
929 add edi,4
930 // *
932 dec ecx
933 jnz x_loop
935 pop ecx
936 dec ecx
937 jz end_y_loop
938 push ecx
940 add esi,dword ptr [line]
941 add edi,dword ptr [ext]
943 mov ecx,dword ptr [wid_64]
944 x_loop_2:
945 mov eax,dword ptr [esi+4] // read both pixels
947 xor ebx,ebx
948 mov edx,eax
949 shr eax,4//all alpha
950 and eax,0x0F0F0F0F
951 or ebx,eax
952 mov eax,edx//intensity
953 shl eax,4
954 and eax,0xF0F0F0F0
955 or ebx,eax
957 mov dword ptr [edi],ebx //save dword
958 add edi,4
960 mov eax,dword ptr [esi] // read both pixels
961 add esi,8
963 xor ebx,ebx
964 mov edx,eax
965 shr eax,4//all alpha
966 and eax,0x0F0F0F0F
967 or ebx,eax
968 mov eax,edx//intensity
969 shl eax,4
970 and eax,0xF0F0F0F0
971 or ebx,eax
973 mov dword ptr [edi],ebx //save dword
974 add edi,4
975 // *
977 dec ecx
978 jnz x_loop_2
980 add esi,dword ptr [line]
981 add edi,dword ptr [ext]
983 pop ecx
984 dec ecx
985 jnz y_loop
987 end_y_loop:
989 #else // _WIN32
990 //printf("Load8bIA\n");
991 long lTemp, lHeight = (long) height;
992 asm volatile (
993 "y_loop5: \n"
994 "mov %[c], %[temp] \n"
996 "mov %[wid_64], %%ecx \n"
997 "x_loop5: \n"
998 "mov (%[src]), %%eax \n" // read all 4 pixels
999 "add $4, %[src] \n"
1001 "xor %%ebx, %%ebx \n"
1002 "mov %%eax, %%edx \n"
1003 "shr $4, %%eax \n"//all alpha
1004 "and $0x0F0F0F0F, %%eax \n"
1005 "or %%eax, %%ebx \n"
1006 "mov %%edx, %%eax \n"//intensity
1007 "shl $4, %%eax \n"
1008 "and $0xF0F0F0F0, %%eax \n"
1009 "or %%eax, %%ebx \n"
1011 "mov %%ebx, (%[dst]) \n" // save dword
1012 "add $4, %[dst] \n"
1014 "mov (%[src]), %%eax \n" // read all 4 pixels
1015 "add $4, %[src] \n"
1017 "xor %%ebx, %%ebx \n"
1018 "mov %%eax, %%edx \n"
1019 "shr $4, %%eax \n"//all alpha
1020 "and $0x0F0F0F0F, %%eax \n"
1021 "or %%eax, %%ebx \n"
1022 "mov %%edx, %%eax \n"//intensity
1023 "shl $4, %%eax \n"
1024 "and $0xF0F0F0F0, %%eax \n"
1025 "or %%eax, %%ebx \n"
1027 "mov %%ebx, (%[dst]) \n" // save dword
1028 "add $4, %[dst] \n"
1029 // *
1031 "dec %%ecx \n"
1032 "jnz x_loop5 \n"
1034 "mov %[temp], %[c] \n"
1035 "dec %%ecx \n"
1036 "jz end_y_loop5 \n"
1037 "mov %[c], %[temp] \n"
1039 "add %[line], %[src] \n"
1040 "add %[ext], %[dst] \n"
1042 "mov %[wid_64], %%ecx \n"
1043 "x_loop_25: \n"
1044 "mov 4(%[src]), %%eax \n" // read both pixels
1046 "xor %%ebx, %%ebx \n"
1047 "mov %%eax, %%edx \n"
1048 "shr $4, %%eax \n"//all alpha
1049 "and $0x0F0F0F0F, %%eax \n"
1050 "or %%eax, %%ebx \n"
1051 "mov %%edx, %%eax \n"//intensity
1052 "shl $4, %%eax \n"
1053 "and $0xF0F0F0F0, %%eax \n"
1054 "or %%eax, %%ebx \n"
1056 "mov %%ebx, (%[dst]) \n" //save dword
1057 "add $4, %[dst] \n"
1059 "mov (%[src]), %%eax \n" // read both pixels
1060 "add $8, %[src] \n"
1062 "xor %%ebx, %%ebx \n"
1063 "mov %%eax, %%edx \n"
1064 "shr $4, %%eax \n"//all alpha
1065 "and $0x0F0F0F0F, %%eax \n"
1066 "or %%eax, %%ebx \n"
1067 "mov %%edx, %%eax \n"//intensity
1068 "shl $4, %%eax \n"
1069 "and $0xF0F0F0F0, %%eax \n"
1070 "or %%eax, %%ebx \n"
1072 "mov %%ebx, (%[dst]) \n" //save dword
1073 "add $4, %[dst] \n"
1074 // *
1076 "dec %%ecx \n"
1077 "jnz x_loop_25 \n"
1079 "add %[line], %[src] \n"
1080 "add %[ext], %[dst] \n"
1082 "mov %[temp], %[c] \n"
1083 "dec %%ecx \n"
1084 "jnz y_loop5 \n"
1086 "end_y_loop5: \n"
1087 : [temp]"=m"(lTemp), [src] "+S"(src), [dst] "+D"(dst), [c] "+c"(lHeight)
1088 : [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
1089 : "memory", "cc", "eax", "edx", "ebx"
1091 #endif // _WIN32
1092 return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;
//****************************************************************
// Size: 1, Format: 4
//
// ** by Gugaman **
1100 DWORD Load8bI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
1102 if (rdp.tlut_mode != 0)
1103 return Load8bCI (dst, src, wid_64, height, line, real_width, tile);
1105 if (wid_64 < 1) wid_64 = 1;
1106 if (height < 1) height = 1;
1107 int ext = (real_width - (wid_64 << 3));
1108 #ifndef GCC
1109 __asm {
1110 mov esi,dword ptr [src]
1111 mov edi,dword ptr [dst]
1113 mov ecx,dword ptr [height]
1114 y_loop:
1115 push ecx
1117 mov ecx,dword ptr [wid_64]
1118 x_loop:
1119 mov eax,dword ptr [esi] // read all 4 pixels
1120 add esi,4
1122 mov dword ptr [edi],eax // save dword
1123 add edi,4
1125 mov eax,dword ptr [esi] // read all 4 pixels
1126 add esi,4
1128 mov dword ptr [edi],eax // save dword
1129 add edi,4
1130 // *
1132 dec ecx
1133 jnz x_loop
1135 pop ecx
1136 dec ecx
1137 jz end_y_loop
1138 push ecx
1140 add esi,dword ptr [line]
1141 add edi,dword ptr [ext]
1143 mov ecx,dword ptr [wid_64]
1144 x_loop_2:
1145 mov eax,dword ptr [esi+4] // read both pixels
1147 mov dword ptr [edi],eax //save dword
1148 add edi,4
1150 mov eax,dword ptr [esi] // read both pixels
1151 add esi,8
1153 mov dword ptr [edi],eax //save dword
1154 add edi,4
1155 // *
1157 dec ecx
1158 jnz x_loop_2
1160 add esi,dword ptr [line]
1161 add edi,dword ptr [ext]
1163 pop ecx
1164 dec ecx
1165 jnz y_loop
1167 end_y_loop:
1169 #else // _WIN32
1170 //printf("Load8bI\n");
1171 long lTemp, lHeight = (long) height;
1172 asm volatile (
1173 "y_loop6: \n"
1174 "mov %[c], %[temp] \n"
1176 "mov %[wid_64], %%ecx \n"
1177 "x_loop6: \n"
1178 "mov (%[src]), %%eax \n" // read all 4 pixels
1179 "add $4, %[src] \n"
1181 "mov %%eax, (%[dst]) \n" // save dword
1182 "add $4, %[dst] \n"
1184 "mov (%[src]), %%eax \n" // read all 4 pixels
1185 "add $4, %[src] \n"
1187 "mov %%eax, (%[dst]) \n" // save dword
1188 "add $4, %[dst] \n"
1189 // *
1191 "dec %%ecx \n"
1192 "jnz x_loop6 \n"
1194 "mov %[temp], %[c] \n"
1195 "dec %%ecx \n"
1196 "jz end_y_loop6 \n"
1197 "mov %[c], %[temp] \n"
1199 "add %[line], %[src] \n"
1200 "add %[ext], %[dst] \n"
1202 "mov %[wid_64], %%ecx \n"
1203 "x_loop_26: \n"
1204 "mov 4(%[src]), %%eax \n" // read both pixels
1206 "mov %%eax, (%[dst]) \n" //save dword
1207 "add $4, %[dst] \n"
1209 "mov (%[src]), %%eax \n" // read both pixels
1210 "add $8, %[src] \n"
1212 "mov %%eax, (%[dst]) \n" //save dword
1213 "add $4, %[dst] \n"
1214 // *
1216 "dec %%ecx \n"
1217 "jnz x_loop_26 \n"
1219 "add %[line], %[src] \n"
1220 "add %[ext], %[dst] \n"
1222 "mov %[temp], %[c] \n"
1223 "dec %%ecx \n"
1224 "jnz y_loop6 \n"
1226 "end_y_loop6: \n"
1227 : [temp]"=m"(lTemp), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight)
1228 : [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
1229 : "memory", "cc", "eax", "edx", "ebx"
1231 #endif // _WIN32
1232 return /*(0 << 16) | */GR_TEXFMT_ALPHA_8;