TexLoad8b.h

   1 /*
   2 *       Glide64 - Glide video plugin for Nintendo 64 emulators.
   3 *       Copyright (c) 2002  Dave2001
   4 *       Copyright (c) 2008  Günther <guenther.emu@freenet.de>
   5 *
   6 *       This program is free software; you can redistribute it and/or modify
   7 *       it under the terms of the GNU General Public License as published by
   8 *       the Free Software Foundation; either version 2 of the License, or
   9 *       any later version.
  10 *
  11 *       This program is distributed in the hope that it will be useful,
  12 *       but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 *       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 *       GNU General Public License for more details.
  15 *
  16 *       You should have received a copy of the GNU General Public License
  17 *       along with this program; if not, write to the Free Software
  18 *       Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  19 */
  20
  21 //****************************************************************
  22 //
  23 // Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)
  24 // Project started on December 29th, 2001
  25 //
  26 // To modify Glide64:
   27 // * Write your name and (optionally) your email in a comment next to your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
  28 // * Do NOT send me the whole project or file that you modified.  Take out your modified code sections, and tell me where to put them.  If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
  29 //
  30 // Official Glide64 development channel: #Glide64 on EFnet
  31 //
  32 // Original author: Dave2001 (Dave2999@hotmail.com)
  33 // Other authors: Gonetz, Gugaman
  34 //
  35 //****************************************************************
  36
  37 DWORD Load8bCI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
  38 {
  39         if (wid_64 < 1) wid_64 = 1;
  40         if (height < 1) height = 1;
  41         int ext = (real_width - (wid_64 << 3)) << 1;
  42         unsigned short * pal = rdp.pal_8;
  43
  44         if (rdp.tlut_mode == 2)
  45         {
  46 #ifndef GCC
  47                 __asm {
  48                         mov ebx,dword ptr [pal]
  49
  50                                 mov esi,dword ptr [src]
  51                                 mov edi,dword ptr [dst]
  52
  53                                 mov ecx,dword ptr [height]
  54 y_loop:
  55                         push ecx
  56
  57                                 mov ecx,dword ptr [wid_64]
  58 x_loop:
  59                         push ecx
  60
  61                                 mov eax,dword ptr [esi]         // read all 4 pixels
  62                                 bswap eax
  63                                 add esi,4
  64                                 mov edx,eax
  65
  66                                 // 1st dword output {
  67                                 shr eax,15
  68                                 and eax,0x1FE
  69                                 mov cx,word ptr [ebx+eax]
  70                                 ror cx,1
  71                                 shl ecx,16
  72
  73                                 mov eax,edx
  74                                 shr eax,23
  75                                 and eax,0x1FE
  76                                 mov cx,word ptr [ebx+eax]
  77                                 ror cx,1
  78
  79                                 mov dword ptr [edi],ecx
  80                                 add edi,4
  81                                 // }
  82
  83                                 // 2nd dword output {
  84                                 mov eax,edx
  85                                 shl eax,1
  86                                 and eax,0x1FE
  87                                 mov cx,word ptr [ebx+eax]
  88                                 ror cx,1
  89                                 shl ecx,16
  90
  91                                 shr edx,7
  92                                 and edx,0x1FE
  93                                 mov cx,word ptr [ebx+edx]
  94                                 ror cx,1
  95
  96                                 mov dword ptr [edi],ecx
  97                                 add edi,4
  98                                 // }
  99
 100                                 // * copy
 101                                 mov eax,dword ptr [esi]         // read all 4 pixels
 102                                 bswap eax
 103                                 add esi,4
 104                                 mov edx,eax
 105
 106                                 // 1st dword output {
 107                                 shr eax,15
 108                                 and eax,0x1FE
 109                                 mov cx,word ptr [ebx+eax]
 110                                 ror cx,1
 111                                 shl ecx,16
 112
 113                                 mov eax,edx
 114                                 shr eax,23
 115                                 and eax,0x1FE
 116                                 mov cx,word ptr [ebx+eax]
 117                                 ror cx,1
 118
 119                                 mov dword ptr [edi],ecx
 120                                 add edi,4
 121                                 // }
 122
 123                                 // 2nd dword output {
 124                                 mov eax,edx
 125                                 shl eax,1
 126                                 and eax,0x1FE
 127                                 mov cx,word ptr [ebx+eax]
 128                                 ror cx,1
 129                                 shl ecx,16
 130
 131                                 shr edx,7
 132                                 and edx,0x1FE
 133                                 mov cx,word ptr [ebx+edx]
 134                                 ror cx,1
 135
 136                                 mov dword ptr [edi],ecx
 137                                 add edi,4
 138                                 // }
 139                                 // *
 140
 141                                 pop ecx
 142
 143                                 dec ecx
 144                                 jnz x_loop
 145
 146                                 pop ecx
 147                                 dec ecx
 148                                 jz end_y_loop
 149                                 push ecx
 150
 151                                 add esi,dword ptr [line]
 152                                 add edi,dword ptr [ext]
 153
 154                                 mov ecx,dword ptr [wid_64]
 155 x_loop_2:
 156                         push ecx
 157
 158                                 mov eax,dword ptr [esi+4]               // read all 4 pixels
 159                                 bswap eax
 160                                 mov edx,eax
 161
 162                                 // 1st dword output {
 163                                 shr eax,15
 164                                 and eax,0x1FE
 165                                 mov cx,word ptr [ebx+eax]
 166                                 ror cx,1
 167                                 shl ecx,16
 168
 169                                 mov eax,edx
 170                                 shr eax,23
 171                                 and eax,0x1FE
 172                                 mov cx,word ptr [ebx+eax]
 173                                 ror cx,1
 174
 175                                 mov dword ptr [edi],ecx
 176                                 add edi,4
 177                                 // }
 178
 179                                 // 2nd dword output {
 180                                 mov eax,edx
 181                                 shl eax,1
 182                                 and eax,0x1FE
 183                                 mov cx,word ptr [ebx+eax]
 184                                 ror cx,1
 185                                 shl ecx,16
 186
 187                                 shr edx,7
 188                                 and edx,0x1FE
 189                                 mov cx,word ptr [ebx+edx]
 190                                 ror cx,1
 191
 192                                 mov dword ptr [edi],ecx
 193                                 add edi,4
 194                                 // }
 195
 196                                 // * copy
 197                                 mov eax,dword ptr [esi]         // read all 4 pixels
 198                                 bswap eax
 199                                 add esi,8
 200                                 mov edx,eax
 201
 202                                 // 1st dword output {
 203                                 shr eax,15
 204                                 and eax,0x1FE
 205                                 mov cx,word ptr [ebx+eax]
 206                                 ror cx,1
 207                                 shl ecx,16
 208
 209                                 mov eax,edx
 210                                 shr eax,23
 211                                 and eax,0x1FE
 212                                 mov cx,word ptr [ebx+eax]
 213                                 ror cx,1
 214
 215                                 mov dword ptr [edi],ecx
 216                                 add edi,4
 217                                 // }
 218
 219                                 // 2nd dword output {
 220                                 mov eax,edx
 221                                 shl eax,1
 222                                 and eax,0x1FE
 223                                 mov cx,word ptr [ebx+eax]
 224                                 ror cx,1
 225                                 shl ecx,16
 226
 227                                 shr edx,7
 228                                 and edx,0x1FE
 229                                 mov cx,word ptr [ebx+edx]
 230                                 ror cx,1
 231
 232                                 mov dword ptr [edi],ecx
 233                                 add edi,4
 234                                 // }
 235                                 // *
 236
 237                                 pop ecx
 238
 239                                 dec ecx
 240                                 jnz x_loop_2
 241
 242                                 add esi,dword ptr [line]
 243                                 add edi,dword ptr [ext]
 244
 245                                 pop ecx
 246                                 dec ecx
 247                                 jnz y_loop
 248
 249 end_y_loop:
 250                 }
 251 #else // _WIN32
 252            //printf("Load8bCI1\n");
 253        long lTempX, lTempY, lHeight = (long) height;
 254        intptr_t fake_eax, fake_edx;
 255            asm volatile (
 256                          "y_loop4:                \n"
 257              "mov %[c], %[tempy]     \n"
 258
 259                          "mov %[wid_64], %%ecx   \n"
 260                          "x_loop4:                \n"
 261              "mov %[c], %[tempx]     \n"
 262
 263                          "mov (%[src]), %%eax      \n"          // read all 4 pixels
 264                          "bswap %%eax             \n"
 265                          "add $4, %[src]           \n"
 266                          "mov %%eax, %%edx        \n"
 267
 268                          // 1st dword output {
 269                          "shr $15, %%eax          \n"
 270                          "and $0x1FE, %%eax       \n"
 271                          "mov (%[pal],%[a]), %%cx \n"
 272                          "ror $1, %%cx            \n"
 273                          "shl $16, %%ecx          \n"
 274
 275                          "mov %%edx, %%eax        \n"
 276                          "shr $23, %%eax          \n"
 277                          "and $0x1FE, %%eax       \n"
 278                          "mov (%[pal],%[a]), %%cx \n"
 279                          "ror $1, %%cx            \n"
 280
 281                          "mov %%ecx, (%[dst])      \n"
 282                          "add $4, %[dst]           \n"
 283                          // }
 284
 285                          // 2nd dword output {
 286                          "mov %%edx, %%eax        \n"
 287                          "shl $1, %%eax           \n"
 288                          "and $0x1FE, %%eax       \n"
 289                          "mov (%[pal],%[a]), %%cx \n"
 290                          "ror $1, %%cx            \n"
 291                          "shl $16, %%ecx          \n"
 292
 293                          "shr $7, %%edx           \n"
 294                          "and $0x1FE, %%edx       \n"
 295                          "mov (%[pal],%[d]), %%cx \n"
 296                          "ror $1, %%cx            \n"
 297
 298                          "mov %%ecx, (%[dst])      \n"
 299                          "add $4, %[dst]           \n"
 300                          // }
 301
 302                          // * copy
 303                          "mov (%[src]), %%eax      \n"          // read all 4 pixels
 304                          "bswap %%eax             \n"
 305                          "add $4, %[src]           \n"
 306                          "mov %%eax, %%edx        \n"
 307
 308                          // 1st dword output {
 309                          "shr $15, %%eax          \n"
 310                          "and $0x1FE, %%eax       \n"
 311                          "mov (%[pal],%[a]), %%cx \n"
 312                          "ror $1, %%cx            \n"
 313                          "shl $16, %%ecx          \n"
 314
 315                          "mov %%edx, %%eax        \n"
 316                          "shr $23, %%eax          \n"
 317                          "and $0x1FE, %%eax       \n"
 318                          "mov (%[pal],%[a]), %%cx \n"
 319                          "ror $1, %%cx            \n"
 320
 321                          "mov %%ecx, (%[dst])      \n"
 322                          "add $4, %[dst]           \n"
 323                          // }
 324
 325                          // 2nd dword output {
 326                          "mov %%edx, %%eax        \n"
 327                          "shl $1, %%eax           \n"
 328                          "and $0x1FE, %%eax       \n"
 329                          "mov (%[pal],%[a]), %%cx \n"
 330                          "ror $1, %%cx            \n"
 331                          "shl $16, %%ecx          \n"
 332
 333                          "shr $7, %%edx           \n"
 334                          "and $0x1FE, %%edx       \n"
 335                          "mov (%[pal],%[d]), %%cx \n"
 336                          "ror $1, %%cx            \n"
 337
 338                          "mov %%ecx, (%[dst])      \n"
 339                          "add $4, %[dst]           \n"
 340                          // }
 341                          // *
 342
 343              "mov %[tempx], %[c]     \n"
 344
 345                          "dec %%ecx               \n"
 346                          "jnz x_loop4             \n"
 347
 348              "mov %[tempy], %[c]     \n"
 349                          "dec %%ecx               \n"
 350                          "jz end_y_loop4          \n"
 351              "mov %[c], %[tempy]     \n"
 352
 353                          "add %[line], %[src]     \n"
 354                          "add %[ext], %[dst]      \n"
 355
 356                          "mov %[wid_64], %%ecx   \n"
 357                          "x_loop_24:              \n"
 358              "mov %[c], %[tempx]     \n"
 359
 360                          "mov 4(%[src]), %%eax     \n"          // read all 4 pixels
 361                          "bswap %%eax             \n"
 362                          "mov %%eax, %%edx        \n"
 363
 364                          // 1st dword output {
 365                          "shr $15, %%eax          \n"
 366                          "and $0x1FE, %%eax       \n"
 367                          "mov (%[pal],%[a]), %%cx \n"
 368                          "ror $1, %%cx            \n"
 369                          "shl $16, %%ecx          \n"
 370
 371                          "mov %%edx, %%eax        \n"
 372                          "shr $23, %%eax          \n"
 373                          "and $0x1FE, %%eax       \n"
 374                          "mov (%[pal],%[a]), %%cx \n"
 375                          "ror $1, %%cx            \n"
 376
 377                          "mov %%ecx, (%[dst])      \n"
 378                          "add $4, %[dst]           \n"
 379                          // }
 380
 381                          // 2nd dword output {
 382                          "mov %%edx, %%eax        \n"
 383                          "shl $1, %%eax           \n"
 384                          "and $0x1FE, %%eax       \n"
 385                          "mov (%[pal],%[a]), %%cx \n"
 386                          "ror $1, %%cx            \n"
 387                          "shl $16, %%ecx          \n"
 388
 389                          "shr $7, %%edx           \n"
 390                          "and $0x1FE, %%edx       \n"
 391                          "mov (%[pal],%[d]), %%cx \n"
 392                          "ror $1, %%cx            \n"
 393
 394                          "mov %%ecx, (%[dst])      \n"
 395                          "add $4, %[dst]           \n"
 396                          // }
 397
 398                          // * copy
 399                          "mov (%[src]), %%eax      \n"          // read all 4 pixels
 400                          "bswap %%eax             \n"
 401                          "add $8, %[src]           \n"
 402                          "mov %%eax, %%edx        \n"
 403
 404                          // 1st dword output {
 405                          "shr $15, %%eax          \n"
 406                          "and $0x1FE, %%eax       \n"
 407                          "mov (%[pal],%[a]), %%cx \n"
 408                          "ror $1, %%cx            \n"
 409                          "shl $16, %%ecx          \n"
 410
 411                          "mov %%edx, %%eax        \n"
 412                          "shr $23, %%eax          \n"
 413                          "and $0x1FE, %%eax       \n"
 414                          "mov (%[pal],%[a]), %%cx \n"
 415                          "ror $1, %%cx            \n"
 416
 417                          "mov %%ecx, (%[dst])      \n"
 418                          "add $4, %[dst]           \n"
 419                          // }
 420
 421                          // 2nd dword output {
 422                          "mov %%edx, %%eax        \n"
 423                          "shl $1, %%eax           \n"
 424                          "and $0x1FE, %%eax       \n"
 425                          "mov (%[pal],%[a]), %%cx \n"
 426                          "ror $1, %%cx            \n"
 427                          "shl $16, %%ecx          \n"
 428
 429                          "shr $7, %%edx           \n"
 430                          "and $0x1FE, %%edx       \n"
 431                          "mov (%[pal],%[d]), %%cx \n"
 432                          "ror $1, %%cx            \n"
 433
 434                          "mov %%ecx, (%[dst])      \n"
 435                          "add $4, %[dst]           \n"
 436                          // }
 437                          // *
 438
 439              "mov %[tempx], %[c]     \n"
 440                          "dec %%ecx               \n"
 441                          "jnz x_loop_24           \n"
 442
 443                          "add %[line], %[src]     \n"
 444                          "add %[ext], %[dst]      \n"
 445
 446              "mov %[tempy], %[c]     \n"
 447                          "dec %%ecx               \n"
 448                          "jnz y_loop4             \n"
 449
 450                          "end_y_loop4:            \n"
 451                          : [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [a] "=&a" (fake_eax), [d] "=&d" (fake_edx), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight)
 452                          : [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
 453                          : "memory", "cc"
 454                          );
 455 #endif // _WIN32
 456         return (1 << 16) | GR_TEXFMT_ARGB_1555;
 457         }
 458         else
 459         {
 460 #ifndef GCC
 461                 __asm {
 462                         mov ebx,dword ptr [pal]
 463
 464                                 mov esi,dword ptr [src]
 465                                 mov edi,dword ptr [dst]
 466
 467                                 mov ecx,dword ptr [height]
 468 ia_y_loop:
 469                         push ecx
 470
 471                                 mov ecx,dword ptr [wid_64]
 472 ia_x_loop:
 473                         push ecx
 474
 475                                 mov eax,dword ptr [esi]         // read all 4 pixels
 476                                 bswap eax
 477                                 add esi,4
 478                                 mov edx,eax
 479
 480                                 // 1st dword output {
 481                                 shr eax,15
 482                                 and eax,0x1FE
 483                                 mov cx,word ptr [ebx+eax]
 484                                 ror cx,8
 485                                 shl ecx,16
 486
 487                                 mov eax,edx
 488                                 shr eax,23
 489                                 and eax,0x1FE
 490                                 mov cx,word ptr [ebx+eax]
 491                                 ror cx,8
 492
 493                                 mov dword ptr [edi],ecx
 494                                 add edi,4
 495                                 // }
 496
 497                                 // 2nd dword output {
 498                                 mov eax,edx
 499                                 shl eax,1
 500                                 and eax,0x1FE
 501                                 mov cx,word ptr [ebx+eax]
 502                                 ror cx,8
 503                                 shl ecx,16
 504
 505                                 shr edx,7
 506                                 and edx,0x1FE
 507                                 mov cx,word ptr [ebx+edx]
 508                                 ror cx,8
 509
 510                                 mov dword ptr [edi],ecx
 511                                 add edi,4
 512                                 // }
 513
 514                                 // * copy
 515                                 mov eax,dword ptr [esi]         // read all 4 pixels
 516                                 bswap eax
 517                                 add esi,4
 518                                 mov edx,eax
 519
 520                                 // 1st dword output {
 521                                 shr eax,15
 522                                 and eax,0x1FE
 523                                 mov cx,word ptr [ebx+eax]
 524                                 ror cx,8
 525                                 shl ecx,16
 526
 527                                 mov eax,edx
 528                                 shr eax,23
 529                                 and eax,0x1FE
 530                                 mov cx,word ptr [ebx+eax]
 531                                 ror cx,8
 532
 533                                 mov dword ptr [edi],ecx
 534                                 add edi,4
 535                                 // }
 536
 537                                 // 2nd dword output {
 538                                 mov eax,edx
 539                                 shl eax,1
 540                                 and eax,0x1FE
 541                                 mov cx,word ptr [ebx+eax]
 542                                 ror cx,8
 543                                 shl ecx,16
 544
 545                                 shr edx,7
 546                                 and edx,0x1FE
 547                                 mov cx,word ptr [ebx+edx]
 548                                 ror cx,8
 549
 550                                 mov dword ptr [edi],ecx
 551                                 add edi,4
 552                                 // }
 553                                 // *
 554
 555                                 pop ecx
 556
 557                                 dec ecx
 558                                 jnz ia_x_loop
 559
 560                                 pop ecx
 561                                 dec ecx
 562                                 jz ia_end_y_loop
 563                                 push ecx
 564
 565                                 add esi,dword ptr [line]
 566                                 add edi,dword ptr [ext]
 567
 568                                 mov ecx,dword ptr [wid_64]
 569 ia_x_loop_2:
 570                         push ecx
 571
 572                                 mov eax,dword ptr [esi+4]               // read all 4 pixels
 573                                 bswap eax
 574                                 mov edx,eax
 575
 576                                 // 1st dword output {
 577                                 shr eax,15
 578                                 and eax,0x1FE
 579                                 mov cx,word ptr [ebx+eax]
 580                                 ror cx,8
 581                                 shl ecx,16
 582
 583                                 mov eax,edx
 584                                 shr eax,23
 585                                 and eax,0x1FE
 586                                 mov cx,word ptr [ebx+eax]
 587                                 ror cx,8
 588
 589                                 mov dword ptr [edi],ecx
 590                                 add edi,4
 591                                 // }
 592
 593                                 // 2nd dword output {
 594                                 mov eax,edx
 595                                 shl eax,1
 596                                 and eax,0x1FE
 597                                 mov cx,word ptr [ebx+eax]
 598                                 ror cx,8
 599                                 shl ecx,16
 600
 601                                 shr edx,7
 602                                 and edx,0x1FE
 603                                 mov cx,word ptr [ebx+edx]
 604                                 ror cx,8
 605
 606                                 mov dword ptr [edi],ecx
 607                                 add edi,4
 608                                 // }
 609
 610                                 // * copy
 611                                 mov eax,dword ptr [esi]         // read all 4 pixels
 612                                 bswap eax
 613                                 add esi,8
 614                                 mov edx,eax
 615
 616                                 // 1st dword output {
 617                                 shr eax,15
 618                                 and eax,0x1FE
 619                                 mov cx,word ptr [ebx+eax]
 620                                 ror cx,8
 621                                 shl ecx,16
 622
 623                                 mov eax,edx
 624                                 shr eax,23
 625                                 and eax,0x1FE
 626                                 mov cx,word ptr [ebx+eax]
 627                                 ror cx,8
 628
 629                                 mov dword ptr [edi],ecx
 630                                 add edi,4
 631                                 // }
 632
 633                                 // 2nd dword output {
 634                                 mov eax,edx
 635                                 shl eax,1
 636                                 and eax,0x1FE
 637                                 mov cx,word ptr [ebx+eax]
 638                                 ror cx,8
 639                                 shl ecx,16
 640
 641                                 shr edx,7
 642                                 and edx,0x1FE
 643                                 mov cx,word ptr [ebx+edx]
 644                                 ror cx,8
 645
 646                                 mov dword ptr [edi],ecx
 647                                 add edi,4
 648                                 // }
 649                                 // *
 650
 651                                 pop ecx
 652
 653                                 dec ecx
 654                                 jnz ia_x_loop_2
 655
 656                                 add esi,dword ptr [line]
 657                                 add edi,dword ptr [ext]
 658
 659                                 pop ecx
 660                                 dec ecx
 661                                 jnz ia_y_loop
 662
 663 ia_end_y_loop:
 664         }
 665 #else // _WIN32
 666            //printf("Load8bCI1\n");
 667        long lTempX, lTempY, lHeight = (long) height;
 668                 intptr_t fake_eax, fake_edx;
 669            asm volatile (
 670                          "ia_y_loop2:             \n"
 671              "mov %[c], %[tempy]     \n"
 672
 673                          "mov %[wid_64], %%ecx   \n"
 674                          "ia_x_loop2:             \n"
 675              "mov %[c], %[tempx]     \n"
 676
 677                          "mov (%[src]), %%eax      \n"          // read all 4 pixels
 678                          "bswap %%eax             \n"
 679                          "add $4, %[src]           \n"
 680                          "mov %%eax, %%edx        \n"
 681
 682                          // 1st dword output {
 683                          "shr $15, %%eax          \n"
 684                          "and $0x1FE, %%eax       \n"
 685                          "mov (%[pal],%[a]), %%cx \n"
 686                          "ror $8, %%cx            \n"
 687                          "shl $16, %%ecx          \n"
 688
 689                          "mov %%edx, %%eax        \n"
 690                          "shr $23, %%eax          \n"
 691                          "and $0x1FE, %%eax       \n"
 692                          "mov (%[pal],%[a]), %%cx \n"
 693                          "ror $8, %%cx            \n"
 694
 695                          "mov %%ecx, (%[dst])      \n"
 696                          "add $4, %[dst]           \n"
 697                          // }
 698
 699                          // 2nd dword output {
 700                          "mov %%edx, %%eax        \n"
 701                          "shl $1, %%eax           \n"
 702                          "and $0x1FE, %%eax       \n"
 703                          "mov (%[pal],%[a]), %%cx \n"
 704                          "ror $8, %%cx            \n"
 705                          "shl $16, %%ecx          \n"
 706
 707                          "shr $7, %%edx           \n"
 708                          "and $0x1FE, %%edx       \n"
 709                          "mov (%[pal],%[d]), %%cx \n"
 710                          "ror $8, %%cx            \n"
 711
 712                          "mov %%ecx, (%[dst])      \n"
 713                          "add $4, %[dst]           \n"
 714                          // }
 715
 716                          // * copy
 717                          "mov (%[src]), %%eax      \n"          // read all 4 pixels
 718                          "bswap %%eax             \n"
 719                          "add $4, %[src]           \n"
 720                          "mov %%eax, %%edx        \n"
 721
 722                          // 1st dword output {
 723                          "shr $15, %%eax          \n"
 724                          "and $0x1FE, %%eax       \n"
 725                          "mov (%[pal],%[a]), %%cx \n"
 726                          "ror $8, %%cx            \n"
 727                          "shl $16, %%ecx          \n"
 728
 729                          "mov %%edx, %%eax        \n"
 730                          "shr $23, %%eax          \n"
 731                          "and $0x1FE, %%eax       \n"
 732                          "mov (%[pal],%[a]), %%cx \n"
 733                          "ror $8, %%cx            \n"
 734
 735                          "mov %%ecx, (%[dst])      \n"
 736                          "add $4, %[dst]           \n"
 737                          // }
 738
 739                          // 2nd dword output {
 740                          "mov %%edx, %%eax        \n"
 741                          "shl $1, %%eax           \n"
 742                          "and $0x1FE, %%eax       \n"
 743                          "mov (%[pal],%[a]), %%cx \n"
 744                          "ror $8, %%cx            \n"
 745                          "shl $16, %%ecx          \n"
 746
 747                          "shr $7, %%edx           \n"
 748                          "and $0x1FE, %%edx       \n"
 749                          "mov (%[pal],%[d]), %%cx \n"
 750                          "ror $8, %%cx            \n"
 751
 752                          "mov %%ecx, (%[dst])      \n"
 753                          "add $4, %[dst]           \n"
 754                          // }
 755                          // *
 756
 757              "mov %[tempx], %[c]     \n"
 758                          "dec %%ecx               \n"
 759                          "jnz ia_x_loop2          \n"
 760
 761              "mov %[tempy], %[c]     \n"
 762                          "dec %%ecx               \n"
 763                          "jz ia_end_y_loop2       \n"
 764              "mov %[c], %[tempy]     \n"
 765
 766                          "add %[line], %[src]     \n"
 767                          "add %[ext], %[dst]      \n"
 768
 769                          "mov %[wid_64], %%ecx   \n"
 770                          "ia_x_loop_22:           \n"
 771              "mov %[c], %[tempx]     \n"
 772
 773                          "mov 4(%[src]), %%eax     \n"          // read all 4 pixels
 774                          "bswap %%eax             \n"
 775                          "mov %%eax, %%edx        \n"
 776
 777                          // 1st dword output {
 778                          "shr $15, %%eax          \n"
 779                          "and $0x1FE, %%eax       \n"
 780                          "mov (%[pal],%[a]), %%cx \n"
 781                          "ror $8, %%cx            \n"
 782                          "shl $16, %%ecx          \n"
 783
 784                          "mov %%edx, %%eax        \n"
 785                          "shr $23, %%eax          \n"
 786                          "and $0x1FE, %%eax       \n"
 787                          "mov (%[pal],%[a]), %%cx \n"
 788                          "ror $8, %%cx            \n"
 789
 790                          "mov %%ecx, (%[dst])      \n"
 791                          "add $4, %[dst]           \n"
 792                          // }
 793
 794                          // 2nd dword output {
 795                          "mov %%edx, %%eax        \n"
 796                          "shl $1, %%eax           \n"
 797                          "and $0x1FE, %%eax       \n"
 798                          "mov (%[pal],%[a]), %%cx \n"
 799                          "ror $8, %%cx            \n"
 800                          "shl $16, %%ecx          \n"
 801
 802                          "shr $7, %%edx           \n"
 803                          "and $0x1FE, %%edx       \n"
 804                          "mov (%[pal],%[d]), %%cx \n"
 805                          "ror $8, %%cx            \n"
 806
 807                          "mov %%ecx, (%[dst])      \n"
 808                          "add $4, %[dst]           \n"
 809                          // }
 810
 811                          // * copy
 812                          "mov (%[src]), %%eax      \n"          // read all 4 pixels
 813                          "bswap %%eax             \n"
 814                          "add $8, %[src]           \n"
 815                          "mov %%eax, %%edx        \n"
 816
 817                          // 1st dword output {
 818                          "shr $15, %%eax          \n"
 819                          "and $0x1FE, %%eax       \n"
 820                          "mov (%[pal],%[a]), %%cx \n"
 821                          "ror $8, %%cx            \n"
 822                          "shl $16, %%ecx          \n"
 823
 824                          "mov %%edx, %%eax        \n"
 825                          "shr $23, %%eax          \n"
 826                          "and $0x1FE, %%eax       \n"
 827                          "mov (%[pal],%[a]), %%cx \n"
 828                          "ror $8, %%cx            \n"
 829
 830                          "mov %%ecx, (%[dst])      \n"
 831                          "add $4, %[dst]           \n"
 832                          // }
 833
 834                          // 2nd dword output {
 835                          "mov %%edx, %%eax        \n"
 836                          "shl $1, %%eax           \n"
 837                          "and $0x1FE, %%eax       \n"
 838                          "mov (%[pal],%[a]), %%cx \n"
 839                          "ror $8, %%cx            \n"
 840                          "shl $16, %%ecx          \n"
 841
 842                          "shr $7, %%edx           \n"
 843                          "and $0x1FE, %%edx       \n"
 844                          "mov (%[pal],%[d]), %%cx \n"
 845                          "ror $8, %%cx            \n"
 846
 847                          "mov %%ecx, (%[dst])      \n"
 848                          "add $4, %[dst]           \n"
 849                          // }
 850                          // *
 851
 852              "mov %[tempx], %[c]     \n"
 853                          "dec %%ecx               \n"
 854                          "jnz ia_x_loop_22        \n"
 855
 856                          "add %[line], %[src]     \n"
 857                          "add %[ext], %[dst]      \n"
 858
 859              "mov %[tempy], %[c]     \n"
 860                          "dec %%ecx               \n"
 861                          "jnz ia_y_loop2          \n"
 862
 863                          "ia_end_y_loop2:         \n"
 864                          : [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [a] "=&a" (fake_eax), [d] "=&d" (fake_edx), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight)
 865              : [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
 866                          : "memory", "cc"
 867                          );
 868 #endif // _WIN32
 869         return (1 << 16) | GR_TEXFMT_ALPHA_INTENSITY_88;
 870         }
 871
 872         return 0;
 873 }
 874
 875 //****************************************************************
 876 // Size: 1, Format: 3
 877 //
 878 // ** by Gugaman **
 879
 880 DWORD Load8bIA (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
 881 {
 882         if (rdp.tlut_mode != 0)
 883                 return Load8bCI (dst, src, wid_64, height, line, real_width, tile);
 884
 885         if (wid_64 < 1) wid_64 = 1;
 886         if (height < 1) height = 1;
 887         int ext = (real_width - (wid_64 << 3));
 888 #ifndef GCC
 889         __asm {
 890                 mov esi,dword ptr [src]
 891                         mov edi,dword ptr [dst]
 892
 893                         mov ecx,dword ptr [height]
 894 y_loop:
 895                 push ecx
 896
 897                         mov ecx,dword ptr [wid_64]
 898 x_loop:
 899                 mov eax,dword ptr [esi]          // read all 4 pixels
 900                         add esi,4
 901
 902                         xor ebx,ebx
 903                         mov edx,eax
 904                         shr eax,4//all alpha
 905                         and eax,0x0F0F0F0F
 906                         or ebx,eax
 907                         mov eax,edx//intensity
 908                         shl eax,4
 909                         and eax,0xF0F0F0F0
 910                         or ebx,eax
 911
 912                         mov dword ptr [edi],ebx // save dword
 913                         add edi,4
 914
 915                         mov eax,dword ptr [esi]          // read all 4 pixels
 916                         add esi,4
 917
 918                         xor ebx,ebx
 919                         mov edx,eax
 920                         shr eax,4//all alpha
 921                         and eax,0x0F0F0F0F
 922                         or ebx,eax
 923                         mov eax,edx//intensity
 924                         shl eax,4
 925                         and eax,0xF0F0F0F0
 926                         or ebx,eax
 927
 928                         mov dword ptr [edi],ebx // save dword
 929                         add edi,4
 930                         // *
 931
 932                         dec ecx
 933                         jnz x_loop
 934
 935                         pop ecx
 936                         dec ecx
 937                         jz end_y_loop
 938                         push ecx
 939
 940                         add esi,dword ptr [line]
 941                         add edi,dword ptr [ext]
 942
 943                         mov ecx,dword ptr [wid_64]
 944 x_loop_2:
 945                 mov eax,dword ptr [esi+4]          // read both pixels
 946
 947                         xor ebx,ebx
 948                         mov edx,eax
 949                         shr eax,4//all alpha
 950                         and eax,0x0F0F0F0F
 951                         or ebx,eax
 952                         mov eax,edx//intensity
 953                         shl eax,4
 954                         and eax,0xF0F0F0F0
 955                         or ebx,eax
 956
 957                         mov dword ptr [edi],ebx //save dword
 958                         add edi,4
 959
 960                         mov eax,dword ptr [esi]          // read both pixels
 961                         add esi,8
 962
 963                         xor ebx,ebx
 964                         mov edx,eax
 965                         shr eax,4//all alpha
 966                         and eax,0x0F0F0F0F
 967                         or ebx,eax
 968                         mov eax,edx//intensity
 969                         shl eax,4
 970                         and eax,0xF0F0F0F0
 971                         or ebx,eax
 972
 973                         mov dword ptr [edi],ebx //save dword
 974                         add edi,4
 975                         // *
 976
 977                         dec ecx
 978                         jnz x_loop_2
 979
 980                         add esi,dword ptr [line]
 981                         add edi,dword ptr [ext]
 982
 983                         pop ecx
 984                         dec ecx
 985                         jnz y_loop
 986
 987 end_y_loop:
 988         }
 989 #else // _WIN32
 990    //printf("Load8bIA\n");
 991    long lTemp, lHeight = (long) height;
 992    asm volatile (
 993                  "y_loop5:               \n"
 994                  "mov %[c], %[temp]      \n"
 995
 996                  "mov %[wid_64], %%ecx  \n"
 997                  "x_loop5:               \n"
 998                  "mov (%[src]), %%eax     \n"          // read all 4 pixels
 999                  "add $4, %[src]          \n"
1000
1001                  "xor %%ebx, %%ebx       \n"
1002                  "mov %%eax, %%edx       \n"
1003                  "shr $4, %%eax          \n"//all alpha
1004                  "and $0x0F0F0F0F, %%eax \n"
1005                  "or %%eax, %%ebx        \n"
1006                  "mov %%edx, %%eax       \n"//intensity
1007                  "shl $4, %%eax          \n"
1008                  "and $0xF0F0F0F0, %%eax \n"
1009                  "or %%eax, %%ebx        \n"
1010
1011                  "mov %%ebx, (%[dst])     \n" // save dword
1012                  "add $4, %[dst]          \n"
1013
1014                  "mov (%[src]), %%eax     \n"          // read all 4 pixels
1015                  "add $4, %[src]          \n"
1016
1017                  "xor %%ebx, %%ebx       \n"
1018                  "mov %%eax, %%edx       \n"
1019                  "shr $4, %%eax          \n"//all alpha
1020                  "and $0x0F0F0F0F, %%eax \n"
1021                  "or %%eax, %%ebx        \n"
1022                  "mov %%edx, %%eax       \n"//intensity
1023                  "shl $4, %%eax          \n"
1024                  "and $0xF0F0F0F0, %%eax \n"
1025                  "or %%eax, %%ebx        \n"
1026
1027                  "mov %%ebx, (%[dst])     \n" // save dword
1028                  "add $4, %[dst]          \n"
1029                  // *
1030
1031                  "dec %%ecx              \n"
1032                  "jnz x_loop5            \n"
1033
1034                  "mov %[temp], %[c]      \n"
1035                  "dec %%ecx              \n"
1036                  "jz end_y_loop5         \n"
1037                  "mov %[c], %[temp]      \n"
1038
1039                  "add %[line], %[src]    \n"
1040                  "add %[ext], %[dst]     \n"
1041
1042                  "mov %[wid_64], %%ecx  \n"
1043                  "x_loop_25:             \n"
1044                  "mov 4(%[src]), %%eax    \n"          // read both pixels
1045
1046                  "xor %%ebx, %%ebx       \n"
1047                  "mov %%eax, %%edx       \n"
1048                  "shr $4, %%eax          \n"//all alpha
1049                  "and $0x0F0F0F0F, %%eax \n"
1050                  "or %%eax, %%ebx        \n"
1051                  "mov %%edx, %%eax       \n"//intensity
1052                  "shl $4, %%eax          \n"
1053                  "and $0xF0F0F0F0, %%eax \n"
1054                  "or %%eax, %%ebx        \n"
1055
1056                  "mov %%ebx, (%[dst])     \n" //save dword
1057                  "add $4, %[dst]          \n"
1058
1059                  "mov (%[src]), %%eax     \n"          // read both pixels
1060                  "add $8, %[src]          \n"
1061
1062                  "xor %%ebx, %%ebx       \n"
1063                  "mov %%eax, %%edx       \n"
1064                  "shr $4, %%eax          \n"//all alpha
1065                  "and $0x0F0F0F0F, %%eax \n"
1066                  "or %%eax, %%ebx        \n"
1067                  "mov %%edx, %%eax       \n"//intensity
1068                  "shl $4, %%eax          \n"
1069                  "and $0xF0F0F0F0, %%eax \n"
1070                  "or %%eax, %%ebx        \n"
1071
1072                  "mov %%ebx, (%[dst])     \n" //save dword
1073                  "add $4, %[dst]          \n"
1074                  // *
1075
1076                  "dec %%ecx              \n"
1077                  "jnz x_loop_25          \n"
1078
1079                  "add %[line], %[src]    \n"
1080                  "add %[ext], %[dst]     \n"
1081
1082                  "mov %[temp], %[c]      \n"
1083                  "dec %%ecx              \n"
1084                  "jnz y_loop5            \n"
1085
1086                  "end_y_loop5:           \n"
1087                    : [temp]"=m"(lTemp), [src] "+S"(src), [dst] "+D"(dst), [c] "+c"(lHeight)
1088                    : [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
1089                    : "memory", "cc", "eax", "edx", "ebx"
1090                    );
1091 #endif // _WIN32
1092         return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;
1093 }
1094
1095 //****************************************************************
1096 // Size: 1, Format: 4
1097 //
1098 // ** by Gugaman **
1099
1100 DWORD Load8bI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile)
1101 {
1102         if (rdp.tlut_mode != 0)
1103                 return Load8bCI (dst, src, wid_64, height, line, real_width, tile);
1104
1105         if (wid_64 < 1) wid_64 = 1;
1106         if (height < 1) height = 1;
1107         int ext = (real_width - (wid_64 << 3));
1108 #ifndef GCC
1109         __asm {
1110                 mov esi,dword ptr [src]
1111                         mov edi,dword ptr [dst]
1112
1113                         mov ecx,dword ptr [height]
1114 y_loop:
1115                 push ecx
1116
1117                         mov ecx,dword ptr [wid_64]
1118 x_loop:
1119                 mov eax,dword ptr [esi]          // read all 4 pixels
1120                         add esi,4
1121
1122                         mov dword ptr [edi],eax // save dword
1123                         add edi,4
1124
1125                         mov eax,dword ptr [esi]          // read all 4 pixels
1126                         add esi,4
1127
1128                         mov dword ptr [edi],eax // save dword
1129                         add edi,4
1130                         // *
1131
1132                         dec ecx
1133                         jnz x_loop
1134
1135                         pop ecx
1136                         dec ecx
1137                         jz end_y_loop
1138                         push ecx
1139
1140                         add esi,dword ptr [line]
1141                         add edi,dword ptr [ext]
1142
1143                         mov ecx,dword ptr [wid_64]
1144 x_loop_2:
1145                 mov eax,dword ptr [esi+4]          // read both pixels
1146
1147                         mov dword ptr [edi],eax //save dword
1148                         add edi,4
1149
1150                         mov eax,dword ptr [esi]          // read both pixels
1151                         add esi,8
1152
1153                         mov dword ptr [edi],eax //save dword
1154                         add edi,4
1155                         // *
1156
1157                         dec ecx
1158                         jnz x_loop_2
1159
1160                         add esi,dword ptr [line]
1161                         add edi,dword ptr [ext]
1162
1163                         pop ecx
1164                         dec ecx
1165                         jnz y_loop
1166
1167 end_y_loop:
1168         }
1169 #else // _WIN32
1170    //printf("Load8bI\n");
1171    long lTemp, lHeight = (long) height;
1172    asm volatile (
1173                  "y_loop6:              \n"
1174          "mov %[c], %[temp]     \n"
1175
1176                  "mov %[wid_64], %%ecx \n"
1177                  "x_loop6:              \n"
1178                  "mov (%[src]), %%eax    \n"          // read all 4 pixels
1179                  "add $4, %[src]         \n"
1180
1181                  "mov %%eax, (%[dst])    \n" // save dword
1182                  "add $4, %[dst]         \n"
1183
1184                  "mov (%[src]), %%eax    \n"          // read all 4 pixels
1185                  "add $4, %[src]         \n"
1186
1187                  "mov %%eax, (%[dst])    \n" // save dword
1188                  "add $4, %[dst]         \n"
1189                  // *
1190
1191                  "dec %%ecx             \n"
1192                  "jnz x_loop6           \n"
1193
1194          "mov %[temp], %[c]     \n"
1195                  "dec %%ecx             \n"
1196                  "jz end_y_loop6        \n"
1197          "mov %[c], %[temp]     \n"
1198
1199                  "add %[line], %[src]   \n"
1200                  "add %[ext], %[dst]    \n"
1201
1202                  "mov %[wid_64], %%ecx \n"
1203                  "x_loop_26:            \n"
1204                  "mov 4(%[src]), %%eax   \n"          // read both pixels
1205
1206                  "mov %%eax, (%[dst])    \n" //save dword
1207                  "add $4, %[dst]         \n"
1208
1209                  "mov (%[src]), %%eax    \n"          // read both pixels
1210                  "add $8, %[src]         \n"
1211
1212                  "mov %%eax, (%[dst])    \n" //save dword
1213                  "add $4, %[dst]         \n"
1214                  // *
1215
1216                  "dec %%ecx             \n"
1217                  "jnz x_loop_26         \n"
1218
1219                  "add %[line], %[src]   \n"
1220                  "add %[ext], %[dst]    \n"
1221
1222          "mov %[temp], %[c]     \n"
1223                  "dec %%ecx             \n"
1224                  "jnz y_loop6           \n"
1225
1226                  "end_y_loop6:          \n"
1227                  : [temp]"=m"(lTemp), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight)
1228                  : [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext)
1229                  : "memory", "cc", "eax", "edx", "ebx"
1230                  );
1231 #endif // _WIN32
1232      return /*(0 << 16) | */GR_TEXFMT_ALPHA_8;
1233 }
1234