test: Add br3200749
[nasm/avx512.git] / test / tmap.nas
blob51b477fd12dee84ef6cb7b9a3acac8acd34e8072
1 ;; NASM note: this file abuses the section flags in such a way that
2 ;; NASM 0.98.37 broke when this was compiled with:
3 ;; nasm -o tmap.o -f elf -DLINUX tmap.nas
5 ;;-----------------------------------------------------------------------------
6 ;;
7 ;; $Id$
8 ;;
9 ;; Copyright (C) 1998-2000 by DooM Legacy Team.
11 ;; This program is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License
13 ;; as published by the Free Software Foundation; either version 2
14 ;; of the License, or (at your option) any later version.
16 ;; This program is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
22 ;; $Log$
23 ;; Revision 1.2 2003/09/10 23:33:38 hpa
24 ;; Use the version of tmap.nas that actually caused problems
26 ;; Revision 1.10 2001/02/24 13:35:21 bpereira
27 ;; no message
29 ;; Revision 1.9 2001/02/10 15:24:19 hurdler
30 ;; Apply Rob's patch for Linux version
32 ;; Revision 1.8 2000/11/12 09:48:15 bpereira
33 ;; no message
35 ;; Revision 1.7 2000/11/06 20:52:16 bpereira
36 ;; no message
38 ;; Revision 1.6 2000/11/03 11:48:40 hurdler
39 ;; Fix compiling problem under win32 with 3D-Floors and FragglScript (to verify!)
41 ;; Revision 1.5 2000/11/03 03:27:17 stroggonmeth
42 ;; Again with the bug fixing...
44 ;; Revision 1.4 2000/11/02 17:50:10 stroggonmeth
45 ;; Big 3Dfloors & FraggleScript commit!!
47 ;; Revision 1.3 2000/04/24 20:24:38 bpereira
48 ;; no message
50 ;; Revision 1.2 2000/02/27 00:42:11 hurdler
51 ;; fix CR+LF problem
53 ;; Revision 1.1.1.1 2000/02/22 20:32:32 hurdler
54 ;; Initial import into CVS (v1.29 pr3)
57 ;; DESCRIPTION:
58 ;; assembler optimised rendering code for software mode
59 ;; draw floor spans, and wall columns.
61 ;;-----------------------------------------------------------------------------
;; Assemble everything below as 32-bit code.
[BITS 32]

;; cextern NAME / cglobal NAME declare one C-visible symbol each.
;;   LINUX defined : the symbol is declared under its plain name.
;;   otherwise     : NAME is first %define'd to _NAME, so the declaration and
;;                   every later use of NAME carry a leading underscore.
;; CODE_SEG is the section that receives the code: .data when LINUX is
;; defined, .text otherwise.  (ASM_PatchRowBytes in this file stores into the
;; instruction stream, so the code has to live in a writable section.)
%ifdef LINUX

%macro cextern 1
[extern %1]
%endmacro

%macro cglobal 1
[global %1]
%endmacro

%define CODE_SEG .data

%else

%macro cextern 1
%define %1 _%1
[extern %1]
%endmacro

%macro cglobal 1
%define %1 _%1
[global %1]
%endmacro

%define CODE_SEG .text

%endif
;; externs
;; columns
cextern dc_x
cextern dc_yl
cextern dc_yh
cextern ylookup
cextern columnofs
cextern dc_source
cextern dc_texturemid
cextern dc_iscale
cextern centery
cextern dc_colormap
cextern dc_transmap
cextern colormaps

;; spans
cextern ds_x1
cextern ds_x2
cextern ds_y
cextern ds_xfrac
cextern ds_yfrac
cextern ds_xstep
cextern ds_ystep
cextern ds_source
cextern ds_colormap
;cextern ds_textureheight

; polygon edge rasterizer
cextern prastertab
122 ;;----------------------------------------------------------------------
124 ;; R_DrawColumn
126 ;; New optimised version 10-01-1998 by D.Fabrice and P.Boris
127 ;; TO DO: optimise it much farther... should take at most 3 cycles/pix
128 ;; once it's fixed, add code to patch the offsets so that it
129 ;; works in every screen width.
131 ;;----------------------------------------------------------------------
[SECTION .data]

;; Scratch variables shared by the drawers in this file.
;;.align 4
loopcount       dd      0               ;; pixel-pair counter of R_DrawSpan_8_old
pixelcount      dd      0               ;; pixel count of the column drawers and
                                        ;; R_DrawSpan_8_old; R_DrawSpan_8 keeps
                                        ;; its x step here instead
tystep          dd      0               ;; y step of the translucent/shade column
                                        ;; drawers and of R_DrawSpan_8

[SECTION CODE_SEG write]
;----------------------------------------------------------------------------
;fixed_t FixedMul (fixed_t a, fixed_t b)
;
; In:       [esp+4] = a, [esp+8] = b (both read from the stack)
; Out:      eax = bits 16..47 of the signed 64-bit product a*b
; Clobbers: edx, flags
;----------------------------------------------------------------------------
cglobal FixedMul
;       align   16
FixedMul:
        mov     eax,[esp+4]
        imul    dword [esp+8]           ;; edx:eax = a * b (signed)
        shrd    eax,edx,16              ;; eax = low 32 bits of (edx:eax >> 16)
        ret                             ;; without this, execution runs on
                                        ;; into FixedDiv2
;----------------------------------------------------------------------------
;fixed_t FixedDiv2 (fixed_t a, fixed_t b);
;
; In:       [esp+4] = a, [esp+8] = b (both read from the stack)
; Out:      eax = quotient of the signed 64-bit value (a << 16) divided by b
; Clobbers: edx, flags
; Note:     b is not checked here; idiv raises a divide error when b is 0 or
;           when the quotient does not fit in 32 bits.
;----------------------------------------------------------------------------
cglobal FixedDiv2
;       align   16
FixedDiv2:
        mov     eax,[esp+4]
        mov     edx,eax                 ;; these two instructions allow the next
        sar     edx,31                  ;; two to pair, on the Pentium processor.
        shld    edx,eax,16              ;; edx = sign : top 16 bits of a
        sal     eax,16                  ;; edx:eax = a << 16, sign-extended
        idiv    dword [esp+8]
        ret                             ;; without this, execution runs on
                                        ;; into ASM_PatchRowBytes
;----------------------------------------------------------------------------
; void ASM_PatchRowBytes (int rowbytes);
;
; Rewrites the 32-bit immediate operand of the placeholder add/sub
; instructions of the column drawers:
;   p1..pq  receive rowbytes
;   q1..q8  receive 2 * rowbytes
; The immediate starts 2 bytes after each label (opcode byte + ModRM byte of
; "add/sub r32, imm32").  Because this stores into the instruction stream,
; the code must be placed in a writable section.
;
; In:       [esp+4] = rowbytes
; Clobbers: eax, flags
;----------------------------------------------------------------------------
cglobal ASM_PatchRowBytes
;       align   16
ASM_PatchRowBytes:
        mov     eax,[esp+4]
        ;; R_DrawColumn_8
        mov     [p1+2],eax
        mov     [p2+2],eax
        mov     [p3+2],eax
        mov     [p4+2],eax
        mov     [p5+2],eax
        mov     [p6+2],eax
        mov     [p7+2],eax
        ;; R_DrawSkyColumn_8
        mov     [p8+2],eax
        mov     [p9+2],eax
        mov     [pa+2],eax
        mov     [pb+2],eax
        mov     [pc+2],eax
        mov     [pd+2],eax
        mov     [pe+2],eax
        ;; R_DrawTranslucentColumn_8
        mov     [pf+2],eax
        mov     [pg+2],eax
        mov     [ph+2],eax
        mov     [pi+2],eax
        mov     [pj+2],eax
        mov     [pk+2],eax
        ;; R_DrawShadeColumn_8
        mov     [pl+2],eax
        mov     [pm+2],eax
        mov     [pn+2],eax
        mov     [po+2],eax
        mov     [pp+2],eax
        mov     [pq+2],eax
        add     eax,eax                 ;; eax = 2 * rowbytes
        ;; R_DrawTranslucentColumn_8, unrolled loop
        mov     [q1+2],eax
        mov     [q2+2],eax
        mov     [q3+2],eax
        mov     [q4+2],eax
        ;; R_DrawShadeColumn_8, unrolled loop
        mov     [q5+2],eax
        mov     [q6+2],eax
        mov     [q7+2],eax
        mov     [q8+2],eax
        ret                             ;; without this, execution runs on
                                        ;; into R_DrawColumn_8
;----------------------------------------------------------------------------
; 8bpp column drawer
;
; Takes no stack arguments; reads dc_x, dc_yl, dc_yh, dc_iscale, centery,
; dc_texturemid, dc_source and dc_colormap, and writes one colormapped
; texel per screen row from dc_yl to dc_yh.  The texture index is masked
; to 0..127.  The 0x12345678 immediates at p1..p7 are placeholders for the
; row stride and are overwritten by ASM_PatchRowBytes.
; ebp, esi, edi and ebx are saved and restored; eax, ecx, edx are clobbered.
;----------------------------------------------------------------------------

cglobal R_DrawColumn_8
;       align   16
R_DrawColumn_8:
        push    ebp                     ;; preserve caller's stack frame pointer
        push    esi                     ;; preserve register variables
        push    edi
        push    ebx

;; dest = ylookup[dc_yl] + columnofs[dc_x];

        mov     ebp,[dc_yl]
        mov     ebx,ebp
        mov     edi,[ylookup+ebx*4]
        mov     ebx,[dc_x]
        add     edi,[columnofs+ebx*4]   ;; edi = dest

;; pixelcount = yh - yl + 1

        mov     eax,[dc_yh]
        inc     eax
        sub     eax,ebp                 ;; pixel count
        mov     [pixelcount],eax        ;; save for final pixel
        jle     near vdone              ;; nothing to scale

;; frac = dc_texturemid - (centery-dc_yl)*fracstep;

        mov     ecx,[dc_iscale]         ;; fracstep
        mov     eax,[centery]
        sub     eax,ebp
        imul    eax,ecx
        mov     edx,[dc_texturemid]
        sub     edx,eax
        mov     ebx,edx
        shr     ebx,16                  ;; frac int.
        and     ebx,0x7f
        shl     edx,16                  ;; y frac up

        mov     ebp,ecx
        shl     ebp,16                  ;; fracstep f. up
        shr     ecx,16                  ;; fracstep i. ->cl
        and     cl,0x7f
        mov     esi,[dc_source]

;; lets rock :) !

        mov     eax,[pixelcount]
        mov     dh,al                   ;; dh = low byte of the pixel count
        shr     eax,2
        mov     ch,al                   ;; quad count
        mov     eax,[dc_colormap]
        test    dh,0x3
        je      near v4quadloop

;; do un-even pixel

        test    dh,0x1
        je      two_uneven

        mov     al,[esi+ebx]            ;; prep un-even loops
        add     edx,ebp                 ;; ypos f += ystep f
        adc     bl,cl                   ;; ypos i += ystep i
        mov     dl,[eax]                ;; colormap texel
        and     bl,0x7f                 ;; mask 0-127 texture index
        mov     [edi],dl                ;; output pixel
p1:     add     edi,0x12345678

;; do two non-quad-aligned pixels

two_uneven:
        test    dh,0x2
        je      f3

        mov     al,[esi+ebx]            ;; fetch source texel
        add     edx,ebp                 ;; ypos f += ystep f
        adc     bl,cl                   ;; ypos i += ystep i
        mov     dl,[eax]                ;; colormap texel
        and     bl,0x7f                 ;; mask 0-127 texture index
        mov     [edi],dl                ;; output pixel
        mov     al,[esi+ebx]            ;; fetch source texel
        add     edx,ebp                 ;; ypos f += ystep f
        adc     bl,cl                   ;; ypos i += ystep i
        mov     dl,[eax]                ;; colormap texel
        and     bl,0x7f                 ;; mask 0-127 texture index
p2:     add     edi,0x12345678
        mov     [edi],dl                ;; output pixel
p3:     add     edi,0x12345678

;; test if there was at least 4 pixels

f3:                                     ;; target of "je f3" above
        test    ch,0xff                 ;; test quad count
        je      near vdone

;; ebp : ystep frac. upper 16 bits
;; edx : y frac. upper 16 bits
;; ebx : y i. lower 7 bits, masked for index
;; ecx : ch = counter, cl = y step i.
;; eax : colormap aligned 256
;; esi : source texture column
;; edi : dest screen

v4quadloop:
        mov     dh,0x7f                 ;; prep mask
        align   4
vquadloop:
        mov     al,[esi+ebx]            ;; prep loop
        add     edx,ebp                 ;; ypos f += ystep f
        adc     bl,cl                   ;; ypos i += ystep i
        mov     dl,[eax]                ;; colormap texel
        mov     [edi],dl                ;; output pixel
        and     bl,0x7f                 ;; mask 0-127 texture index

        mov     al,[esi+ebx]            ;; fetch source texel
        add     edx,ebp
        adc     bl,cl
p4:     add     edi,0x12345678
        mov     dl,[eax]
        and     bl,0x7f
        mov     [edi],dl

        mov     al,[esi+ebx]            ;; fetch source texel
        add     edx,ebp
        adc     bl,cl
p5:     add     edi,0x12345678
        mov     dl,[eax]
        and     bl,0x7f
        mov     [edi],dl

        mov     al,[esi+ebx]            ;; fetch source texel
        add     edx,ebp
        adc     bl,cl
p6:     add     edi,0x12345678
        mov     dl,[eax]
        and     bl,0x7f
        mov     [edi],dl

p7:     add     edi,0x12345678

        dec     ch
        jne     vquadloop

vdone:
        pop     ebx                     ;; restore register variables
        pop     edi
        pop     esi
        pop     ebp                     ;; restore caller's stack frame pointer
        ret                             ;; without this, execution runs on
                                        ;; into R_DrawSkyColumn_8
;;----------------------------------------------------------------------
;;13-02-98:
;;      R_DrawSkyColumn : same as R_DrawColumn but:
;;
;;      - wrap around 256 instead of 127.
;;      this is needed because we have a higher texture for mouselook,
;;      we need at least 200 lines for the sky.
;;
;;      NOTE: the sky should never wrap, so it could use a faster method.
;;            for the moment, we'll still use a wrapping method...
;;
;;      IT S JUST A QUICK CUT N PASTE, WAS NOT OPTIMISED AS IT SHOULD BE !!!
;;
;; Takes no stack arguments; reads the same dc_* globals as R_DrawColumn_8.
;; The 0x12345678 immediates at p8..pe are row-stride placeholders that
;; ASM_PatchRowBytes overwrites.  ebp, esi, edi and ebx are saved and
;; restored; eax, ecx, edx are clobbered.
;;----------------------------------------------------------------------

cglobal R_DrawSkyColumn_8
;       align   16
R_DrawSkyColumn_8:
        push    ebp
        push    esi
        push    edi
        push    ebx

;; dest = ylookup[dc_yl] + columnofs[dc_x];

        mov     ebp,[dc_yl]
        mov     ebx,ebp
        mov     edi,[ylookup+ebx*4]
        mov     ebx,[dc_x]
        add     edi,[columnofs+ebx*4]   ;; edi = dest

;; pixelcount = yh - yl + 1

        mov     eax,[dc_yh]
        inc     eax
        sub     eax,ebp                 ;; pixel count
        mov     [pixelcount],eax        ;; save for final pixel
        jle     near vskydone           ;; nothing to scale

;; frac = dc_texturemid - (centery-dc_yl)*fracstep;

        mov     ecx,[dc_iscale]         ;; fracstep
        mov     eax,[centery]
        sub     eax,ebp
        imul    eax,ecx
        mov     edx,[dc_texturemid]
        sub     edx,eax
        mov     ebx,edx
        shr     ebx,16                  ;; frac int.
        and     ebx,0xff
        shl     edx,16                  ;; y frac up
        mov     ebp,ecx
        shl     ebp,16                  ;; fracstep f. up
        shr     ecx,16                  ;; fracstep i. ->cl
        mov     esi,[dc_source]

;; lets rock :) !

        mov     eax,[pixelcount]
        mov     dh,al                   ;; dh = low byte of the pixel count
        shr     eax,0x2
        mov     ch,al                   ;; quad count
        mov     eax,[dc_colormap]
        test    dh,0x3
        je      vskyquadloop

;; do un-even pixel

        test    dh,0x1
        je      f2
        mov     al,[esi+ebx]            ;; prep un-even loops
        add     edx,ebp                 ;; ypos f += ystep f
        adc     bl,cl                   ;; ypos i += ystep i
        mov     dl,[eax]                ;; colormap texel
        mov     [edi],dl                ;; output pixel
p8:     add     edi,0x12345678

;; do two non-quad-aligned pixels

f2:     test    dh,0x2
        je      skyf3

        mov     al,[esi+ebx]            ;; fetch source texel
        add     edx,ebp                 ;; ypos f += ystep f
        adc     bl,cl                   ;; ypos i += ystep i
        mov     dl,[eax]                ;; colormap texel
        mov     [edi],dl                ;; output pixel

        mov     al,[esi+ebx]            ;; fetch source texel
        add     edx,ebp                 ;; ypos f += ystep f
        adc     bl,cl                   ;; ypos i += ystep i
        mov     dl,[eax]                ;; colormap texel
p9:     add     edi,0x12345678
        mov     [edi],dl                ;; output pixel

pa:     add     edi,0x12345678

;; test if there was at least 4 pixels

skyf3:  test    ch,0xff                 ;; test quad count
        je      vskydone

;; ebp : ystep frac. in the upper 16 bits
;; edx : y frac. in the upper 16 bits
;; ebx : y i. in bl; no mask is applied, so it wraps at 256
;; ecx : ch = counter, cl = y step i.
;; eax : colormap aligned 256
;; esi : source texture column
;; edi : dest screen

        align   4
vskyquadloop:
        mov     al,[esi+ebx]            ;; prep loop
        add     edx,ebp                 ;; ypos f += ystep f
        mov     dl,[eax]                ;; colormap texel
        adc     bl,cl                   ;; ypos i += ystep i
        mov     [edi],dl                ;; output pixel

        mov     al,[esi+ebx]            ;; fetch source texel
        add     edx,ebp
        adc     bl,cl
pb:     add     edi,0x12345678
        mov     dl,[eax]
        mov     [edi],dl

        mov     al,[esi+ebx]            ;; fetch source texel
        add     edx,ebp
        adc     bl,cl
pc:     add     edi,0x12345678
        mov     dl,[eax]
        mov     [edi],dl

        mov     al,[esi+ebx]            ;; fetch source texel
        add     edx,ebp
        adc     bl,cl
pd:     add     edi,0x12345678
        mov     dl,[eax]
        mov     [edi],dl

pe:     add     edi,0x12345678

        dec     ch
        jne     vskyquadloop
vskydone:
        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        ret                             ;; without this, execution runs on
                                        ;; into R_DrawTranslucentColumn_8
;;----------------------------------------------------------------------
;; R_DrawTranslucentColumn_8
;;
;; Vertical column texture drawer, with transparency. Replaces Doom2's
;; 'fuzz' effect, which was not so beautiful.
;; Transparency is always impressive in some way, don't know why...
;;
;; Per pixel: ah = source texel, al = pixel already on screen,
;; dl = [eax] (lookup relative to dc_transmap), dl = [edx] (lookup relative
;; to dc_colormap), then dl is stored back to the screen.
;; NOTE(review): this replaces the low 16 bits of the dc_transmap pointer
;; and the low 8 bits of the dc_colormap pointer, so it presumably relies
;; on those tables being 64K / 256 byte aligned - confirm with the C side.
;;
;; Takes no stack arguments.  The immediates at pf..pk (row stride) and
;; q1..q4 (twice the row stride) are placeholders overwritten by
;; ASM_PatchRowBytes.  ebp, esi, edi and ebx are saved and restored;
;; eax, ecx, edx are clobbered.
;;----------------------------------------------------------------------

cglobal R_DrawTranslucentColumn_8
R_DrawTranslucentColumn_8:
        push    ebp                     ;; preserve caller's stack frame pointer
        push    esi                     ;; preserve register variables
        push    edi
        push    ebx

;; dest = ylookup[dc_yl] + columnofs[dc_x];

        mov     ebp,[dc_yl]
        mov     ebx,ebp
        mov     edi,[ylookup+ebx*4]
        mov     ebx,[dc_x]
        add     edi,[columnofs+ebx*4]   ;; edi = dest

;; pixelcount = yh - yl + 1

        mov     eax,[dc_yh]
        inc     eax
        sub     eax,ebp                 ;; pixel count
        mov     [pixelcount],eax        ;; save for final pixel
        jle     near vtdone             ;; nothing to scale

;; frac = dc_texturemid - (centery-dc_yl)*fracstep;

        mov     ecx,[dc_iscale]         ;; fracstep
        mov     eax,[centery]
        sub     eax,ebp
        imul    eax,ecx
        mov     edx,[dc_texturemid]
        sub     edx,eax
        mov     ebx,edx

        shr     ebx,16                  ;; frac int.
        and     ebx,0x7f
        shl     edx,16                  ;; y frac up

        mov     ebp,ecx
        shl     ebp,16                  ;; fracstep f. up
        shr     ecx,16                  ;; fracstep i. ->cl
        and     cl,0x7f
        push    cx                      ;; keep cx while the upper half of ecx
        mov     ecx,edx                 ;; takes over the y frac from edx
        pop     cx
        mov     edx,[dc_colormap]
        mov     esi,[dc_source]

;; lets rock :) !

        mov     eax,[pixelcount]
        shr     eax,0x2
        test    byte [pixelcount],0x3
        mov     ch,al                   ;; quad count
        mov     eax,[dc_transmap]       ;; the two movs leave the flags alone
        je      vt4quadloop

;; do un-even pixel

        test    byte [pixelcount],0x1
        je      trf2

        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        add     ecx,ebp
        adc     bl,cl
        mov     al,[edi]                ;; fetch dest  : index into colormap
        and     bl,0x7f
        mov     dl,[eax]
        mov     dl,[edx]
        mov     [edi],dl
pf:     add     edi,0x12345678

;; do two non-quad-aligned pixels

trf2:   test    byte [pixelcount],0x2
        je      trf3

        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        add     ecx,ebp
        adc     bl,cl
        mov     al,[edi]                ;; fetch dest  : index into colormap
        and     bl,0x7f
        mov     dl,[eax]
        mov     dl,[edx]
        mov     [edi],dl
pg:     add     edi,0x12345678

        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        add     ecx,ebp
        adc     bl,cl
        mov     al,[edi]                ;; fetch dest  : index into colormap
        and     bl,0x7f
        mov     dl,[eax]
        mov     dl,[edx]
        mov     [edi],dl
ph:     add     edi,0x12345678

;; test if there was at least 4 pixels

trf3:   test    ch,0xff                 ;; test quad count
        je      near vtdone

;; tystep : ystep frac. in the upper 16 bits (copied from ebp below)
;; ecx : y frac. in the upper 16 bits, ch = counter, cl = y step i.
;; ebx : y i. lower 7 bits, masked for index
;; eax : dc_transmap, ah = texel, al = dest pixel
;; edx : dc_colormap, dl = work byte
;; esi : source texture column
;; edi, ebp : two alternating dest screen pointers

vt4quadloop:
        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        mov     [tystep],ebp
pi:     add     edi,0x12345678
        mov     al,[edi]                ;; fetch dest  : index into colormap
pj:     sub     edi,0x12345678
        mov     ebp,edi
pk:     sub     edi,0x12345678
        jmp     short inloop
        align   4
vtquadloop:
        add     ecx,[tystep]
        adc     bl,cl
q1:     add     ebp,0x23456789
        and     bl,0x7f
        mov     dl,[eax]
        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        mov     dl,[edx]
        mov     [edi],dl
        mov     al,[ebp]                ;; fetch dest : index into colormap
inloop:
        add     ecx,[tystep]
        adc     bl,cl
q2:     add     edi,0x23456789
        and     bl,0x7f
        mov     dl,[eax]
        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        mov     dl,[edx]
        mov     [ebp+0x0],dl
        mov     al,[edi]                ;; fetch dest : index into colormap

        add     ecx,[tystep]
        adc     bl,cl
q3:     add     ebp,0x23456789
        and     bl,0x7f
        mov     dl,[eax]
        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        mov     dl,[edx]
        mov     [edi],dl
        mov     al,[ebp]                ;; fetch dest : index into colormap

        add     ecx,[tystep]
        adc     bl,cl
q4:     add     edi,0x23456789
        and     bl,0x7f
        mov     dl,[eax]
        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        mov     dl,[edx]
        mov     [ebp],dl
        mov     al,[edi]                ;; fetch dest : index into colormap

        dec     ch
        jne     vtquadloop
vtdone:
        pop     ebx
        pop     edi
        pop     esi
        pop     ebp
        ret                             ;; without this, execution runs on
                                        ;; into R_DrawShadeColumn_8
;;----------------------------------------------------------------------
;; R_DrawShadeColumn
;;
;;   for smoke..etc.. test.
;;
;; Per pixel: ah = source texel, al = pixel already on screen,
;; dl = [eax] (lookup relative to the pointer loaded from colormaps),
;; then dl is stored back to the screen.
;; NOTE(review): this replaces the low 16 bits of that pointer, so it
;; presumably relies on the table being 64K aligned - confirm.
;;
;; Takes no stack arguments.  The 0x12345678 immediates at pl..pq and
;; q5..q8 are placeholders; ASM_PatchRowBytes stores the row stride into
;; pl..pq and twice the row stride into q5..q8.  ebp, esi, edi and ebx are
;; saved and restored; eax, ecx, edx are clobbered.
;;----------------------------------------------------------------------
cglobal R_DrawShadeColumn_8
R_DrawShadeColumn_8:
        push    ebp                     ;; preserve caller's stack frame pointer
        push    esi                     ;; preserve register variables
        push    edi
        push    ebx

;; dest = ylookup[dc_yl] + columnofs[dc_x];

        mov     ebp,[dc_yl]
        mov     ebx,ebp
        mov     edi,[ylookup+ebx*4]
        mov     ebx,[dc_x]
        add     edi,[columnofs+ebx*4]   ;; edi = dest

;; pixelcount = yh - yl + 1

        mov     eax,[dc_yh]
        inc     eax
        sub     eax,ebp                 ;; pixel count
        mov     [pixelcount],eax        ;; save for final pixel
        jle     near shdone             ;; nothing to scale

;; frac = dc_texturemid - (centery-dc_yl)*fracstep;

        mov     ecx,[dc_iscale]         ;; fracstep
        mov     eax,[centery]
        sub     eax,ebp
        imul    eax,ecx
        mov     edx,[dc_texturemid]
        sub     edx,eax
        mov     ebx,edx
        shr     ebx,16                  ;; frac int.
        and     ebx,byte +0x7f
        shl     edx,16                  ;; y frac up

        mov     ebp,ecx
        shl     ebp,16                  ;; fracstep f. up
        shr     ecx,16                  ;; fracstep i. ->cl
        and     cl,0x7f

        mov     esi,[dc_source]

;; lets rock :) !

        mov     eax,[pixelcount]
        mov     dh,al                   ;; dh = low byte of the pixel count
        shr     eax,2
        mov     ch,al                   ;; quad count
        mov     eax,[colormaps]
        test    dh,3
        je      sh4quadloop

;; do un-even pixel

        test    dh,0x1
        je      shf2

        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        add     edx,ebp
        adc     bl,cl
        mov     al,[edi]                ;; fetch dest  : index into colormap
        and     bl,0x7f
        mov     dl,[eax]
        mov     [edi],dl
pl:     add     edi,0x12345678

;; do two non-quad-aligned pixels

shf2:
        test    dh,0x2
        je      shf3

        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        add     edx,ebp
        adc     bl,cl
        mov     al,[edi]                ;; fetch dest  : index into colormap
        and     bl,0x7f
        mov     dl,[eax]
        mov     [edi],dl
pm:     add     edi,0x12345678

        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        add     edx,ebp
        adc     bl,cl
        mov     al,[edi]                ;; fetch dest  : index into colormap
        and     bl,0x7f
        mov     dl,[eax]
        mov     [edi],dl
pn:     add     edi,0x12345678

;; test if there was at least 4 pixels

shf3:
        test    ch,0xff                 ;; test quad count
        je      near shdone

;; tystep : ystep frac. in the upper 16 bits (copied from ebp below)
;; edx : y frac. in the upper 16 bits, dh = 0x7f mask, dl = work byte
;; ebx : y i. lower 7 bits, masked for index
;; ecx : ch = counter, cl = y step i.
;; eax : pointer loaded from colormaps, ah = texel, al = dest pixel
;; esi : source texture column
;; edi, ebp : two alternating dest screen pointers

sh4quadloop:
        mov     dh,0x7f                 ;; prep mask
        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        mov     [tystep],ebp
po:     add     edi,0x12345678
        mov     al,[edi]                ;; fetch dest  : index into colormap
pp:     sub     edi,0x12345678
        mov     ebp,edi
pq:     sub     edi,0x12345678
        jmp     short shinloop

        align   4
shquadloop:
        add     edx,[tystep]
        adc     bl,cl
        and     bl,dh
q5:     add     ebp,0x12345678
        mov     dl,[eax]
        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        mov     [edi],dl
        mov     al,[ebp]                ;; fetch dest : index into colormap
shinloop:
        add     edx,[tystep]
        adc     bl,cl
        and     bl,dh
q6:     add     edi,0x12345678
        mov     dl,[eax]
        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        mov     [ebp],dl
        mov     al,[edi]                ;; fetch dest : index into colormap

        add     edx,[tystep]
        adc     bl,cl
        and     bl,dh
q7:     add     ebp,0x12345678
        mov     dl,[eax]
        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        mov     [edi],dl
        mov     al,[ebp]                ;; fetch dest : index into colormap

        add     edx,[tystep]
        adc     bl,cl
        and     bl,dh
q8:     add     edi,0x12345678
        mov     dl,[eax]
        mov     ah,[esi+ebx]            ;; fetch texel : colormap number
        mov     [ebp],dl
        mov     al,[edi]                ;; fetch dest : index into colormap

        dec     ch
        jne     shquadloop

shdone:
        pop     ebx                     ;; restore register variables
        pop     edi
        pop     esi
        pop     ebp                     ;; restore caller's stack frame pointer
        ret                             ;; without this, execution runs on
                                        ;; into R_DrawSpan_8
;;----------------------------------------------------------------------
;;
;; R_DrawSpan
;;
;;   Horizontal texture mapping
;;
;; Takes no stack arguments; reads ds_x1, ds_x2, ds_y, ds_xfrac, ds_yfrac,
;; ds_xstep, ds_ystep, ds_source and ds_colormap and writes ds_x2-ds_x1+1
;; colormapped pixels starting at ylookup[ds_y] + columnofs[ds_x1].
;; ebp, esi, edi and ebx are saved and restored; eax, ecx, edx are
;; clobbered.  pixelcount and tystep are used as step storage.
;;----------------------------------------------------------------------


[SECTION .data]

oldcolormap     dd      0

[SECTION CODE_SEG write]

cglobal R_DrawSpan_8
R_DrawSpan_8:
        push    ebp                     ;; preserve caller's stack frame pointer
        push    esi                     ;; preserve register variables
        push    edi
        push    ebx

;; initialise registers

        mov     edx, [ds_xfrac]
        mov     eax, [ds_ystep]
        ror     edx, 14
        ror     eax, 15
        mov     bl, dl
        mov     ecx, [ds_xstep]
        mov     dh, al
        mov     ax, 1                   ;; low word of tystep: counter increment
        mov     [tystep], eax


        mov     eax, [ds_yfrac]
        ror     ecx, 13
        ror     eax, 16
        mov     dl, cl
        mov     bh, al
        xor     cx, cx
        and     ebx, 0x3fff
        mov     [pixelcount],ecx

        mov     ecx, [ds_x2]
        mov     edi, [ds_y]
        mov     esi, [ds_x1]
        mov     edi, [ylookup+edi*4]
        mov     ebp, ebx
        add     edi, [columnofs+esi*4]
        sub     esi, ecx                ;; x1 - x2 : minus (pixel count - 1)
        shr     ebp, 2
        mov     ecx, [ds_colormap]
        mov     ax, si
        mov     esi, [ds_source]
        sar     ax,1                    ;; ax = negative pair counter
        jnc     near .midloop           ;; check parity

; summary
; edx = upper 16 bits: x frac, dh = y step byte, dl = x step byte
; ebx = upper 16 bits 0, bh = y position (6 bits), bl = x position byte
; ecx = colormap table, cl = work byte (colormap is aligned 8 bits)
; eax = upper 16 bits: y frac, ax = count (negative, counts up to 0)
; esi = flat texture source
; edi = screen buffer destination
; ebp = work register (ebx >> 2, offset into the flat)
; pixelcount = upper 16 bits: x step fraction, low 16 bits 0
; tystep     = upper 16 bits: y step fraction, low 16 bits = 1
;              (increment of count)

        align   4
.loop:
        add     eax, [tystep]           ;; y frac += step, count += 1
        mov     cl, [esi+ebp]
        adc     bh, dh
        mov     cl, [ecx]
        and     bh, 0x3f
        mov     [edi], cl
        mov     ebp, ebx
        inc     edi
        shr     ebp, 2

.midloop:
        add     edx, [pixelcount]       ;; x frac += step
        mov     cl, [esi+ebp]
        adc     bl, dl
        mov     cl, [ecx]
        mov     ebp, ebx
        mov     [edi], cl
        inc     edi
        shr     ebp, 2

        test    eax, 0xffff             ;; count reached 0 ?
        jnz     near .loop

.hdone: pop     ebx                     ;; restore register variables
        pop     edi
        pop     esi
        pop     ebp                     ;; restore caller's stack frame pointer
        ret                             ;; without this, execution runs off
                                        ;; the end of the routine
[SECTION .data]

obelix          dd      0               ;; composite x/y step of R_DrawSpan_8_old
etaussi         dd      0               ;; copy of ds_source for R_DrawSpan_8_old

[SECTION CODE_SEG]

;; R_DrawSpan_8_old
;;
;; Takes no stack arguments; reads ds_x1, ds_x2, ds_y, ds_xfrac, ds_yfrac,
;; ds_xstep, ds_ystep, ds_source and ds_colormap.  Pixels are written in
;; pairs (one 16-bit store per pair) plus one final byte when the pixel
;; count is odd.  The texture offset is a 12-bit value built from 6 bits of
;; each coordinate.  ebp, esi, edi and ebx are saved and restored;
;; eax, ecx, edx are clobbered.
cglobal R_DrawSpan_8_old
R_DrawSpan_8_old:
        push    ebp                     ;; preserve caller's stack frame pointer
        push    esi                     ;; preserve register variables
        push    edi
        push    ebx

;; find loop count

        mov     eax,[ds_x2]
        inc     eax
        sub     eax,[ds_x1]             ;; pixel count
        mov     [pixelcount],eax        ;; save for final pixel
        js      near .hdone             ;; nothing to scale
        shr     eax,0x1                 ;; double pixel count
        mov     [loopcount],eax

;; build composite position

        mov     ebp,[ds_xfrac]
        shl     ebp,10
        and     ebp,0xffff0000
        mov     eax,[ds_yfrac]
        shr     eax,6
        and     eax,0xffff
        mov     edi,[ds_y]
        or      ebp,eax

        mov     esi,[ds_source]

;; calculate screen dest

        mov     edi,[ylookup+edi*4]
        mov     eax,[ds_x1]
        add     edi,[columnofs+eax*4]

;; build composite step

        mov     ebx,[ds_xstep]
        shl     ebx,10
        and     ebx,0xffff0000
        mov     eax,[ds_ystep]
        shr     eax,6
        and     eax,0xffff
        or      ebx,eax

        mov     [obelix],ebx
        mov     [etaussi],esi

;; %eax      aligned colormap
;; %ebx      aligned colormap
;; %ecx,%edx scratch
;; %esi      virtual source
;; %edi      moving destination pointer
;; %ebp      frac

        mov     eax,[ds_colormap]
        mov     ecx,ebp
        add     ebp,ebx                 ;; advance frac pointer
        shr     cx,10
        rol     ecx,6
        and     ecx,4095                ;; finish calculation for first pixel
        mov     edx,ebp
        shr     dx,10
        rol     edx,6
        add     ebp,ebx                 ;; advance frac pointer
        and     edx,4095                ;; finish calculation for second pixel
        mov     ebx,eax
        mov     al,[esi+ecx]            ;; get first pixel
        mov     bl,[esi+edx]            ;; get second pixel

        test    dword [pixelcount],0xfffffffe

        mov     dl,[eax]                ;; color translate first pixel

;;      movw    $0xf0f0,%dx             ;;see visplanes start

        je      .hchecklast             ;; fewer than two pixels

        mov     dh,[ebx]                ;; color translate second pixel
        mov     esi,[loopcount]
        align   4
.hdoubleloop:
        mov     ecx,ebp
        shr     cx,10
        rol     ecx,6
        add     ebp,[obelix]            ;; advance frac pointer
        mov     [edi],dx                ;; write first pixel
        and     ecx,4095                ;; finish calculation for third pixel
        mov     edx,ebp
        shr     dx,10
        rol     edx,6
        add     ecx,[etaussi]
        and     edx,4095                ;; finish calculation for fourth pixel
        mov     al,[ecx]                ;; get third pixel
        add     ebp,[obelix]            ;; advance frac pointer
        add     edx,[etaussi]
        mov     bl,[edx]                ;; get fourth pixel
        mov     dl,[eax]                ;; color translate third pixel
        add     edi,byte +0x2           ;; advance to third pixel destination
        dec     esi                     ;; done with loop?
        mov     dh,[ebx]                ;; color translate fourth pixel
        jne     .hdoubleloop
;; check for final pixel
.hchecklast:
        test    dword [pixelcount],0x1
        je      .hdone
        mov     [edi],dl                ;; write final pixel
.hdone: pop     ebx                     ;; restore register variables
        pop     edi
        pop     esi
        pop     ebp                     ;; restore caller's stack frame pointer
        ret                             ;; without this, execution runs on
                                        ;; into rasterize_segment_tex
;; ========================================================================
;;  Rasterisation of the edge segments of a LINEARLY textured polygon.
;;  The texture coordinates along the edges are interpolated together with
;;  the minx/maxx abscissas of every line.
;;  The 'dir' argument tells which texture edges are interpolated:
;;    0 : segments associated with the TOP and BOTTOM edges ( TY constant )
;;    1 : segments associated with the LEFT and RIGHT edges ( TX constant )
;; ========================================================================
;;
;;  void rasterize_segment_tex( LONG x1, LONG y1, LONG x2, LONG y2, LONG tv1, LONG tv2, LONG tc, LONG dir );
;;                                   ARG1     ARG2     ARG3     ARG4      ARG5      ARG6     ARG7      ARG8
;;
;;  For dir = 0, (tv1,tv2) = (tX1,tX2), tc = tY, since TY is constant.
;;
;;  For dir = 1, (tv1,tv2) = (tY1,tY2), tc = tX, since TX is constant.
;;
;;
;;  Uses:  extern struct rastery *_rastertab;
;;
;;  Arguments are read from the stack through ebp ([ebp+0x8] = x1 ...
;;  [ebp+0x24] = dir).  Values are stored as 16.16 fixed point (<<16).
;;  y1 > y2 fills minx/tx1/ty1 of rastertab[y2 .. y1] (left side),
;;  y1 < y2 fills maxx/tx2/ty2 of rastertab[y1 .. y2] (right side),
;;  y1 == y2 does nothing.
;;  ebx, esi, edi, ebp and es are saved and restored; eax, ecx, edx are
;;  clobbered.

[SECTION CODE_SEG write]

;; field offsets inside one rastertab[] entry
MINX            EQU    0
MAXX            EQU    4
TX1             EQU    8
TY1             EQU    12
TX2             EQU    16
TY2             EQU    20
RASTERY_SIZEOF  EQU    24

cglobal rasterize_segment_tex
rasterize_segment_tex:
        push    ebp
        mov     ebp,esp

        sub     esp,byte +0x8           ;; allocate the local variables

        push    ebx
        push    esi
        push    edi
        o16 mov ax,es
        push    eax

;;  #define DX       [ebp-4]
;;  #define TD       [ebp-8]

        mov     eax,[ebp+0xc]           ;; y1
        mov     ebx,[ebp+0x14]          ;; y2
        cmp     ebx,eax
        je      near .L_finished        ;; special (y1==y2) horizontal segment, exit!

        jg      near .L_rasterize_right

;;rasterize_left:       ;; rasterize a segment on the LEFT side of the polygon

        mov     ecx,eax
        sub     ecx,ebx
        inc     ecx                     ;; y1-y2+1

        mov     eax,RASTERY_SIZEOF
        mul     ebx                     ;; * y2
        mov     esi,[prastertab]
        add     esi,eax                 ;; point into rastertab[y2]

        mov     eax,[ebp+0x8]           ;; ARG1
        sub     eax,[ebp+0x10]          ;; ARG3
        shl     eax,0x10                ;;     ((x1-x2)<<PRE) ...
        cdq                             ;; sign-extend eax into edx (edx still
                                        ;; holds the high half of the mul)
        idiv    ecx                     ;; dx =     ...        / (y1-y2+1)
        mov     [ebp-0x4],eax           ;; DX

        mov     eax,[ebp+0x18]          ;; ARG5
        sub     eax,[ebp+0x1c]          ;; ARG6
        shl     eax,0x10
        cdq                             ;; sign-extend the dividend
        idiv    ecx                     ;;      tdx =((tx1-tx2)<<PRE) / (y1-y2+1)
        mov     [ebp-0x8],eax           ;; idem tdy =((ty1-ty2)<<PRE) / (y1-y2+1)

        mov     eax,[ebp+0x10]          ;; ARG3
        shl     eax,0x10                ;; x = x2<<PRE

        mov     ebx,[ebp+0x1c]          ;; ARG6
        shl     ebx,0x10                ;; tx = tx2<<PRE    d0
                                        ;; ty = ty2<<PRE    d1
        mov     edx,[ebp+0x20]          ;; ARG7
        shl     edx,0x10                ;; ty = ty<<PRE     d0
                                        ;; tx = tx<<PRE     d1
        push    ebp                     ;; ebp is reused as TD inside the loops
        mov     edi,[ebp-0x4]           ;; DX
        cmp     dword [ebp+0x24],byte +0x0  ;; ARG8   direction ?

        mov     ebp,[ebp-0x8]           ;; TD (mov keeps the flags of the cmp)
        je      .L_rleft_h_loop

;; TY varies, TX is constant

.L_rleft_v_loop:
        mov     [esi+MINX],eax          ;; rastertab[y].minx = x
        add     ebx,ebp
        mov     [esi+TX1],edx           ;;             .tx1  = tx
        add     eax,edi
        mov     [esi+TY1],ebx           ;;             .ty1  = ty

        ;;addl    DX, %eax        // x     += dx
        ;;addl    TD, %ebx        // ty    += tdy

        add     esi,RASTERY_SIZEOF      ;; next raster line into rastertab[]
        dec     ecx
        jne     .L_rleft_v_loop
        pop     ebp
        jmp     .L_finished

;; TX varies, TY is constant

.L_rleft_h_loop:
        mov     [esi+MINX],eax          ;; rastertab[y].minx = x
        add     eax,edi
        mov     [esi+TX1],ebx           ;;             .tx1  = tx
        add     ebx,ebp
        mov     [esi+TY1],edx           ;;             .ty1  = ty

        ;;addl    DX, %eax        // x     += dx
        ;;addl    TD, %ebx        // tx    += tdx

        add     esi,RASTERY_SIZEOF      ;; next raster line into rastertab[]
        dec     ecx
        jne     .L_rleft_h_loop
        pop     ebp
        jmp     .L_finished

;; rasterize a segment on the RIGHT side of the polygon

.L_rasterize_right:
        mov     ecx,ebx
        sub     ecx,eax
        inc     ecx                     ;; y2-y1+1

        mov     ebx,RASTERY_SIZEOF
        mul     ebx                     ;; * y1
        mov     esi,[prastertab]
        add     esi,eax                 ;; point into rastertab[y1]

        mov     eax,[ebp+0x10]          ;; ARG3
        sub     eax,[ebp+0x8]           ;; ARG1
        shl     eax,0x10                ;; ((x2-x1)<<PRE) ...
        cdq                             ;; sign-extend eax into edx (edx still
                                        ;; holds the high half of the mul)
        idiv    ecx                     ;; dx =     ...        / (y2-y1+1)
        mov     [ebp-0x4],eax           ;; DX

        mov     eax,[ebp+0x1c]          ;; ARG6
        sub     eax,[ebp+0x18]          ;; ARG5
        shl     eax,0x10
        cdq                             ;; sign-extend the dividend
        idiv    ecx                     ;;       tdx =((tx2-tx1)<<PRE) / (y2-y1+1)
        mov     [ebp-0x8],eax           ;;  idem tdy =((ty2-ty1)<<PRE) / (y2-y1+1)

        mov     eax,[ebp+0x8]           ;; ARG1
        shl     eax,0x10                ;; x = x1<<PRE

        mov     ebx,[ebp+0x18]          ;; ARG5
        shl     ebx,0x10                ;; tx = tx1<<PRE    d0
                                        ;; ty = ty1<<PRE    d1
        mov     edx,[ebp+0x20]          ;; ARG7
        shl     edx,0x10                ;; ty = ty<<PRE     d0
                                        ;; tx = tx<<PRE     d1
        push    ebp                     ;; ebp is reused as TD inside the loops
        mov     edi,[ebp-0x4]           ;; DX

        cmp     dword [ebp+0x24], 0     ;; direction ?

        mov     ebp,[ebp-0x8]           ;; TD (mov keeps the flags of the cmp)
        je      .L_rright_h_loop

;; TY varies, TX is constant

.L_rright_v_loop:

        mov     [esi+MAXX],eax          ;; rastertab[y].maxx = x
        add     ebx,ebp
        mov     [esi+TX2],edx           ;;             .tx2  = tx
        add     eax,edi
        mov     [esi+TY2],ebx           ;;             .ty2  = ty

        ;;addl    DX, %eax        // x     += dx
        ;;addl    TD, %ebx        // ty    += tdy

        add     esi,RASTERY_SIZEOF
        dec     ecx
        jne     .L_rright_v_loop

        pop     ebp

        jmp     short .L_finished

;; TX varies, TY is constant

.L_rright_h_loop:
        mov     [esi+MAXX],eax          ;; rastertab[y].maxx = x
        add     eax,edi
        mov     [esi+TX2],ebx           ;;             .tx2  = tx
        add     ebx,ebp
        mov     [esi+TY2],edx           ;;             .ty2  = ty

        ;;addl    DX, %eax        // x     += dx
        ;;addl    TD, %ebx        // tx    += tdx

        add     esi,RASTERY_SIZEOF
        dec     ecx
        jne     .L_rright_h_loop

        pop     ebp

.L_finished:
        pop     eax
        o16 mov es,ax
        pop     edi
        pop     esi
        pop     ebx

        mov     esp,ebp
        pop     ebp
        ret                             ;; without this, execution runs off
                                        ;; the end of the routine
1315 ;;; this version can draw 64x64 tiles, but they would have to be arranged 4 per row,
1316 ;; so that the stride from one line to the next is 256
1318 ;; .data
1319 ;;xstep dd 0
1320 ;;ystep dd 0
1321 ;;texwidth dd 64 ;; texture width
1322 ;; .text
1323 ;; this code is kept in case we add high-detail floor textures for example (256x256)
1324 ; align 16
1325 ;_R_DrawSpan_8:
1326 ; push ebp ;; preserve caller's stack frame pointer
1327 ; push esi ;; preserve register variables
1328 ; push edi
1329 ; push ebx
1331 ;; find loop count
1333 ; mov eax,[ds_x2]
1334 ; inc eax
1335 ; sub eax,[ds_x1] ;; pixel count
1336 ; mov [pixelcount],eax ;; save for final pixel
1337 ; js near .hdone ;; nothing to scale
1339 ;; calculate screen dest
1341 ; mov edi,[ds_y]
1342 ; mov edi,[ylookup+edi*4]
1343 ; mov eax,[ds_x1]
1344 ; add edi,[columnofs+eax*4]
1346 ;; prepare registers for inner loop
1348 ; xor eax,eax
1349 ; mov edx,[ds_xfrac]
1350 ; ror edx,16
1351 ; mov al,dl
1352 ; mov ecx,[ds_yfrac]
1353 ; ror ecx,16
1354 ; mov ah,cl
1356 ; mov ebx,[ds_xstep]
1357 ; ror ebx,16
1358 ; mov ch,bl
1359 ; and ebx,0xffff0000
1360 ; mov [xstep],ebx
1361 ; mov ebx,[ds_ystep]
1362 ; ror ebx,16
1363 ; mov dh,bl
1364 ; and ebx,0xffff0000
1365 ; mov [ystep],ebx
1367 ; mov esi,[ds_source]
1369 ;;; %eax Yi,Xi in %ah,%al
1370 ;;; %ebx aligned colormap
1371 ;;; %ecx Yfrac upper, dXi in %ch, %cl is counter (upto 1024pels, =4x256)
1372 ;;; %edx Xfrac upper, dYi in %dh, %dl receives mapped pixels from (ebx)
1373 ;;; ystep dYfrac, add to %ecx, low word is 0
1374 ;;; xstep dXfrac, add to %edx, low word is 0
1375 ;;; %ebp temporary register serves as offset like %eax
1376 ;;; %esi virtual source
1377 ;;; %edi moving destination pointer
1379 ; mov ebx,[pixelcount]
1380 ; shr ebx,0x2 ;; 4 pixels per loop
1381 ; test bl,0xff
1382 ; je near .hchecklast
1383 ; mov cl,bl
1385 ; mov ebx,[dc_colormap]
1387 ;;; prepare loop with first pixel
1389 ; add ecx,[ystep] ;;pr‚a1
1390 ; adc ah,dh
1391 ; add edx,[xstep]
1392 ; adc al,ch
1393 ; and eax,0x3f3f
1394 ; mov bl,[esi+eax] ;;pr‚b1
1395 ; mov dl,[ebx] ;;pr‚c1
1397 ; add ecx,[ystep] ;;a2
1398 ; adc ah,dh
1400 ;.hdoubleloop:
1401 ; mov [edi+1],dl
1402 ; add edx,[xstep]
1403 ; adc al,ch
1404 ; add edi,byte +0x2
1405 ; mov ebp,eax
1406 ; add ecx,[ystep]
1407 ; adc ah,dh
1408 ; and ebp,0x3f3f
1409 ; add edx,[xstep]
1410 ; mov bl,[esi+ebp]
1411 ; adc al,ch
1412 ; mov dl,[ebx]
1413 ; and eax,0x3f3f
1414 ; mov [edi],dl
1415 ; mov bl,[esi+eax]
1416 ; add ecx,[ystep]
1417 ; adc ah,dh
1418 ; add edx,[xstep]
1419 ; adc al,ch
1420 ; mov dl,[ebx]
1421 ; mov ebp,eax
1422 ; mov [edi+1],dl
1423 ; and ebp,0x3f3f
1424 ; add ecx,[ystep]
1425 ; adc ah,dh
1426 ; mov bl,[esi+ebp]
1427 ; add edi,byte +0x2
1428 ; add edx,[xstep]
1429 ; adc al,ch
1430 ; mov dl,[ebx]
1431 ; and eax,0x3f3f
1432 ; mov [edi],dl
1433 ; mov bl,[esi+eax]
1434 ; add ecx,[ystep]
1435 ; adc ah,dh
1436 ; mov dl,[ebx]
1437 ; dec cl
1438 ; jne near .hdoubleloop
1439 ;;; check for final pixel
1440 ;.hchecklast:
1441 ;;; to do
1442 ;.hdone:
1443 ; pop ebx
1444 ; pop edi
1445 ; pop esi
1446 ; pop ebp
1447 ; ret