Replace Tmem_nasm.asm with C++ code. Patch by pyro.
[Glide64.git] / DepthBufferRender.cpp
blob8e75806134e80739d589d16d48989e96d207979e
1 /*
2 * Glide64 - Glide video plugin for Nintendo 64 emulators.
3 * Copyright (c) 2002 Dave2001
4 * Copyright (c) 2008 Günther <guenther.emu@freenet.de>
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 //****************************************************************
23 // Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64)
24 // Project started on December 29th, 2001
26 // To modify Glide64:
27 // * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
28 // * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
30 // Official Glide64 development channel: #Glide64 on EFnet
32 //****************************************************************
34 // Software rendering into N64 depth buffer
35 // Idea and N64 depth value format by Orkin
36 // Polygon rasterization algorithm is taken from FATMAP2 engine by Mats Byggmastar, mri@penti.sit.fi
38 // Created by Gonetz, Dec 2004
40 //****************************************************************
42 #include "Gfx1.3.h"
43 #include "rdp.h"
44 #include "DepthBufferRender.h"
46 WORD * zLUT = 0;
48 void ZLUT_init()
50 if (zLUT)
51 return;
52 zLUT = new WORD[0x40000];
53 for(int i=0; i<0x40000; i++)
55 DWORD exponent = 0;
56 DWORD testbit = 1 << 17;
57 while((i & testbit) && (exponent < 7))
59 exponent++;
60 testbit = 1 << (17 - exponent);
63 DWORD mantissa = (i >> (6 - (6 < exponent ? 6 : exponent))) & 0x7ff;
64 zLUT[i] = (WORD)(((exponent << 11) | mantissa) << 2);
67 for(i=0; i<0x40000; i++)
69 int j = i + 1;
70 WORD z = zLUT[i];
71 while (zLUT[i] == zLUT[j])
72 j++;
73 int w = (j - i) >> 2;
74 if (w > 0)
76 int k;
77 for (k = 1; k < 4; k++)
78 for (int t = 0; t < w; t++)
79 zLUT[i+k*w+t] = z + k;
80 i = j - 1;
86 void ZLUT_release()
88 delete[] zLUT;
89 zLUT = 0;
92 static vertexi * max_vtx; // Max y vertex (ending vertex)
93 static vertexi * start_vtx, * end_vtx; // First and last vertex in array
94 static vertexi * right_vtx, * left_vtx; // Current right and left vertex
96 static int right_height, left_height;
97 static int right_x, right_dxdy, left_x, left_dxdy;
98 static int left_z, left_dzdy;
// Rounds a 16.16 fixed-point value up to the next whole number and returns
// that integer (not a fixed-point value).
__inline int iceil(int x)
{
  // Adding "almost one" (0xffff) before dropping the fraction bits rounds up.
  return (x + 0xffff) >> 16;
}
// Fixed-point multiply: returns (x * y) >> 16.
// The product is formed in 64 bits so it cannot overflow before the shift.
__inline int imul16(int x, int y)
{
  const long long product = (long long)x * (long long)y;
  return (int)(product >> 16);
}
// Fixed-point multiply: returns (x * y) >> 14.
// The product is formed in 64 bits so it cannot overflow before the shift.
__inline int imul14(int x, int y)
{
  const long long product = (long long)x * (long long)y;
  return (int)(product >> 14);
}
// 16.16 fixed-point divide: returns (x << 16) / y, truncated toward zero.
//
// Preconditions (unchanged from the assembly version this replaces):
//   * y must not be zero;
//   * the quotient must fit in a 32-bit int.
//
// This used to be x86 inline assembly (Watcom, MSVC and GCC flavours).
// Plain 64-bit arithmetic gives the same result on every target, including
// compilers without inline assembly support, and avoids the old GCC variant's
// "g" constraint, which could hand `idiv` an immediate operand.
__inline int idiv16(int x, int y)        // (x << 16) / y
{
  // Widen first so the 48-bit intermediate cannot overflow. Multiply instead
  // of shifting: left-shifting a negative value is undefined behaviour.
  const long long dividend = (long long)x * 65536;
  return (int)(dividend / y);
}
149 void inner(void * dst, int width, int i);
150 #pragma aux inner = \
151 " rol ebx, 16 "\
152 " mov edx, [didx_frac] "\
153 " mov al, bl "\
154 " mov ah, byte ptr [didx_whole] "\
155 " next: "\
156 " mov [edi], al "\
157 " add ebx, edx "\
158 " adc al, ah "\
159 " inc edi "\
160 " dec ecx "\
161 " jnz next "\
162 parm [edi] [ecx] [ebx] modify [eax ebx ecx edx edi]
166 inline void inner(void * dst, int width, int i)
168 __asm {
169 mov edi, dst
170 mov ecx, width
171 mov ebx, i
172 rol ebx, 16
173 mov edx, [didx_frac]
174 mov al, bl
175 mov ah, byte ptr [didx_whole]
176 next:
177 mov [edi], al
178 add ebx, edx
179 adc al, ah
180 inc edi
181 dec ecx
182 jnz next
187 inline void inner(WORD * dst, int shift, int width, int i, int didx)
189 int z;
190 int idx;
191 WORD encodedZ;
192 for (int x = 0; x < width; x++)
194 z = i/8192;
195 if (z < 0) z = 0;
196 else if (z > 0x3FFFF) z = 0x3FFFF;
197 encodedZ = zLUT[z];
198 idx = (shift+x)^1;
199 if(encodedZ < dst[idx])
200 dst[idx] = encodedZ;
201 i += didx;
206 static void RightSection(void)
208 // Walk backwards trough the vertex array
210 vertexi * v2, * v1 = right_vtx;
211 if(right_vtx > start_vtx) v2 = right_vtx-1;
212 else v2 = end_vtx; // Wrap to end of array
213 right_vtx = v2;
215 // v1 = top vertex
216 // v2 = bottom vertex
218 // Calculate number of scanlines in this section
220 right_height = iceil(v2->y) - iceil(v1->y);
221 if(right_height <= 0) return;
223 // Guard against possible div overflows
225 if(right_height > 1) {
226 // OK, no worries, we have a section that is at least
227 // one pixel high. Calculate slope as usual.
229 int height = v2->y - v1->y;
230 right_dxdy = idiv16(v2->x - v1->x, height);
232 else {
233 // Height is less or equal to one pixel.
234 // Calculate slope = width * 1/height
235 // using 18:14 bit precision to avoid overflows.
237 int inv_height = (0x10000 << 14) / (v2->y - v1->y);
238 right_dxdy = imul14(v2->x - v1->x, inv_height);
241 // Prestep initial values
243 int prestep = (iceil(v1->y) << 16) - v1->y;
244 right_x = v1->x + imul16(prestep, right_dxdy);
247 static void LeftSection(void)
249 // Walk forward trough the vertex array
251 vertexi * v2, * v1 = left_vtx;
252 if(left_vtx < end_vtx) v2 = left_vtx+1;
253 else v2 = start_vtx; // Wrap to start of array
254 left_vtx = v2;
256 // v1 = top vertex
257 // v2 = bottom vertex
259 // Calculate number of scanlines in this section
261 left_height = iceil(v2->y) - iceil(v1->y);
262 if(left_height <= 0) return;
264 // Guard against possible div overflows
266 if(left_height > 1) {
267 // OK, no worries, we have a section that is at least
268 // one pixel high. Calculate slope as usual.
270 int height = v2->y - v1->y;
271 left_dxdy = idiv16(v2->x - v1->x, height);
272 left_dzdy = idiv16(v2->z - v1->z, height);
274 else {
275 // Height is less or equal to one pixel.
276 // Calculate slope = width * 1/height
277 // using 18:14 bit precision to avoid overflows.
279 int inv_height = (0x10000 << 14) / (v2->y - v1->y);
280 left_dxdy = imul14(v2->x - v1->x, inv_height);
281 left_dzdy = imul14(v2->z - v1->z, inv_height);
284 // Prestep initial values
286 int prestep = (iceil(v1->y) << 16) - v1->y;
287 left_x = v1->x + imul16(prestep, left_dxdy);
288 left_z = v1->z + imul16(prestep, left_dzdy);
292 void Rasterize(vertexi * vtx, int vertices, int dzdx)
294 start_vtx = vtx; // First vertex in array
296 // Search trough the vtx array to find min y, max y
297 // and the location of these structures.
299 vertexi * min_vtx = vtx;
300 max_vtx = vtx;
302 int min_y = vtx->y;
303 int max_y = vtx->y;
305 vtx++;
307 for(int n=1; n<vertices; n++) {
308 if(vtx->y < min_y) {
309 min_y = vtx->y;
310 min_vtx = vtx;
312 else
313 if(vtx->y > max_y) {
314 max_y = vtx->y;
315 max_vtx = vtx;
317 vtx++;
320 // OK, now we know where in the array we should start and
321 // where to end while scanning the edges of the polygon
323 left_vtx = min_vtx; // Left side starting vertex
324 right_vtx = min_vtx; // Right side starting vertex
325 end_vtx = vtx-1; // Last vertex in array
327 // Search for the first usable right section
329 do {
330 if(right_vtx == max_vtx) return;
331 RightSection();
332 } while(right_height <= 0);
334 // Search for the first usable left section
336 do {
337 if(left_vtx == max_vtx) return;
338 LeftSection();
339 } while(left_height <= 0);
341 WORD * destptr = (WORD*)(gfx.RDRAM+rdp.zimg);
342 int y1 = iceil(min_y);
343 int shift;
344 //destptr += iceil(min_y) * rdp.zi_width;
346 for(;;)
348 int x1 = iceil(left_x);
349 int width = iceil(right_x) - x1;
351 if(width > 0) {
353 // Prestep initial color intensity i
355 if (y1 >= rdp.zi_lry) return;
356 //if (x1+width > rdp.zi_lrx) width = rdp.zi_lrx-x1;
357 int prestep = (x1 << 16) - left_x;
358 int z = left_z + imul16(prestep, dzdx);
360 // if (y1 > max_y) return;
361 // FRDP("Depth render. x1: %d, y1: %d, width: %d\n", x1, y1, width);
362 shift = x1 + y1*rdp.zi_width;
363 // if (shift + width > rdp.zi_nb_pixels)
364 // return;
365 //draw to depth buffer
366 int trueZ;
367 int idx;
368 WORD encodedZ;
369 for (int x = 0; x < width; x++)
371 trueZ = z/8192;
372 if (trueZ < 0) trueZ = 0;
373 else if (trueZ > 0x3FFFF) trueZ = 0x3FFFF;
374 encodedZ = zLUT[trueZ];
375 idx = (shift+x)^1;
376 if(encodedZ < destptr[idx])
377 destptr[idx] = encodedZ;
378 z += dzdx;
382 //destptr += rdp.zi_width;
383 y1++;
385 // Scan the right side
387 if(--right_height <= 0) { // End of this section?
388 do {
389 if(right_vtx == max_vtx) return;
390 RightSection();
391 } while(right_height <= 0);
393 else
394 right_x += right_dxdy;
396 // Scan the left side
398 if(--left_height <= 0) { // End of this section?
399 do {
400 if(left_vtx == max_vtx) return;
401 LeftSection();
402 } while(left_height <= 0);
404 else {
405 left_x += left_dxdy;
406 left_z += left_dzdy;