2 * File not compiled. Included from mesh_mrm_skinned.cpp. It is an "old school" template.
5 // NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
6 // Copyright (C) 2010 Winch Gate Property Limited
8 // This program is free software: you can redistribute it and/or modify
9 // it under the terms of the GNU Affero General Public License as
10 // published by the Free Software Foundation, either version 3 of the
11 // License, or (at your option) any later version.
13 // This program is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 // GNU Affero General Public License for more details.
18 // You should have received a copy of the GNU Affero General Public License
19 // along with this program. If not, see <http://www.gnu.org/licenses/>.
27 // ***************************************************************************
28 // ***************************************************************************
29 // Raw "Vertex/Normal only" ApplySkin methods.
30 // ***************************************************************************
31 // ***************************************************************************
33 // ace: before including this file, #define ADD_MESH_MRM_SKINNED_TEMPLATE to enable its content
34 // the goal is to be able to compile every .cpp file with no
35 // special case (GNU/Linux needs)
36 #ifdef ADD_MESH_MRM_SKINNED_TEMPLATE
39 #define NL3D_RAWSKIN_NORMAL_OFF 12
40 #define NL3D_RAWSKIN_UV_OFF 24
41 #define NL3D_RAWSKIN_VERTEX_SIZE 32
44 /* Speed Feature test.
45 Don't use precaching for now, because it seems to be slower on some configs (P4-2.4Ghz),
46 but may be faster on others (P3-800)
47 On a P4-2.4Ghz, for 40000 vertices skinned, both no precaching and asm
48 saves 27% of execution time in the applyRawSkinNormal*() loop (ie 1 ms)
50 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
51 //#define NL3D_RAWSKIN_PRECACHE
52 #define NL3D_RAWSKIN_ASM
56 // ***************************************************************************
// applyArrayRawSkinNormal1: skins source vertices that reference a single bone
// matrix (src->MatrixId[0]) and writes the result to destVertexPtr.
//   src           : array of CRawVertexNormalSkinned1 (Vertex, Normal, UV, MatrixId[0] are read).
//   destVertexPtr : output bytes; per vertex the position is written at offset 0, the
//                   normal at NL3D_RAWSKIN_NORMAL_OFF and the UV at NL3D_RAWSKIN_UV_OFF,
//                   with a stride of NL3D_RAWSKIN_VERTEX_SIZE.
//   boneMat3x4    : matrix array indexed by src->MatrixId[0].
//   nInf          : number of source vertices to process.
// NOTE(review): this listing has lost lines (braces, preprocessor else/endif, comment
// terminators and several asm instructions). The comments below describe only what is
// visible; confirm against the complete file before relying on them.
57 void CMeshMRMSkinnedGeom::applyArrayRawSkinNormal1(CRawVertexNormalSkinned1
*src
, uint8
*destVertexPtr
,
58 CMatrix3x4
*boneMat3x4
, uint nInf
)
60 // must write contiguously in AGP, and ASM is hardcoded...
// Compile-time checks: the asm below stores to the literal offsets 12 and 24.
61 nlctassert(NL3D_RAWSKIN_NORMAL_OFF
==12);
62 nlctassert(NL3D_RAWSKIN_UV_OFF
==24);
64 /*extern uint TESTYOYO_NumRawSkinVertices1;
65 TESTYOYO_NumRawSkinVertices1+= nInf;
66 H_AUTO( TestYoyo_RawSkin1 );*/
68 #ifdef NL3D_RAWSKIN_PRECACHE
// Precache variant: the work is cut into blocks of at most NumCacheVertexNormal1 vertices.
71 // number of vertices to process for this block.
72 uint nBlockInf
= min(NumCacheVertexNormal1
, nInf
);
76 // cache the data in L1 cache.
77 CFastMem::precache(src
, nBlockInf
* sizeof(CRawVertexNormalSkinned1
));
84 #ifndef NL3D_RAWSKIN_ASM
// Portable C++ path, compiled only when NL3D_RAWSKIN_ASM is not defined.
85 // for all InfluencedVertices only.
86 for(;nBlockInf
>0;nBlockInf
--, src
++, destVertexPtr
+=NL3D_RAWSKIN_VERTEX_SIZE
)
// dstVertex / dstNormal are set up here, but the visible writes below address
// destVertexPtr directly through casts.
88 CVector
*dstVertex
= (CVector
*)(destVertexPtr
);
89 CVector
*dstNormal
= (CVector
*)(destVertexPtr
+ NL3D_RAWSKIN_NORMAL_OFF
);
91 // For 1 matrix, can write directly to AGP (if destVertexPtr is AGP...)
// Position: mulSetPoint of src->Vertex by the selected matrix, stored at offset 0.
93 boneMat3x4
[ src
->MatrixId
[0] ].mulSetPoint( src
->Vertex
, *(CVector
*)(destVertexPtr
) );
// Normal: mulSetVector of src->Normal by the same matrix, stored at NL3D_RAWSKIN_NORMAL_OFF.
95 boneMat3x4
[ src
->MatrixId
[0] ].mulSetVector( src
->Normal
, *(CVector
*)(destVertexPtr
+ NL3D_RAWSKIN_NORMAL_OFF
) );
// UV is copied through unchanged.
97 *(CUV
*)(destVertexPtr
+ NL3D_RAWSKIN_UV_OFF
)= src
->UV
;
// ---- x86 inline-assembly variant of the loop above ----
// NOTE(review): presumably the alternative branch of the NL3D_RAWSKIN_ASM test; the
// directive itself is not visible in this listing.
100 // ASM hard coded for 36
101 nlctassert(sizeof(CRawVertexNormalSkinned1
)==36);
103 /* 116 cycles / loop typical
104 58 cycles / loop in theory (no memory problem)
// edi is loaded with the output cursor; [esi] addresses the current src element.
110 mov edi
, destVertexPtr
114 // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, *(CVector*)(destVertexPtr) );
// eax: loaded from src.MatrixId, then offset by edx to address the matrix.
// NOTE(review): edx presumably holds boneMat3x4 and eax is presumably scaled to a byte
// offset between these two instructions; neither is visible here -- confirm.
117 mov eax
, [esi
]src
.MatrixId
// uop: 0/1
120 add eax
, edx
// uop: 1/0
// Push Vertex.x, .y, .z on the x87 stack; the row computations below read them as st(n).
123 fld
[esi
]src
.Vertex
.x
// uop: 0/1
124 fld
[esi
]src
.Vertex
.y
// uop: 0/1
125 fld
[esi
]src
.Vertex
.z
// uop: 0/1
126 // vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
127 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
128 fmul st
, st(3) // uop: 1/0 (5)
129 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
130 fmul st
, st(3) // uop: 1/0 (5)
131 faddp
st(1), st
// uop: 1/0 (3)
132 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
133 fmul st
, st(2) // uop: 1/0 (5)
134 faddp
st(1), st
// uop: 1/0 (3)
135 fld
[eax
]CMatrix3x4
.a14
// uop: 0/1
136 faddp
st(1), st
// uop: 1/0 (3)
137 fstp dword ptr
[edi
] // uop: 0/0/1/1
// NOTE(review): for the y and z rows only the matrix loads and the final store are
// visible; the multiply/add steps shown for the x row are missing from this listing.
138 // vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
139 fld
[eax
]CMatrix3x4
.a21
141 fld
[eax
]CMatrix3x4
.a22
144 fld
[eax
]CMatrix3x4
.a23
147 fld
[eax
]CMatrix3x4
.a24
149 fstp dword ptr
[edi
+4]
150 // vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
151 fld
[eax
]CMatrix3x4
.a31
153 fld
[eax
]CMatrix3x4
.a32
156 fld
[eax
]CMatrix3x4
.a33
159 fld
[eax
]CMatrix3x4
.a34
161 fstp dword ptr
[edi
+8]
169 // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
// Normal: same matrix (eax). Only the a?1..a?3 terms are loaded below -- no a14/a24/a34
// load appears, even though the inherited "vout" comments still mention them.
// Results are stored at edi+12, +16, +20 (NL3D_RAWSKIN_NORMAL_OFF == 12).
172 fld
[esi
]src
.Normal
.x
173 fld
[esi
]src
.Normal
.y
174 fld
[esi
]src
.Normal
.z
175 // vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
176 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
177 fmul st
, st(3) // uop: 1/0 (5)
178 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
179 fmul st
, st(3) // uop: 1/0 (5)
180 faddp
st(1), st
// uop: 1/0 (3)
181 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
182 fmul st
, st(2) // uop: 1/0 (5)
183 faddp
st(1), st
// uop: 1/0 (3)
184 fstp dword ptr
[edi
+12] // uop: 0/0/1/1
185 // vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
186 fld
[eax
]CMatrix3x4
.a21
188 fld
[eax
]CMatrix3x4
.a22
191 fld
[eax
]CMatrix3x4
.a23
194 fstp dword ptr
[edi
+16]
195 // vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
196 fld
[eax
]CMatrix3x4
.a31
198 fld
[eax
]CMatrix3x4
.a32
201 fld
[eax
]CMatrix3x4
.a33
204 fstp dword ptr
[edi
+20]
212 // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
// UV: two 32-bit moves through eax into edi+24 / edi+28 (NL3D_RAWSKIN_UV_OFF == 24).
213 mov eax
, [esi
]src
.UV
.U
// uop: 0/1
214 mov dword ptr
[edi
+24], eax
// uop: 0/0/1/1
215 mov eax
, [esi
]src
.UV
.V
// uop: 0/1
216 mov dword ptr
[edi
+28], eax
// uop: 0/0/1/1
// Advance: source by 36 bytes (the size asserted above), destination by one output vertex.
// NOTE(review): the counter update feeding jnz and the theLoop label are not visible here.
220 add esi
, 36 // uop: 1/0
221 add edi
, NL3D_RAWSKIN_VERTEX_SIZE
// uop: 1/0
223 jnz theLoop
// uop: 1/1 (p1)
// Write the advanced output cursor back to the C++ variable.
227 mov destVertexPtr
, edi
235 // ***************************************************************************
// applyArrayRawSkinNormal2: skins source vertices influenced by two bone matrices
// (src->MatrixId[0..1], blended with src->Weights[0..1]) and writes position, normal and
// UV to destVertexPtr.
//   src           : array of CRawVertexNormalSkinned2.
//   destVertexPtr : output bytes; position at offset 0, normal at NL3D_RAWSKIN_NORMAL_OFF,
//                   UV at NL3D_RAWSKIN_UV_OFF, stride NL3D_RAWSKIN_VERTEX_SIZE.
//   boneMat3x4    : matrix array indexed by the MatrixId entries.
//   nInf          : number of source vertices to process.
// NOTE(review): this listing has lost lines (braces, preprocessor else/endif, comment
// terminators, the tmpVert declaration and several asm instructions). The comments below
// describe only what is visible; confirm against the complete file.
236 void CMeshMRMSkinnedGeom::applyArrayRawSkinNormal2(CRawVertexNormalSkinned2
*src
, uint8
*destVertexPtr
,
237 CMatrix3x4
*boneMat3x4
, uint nInf
)
239 // must write contiguously in AGP, and ASM is hardcoded...
// Compile-time checks: the asm below stores to the literal offsets 12 and 24.
240 nlctassert(NL3D_RAWSKIN_NORMAL_OFF
==12);
241 nlctassert(NL3D_RAWSKIN_UV_OFF
==24);
243 /*extern uint TESTYOYO_NumRawSkinVertices2;
244 TESTYOYO_NumRawSkinVertices2+= nInf;
245 H_AUTO( TestYoyo_RawSkin2 );*/
// This is why the C++ path accumulates into tmpVert and only then assigns to the output.
247 // Since VertexPtr may be a AGP Ram, MUST NOT read into it! (mulAdd*() do it!)
250 #ifdef NL3D_RAWSKIN_PRECACHE
// Precache variant: the work is cut into blocks of at most NumCacheVertexNormal2 vertices.
253 // number of vertices to process for this block.
254 uint nBlockInf
= min(NumCacheVertexNormal2
, nInf
);
258 // cache the data in L1 cache.
259 CFastMem::precache(src
, nBlockInf
* sizeof(CRawVertexNormalSkinned2
));
// NOTE(review): presumably the non-precache branch (single block covering all nInf
// vertices); the directive separating the two declarations is not visible.
262 uint nBlockInf
= nInf
;
266 #ifndef NL3D_RAWSKIN_ASM
// Portable C++ path, compiled only when NL3D_RAWSKIN_ASM is not defined.
267 // for all InfluencedVertices only.
268 for(;nBlockInf
>0;nBlockInf
--, src
++, destVertexPtr
+=NL3D_RAWSKIN_VERTEX_SIZE
)
// Position: mulSetPoint with matrix 0 / weight 0 initialises tmpVert, mulAddPoint with
// matrix 1 / weight 1 accumulates into it; the sum is then stored at offset 0.
// NOTE(review): tmpVert's declaration is not visible; it is assigned to a CVector slot.
271 boneMat3x4
[ src
->MatrixId
[0] ].mulSetPoint( src
->Vertex
, src
->Weights
[0], tmpVert
);
272 boneMat3x4
[ src
->MatrixId
[1] ].mulAddPoint( src
->Vertex
, src
->Weights
[1], tmpVert
);
273 *(CVector
*)(destVertexPtr
)= tmpVert
;
// Normal: same set-then-add scheme with the *Vector methods, stored at NL3D_RAWSKIN_NORMAL_OFF.
275 boneMat3x4
[ src
->MatrixId
[0] ].mulSetVector( src
->Normal
, src
->Weights
[0], tmpVert
);
276 boneMat3x4
[ src
->MatrixId
[1] ].mulAddVector( src
->Normal
, src
->Weights
[1], tmpVert
);
277 *(CVector
*)(destVertexPtr
+ NL3D_RAWSKIN_NORMAL_OFF
)= tmpVert
;
// UV is copied through unchanged.
279 *(CUV
*)(destVertexPtr
+ NL3D_RAWSKIN_UV_OFF
)= src
->UV
;
// ---- x86 inline-assembly variant of the loop above ----
// NOTE(review): presumably the alternative branch of the NL3D_RAWSKIN_ASM test; the
// directive itself is not visible in this listing.
282 // ASM hardcoded for 48
283 nlctassert(sizeof(CRawVertexNormalSkinned2
)==48);
285 /* 154 cycles / loop typical
286 124 cycles / loop in theory (no memory problem)
// edi is loaded with the output cursor; [esi] addresses the current src element.
292 mov edi
, destVertexPtr
296 // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, *(CVector*)(destVertexPtr) );
// eax addresses the first matrix (MatrixId at +0), ebx the second (MatrixId at +4);
// both are offset by edx.
// NOTE(review): edx presumably holds boneMat3x4 and the ids are presumably scaled to byte
// offsets before the adds; neither is visible here -- confirm.
299 mov eax
, [esi
+0]src
.MatrixId
// uop: 0/1
302 add eax
, edx
// uop: 1/0
304 mov ebx
, [esi
+4]src
.MatrixId
// uop: 0/1
307 add ebx
, edx
// uop: 1/0
// Push Vertex.x, .y, .z on the x87 stack; the row computations below read them as st(n).
310 fld
[esi
]src
.Vertex
.x
// uop: 0/1
311 fld
[esi
]src
.Vertex
.y
// uop: 0/1
312 fld
[esi
]src
.Vertex
.z
// uop: 0/1
314 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
316 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
317 fmul st
, st(3) // uop: 1/0 (5)
318 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
319 fmul st
, st(3) // uop: 1/0 (5)
320 faddp
st(1), st
// uop: 1/0 (3)
321 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
322 fmul st
, st(2) // uop: 1/0 (5)
323 faddp
st(1), st
// uop: 1/0 (3)
324 fld
[eax
]CMatrix3x4
.a14
// uop: 0/1
325 faddp
st(1), st
// uop: 1/0 (3)
// Scale the first matrix's row result by Weights (+0).
327 fmul
[esi
+0]src
.Weights
// Second matrix (ebx): same row, then scaled by Weights (+4) before the store.
// NOTE(review): from here on mostly the loads, weight multiplies and stores are visible;
// the intermediate multiply/add instructions are missing from this listing.
330 fld
[ebx
]CMatrix3x4
.a11
332 fld
[ebx
]CMatrix3x4
.a12
335 fld
[ebx
]CMatrix3x4
.a13
338 fld
[ebx
]CMatrix3x4
.a14
340 // mul by scale, and append
341 fmul
[esi
+4]src
.Weights
345 fstp dword ptr
[edi
] // uop: 0/0/1/1
347 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
348 fld
[eax
]CMatrix3x4
.a21
350 fld
[eax
]CMatrix3x4
.a22
353 fld
[eax
]CMatrix3x4
.a23
356 fld
[eax
]CMatrix3x4
.a24
359 fmul
[esi
+0]src
.Weights
362 fld
[ebx
]CMatrix3x4
.a21
364 fld
[ebx
]CMatrix3x4
.a22
367 fld
[ebx
]CMatrix3x4
.a23
370 fld
[ebx
]CMatrix3x4
.a24
372 // mul by scale, and append
373 fmul
[esi
+4]src
.Weights
377 fstp dword ptr
[edi
+4]
379 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
380 fld
[eax
]CMatrix3x4
.a31
382 fld
[eax
]CMatrix3x4
.a32
385 fld
[eax
]CMatrix3x4
.a33
388 fld
[eax
]CMatrix3x4
.a34
391 fmul
[esi
+0]src
.Weights
394 fld
[ebx
]CMatrix3x4
.a31
396 fld
[ebx
]CMatrix3x4
.a32
399 fld
[ebx
]CMatrix3x4
.a33
402 fld
[ebx
]CMatrix3x4
.a34
404 // mul by scale, and append
405 fmul
[esi
+4]src
.Weights
409 fstp dword ptr
[edi
+8]
418 // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
// Normal: same two matrices and weights. Only the a?1..a?3 terms are loaded below -- no
// a14/a24/a34 load appears, even though the inherited "vout" comments still mention them.
// Results are stored at edi+12, +16, +20 (NL3D_RAWSKIN_NORMAL_OFF == 12).
421 fld
[esi
]src
.Normal
.x
422 fld
[esi
]src
.Normal
.y
423 fld
[esi
]src
.Normal
.z
425 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
426 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
427 fmul st
, st(3) // uop: 1/0 (5)
428 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
429 fmul st
, st(3) // uop: 1/0 (5)
430 faddp
st(1), st
// uop: 1/0 (3)
431 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
432 fmul st
, st(2) // uop: 1/0 (5)
433 faddp
st(1), st
// uop: 1/0 (3)
435 fmul
[esi
+0]src
.Weights
438 fld
[ebx
]CMatrix3x4
.a11
440 fld
[ebx
]CMatrix3x4
.a12
443 fld
[ebx
]CMatrix3x4
.a13
446 // mul by scale, and append
447 fmul
[esi
+4]src
.Weights
451 fstp dword ptr
[edi
+12] // uop: 0/0/1/1
453 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
454 fld
[eax
]CMatrix3x4
.a21
456 fld
[eax
]CMatrix3x4
.a22
459 fld
[eax
]CMatrix3x4
.a23
463 fmul
[esi
+0]src
.Weights
466 fld
[ebx
]CMatrix3x4
.a21
468 fld
[ebx
]CMatrix3x4
.a22
471 fld
[ebx
]CMatrix3x4
.a23
474 // mul by scale, and append
475 fmul
[esi
+4]src
.Weights
479 fstp dword ptr
[edi
+16]
481 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
482 fld
[eax
]CMatrix3x4
.a31
484 fld
[eax
]CMatrix3x4
.a32
487 fld
[eax
]CMatrix3x4
.a33
491 fmul
[esi
+0]src
.Weights
494 fld
[ebx
]CMatrix3x4
.a31
496 fld
[ebx
]CMatrix3x4
.a32
499 fld
[ebx
]CMatrix3x4
.a33
502 // mul by scale, and append
503 fmul
[esi
+4]src
.Weights
507 fstp dword ptr
[edi
+20]
516 // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
// UV: two 32-bit moves through eax into edi+24 / edi+28 (NL3D_RAWSKIN_UV_OFF == 24).
517 mov eax
, [esi
]src
.UV
.U
// uop: 0/1
518 mov dword ptr
[edi
+24], eax
// uop: 0/0/1/1
519 mov eax
, [esi
]src
.UV
.V
// uop: 0/1
520 mov dword ptr
[edi
+28], eax
// uop: 0/0/1/1
// Advance: source by 48 bytes (the size asserted above), destination by one output vertex.
// NOTE(review): the counter update feeding jnz and the theLoop label are not visible here.
524 add esi
, 48 // uop: 1/0
525 add edi
, NL3D_RAWSKIN_VERTEX_SIZE
// uop: 1/0
527 jnz theLoop
// uop: 1/1 (p1)
// Write the advanced output cursor back to the C++ variable.
531 mov destVertexPtr
, edi
538 // ***************************************************************************
// applyArrayRawSkinNormal3: skins source vertices influenced by three bone matrices
// (src->MatrixId[0..2], blended with src->Weights[0..2]) and writes position, normal and
// UV to destVertexPtr.
//   src           : array of CRawVertexNormalSkinned3.
//   destVertexPtr : output bytes; position at offset 0, normal at NL3D_RAWSKIN_NORMAL_OFF,
//                   UV at NL3D_RAWSKIN_UV_OFF, stride NL3D_RAWSKIN_VERTEX_SIZE.
//   boneMat3x4    : matrix array indexed by the MatrixId entries.
//   nInf          : number of source vertices to process.
// NOTE(review): this listing has lost lines (braces, preprocessor else/endif, comment
// terminators, the tmpVert declaration and several asm instructions). The comments below
// describe only what is visible; confirm against the complete file.
539 void CMeshMRMSkinnedGeom::applyArrayRawSkinNormal3(CRawVertexNormalSkinned3
*src
, uint8
*destVertexPtr
,
540 CMatrix3x4
*boneMat3x4
, uint nInf
)
542 // must write contiguously in AGP, and ASM is hardcoded...
// Compile-time checks: the asm below stores to the literal offsets 12 and 24.
543 nlctassert(NL3D_RAWSKIN_NORMAL_OFF
==12);
544 nlctassert(NL3D_RAWSKIN_UV_OFF
==24);
546 /*extern uint TESTYOYO_NumRawSkinVertices3;
547 TESTYOYO_NumRawSkinVertices3+= nInf;
548 H_AUTO( TestYoyo_RawSkin3 );*/
// This is why the C++ path accumulates into tmpVert and only then assigns to the output.
550 // Since VertexPtr may be a AGP Ram, MUST NOT read into it! (mulAdd*() do it!)
553 #ifdef NL3D_RAWSKIN_PRECACHE
// Precache variant: the work is cut into blocks of at most NumCacheVertexNormal3 vertices.
556 // number of vertices to process for this block.
557 uint nBlockInf
= min(NumCacheVertexNormal3
, nInf
);
561 // cache the data in L1 cache.
562 CFastMem::precache(src
, nBlockInf
* sizeof(CRawVertexNormalSkinned3
));
// NOTE(review): presumably the non-precache branch (single block covering all nInf
// vertices); the directive separating the two declarations is not visible.
565 uint nBlockInf
= nInf
;
569 #ifndef NL3D_RAWSKIN_ASM
// Portable C++ path, compiled only when NL3D_RAWSKIN_ASM is not defined.
570 // for all InfluencedVertices only.
571 for(;nBlockInf
>0;nBlockInf
--, src
++, destVertexPtr
+=NL3D_RAWSKIN_VERTEX_SIZE
)
// Position: mulSetPoint with matrix 0 initialises tmpVert, mulAddPoint with matrices 1
// and 2 accumulates into it (each scaled by its weight); the sum is stored at offset 0.
// NOTE(review): tmpVert's declaration is not visible; it is assigned to a CVector slot.
574 boneMat3x4
[ src
->MatrixId
[0] ].mulSetPoint( src
->Vertex
, src
->Weights
[0], tmpVert
);
575 boneMat3x4
[ src
->MatrixId
[1] ].mulAddPoint( src
->Vertex
, src
->Weights
[1], tmpVert
);
576 boneMat3x4
[ src
->MatrixId
[2] ].mulAddPoint( src
->Vertex
, src
->Weights
[2], tmpVert
);
577 *(CVector
*)(destVertexPtr
)= tmpVert
;
// Normal: same set-then-add scheme with the *Vector methods, stored at NL3D_RAWSKIN_NORMAL_OFF.
579 boneMat3x4
[ src
->MatrixId
[0] ].mulSetVector( src
->Normal
, src
->Weights
[0], tmpVert
);
580 boneMat3x4
[ src
->MatrixId
[1] ].mulAddVector( src
->Normal
, src
->Weights
[1], tmpVert
);
581 boneMat3x4
[ src
->MatrixId
[2] ].mulAddVector( src
->Normal
, src
->Weights
[2], tmpVert
);
582 *(CVector
*)(destVertexPtr
+ NL3D_RAWSKIN_NORMAL_OFF
)= tmpVert
;
// UV is copied through unchanged.
584 *(CUV
*)(destVertexPtr
+ NL3D_RAWSKIN_UV_OFF
)= src
->UV
;
// ---- x86 inline-assembly variant of the loop above ----
// NOTE(review): presumably the alternative branch of the NL3D_RAWSKIN_ASM test; the
// directive itself is not visible in this listing.
587 // ASM hard coded for 56
588 nlctassert(sizeof(CRawVertexNormalSkinned3
)==56);
591 /* 226 cycles / loop typical
592 192 cycles / loop in theory (no memory problem)
// edi is loaded with the output cursor; [esi] addresses the current src element.
599 mov edi
, destVertexPtr
602 // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex, *(CVector*)(destVertexPtr) );
// eax, ebx and edx address the three matrices (MatrixId at +0, +4, +8). All three
// registers are needed for matrices here, so the base boneMat3x4 is added directly
// from the variable.
// NOTE(review): the ids are presumably scaled to byte offsets before each add; that step
// is not visible in this listing -- confirm.
605 mov eax
, [esi
+0]src
.MatrixId
// uop: 0/1
608 add eax
, boneMat3x4
// uop: 1/0
610 mov ebx
, [esi
+4]src
.MatrixId
// uop: 0/1
613 add ebx
, boneMat3x4
// uop: 1/0
615 mov edx
, [esi
+8]src
.MatrixId
// uop: 0/1
618 add edx
, boneMat3x4
// uop: 1/0
// Push Vertex.x, .y, .z on the x87 stack; the row computations below read them as st(n).
621 fld
[esi
]src
.Vertex
.x
// uop: 0/1
622 fld
[esi
]src
.Vertex
.y
// uop: 0/1
623 fld
[esi
]src
.Vertex
.z
// uop: 0/1
625 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
627 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
628 fmul st
, st(3) // uop: 1/0 (5)
629 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
630 fmul st
, st(3) // uop: 1/0 (5)
631 faddp
st(1), st
// uop: 1/0 (3)
632 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
633 fmul st
, st(2) // uop: 1/0 (5)
634 faddp
st(1), st
// uop: 1/0 (3)
635 fld
[eax
]CMatrix3x4
.a14
// uop: 0/1
636 faddp
st(1), st
// uop: 1/0 (3)
// Scale the first matrix's row result by Weights (+0).
638 fmul
[esi
+0]src
.Weights
// Second (ebx) and third (edx) matrices: same row, scaled by Weights (+4) and (+8).
// NOTE(review): from here on mostly the loads, weight multiplies and stores are visible;
// the intermediate multiply/add instructions are missing from this listing.
641 fld
[ebx
]CMatrix3x4
.a11
643 fld
[ebx
]CMatrix3x4
.a12
646 fld
[ebx
]CMatrix3x4
.a13
649 fld
[ebx
]CMatrix3x4
.a14
651 // mul by scale, and append
652 fmul
[esi
+4]src
.Weights
656 fld
[edx
]CMatrix3x4
.a11
658 fld
[edx
]CMatrix3x4
.a12
661 fld
[edx
]CMatrix3x4
.a13
664 fld
[edx
]CMatrix3x4
.a14
666 // mul by scale, and append
667 fmul
[esi
+8]src
.Weights
671 fstp dword ptr
[edi
] // uop: 0/0/1/1
673 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
674 fld
[eax
]CMatrix3x4
.a21
676 fld
[eax
]CMatrix3x4
.a22
679 fld
[eax
]CMatrix3x4
.a23
682 fld
[eax
]CMatrix3x4
.a24
685 fmul
[esi
+0]src
.Weights
688 fld
[ebx
]CMatrix3x4
.a21
690 fld
[ebx
]CMatrix3x4
.a22
693 fld
[ebx
]CMatrix3x4
.a23
696 fld
[ebx
]CMatrix3x4
.a24
698 // mul by scale, and append
699 fmul
[esi
+4]src
.Weights
703 fld
[edx
]CMatrix3x4
.a21
705 fld
[edx
]CMatrix3x4
.a22
708 fld
[edx
]CMatrix3x4
.a23
711 fld
[edx
]CMatrix3x4
.a24
713 // mul by scale, and append
714 fmul
[esi
+8]src
.Weights
718 fstp dword ptr
[edi
+4]
720 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
721 fld
[eax
]CMatrix3x4
.a31
723 fld
[eax
]CMatrix3x4
.a32
726 fld
[eax
]CMatrix3x4
.a33
729 fld
[eax
]CMatrix3x4
.a34
732 fmul
[esi
+0]src
.Weights
735 fld
[ebx
]CMatrix3x4
.a31
737 fld
[ebx
]CMatrix3x4
.a32
740 fld
[ebx
]CMatrix3x4
.a33
743 fld
[ebx
]CMatrix3x4
.a34
745 // mul by scale, and append
746 fmul
[esi
+4]src
.Weights
750 fld
[edx
]CMatrix3x4
.a31
752 fld
[edx
]CMatrix3x4
.a32
755 fld
[edx
]CMatrix3x4
.a33
758 fld
[edx
]CMatrix3x4
.a34
760 // mul by scale, and append
761 fmul
[esi
+8]src
.Weights
765 fstp dword ptr
[edi
+8]
774 // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
// Normal: same three matrices and weights. Only the a?1..a?3 terms are loaded below -- no
// a14/a24/a34 load appears, even though the inherited "vout" comments still mention them.
// Results are stored at edi+12, +16, +20 (NL3D_RAWSKIN_NORMAL_OFF == 12).
777 fld
[esi
]src
.Normal
.x
778 fld
[esi
]src
.Normal
.y
779 fld
[esi
]src
.Normal
.z
780 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
781 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
782 fmul st
, st(3) // uop: 1/0 (5)
783 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
784 fmul st
, st(3) // uop: 1/0 (5)
785 faddp
st(1), st
// uop: 1/0 (3)
786 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
787 fmul st
, st(2) // uop: 1/0 (5)
788 faddp
st(1), st
// uop: 1/0 (3)
790 fmul
[esi
+0]src
.Weights
793 fld
[ebx
]CMatrix3x4
.a11
795 fld
[ebx
]CMatrix3x4
.a12
798 fld
[ebx
]CMatrix3x4
.a13
801 // mul by scale, and append
802 fmul
[esi
+4]src
.Weights
806 fld
[edx
]CMatrix3x4
.a11
808 fld
[edx
]CMatrix3x4
.a12
811 fld
[edx
]CMatrix3x4
.a13
814 // mul by scale, and append
815 fmul
[esi
+8]src
.Weights
819 fstp dword ptr
[edi
+12] // uop: 0/0/1/1
821 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
822 fld
[eax
]CMatrix3x4
.a21
824 fld
[eax
]CMatrix3x4
.a22
827 fld
[eax
]CMatrix3x4
.a23
831 fmul
[esi
+0]src
.Weights
834 fld
[ebx
]CMatrix3x4
.a21
836 fld
[ebx
]CMatrix3x4
.a22
839 fld
[ebx
]CMatrix3x4
.a23
842 // mul by scale, and append
843 fmul
[esi
+4]src
.Weights
847 fld
[edx
]CMatrix3x4
.a21
849 fld
[edx
]CMatrix3x4
.a22
852 fld
[edx
]CMatrix3x4
.a23
855 // mul by scale, and append
856 fmul
[esi
+8]src
.Weights
860 fstp dword ptr
[edi
+16]
862 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
863 fld
[eax
]CMatrix3x4
.a31
865 fld
[eax
]CMatrix3x4
.a32
868 fld
[eax
]CMatrix3x4
.a33
872 fmul
[esi
+0]src
.Weights
875 fld
[ebx
]CMatrix3x4
.a31
877 fld
[ebx
]CMatrix3x4
.a32
880 fld
[ebx
]CMatrix3x4
.a33
883 // mul by scale, and append
884 fmul
[esi
+4]src
.Weights
888 fld
[edx
]CMatrix3x4
.a31
890 fld
[edx
]CMatrix3x4
.a32
893 fld
[edx
]CMatrix3x4
.a33
896 // mul by scale, and append
897 fmul
[esi
+8]src
.Weights
901 fstp dword ptr
[edi
+20]
910 // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->UV;
// UV: two 32-bit moves through eax into edi+24 / edi+28 (NL3D_RAWSKIN_UV_OFF == 24).
911 mov eax
, [esi
]src
.UV
.U
// uop: 0/1
912 mov dword ptr
[edi
+24], eax
// uop: 0/0/1/1
913 mov eax
, [esi
]src
.UV
.V
// uop: 0/1
914 mov dword ptr
[edi
+28], eax
// uop: 0/0/1/1
// Advance: source by 56 bytes (the size asserted above), destination by one output vertex.
// NOTE(review): the counter update feeding jnz and the theLoop label are not visible here.
918 add esi
, 56 // uop: 1/0
919 add edi
, NL3D_RAWSKIN_VERTEX_SIZE
// uop: 1/0
921 jnz theLoop
// uop: 1/1 (p1)
// Write the advanced output cursor back to the C++ variable.
925 mov destVertexPtr
, edi
932 // ***************************************************************************
// applyArrayRawSkinNormal4: skins source vertices influenced by four bone matrices
// (src->MatrixId[0..3], blended with src->Weights[0..3]) and writes position, normal and
// UV to destVertexPtr. Only a C++ implementation is visible for this variant.
//   src           : array of CRawVertexNormalSkinned4.
//   destVertexPtr : output bytes; position at offset 0, normal at NL3D_RAWSKIN_NORMAL_OFF,
//                   UV at NL3D_RAWSKIN_UV_OFF, stride NL3D_RAWSKIN_VERTEX_SIZE.
//   boneMat3x4    : matrix array indexed by the MatrixId entries.
//   nInf          : number of source vertices to process.
// NOTE(review): this listing has lost lines (braces, preprocessor else/endif and the
// tmpVert declaration). The comments below describe only what is visible.
933 void CMeshMRMSkinnedGeom::applyArrayRawSkinNormal4(CRawVertexNormalSkinned4
*src
, uint8
*destVertexPtr
,
934 CMatrix3x4
*boneMat3x4
, uint nInf
)
936 // must write contiguously in AGP, and ASM is hardcoded...
// Compile-time checks on the output layout offsets.
937 nlctassert(NL3D_RAWSKIN_NORMAL_OFF
==12);
938 nlctassert(NL3D_RAWSKIN_UV_OFF
==24);
940 /*extern uint TESTYOYO_NumRawSkinVertices4;
941 TESTYOYO_NumRawSkinVertices4+= nInf;
942 H_AUTO( TestYoyo_RawSkin4 );*/
// This is why the loop accumulates into tmpVert and only then assigns to the output.
944 // Since VertexPtr may be a AGP Ram, MUST NOT read into it! (mulAdd*() do it!)
947 #ifdef NL3D_RAWSKIN_PRECACHE
// Precache variant: the work is cut into blocks of at most NumCacheVertexNormal4 vertices.
950 // number of vertices to process for this block.
951 uint nBlockInf
= min(NumCacheVertexNormal4
, nInf
);
955 // cache the data in L1 cache.
956 CFastMem::precache(src
, nBlockInf
* sizeof(CRawVertexNormalSkinned4
));
// NOTE(review): presumably the non-precache branch (single block covering all nInf
// vertices); the directive separating the two declarations is not visible.
959 uint nBlockInf
= nInf
;
962 // for all InfluencedVertices only.
963 for(;nBlockInf
>0;nBlockInf
--, src
++, destVertexPtr
+=NL3D_RAWSKIN_VERTEX_SIZE
)
// Position: mulSetPoint with matrix 0 initialises tmpVert, mulAddPoint with matrices
// 1..3 accumulates into it (each scaled by its weight); the sum is stored at offset 0.
// NOTE(review): tmpVert's declaration is not visible; it is assigned to a CVector slot.
966 boneMat3x4
[ src
->MatrixId
[0] ].mulSetPoint( src
->Vertex
, src
->Weights
[0], tmpVert
);
967 boneMat3x4
[ src
->MatrixId
[1] ].mulAddPoint( src
->Vertex
, src
->Weights
[1], tmpVert
);
968 boneMat3x4
[ src
->MatrixId
[2] ].mulAddPoint( src
->Vertex
, src
->Weights
[2], tmpVert
);
969 boneMat3x4
[ src
->MatrixId
[3] ].mulAddPoint( src
->Vertex
, src
->Weights
[3], tmpVert
);
970 *(CVector
*)(destVertexPtr
)= tmpVert
;
// Normal: same set-then-add scheme with the *Vector methods, stored at NL3D_RAWSKIN_NORMAL_OFF.
972 boneMat3x4
[ src
->MatrixId
[0] ].mulSetVector( src
->Normal
, src
->Weights
[0], tmpVert
);
973 boneMat3x4
[ src
->MatrixId
[1] ].mulAddVector( src
->Normal
, src
->Weights
[1], tmpVert
);
974 boneMat3x4
[ src
->MatrixId
[2] ].mulAddVector( src
->Normal
, src
->Weights
[2], tmpVert
);
975 boneMat3x4
[ src
->MatrixId
[3] ].mulAddVector( src
->Normal
, src
->Weights
[3], tmpVert
);
976 *(CVector
*)(destVertexPtr
+ NL3D_RAWSKIN_NORMAL_OFF
)= tmpVert
;
// UV is copied through unchanged.
978 *(CUV
*)(destVertexPtr
+ NL3D_RAWSKIN_UV_OFF
)= src
->UV
;
981 // NB: ASM not done for 4-matrix vertices, because very rare and negligible ...
986 // ***************************************************************************
// applyRawSkinWithNormal: skins one LOD's raw-skin cache into the vertex buffer vbHard.
//   lod        : supplies MatrixInfluences, used to build the bone matrix array.
//   rawSkinLod : cache holding Vertices1..4 (grouped by number of influencing matrices),
//                the SoftVertices[] / HardVertices[] counts per group, and Geomorphs.
//   skeleton   : passed to computeBoneMatrixes3x4.
//   vbHard     : destination bytes. As addressed below: geomorph results first
//                (Geomorphs.size() vertices), then the soft vertices, then the hard vertices,
//                each vertex taking NL3D_RAWSKIN_VERTEX_SIZE bytes.
//   alphaLod   : geomorph blend factor; clamped to [0,1] before use.
// NOTE(review): this listing has lost lines (braces, the declarations of nInf, a and
// startId, and possibly guards around the per-group calls). The comments below describe
// only what is visible; confirm against the complete file.
987 void CMeshMRMSkinnedGeom::applyRawSkinWithNormal(CLod
&lod
, CRawSkinnedNormalCache
&rawSkinLod
, const CSkeletonModel
*skeleton
, uint8
*vbHard
, float alphaLod
)
990 //===========================
992 // assert, code below is written especially for 4 per vertex.
993 nlassert( NL3D_MESH_MRM_SKINNED_MAX_MATRIX
==4 );
996 // Compute useful Matrix for this lod.
997 //===========================
998 // Those arrays map the array of bones in skeleton.
// static: the vector object is shared by every call to this function.
999 static vector
<CMatrix3x4
> boneMat3x4
;
1000 computeBoneMatrixes3x4(boneMat3x4
, lod
.MatrixInfluences
, skeleton
);
1004 /*extern uint TESTYOYO_NumRawSkinVertices;
1005 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices1.size();
1006 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices2.size();
1007 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices3.size();
1008 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices4.size();*/
1013 // Manage "SoftVertices"
1014 if(rawSkinLod
.TotalSoftVertices
)
1016 // apply skinning into Temp RAM for vertices that are Src of Geomorph
1017 //===========================
// static scratch buffer: it is only ever grown (resize happens when too small).
1018 static vector
<uint8
> tempSkin
;
1019 uint tempVbSize
= rawSkinLod
.TotalSoftVertices
*NL3D_RAWSKIN_VERTEX_SIZE
;
1020 if(tempSkin
.size() < tempVbSize
)
1021 tempSkin
.resize(tempVbSize
);
1022 uint8
*destVertexPtr
= &tempSkin
[0];
// For each group k = 1..4: skin the first SoftVertices[k-1] entries of Vertices<k> into
// tempSkin, then advance the output cursor by that many vertices.
1025 nInf
= rawSkinLod
.SoftVertices
[0];
1028 applyArrayRawSkinNormal1(&rawSkinLod
.Vertices1
[0], destVertexPtr
, &boneMat3x4
[0], nInf
);
1029 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
1032 nInf
= rawSkinLod
.SoftVertices
[1];
1035 applyArrayRawSkinNormal2(&rawSkinLod
.Vertices2
[0], destVertexPtr
, &boneMat3x4
[0], nInf
);
1036 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
1039 nInf
= rawSkinLod
.SoftVertices
[2];
1042 applyArrayRawSkinNormal3(&rawSkinLod
.Vertices3
[0], destVertexPtr
, &boneMat3x4
[0], nInf
);
1043 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
1046 nInf
= rawSkinLod
.SoftVertices
[3];
1049 applyArrayRawSkinNormal4(&rawSkinLod
.Vertices4
[0], destVertexPtr
, &boneMat3x4
[0], nInf
);
1050 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
1053 // Fast Copy this into AGP Ram. NB: done before Geomorphs, because ensure some precaching this way!!
1054 //===========================
// The copy target starts after the space taken by the geomorph vertices.
1056 uint8
*vbHardStart
= vbHard
+ rawSkinLod
.Geomorphs
.size()*NL3D_RAWSKIN_VERTEX_SIZE
;
1059 CFastMem::memcpy(vbHardStart
, &tempSkin
[0], tempVbSize
);
1061 // Geomorphs directly into AGP Ram
1062 //===========================
1063 clamp(alphaLod
, 0.f
, 1.f
);
// NOTE(review): the definition of `a` (passed below next to a1) is not visible in this
// listing; presumably it is the clamped alphaLod -- confirm.
1065 float a1
= 1 - alphaLod
;
// Geomorph reads the skinned soft vertices from tempSkin and writes to the start of vbHard.
1068 applyGeomorphPosNormalUV0(rawSkinLod
.Geomorphs
, &tempSkin
[0], vbHard
, NL3D_RAWSKIN_VERTEX_SIZE
, a
, a1
);
1071 // Manage HardVertices
1072 if(rawSkinLod
.TotalHardVertices
)
1074 // apply skinning directly into AGP RAM for vertices that are not Src of Geomorph
1075 //===========================
1078 // Skip Geomorphs and SoftVertices.
1079 uint8
*destVertexPtr
= vbHard
+ (rawSkinLod
.Geomorphs
.size()+rawSkinLod
.TotalSoftVertices
)*NL3D_RAWSKIN_VERTEX_SIZE
;
// For each group k = 1..4: the hard vertices follow the soft ones inside Vertices<k>, so
// skinning starts at index SoftVertices[k-1] and covers HardVertices[k-1] entries.
1082 nInf
= rawSkinLod
.HardVertices
[0];
1083 startId
= rawSkinLod
.SoftVertices
[0];
1086 applyArrayRawSkinNormal1(&rawSkinLod
.Vertices1
[startId
], destVertexPtr
, &boneMat3x4
[0], nInf
);
1087 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
1090 nInf
= rawSkinLod
.HardVertices
[1];
1091 startId
= rawSkinLod
.SoftVertices
[1];
1094 applyArrayRawSkinNormal2(&rawSkinLod
.Vertices2
[startId
], destVertexPtr
, &boneMat3x4
[0], nInf
);
1095 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
1098 nInf
= rawSkinLod
.HardVertices
[2];
1099 startId
= rawSkinLod
.SoftVertices
[2];
1102 applyArrayRawSkinNormal3(&rawSkinLod
.Vertices3
[startId
], destVertexPtr
, &boneMat3x4
[0], nInf
);
1103 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
1106 nInf
= rawSkinLod
.HardVertices
[3];
1107 startId
= rawSkinLod
.SoftVertices
[3];
1110 applyArrayRawSkinNormal4(&rawSkinLod
.Vertices4
[startId
], destVertexPtr
, &boneMat3x4
[0], nInf
);
1111 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
1117 #endif // ADD_MESH_MRM_SKINNED_TEMPLATE