2 * File not compiled. Included from mesh_mrm_skin.cpp. It is an "old school" template.
5 // NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
6 // Copyright (C) 2010 Winch Gate Property Limited
8 // This source file has been modified by the following contributors:
9 // Copyright (C) 2014 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU Affero General Public License as
13 // published by the Free Software Foundation, either version 3 of the
14 // License, or (at your option) any later version.
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 // GNU Affero General Public License for more details.
21 // You should have received a copy of the GNU Affero General Public License
22 // along with this program. If not, see <http://www.gnu.org/licenses/>.
30 // ace: #define ADD_MESH_MRM_SKIN_TEMPLATE before including this file to enable it.
31 // The goal is to be able to compile every .cpp file with no
32 // special case (GNU/Linux needs this).
33 #ifdef ADD_MESH_MRM_SKIN_TEMPLATE
35 // ***************************************************************************
36 // ***************************************************************************
37 // "Templates" for VertexSkinning with any input matrix type.
38 // ***************************************************************************
39 // ***************************************************************************
42 // ***************************************************************************
43 static void applyArraySkinNormalT(uint numMatrixes
, uint32
*infPtr
, CMesh::CSkinWeight
*srcSkinPtr
,
44 CVector
*srcVertexPtr
, CVector
*srcNormalPtr
, uint normalOff
,
45 uint8
*destVertexPtr
, vector
<CMatrix3x4
> &boneMat3x4
, uint vertexSize
, uint nInf
)
47 /* Prefetch all vertex/normal data beforehand; it is faster.
52 uint32
*infTmpPtr
= infPtr
;
53 for(;nInfTmp
>0;nInfTmp
--, infTmpPtr
++)
55 uint index
= *infTmpPtr
;
56 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
57 CVector
*srcVertex
= srcVertexPtr
+ index
;
58 CVector
*srcNormal
= srcNormalPtr
+ index
;
60 _mm_prefetch((const char *)(void *)srcSkin
, _MM_HINT_T1
);
61 _mm_prefetch((const char *)(void *)srcVertex
, _MM_HINT_T1
);
62 _mm_prefetch((const char *)(void *)srcNormal
, _MM_HINT_T1
);
65 #elif defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
68 uint32
*infTmpPtr
= infPtr
;
69 for(;nInfTmp
>0;nInfTmp
--, infTmpPtr
++)
71 uint index
= *infTmpPtr
;
72 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
73 CVector
*srcVertex
= srcVertexPtr
+ index
;
74 CVector
*srcNormal
= srcNormalPtr
+ index
;
95 // Special case for Vertices influenced by one matrix. Just copy result of mul.
96 // for all InfluencedVertices only.
97 for(;nInf
>0;nInf
--, infPtr
++)
100 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
101 CVector
*srcVertex
= srcVertexPtr
+ index
;
102 CVector
*srcNormal
= srcNormalPtr
+ index
;
103 uint8
*dstVertexVB
= destVertexPtr
+ index
* vertexSize
;
104 CVector
*dstVertex
= (CVector
*)(dstVertexVB
);
105 CVector
*dstNormal
= (CVector
*)(dstVertexVB
+ normalOff
);
109 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetPoint( *srcVertex
, *dstVertex
);
111 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcNormal
, *dstNormal
);
117 // for all InfluencedVertices only.
118 for(;nInf
>0;nInf
--, infPtr
++)
121 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
122 CVector
*srcVertex
= srcVertexPtr
+ index
;
123 CVector
*srcNormal
= srcNormalPtr
+ index
;
124 uint8
*dstVertexVB
= destVertexPtr
+ index
* vertexSize
;
125 CVector
*dstVertex
= (CVector
*)(dstVertexVB
);
126 CVector
*dstNormal
= (CVector
*)(dstVertexVB
+ normalOff
);
130 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetPoint( *srcVertex
, srcSkin
->Weights
[0], *dstVertex
);
131 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[1], *dstVertex
);
133 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcNormal
, srcSkin
->Weights
[0], *dstNormal
);
134 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[1], *dstNormal
);
140 // for all InfluencedVertices only.
141 for(;nInf
>0;nInf
--, infPtr
++)
144 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
145 CVector
*srcVertex
= srcVertexPtr
+ index
;
146 CVector
*srcNormal
= srcNormalPtr
+ index
;
147 uint8
*dstVertexVB
= destVertexPtr
+ index
* vertexSize
;
148 CVector
*dstVertex
= (CVector
*)(dstVertexVB
);
149 CVector
*dstNormal
= (CVector
*)(dstVertexVB
+ normalOff
);
153 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetPoint( *srcVertex
, srcSkin
->Weights
[0], *dstVertex
);
154 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[1], *dstVertex
);
155 boneMat3x4
[ srcSkin
->MatrixId
[2] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[2], *dstVertex
);
157 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcNormal
, srcSkin
->Weights
[0], *dstNormal
);
158 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[1], *dstNormal
);
159 boneMat3x4
[ srcSkin
->MatrixId
[2] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[2], *dstNormal
);
165 // for all InfluencedVertices only.
166 for(;nInf
>0;nInf
--, infPtr
++)
169 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
170 CVector
*srcVertex
= srcVertexPtr
+ index
;
171 CVector
*srcNormal
= srcNormalPtr
+ index
;
172 uint8
*dstVertexVB
= destVertexPtr
+ index
* vertexSize
;
173 CVector
*dstVertex
= (CVector
*)(dstVertexVB
);
174 CVector
*dstNormal
= (CVector
*)(dstVertexVB
+ normalOff
);
178 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetPoint( *srcVertex
, srcSkin
->Weights
[0], *dstVertex
);
179 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[1], *dstVertex
);
180 boneMat3x4
[ srcSkin
->MatrixId
[2] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[2], *dstVertex
);
181 boneMat3x4
[ srcSkin
->MatrixId
[3] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[3], *dstVertex
);
183 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcNormal
, srcSkin
->Weights
[0], *dstNormal
);
184 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[1], *dstNormal
);
185 boneMat3x4
[ srcSkin
->MatrixId
[2] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[2], *dstNormal
);
186 boneMat3x4
[ srcSkin
->MatrixId
[3] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[3], *dstNormal
);
195 // ***************************************************************************
196 static void applyArraySkinTangentSpaceT(uint numMatrixes
, uint32
*infPtr
, CMesh::CSkinWeight
*srcSkinPtr
,
197 CVector
*srcVertexPtr
, CVector
*srcNormalPtr
, CVector
*tgSpacePtr
, uint normalOff
, uint tgSpaceOff
,
198 uint8
*destVertexPtr
, vector
<CMatrix3x4
> &boneMat3x4
, uint vertexSize
, uint nInf
)
200 /* Prefetch all vertex/normal/tgSpace data beforehand; it is faster.
205 uint32
*infTmpPtr
= infPtr
;
206 for(;nInfTmp
>0;nInfTmp
--, infTmpPtr
++)
208 uint index
= *infTmpPtr
;
209 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
210 CVector
*srcVertex
= srcVertexPtr
+ index
;
211 CVector
*srcNormal
= srcNormalPtr
+ index
;
212 CVector
*srcTgSpace
= tgSpacePtr
+ index
;
214 _mm_prefetch((const char *)(void *)srcSkin
, _MM_HINT_T1
);
215 _mm_prefetch((const char *)(void *)srcVertex
, _MM_HINT_T1
);
216 _mm_prefetch((const char *)(void *)srcNormal
, _MM_HINT_T1
);
217 _mm_prefetch((const char *)(void *)srcTgSpace
, _MM_HINT_T1
);
220 #elif defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
223 uint32
*infTmpPtr
= infPtr
;
224 for(;nInfTmp
>0;nInfTmp
--, infTmpPtr
++)
226 uint index
= *infTmpPtr
;
227 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
228 CVector
*srcVertex
= srcVertexPtr
+ index
;
229 CVector
*srcNormal
= srcNormalPtr
+ index
;
230 CVector
*srcTgSpace
= tgSpacePtr
+ index
;
253 // Special case for Vertices influenced by one matrix. Just copy result of mul.
254 // for all InfluencedVertices only.
255 for(;nInf
>0;nInf
--, infPtr
++)
258 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
259 CVector
*srcVertex
= srcVertexPtr
+ index
;
260 CVector
*srcNormal
= srcNormalPtr
+ index
;
261 CVector
*srcTgSpace
= tgSpacePtr
+ index
;
263 uint8
*dstVertexVB
= destVertexPtr
+ index
* vertexSize
;
264 CVector
*dstVertex
= (CVector
*)(dstVertexVB
);
265 CVector
*dstNormal
= (CVector
*)(dstVertexVB
+ normalOff
);
266 CVector
*dstTgSpace
= (CVector
*)(dstVertexVB
+ tgSpaceOff
);
271 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetPoint( *srcVertex
, *dstVertex
);
273 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcNormal
, *dstNormal
);
275 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcTgSpace
, *dstTgSpace
);
282 // for all InfluencedVertices only.
283 for(;nInf
>0;nInf
--, infPtr
++)
286 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
287 CVector
*srcVertex
= srcVertexPtr
+ index
;
288 CVector
*srcNormal
= srcNormalPtr
+ index
;
289 CVector
*srcTgSpace
= tgSpacePtr
+ index
;
291 uint8
*dstVertexVB
= destVertexPtr
+ index
* vertexSize
;
292 CVector
*dstVertex
= (CVector
*)(dstVertexVB
);
293 CVector
*dstNormal
= (CVector
*)(dstVertexVB
+ normalOff
);
294 CVector
*dstTgSpace
= (CVector
*)(dstVertexVB
+ tgSpaceOff
);
297 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetPoint( *srcVertex
, srcSkin
->Weights
[0], *dstVertex
);
298 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[1], *dstVertex
);
300 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcNormal
, srcSkin
->Weights
[0], *dstNormal
);
301 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[1], *dstNormal
);
303 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcTgSpace
, srcSkin
->Weights
[0], *dstTgSpace
);
304 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddVector( *srcTgSpace
, srcSkin
->Weights
[1], *dstTgSpace
);
310 // for all InfluencedVertices only.
311 for(;nInf
>0;nInf
--, infPtr
++)
314 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
315 CVector
*srcVertex
= srcVertexPtr
+ index
;
316 CVector
*srcNormal
= srcNormalPtr
+ index
;
317 CVector
*srcTgSpace
= tgSpacePtr
+ index
;
319 uint8
*dstVertexVB
= destVertexPtr
+ index
* vertexSize
;
320 CVector
*dstVertex
= (CVector
*)(dstVertexVB
);
321 CVector
*dstNormal
= (CVector
*)(dstVertexVB
+ normalOff
);
322 CVector
*dstTgSpace
= (CVector
*)(dstVertexVB
+ tgSpaceOff
);
325 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetPoint( *srcVertex
, srcSkin
->Weights
[0], *dstVertex
);
326 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[1], *dstVertex
);
327 boneMat3x4
[ srcSkin
->MatrixId
[2] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[2], *dstVertex
);
329 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcNormal
, srcSkin
->Weights
[0], *dstNormal
);
330 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[1], *dstNormal
);
331 boneMat3x4
[ srcSkin
->MatrixId
[2] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[2], *dstNormal
);
333 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcTgSpace
, srcSkin
->Weights
[0], *dstTgSpace
);
334 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddVector( *srcTgSpace
, srcSkin
->Weights
[1], *dstTgSpace
);
335 boneMat3x4
[ srcSkin
->MatrixId
[2] ].mulAddVector( *srcTgSpace
, srcSkin
->Weights
[2], *dstTgSpace
);
341 // for all InfluencedVertices only.
342 for(;nInf
>0;nInf
--, infPtr
++)
345 CMesh::CSkinWeight
*srcSkin
= srcSkinPtr
+ index
;
346 CVector
*srcVertex
= srcVertexPtr
+ index
;
347 CVector
*srcNormal
= srcNormalPtr
+ index
;
348 CVector
*srcTgSpace
= tgSpacePtr
+ index
;
350 uint8
*dstVertexVB
= destVertexPtr
+ index
* vertexSize
;
351 CVector
*dstVertex
= (CVector
*)(dstVertexVB
);
352 CVector
*dstNormal
= (CVector
*)(dstVertexVB
+ normalOff
);
353 CVector
*dstTgSpace
= (CVector
*)(dstVertexVB
+ tgSpaceOff
);
356 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetPoint( *srcVertex
, srcSkin
->Weights
[0], *dstVertex
);
357 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[1], *dstVertex
);
358 boneMat3x4
[ srcSkin
->MatrixId
[2] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[2], *dstVertex
);
359 boneMat3x4
[ srcSkin
->MatrixId
[3] ].mulAddPoint( *srcVertex
, srcSkin
->Weights
[3], *dstVertex
);
361 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcNormal
, srcSkin
->Weights
[0], *dstNormal
);
362 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[1], *dstNormal
);
363 boneMat3x4
[ srcSkin
->MatrixId
[2] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[2], *dstNormal
);
364 boneMat3x4
[ srcSkin
->MatrixId
[3] ].mulAddVector( *srcNormal
, srcSkin
->Weights
[3], *dstNormal
);
366 boneMat3x4
[ srcSkin
->MatrixId
[0] ].mulSetVector( *srcTgSpace
, srcSkin
->Weights
[0], *dstTgSpace
);
367 boneMat3x4
[ srcSkin
->MatrixId
[1] ].mulAddVector( *srcTgSpace
, srcSkin
->Weights
[1], *dstTgSpace
);
368 boneMat3x4
[ srcSkin
->MatrixId
[2] ].mulAddVector( *srcTgSpace
, srcSkin
->Weights
[2], *dstTgSpace
);
369 boneMat3x4
[ srcSkin
->MatrixId
[3] ].mulAddVector( *srcTgSpace
, srcSkin
->Weights
[3], *dstTgSpace
);
379 // ***************************************************************************
380 // ***************************************************************************
381 // ApplySkin methods.
382 // ***************************************************************************
383 // ***************************************************************************
386 // ***************************************************************************
387 void CMeshMRMGeom::applySkinWithNormal(CLod
&lod
, const CSkeletonModel
*skeleton
)
390 if(_SkinWeights
.empty())
393 // get vertexPtr / normalOff.
394 //===========================
395 CVertexBufferReadWrite vba
;
396 _VBufferFinal
.lock (vba
);
397 uint8
*destVertexPtr
= (uint8
*)vba
.getVertexCoordPointer();
398 uint flags
= _VBufferFinal
.getVertexFormat();
399 sint32 vertexSize
= _VBufferFinal
.getVertexSize();
400 // must have XYZ and Normal.
401 nlassert((flags
& CVertexBuffer::PositionFlag
)
402 && (flags
& CVertexBuffer::NormalFlag
)
406 // Compute offset of each component of the VB.
408 normalOff
= _VBufferFinal
.getNormalOff();
411 // compute src array.
412 CMesh::CSkinWeight
*srcSkinPtr
;
413 CVector
*srcVertexPtr
;
414 CVector
*srcNormalPtr
= NULL
;
415 srcSkinPtr
= &_SkinWeights
[0];
416 srcVertexPtr
= &_OriginalSkinVertices
[0];
417 srcNormalPtr
= &(_OriginalSkinNormals
[0]);
421 // Compute useful Matrix for this lod.
422 //===========================
423 // Those arrays map the array of bones in skeleton.
424 static vector
<CMatrix3x4
> boneMat3x4
;
425 computeBoneMatrixes3x4(boneMat3x4
, lod
.MatrixInfluences
, skeleton
);
429 //===========================
430 // assert, code below is written especially for 4 per vertex.
431 nlassert(NL3D_MESH_SKINNING_MAX_MATRIX
==4);
432 for(uint i
=0;i
<NL3D_MESH_SKINNING_MAX_MATRIX
;i
++)
434 uint nInf
= (uint
)lod
.InfluencedVertices
[i
].size();
437 uint32
*infPtr
= &(lod
.InfluencedVertices
[i
][0]);
440 /*extern uint TESTYOYO_NumStdSkinVertices;
441 TESTYOYO_NumStdSkinVertices+= nInf;*/
443 // apply the skin to the vertices
444 applyArraySkinNormalT(i
, infPtr
, srcSkinPtr
, srcVertexPtr
, srcNormalPtr
,
445 normalOff
, destVertexPtr
,
446 boneMat3x4
, vertexSize
, nInf
);
451 // ***************************************************************************
452 void CMeshMRMGeom::applySkinWithTangentSpace(CLod
&lod
, const CSkeletonModel
*skeleton
,
453 uint tangentSpaceTexCoord
)
456 if(_SkinWeights
.empty())
459 // get vertexPtr / normalOff / tangent space offset.
460 //===========================
461 CVertexBufferReadWrite vba
;
462 _VBufferFinal
.lock (vba
);
463 uint8
*destVertexPtr
= (uint8
*)vba
.getVertexCoordPointer();
464 uint flags
= _VBufferFinal
.getVertexFormat();
465 sint32 vertexSize
= _VBufferFinal
.getVertexSize();
467 // if there's tangent space, there also must be a normal there.
468 nlassert((flags
& CVertexBuffer::PositionFlag
)
469 && (flags
& CVertexBuffer::NormalFlag
)
473 // Compute offset of each component of the VB.
475 normalOff
= _VBufferFinal
.getNormalOff();
478 sint32 tgSpaceOff
= _VBufferFinal
.getTexCoordOff((uint8
) tangentSpaceTexCoord
);
480 // compute src array.
481 CMesh::CSkinWeight
*srcSkinPtr
;
482 CVector
*srcVertexPtr
;
483 CVector
*srcNormalPtr
;
486 srcSkinPtr
= &_SkinWeights
[0];
487 srcVertexPtr
= &_OriginalSkinVertices
[0];
488 srcNormalPtr
= &(_OriginalSkinNormals
[0]);
489 tgSpacePtr
= &(_OriginalTGSpace
[0]);
493 // Compute useful Matrix for this lod.
494 //===========================
495 // Those arrays map the array of bones in skeleton.
496 static vector
<CMatrix3x4
> boneMat3x4
;
497 computeBoneMatrixes3x4(boneMat3x4
, lod
.MatrixInfluences
, skeleton
);
500 // apply skinning (with tangent space added)
501 //===========================
502 // assert, code below is written especially for 4 per vertex.
503 nlassert(NL3D_MESH_SKINNING_MAX_MATRIX
==4);
504 for(uint i
=0;i
<NL3D_MESH_SKINNING_MAX_MATRIX
;i
++)
506 uint nInf
= (uint
)lod
.InfluencedVertices
[i
].size();
509 uint32
*infPtr
= &(lod
.InfluencedVertices
[i
][0]);
511 // apply the skin to the vertices
512 applyArraySkinTangentSpaceT(i
, infPtr
, srcSkinPtr
, srcVertexPtr
, srcNormalPtr
, tgSpacePtr
,
513 normalOff
, tgSpaceOff
, destVertexPtr
,
514 boneMat3x4
, vertexSize
, nInf
);
520 // ***************************************************************************
521 // ***************************************************************************
522 // Raw "Vertex/Normal only" ApplySkin methods.
523 // ***************************************************************************
524 // ***************************************************************************
// Byte layout of one destination vertex written by the applyArrayRawSkinNormal*() routines:
// position at offset 0, normal at offset 12, UV at offset 24, and a stride of 32 bytes per vertex.
// The x86 asm paths hardcode the normal/UV offsets ([edi+12], [edi+24]); nlctassert checks guard them.
527 #define NL3D_RAWSKIN_NORMAL_OFF 12
528 #define NL3D_RAWSKIN_UV_OFF 24
529 #define NL3D_RAWSKIN_VERTEX_SIZE 32
532 /* Speed Feature test.
533 Don't use precaching for now, because it seems to be slower on some configs (P4-2.4Ghz),
534 but maybe faster on other (P3-800)
535 On a P4-2.4Ghz, for 40000 vertices skinned, both no precaching and asm
536 saves 27% of execution time in the applyRawSkinNormal*() loop (ie 1 ms)
538 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
539 //#define NL3D_RAWSKIN_PRECACHE
540 #define NL3D_RAWSKIN_ASM
544 // ***************************************************************************
545 void CMeshMRMGeom::applyArrayRawSkinNormal1(CRawVertexNormalSkin1
*src
, uint8
*destVertexPtr
,
546 CMatrix3x4
*boneMat3x4
, uint nInf
)
548 // must write contiguously in AGP, and ASM is hardcoded...
549 nlctassert(NL3D_RAWSKIN_NORMAL_OFF
==12);
550 nlctassert(NL3D_RAWSKIN_UV_OFF
==24);
552 /*extern uint TESTYOYO_NumRawSkinVertices1;
553 TESTYOYO_NumRawSkinVertices1+= nInf;
554 H_AUTO( TestYoyo_RawSkin1 );*/
556 #ifdef NL3D_RAWSKIN_PRECACHE
559 // number of vertices to process for this block.
560 uint nBlockInf
= min(NumCacheVertexNormal1
, nInf
);
564 // cache the data in L1 cache.
565 CFastMem::precache(src
, nBlockInf
* sizeof(CRawVertexNormalSkin1
));
568 uint nBlockInf
= nInf
;
572 #ifndef NL3D_RAWSKIN_ASM
573 // for all InfluencedVertices only.
574 for(;nBlockInf
>0;nBlockInf
--, src
++, destVertexPtr
+=NL3D_RAWSKIN_VERTEX_SIZE
)
576 CVector
*dstVertex
= (CVector
*)(destVertexPtr
);
577 CVector
*dstNormal
= (CVector
*)(destVertexPtr
+ NL3D_RAWSKIN_NORMAL_OFF
);
579 // For 1 matrix, can write directly to AGP (if destVertexPtr is AGP...)
581 boneMat3x4
[ src
->MatrixId
[0] ].mulSetPoint( src
->Vertex
.Pos
, *(CVector
*)(destVertexPtr
) );
583 boneMat3x4
[ src
->MatrixId
[0] ].mulSetVector( src
->Vertex
.Normal
, *(CVector
*)(destVertexPtr
+ NL3D_RAWSKIN_NORMAL_OFF
) );
585 *(CUV
*)(destVertexPtr
+ NL3D_RAWSKIN_UV_OFF
)= src
->Vertex
.UV
;
588 // ASM hardcoded for 36
589 nlctassert(sizeof(CRawVertexNormalSkin1
)==36);
591 /* 116 cycles / loop typical
592 58 cycles / loop in theory (no memory problem)
598 mov edi
, destVertexPtr
602 // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, *(CVector*)(destVertexPtr) );
605 mov eax
, [esi
]src
.MatrixId
// uop: 0/1
608 add eax
, edx
// uop: 1/0
611 fld
[esi
]src
.Vertex
.Pos
.x
// uop: 0/1
612 fld
[esi
]src
.Vertex
.Pos
.y
// uop: 0/1
613 fld
[esi
]src
.Vertex
.Pos
.z
// uop: 0/1
614 // vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
615 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
616 fmul st
, st(3) // uop: 1/0 (5)
617 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
618 fmul st
, st(3) // uop: 1/0 (5)
619 faddp
st(1), st
// uop: 1/0 (3)
620 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
621 fmul st
, st(2) // uop: 1/0 (5)
622 faddp
st(1), st
// uop: 1/0 (3)
623 fld
[eax
]CMatrix3x4
.a14
// uop: 0/1
624 faddp
st(1), st
// uop: 1/0 (3)
625 fstp dword ptr
[edi
] // uop: 0/0/1/1
626 // vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
627 fld
[eax
]CMatrix3x4
.a21
629 fld
[eax
]CMatrix3x4
.a22
632 fld
[eax
]CMatrix3x4
.a23
635 fld
[eax
]CMatrix3x4
.a24
637 fstp dword ptr
[edi
+4]
638 // vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
639 fld
[eax
]CMatrix3x4
.a31
641 fld
[eax
]CMatrix3x4
.a32
644 fld
[eax
]CMatrix3x4
.a33
647 fld
[eax
]CMatrix3x4
.a34
649 fstp dword ptr
[edi
+8]
657 // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
660 fld
[esi
]src
.Vertex
.Normal
.x
661 fld
[esi
]src
.Vertex
.Normal
.y
662 fld
[esi
]src
.Vertex
.Normal
.z
663 // vout.x= (a11*vin.x + a12*vin.y + a13*vin.z); (vector: no a14 translation term)
664 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
665 fmul st
, st(3) // uop: 1/0 (5)
666 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
667 fmul st
, st(3) // uop: 1/0 (5)
668 faddp
st(1), st
// uop: 1/0 (3)
669 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
670 fmul st
, st(2) // uop: 1/0 (5)
671 faddp
st(1), st
// uop: 1/0 (3)
672 fstp dword ptr
[edi
+12] // uop: 0/0/1/1
673 // vout.y= (a21*vin.x + a22*vin.y + a23*vin.z); (vector: no a24 translation term)
674 fld
[eax
]CMatrix3x4
.a21
676 fld
[eax
]CMatrix3x4
.a22
679 fld
[eax
]CMatrix3x4
.a23
682 fstp dword ptr
[edi
+16]
683 // vout.z= (a31*vin.x + a32*vin.y + a33*vin.z); (vector: no a34 translation term)
684 fld
[eax
]CMatrix3x4
.a31
686 fld
[eax
]CMatrix3x4
.a32
689 fld
[eax
]CMatrix3x4
.a33
692 fstp dword ptr
[edi
+20]
700 // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
701 mov eax
, [esi
]src
.Vertex
.UV
.U
// uop: 0/1
702 mov dword ptr
[edi
+24], eax
// uop: 0/0/1/1
703 mov eax
, [esi
]src
.Vertex
.UV
.V
// uop: 0/1
704 mov dword ptr
[edi
+28], eax
// uop: 0/0/1/1
708 add esi
, 36 // uop: 1/0
709 add edi
, NL3D_RAWSKIN_VERTEX_SIZE
// uop: 1/0
711 jnz theLoop
// uop: 1/1 (p1)
715 mov destVertexPtr
, edi
723 // ***************************************************************************
724 void CMeshMRMGeom::applyArrayRawSkinNormal2(CRawVertexNormalSkin2
*src
, uint8
*destVertexPtr
,
725 CMatrix3x4
*boneMat3x4
, uint nInf
)
727 // must write contiguously in AGP, and ASM is hardcoded...
728 nlctassert(NL3D_RAWSKIN_NORMAL_OFF
==12);
729 nlctassert(NL3D_RAWSKIN_UV_OFF
==24);
731 /*extern uint TESTYOYO_NumRawSkinVertices2;
732 TESTYOYO_NumRawSkinVertices2+= nInf;
733 H_AUTO( TestYoyo_RawSkin2 );*/
735 // Since VertexPtr may be in AGP RAM, we MUST NOT read from it! (mulAdd*() does so!)
738 #ifdef NL3D_RAWSKIN_PRECACHE
741 // number of vertices to process for this block.
742 uint nBlockInf
= min(NumCacheVertexNormal2
, nInf
);
746 // cache the data in L1 cache.
747 CFastMem::precache(src
, nBlockInf
* sizeof(CRawVertexNormalSkin2
));
750 uint nBlockInf
= nInf
;
754 #ifndef NL3D_RAWSKIN_ASM
755 // for all InfluencedVertices only.
756 for(;nBlockInf
>0;nBlockInf
--, src
++, destVertexPtr
+=NL3D_RAWSKIN_VERTEX_SIZE
)
759 boneMat3x4
[ src
->MatrixId
[0] ].mulSetPoint( src
->Vertex
.Pos
, src
->Weights
[0], tmpVert
);
760 boneMat3x4
[ src
->MatrixId
[1] ].mulAddPoint( src
->Vertex
.Pos
, src
->Weights
[1], tmpVert
);
761 *(CVector
*)(destVertexPtr
)= tmpVert
;
763 boneMat3x4
[ src
->MatrixId
[0] ].mulSetVector( src
->Vertex
.Normal
, src
->Weights
[0], tmpVert
);
764 boneMat3x4
[ src
->MatrixId
[1] ].mulAddVector( src
->Vertex
.Normal
, src
->Weights
[1], tmpVert
);
765 *(CVector
*)(destVertexPtr
+ NL3D_RAWSKIN_NORMAL_OFF
)= tmpVert
;
767 *(CUV
*)(destVertexPtr
+ NL3D_RAWSKIN_UV_OFF
)= src
->Vertex
.UV
;
770 // ASM hardcoded for 48
771 nlctassert(sizeof(CRawVertexNormalSkin2
)==48);
773 /* 154 cycles / loop typical
774 124 cycles / loop in theory (no memory problem)
780 mov edi
, destVertexPtr
784 // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, *(CVector*)(destVertexPtr) );
787 mov eax
, [esi
+0]src
.MatrixId
// uop: 0/1
790 add eax
, edx
// uop: 1/0
792 mov ebx
, [esi
+4]src
.MatrixId
// uop: 0/1
795 add ebx
, edx
// uop: 1/0
798 fld
[esi
]src
.Vertex
.Pos
.x
// uop: 0/1
799 fld
[esi
]src
.Vertex
.Pos
.y
// uop: 0/1
800 fld
[esi
]src
.Vertex
.Pos
.z
// uop: 0/1
802 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
804 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
805 fmul st
, st(3) // uop: 1/0 (5)
806 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
807 fmul st
, st(3) // uop: 1/0 (5)
808 faddp
st(1), st
// uop: 1/0 (3)
809 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
810 fmul st
, st(2) // uop: 1/0 (5)
811 faddp
st(1), st
// uop: 1/0 (3)
812 fld
[eax
]CMatrix3x4
.a14
// uop: 0/1
813 faddp
st(1), st
// uop: 1/0 (3)
815 fmul
[esi
+0]src
.Weights
818 fld
[ebx
]CMatrix3x4
.a11
820 fld
[ebx
]CMatrix3x4
.a12
823 fld
[ebx
]CMatrix3x4
.a13
826 fld
[ebx
]CMatrix3x4
.a14
828 // mul by scale, and append
829 fmul
[esi
+4]src
.Weights
833 fstp dword ptr
[edi
] // uop: 0/0/1/1
835 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
836 fld
[eax
]CMatrix3x4
.a21
838 fld
[eax
]CMatrix3x4
.a22
841 fld
[eax
]CMatrix3x4
.a23
844 fld
[eax
]CMatrix3x4
.a24
847 fmul
[esi
+0]src
.Weights
850 fld
[ebx
]CMatrix3x4
.a21
852 fld
[ebx
]CMatrix3x4
.a22
855 fld
[ebx
]CMatrix3x4
.a23
858 fld
[ebx
]CMatrix3x4
.a24
860 // mul by scale, and append
861 fmul
[esi
+4]src
.Weights
865 fstp dword ptr
[edi
+4]
867 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
868 fld
[eax
]CMatrix3x4
.a31
870 fld
[eax
]CMatrix3x4
.a32
873 fld
[eax
]CMatrix3x4
.a33
876 fld
[eax
]CMatrix3x4
.a34
879 fmul
[esi
+0]src
.Weights
882 fld
[ebx
]CMatrix3x4
.a31
884 fld
[ebx
]CMatrix3x4
.a32
887 fld
[ebx
]CMatrix3x4
.a33
890 fld
[ebx
]CMatrix3x4
.a34
892 // mul by scale, and append
893 fmul
[esi
+4]src
.Weights
897 fstp dword ptr
[edi
+8]
906 // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
909 fld
[esi
]src
.Vertex
.Normal
.x
910 fld
[esi
]src
.Vertex
.Normal
.y
911 fld
[esi
]src
.Vertex
.Normal
.z
913 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z); (vector: no a14 translation term)
914 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
915 fmul st
, st(3) // uop: 1/0 (5)
916 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
917 fmul st
, st(3) // uop: 1/0 (5)
918 faddp
st(1), st
// uop: 1/0 (3)
919 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
920 fmul st
, st(2) // uop: 1/0 (5)
921 faddp
st(1), st
// uop: 1/0 (3)
923 fmul
[esi
+0]src
.Weights
926 fld
[ebx
]CMatrix3x4
.a11
928 fld
[ebx
]CMatrix3x4
.a12
931 fld
[ebx
]CMatrix3x4
.a13
934 // mul by scale, and append
935 fmul
[esi
+4]src
.Weights
939 fstp dword ptr
[edi
+12] // uop: 0/0/1/1
941 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z); (vector: no a24 translation term)
942 fld
[eax
]CMatrix3x4
.a21
944 fld
[eax
]CMatrix3x4
.a22
947 fld
[eax
]CMatrix3x4
.a23
951 fmul
[esi
+0]src
.Weights
954 fld
[ebx
]CMatrix3x4
.a21
956 fld
[ebx
]CMatrix3x4
.a22
959 fld
[ebx
]CMatrix3x4
.a23
962 // mul by scale, and append
963 fmul
[esi
+4]src
.Weights
967 fstp dword ptr
[edi
+16]
969 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z); (vector: no a34 translation term)
970 fld
[eax
]CMatrix3x4
.a31
972 fld
[eax
]CMatrix3x4
.a32
975 fld
[eax
]CMatrix3x4
.a33
979 fmul
[esi
+0]src
.Weights
982 fld
[ebx
]CMatrix3x4
.a31
984 fld
[ebx
]CMatrix3x4
.a32
987 fld
[ebx
]CMatrix3x4
.a33
990 // mul by scale, and append
991 fmul
[esi
+4]src
.Weights
995 fstp dword ptr
[edi
+20]
1004 // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
1005 mov eax
, [esi
]src
.Vertex
.UV
.U
// uop: 0/1
1006 mov dword ptr
[edi
+24], eax
// uop: 0/0/1/1
1007 mov eax
, [esi
]src
.Vertex
.UV
.V
// uop: 0/1
1008 mov dword ptr
[edi
+28], eax
// uop: 0/0/1/1
1012 add esi
, 48 // uop: 1/0
1013 add edi
, NL3D_RAWSKIN_VERTEX_SIZE
// uop: 1/0
1015 jnz theLoop
// uop: 1/1 (p1)
1019 mov destVertexPtr
, edi
1026 // ***************************************************************************
1027 void CMeshMRMGeom::applyArrayRawSkinNormal3(CRawVertexNormalSkin3
*src
, uint8
*destVertexPtr
,
1028 CMatrix3x4
*boneMat3x4
, uint nInf
)
1030 // must write contiguously in AGP, and ASM is hardcoded...
1031 nlctassert(NL3D_RAWSKIN_NORMAL_OFF
==12);
1032 nlctassert(NL3D_RAWSKIN_UV_OFF
==24);
1034 /*extern uint TESTYOYO_NumRawSkinVertices3;
1035 TESTYOYO_NumRawSkinVertices3+= nInf;
1036 H_AUTO( TestYoyo_RawSkin3 );*/
1038 // Since VertexPtr may be in AGP RAM, we MUST NOT read from it! (mulAdd*() does so!)
1041 #ifdef NL3D_RAWSKIN_PRECACHE
1044 // number of vertices to process for this block.
1045 uint nBlockInf
= min(NumCacheVertexNormal3
, nInf
);
1049 // cache the data in L1 cache.
1050 CFastMem::precache(src
, nBlockInf
* sizeof(CRawVertexNormalSkin3
));
1053 uint nBlockInf
= nInf
;
1057 #ifndef NL3D_RAWSKIN_ASM
1058 // for all InfluencedVertices only.
1059 for(;nBlockInf
>0;nBlockInf
--, src
++, destVertexPtr
+=NL3D_RAWSKIN_VERTEX_SIZE
)
1062 boneMat3x4
[ src
->MatrixId
[0] ].mulSetPoint( src
->Vertex
.Pos
, src
->Weights
[0], tmpVert
);
1063 boneMat3x4
[ src
->MatrixId
[1] ].mulAddPoint( src
->Vertex
.Pos
, src
->Weights
[1], tmpVert
);
1064 boneMat3x4
[ src
->MatrixId
[2] ].mulAddPoint( src
->Vertex
.Pos
, src
->Weights
[2], tmpVert
);
1065 *(CVector
*)(destVertexPtr
)= tmpVert
;
1067 boneMat3x4
[ src
->MatrixId
[0] ].mulSetVector( src
->Vertex
.Normal
, src
->Weights
[0], tmpVert
);
1068 boneMat3x4
[ src
->MatrixId
[1] ].mulAddVector( src
->Vertex
.Normal
, src
->Weights
[1], tmpVert
);
1069 boneMat3x4
[ src
->MatrixId
[2] ].mulAddVector( src
->Vertex
.Normal
, src
->Weights
[2], tmpVert
);
1070 *(CVector
*)(destVertexPtr
+ NL3D_RAWSKIN_NORMAL_OFF
)= tmpVert
;
1072 *(CUV
*)(destVertexPtr
+ NL3D_RAWSKIN_UV_OFF
)= src
->Vertex
.UV
;
1075 // ASM hardcoded for 56
1076 nlctassert(sizeof(CRawVertexNormalSkin3
)==56);
1079 /* 226 cycles / loop typical
1080 192 cycles / loop in theory (no memory problem)
1087 mov edi
, destVertexPtr
1090 // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, *(CVector*)(destVertexPtr) );
1093 mov eax
, [esi
+0]src
.MatrixId
// uop: 0/1
1094 lea eax
, [eax
*2+eax
]
1096 add eax
, boneMat3x4
// uop: 1/0
1098 mov ebx
, [esi
+4]src
.MatrixId
// uop: 0/1
1099 lea ebx
, [ebx
*2+ebx
]
1101 add ebx
, boneMat3x4
// uop: 1/0
1103 mov edx
, [esi
+8]src
.MatrixId
// uop: 0/1
1104 lea edx
, [edx
*2+edx
]
1106 add edx
, boneMat3x4
// uop: 1/0
1109 fld
[esi
]src
.Vertex
.Pos
.x
// uop: 0/1
1110 fld
[esi
]src
.Vertex
.Pos
.y
// uop: 0/1
1111 fld
[esi
]src
.Vertex
.Pos
.z
// uop: 0/1
1113 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
1115 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
1116 fmul st
, st(3) // uop: 1/0 (5)
1117 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
1118 fmul st
, st(3) // uop: 1/0 (5)
1119 faddp
st(1), st
// uop: 1/0 (3)
1120 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
1121 fmul st
, st(2) // uop: 1/0 (5)
1122 faddp
st(1), st
// uop: 1/0 (3)
1123 fld
[eax
]CMatrix3x4
.a14
// uop: 0/1
1124 faddp
st(1), st
// uop: 1/0 (3)
1126 fmul
[esi
+0]src
.Weights
1129 fld
[ebx
]CMatrix3x4
.a11
1131 fld
[ebx
]CMatrix3x4
.a12
1134 fld
[ebx
]CMatrix3x4
.a13
1137 fld
[ebx
]CMatrix3x4
.a14
1139 // mul by scale, and append
1140 fmul
[esi
+4]src
.Weights
1144 fld
[edx
]CMatrix3x4
.a11
1146 fld
[edx
]CMatrix3x4
.a12
1149 fld
[edx
]CMatrix3x4
.a13
1152 fld
[edx
]CMatrix3x4
.a14
1154 // mul by scale, and append
1155 fmul
[esi
+8]src
.Weights
1159 fstp dword ptr
[edi
] // uop: 0/0/1/1
1161 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
1162 fld
[eax
]CMatrix3x4
.a21
1164 fld
[eax
]CMatrix3x4
.a22
1167 fld
[eax
]CMatrix3x4
.a23
1170 fld
[eax
]CMatrix3x4
.a24
1173 fmul
[esi
+0]src
.Weights
1176 fld
[ebx
]CMatrix3x4
.a21
1178 fld
[ebx
]CMatrix3x4
.a22
1181 fld
[ebx
]CMatrix3x4
.a23
1184 fld
[ebx
]CMatrix3x4
.a24
1186 // mul by scale, and append
1187 fmul
[esi
+4]src
.Weights
1191 fld
[edx
]CMatrix3x4
.a21
1193 fld
[edx
]CMatrix3x4
.a22
1196 fld
[edx
]CMatrix3x4
.a23
1199 fld
[edx
]CMatrix3x4
.a24
1201 // mul by scale, and append
1202 fmul
[esi
+8]src
.Weights
1206 fstp dword ptr
[edi
+4]
1208 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
1209 fld
[eax
]CMatrix3x4
.a31
1211 fld
[eax
]CMatrix3x4
.a32
1214 fld
[eax
]CMatrix3x4
.a33
1217 fld
[eax
]CMatrix3x4
.a34
1220 fmul
[esi
+0]src
.Weights
1223 fld
[ebx
]CMatrix3x4
.a31
1225 fld
[ebx
]CMatrix3x4
.a32
1228 fld
[ebx
]CMatrix3x4
.a33
1231 fld
[ebx
]CMatrix3x4
.a34
1233 // mul by scale, and append
1234 fmul
[esi
+4]src
.Weights
1238 fld
[edx
]CMatrix3x4
.a31
1240 fld
[edx
]CMatrix3x4
.a32
1243 fld
[edx
]CMatrix3x4
.a33
1246 fld
[edx
]CMatrix3x4
.a34
1248 // mul by scale, and append
1249 fmul
[esi
+8]src
.Weights
1253 fstp dword ptr
[edi
+8]
1262 // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
1265 fld
[esi
]src
.Vertex
.Normal
.x
1266 fld
[esi
]src
.Vertex
.Normal
.y
1267 fld
[esi
]src
.Vertex
.Normal
.z
1268 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
1269 fld
[eax
]CMatrix3x4
.a11
// uop: 0/1
1270 fmul st
, st(3) // uop: 1/0 (5)
1271 fld
[eax
]CMatrix3x4
.a12
// uop: 0/1
1272 fmul st
, st(3) // uop: 1/0 (5)
1273 faddp
st(1), st
// uop: 1/0 (3)
1274 fld
[eax
]CMatrix3x4
.a13
// uop: 0/1
1275 fmul st
, st(2) // uop: 1/0 (5)
1276 faddp
st(1), st
// uop: 1/0 (3)
1278 fmul
[esi
+0]src
.Weights
1281 fld
[ebx
]CMatrix3x4
.a11
1283 fld
[ebx
]CMatrix3x4
.a12
1286 fld
[ebx
]CMatrix3x4
.a13
1289 // mul by scale, and append
1290 fmul
[esi
+4]src
.Weights
1294 fld
[edx
]CMatrix3x4
.a11
1296 fld
[edx
]CMatrix3x4
.a12
1299 fld
[edx
]CMatrix3x4
.a13
1302 // mul by scale, and append
1303 fmul
[esi
+8]src
.Weights
1307 fstp dword ptr
[edi
+12] // uop: 0/0/1/1
1309 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
1310 fld
[eax
]CMatrix3x4
.a21
1312 fld
[eax
]CMatrix3x4
.a22
1315 fld
[eax
]CMatrix3x4
.a23
1319 fmul
[esi
+0]src
.Weights
1322 fld
[ebx
]CMatrix3x4
.a21
1324 fld
[ebx
]CMatrix3x4
.a22
1327 fld
[ebx
]CMatrix3x4
.a23
1330 // mul by scale, and append
1331 fmul
[esi
+4]src
.Weights
1335 fld
[edx
]CMatrix3x4
.a21
1337 fld
[edx
]CMatrix3x4
.a22
1340 fld
[edx
]CMatrix3x4
.a23
1343 // mul by scale, and append
1344 fmul
[esi
+8]src
.Weights
1348 fstp dword ptr
[edi
+16]
1350 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
1351 fld
[eax
]CMatrix3x4
.a31
1353 fld
[eax
]CMatrix3x4
.a32
1356 fld
[eax
]CMatrix3x4
.a33
1360 fmul
[esi
+0]src
.Weights
1363 fld
[ebx
]CMatrix3x4
.a31
1365 fld
[ebx
]CMatrix3x4
.a32
1368 fld
[ebx
]CMatrix3x4
.a33
1371 // mul by scale, and append
1372 fmul
[esi
+4]src
.Weights
1376 fld
[edx
]CMatrix3x4
.a31
1378 fld
[edx
]CMatrix3x4
.a32
1381 fld
[edx
]CMatrix3x4
.a33
1384 // mul by scale, and append
1385 fmul
[esi
+8]src
.Weights
1389 fstp dword ptr
[edi
+20]
1398 // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
1399 mov eax
, [esi
]src
.Vertex
.UV
.U
// uop: 0/1
1400 mov dword ptr
[edi
+24], eax
// uop: 0/0/1/1
1401 mov eax
, [esi
]src
.Vertex
.UV
.V
// uop: 0/1
1402 mov dword ptr
[edi
+28], eax
// uop: 0/0/1/1
1406 add esi
, 56 // uop: 1/0
1407 add edi
, NL3D_RAWSKIN_VERTEX_SIZE
// uop: 1/0
1409 jnz theLoop
// uop: 1/1 (p1)
1413 mov destVertexPtr
, edi
1420 // ***************************************************************************
// Skins an array of raw-skin vertices that each reference 4 bone matrices.
// For every source vertex, the position and the normal are blended through
// boneMat3x4[ src->MatrixId[0..3] ] with src->Weights[0..3]; the result is stored in the
// destination vertex together with an unmodified copy of the UV.
//   src           - source raw-skin vertices; advanced by one per processed vertex.
//   destVertexPtr - destination bytes; advanced by NL3D_RAWSKIN_VERTEX_SIZE per vertex
//                   (position at +0, normal at +NL3D_RAWSKIN_NORMAL_OFF, UV at +NL3D_RAWSKIN_UV_OFF).
//   boneMat3x4    - matrix array indexed by src->MatrixId[].
//   nInf          - number of vertices to skin.
1421 void CMeshMRMGeom::applyArrayRawSkinNormal4(CRawVertexNormalSkin4
*src
, uint8
*destVertexPtr
,
1422 CMatrix3x4
*boneMat3x4
, uint nInf
)
1424 // must write contiguously in AGP, and the ASM of the sibling routines is hardcoded for this layout...
// Compile-time checks: normal at byte offset 12 and UV at byte offset 24 of a destination vertex.
1425 nlctassert(NL3D_RAWSKIN_NORMAL_OFF
==12);
1426 nlctassert(NL3D_RAWSKIN_UV_OFF
==24);
1428 /*extern uint TESTYOYO_NumRawSkinVertices4;
1429 TESTYOYO_NumRawSkinVertices4+= nInf;
1430 H_AUTO( TestYoyo_RawSkin4 );*/
1432 // Since the destination may be AGP RAM, it MUST NOT be read back (mulAdd*() read their output):
// the blend is accumulated in tmpVert and stored to the destination with a single write.
// Precache variant: vertices are handled by blocks of at most NumCacheVertexNormal4.
1435 #ifdef NL3D_RAWSKIN_PRECACHE
1438 // number of vertices to process for this block.
1439 uint nBlockInf
= min(NumCacheVertexNormal4
, nInf
);
1443 // pull the source vertices of this block into the L1 cache before skinning them.
1444 CFastMem::precache(src
, nBlockInf
* sizeof(CRawVertexNormalSkin4
));
// NOTE(review): this second nBlockInf declaration is presumably the non-precache (#else) branch,
// where the whole array is one block; the directive itself is not visible in this chunk - confirm.
1447 uint nBlockInf
= nInf
;
1450 // for all influenced vertices of the block: one source vertex in, one destination vertex out.
1451 for(;nBlockInf
>0;nBlockInf
--, src
++, destVertexPtr
+=NL3D_RAWSKIN_VERTEX_SIZE
)
// Position: the first matrix initialises tmpVert (mulSet), the 3 others accumulate into it (mulAdd).
1454 boneMat3x4
[ src
->MatrixId
[0] ].mulSetPoint( src
->Vertex
.Pos
, src
->Weights
[0], tmpVert
);
1455 boneMat3x4
[ src
->MatrixId
[1] ].mulAddPoint( src
->Vertex
.Pos
, src
->Weights
[1], tmpVert
);
1456 boneMat3x4
[ src
->MatrixId
[2] ].mulAddPoint( src
->Vertex
.Pos
, src
->Weights
[2], tmpVert
);
1457 boneMat3x4
[ src
->MatrixId
[3] ].mulAddPoint( src
->Vertex
.Pos
, src
->Weights
[3], tmpVert
);
// Store the blended position at the start of the destination vertex.
1458 *(CVector
*)(destVertexPtr
)= tmpVert
;
// Normal: same set-then-accumulate scheme, using the *Vector variants.
1460 boneMat3x4
[ src
->MatrixId
[0] ].mulSetVector( src
->Vertex
.Normal
, src
->Weights
[0], tmpVert
);
1461 boneMat3x4
[ src
->MatrixId
[1] ].mulAddVector( src
->Vertex
.Normal
, src
->Weights
[1], tmpVert
);
1462 boneMat3x4
[ src
->MatrixId
[2] ].mulAddVector( src
->Vertex
.Normal
, src
->Weights
[2], tmpVert
);
1463 boneMat3x4
[ src
->MatrixId
[3] ].mulAddVector( src
->Vertex
.Normal
, src
->Weights
[3], tmpVert
);
// Store the blended normal at its fixed offset.
1464 *(CVector
*)(destVertexPtr
+ NL3D_RAWSKIN_NORMAL_OFF
)= tmpVert
;
// The UV is not skinned: plain copy from the source vertex.
1466 *(CUV
*)(destVertexPtr
+ NL3D_RAWSKIN_UV_OFF
)= src
->Vertex
.UV
;
1469 // NB: no ASM version for 4-matrix vertices, because they are very rare and the gain would be negligible ...
1474 // ***************************************************************************
// Applies raw skinning (position + normal, UV copied) for one LOD and writes the result into vbHard.
// Visible steps:
//  - tests _SkinWeights.empty(), then asserts that _VBufferFinal uses the fixed
//    Position / Normal / TexCoord0(Float2) layout the applyArrayRawSkinNormal*() routines are written for;
//  - computes the bone matrices of the lod with computeBoneMatrixes3x4();
//  - "soft" vertices: skinned into a temporary buffer, copied into vbHard after the geomorph area,
//    then passed as source to applyGeomorphPosNormalUV0(), which receives vbHard as destination;
//  - "hard" vertices: skinned directly into vbHard, after the geomorph and soft vertex areas.
//   lod        - its MatrixInfluences are passed to computeBoneMatrixes3x4().
//   rawSkinLod - raw-skin cache: Vertices1..4 arrays, SoftVertices[] / HardVertices[] counts, Geomorphs.
//   skeleton   - forwarded to computeBoneMatrixes3x4().
//   vbHard     - destination vertex memory.
//   alphaLod   - geomorph blend factor; clamped to [0,1] before use.
1475 void CMeshMRMGeom::applyRawSkinWithNormal(CLod
&lod
, CRawSkinNormalCache
&rawSkinLod
, const CSkeletonModel
*skeleton
, uint8
*vbHard
, float alphaLod
)
1478 if(_SkinWeights
.empty())
// NOTE(review): the statement guarded by this test is not visible in this chunk - presumably an early return.
1482 //===========================
1483 // must have XYZ, Normal and UV only
1484 nlassert( _VBufferFinal
.getVertexFormat() == (CVertexBuffer::PositionFlag
| CVertexBuffer::NormalFlag
| CVertexBuffer::TexCoord0Flag
) );
1485 nlassert( _VBufferFinal
.getValueType(CVertexBuffer::TexCoord0
) == CVertexBuffer::Float2
);
1486 nlassert( _VBufferFinal
.getVertexSize() ==NL3D_RAWSKIN_VERTEX_SIZE
);
1488 // The offsets are hard-coded (normalOff==12, see applyArrayRawSkinNormal*), so check the buffer agrees.
1489 nlassert( _VBufferFinal
.getNormalOff()==NL3D_RAWSKIN_NORMAL_OFF
);
1490 nlassert( _VBufferFinal
.getTexCoordOff()==NL3D_RAWSKIN_UV_OFF
);
1491 // assert, code below is written especially for 4 matrices per vertex.
1492 nlassert( NL3D_MESH_SKINNING_MAX_MATRIX
==4 );
1495 // Compute useful Matrix for this lod.
1496 //===========================
1497 // Those arrays map the array of bones in skeleton.
// NOTE(review): function-local static, so the storage is shared by every call - confirm this is only
// used from a single thread.
1498 static vector
<CMatrix3x4
> boneMat3x4
;
1499 computeBoneMatrixes3x4(boneMat3x4
, lod
.MatrixInfluences
, skeleton
);
1503 /*extern uint TESTYOYO_NumRawSkinVertices;
1504 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices1.size();
1505 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices2.size();
1506 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices3.size();
1507 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices4.size();*/
1512 // Manage "SoftVertices"
1513 if(rawSkinLod
.TotalSoftVertices
)
1515 // apply skinning into Temp RAM for vertices that are Src of Geomorph
1516 //===========================
// Static scratch buffer: it only ever grows, so it is reused from one call to the next.
1517 static vector
<uint8
> tempSkin
;
1518 uint tempVbSize
= rawSkinLod
.TotalSoftVertices
*NL3D_RAWSKIN_VERTEX_SIZE
;
1519 if(tempSkin
.size() < tempVbSize
)
1520 tempSkin
.resize(tempVbSize
);
1521 uint8
*destVertexPtr
= &tempSkin
[0];
// Soft vertices with 1 matrix: they start at index 0 of Vertices1.
// NOTE(review): any guard on nInf around the calls below is not visible in this chunk - confirm
// the VerticesN arrays are never indexed while empty.
1524 nInf
= rawSkinLod
.SoftVertices
[0];
1527 applyArrayRawSkinNormal1(&rawSkinLod
.Vertices1
[0], destVertexPtr
, &boneMat3x4
[0], nInf
);
1528 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
// Soft vertices with 2 matrices.
1531 nInf
= rawSkinLod
.SoftVertices
[1];
1534 applyArrayRawSkinNormal2(&rawSkinLod
.Vertices2
[0], destVertexPtr
, &boneMat3x4
[0], nInf
);
1535 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
// Soft vertices with 3 matrices.
1538 nInf
= rawSkinLod
.SoftVertices
[2];
1541 applyArrayRawSkinNormal3(&rawSkinLod
.Vertices3
[0], destVertexPtr
, &boneMat3x4
[0], nInf
);
1542 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
// Soft vertices with 4 matrices.
1545 nInf
= rawSkinLod
.SoftVertices
[3];
1548 applyArrayRawSkinNormal4(&rawSkinLod
.Vertices4
[0], destVertexPtr
, &boneMat3x4
[0], nInf
);
1549 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
1552 // Fast Copy this into AGP Ram. NB: done before Geomorphs, because ensure some precaching this way!!
1553 //===========================
// The copy destination starts after the Geomorphs.size() vertices located at the start of vbHard.
1555 uint8
*vbHardStart
= vbHard
+ rawSkinLod
.Geomorphs
.size()*NL3D_RAWSKIN_VERTEX_SIZE
;
1558 CFastMem::memcpy(vbHardStart
, &tempSkin
[0], tempVbSize
);
1560 // Geomorphs directly into AGP Ram
1561 //===========================
1562 clamp(alphaLod
, 0.f
, 1.f
);
// a1 is the complementary blend factor of the clamped alphaLod.
1564 float a1
= 1 - alphaLod
;
// Geomorph reads the skinned soft vertices from the temporary buffer (never from vbHard).
// NOTE(review): the declaration of 'a' is not visible in this chunk - presumably the clamped alphaLod.
1567 applyGeomorphPosNormalUV0(rawSkinLod
.Geomorphs
, &tempSkin
[0], vbHard
, NL3D_RAWSKIN_VERTEX_SIZE
, a
, a1
);
1570 // Manage HardVertices
1571 if(rawSkinLod
.TotalHardVertices
)
1573 // apply skinning directly into AGP RAM for vertices that are not Src of Geomorph
1574 //===========================
1577 // Skip Geomorphs and SoftVertices.
1578 uint8
*destVertexPtr
= vbHard
+ (rawSkinLod
.Geomorphs
.size()+rawSkinLod
.TotalSoftVertices
)*NL3D_RAWSKIN_VERTEX_SIZE
;
// In each VerticesN array the hard vertices follow the soft ones, hence startId = SoftVertices[N-1].
// Hard vertices with 1 matrix.
1581 nInf
= rawSkinLod
.HardVertices
[0];
1582 startId
= rawSkinLod
.SoftVertices
[0];
1585 applyArrayRawSkinNormal1(&rawSkinLod
.Vertices1
[startId
], destVertexPtr
, &boneMat3x4
[0], nInf
);
1586 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
// Hard vertices with 2 matrices.
1589 nInf
= rawSkinLod
.HardVertices
[1];
1590 startId
= rawSkinLod
.SoftVertices
[1];
1593 applyArrayRawSkinNormal2(&rawSkinLod
.Vertices2
[startId
], destVertexPtr
, &boneMat3x4
[0], nInf
);
1594 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
// Hard vertices with 3 matrices.
1597 nInf
= rawSkinLod
.HardVertices
[2];
1598 startId
= rawSkinLod
.SoftVertices
[2];
1601 applyArrayRawSkinNormal3(&rawSkinLod
.Vertices3
[startId
], destVertexPtr
, &boneMat3x4
[0], nInf
);
1602 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
// Hard vertices with 4 matrices.
1605 nInf
= rawSkinLod
.HardVertices
[3];
1606 startId
= rawSkinLod
.SoftVertices
[3];
1609 applyArrayRawSkinNormal4(&rawSkinLod
.Vertices4
[startId
], destVertexPtr
, &boneMat3x4
[0], nInf
);
1610 destVertexPtr
+= nInf
* NL3D_RAWSKIN_VERTEX_SIZE
;
1615 #endif // ADD_MESH_MRM_SKIN_TEMPLATE