Merge branch 'main/rendor-staging' into fixes
[ryzomcore.git] / nel / src / 3d / mesh_mrm_skin_template.cpp
blob05fb52852c2fc2a14298d70beb3e59e1d7504545
1 /**
2 * File not compiled. Included from mesh_mrm_skin.cpp. It is an "old school" template.
3 */
5 // NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
6 // Copyright (C) 2010 Winch Gate Property Limited
7 //
8 // This source file has been modified by the following contributors:
9 // Copyright (C) 2014 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU Affero General Public License as
13 // published by the Free Software Foundation, either version 3 of the
14 // License, or (at your option) any later version.
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 // GNU Affero General Public License for more details.
21 // You should have received a copy of the GNU Affero General Public License
22 // along with this program. If not, see <http://www.gnu.org/licenses/>.
24 #include "std3d.h"
26 #ifdef DEBUG_NEW
27 #define new DEBUG_NEW
28 #endif
30 // ace: #define ADD_MESH_MRM_SKIN_TEMPLATE before including this file to enable its contents.
31 // The goal is to be able to compile every .cpp file with no
32 // special case (GNU/Linux needs).
33 #ifdef ADD_MESH_MRM_SKIN_TEMPLATE
35 // ***************************************************************************
36 // ***************************************************************************
37 // "Templates" for VertexSkinning with any input matrix type.
38 // ***************************************************************************
39 // ***************************************************************************
42 // ***************************************************************************
43 static void applyArraySkinNormalT(uint numMatrixes, uint32 *infPtr, CMesh::CSkinWeight *srcSkinPtr,
44 CVector *srcVertexPtr, CVector *srcNormalPtr, uint normalOff,
45 uint8 *destVertexPtr, vector<CMatrix3x4> &boneMat3x4, uint vertexSize, uint nInf)
47 /* Prefetch all vertex/normal before, it is to be faster.
49 #ifdef NL_HAS_SSE2
51 uint nInfTmp= nInf;
52 uint32 *infTmpPtr= infPtr;
53 for(;nInfTmp>0;nInfTmp--, infTmpPtr++)
55 uint index= *infTmpPtr;
56 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
57 CVector *srcVertex= srcVertexPtr + index;
58 CVector *srcNormal= srcNormalPtr + index;
60 _mm_prefetch((const char *)(void *)srcSkin, _MM_HINT_T1);
61 _mm_prefetch((const char *)(void *)srcVertex, _MM_HINT_T1);
62 _mm_prefetch((const char *)(void *)srcNormal, _MM_HINT_T1);
65 #elif defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
67 uint nInfTmp= nInf;
68 uint32 *infTmpPtr= infPtr;
69 for(;nInfTmp>0;nInfTmp--, infTmpPtr++)
71 uint index= *infTmpPtr;
72 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
73 CVector *srcVertex= srcVertexPtr + index;
74 CVector *srcNormal= srcNormalPtr + index;
76 __asm
78 mov eax, srcSkin
79 mov ebx, srcVertex
80 mov ecx, srcNormal
81 mov edx, [eax]
82 mov edx, [ebx]
83 mov edx, [ecx]
88 #endif
90 // Process vertices.
91 switch(numMatrixes)
93 //=========
94 case 0:
95 // Special case for Vertices influenced by one matrix. Just copy result of mul.
96 // for all InfluencedVertices only.
97 for(;nInf>0;nInf--, infPtr++)
99 uint index= *infPtr;
100 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
101 CVector *srcVertex= srcVertexPtr + index;
102 CVector *srcNormal= srcNormalPtr + index;
103 uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
104 CVector *dstVertex= (CVector*)(dstVertexVB);
105 CVector *dstNormal= (CVector*)(dstVertexVB + normalOff);
108 // Vertex.
109 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, *dstVertex);
110 // Normal.
111 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, *dstNormal);
113 break;
115 //=========
116 case 1:
117 // for all InfluencedVertices only.
118 for(;nInf>0;nInf--, infPtr++)
120 uint index= *infPtr;
121 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
122 CVector *srcVertex= srcVertexPtr + index;
123 CVector *srcNormal= srcNormalPtr + index;
124 uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
125 CVector *dstVertex= (CVector*)(dstVertexVB);
126 CVector *dstNormal= (CVector*)(dstVertexVB + normalOff);
129 // Vertex.
130 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
131 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
132 // Normal.
133 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
134 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
136 break;
138 //=========
139 case 2:
140 // for all InfluencedVertices only.
141 for(;nInf>0;nInf--, infPtr++)
143 uint index= *infPtr;
144 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
145 CVector *srcVertex= srcVertexPtr + index;
146 CVector *srcNormal= srcNormalPtr + index;
147 uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
148 CVector *dstVertex= (CVector*)(dstVertexVB);
149 CVector *dstNormal= (CVector*)(dstVertexVB + normalOff);
152 // Vertex.
153 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
154 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
155 boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
156 // Normal.
157 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
158 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
159 boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal);
161 break;
163 //=========
164 case 3:
165 // for all InfluencedVertices only.
166 for(;nInf>0;nInf--, infPtr++)
168 uint index= *infPtr;
169 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
170 CVector *srcVertex= srcVertexPtr + index;
171 CVector *srcNormal= srcNormalPtr + index;
172 uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
173 CVector *dstVertex= (CVector*)(dstVertexVB);
174 CVector *dstNormal= (CVector*)(dstVertexVB + normalOff);
177 // Vertex.
178 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
179 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
180 boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
181 boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], *dstVertex);
182 // Normal.
183 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
184 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
185 boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal);
186 boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcNormal, srcSkin->Weights[3], *dstNormal);
188 break;
195 // ***************************************************************************
196 static void applyArraySkinTangentSpaceT(uint numMatrixes, uint32 *infPtr, CMesh::CSkinWeight *srcSkinPtr,
197 CVector *srcVertexPtr, CVector *srcNormalPtr, CVector *tgSpacePtr, uint normalOff, uint tgSpaceOff,
198 uint8 *destVertexPtr, vector<CMatrix3x4> &boneMat3x4, uint vertexSize, uint nInf)
200 /* Prefetch all vertex/normal/tgSpace before, it is faster.
202 #ifdef NL_HAS_SSE2
204 uint nInfTmp= nInf;
205 uint32 *infTmpPtr= infPtr;
206 for(;nInfTmp>0;nInfTmp--, infTmpPtr++)
208 uint index= *infTmpPtr;
209 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
210 CVector *srcVertex= srcVertexPtr + index;
211 CVector *srcNormal= srcNormalPtr + index;
212 CVector *srcTgSpace= tgSpacePtr + index;
214 _mm_prefetch((const char *)(void *)srcSkin, _MM_HINT_T1);
215 _mm_prefetch((const char *)(void *)srcVertex, _MM_HINT_T1);
216 _mm_prefetch((const char *)(void *)srcNormal, _MM_HINT_T1);
217 _mm_prefetch((const char *)(void *)srcTgSpace, _MM_HINT_T1);
220 #elif defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
222 uint nInfTmp= nInf;
223 uint32 *infTmpPtr= infPtr;
224 for(;nInfTmp>0;nInfTmp--, infTmpPtr++)
226 uint index= *infTmpPtr;
227 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
228 CVector *srcVertex= srcVertexPtr + index;
229 CVector *srcNormal= srcNormalPtr + index;
230 CVector *srcTgSpace= tgSpacePtr + index;
232 __asm
234 mov eax, srcSkin
235 mov ebx, srcVertex
236 mov ecx, srcNormal
237 mov esi, srcTgSpace
238 mov edx, [eax]
239 mov edx, [ebx]
240 mov edx, [ecx]
241 mov edx, [esi]
246 #endif
248 // Process vertices.
249 switch(numMatrixes)
251 //=========
252 case 0:
253 // Special case for Vertices influenced by one matrix. Just copy result of mul.
254 // for all InfluencedVertices only.
255 for(;nInf>0;nInf--, infPtr++)
257 uint index= *infPtr;
258 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
259 CVector *srcVertex= srcVertexPtr + index;
260 CVector *srcNormal= srcNormalPtr + index;
261 CVector *srcTgSpace= tgSpacePtr + index;
263 uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
264 CVector *dstVertex= (CVector*)(dstVertexVB);
265 CVector *dstNormal= (CVector*)(dstVertexVB + normalOff);
266 CVector *dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff);
270 // Vertex.
271 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, *dstVertex);
272 // Normal.
273 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, *dstNormal);
274 // Tg space
275 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, *dstTgSpace);
278 break;
280 //=========
281 case 1:
282 // for all InfluencedVertices only.
283 for(;nInf>0;nInf--, infPtr++)
285 uint index= *infPtr;
286 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
287 CVector *srcVertex= srcVertexPtr + index;
288 CVector *srcNormal= srcNormalPtr + index;
289 CVector *srcTgSpace= tgSpacePtr + index;
291 uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
292 CVector *dstVertex= (CVector*)(dstVertexVB);
293 CVector *dstNormal= (CVector*)(dstVertexVB + normalOff);
294 CVector *dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff);
296 // Vertex.
297 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
298 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
299 // Normal.
300 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
301 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
302 // Tg space
303 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], *dstTgSpace);
304 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], *dstTgSpace);
306 break;
308 //=========
309 case 2:
310 // for all InfluencedVertices only.
311 for(;nInf>0;nInf--, infPtr++)
313 uint index= *infPtr;
314 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
315 CVector *srcVertex= srcVertexPtr + index;
316 CVector *srcNormal= srcNormalPtr + index;
317 CVector *srcTgSpace= tgSpacePtr + index;
319 uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
320 CVector *dstVertex= (CVector*)(dstVertexVB);
321 CVector *dstNormal= (CVector*)(dstVertexVB + normalOff);
322 CVector *dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff);
324 // Vertex.
325 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
326 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
327 boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
328 // Normal.
329 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
330 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
331 boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal);
332 // Tg space
333 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], *dstTgSpace);
334 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], *dstTgSpace);
335 boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcTgSpace, srcSkin->Weights[2], *dstTgSpace);
337 break;
339 //=========
340 case 3:
341 // for all InfluencedVertices only.
342 for(;nInf>0;nInf--, infPtr++)
344 uint index= *infPtr;
345 CMesh::CSkinWeight *srcSkin= srcSkinPtr + index;
346 CVector *srcVertex= srcVertexPtr + index;
347 CVector *srcNormal= srcNormalPtr + index;
348 CVector *srcTgSpace= tgSpacePtr + index;
350 uint8 *dstVertexVB= destVertexPtr + index * vertexSize;
351 CVector *dstVertex= (CVector*)(dstVertexVB);
352 CVector *dstNormal= (CVector*)(dstVertexVB + normalOff);
353 CVector *dstTgSpace= (CVector*)(dstVertexVB + tgSpaceOff);
355 // Vertex.
356 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetPoint( *srcVertex, srcSkin->Weights[0], *dstVertex);
357 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddPoint( *srcVertex, srcSkin->Weights[1], *dstVertex);
358 boneMat3x4[ srcSkin->MatrixId[2] ].mulAddPoint( *srcVertex, srcSkin->Weights[2], *dstVertex);
359 boneMat3x4[ srcSkin->MatrixId[3] ].mulAddPoint( *srcVertex, srcSkin->Weights[3], *dstVertex);
360 // Normal.
361 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcNormal, srcSkin->Weights[0], *dstNormal);
362 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcNormal, srcSkin->Weights[1], *dstNormal);
363 boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcNormal, srcSkin->Weights[2], *dstNormal);
364 boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcNormal, srcSkin->Weights[3], *dstNormal);
365 // Tg space
366 boneMat3x4[ srcSkin->MatrixId[0] ].mulSetVector( *srcTgSpace, srcSkin->Weights[0], *dstTgSpace);
367 boneMat3x4[ srcSkin->MatrixId[1] ].mulAddVector( *srcTgSpace, srcSkin->Weights[1], *dstTgSpace);
368 boneMat3x4[ srcSkin->MatrixId[2] ].mulAddVector( *srcTgSpace, srcSkin->Weights[2], *dstTgSpace);
369 boneMat3x4[ srcSkin->MatrixId[3] ].mulAddVector( *srcTgSpace, srcSkin->Weights[3], *dstTgSpace);
371 break;
379 // ***************************************************************************
380 // ***************************************************************************
381 // ApplySkin methods.
382 // ***************************************************************************
383 // ***************************************************************************
386 // ***************************************************************************
387 void CMeshMRMGeom::applySkinWithNormal(CLod &lod, const CSkeletonModel *skeleton)
389 nlassert(_Skinned);
390 if(_SkinWeights.empty())
391 return;
393 // get vertexPtr / normalOff.
394 //===========================
395 CVertexBufferReadWrite vba;
396 _VBufferFinal.lock (vba);
397 uint8 *destVertexPtr= (uint8*)vba.getVertexCoordPointer();
398 uint flags= _VBufferFinal.getVertexFormat();
399 sint32 vertexSize= _VBufferFinal.getVertexSize();
400 // must have XYZ and Normal.
401 nlassert((flags & CVertexBuffer::PositionFlag)
402 && (flags & CVertexBuffer::NormalFlag)
406 // Compute offset of each component of the VB.
407 sint32 normalOff;
408 normalOff= _VBufferFinal.getNormalOff();
411 // compute src array.
412 CMesh::CSkinWeight *srcSkinPtr;
413 CVector *srcVertexPtr;
414 CVector *srcNormalPtr= NULL;
415 srcSkinPtr= &_SkinWeights[0];
416 srcVertexPtr= &_OriginalSkinVertices[0];
417 srcNormalPtr= &(_OriginalSkinNormals[0]);
421 // Compute useful Matrix for this lod.
422 //===========================
423 // Those arrays map the array of bones in skeleton.
424 static vector<CMatrix3x4> boneMat3x4;
425 computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton);
428 // apply skinning.
429 //===========================
430 // assert, code below is written especially for 4 per vertex.
431 nlassert(NL3D_MESH_SKINNING_MAX_MATRIX==4);
432 for(uint i=0;i<NL3D_MESH_SKINNING_MAX_MATRIX;i++)
434 uint nInf= (uint)lod.InfluencedVertices[i].size();
435 if( nInf==0 )
436 continue;
437 uint32 *infPtr= &(lod.InfluencedVertices[i][0]);
439 // TestYoyo
440 /*extern uint TESTYOYO_NumStdSkinVertices;
441 TESTYOYO_NumStdSkinVertices+= nInf;*/
443 // apply the skin to the vertices
444 applyArraySkinNormalT(i, infPtr, srcSkinPtr, srcVertexPtr, srcNormalPtr,
445 normalOff, destVertexPtr,
446 boneMat3x4, vertexSize, nInf);
451 // ***************************************************************************
452 void CMeshMRMGeom::applySkinWithTangentSpace(CLod &lod, const CSkeletonModel *skeleton,
453 uint tangentSpaceTexCoord)
455 nlassert(_Skinned);
456 if(_SkinWeights.empty())
457 return;
459 // get vertexPtr / normalOff / tangent space offset.
460 //===========================
461 CVertexBufferReadWrite vba;
462 _VBufferFinal.lock (vba);
463 uint8 *destVertexPtr= (uint8*)vba.getVertexCoordPointer();
464 uint flags= _VBufferFinal.getVertexFormat();
465 sint32 vertexSize= _VBufferFinal.getVertexSize();
466 // must have XYZ.
467 // if there's tangent space, there also must be a normal there.
468 nlassert((flags & CVertexBuffer::PositionFlag)
469 && (flags & CVertexBuffer::NormalFlag)
473 // Compute offset of each component of the VB.
474 sint32 normalOff;
475 normalOff= _VBufferFinal.getNormalOff();
477 // tg space offset
478 sint32 tgSpaceOff = _VBufferFinal.getTexCoordOff((uint8) tangentSpaceTexCoord);
480 // compute src array.
481 CMesh::CSkinWeight *srcSkinPtr;
482 CVector *srcVertexPtr;
483 CVector *srcNormalPtr;
484 CVector *tgSpacePtr;
486 srcSkinPtr= &_SkinWeights[0];
487 srcVertexPtr= &_OriginalSkinVertices[0];
488 srcNormalPtr= &(_OriginalSkinNormals[0]);
489 tgSpacePtr = &(_OriginalTGSpace[0]);
493 // Compute useful Matrix for this lod.
494 //===========================
495 // Those arrays map the array of bones in skeleton.
496 static vector<CMatrix3x4> boneMat3x4;
497 computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton);
500 // apply skinning (with tangent space added)
501 //===========================
502 // assert, code below is written especially for 4 per vertex.
503 nlassert(NL3D_MESH_SKINNING_MAX_MATRIX==4);
504 for(uint i=0;i<NL3D_MESH_SKINNING_MAX_MATRIX;i++)
506 uint nInf= (uint)lod.InfluencedVertices[i].size();
507 if( nInf==0 )
508 continue;
509 uint32 *infPtr= &(lod.InfluencedVertices[i][0]);
511 // apply the skin to the vertices
512 applyArraySkinTangentSpaceT(i, infPtr, srcSkinPtr, srcVertexPtr, srcNormalPtr, tgSpacePtr,
513 normalOff, tgSpaceOff, destVertexPtr,
514 boneMat3x4, vertexSize, nInf);
520 // ***************************************************************************
521 // ***************************************************************************
522 // Raw "Vertex/Normal only" ApplySkin methods.
523 // ***************************************************************************
524 // ***************************************************************************
// Hard-coded layout of one destination vertex written by the applyArrayRawSkinNormal*() routines:
// byte offsets of the normal and of the UV from the start of the vertex, and the byte stride
// between two consecutive vertices. The routines assert the two offsets at compile time.
527 #define NL3D_RAWSKIN_NORMAL_OFF 12
528 #define NL3D_RAWSKIN_UV_OFF 24
529 #define NL3D_RAWSKIN_VERTEX_SIZE 32
532 /* Speed Feature test.
533 Don't use precaching for now, because it seems to be slower on some configs (P4-2.4GHz),
534 but may be faster on others (P3-800).
535 On a P4-2.4GHz, for 40000 vertices skinned, using both no precaching and ASM
536 saves 27% of execution time in the applyRawSkinNormal*() loop (i.e. 1 ms).
537 */
// Select the raw-skin implementation: the inline ASM loops are enabled only on Windows
// builds that allow ASM; the precaching variant is left disabled (define commented out).
538 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
539 //#define NL3D_RAWSKIN_PRECACHE
540 #define NL3D_RAWSKIN_ASM
541 #endif
544 // ***************************************************************************
545 void CMeshMRMGeom::applyArrayRawSkinNormal1(CRawVertexNormalSkin1 *src, uint8 *destVertexPtr,
546 CMatrix3x4 *boneMat3x4, uint nInf)
548 // must write contigously in AGP, and ASM is hardcoded...
549 nlctassert(NL3D_RAWSKIN_NORMAL_OFF==12);
550 nlctassert(NL3D_RAWSKIN_UV_OFF==24);
552 /*extern uint TESTYOYO_NumRawSkinVertices1;
553 TESTYOYO_NumRawSkinVertices1+= nInf;
554 H_AUTO( TestYoyo_RawSkin1 );*/
556 #ifdef NL3D_RAWSKIN_PRECACHE
557 for(;nInf>0;)
559 // number of vertices to process for this block.
560 uint nBlockInf= min(NumCacheVertexNormal1, nInf);
561 // next block.
562 nInf-= nBlockInf;
564 // cache the data in L1 cache.
565 CFastMem::precache(src, nBlockInf * sizeof(CRawVertexNormalSkin1));
566 #else
568 uint nBlockInf= nInf;
569 #endif
572 #ifndef NL3D_RAWSKIN_ASM
573 // for all InfluencedVertices only.
574 for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE)
576 CVector *dstVertex= (CVector*)(destVertexPtr);
577 CVector *dstNormal= (CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF);
579 // For 1 matrix, can write directly to AGP (if destVertexPtr is AGP...)
580 // Vertex.
581 boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, *(CVector*)(destVertexPtr) );
582 // Normal.
583 boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
584 // UV copy.
585 *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
587 #else
588 // ASM harcoded for 36
589 nlctassert(sizeof(CRawVertexNormalSkin1)==36);
591 /* 116 cycles / loop typical
592 58 cycles / loop in theory (no memory problem)
594 __asm
596 mov ecx, nBlockInf
597 mov esi, src
598 mov edi, destVertexPtr
599 mov edx, boneMat3x4
600 theLoop:
601 // Vertex.
602 // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, *(CVector*)(destVertexPtr) );
604 // eax= matrix
605 mov eax, [esi]src.MatrixId // uop: 0/1
606 lea eax, [eax*2+eax]
607 shl eax, 4
608 add eax, edx // uop: 1/0
610 // load x y z
611 fld [esi]src.Vertex.Pos.x // uop: 0/1
612 fld [esi]src.Vertex.Pos.y // uop: 0/1
613 fld [esi]src.Vertex.Pos.z // uop: 0/1
614 // vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
615 fld [eax]CMatrix3x4.a11 // uop: 0/1
616 fmul st, st(3) // uop: 1/0 (5)
617 fld [eax]CMatrix3x4.a12 // uop: 0/1
618 fmul st, st(3) // uop: 1/0 (5)
619 faddp st(1), st // uop: 1/0 (3)
620 fld [eax]CMatrix3x4.a13 // uop: 0/1
621 fmul st, st(2) // uop: 1/0 (5)
622 faddp st(1), st // uop: 1/0 (3)
623 fld [eax]CMatrix3x4.a14 // uop: 0/1
624 faddp st(1), st // uop: 1/0 (3)
625 fstp dword ptr[edi] // uop: 0/0/1/1
626 // vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
627 fld [eax]CMatrix3x4.a21
628 fmul st, st(3)
629 fld [eax]CMatrix3x4.a22
630 fmul st, st(3)
631 faddp st(1), st
632 fld [eax]CMatrix3x4.a23
633 fmul st, st(2)
634 faddp st(1), st
635 fld [eax]CMatrix3x4.a24
636 faddp st(1), st
637 fstp dword ptr[edi+4]
638 // vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
639 fld [eax]CMatrix3x4.a31
640 fmul st, st(3)
641 fld [eax]CMatrix3x4.a32
642 fmul st, st(3)
643 faddp st(1), st
644 fld [eax]CMatrix3x4.a33
645 fmul st, st(2)
646 faddp st(1), st
647 fld [eax]CMatrix3x4.a34
648 faddp st(1), st
649 fstp dword ptr[edi+8]
650 // free x y z
651 fstp st // uop: 1/0
652 fstp st // uop: 1/0
653 fstp st // uop: 1/0
656 // Normal
657 // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
659 // load x y z
660 fld [esi]src.Vertex.Normal.x
661 fld [esi]src.Vertex.Normal.y
662 fld [esi]src.Vertex.Normal.z
663 // vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
664 fld [eax]CMatrix3x4.a11 // uop: 0/1
665 fmul st, st(3) // uop: 1/0 (5)
666 fld [eax]CMatrix3x4.a12 // uop: 0/1
667 fmul st, st(3) // uop: 1/0 (5)
668 faddp st(1), st // uop: 1/0 (3)
669 fld [eax]CMatrix3x4.a13 // uop: 0/1
670 fmul st, st(2) // uop: 1/0 (5)
671 faddp st(1), st // uop: 1/0 (3)
672 fstp dword ptr[edi+12] // uop: 0/0/1/1
673 // vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
674 fld [eax]CMatrix3x4.a21
675 fmul st, st(3)
676 fld [eax]CMatrix3x4.a22
677 fmul st, st(3)
678 faddp st(1), st
679 fld [eax]CMatrix3x4.a23
680 fmul st, st(2)
681 faddp st(1), st
682 fstp dword ptr[edi+16]
683 // vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
684 fld [eax]CMatrix3x4.a31
685 fmul st, st(3)
686 fld [eax]CMatrix3x4.a32
687 fmul st, st(3)
688 faddp st(1), st
689 fld [eax]CMatrix3x4.a33
690 fmul st, st(2)
691 faddp st(1), st
692 fstp dword ptr[edi+20]
693 // free x y z
694 fstp st
695 fstp st
696 fstp st
699 // UV copy.
700 // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
701 mov eax, [esi]src.Vertex.UV.U // uop: 0/1
702 mov dword ptr[edi+24], eax // uop: 0/0/1/1
703 mov eax, [esi]src.Vertex.UV.V // uop: 0/1
704 mov dword ptr[edi+28], eax // uop: 0/0/1/1
707 // **** next
708 add esi, 36 // uop: 1/0
709 add edi, NL3D_RAWSKIN_VERTEX_SIZE // uop: 1/0
710 dec ecx // uop: 1/0
711 jnz theLoop // uop: 1/1 (p1)
713 mov nBlockInf, ecx
714 mov src, esi
715 mov destVertexPtr, edi
717 #endif
723 // ***************************************************************************
724 void CMeshMRMGeom::applyArrayRawSkinNormal2(CRawVertexNormalSkin2 *src, uint8 *destVertexPtr,
725 CMatrix3x4 *boneMat3x4, uint nInf)
727 // must write contigously in AGP, and ASM is hardcoded...
728 nlctassert(NL3D_RAWSKIN_NORMAL_OFF==12);
729 nlctassert(NL3D_RAWSKIN_UV_OFF==24);
731 /*extern uint TESTYOYO_NumRawSkinVertices2;
732 TESTYOYO_NumRawSkinVertices2+= nInf;
733 H_AUTO( TestYoyo_RawSkin2 );*/
735 // Since VertexPtr may be a AGP Ram, MUST NOT read into it! (mulAdd*() do it!)
736 CVector tmpVert;
738 #ifdef NL3D_RAWSKIN_PRECACHE
739 for(;nInf>0;)
741 // number of vertices to process for this block.
742 uint nBlockInf= min(NumCacheVertexNormal2, nInf);
743 // next block.
744 nInf-= nBlockInf;
746 // cache the data in L1 cache.
747 CFastMem::precache(src, nBlockInf * sizeof(CRawVertexNormalSkin2));
748 #else
750 uint nBlockInf= nInf;
751 #endif
754 #ifndef NL3D_RAWSKIN_ASM
755 // for all InfluencedVertices only.
756 for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE)
758 // Vertex.
759 boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, src->Weights[0], tmpVert);
760 boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex.Pos, src->Weights[1], tmpVert);
761 *(CVector*)(destVertexPtr)= tmpVert;
762 // Normal.
763 boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, src->Weights[0], tmpVert);
764 boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Vertex.Normal, src->Weights[1], tmpVert);
765 *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
766 // UV copy.
767 *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
769 #else
770 // ASM harcoded for 48
771 nlctassert(sizeof(CRawVertexNormalSkin2)==48);
773 /* 154 cycles / loop typical
774 124 cycles / loop in theory (no memory problem)
776 __asm
778 mov ecx, nBlockInf
779 mov esi, src
780 mov edi, destVertexPtr
781 mov edx, boneMat3x4
782 theLoop:
783 // Vertex.
784 // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, *(CVector*)(destVertexPtr) );
786 // eax= matrix0
787 mov eax, [esi+0]src.MatrixId // uop: 0/1
788 lea eax, [eax*2+eax]
789 shl eax, 4
790 add eax, edx // uop: 1/0
791 // ebx= matrix1
792 mov ebx, [esi+4]src.MatrixId // uop: 0/1
793 lea ebx, [ebx*2+ebx]
794 shl ebx, 4
795 add ebx, edx // uop: 1/0
797 // load x y z
798 fld [esi]src.Vertex.Pos.x // uop: 0/1
799 fld [esi]src.Vertex.Pos.y // uop: 0/1
800 fld [esi]src.Vertex.Pos.z // uop: 0/1
802 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
803 // 1st Matrix
804 fld [eax]CMatrix3x4.a11 // uop: 0/1
805 fmul st, st(3) // uop: 1/0 (5)
806 fld [eax]CMatrix3x4.a12 // uop: 0/1
807 fmul st, st(3) // uop: 1/0 (5)
808 faddp st(1), st // uop: 1/0 (3)
809 fld [eax]CMatrix3x4.a13 // uop: 0/1
810 fmul st, st(2) // uop: 1/0 (5)
811 faddp st(1), st // uop: 1/0 (3)
812 fld [eax]CMatrix3x4.a14 // uop: 0/1
813 faddp st(1), st // uop: 1/0 (3)
814 // mul by scale
815 fmul [esi+0]src.Weights
817 // 2nd matrix
818 fld [ebx]CMatrix3x4.a11
819 fmul st, st(4)
820 fld [ebx]CMatrix3x4.a12
821 fmul st, st(4)
822 faddp st(1), st
823 fld [ebx]CMatrix3x4.a13
824 fmul st, st(3)
825 faddp st(1), st
826 fld [ebx]CMatrix3x4.a14
827 faddp st(1), st
828 // mul by scale, and append
829 fmul [esi+4]src.Weights
830 faddp st(1), st
832 // store
833 fstp dword ptr[edi] // uop: 0/0/1/1
835 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
836 fld [eax]CMatrix3x4.a21
837 fmul st, st(3)
838 fld [eax]CMatrix3x4.a22
839 fmul st, st(3)
840 faddp st(1), st
841 fld [eax]CMatrix3x4.a23
842 fmul st, st(2)
843 faddp st(1), st
844 fld [eax]CMatrix3x4.a24
845 faddp st(1), st
846 // mul by scale
847 fmul [esi+0]src.Weights
849 // 2nd matrix
850 fld [ebx]CMatrix3x4.a21
851 fmul st, st(4)
852 fld [ebx]CMatrix3x4.a22
853 fmul st, st(4)
854 faddp st(1), st
855 fld [ebx]CMatrix3x4.a23
856 fmul st, st(3)
857 faddp st(1), st
858 fld [ebx]CMatrix3x4.a24
859 faddp st(1), st
860 // mul by scale, and append
861 fmul [esi+4]src.Weights
862 faddp st(1), st
864 // store
865 fstp dword ptr[edi+4]
867 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
868 fld [eax]CMatrix3x4.a31
869 fmul st, st(3)
870 fld [eax]CMatrix3x4.a32
871 fmul st, st(3)
872 faddp st(1), st
873 fld [eax]CMatrix3x4.a33
874 fmul st, st(2)
875 faddp st(1), st
876 fld [eax]CMatrix3x4.a34
877 faddp st(1), st
878 // mul by scale
879 fmul [esi+0]src.Weights
881 // 2nd matrix
882 fld [ebx]CMatrix3x4.a31
883 fmul st, st(4)
884 fld [ebx]CMatrix3x4.a32
885 fmul st, st(4)
886 faddp st(1), st
887 fld [ebx]CMatrix3x4.a33
888 fmul st, st(3)
889 faddp st(1), st
890 fld [ebx]CMatrix3x4.a34
891 faddp st(1), st
892 // mul by scale, and append
893 fmul [esi+4]src.Weights
894 faddp st(1), st
896 // store
897 fstp dword ptr[edi+8]
899 // free x y z
900 fstp st // uop: 1/0
901 fstp st // uop: 1/0
902 fstp st // uop: 1/0
905 // Normal
906 // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
908 // load x y z
909 fld [esi]src.Vertex.Normal.x
910 fld [esi]src.Vertex.Normal.y
911 fld [esi]src.Vertex.Normal.z
913 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
914 fld [eax]CMatrix3x4.a11 // uop: 0/1
915 fmul st, st(3) // uop: 1/0 (5)
916 fld [eax]CMatrix3x4.a12 // uop: 0/1
917 fmul st, st(3) // uop: 1/0 (5)
918 faddp st(1), st // uop: 1/0 (3)
919 fld [eax]CMatrix3x4.a13 // uop: 0/1
920 fmul st, st(2) // uop: 1/0 (5)
921 faddp st(1), st // uop: 1/0 (3)
922 // mul by scale
923 fmul [esi+0]src.Weights
925 // 2nd matrix
926 fld [ebx]CMatrix3x4.a11
927 fmul st, st(4)
928 fld [ebx]CMatrix3x4.a12
929 fmul st, st(4)
930 faddp st(1), st
931 fld [ebx]CMatrix3x4.a13
932 fmul st, st(3)
933 faddp st(1), st
934 // mul by scale, and append
935 fmul [esi+4]src.Weights
936 faddp st(1), st
938 // store
939 fstp dword ptr[edi+12] // uop: 0/0/1/1
941 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
942 fld [eax]CMatrix3x4.a21
943 fmul st, st(3)
944 fld [eax]CMatrix3x4.a22
945 fmul st, st(3)
946 faddp st(1), st
947 fld [eax]CMatrix3x4.a23
948 fmul st, st(2)
949 faddp st(1), st
950 // mul by scale
951 fmul [esi+0]src.Weights
953 // 2nd matrix
954 fld [ebx]CMatrix3x4.a21
955 fmul st, st(4)
956 fld [ebx]CMatrix3x4.a22
957 fmul st, st(4)
958 faddp st(1), st
959 fld [ebx]CMatrix3x4.a23
960 fmul st, st(3)
961 faddp st(1), st
962 // mul by scale, and append
963 fmul [esi+4]src.Weights
964 faddp st(1), st
966 // store
967 fstp dword ptr[edi+16]
969 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
970 fld [eax]CMatrix3x4.a31
971 fmul st, st(3)
972 fld [eax]CMatrix3x4.a32
973 fmul st, st(3)
974 faddp st(1), st
975 fld [eax]CMatrix3x4.a33
976 fmul st, st(2)
977 faddp st(1), st
978 // mul by scale
979 fmul [esi+0]src.Weights
981 // 2nd matrix
982 fld [ebx]CMatrix3x4.a31
983 fmul st, st(4)
984 fld [ebx]CMatrix3x4.a32
985 fmul st, st(4)
986 faddp st(1), st
987 fld [ebx]CMatrix3x4.a33
988 fmul st, st(3)
989 faddp st(1), st
990 // mul by scale, and append
991 fmul [esi+4]src.Weights
992 faddp st(1), st
994 // store
995 fstp dword ptr[edi+20]
997 // free x y z
998 fstp st
999 fstp st
1000 fstp st
1003 // UV copy.
1004 // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
1005 mov eax, [esi]src.Vertex.UV.U // uop: 0/1
1006 mov dword ptr[edi+24], eax // uop: 0/0/1/1
1007 mov eax, [esi]src.Vertex.UV.V // uop: 0/1
1008 mov dword ptr[edi+28], eax // uop: 0/0/1/1
1011 // **** next
1012 add esi, 48 // uop: 1/0
1013 add edi, NL3D_RAWSKIN_VERTEX_SIZE // uop: 1/0
1014 dec ecx // uop: 1/0
1015 jnz theLoop // uop: 1/1 (p1)
1017 mov nBlockInf, ecx
1018 mov src, esi
1019 mov destVertexPtr, edi
1021 #endif
1026 // ***************************************************************************
// Skin nInf raw vertices that are each influenced by 3 bone matrices.
// For every source vertex, the position and the normal are transformed by the three matrices
// boneMat3x4[ src->MatrixId[0..2] ], each result being scaled by src->Weights[0..2] and summed.
// The output vertex is written at destVertexPtr: position at offset 0, normal at
// NL3D_RAWSKIN_NORMAL_OFF, and the source UV copied unchanged at NL3D_RAWSKIN_UV_OFF.
// src advances by one CRawVertexNormalSkin3 and destVertexPtr by NL3D_RAWSKIN_VERTEX_SIZE per vertex.
// \param src first source vertex to skin.
// \param destVertexPtr destination vertices; only written, never read back (may be AGP RAM).
// \param boneMat3x4 bone matrix array, indexed by src->MatrixId[].
// \param nInf number of vertices to skin. NB: when NL3D_RAWSKIN_ASM is defined the loop is a
//	dec/jnz loop that always runs at least once, so a block must never be empty on that path.
//	The calls in applyRawSkinWithNormal() are all guarded by nInf>0.
1027 void CMeshMRMGeom::applyArrayRawSkinNormal3(CRawVertexNormalSkin3 *src, uint8 *destVertexPtr,
1028 CMatrix3x4 *boneMat3x4, uint nInf)
1030 // must write contiguously in AGP, and ASM is hardcoded for these offsets (position at 0, normal at 12, UV at 24).
1031 nlctassert(NL3D_RAWSKIN_NORMAL_OFF==12);
1032 nlctassert(NL3D_RAWSKIN_UV_OFF==24);
1034 /*extern uint TESTYOYO_NumRawSkinVertices3;
1035 TESTYOYO_NumRawSkinVertices3+= nInf;
1036 H_AUTO( TestYoyo_RawSkin3 );*/
1038 // Since VertexPtr may be an AGP RAM, MUST NOT read into it! (mulAdd*() do it!)
// Hence the C++ path accumulates into tmpVert and stores the finished vector in one assignment.
1039 CVector tmpVert;
1041 #ifdef NL3D_RAWSKIN_PRECACHE
// Process the input by blocks, so that each block can be precached before being skinned.
1042 for(;nInf>0;)
1044 // number of vertices to process for this block.
1045 uint nBlockInf= min(NumCacheVertexNormal3, nInf);
1046 // next block.
1047 nInf-= nBlockInf;
1049 // cache the data in L1 cache.
1050 CFastMem::precache(src, nBlockInf * sizeof(CRawVertexNormalSkin3));
1051 #else
// No precache: a single block covering all the vertices.
1053 uint nBlockInf= nInf;
1054 #endif
1057 #ifndef NL3D_RAWSKIN_ASM
1058 // for all InfluencedVertices only.
1059 for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE)
1061 // Vertex.
// mulSet* starts the accumulation in tmpVert, the two mulAdd* calls add the other influences.
1062 boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, src->Weights[0], tmpVert);
1063 boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex.Pos, src->Weights[1], tmpVert);
1064 boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex.Pos, src->Weights[2], tmpVert);
1065 *(CVector*)(destVertexPtr)= tmpVert;
1066 // Normal.
1067 boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, src->Weights[0], tmpVert);
1068 boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Vertex.Normal, src->Weights[1], tmpVert);
1069 boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Vertex.Normal, src->Weights[2], tmpVert);
1070 *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
1071 // UV copy.
1072 *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
1074 #else
1075 // ASM hardcoded for a 56-byte source vertex (see "add esi, 56" at the end of the loop)
1076 nlctassert(sizeof(CRawVertexNormalSkin3)==56);
1079 /* 226 cycles / loop typical
1080 192 cycles / loop in theory (no memory problem)
1081 148 optimal
1083 __asm
// ecx= number of vertices left in this block, esi= current source vertex, edi= current destination vertex
1085 mov ecx, nBlockInf
1086 mov esi, src
1087 mov edi, destVertexPtr
1088 theLoop:
1089 // Vertex.
1090 // **** boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, *(CVector*)(destVertexPtr) );
1092 // eax= matrix0, ie boneMat3x4 + MatrixId[0]*48 (lea gives *3, shl 4 gives *16)
1093 mov eax, [esi+0]src.MatrixId // uop: 0/1
1094 lea eax, [eax*2+eax]
1095 shl eax, 4
1096 add eax, boneMat3x4 // uop: 1/0
1097 // ebx= matrix1, same address computation with MatrixId[1]
1098 mov ebx, [esi+4]src.MatrixId // uop: 0/1
1099 lea ebx, [ebx*2+ebx]
1100 shl ebx, 4
1101 add ebx, boneMat3x4 // uop: 1/0
1102 // edx= matrix2, same address computation with MatrixId[2]
1103 mov edx, [esi+8]src.MatrixId // uop: 0/1
1104 lea edx, [edx*2+edx]
1105 shl edx, 4
1106 add edx, boneMat3x4 // uop: 1/0
1108 // load x y z. The FPU stack is then st(0)=z, st(1)=y, st(2)=x
1109 fld [esi]src.Vertex.Pos.x // uop: 0/1
1110 fld [esi]src.Vertex.Pos.y // uop: 0/1
1111 fld [esi]src.Vertex.Pos.z // uop: 0/1
1113 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z + a14);
1114 // 1st Matrix
// Each fld pushes one value, so x/y/z are successively found at st(3), st(3) then st(2).
1115 fld [eax]CMatrix3x4.a11 // uop: 0/1
1116 fmul st, st(3) // uop: 1/0 (5)
1117 fld [eax]CMatrix3x4.a12 // uop: 0/1
1118 fmul st, st(3) // uop: 1/0 (5)
1119 faddp st(1), st // uop: 1/0 (3)
1120 fld [eax]CMatrix3x4.a13 // uop: 0/1
1121 fmul st, st(2) // uop: 1/0 (5)
1122 faddp st(1), st // uop: 1/0 (3)
1123 fld [eax]CMatrix3x4.a14 // uop: 0/1
1124 faddp st(1), st // uop: 1/0 (3)
1125 // mul by scale
1126 fmul [esi+0]src.Weights
1128 // 2nd matrix
// The partial result of the 1st matrix stays on the stack, so x/y/z are one slot deeper: st(4), st(4) then st(3).
1129 fld [ebx]CMatrix3x4.a11
1130 fmul st, st(4)
1131 fld [ebx]CMatrix3x4.a12
1132 fmul st, st(4)
1133 faddp st(1), st
1134 fld [ebx]CMatrix3x4.a13
1135 fmul st, st(3)
1136 faddp st(1), st
1137 fld [ebx]CMatrix3x4.a14
1138 faddp st(1), st
1139 // mul by scale, and append
1140 fmul [esi+4]src.Weights
1141 faddp st(1), st
1143 // 3rd matrix
1144 fld [edx]CMatrix3x4.a11
1145 fmul st, st(4)
1146 fld [edx]CMatrix3x4.a12
1147 fmul st, st(4)
1148 faddp st(1), st
1149 fld [edx]CMatrix3x4.a13
1150 fmul st, st(3)
1151 faddp st(1), st
1152 fld [edx]CMatrix3x4.a14
1153 faddp st(1), st
1154 // mul by scale, and append
1155 fmul [esi+8]src.Weights
1156 faddp st(1), st
1158 // store the blended x, and pop it: the stack is back to z, y, x
1159 fstp dword ptr[edi] // uop: 0/0/1/1
1161 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z + a24);
1162 fld [eax]CMatrix3x4.a21
1163 fmul st, st(3)
1164 fld [eax]CMatrix3x4.a22
1165 fmul st, st(3)
1166 faddp st(1), st
1167 fld [eax]CMatrix3x4.a23
1168 fmul st, st(2)
1169 faddp st(1), st
1170 fld [eax]CMatrix3x4.a24
1171 faddp st(1), st
1172 // mul by scale
1173 fmul [esi+0]src.Weights
1175 // 2nd matrix
1176 fld [ebx]CMatrix3x4.a21
1177 fmul st, st(4)
1178 fld [ebx]CMatrix3x4.a22
1179 fmul st, st(4)
1180 faddp st(1), st
1181 fld [ebx]CMatrix3x4.a23
1182 fmul st, st(3)
1183 faddp st(1), st
1184 fld [ebx]CMatrix3x4.a24
1185 faddp st(1), st
1186 // mul by scale, and append
1187 fmul [esi+4]src.Weights
1188 faddp st(1), st
1190 // 3rd matrix
1191 fld [edx]CMatrix3x4.a21
1192 fmul st, st(4)
1193 fld [edx]CMatrix3x4.a22
1194 fmul st, st(4)
1195 faddp st(1), st
1196 fld [edx]CMatrix3x4.a23
1197 fmul st, st(3)
1198 faddp st(1), st
1199 fld [edx]CMatrix3x4.a24
1200 faddp st(1), st
1201 // mul by scale, and append
1202 fmul [esi+8]src.Weights
1203 faddp st(1), st
1205 // store
1206 fstp dword ptr[edi+4]
1208 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z + a34);
1209 fld [eax]CMatrix3x4.a31
1210 fmul st, st(3)
1211 fld [eax]CMatrix3x4.a32
1212 fmul st, st(3)
1213 faddp st(1), st
1214 fld [eax]CMatrix3x4.a33
1215 fmul st, st(2)
1216 faddp st(1), st
1217 fld [eax]CMatrix3x4.a34
1218 faddp st(1), st
1219 // mul by scale
1220 fmul [esi+0]src.Weights
1222 // 2nd matrix
1223 fld [ebx]CMatrix3x4.a31
1224 fmul st, st(4)
1225 fld [ebx]CMatrix3x4.a32
1226 fmul st, st(4)
1227 faddp st(1), st
1228 fld [ebx]CMatrix3x4.a33
1229 fmul st, st(3)
1230 faddp st(1), st
1231 fld [ebx]CMatrix3x4.a34
1232 faddp st(1), st
1233 // mul by scale, and append
1234 fmul [esi+4]src.Weights
1235 faddp st(1), st
1237 // 3rd matrix
1238 fld [edx]CMatrix3x4.a31
1239 fmul st, st(4)
1240 fld [edx]CMatrix3x4.a32
1241 fmul st, st(4)
1242 faddp st(1), st
1243 fld [edx]CMatrix3x4.a33
1244 fmul st, st(3)
1245 faddp st(1), st
1246 fld [edx]CMatrix3x4.a34
1247 faddp st(1), st
1248 // mul by scale, and append
1249 fmul [esi+8]src.Weights
1250 faddp st(1), st
1252 // store
1253 fstp dword ptr[edi+8]
1255 // free x y z (pop the 3 position components, the FPU stack is empty again)
1256 fstp st // uop: 1/0
1257 fstp st // uop: 1/0
1258 fstp st // uop: 1/0
1261 // Normal
1262 // **** boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF) );
// eax, ebx and edx still point to the 3 matrices computed above.
1264 // load x y z
1265 fld [esi]src.Vertex.Normal.x
1266 fld [esi]src.Vertex.Normal.y
1267 fld [esi]src.Vertex.Normal.z
1268 // **** vout.x= (a11*vin.x + a12*vin.y + a13*vin.z); NB: no a14 term is added for the normal
1269 fld [eax]CMatrix3x4.a11 // uop: 0/1
1270 fmul st, st(3) // uop: 1/0 (5)
1271 fld [eax]CMatrix3x4.a12 // uop: 0/1
1272 fmul st, st(3) // uop: 1/0 (5)
1273 faddp st(1), st // uop: 1/0 (3)
1274 fld [eax]CMatrix3x4.a13 // uop: 0/1
1275 fmul st, st(2) // uop: 1/0 (5)
1276 faddp st(1), st // uop: 1/0 (3)
1277 // mul by scale
1278 fmul [esi+0]src.Weights
1280 // 2nd matrix
1281 fld [ebx]CMatrix3x4.a11
1282 fmul st, st(4)
1283 fld [ebx]CMatrix3x4.a12
1284 fmul st, st(4)
1285 faddp st(1), st
1286 fld [ebx]CMatrix3x4.a13
1287 fmul st, st(3)
1288 faddp st(1), st
1289 // mul by scale, and append
1290 fmul [esi+4]src.Weights
1291 faddp st(1), st
1293 // 3rd matrix
1294 fld [edx]CMatrix3x4.a11
1295 fmul st, st(4)
1296 fld [edx]CMatrix3x4.a12
1297 fmul st, st(4)
1298 faddp st(1), st
1299 fld [edx]CMatrix3x4.a13
1300 fmul st, st(3)
1301 faddp st(1), st
1302 // mul by scale, and append
1303 fmul [esi+8]src.Weights
1304 faddp st(1), st
1306 // store
1307 fstp dword ptr[edi+12] // uop: 0/0/1/1
1309 // **** vout.y= (a21*vin.x + a22*vin.y + a23*vin.z); NB: no a24 term is added for the normal
1310 fld [eax]CMatrix3x4.a21
1311 fmul st, st(3)
1312 fld [eax]CMatrix3x4.a22
1313 fmul st, st(3)
1314 faddp st(1), st
1315 fld [eax]CMatrix3x4.a23
1316 fmul st, st(2)
1317 faddp st(1), st
1318 // mul by scale
1319 fmul [esi+0]src.Weights
1321 // 2nd matrix
1322 fld [ebx]CMatrix3x4.a21
1323 fmul st, st(4)
1324 fld [ebx]CMatrix3x4.a22
1325 fmul st, st(4)
1326 faddp st(1), st
1327 fld [ebx]CMatrix3x4.a23
1328 fmul st, st(3)
1329 faddp st(1), st
1330 // mul by scale, and append
1331 fmul [esi+4]src.Weights
1332 faddp st(1), st
1334 // 3rd matrix
1335 fld [edx]CMatrix3x4.a21
1336 fmul st, st(4)
1337 fld [edx]CMatrix3x4.a22
1338 fmul st, st(4)
1339 faddp st(1), st
1340 fld [edx]CMatrix3x4.a23
1341 fmul st, st(3)
1342 faddp st(1), st
1343 // mul by scale, and append
1344 fmul [esi+8]src.Weights
1345 faddp st(1), st
1347 // store
1348 fstp dword ptr[edi+16]
1350 // **** vout.z= (a31*vin.x + a32*vin.y + a33*vin.z); NB: no a34 term is added for the normal
1351 fld [eax]CMatrix3x4.a31
1352 fmul st, st(3)
1353 fld [eax]CMatrix3x4.a32
1354 fmul st, st(3)
1355 faddp st(1), st
1356 fld [eax]CMatrix3x4.a33
1357 fmul st, st(2)
1358 faddp st(1), st
1359 // mul by scale
1360 fmul [esi+0]src.Weights
1362 // 2nd matrix
1363 fld [ebx]CMatrix3x4.a31
1364 fmul st, st(4)
1365 fld [ebx]CMatrix3x4.a32
1366 fmul st, st(4)
1367 faddp st(1), st
1368 fld [ebx]CMatrix3x4.a33
1369 fmul st, st(3)
1370 faddp st(1), st
1371 // mul by scale, and append
1372 fmul [esi+4]src.Weights
1373 faddp st(1), st
1375 // 3rd matrix
1376 fld [edx]CMatrix3x4.a31
1377 fmul st, st(4)
1378 fld [edx]CMatrix3x4.a32
1379 fmul st, st(4)
1380 faddp st(1), st
1381 fld [edx]CMatrix3x4.a33
1382 fmul st, st(3)
1383 faddp st(1), st
1384 // mul by scale, and append
1385 fmul [esi+8]src.Weights
1386 faddp st(1), st
1388 // store
1389 fstp dword ptr[edi+20]
1391 // free x y z
1392 fstp st
1393 fstp st
1394 fstp st
1397 // UV copy.
1398 // **** *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
// U and V are moved as raw 32 bits values through eax (matrix0 pointer is no more needed for this vertex).
1399 mov eax, [esi]src.Vertex.UV.U // uop: 0/1
1400 mov dword ptr[edi+24], eax // uop: 0/0/1/1
1401 mov eax, [esi]src.Vertex.UV.V // uop: 0/1
1402 mov dword ptr[edi+28], eax // uop: 0/0/1/1
1405 // **** next
// dec/jnz: the loop body is always run before the counter is tested, so ecx must be >0 on entry.
1406 add esi, 56 // uop: 1/0
1407 add edi, NL3D_RAWSKIN_VERTEX_SIZE // uop: 1/0
1408 dec ecx // uop: 1/0
1409 jnz theLoop // uop: 1/1 (p1)
// write the final counter (0 here) and the advanced pointers back to the C++ variables,
// so that the next precache block continues where this one stopped.
1411 mov nBlockInf, ecx
1412 mov src, esi
1413 mov destVertexPtr, edi
1415 #endif
1420 // ***************************************************************************
1421 void CMeshMRMGeom::applyArrayRawSkinNormal4(CRawVertexNormalSkin4 *src, uint8 *destVertexPtr,
1422 CMatrix3x4 *boneMat3x4, uint nInf)
1424 // must write contigously in AGP, and ASM is hardcoded...
1425 nlctassert(NL3D_RAWSKIN_NORMAL_OFF==12);
1426 nlctassert(NL3D_RAWSKIN_UV_OFF==24);
1428 /*extern uint TESTYOYO_NumRawSkinVertices4;
1429 TESTYOYO_NumRawSkinVertices4+= nInf;
1430 H_AUTO( TestYoyo_RawSkin4 );*/
1432 // Since VertexPtr may be a AGP Ram, MUST NOT read into it! (mulAdd*() do it!)
1433 CVector tmpVert;
1435 #ifdef NL3D_RAWSKIN_PRECACHE
1436 for(;nInf>0;)
1438 // number of vertices to process for this block.
1439 uint nBlockInf= min(NumCacheVertexNormal4, nInf);
1440 // next block.
1441 nInf-= nBlockInf;
1443 // cache the data in L1 cache.
1444 CFastMem::precache(src, nBlockInf * sizeof(CRawVertexNormalSkin4));
1445 #else
1447 uint nBlockInf= nInf;
1448 #endif
1450 // for all InfluencedVertices only.
1451 for(;nBlockInf>0;nBlockInf--, src++, destVertexPtr+=NL3D_RAWSKIN_VERTEX_SIZE)
1453 // Vertex.
1454 boneMat3x4[ src->MatrixId[0] ].mulSetPoint( src->Vertex.Pos, src->Weights[0], tmpVert);
1455 boneMat3x4[ src->MatrixId[1] ].mulAddPoint( src->Vertex.Pos, src->Weights[1], tmpVert);
1456 boneMat3x4[ src->MatrixId[2] ].mulAddPoint( src->Vertex.Pos, src->Weights[2], tmpVert);
1457 boneMat3x4[ src->MatrixId[3] ].mulAddPoint( src->Vertex.Pos, src->Weights[3], tmpVert);
1458 *(CVector*)(destVertexPtr)= tmpVert;
1459 // Normal.
1460 boneMat3x4[ src->MatrixId[0] ].mulSetVector( src->Vertex.Normal, src->Weights[0], tmpVert);
1461 boneMat3x4[ src->MatrixId[1] ].mulAddVector( src->Vertex.Normal, src->Weights[1], tmpVert);
1462 boneMat3x4[ src->MatrixId[2] ].mulAddVector( src->Vertex.Normal, src->Weights[2], tmpVert);
1463 boneMat3x4[ src->MatrixId[3] ].mulAddVector( src->Vertex.Normal, src->Weights[3], tmpVert);
1464 *(CVector*)(destVertexPtr + NL3D_RAWSKIN_NORMAL_OFF)= tmpVert;
1465 // UV copy.
1466 *(CUV*)(destVertexPtr + NL3D_RAWSKIN_UV_OFF)= src->Vertex.UV;
1469 // NB: ASM not done for 4 vertices, cause very rare and negligeable ...
1474 // ***************************************************************************
1475 void CMeshMRMGeom::applyRawSkinWithNormal(CLod &lod, CRawSkinNormalCache &rawSkinLod, const CSkeletonModel *skeleton, uint8 *vbHard, float alphaLod)
1477 nlassert(_Skinned);
1478 if(_SkinWeights.empty())
1479 return;
1481 // Some assert
1482 //===========================
1483 // must have XYZ, Normal and UV only
1484 nlassert( _VBufferFinal.getVertexFormat() == (CVertexBuffer::PositionFlag | CVertexBuffer::NormalFlag | CVertexBuffer::TexCoord0Flag) );
1485 nlassert( _VBufferFinal.getValueType(CVertexBuffer::TexCoord0) == CVertexBuffer::Float2 );
1486 nlassert( _VBufferFinal.getVertexSize() ==NL3D_RAWSKIN_VERTEX_SIZE);
1488 // HardCoded for normalOff==12 (see applyArrayRawSkinNormal*)
1489 nlassert( _VBufferFinal.getNormalOff()==NL3D_RAWSKIN_NORMAL_OFF );
1490 nlassert( _VBufferFinal.getTexCoordOff()==NL3D_RAWSKIN_UV_OFF );
1491 // assert, code below is written especially for 4 per vertex.
1492 nlassert( NL3D_MESH_SKINNING_MAX_MATRIX==4 );
1495 // Compute useful Matrix for this lod.
1496 //===========================
1497 // Those arrays map the array of bones in skeleton.
1498 static vector<CMatrix3x4> boneMat3x4;
1499 computeBoneMatrixes3x4(boneMat3x4, lod.MatrixInfluences, skeleton);
1502 // TestYoyo
1503 /*extern uint TESTYOYO_NumRawSkinVertices;
1504 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices1.size();
1505 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices2.size();
1506 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices3.size();
1507 TESTYOYO_NumRawSkinVertices+= rawSkinLod.Vertices4.size();*/
1510 uint nInf;
1512 // Manage "SoftVertices"
1513 if(rawSkinLod.TotalSoftVertices)
1515 // apply skinning into Temp RAM for vertices that are Src of Geomorph
1516 //===========================
1517 static vector<uint8> tempSkin;
1518 uint tempVbSize= rawSkinLod.TotalSoftVertices*NL3D_RAWSKIN_VERTEX_SIZE;
1519 if(tempSkin.size() < tempVbSize)
1520 tempSkin.resize(tempVbSize);
1521 uint8 *destVertexPtr= &tempSkin[0];
1523 // 1 Matrix
1524 nInf= rawSkinLod.SoftVertices[0];
1525 if(nInf>0)
1527 applyArrayRawSkinNormal1(&rawSkinLod.Vertices1[0], destVertexPtr, &boneMat3x4[0], nInf);
1528 destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1530 // 2 Matrix
1531 nInf= rawSkinLod.SoftVertices[1];
1532 if(nInf>0)
1534 applyArrayRawSkinNormal2(&rawSkinLod.Vertices2[0], destVertexPtr, &boneMat3x4[0], nInf);
1535 destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1537 // 3 Matrix
1538 nInf= rawSkinLod.SoftVertices[2];
1539 if(nInf>0)
1541 applyArrayRawSkinNormal3(&rawSkinLod.Vertices3[0], destVertexPtr, &boneMat3x4[0], nInf);
1542 destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1544 // 4 Matrix
1545 nInf= rawSkinLod.SoftVertices[3];
1546 if(nInf>0)
1548 applyArrayRawSkinNormal4(&rawSkinLod.Vertices4[0], destVertexPtr, &boneMat3x4[0], nInf);
1549 destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1552 // Fast Copy this into AGP Ram. NB: done before Geomorphs, because ensure some precaching this way!!
1553 //===========================
1554 // Skin geomorphs.
1555 uint8 *vbHardStart= vbHard + rawSkinLod.Geomorphs.size()*NL3D_RAWSKIN_VERTEX_SIZE;
1557 // fast copy
1558 CFastMem::memcpy(vbHardStart, &tempSkin[0], tempVbSize);
1560 // Geomorphs directly into AGP Ram
1561 //===========================
1562 clamp(alphaLod, 0.f, 1.f);
1563 float a= alphaLod;
1564 float a1= 1 - alphaLod;
1566 // Fast Geomorph
1567 applyGeomorphPosNormalUV0(rawSkinLod.Geomorphs, &tempSkin[0], vbHard, NL3D_RAWSKIN_VERTEX_SIZE, a, a1);
1570 // Manage HardVertices
1571 if(rawSkinLod.TotalHardVertices)
1573 // apply skinning directly into AGP RAM for vertices that are not Src of Geomorph
1574 //===========================
1575 uint startId;
1577 // Skip Geomorphs and SoftVertices.
1578 uint8 *destVertexPtr= vbHard + (rawSkinLod.Geomorphs.size()+rawSkinLod.TotalSoftVertices)*NL3D_RAWSKIN_VERTEX_SIZE;
1580 // 1 Matrix
1581 nInf= rawSkinLod.HardVertices[0];
1582 startId= rawSkinLod.SoftVertices[0];
1583 if(nInf>0)
1585 applyArrayRawSkinNormal1(&rawSkinLod.Vertices1[startId], destVertexPtr, &boneMat3x4[0], nInf);
1586 destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1588 // 2 Matrix
1589 nInf= rawSkinLod.HardVertices[1];
1590 startId= rawSkinLod.SoftVertices[1];
1591 if(nInf>0)
1593 applyArrayRawSkinNormal2(&rawSkinLod.Vertices2[startId], destVertexPtr, &boneMat3x4[0], nInf);
1594 destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1596 // 3 Matrix
1597 nInf= rawSkinLod.HardVertices[2];
1598 startId= rawSkinLod.SoftVertices[2];
1599 if(nInf>0)
1601 applyArrayRawSkinNormal3(&rawSkinLod.Vertices3[startId], destVertexPtr, &boneMat3x4[0], nInf);
1602 destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1604 // 4 Matrix
1605 nInf= rawSkinLod.HardVertices[3];
1606 startId= rawSkinLod.SoftVertices[3];
1607 if(nInf>0)
1609 applyArrayRawSkinNormal4(&rawSkinLod.Vertices4[startId], destVertexPtr, &boneMat3x4[0], nInf);
1610 destVertexPtr+= nInf * NL3D_RAWSKIN_VERTEX_SIZE;
1615 #endif // ADD_MESH_MRM_SKIN_TEMPLATE