1 // NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
2 // Copyright (C) 2010 Winch Gate Property Limited
4 // This source file has been modified by the following contributors:
5 // Copyright (C) 2020 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
7 // This program is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Affero General Public License as
9 // published by the Free Software Foundation, either version 3 of the
10 // License, or (at your option) any later version.
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU Affero General Public License for more details.
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "nel/misc/common.h"
23 #include "nel/3d/lod_character_manager.h"
24 #include "nel/3d/lod_character_shape.h"
25 #include "nel/3d/lod_character_shape_bank.h"
26 #include "nel/3d/lod_character_instance.h"
27 #include "nel/misc/hierarchical_timer.h"
28 #include "nel/misc/fast_floor.h"
29 #include "nel/3d/lod_character_texture.h"
30 #include "nel/3d/ray_mesh.h"
31 #include "nel/misc/file.h"
32 #include "nel/misc/algo.h"
33 #include "nel/misc/fast_mem.h"
34 #include "nel/misc/system_info.h"
38 using namespace NLMISC
;
// ***************************************************************************
// Layout of the destination vertex written by the LOD manager:
// position (offset 0), UV0 (offset 12), primary color (offset 20), 24 bytes total.
// Dest is without Normal because precomputed
#define NL3D_CLOD_VERTEX_FORMAT (CVertexBuffer::PositionFlag | CVertexBuffer::TexCoord0Flag | CVertexBuffer::PrimaryColorFlag)
#define NL3D_CLOD_VERTEX_SIZE 24
#define NL3D_CLOD_UV_OFF 12
#define NL3D_CLOD_COLOR_OFF 20

// size (in block) of the big texture.
#define NL3D_CLOD_TEXT_NLOD_WIDTH 16
#define NL3D_CLOD_TEXT_NLOD_HEIGHT 16
// Derived sizes are fully parenthesized so that they expand correctly next to
// operators such as '/', '%' or a cast (e.g. "x / NL3D_CLOD_BIGTEXT_WIDTH").
#define NL3D_CLOD_TEXT_NUM_IDS (NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_NLOD_HEIGHT)
#define NL3D_CLOD_BIGTEXT_WIDTH (NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_WIDTH)
#define NL3D_CLOD_BIGTEXT_HEIGHT (NL3D_CLOD_TEXT_NLOD_HEIGHT*NL3D_CLOD_TEXT_HEIGHT)

// Default texture color. Alpha must be 255
#define NL3D_CLOD_DEFAULT_TEXCOLOR CRGBA(255,255,255,255)
66 // ***************************************************************************
// Constructs the manager with its default render setup: a 3000-vertex budget,
// the shared "big" LOD texture, the free texture-id list, the material bound
// to that texture, and the default light correction matrix.
// NOTE(review): this listing shows no braces and skips some original lines
// (e.g. the body of the loop over _FreeIds), so only visible statements are described.
67 CLodCharacterManager::CLodCharacterManager()
69 _MaxNumVertices
= 3000;
75 _BigTexture
= new CTextureBlank
;
76 // The texture always resides in memory... This takes 1 MB of RAM. (16*32*16*32 * 4)
77 // NB: this is simpler like that, and this is not a problem, since only 1 or 2 MB are allocated :o)
78 _BigTexture
->setReleasable(false);
80 _BigTexture
->resize(NL3D_CLOD_BIGTEXT_WIDTH
, NL3D_CLOD_BIGTEXT_HEIGHT
, CBitmap::RGBA
);
81 // Format of texture, 16 bits and no mipmaps.
82 _BigTexture
->setUploadFormat(ITexture::RGB565
);
83 _BigTexture
->setFilterMode(ITexture::Linear
, ITexture::LinearMipMapOff
);
84 _BigTexture
->setWrapS(ITexture::Clamp
);
85 _BigTexture
->setWrapT(ITexture::Clamp
);
// One free-id slot per texture block of the big texture.
88 _FreeIds
.resize(NL3D_CLOD_TEXT_NUM_IDS
);
89 for(uint i
=0;i
<_FreeIds
.size();i
++)
// Unlit, alpha-tested, double-sided material sampling the big texture on stage 0.
95 _Material
.initUnlit();
96 _Material
.setAlphaTest(true);
97 _Material
.setDoubleSided(true);
98 _Material
.setTexture(0, _BigTexture
);
100 // setup for lighting, Default for Ryzom setup
101 _LightCorrectionMatrix
.rotateZ((float)Pi
/2);
102 _LightCorrectionMatrix
.invert();
103 NL_SET_IB_NAME(_Triangles
, "CLodCharacterManager::_Triangles");
107 // ***************************************************************************
// Destructor. NOTE(review): the body is not visible in this listing.
108 CLodCharacterManager::~CLodCharacterManager()
113 // ***************************************************************************
// Deletes every shape bank, clears the bank array and the name->id map, and
// releases the vertex stream. Must not be called while rendering (asserted).
114 void CLodCharacterManager::reset()
116 nlassert(!isRendering());
118 // delete shapeBanks.
119 for(uint i
=0;i
<_ShapeBankArray
.size();i
++)
// entries may be NULL (see deleteShapeBank), hence the test before delete.
121 if(_ShapeBankArray
[i
])
122 delete _ShapeBankArray
[i
];
126 contReset(_ShapeBankArray
);
127 contReset(_ShapeMap
);
129 // reset render part.
130 _VertexStream
.release();
133 // ***************************************************************************
// Allocates a new CLodCharacterShapeBank, reusing a NULL slot of
// _ShapeBankArray when one exists, otherwise appending a new slot.
// On the append path the returned id is the index of the new last slot.
// NOTE(review): the return statement of the slot-reuse path is not visible in this listing.
134 uint32
CLodCharacterManager::createShapeBank()
136 // search a free entry
137 for(uint i
=0;i
<_ShapeBankArray
.size();i
++)
140 if(_ShapeBankArray
[i
]==NULL
)
142 _ShapeBankArray
[i
]= new CLodCharacterShapeBank
;
147 // no free entry, grow the array.
148 _ShapeBankArray
.push_back(new CLodCharacterShapeBank
);
149 return (uint32
)_ShapeBankArray
.size()-1;
152 // ***************************************************************************
// Read-only access to the shape bank stored at index bankId.
// The bound test guards against ids past the end of _ShapeBankArray.
// NOTE(review): the statement executed when the test is true is not visible in this listing.
// The returned pointer may be NULL for a deleted bank (see deleteShapeBank).
153 const CLodCharacterShapeBank
*CLodCharacterManager::getShapeBank(uint32 bankId
) const
155 if(bankId
>=_ShapeBankArray
.size())
158 return _ShapeBankArray
[bankId
];
161 // ***************************************************************************
// Mutable access to the shape bank stored at index bankId.
// The bound test guards against ids past the end of _ShapeBankArray.
// NOTE(review): the statement executed when the test is true is not visible in this listing.
// The returned pointer may be NULL for a deleted bank (see deleteShapeBank).
162 CLodCharacterShapeBank
*CLodCharacterManager::getShapeBank(uint32 bankId
)
164 if(bankId
>=_ShapeBankArray
.size())
167 return _ShapeBankArray
[bankId
];
170 // ***************************************************************************
// Deletes the shape bank stored at index bankId and leaves a NULL entry in
// its slot so the slot can be reused by createShapeBank().
// NOTE(review): the first test reads "bankId >= size()". Braces are not visible
// in this listing; confirm it is an early-out. If it instead encloses the
// delete below, the comparison is inverted (out-of-range access, and valid
// ids never deleted) and should be "<".
171 void CLodCharacterManager::deleteShapeBank(uint32 bankId
)
173 if(bankId
>=_ShapeBankArray
.size())
175 if(_ShapeBankArray
[bankId
])
177 delete _ShapeBankArray
[bankId
];
178 _ShapeBankArray
[bankId
]= NULL
;
183 // ***************************************************************************
// Looks up a shape id by name in _ShapeMap (filled by compile()).
// NOTE(review): the return statements for the found / not-found cases are not
// visible in this listing.
184 sint32
CLodCharacterManager::getShapeIdByName(const std::string
&name
) const
186 CstItStrIdMap it
= _ShapeMap
.find(name
);
187 if(it
==_ShapeMap
.end())
193 // ***************************************************************************
// Resolves a packed shape id to its shape: the high 16 bits select the bank,
// the low 16 bits select the shape inside that bank (same packing as compile()).
// NOTE(review): any NULL check on shapeBank is not visible in this listing.
194 const CLodCharacterShape
*CLodCharacterManager::getShape(uint32 shapeId
) const
197 uint bankId
= shapeId
>> 16;
198 uint shapeInBankId
= shapeId
&0xFFFF;
201 const CLodCharacterShapeBank
*shapeBank
= getShapeBank(bankId
);
204 // return the shape from the bank
205 return shapeBank
->getShape(shapeInBankId
);
211 // ***************************************************************************
// Rebuilds _ShapeMap (shape name -> packed shape id) from every non-NULL bank.
// A name that is already in the map is not inserted again; a warning is logged.
// NOTE(review): the return statement(s) are not visible in this listing.
212 bool CLodCharacterManager::compile()
217 contReset(_ShapeMap
);
220 for(uint i
=0; i
<_ShapeBankArray
.size(); i
++)
222 if(_ShapeBankArray
[i
])
225 for(uint j
=0; j
<_ShapeBankArray
[i
]->getNumShapes(); j
++)
227 // build the shape Id: bank index in the high 16 bits, shape index in the low bits
228 uint shapeId
= (i
<<16) + j
;
231 const CLodCharacterShape
*shape
= _ShapeBankArray
[i
]->getShape(j
);
234 const string
&name
= shape
->getName();
235 ItStrIdMap it
= _ShapeMap
.find(name
);
236 if(it
== _ShapeMap
.end())
237 // insert the id in the map
238 _ShapeMap
.insert(make_pair(name
, shapeId
));
242 nlwarning("Found a Character Lod with same name in the manager: %s", name
.c_str());
252 // ***************************************************************************
253 // ***************************************************************************
255 // ***************************************************************************
256 // ***************************************************************************
259 // ***************************************************************************
// Sets the maximum number of vertices that can be added per render pass.
// The new value is compared against the vertex stream in beginRender().
260 void CLodCharacterManager::setMaxVertex(uint32 maxVertex
)
262 // we must not be between beginRender() and endRender()
263 nlassert(!isRendering());
264 _MaxNumVertices
= maxVertex
;
267 // ***************************************************************************
// Sets the number of vertex buffers requested for the vertex stream.
// The new value is compared against the vertex stream in beginRender().
268 void CLodCharacterManager::setVertexStreamNumVBHard(uint32 numVBHard
)
270 // we must not be between beginRender() and endRender()
271 nlassert(!isRendering());
272 _NumVBHard
= numVBHard
;
275 // ***************************************************************************
// Starts a render pass: (re)initializes the vertex stream when the driver,
// the vertex budget or the number of VBs changed, checks the vertex layout
// against the NL3D_CLOD_* constants, pre-sizes the index buffer and stores
// the manager position used as the local origin for added instances.
// NOTE(review): several statements (e.g. the declaration of "vb" and the
// resets performed under "prepare for render") are not visible in this listing.
276 void CLodCharacterManager::beginRender(IDriver
*driver
, const CVector
&managerPos
)
278 H_AUTO( NL3D_CharacterLod_beginRender
);
280 // we must not be between beginRender() and endRender()
281 nlassert(!isRendering());
292 // test change of vertexStream setup
293 bool mustChangeVertexStream
= _VertexStream
.getDriver() != driver
;
294 if(!mustChangeVertexStream
)
296 mustChangeVertexStream
= _MaxNumVertices
!= _VertexStream
.getMaxVertices();
297 mustChangeVertexStream
= mustChangeVertexStream
|| _NumVBHard
!= _VertexStream
.getNumVB();
300 if( mustChangeVertexStream
)
304 vb
.setVertexFormat(NL3D_CLOD_VERTEX_FORMAT
);
305 // NB: addRenderCharacterKey() loop is hard-coded for the Position+UV+Color layout of NL3D_CLOD_VERTEX_FORMAT.
306 nlassert( NL3D_CLOD_UV_OFF
== vb
.getTexCoordOff());
307 nlassert( NL3D_CLOD_COLOR_OFF
== vb
.getColorOff());
309 // Setup the vertex stream
310 _VertexStream
.release();
311 _VertexStream
.init(driver
, NL3D_CLOD_VERTEX_FORMAT
, _MaxNumVertices
, _NumVBHard
, "CLodManagerVB", false); // nb : don't use volatile lock as we keep the buffer locked
314 // prepare for render.
317 // Do not Lock Buffer now (will be done at the first instance added)
318 nlassert(!_LockDone
);
319 _VertexSize
= _VertexStream
.getVertexSize();
320 // NB: addRenderCharacterKey() loop is hard-coded for the Position+UV+Color layout of NL3D_CLOD_VERTEX_FORMAT.
321 nlassert( _VertexSize
== NL3D_CLOD_VERTEX_SIZE
); // Position + UV + RGBA (the destination format has no normal)
324 // Alloc a minimum of primitives (2*vertices), to avoid reallocation in addRenderCharacterKey as much as possible
325 if(_Triangles
.getNumIndexes()<_MaxNumVertices
* 2)
327 _Triangles
.setFormat(NL_LOD_CHARACTER_INDEX_FORMAT
);
328 _Triangles
.setNumIndexes(_MaxNumVertices
* 2);
331 // Local manager matrix
332 _ManagerMatrixPos
= managerPos
;
334 // Ok, start rendering
339 // ***************************************************************************
// C fallback for the per-vertex lighting: takes the dot product of the
// (pre-scaled) object-space light vector with the vertex normal, floors it to
// an integer factor, and scales each diffuse channel by that factor / 256.
// NOTE(review): the clamp of f8, the declarations of r/g/b, the ambient add
// and the write to lightRes are not visible in this listing.
340 static inline void computeLodLighting(CRGBA
&lightRes
, const CVector
&lightObjectSpace
, const CVector
&normalPtr
, CRGBA ambient
, CRGBA diffuse
)
// CVector operator* between two vectors is used here as a dot product (result is a float).
342 float f
= lightObjectSpace
* normalPtr
;
343 sint f8
= NLMISC::OptFastFloor(f
);
346 r
= (diffuse
.R
* f8
)>>8;
347 g
= (diffuse
.G
* f8
)>>8;
348 b
= (diffuse
.B
* f8
)>>8;
361 // ***************************************************************************
// Appends one LOD character instance to the vertex stream and index buffer
// being filled between beginRender() and endRender().
// For each vertex the visible code writes: the animated position transformed
// by worldMatrix and expressed relative to the manager position, the instance
// UV, and a lit color whose alpha is the per-vertex instance alpha (255 when
// the instance alphas are not set up). Triangle indices are copied with an
// offset of _CurrentVertexId.
// Return value, per the inline comments: true when the shape or animation key
// is not found (nothing added), false when the instance would exceed
// _MaxNumVertices. NOTE(review): the return statements, braces and several
// local declarations are not visible in this listing.
362 bool CLodCharacterManager::addRenderCharacterKey(CLodCharacterInstance
&instance
, const CMatrix
&worldMatrix
,
363 CRGBA paramAmbient
, CRGBA paramDiffuse
, const CVector
&lightDir
)
365 H_AUTO ( NL3D_CharacterLod_AddRenderKey
)
367 nlassert(_VertexStream
.getDriver());
368 // we must be between beginRender() and endRender()
369 nlassert(isRendering());
372 // regroup all variables that will be accessed in the ASM loop (minimize cache problems)
374 const CLodCharacterShape::CVector3s
*vertPtr
;
375 const CVector
*normalPtr
;
377 const uint8
*alphaPtr
;
378 CVector lightObjectSpace
;
384 CRGBA ambient
= paramAmbient
;
385 CRGBA diffuse
= paramDiffuse
;
386 // For ASM / MMX, must set 0 to alpha part, because replaced by *alphaPtr (with add)
391 // Get the Shape and current key.
395 const CLodCharacterShape
*clod
= getShape(instance
.ShapeId
);
396 // if not found quit, return true
400 // get UV/Normal array. NULL => error
401 normalPtr
= clod
->getNormals();
402 // get UV of the instance
403 uvPtr
= instance
.getUVs();
404 // uvPtr is NULL means that initInstance() has not been called!!
405 nlassert(normalPtr
&& uvPtr
);
// unPackScaleFactor is filled by getAnimKey(); it is folded into the matrix below.
408 CVector unPackScaleFactor
;
409 vertPtr
= clod
->getAnimKey(instance
.AnimId
, instance
.AnimTime
, instance
.WrapMode
, unPackScaleFactor
);
410 // if not found quit, return true
414 numVertices
= clod
->getNumVertices();
420 // If too many vertices, quit, returning false.
421 if(_CurrentVertexId
+numVertices
> _MaxNumVertices
)
// NOTE(review): function-local static buffer shared by all calls; concurrent callers would race on it.
425 static vector
<uint8
> defaultAlphaArray
;
426 // get the instance alpha if correctly set up
427 if(instance
.VertexAlphas
.size() == numVertices
)
429 alphaPtr
= &instance
.VertexAlphas
[0];
431 // if error, take 255 as alpha.
434 // NB: still use an array. This case should never arise, but support it not at full optim.
435 if(defaultAlphaArray
.size()<numVertices
)
436 defaultAlphaArray
.resize(numVertices
, 255);
437 alphaPtr
= &defaultAlphaArray
[0];
440 // Lock Buffer if not done
443 // Do this after code above because we are sure that we will fill something (numVertices>0)
446 _VertexData
= _VertexStream
.lock();
450 // After lock, For D3D, the VertexColor may be in BGRA format
451 if(_VertexStream
.isBRGA())
453 // then swap only the B and R (no cpu cycle added per vertex)
462 // HTimerInfo: all this block takes 0.1%
465 matPos
= worldMatrix
.getPos();
466 // compute in manager space.
467 matPos
-= _ManagerMatrixPos
;
468 // Get rotation line vectors
// aRC below reads worldM[C*4 + R], i.e. the float array is indexed column by column.
469 const float *worldM
= worldMatrix
.get();
470 a00
= worldM
[0]; a01
= worldM
[4]; a02
= worldM
[8];
471 a10
= worldM
[1]; a11
= worldM
[5]; a12
= worldM
[9];
472 a20
= worldM
[2]; a21
= worldM
[6]; a22
= worldM
[10];
474 // get the light in object space.
475 // Multiply light dir with transpose of worldMatrix. This may be not exact (not uniform scale) but sufficient.
476 lightObjectSpace
.x
= a00
* lightDir
.x
+ a10
* lightDir
.y
+ a20
* lightDir
.z
;
477 lightObjectSpace
.y
= a01
* lightDir
.x
+ a11
* lightDir
.y
+ a21
* lightDir
.z
;
478 lightObjectSpace
.z
= a02
* lightDir
.x
+ a12
* lightDir
.y
+ a22
* lightDir
.z
;
479 // animation User correction
480 lightObjectSpace
= _LightCorrectionMatrix
.mulVector(lightObjectSpace
);
481 // normalize, and neg for Dot Product.
482 lightObjectSpace
.normalize();
483 lightObjectSpace
= -lightObjectSpace
;
484 // preMul by 255 for RGBA uint8
485 lightObjectSpace
*= 255;
487 // multiply matrix with scale factor for Pos.
488 a00
*= unPackScaleFactor
.x
; a01
*= unPackScaleFactor
.y
; a02
*= unPackScaleFactor
.z
;
489 a10
*= unPackScaleFactor
.x
; a11
*= unPackScaleFactor
.y
; a12
*= unPackScaleFactor
.z
;
490 a20
*= unPackScaleFactor
.x
; a21
*= unPackScaleFactor
.y
; a22
*= unPackScaleFactor
.z
;
// first destination byte for this instance inside the locked vertex data.
494 dstPtr
= _VertexData
+ _CurrentVertexId
* _VertexSize
;
// NOTE(review): the two block comments that follow are never terminated in this listing
// (their closing lines are missing); their text is left untouched.
497 /* PreCaching Note: CFastMem::precache() has been tested (done on the 4 arrays) but not very interesting,
498 maybe because the cache miss improve //ism a bit below.
503 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
505 if(CSystemInfo::hasMMX())
507 H_AUTO( NL3D_CharacterLod_vertexFill
);
513 /* NB: order is important for AGP filling optimisation in dstPtr
515 Pentium2+ optimisation notes:
517 - "uop" comment formating:
518 A/B means "A micro-ops in port 0, and B micro-ops in port 2". (port 1 is very rare for FPU)
519 A/B/C/D means "A micro-ops in port 0, B in port 2, C in port 3 and D in port 4".
520 The number in () is the delay (if any).
521 - the "compute lighting part" must done first, because of the "fistp f8" mem writes that must
522 be place far away from the "mov eax, f8" read in clamp lighting part
523 (else seems that it crashes all the //ism)
524 - No need to Interleave on Pentium2+. But prevents "write/read stall" by putting the write
525 far away from the next read. Else stall of 3 cycles + BIG BREAK OF //ism (I think).
526 This had save me 120 cycles / 240 !!!
529 - The "transform vertex part" and "all next part" cost 42 cycles, but is somewhat optimal:
530 63 uop (=> min 21 cycles), but 36 uop in the P0 port (=> this is the bottleneck)
531 - The lighting part adds 1 cycle only ????? (44 cycles) But still relevant and optimal:
532 43 uop in port P0!!!!
533 - The UV part adds 4 cycles (47) (should not since 0 in Port P0), still acceptable.
534 - The clamp part adds 3 cycles (50), and add 11 cycles in "P0 or P1" (but heavy dependency)
535 If we assume all goes into P1, it should takes 0... still acceptable (optimal==43?)
536 - The alpha part adds 2 cycles (52, optimal=45). OK.
537 - The modulate part adds 15 cycles. OK
539 TOTAL: 67 cycles in theory (write in RAM, no cache miss problem)
540 BENCH: ASM version: 91 cycles (Write in AGP, some cache miss problems, still good against 67)
541 C version: 316 cycles.
547 // **** compute lighting
548 mov esi
,normalPtr
// uop: 0/1
550 fld dword ptr
[esi
] // uop: 0/1
551 fmul lightObjectSpace
.x
// uop: 1/1 (5)
552 fld dword ptr
[esi
+4] // uop: 0/1
553 fmul lightObjectSpace
.y
// uop: 1/1 (5)
554 faddp
st(1),st
// uop: 1/0 (3)
555 fld dword ptr
[esi
+8] // uop: 0/1
556 fmul lightObjectSpace
.z
// uop: 1/1 (5)
557 faddp
st(1),st
// uop: 1/0 (3)
558 fistp f8
// uop: 2/0/1/1 (5)
560 add esi
, 12 // uop: 1/0
561 mov normalPtr
, esi
// uop: 0/0/1/1
564 // **** transform vertex, and store
565 mov esi
, vertPtr
// uop: 0/1
566 fild word ptr
[esi
] // uop: 3/1 (5)
567 fild word ptr
[esi
+2] // uop: 3/1 (5)
568 fild word ptr
[esi
+4] // uop: 3/1 (5)
571 fmul st
, st(3) // uop: 1/0 (5)
573 fmul st
, st(3) // uop: 1/0 (5)
574 faddp
st(1), st
// uop: 1/0 (3)
576 fmul st
, st(2) // uop: 1/0 (5)
577 faddp
st(1), st
// uop: 1/0 (3)
578 fld matPos
.x
// uop: 0/1
579 faddp
st(1), st
// uop: 1/0 (3)
580 fstp dword ptr
[edi
] // uop: 0/0/1/1
592 fstp dword ptr
[edi
+4]
604 fstp dword ptr
[edi
+8]
610 add esi
, 6 // uop: 1/0
611 mov vertPtr
, esi
// uop: 0/0/1/1
615 mov esi
, uvPtr
// uop: 0/1
616 mov eax
, [esi
] // uop: 0/1
617 mov
[edi
+NL3D_CLOD_UV_OFF
], eax
// uop: 0/0/1/1
618 mov ebx
, [esi
+4] // uop: 0/1
619 mov
[edi
+NL3D_CLOD_UV_OFF
+4], ebx
// uop: 0/0/1/1
621 add esi
, 8 // uop: 1/0
622 mov uvPtr
, esi
// uop: 0/0/1/1
625 // **** Clamp lighting
626 // clamp to 0 only. will be clamped to 255 by MMX
627 mov eax
, f8
// uop: 0/1
628 cmp eax
, 0x80000000 // if>=0 => CF=1
629 sbb ebx
, ebx
// if>=0 => CF==1 => ebx=0xFFFFFFFF
630 and eax
, ebx
// if>=0 => eax unchanged, else eax=0 (clamped)
633 // **** Modulate lighting modulate with diffuse color, add ambient term, using MMX
634 movd mm0
, eax
// 0000000L uop: 1/0
635 packuswb mm0
, mm0
// 000L000L uop: 1/0 (p1)
636 packuswb mm0
, mm0
// 0L0L0L0L uop: 1/0 (p1)
637 movd mm1
, diffuse
// uop: 0/1
638 punpcklbw mm1
, blank
// uop: 1/1 (p1)
639 pmullw mm0
, mm1
// diffuse*L uop: 1/0 (3)
640 psrlw mm0
, 8 // 0A0B0G0R uop: 1/0 (p1)
641 packuswb mm0
, blank
// 0000ABGR uop: 1/1 (p1)
642 movd mm2
, ambient
// uop: 0/1
643 paddusb mm0
, mm2
// uop: 1/0
644 movd ebx
, mm0
// ebx= AABBGGRR uop: 1/0
645 // NB: emms is not so bad on P2+: delay of 6, +11 (NB: far better than no MMX instructions)
646 emms
// uop: 11/0 (6). (?????)
649 // **** append alpha, and store
650 mov esi
, alphaPtr
// uop: 0/1
651 movzx eax
, byte ptr
[esi
] // uop: 0/1
652 shl eax
, 24 // uop: 1/0
653 add ebx
, eax
// uop: 1/0
654 // now, ebx= AABBGGRR
655 mov
[edi
+NL3D_CLOD_COLOR_OFF
], ebx
// uop: 0/0/1/1
657 add esi
, 1 // uop: 1/0
658 mov alphaPtr
, esi
// uop: 0/0/1/1
662 add edi
, NL3D_CLOD_VERTEX_SIZE
// uop: 1/0
664 mov eax
, numVertices
// uop: 0/1
666 mov numVertices
, eax
// uop: 0/0/1/1
668 jnz theLoop
// uop: 1/1 (p1)
670 // To have same behavior than c code
678 H_AUTO( NL3D_CharacterLod_vertexFill
);
684 // NB: order is important for AGP filling optimisation
685 // transform vertex, and store.
686 CVector
*dstVector
= (CVector
*)dstPtr
;
687 fVect
.x
= vertPtr
->x
; fVect
.y
= vertPtr
->y
; fVect
.z
= vertPtr
->z
;
689 dstVector
->x
= a00
* fVect
.x
+ a01
* fVect
.y
+ a02
* fVect
.z
+ matPos
.x
;
690 dstVector
->y
= a10
* fVect
.x
+ a11
* fVect
.y
+ a12
* fVect
.z
+ matPos
.y
;
691 dstVector
->z
= a20
* fVect
.x
+ a21
* fVect
.y
+ a22
* fVect
.z
+ matPos
.z
;
693 *(CUV
*)(dstPtr
+ NL3D_CLOD_UV_OFF
)= *uvPtr
;
698 computeLodLighting(lightRes
, lightObjectSpace
, *normalPtr
, ambient
, diffuse
);
700 lightRes
.A
= *alphaPtr
;
703 *((CRGBA
*)(dstPtr
+ NL3D_CLOD_COLOR_OFF
))= lightRes
;
706 dstPtr
+= NL3D_CLOD_VERTEX_SIZE
;
715 H_AUTO( NL3D_CharacterLod_primitiveFill
)
717 // get number of tri indexes
718 uint numTriIdxs
= clod
->getNumTriangles() * 3;
720 // Yoyo: there is an assert with getPtr(). Not sure, but maybe arise if numTriIdxs==0
723 // realloc tris if needed.
724 if(_CurrentTriId
+numTriIdxs
> _Triangles
.getNumIndexes())
726 _Triangles
.setFormat(NL_LOD_CHARACTER_INDEX_FORMAT
);
727 _Triangles
.setNumIndexes(_CurrentTriId
+numTriIdxs
);
730 // reindex and copy tris
731 CIndexBufferReadWrite iba
;
732 _Triangles
.lock(iba
);
733 const TLodCharacterIndexType
*srcIdx
= clod
->getTriangleArray();
734 nlassert(sizeof(TLodCharacterIndexType
) == _Triangles
.getIndexNumBytes());
735 TLodCharacterIndexType
*dstIdx
= (TLodCharacterIndexType
*) iba
.getPtr()+_CurrentTriId
;
736 for(;numTriIdxs
>0;numTriIdxs
--, srcIdx
++, dstIdx
++)
// shape-local index -> index inside the shared vertex stream.
738 *dstIdx
= *srcIdx
+ _CurrentVertexId
;
// advance the fill cursors for the next instance.
747 _CurrentVertexId
+= clod
->getNumVertices();
749 _CurrentTriId
+= clod
->getNumTriangles() * 3;
756 // ***************************************************************************
// Ends a render pass: unlocks the vertex stream, draws everything that was
// added since beginRender() with the manager material, then swaps the stream
// buffers.
// NOTE(review): the conditions guarding the unlock / draw ("if something
// rendered") and the final state reset are not visible in this listing.
757 void CLodCharacterManager::endRender()
759 H_AUTO ( NL3D_CharacterLod_endRender
);
761 IDriver
*driver
= _VertexStream
.getDriver();
763 // we must be between beginRender() and endRender()
764 nlassert(isRendering());
766 // if something rendered
770 _VertexStream
.unlock(_CurrentVertexId
);
773 // Render the VBuffer and the primitives.
// vertices were written relative to _ManagerMatrixPos, so the model matrix only carries that translation.
777 CMatrix managerMatrix
;
778 managerMatrix
.setPos(_ManagerMatrixPos
);
779 driver
->setupModelMatrix(managerMatrix
);
782 _VertexStream
.activate();
785 driver
->activeIndexBuffer(_Triangles
);
// _CurrentTriId counts indices, hence /3 to get a triangle count.
786 driver
->renderTriangles(_Material
, 0, _CurrentTriId
/3);
789 // swap Stream VBHard
790 _VertexStream
.swapVBHard();
797 // ***************************************************************************
// Replaces the light correction matrix used by addRenderCharacterKey():
// stores normalMatrix with its translation cleared, then inverted.
798 void CLodCharacterManager::setupNormalCorrectionMatrix(const CMatrix
&normalMatrix
)
800 _LightCorrectionMatrix
= normalMatrix
;
801 _LightCorrectionMatrix
.setPos(CVector::Null
);
802 _LightCorrectionMatrix
.invert();
806 // ***************************************************************************
807 // ***************************************************************************
809 // ***************************************************************************
810 // ***************************************************************************
813 // ***************************************************************************
// Constructor. NOTE(review): the body is not visible in this listing.
814 CLodCharacterTmpBitmap::CLodCharacterTmpBitmap()
819 // ***************************************************************************
// Resets the temporary bitmap to a single black pixel.
// NOTE(review): the resize and shift/width setup lines are not visible in this listing.
820 void CLodCharacterTmpBitmap::reset()
822 // setup a 1*1 bitmap
824 _Bitmap
[0]= CRGBA::Black
;
830 // ***************************************************************************
// Builds the temporary bitmap from bmpIn, converted to RGBA.
// Width and height must be in [1, 256] (asserted). The U/V shifts are
// 8 - log2(size), presumably to map 8-bit U/V coordinates to pixel indices —
// TODO confirm against getPixel().
// NOTE(review): the declaration of the local copy "bmp" and the body of the
// copy loop are not visible in this listing.
831 void CLodCharacterTmpBitmap::build(const NLMISC::CBitmap
&bmpIn
)
833 uint width
= bmpIn
.getWidth();
834 uint height
= bmpIn
.getHeight();
835 nlassert(width
>0 && width
<=256);
836 nlassert(height
>0 && height
<=256);
839 _Bitmap
.resize(width
*height
);
840 _WidthPower
= getPowerOf2(width
);
842 _UShift
= 8-getPowerOf2(width
);
843 _VShift
= 8-getPowerOf2(height
);
845 // convert the bitmap.
847 bmp
.convertToType(CBitmap::RGBA
);
848 CRGBA
*src
= (CRGBA
*)&bmp
.getPixels()[0];
849 CRGBA
*dst
= _Bitmap
.getPtr();
// walks source and destination in lockstep, one pixel per iteration.
850 for(sint nPix
= width
*height
;nPix
>0;nPix
--, src
++, dst
++)
856 // ***************************************************************************
// Builds the temporary bitmap as a single pixel of color col.
// NOTE(review): the statements of the body are not visible in this listing.
857 void CLodCharacterTmpBitmap::build(CRGBA col
)
859 // setup a 1*1 bitmap and set it with col
865 // ***************************************************************************
// Attaches an instance to this manager: releases it from its previous owner,
// sizes its UV array to the shape vertex count, allocates a texture block id
// from _FreeIds and remaps the shape UVs into that block of the big texture.
// When no id is free, the instance gets the "Not enough memory" id
// (NL3D_CLOD_TEXT_NUM_IDS) and all its UVs are filled with one constant UV.
// NOTE(review): the NULL checks, the test on _FreeIds, the declaration of "uv"
// and the pop of the allocated id are not visible in this listing.
866 void CLodCharacterManager::initInstance(CLodCharacterInstance
&instance
)
868 // first release in (maybe) other manager.
870 instance
._Owner
->releaseInstance(instance
);
873 const CLodCharacterShape
*clod
= getShape(instance
.ShapeId
);
878 const CUV
*uvSrc
= clod
->getUVs();
883 instance
._Owner
= this;
884 instance
._UVs
.resize(clod
->getNumVertices());
886 // allocate an id. If none is free, fill the UVs with a constant so every vertex samples one pixel (see endTextureCompute() why).
889 // set a "Not enough memory" id
890 instance
._TextureId
= NL3D_CLOD_TEXT_NUM_IDS
;
892 fill(instance
._UVs
.begin(), instance
._UVs
.end(), uv
);
894 // else OK, can instantiate the UVs.
898 instance
._TextureId
= _FreeIds
.back();
// block coordinates of the id inside the NLOD_WIDTH x NLOD_HEIGHT grid.
901 uint xId
= instance
._TextureId
% NL3D_CLOD_TEXT_NLOD_WIDTH
;
902 uint yId
= instance
._TextureId
/ NL3D_CLOD_TEXT_NLOD_WIDTH
;
903 // compute the scale/bias to apply to Uvs.
904 float scaleU
= 1.0f
/ NL3D_CLOD_TEXT_NLOD_WIDTH
;
905 float scaleV
= 1.0f
/ NL3D_CLOD_TEXT_NLOD_HEIGHT
;
906 float biasU
= (float)xId
/ NL3D_CLOD_TEXT_NLOD_WIDTH
;
907 float biasV
= (float)yId
/ NL3D_CLOD_TEXT_NLOD_HEIGHT
;
908 // apply it to each UVs.
909 CUV
*uvDst
= &instance
._UVs
[0];
910 for(uint i
=0; i
<instance
._UVs
.size();i
++)
912 uvDst
[i
].U
= biasU
+ uvSrc
[i
].U
*scaleU
;
913 uvDst
[i
].V
= biasV
+ uvSrc
[i
].V
*scaleV
;
918 // ***************************************************************************
// Detaches an instance from this manager: gives its texture id back to
// _FreeIds when it is a real id, then clears the owner, the texture id (-1)
// and the UV array. The instance must be owned by this manager (asserted).
// NOTE(review): the statement executed when _Owner is NULL is not visible in
// this listing (presumably an early return — confirm).
919 void CLodCharacterManager::releaseInstance(CLodCharacterInstance
&instance
)
921 if(instance
._Owner
==NULL
)
923 nlassert(this==instance
._Owner
);
925 // if the id is not a "Not enough memory" id, release it.
926 if(instance
._TextureId
>=0 && instance
._TextureId
<NL3D_CLOD_TEXT_NUM_IDS
)
927 _FreeIds
.push_back(instance
._TextureId
);
929 // reset the instance
930 instance
._Owner
= NULL
;
931 instance
._TextureId
= -1;
932 contReset(instance
._UVs
);
936 // ***************************************************************************
// Returns a pointer to the first pixel of the instance's block inside the big
// texture. The instance must be owned by this manager and have a texture id
// (both asserted). Rows of the block are NL3D_CLOD_BIGTEXT_WIDTH pixels apart.
// NOTE(review): the value returned for the "not enough memory" id is not
// visible in this listing.
937 CRGBA
*CLodCharacterManager::getTextureInstance(CLodCharacterInstance
&instance
)
939 nlassert(instance
._Owner
==this);
940 nlassert(instance
._TextureId
!=-1);
941 // if the texture id is a "not enough memory", quit.
942 if(instance
._TextureId
==NL3D_CLOD_TEXT_NUM_IDS
)
946 uint xId
= instance
._TextureId
% NL3D_CLOD_TEXT_NLOD_WIDTH
;
947 uint yId
= instance
._TextureId
/ NL3D_CLOD_TEXT_NLOD_WIDTH
;
949 // get the ptr on the correct pixel.
950 CRGBA
*pix
= (CRGBA
*)&_BigTexture
->getPixels(0)[0];
951 return pix
+ yId
*NL3D_CLOD_TEXT_HEIGHT
*NL3D_CLOD_BIGTEXT_WIDTH
+ xId
*NL3D_CLOD_TEXT_WIDTH
;
955 // ***************************************************************************
// Begins a texture computation for the instance by walking its block of the
// big texture row by row with the default color.
// NOTE(review): the NULL check on dst, the per-pixel write and the return
// statements are not visible in this listing.
956 bool CLodCharacterManager::startTextureCompute(CLodCharacterInstance
&instance
)
958 CRGBA
*dst
= getTextureInstance(instance
);
962 // erase the texture with NL3D_CLOD_DEFAULT_TEXCOLOR (255,255,255,255). Alpha is actually the min "Quality" part of the CTUVQ.
963 CRGBA col
= NL3D_CLOD_DEFAULT_TEXCOLOR
;
964 for(uint y
=0;y
<NL3D_CLOD_TEXT_HEIGHT
;y
++)
967 for(uint x
=0;x
<NL3D_CLOD_TEXT_WIDTH
;x
++)
// next row of the block inside the big texture.
970 dst
+= NL3D_CLOD_BIGTEXT_WIDTH
;
976 // ***************************************************************************
// Applies one lodTexture lookup table to the instance's texture block: each
// entry (CTUVQ) names a temporary bitmap (T) and a pixel (U, V) to read from it.
// Per the inline comments, a pixel is only replaced when the entry quality is
// better (nearer 0) than the one already stored.
// NOTE(review): the NULL check on dst, the quality comparison, the pixel
// write and the lookUpPtr increment are not visible in this listing.
977 void CLodCharacterManager::addTextureCompute(CLodCharacterInstance
&instance
, const CLodCharacterTexture
&lodTexture
)
979 CRGBA
*dst
= getTextureInstance(instance
);
// the table must hold exactly one entry per pixel of a block; the extra size test below also covers builds where nlassert is compiled out — TODO confirm.
984 nlassert(lodTexture
.Texture
.size()==NL3D_CLOD_TEXT_SIZE
);
985 if (lodTexture
.Texture
.size() < NL3D_CLOD_TEXT_SIZE
)
988 const CLodCharacterTexture::CTUVQ
*lookUpPtr
= &lodTexture
.Texture
[0];
990 // apply the lodTexture, taking only better quality (ie nearer 0)
991 for(uint y
=0;y
<NL3D_CLOD_TEXT_HEIGHT
;y
++)
994 for(uint x
=0;x
<NL3D_CLOD_TEXT_WIDTH
;x
++)
996 CLodCharacterTexture::CTUVQ lut
= *lookUpPtr
;
997 // if this quality is better than the one stored
1000 // get what texture to read, and read the pixel.
1001 CRGBA col
= _TmpBitmaps
[lut
.T
].getPixel(lut
.U
, lut
.V
);
// next row of the block inside the big texture.
1012 dst
+= NL3D_CLOD_BIGTEXT_WIDTH
;
1016 // ***************************************************************************
// Finishes a texture computation: walks the instance's block to reset alpha,
// restores pixel (0,0) of the big texture when the block id is 0, marks the
// block rectangle as touched so the driver re-uploads it, and resets the
// first numBmpToReset temporary bitmaps.
// NOTE(review): the NULL check on dst and the per-pixel alpha write are not
// visible in this listing.
1017 void CLodCharacterManager::endTextureCompute(CLodCharacterInstance
&instance
, uint numBmpToReset
)
1019 CRGBA
*dst
= getTextureInstance(instance
);
1023 // reset All Alpha values to 255 => no AlphaTest problems
1024 for(uint y
=0;y
<NL3D_CLOD_TEXT_HEIGHT
;y
++)
1027 for(uint x
=0;x
<NL3D_CLOD_TEXT_WIDTH
;x
++)
1032 dst
+= NL3D_CLOD_BIGTEXT_WIDTH
;
1035 // If the id == 0 then pixel (0,0) must be restored to NL3D_CLOD_DEFAULT_TEXCOLOR, for the "Not Enough memory" case in initInstance().
// NOTE(review): older wording said "black", but the default color is defined as (255,255,255,255).
1036 if(instance
._TextureId
==0)
1037 *(CRGBA
*)&_BigTexture
->getPixels(0)[0]= NL3D_CLOD_DEFAULT_TEXCOLOR
;
1040 uint xId
= instance
._TextureId
% NL3D_CLOD_TEXT_NLOD_WIDTH
;
1041 uint yId
= instance
._TextureId
/ NL3D_CLOD_TEXT_NLOD_WIDTH
;
1042 // touch the texture for Driver update.
1043 _BigTexture
->touchRect(
1044 CRect(xId
*NL3D_CLOD_TEXT_WIDTH
, yId
*NL3D_CLOD_TEXT_HEIGHT
, NL3D_CLOD_TEXT_WIDTH
, NL3D_CLOD_TEXT_HEIGHT
) );
1046 // reset tmpBitmaps / free memory.
1047 for(uint i
=0; i
<numBmpToReset
; i
++)
1049 _TmpBitmaps
[i
].reset();
1053 /*NLMISC::COFile f("tam.tga");
1054 _BigTexture->writeTGA(f,32);*/
1058 // ***************************************************************************
// Ray test against the LOD mesh of an instance: transforms the current
// animation key of the shape by toRaySpace into a temporary vertex array,
// then delegates to CRayMesh::getRayIntersection() with the shape triangles.
// dist2D / distZ / computeDist2D are passed through to that call unchanged.
// NOTE(review): the early returns when the shape or key is not found, several
// local declarations and the loop increments are not visible in this listing.
1059 bool CLodCharacterManager::fastIntersect(const CLodCharacterInstance
&instance
, const NLMISC::CMatrix
&toRaySpace
, float &dist2D
, float &distZ
, bool computeDist2D
)
1061 H_AUTO ( NL3D_CharacterLod_fastIntersect
)
1064 const CLodCharacterShape::CVector3s
*vertPtr
;
1066 float a00
, a01
, a02
;
1067 float a10
, a11
, a12
;
1068 float a20
, a21
, a22
;
1071 // Get the Shape and current key.
1075 const CLodCharacterShape
*clod
= getShape(instance
.ShapeId
);
1076 // if not found quit
1081 CVector unPackScaleFactor
;
1082 vertPtr
= clod
->getAnimKey(instance
.AnimId
, instance
.AnimTime
, instance
.WrapMode
, unPackScaleFactor
);
1083 // if not found quit
1087 numVertices
= clod
->getNumVertices();
1093 // Prepare Transform
1097 matPos
= toRaySpace
.getPos();
1098 // Get rotation line vectors
// aRC below reads rayM[C*4 + R], same convention as addRenderCharacterKey().
1099 const float *rayM
= toRaySpace
.get();
1100 a00
= rayM
[0]; a01
= rayM
[4]; a02
= rayM
[8];
1101 a10
= rayM
[1]; a11
= rayM
[5]; a12
= rayM
[9];
1102 a20
= rayM
[2]; a21
= rayM
[6]; a22
= rayM
[10];
1104 // multiply matrix with scale factor for Pos.
1105 a00
*= unPackScaleFactor
.x
; a01
*= unPackScaleFactor
.y
; a02
*= unPackScaleFactor
.z
;
1106 a10
*= unPackScaleFactor
.x
; a11
*= unPackScaleFactor
.y
; a12
*= unPackScaleFactor
.z
;
1107 a20
*= unPackScaleFactor
.x
; a21
*= unPackScaleFactor
.y
; a22
*= unPackScaleFactor
.z
;
1110 // enlarge temp buffer
// NOTE(review): function-local static scratch buffer; it only grows, and concurrent callers would race on it.
1111 static std::vector
<CVector
> lodInRaySpace
;
1112 if(numVertices
>lodInRaySpace
.size())
1113 lodInRaySpace
.resize(numVertices
);
1114 CVector
*dstPtr
= &lodInRaySpace
[0];
1117 // Fill the temp skin
1122 for(;numVertices
>0;)
1124 // transform vertex, and store.
1125 fVect
.x
= vertPtr
->x
; fVect
.y
= vertPtr
->y
; fVect
.z
= vertPtr
->z
;
1127 dstPtr
->x
= a00
* fVect
.x
+ a01
* fVect
.y
+ a02
* fVect
.z
+ matPos
.x
;
1128 dstPtr
->y
= a10
* fVect
.x
+ a11
* fVect
.y
+ a12
* fVect
.z
+ matPos
.y
;
1129 dstPtr
->z
= a20
* fVect
.x
+ a21
* fVect
.y
+ a22
* fVect
.z
+ matPos
.z
;
1137 // Test intersection
1140 return CRayMesh::getRayIntersection(lodInRaySpace
, clod
->getTriangleIndices(), dist2D
, distZ
, computeDist2D
);