Linux multi-monitor fullscreen support
[ryzomcore.git] / nel / src / 3d / lod_character_manager.cpp
blob8e062bf07583ed4af07790f6b2fa4dc0e2fd9f9f
1 // NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
2 // Copyright (C) 2010 Winch Gate Property Limited
3 //
4 // This source file has been modified by the following contributors:
5 // Copyright (C) 2020 Jan BOON (Kaetemi) <jan.boon@kaetemi.be>
6 //
7 // This program is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU Affero General Public License as
9 // published by the Free Software Foundation, either version 3 of the
10 // License, or (at your option) any later version.
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU Affero General Public License for more details.
17 // You should have received a copy of the GNU Affero General Public License
18 // along with this program. If not, see <http://www.gnu.org/licenses/>.
20 #include "std3d.h"
22 #include "nel/misc/common.h"
23 #include "nel/3d/lod_character_manager.h"
24 #include "nel/3d/lod_character_shape.h"
25 #include "nel/3d/lod_character_shape_bank.h"
26 #include "nel/3d/lod_character_instance.h"
27 #include "nel/misc/hierarchical_timer.h"
28 #include "nel/misc/fast_floor.h"
29 #include "nel/3d/lod_character_texture.h"
30 #include "nel/3d/ray_mesh.h"
31 #include "nel/misc/file.h"
32 #include "nel/misc/algo.h"
33 #include "nel/misc/fast_mem.h"
34 #include "nel/misc/system_info.h"
37 using namespace std;
38 using namespace NLMISC;
40 #ifdef DEBUG_NEW
41 #define new DEBUG_NEW
42 #endif
44 namespace NL3D
48 // ***************************************************************************
// Layout of one vertex written into the LOD vertex stream by addRenderCharacterKey():
// position at byte 0, UV at byte NL3D_CLOD_UV_OFF, packed color at byte NL3D_CLOD_COLOR_OFF,
// NL3D_CLOD_VERTEX_SIZE bytes in total. beginRender() asserts these values against the driver layout.
// Dest is without Normal because precomputed
#define	NL3D_CLOD_VERTEX_FORMAT	(CVertexBuffer::PositionFlag | CVertexBuffer::TexCoord0Flag | CVertexBuffer::PrimaryColorFlag)
#define	NL3D_CLOD_VERTEX_SIZE	24
#define	NL3D_CLOD_UV_OFF		12
#define	NL3D_CLOD_COLOR_OFF		20

// size (in block) of the big texture.
#define	NL3D_CLOD_TEXT_NLOD_WIDTH	16
#define	NL3D_CLOD_TEXT_NLOD_HEIGHT	16
// NB: the composed values below are parenthesized so that they always expand as a single operand
// (eg "x / NL3D_CLOD_TEXT_NUM_IDS" or "x % NL3D_CLOD_TEXT_NUM_IDS" would otherwise bind to the first factor only).
#define	NL3D_CLOD_TEXT_NUM_IDS		(NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_NLOD_HEIGHT)
#define	NL3D_CLOD_BIGTEXT_WIDTH		(NL3D_CLOD_TEXT_NLOD_WIDTH*NL3D_CLOD_TEXT_WIDTH)
#define	NL3D_CLOD_BIGTEXT_HEIGHT	(NL3D_CLOD_TEXT_NLOD_HEIGHT*NL3D_CLOD_TEXT_HEIGHT)

// Default texture color. Alpha must be 255
#define	NL3D_CLOD_DEFAULT_TEXCOLOR	CRGBA(255,255,255,255)
66 // ***************************************************************************
67 CLodCharacterManager::CLodCharacterManager()
69 _MaxNumVertices= 3000;
70 _NumVBHard= 8;
71 _Rendering= false;
72 _LockDone= false;
74 // setup the texture.
75 _BigTexture= new CTextureBlank;
76 // The texture always reside in memory... This take 1Mo of RAM. (16*32*16*32 * 4)
77 // NB: this is simplier like that, and this is not a problem, since only 1 or 2 Mo are allocated :o)
78 _BigTexture->setReleasable(false);
79 // create the bitmap.
80 _BigTexture->resize(NL3D_CLOD_BIGTEXT_WIDTH, NL3D_CLOD_BIGTEXT_HEIGHT, CBitmap::RGBA);
81 // Format of texture, 16 bits and no mipmaps.
82 _BigTexture->setUploadFormat(ITexture::RGB565);
83 _BigTexture->setFilterMode(ITexture::Linear, ITexture::LinearMipMapOff);
84 _BigTexture->setWrapS(ITexture::Clamp);
85 _BigTexture->setWrapT(ITexture::Clamp);
87 // Alloc free Ids
88 _FreeIds.resize(NL3D_CLOD_TEXT_NUM_IDS);
89 for(uint i=0;i<_FreeIds.size();i++)
91 _FreeIds[i]= i;
94 // setup the material
95 _Material.initUnlit();
96 _Material.setAlphaTest(true);
97 _Material.setDoubleSided(true);
98 _Material.setTexture(0, _BigTexture);
100 // setup for lighting, Default for Ryzom setup
101 _LightCorrectionMatrix.rotateZ((float)Pi/2);
102 _LightCorrectionMatrix.invert();
103 NL_SET_IB_NAME(_Triangles, "CLodCharacterManager::_Triangles");
107 // ***************************************************************************
108 CLodCharacterManager::~CLodCharacterManager()
110 reset();
113 // ***************************************************************************
114 void CLodCharacterManager::reset()
116 nlassert(!isRendering());
118 // delete shapeBanks.
119 for(uint i=0;i<_ShapeBankArray.size();i++)
121 if(_ShapeBankArray[i])
122 delete _ShapeBankArray[i];
125 // clears containers
126 contReset(_ShapeBankArray);
127 contReset(_ShapeMap);
129 // reset render part.
130 _VertexStream.release();
133 // ***************************************************************************
134 uint32 CLodCharacterManager::createShapeBank()
136 // search a free entry
137 for(uint i=0;i<_ShapeBankArray.size();i++)
139 // if ree, use it.
140 if(_ShapeBankArray[i]==NULL)
142 _ShapeBankArray[i]= new CLodCharacterShapeBank;
143 return i;
147 // no free entrey, resize array.
148 _ShapeBankArray.push_back(new CLodCharacterShapeBank);
149 return (uint32)_ShapeBankArray.size()-1;
152 // ***************************************************************************
153 const CLodCharacterShapeBank *CLodCharacterManager::getShapeBank(uint32 bankId) const
155 if(bankId>=_ShapeBankArray.size())
156 return NULL;
157 else
158 return _ShapeBankArray[bankId];
161 // ***************************************************************************
162 CLodCharacterShapeBank *CLodCharacterManager::getShapeBank(uint32 bankId)
164 if(bankId>=_ShapeBankArray.size())
165 return NULL;
166 else
167 return _ShapeBankArray[bankId];
170 // ***************************************************************************
171 void CLodCharacterManager::deleteShapeBank(uint32 bankId)
173 if(bankId>=_ShapeBankArray.size())
175 if(_ShapeBankArray[bankId])
177 delete _ShapeBankArray[bankId];
178 _ShapeBankArray[bankId]= NULL;
183 // ***************************************************************************
184 sint32 CLodCharacterManager::getShapeIdByName(const std::string &name) const
186 CstItStrIdMap it= _ShapeMap.find(name);
187 if(it==_ShapeMap.end())
188 return -1;
189 else
190 return it->second;
193 // ***************************************************************************
194 const CLodCharacterShape *CLodCharacterManager::getShape(uint32 shapeId) const
196 // split the id
197 uint bankId= shapeId >> 16;
198 uint shapeInBankId= shapeId &0xFFFF;
200 // if valid bankId
201 const CLodCharacterShapeBank *shapeBank= getShapeBank(bankId);
202 if(shapeBank)
204 // return the shape from the bank
205 return shapeBank->getShape(shapeInBankId);
207 else
208 return NULL;
211 // ***************************************************************************
212 bool CLodCharacterManager::compile()
214 bool error= false;
216 // clear the map
217 contReset(_ShapeMap);
219 // build the map
220 for(uint i=0; i<_ShapeBankArray.size(); i++)
222 if(_ShapeBankArray[i])
224 // Parse all Shapes
225 for(uint j=0; j<_ShapeBankArray[i]->getNumShapes(); j++)
227 // build the shape Id
228 uint shapeId= (i<<16) + j;
230 // get the shape
231 const CLodCharacterShape *shape= _ShapeBankArray[i]->getShape(j);
232 if(shape)
234 const string &name= shape->getName();
235 ItStrIdMap it= _ShapeMap.find(name);
236 if(it == _ShapeMap.end())
237 // insert the id in the map
238 _ShapeMap.insert(make_pair(name, shapeId));
239 else
241 error= true;
242 nlwarning("Found a Character Lod with same name in the manager: %s", name.c_str());
249 return error;
252 // ***************************************************************************
253 // ***************************************************************************
254 // Render
255 // ***************************************************************************
256 // ***************************************************************************
259 // ***************************************************************************
260 void CLodCharacterManager::setMaxVertex(uint32 maxVertex)
262 // we must not be between beginRender() and endRender()
263 nlassert(!isRendering());
264 _MaxNumVertices= maxVertex;
267 // ***************************************************************************
268 void CLodCharacterManager::setVertexStreamNumVBHard(uint32 numVBHard)
270 // we must not be between beginRender() and endRender()
271 nlassert(!isRendering());
272 _NumVBHard= numVBHard;
275 // ***************************************************************************
276 void CLodCharacterManager::beginRender(IDriver *driver, const CVector &managerPos)
278 H_AUTO( NL3D_CharacterLod_beginRender );
280 // we must not be between beginRender() and endRender()
281 nlassert(!isRendering());
283 // Reset render
284 //=================
285 _CurrentVertexId=0;
286 _CurrentTriId= 0;
288 // update Driver.
289 //=================
290 nlassert(driver);
292 // test change of vertexStream setup
293 bool mustChangeVertexStream= _VertexStream.getDriver() != driver;
294 if(!mustChangeVertexStream)
296 mustChangeVertexStream= _MaxNumVertices != _VertexStream.getMaxVertices();
297 mustChangeVertexStream= mustChangeVertexStream || _NumVBHard != _VertexStream.getNumVB();
299 // re-init?
300 if( mustChangeVertexStream )
302 // chech offset
303 CVertexBuffer vb;
304 vb.setVertexFormat(NL3D_CLOD_VERTEX_FORMAT);
305 // NB: addRenderCharacterKey() loop hardCoded for Vertex+UV+Normal+Color only.
306 nlassert( NL3D_CLOD_UV_OFF == vb.getTexCoordOff());
307 nlassert( NL3D_CLOD_COLOR_OFF == vb.getColorOff());
309 // Setup the vertex stream
310 _VertexStream.release();
311 _VertexStream.init(driver, NL3D_CLOD_VERTEX_FORMAT, _MaxNumVertices, _NumVBHard, "CLodManagerVB", false); // nb : don't use volatile lock as we keep the buffer locked
314 // prepare for render.
315 //=================
317 // Do not Lock Buffer now (will be done at the first instance added)
318 nlassert(!_LockDone);
319 _VertexSize= _VertexStream.getVertexSize();
320 // NB: addRenderCharacterKey() loop hardCoded for Vertex+UV+Normal+Color only.
321 nlassert( _VertexSize == NL3D_CLOD_VERTEX_SIZE ); // Vector + Normal + UV + RGBA
324 // Alloc a minimum of primitives (2*vertices), to avoid as possible reallocation in addRenderCharacterKey
325 if(_Triangles.getNumIndexes()<_MaxNumVertices * 2)
327 _Triangles.setFormat(NL_LOD_CHARACTER_INDEX_FORMAT);
328 _Triangles.setNumIndexes(_MaxNumVertices * 2);
331 // Local manager matrix
332 _ManagerMatrixPos= managerPos;
334 // Ok, start rendering
335 _Rendering= true;
339 // ***************************************************************************
340 static inline void computeLodLighting(CRGBA &lightRes, const CVector &lightObjectSpace, const CVector &normalPtr, CRGBA ambient, CRGBA diffuse)
342 float f= lightObjectSpace * normalPtr;
343 sint f8= NLMISC::OptFastFloor(f);
344 fastClamp8(f8);
345 sint r,g,b;
346 r= (diffuse.R * f8)>>8;
347 g= (diffuse.G * f8)>>8;
348 b= (diffuse.B * f8)>>8;
349 r+= ambient.R;
350 g+= ambient.G;
351 b+= ambient.B;
352 fastClamp8(r);
353 fastClamp8(g);
354 fastClamp8(b);
355 lightRes.R= r;
356 lightRes.G= g;
357 lightRes.B= b;
361 // ***************************************************************************
// Transform one lod character instance into the shared vertex stream and append its triangles.
//	- instance: gives the ShapeId, the animation key to use (AnimId / AnimTime / WrapMode), the per instance UVs
//	  (set up by initInstance()) and the optional per vertex alphas (VertexAlphas).
//	- worldMatrix: transform of the instance. Its position is made relative to the manager position
//	  given to beginRender().
//	- paramAmbient / paramDiffuse: lighting colors. Their alpha is ignored (forced to 0 below), the alpha written
//	  per vertex comes from instance.VertexAlphas, or 255 if this array has not the size of the shape.
//	- lightDir: light direction, brought in object space with the transposed rotation part of worldMatrix.
// Return false only if the instance does not fit in the remaining vertex budget (_MaxNumVertices).
// Return true if the key has been added, but also if there was nothing to add (shape not found, anim key not found,
// shape with 0 vertex).
// Must be called between beginRender() and endRender() (asserted).
362 bool CLodCharacterManager::addRenderCharacterKey(CLodCharacterInstance &instance, const CMatrix &worldMatrix,
363 CRGBA paramAmbient, CRGBA paramDiffuse, const CVector &lightDir)
365 H_AUTO ( NL3D_CharacterLod_AddRenderKey )
367 nlassert(_VertexStream.getDriver());
368 // we must be between beginRender() and endRender()
369 nlassert(isRendering());
372 // regroup all variables that will be accessed in the ASM loop (minimize cache problems)
373 uint numVertices;
374 const CLodCharacterShape::CVector3s *vertPtr;
375 const CVector *normalPtr;
376 const CUV *uvPtr;
377 const uint8 *alphaPtr;
378 CVector lightObjectSpace;
379 CVector matPos;
380 float a00, a01, a02;
381 float a10, a11, a12;
382 float a20, a21, a22;
383 uint64 blank= 0;
384 CRGBA ambient= paramAmbient;
385 CRGBA diffuse= paramDiffuse;
386 // For ASM / MMX, must set 0 to alpha part, because replaced by *alphaPtr (with add)
387 ambient.A= 0;
388 diffuse.A= 0;
391 // Get the Shape and current key.
392 //=============
394 // get the shape
395 const CLodCharacterShape *clod= getShape(instance.ShapeId);
396 // if not found quit, return true
397 if(!clod)
398 return true;
400 // get UV/Normal array. NULL => error
401 normalPtr= clod->getNormals();
402 // get UV of the instance
403 uvPtr= instance.getUVs();
404 // uvPtr is NULL means that initInstance() has not been called!!
405 nlassert(normalPtr && uvPtr);
407 // get the anim key
408 CVector unPackScaleFactor;
409 vertPtr= clod->getAnimKey(instance.AnimId, instance.AnimTime, instance.WrapMode, unPackScaleFactor);
410 // if not found quit, return true
411 if(!vertPtr)
412 return true;
413 // get num verts
414 numVertices= clod->getNumVertices();
416 // empty shape??
417 if(numVertices==0)
418 return true;
420 // If too many vertices, quit, returning false.
421 if(_CurrentVertexId+numVertices > _MaxNumVertices)
422 return false;
424 // get alpha array
// NB: function-local static, shared by every call. It is only ever grown, and filled with 255.
425 static vector<uint8> defaultAlphaArray;
426 // get the instance alpha if correctly setuped
427 if(instance.VertexAlphas.size() == numVertices)
429 alphaPtr= &instance.VertexAlphas[0];
431 // if error, take 255 as alpha.
432 else
434 // NB: still use an array. This case should never arise, but support it not at full optim.
435 if(defaultAlphaArray.size()<numVertices)
436 defaultAlphaArray.resize(numVertices, 255);
437 alphaPtr= &defaultAlphaArray[0];
440 // Lock Buffer if not done
441 //=============
443 // Do this after code above because we are sure that we will fill something (numVertices>0)
// NB: the lock is kept for the following instances, and released by endRender().
444 if(!_LockDone)
446 _VertexData= _VertexStream.lock();
447 _LockDone= true;
450 // After lock, For D3D, the VertexColor may be in BGRA format
451 if(_VertexStream.isBRGA())
453 // then swap only the B and R (no cpu cycle added per vertex)
454 ambient.swapBR();
455 diffuse.swapBR();
459 // Prepare Transform
460 //=============
462 // HTimerInfo: all this block takes 0.1%
464 // Get matrix pos.
465 matPos= worldMatrix.getPos();
466 // compute in manager space.
467 matPos -= _ManagerMatrixPos;
468 // Get rotation line vectors
// NB: aRC is read at index C*4+R of the float array returned by get().
469 const float *worldM= worldMatrix.get();
470 a00= worldM[0]; a01= worldM[4]; a02= worldM[8];
471 a10= worldM[1]; a11= worldM[5]; a12= worldM[9];
472 a20= worldM[2]; a21= worldM[6]; a22= worldM[10];
474 // get the light in object space.
475 // Multiply light dir with transpose of worldMatrix. This may be not exact (not uniform scale) but sufficient.
476 lightObjectSpace.x= a00 * lightDir.x + a10 * lightDir.y + a20 * lightDir.z;
477 lightObjectSpace.y= a01 * lightDir.x + a11 * lightDir.y + a21 * lightDir.z;
478 lightObjectSpace.z= a02 * lightDir.x + a12 * lightDir.y + a22 * lightDir.z;
479 // animation User correction
480 lightObjectSpace= _LightCorrectionMatrix.mulVector(lightObjectSpace);
481 // normalize, and neg for Dot Product.
482 lightObjectSpace.normalize();
483 lightObjectSpace= -lightObjectSpace;
484 // preMul by 255 for RGBA uint8
485 lightObjectSpace*= 255;
487 // multiply matrix with scale factor for Pos.
// NB: done after the light computation above, which must use the unscaled matrix.
488 a00*= unPackScaleFactor.x; a01*= unPackScaleFactor.y; a02*= unPackScaleFactor.z;
489 a10*= unPackScaleFactor.x; a11*= unPackScaleFactor.y; a12*= unPackScaleFactor.z;
490 a20*= unPackScaleFactor.x; a21*= unPackScaleFactor.y; a22*= unPackScaleFactor.z;
492 // get dst Array.
493 uint8 *dstPtr;
494 dstPtr= _VertexData + _CurrentVertexId * _VertexSize;
// Two versions of the same per vertex fill follow: an x86 inline assembly one (compiled only if NL_OS_WINDOWS
// is defined and NL_NO_ASM is not, and taken only if MMX is reported at runtime), and the generic C++ one.
497 /* PreCaching Note: CFastMem::precache() has been tested (done on the 4 arrays) but not very interesting,
498 maybe because the cache miss improve //ism a bit below.
501 // Fill the VB
502 //=============
503 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
504 // optimized version
505 if(CSystemInfo::hasMMX())
507 H_AUTO( NL3D_CharacterLod_vertexFill );
509 if(numVertices)
511 sint f8;
513 /* NB: order is important for AGP filling optimisation in dstPtr
515 Pentium2+ optimisation notes:
517 - "uop" comment formating:
518 A/B means "A micro-ops in port 0, and B micro-ops in port 2". (port 1 is very rare for FPU)
519 A/B/C/D means "A micro-ops in port 0, B in port 2, C in port 3 and D in port 4".
520 The number in () is the delay (if any).
521 - the "compute lighting part" must done first, because of the "fistp f8" mem writes that must
522 be place far away from the "mov eax, f8" read in clamp lighting part
523 (else seems that it crashes all the //ism)
524 - No need to Interleave on Pentium2+. But prevents "write/read stall" by putting the write
525 far away from the next read. Else stall of 3 cycles + BIG BREAK OF //ism (I think).
526 This had save me 120 cycles / 240 !!!
528 BenchResults:
529 - The "transform vertex part" and "all next part" cost 42 cycles, but is somewhat optimal:
530 63 uop (=> min 21 cycles), but 36 uop in the P0 port (=> this is the bottleneck)
531 - The lighting part adds 1 cycle only ????? (44 cycles) But still relevant and optimal:
532 43 uop in port P0!!!!
533 - The UV part adds 4 cycles (47) (should not since 0 in Port P0), still acceptable.
534 - The clamp part adds 3 cycles (50), and add 11 cycles in "P0 or P1" (but heavy dependency)
535 If we assume all goes into P1, it should takes 0... still acceptable (optimal==43?)
536 - The alpha part adds 2 cycles (52, optimal=45). OK.
537 - The modulate part adds 15 cycles. OK
539 TOTAL: 67 cycles in theory (write in RAM, no cache miss problem)
540 BENCH: ASM version: 91 cycles (Write in AGP, some cache miss problems, still good against 67)
541 C version: 316 cycles.
543 __asm
545 mov edi, dstPtr
546 theLoop:
547 // **** compute lighting
548 mov esi,normalPtr // uop: 0/1
549 // dot3
550 fld dword ptr [esi] // uop: 0/1
551 fmul lightObjectSpace.x // uop: 1/1 (5)
552 fld dword ptr [esi+4] // uop: 0/1
553 fmul lightObjectSpace.y // uop: 1/1 (5)
554 faddp st(1),st // uop: 1/0 (3)
555 fld dword ptr [esi+8] // uop: 0/1
556 fmul lightObjectSpace.z // uop: 1/1 (5)
557 faddp st(1),st // uop: 1/0 (3)
558 fistp f8 // uop: 2/0/1/1 (5)
559 // next
560 add esi, 12 // uop: 1/0
561 mov normalPtr, esi // uop: 0/0/1/1
564 // **** transform vertex, and store
565 mov esi, vertPtr // uop: 0/1
566 fild word ptr[esi] // uop: 3/1 (5)
567 fild word ptr[esi+2] // uop: 3/1 (5)
568 fild word ptr[esi+4] // uop: 3/1 (5)
569 // x
570 fld a00 // uop: 0/1
571 fmul st, st(3) // uop: 1/0 (5)
572 fld a01 // uop: 0/1
573 fmul st, st(3) // uop: 1/0 (5)
574 faddp st(1), st // uop: 1/0 (3)
575 fld a02 // uop: 0/1
576 fmul st, st(2) // uop: 1/0 (5)
577 faddp st(1), st // uop: 1/0 (3)
578 fld matPos.x // uop: 0/1
579 faddp st(1), st // uop: 1/0 (3)
580 fstp dword ptr[edi] // uop: 0/0/1/1
581 // y
582 fld a10
583 fmul st, st(3)
584 fld a11
585 fmul st, st(3)
586 faddp st(1), st
587 fld a12
588 fmul st, st(2)
589 faddp st(1), st
590 fld matPos.y
591 faddp st(1), st
592 fstp dword ptr[edi+4]
593 // z
594 fld a20
595 fmul st, st(3)
596 fld a21
597 fmul st, st(3)
598 faddp st(1), st
599 fld a22
600 fmul st, st(2)
601 faddp st(1), st
602 fld matPos.z
603 faddp st(1), st
604 fstp dword ptr[edi+8]
605 // flush stack
606 fstp st // uop: 1/0
607 fstp st // uop: 1/0
608 fstp st // uop: 1/0
609 // next
610 add esi, 6 // uop: 1/0
611 mov vertPtr, esi // uop: 0/0/1/1
614 // **** copy uv
615 mov esi, uvPtr // uop: 0/1
616 mov eax, [esi] // uop: 0/1
617 mov [edi+NL3D_CLOD_UV_OFF], eax // uop: 0/0/1/1
618 mov ebx, [esi+4] // uop: 0/1
619 mov [edi+NL3D_CLOD_UV_OFF+4], ebx // uop: 0/0/1/1
620 // next
621 add esi, 8 // uop: 1/0
622 mov uvPtr, esi // uop: 0/0/1/1
625 // **** Clamp lighting
626 // clamp to 0 only. will be clamped to 255 by MMX
627 mov eax, f8 // uop: 0/1
628 cmp eax, 0x80000000 // if>=0 => CF=1
629 sbb ebx, ebx // if>=0 => CF==1 => ebx=0xFFFFFFFF
630 and eax, ebx // if>=0 => eax unchanged, else eax=0 (clamped)
633 // **** Modulate lighting modulate with diffuse color, add ambient term, using MMX
634 movd mm0, eax // 0000000L uop: 1/0
635 packuswb mm0, mm0 // 000L000L uop: 1/0 (p1)
636 packuswb mm0, mm0 // 0L0L0L0L uop: 1/0 (p1)
637 movd mm1, diffuse // uop: 0/1
638 punpcklbw mm1, blank // uop: 1/1 (p1)
639 pmullw mm0, mm1 // diffuse*L uop: 1/0 (3)
640 psrlw mm0, 8 // 0A0B0G0R uop: 1/0 (p1)
641 packuswb mm0, blank // 0000ABGR uop: 1/1 (p1)
642 movd mm2, ambient // uop: 0/1
643 paddusb mm0, mm2 // uop: 1/0
644 movd ebx, mm0 // ebx= AABBGGRR uop: 1/0
645 // NB: emms is not so bad on P2+: delay of 6, +11 (NB: far better than no MMX instructions)
646 emms // uop: 11/0 (6). (?????)
649 // **** append alpha, and store
650 mov esi, alphaPtr // uop: 0/1
651 movzx eax, byte ptr[esi] // uop: 0/1
652 shl eax, 24 // uop: 1/0
653 add ebx, eax // uop: 1/0
654 // now, ebx= AABBGGRR
655 mov [edi+NL3D_CLOD_COLOR_OFF], ebx // uop: 0/0/1/1
656 // next
657 add esi, 1 // uop: 1/0
658 mov alphaPtr, esi // uop: 0/0/1/1
661 // **** next
662 add edi, NL3D_CLOD_VERTEX_SIZE // uop: 1/0
664 mov eax, numVertices // uop: 0/1
665 dec eax // uop: 1/0
666 mov numVertices, eax // uop: 0/0/1/1
668 jnz theLoop // uop: 1/1 (p1)
670 // To have same behavior than c code
671 mov dstPtr, edi
675 else
676 #endif
678 H_AUTO( NL3D_CharacterLod_vertexFill );
680 CVector fVect;
682 for(;numVertices>0;)
684 // NB: order is important for AGP filling optimisation
685 // transform vertex, and store.
686 CVector *dstVector= (CVector*)dstPtr;
687 fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z;
688 ++vertPtr;
689 dstVector->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x;
690 dstVector->y= a10 * fVect.x + a11 * fVect.y + a12 * fVect.z + matPos.y;
691 dstVector->z= a20 * fVect.x + a21 * fVect.y + a22 * fVect.z + matPos.z;
692 // Copy UV
693 *(CUV*)(dstPtr + NL3D_CLOD_UV_OFF)= *uvPtr;
694 ++uvPtr;
696 // Compute Lighting.
697 CRGBA lightRes;
698 computeLodLighting(lightRes, lightObjectSpace, *normalPtr, ambient, diffuse);
699 ++normalPtr;
700 lightRes.A= *alphaPtr;
701 ++alphaPtr;
702 // store.
703 *((CRGBA*)(dstPtr + NL3D_CLOD_COLOR_OFF))= lightRes;
705 // next
706 dstPtr+= NL3D_CLOD_VERTEX_SIZE;
707 numVertices--;
711 // Add Primitives.
712 //=============
715 H_AUTO( NL3D_CharacterLod_primitiveFill )
717 // get number of tri indexes
718 uint numTriIdxs= clod->getNumTriangles() * 3;
720 // Yoyo: there is an assert with getPtr(). Not sure, but maybe arise if numTriIdxs==0
721 if(numTriIdxs)
723 // realloc tris if needed.
724 if(_CurrentTriId+numTriIdxs > _Triangles.getNumIndexes())
726 _Triangles.setFormat(NL_LOD_CHARACTER_INDEX_FORMAT);
727 _Triangles.setNumIndexes(_CurrentTriId+numTriIdxs);
730 // reindex and copy tris
// NB: the shape indices are relative to its own vertices, hence the offset by _CurrentVertexId.
731 CIndexBufferReadWrite iba;
732 _Triangles.lock(iba);
733 const TLodCharacterIndexType *srcIdx= clod->getTriangleArray();
734 nlassert(sizeof(TLodCharacterIndexType) == _Triangles.getIndexNumBytes());
735 TLodCharacterIndexType *dstIdx= (TLodCharacterIndexType *) iba.getPtr()+_CurrentTriId;
736 for(;numTriIdxs>0;numTriIdxs--, srcIdx++, dstIdx++)
738 *dstIdx= *srcIdx + _CurrentVertexId;
743 // Next
744 //=============
// NB: numVertices has been decremented to 0 by the fill loop, hence the counts are read again from the shape.
746 // Inc Vertex count.
747 _CurrentVertexId+= clod->getNumVertices();
748 // Inc Prim count.
749 _CurrentTriId+= clod->getNumTriangles() * 3;
752 // key added
753 return true;
756 // ***************************************************************************
757 void CLodCharacterManager::endRender()
759 H_AUTO ( NL3D_CharacterLod_endRender );
761 IDriver *driver= _VertexStream.getDriver();
762 nlassert(driver);
763 // we must be between beginRender() and endRender()
764 nlassert(isRendering());
766 // if something rendered
767 if(_LockDone)
769 // UnLock Buffer.
770 _VertexStream.unlock(_CurrentVertexId);
771 _LockDone= false;
773 // Render the VBuffer and the primitives.
774 if(_CurrentTriId>0)
776 // setup matrix.
777 CMatrix managerMatrix;
778 managerMatrix.setPos(_ManagerMatrixPos);
779 driver->setupModelMatrix(managerMatrix);
781 // active VB
782 _VertexStream.activate();
784 // render triangles
785 driver->activeIndexBuffer(_Triangles);
786 driver->renderTriangles(_Material, 0, _CurrentTriId/3);
789 // swap Stream VBHard
790 _VertexStream.swapVBHard();
793 // Ok, end rendering
794 _Rendering= false;
797 // ***************************************************************************
798 void CLodCharacterManager::setupNormalCorrectionMatrix(const CMatrix &normalMatrix)
800 _LightCorrectionMatrix= normalMatrix;
801 _LightCorrectionMatrix.setPos(CVector::Null);
802 _LightCorrectionMatrix.invert();
806 // ***************************************************************************
807 // ***************************************************************************
808 // Texturing.
809 // ***************************************************************************
810 // ***************************************************************************
813 // ***************************************************************************
814 CLodCharacterTmpBitmap::CLodCharacterTmpBitmap()
816 reset();
819 // ***************************************************************************
820 void CLodCharacterTmpBitmap::reset()
822 // setup a 1*1 bitmap
823 _Bitmap.resize(1);
824 _Bitmap[0]= CRGBA::Black;
825 _WidthPower=0;
826 _UShift= 8;
827 _VShift= 8;
830 // ***************************************************************************
831 void CLodCharacterTmpBitmap::build(const NLMISC::CBitmap &bmpIn)
833 uint width= bmpIn.getWidth();
834 uint height= bmpIn.getHeight();
835 nlassert(width>0 && width<=256);
836 nlassert(height>0 && height<=256);
838 // resize bitmap.
839 _Bitmap.resize(width*height);
840 _WidthPower= getPowerOf2(width);
841 // compute shift
842 _UShift= 8-getPowerOf2(width);
843 _VShift= 8-getPowerOf2(height);
845 // convert the bitmap.
846 CBitmap bmp= bmpIn;
847 bmp.convertToType(CBitmap::RGBA);
848 CRGBA *src= (CRGBA*)&bmp.getPixels()[0];
849 CRGBA *dst= _Bitmap.getPtr();
850 for(sint nPix= width*height;nPix>0;nPix--, src++, dst++)
852 *dst= *src;
856 // ***************************************************************************
857 void CLodCharacterTmpBitmap::build(CRGBA col)
859 // setup a 1*1 bitmap and set it with col
860 reset();
861 _Bitmap[0]= col;
865 // ***************************************************************************
866 void CLodCharacterManager::initInstance(CLodCharacterInstance &instance)
868 // first release in (maybe) other manager.
869 if(instance._Owner)
870 instance._Owner->releaseInstance(instance);
872 // get the shape
873 const CLodCharacterShape *clod= getShape(instance.ShapeId);
874 // if not found quit
875 if(!clod)
876 return;
877 // get Uvs.
878 const CUV *uvSrc= clod->getUVs();
879 nlassert(uvSrc);
882 // Ok, init header
883 instance._Owner= this;
884 instance._UVs.resize(clod->getNumVertices());
886 // allocate an id. If cannot, then fill Uvs with 0 => filled with Black. (see endTextureCompute() why).
887 if(_FreeIds.empty())
889 // set a "Not enough memory" id
890 instance._TextureId= NL3D_CLOD_TEXT_NUM_IDS;
891 CUV uv(0,0);
892 fill(instance._UVs.begin(), instance._UVs.end(), uv);
894 // else OK, can instanciate the Uvs.
895 else
897 // get the id.
898 instance._TextureId= _FreeIds.back();
899 _FreeIds.pop_back();
900 // get the x/y.
901 uint xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
902 uint yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;
903 // compute the scale/bias to apply to Uvs.
904 float scaleU= 1.0f / NL3D_CLOD_TEXT_NLOD_WIDTH;
905 float scaleV= 1.0f / NL3D_CLOD_TEXT_NLOD_HEIGHT;
906 float biasU= (float)xId / NL3D_CLOD_TEXT_NLOD_WIDTH;
907 float biasV= (float)yId / NL3D_CLOD_TEXT_NLOD_HEIGHT;
908 // apply it to each UVs.
909 CUV *uvDst= &instance._UVs[0];
910 for(uint i=0; i<instance._UVs.size();i++)
912 uvDst[i].U= biasU + uvSrc[i].U*scaleU;
913 uvDst[i].V= biasV + uvSrc[i].V*scaleV;
918 // ***************************************************************************
919 void CLodCharacterManager::releaseInstance(CLodCharacterInstance &instance)
921 if(instance._Owner==NULL)
922 return;
923 nlassert(this==instance._Owner);
925 // if the id is not a "Not enough memory" id, release it.
926 if(instance._TextureId>=0 && instance._TextureId<NL3D_CLOD_TEXT_NUM_IDS)
927 _FreeIds.push_back(instance._TextureId);
929 // reset the instance
930 instance._Owner= NULL;
931 instance._TextureId= -1;
932 contReset(instance._UVs);
936 // ***************************************************************************
937 CRGBA *CLodCharacterManager::getTextureInstance(CLodCharacterInstance &instance)
939 nlassert(instance._Owner==this);
940 nlassert(instance._TextureId!=-1);
941 // if the texture id is a "not enough memory", quit.
942 if(instance._TextureId==NL3D_CLOD_TEXT_NUM_IDS)
943 return NULL;
945 // get the x/y.
946 uint xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
947 uint yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;
949 // get the ptr on the correct pixel.
950 CRGBA *pix= (CRGBA*)&_BigTexture->getPixels(0)[0];
951 return pix + yId*NL3D_CLOD_TEXT_HEIGHT*NL3D_CLOD_BIGTEXT_WIDTH + xId*NL3D_CLOD_TEXT_WIDTH;
955 // ***************************************************************************
956 bool CLodCharacterManager::startTextureCompute(CLodCharacterInstance &instance)
958 CRGBA *dst= getTextureInstance(instance);
959 if(!dst)
960 return false;
962 // erase the texture with 0,0,0,255. Alpha is actually the min "Quality" part of the CTUVQ.
963 CRGBA col= NL3D_CLOD_DEFAULT_TEXCOLOR;
964 for(uint y=0;y<NL3D_CLOD_TEXT_HEIGHT;y++)
966 // erase the line
967 for(uint x=0;x<NL3D_CLOD_TEXT_WIDTH;x++)
968 dst[x]= col;
969 // Next line
970 dst+= NL3D_CLOD_BIGTEXT_WIDTH;
973 return true;
976 // ***************************************************************************
977 void CLodCharacterManager::addTextureCompute(CLodCharacterInstance &instance, const CLodCharacterTexture &lodTexture)
979 CRGBA *dst= getTextureInstance(instance);
980 if(!dst)
981 return;
983 // get lookup ptr.
984 nlassert(lodTexture.Texture.size()==NL3D_CLOD_TEXT_SIZE);
985 if (lodTexture.Texture.size() < NL3D_CLOD_TEXT_SIZE)
986 return;
988 const CLodCharacterTexture::CTUVQ *lookUpPtr= &lodTexture.Texture[0];
990 // apply the lodTexture, taking only better quality (ie nearer 0)
991 for(uint y=0;y<NL3D_CLOD_TEXT_HEIGHT;y++)
993 // erase the line
994 for(uint x=0;x<NL3D_CLOD_TEXT_WIDTH;x++)
996 CLodCharacterTexture::CTUVQ lut= *lookUpPtr;
997 // if this quality is better than the one stored
998 if(lut.Q<dst[x].A)
1000 // get what texture to read, and read the pixel.
1001 CRGBA col= _TmpBitmaps[lut.T].getPixel(lut.U, lut.V);
1002 // set quality.
1003 col.A= lut.Q;
1004 // set in dest
1005 dst[x]= col;
1008 // next lookup
1009 lookUpPtr++;
1011 // Next line
1012 dst+= NL3D_CLOD_BIGTEXT_WIDTH;
1016 // ***************************************************************************
1017 void CLodCharacterManager::endTextureCompute(CLodCharacterInstance &instance, uint numBmpToReset)
1019 CRGBA *dst= getTextureInstance(instance);
1020 if(!dst)
1021 return;
1023 // reset All Alpha values to 255 => no AlphaTest problems
1024 for(uint y=0;y<NL3D_CLOD_TEXT_HEIGHT;y++)
1026 // erase the line
1027 for(uint x=0;x<NL3D_CLOD_TEXT_WIDTH;x++)
1029 dst[x].A= 255;
1031 // Next line
1032 dst+= NL3D_CLOD_BIGTEXT_WIDTH;
1035 // If the id == 0 then must reset the 0,0 Pixel to black. for the "Not Enough memory" case in initInstance().
1036 if(instance._TextureId==0)
1037 *(CRGBA*)&_BigTexture->getPixels(0)[0]= NL3D_CLOD_DEFAULT_TEXCOLOR;
1039 // get the x/y.
1040 uint xId= instance._TextureId % NL3D_CLOD_TEXT_NLOD_WIDTH;
1041 uint yId= instance._TextureId / NL3D_CLOD_TEXT_NLOD_WIDTH;
1042 // touch the texture for Driver update.
1043 _BigTexture->touchRect(
1044 CRect(xId*NL3D_CLOD_TEXT_WIDTH, yId*NL3D_CLOD_TEXT_HEIGHT, NL3D_CLOD_TEXT_WIDTH, NL3D_CLOD_TEXT_HEIGHT) );
1046 // reset tmpBitmaps / free memory.
1047 for(uint i=0; i<numBmpToReset; i++)
1049 _TmpBitmaps[i].reset();
1052 // TestYoyo
1053 /*NLMISC::COFile f("tam.tga");
1054 _BigTexture->writeTGA(f,32);*/
1058 // ***************************************************************************
1059 bool CLodCharacterManager::fastIntersect(const CLodCharacterInstance &instance, const NLMISC::CMatrix &toRaySpace, float &dist2D, float &distZ, bool computeDist2D)
1061 H_AUTO ( NL3D_CharacterLod_fastIntersect )
1063 uint numVertices;
1064 const CLodCharacterShape::CVector3s *vertPtr;
1065 CVector matPos;
1066 float a00, a01, a02;
1067 float a10, a11, a12;
1068 float a20, a21, a22;
1071 // Get the Shape and current key.
1072 //=============
1074 // get the shape
1075 const CLodCharacterShape *clod= getShape(instance.ShapeId);
1076 // if not found quit
1077 if(!clod)
1078 return false;
1080 // get the anim key
1081 CVector unPackScaleFactor;
1082 vertPtr= clod->getAnimKey(instance.AnimId, instance.AnimTime, instance.WrapMode, unPackScaleFactor);
1083 // if not found quit
1084 if(!vertPtr)
1085 return false;
1086 // get num verts
1087 numVertices= clod->getNumVertices();
1089 // empty shape??
1090 if(numVertices==0)
1091 return false;
1093 // Prepare Transform
1094 //=============
1096 // Get matrix pos.
1097 matPos= toRaySpace.getPos();
1098 // Get rotation line vectors
1099 const float *rayM= toRaySpace.get();
1100 a00= rayM[0]; a01= rayM[4]; a02= rayM[8];
1101 a10= rayM[1]; a11= rayM[5]; a12= rayM[9];
1102 a20= rayM[2]; a21= rayM[6]; a22= rayM[10];
1104 // multiply matrix with scale factor for Pos.
1105 a00*= unPackScaleFactor.x; a01*= unPackScaleFactor.y; a02*= unPackScaleFactor.z;
1106 a10*= unPackScaleFactor.x; a11*= unPackScaleFactor.y; a12*= unPackScaleFactor.z;
1107 a20*= unPackScaleFactor.x; a21*= unPackScaleFactor.y; a22*= unPackScaleFactor.z;
1109 // get dst Array.
1110 // enlarge temp buffer
1111 static std::vector<CVector> lodInRaySpace;
1112 if(numVertices>lodInRaySpace.size())
1113 lodInRaySpace.resize(numVertices);
1114 CVector *dstPtr= &lodInRaySpace[0];
1117 // Fill the temp skin
1118 //=============
1120 CVector fVect;
1122 for(;numVertices>0;)
1124 // transform vertex, and store.
1125 fVect.x= vertPtr->x; fVect.y= vertPtr->y; fVect.z= vertPtr->z;
1126 ++vertPtr;
1127 dstPtr->x= a00 * fVect.x + a01 * fVect.y + a02 * fVect.z + matPos.x;
1128 dstPtr->y= a10 * fVect.x + a11 * fVect.y + a12 * fVect.z + matPos.y;
1129 dstPtr->z= a20 * fVect.x + a21 * fVect.y + a22 * fVect.z + matPos.z;
1131 // next
1132 dstPtr++;
1133 numVertices--;
1137 // Test intersection
1138 //=============
1140 return CRayMesh::getRayIntersection(lodInRaySpace, clod->getTriangleIndices(), dist2D, distZ, computeDist2D);
1144 } // NL3D