Merge branch 'main/rendor-staging' into fixes
[ryzomcore.git] / nel / src / 3d / texture_far.cpp
blob594a7913115525ecba5337b0d89433fa2c7674b5
1 // NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
2 // Copyright (C) 2010 Winch Gate Property Limited
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Affero General Public License as
6 // published by the Free Software Foundation, either version 3 of the
7 // License, or (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Affero General Public License for more details.
14 // You should have received a copy of the GNU Affero General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
17 #include "std3d.h"
19 #include "nel/3d/texture_far.h"
20 #include "nel/3d/tile_far_bank.h"
21 #include "nel/3d/patch.h"
22 #include "nel/3d/tile_color.h"
23 #include "nel/3d/zone.h"
24 #include "nel/3d/landscape.h"
25 #include "nel/misc/system_info.h"
28 using namespace NLMISC;
29 using namespace NL3D;
30 using namespace std;
32 #ifdef DEBUG_NEW
33 #define new DEBUG_NEW
34 #endif
36 namespace NL3D {
// Definitions of the static scratch buffers of CTextureFar. rebuildPatch() is the user visible in this file:
// - _LightmapExpanded is handed to NL3D_expandLightmap() as destination, then read back as per-tile lighting source.
// - _LumelExpanded receives the result of CPatch::unpackShadowMap().
// - _TileTLIColors receives the result of CPatch::computeCurrentTLILightmapDiv2().
// All three are dimensioned from NL_MAX_TILES_BY_PATCH_EDGE.
38 CRGBA CTextureFar::_LightmapExpanded[NL_NUM_PIXELS_ON_FAR_TILE_EDGE*NL_MAX_TILES_BY_PATCH_EDGE*NL_NUM_PIXELS_ON_FAR_TILE_EDGE*NL_MAX_TILES_BY_PATCH_EDGE];
39 uint8 CTextureFar::_LumelExpanded[(NL_MAX_TILES_BY_PATCH_EDGE*NL_LUMEL_BY_TILE+1)*(NL_MAX_TILES_BY_PATCH_EDGE*NL_LUMEL_BY_TILE+1)];
40 CRGBA CTextureFar::_TileTLIColors[(NL_MAX_TILES_BY_PATCH_EDGE+1)*(NL_MAX_TILES_BY_PATCH_EDGE+1)];
42 // ***************************************************************************
43 CTextureFar::CTextureFar()
45 /* NB: define Values work only if NL_MAX_TILES_BY_PATCH_EDGE is 16.
46 Else must change NL_MAX_FAR_EDGE and NL_NUM_RECTANGLE_RATIO
48 nlctassert(NL_MAX_TILES_BY_PATCH_EDGE==16);
50 // This texture is releasable. It doesn't stays in standard memory after been uploaded into video memory.
51 setReleasable (true);
53 // Init upload format 16 bits
54 setUploadFormat(RGB565);
56 // Set filter mode. No mipmap!
57 setFilterMode (Linear, LinearMipMapOff);
59 // Wrap
60 setWrapS (Clamp);
61 setWrapT (Clamp);
63 // init update Lighting
64 _ULPrec= this;
65 _ULNext= this;
67 // Start With All Patch of Max Far (64x64) Frees!
68 uint freeListId= getFreeListId(NL_MAX_FAR_PATCH_EDGE, NL_MAX_FAR_PATCH_EDGE);
69 for(uint i=0;i<NL_NUM_FAR_BIGGEST_PATCH_PER_EDGE;i++)
71 for(uint j=0;j<NL_NUM_FAR_BIGGEST_PATCH_PER_EDGE;j++)
73 CVector2s pos;
74 pos.x= i*NL_MAX_FAR_PATCH_EDGE;
75 pos.y= j*NL_MAX_FAR_PATCH_EDGE;
77 // add this place to the free list.
78 _FreeSpaces[freeListId].push_back(pos);
82 // reset
83 _ItULPatch= _PatchToPosMap.end();
86 // ***************************************************************************
87 CTextureFar::~CTextureFar()
89 // verify the textureFar is correctly unlinked from any ciruclar list.
90 nlassert(_ULPrec==this && _ULNext==this);
94 // ***************************************************************************
95 void CTextureFar::linkBeforeUL(CTextureFar *textNext)
97 nlassert(textNext);
99 // first, unlink others from me. NB: works even if _ULPrec==_ULNext==this.
100 _ULNext->_ULPrec= _ULPrec;
101 _ULPrec->_ULNext= _ULNext;
102 // link to igNext.
103 _ULNext= textNext;
104 _ULPrec= textNext->_ULPrec;
105 // link others to me.
106 _ULNext->_ULPrec= this;
107 _ULPrec->_ULNext= this;
110 // ***************************************************************************
111 void CTextureFar::unlinkUL()
113 // first, unlink others from me. NB: works even if _ULPrec==_ULNext==this.
114 _ULNext->_ULPrec= _ULPrec;
115 _ULPrec->_ULNext= _ULNext;
116 // reset
117 _ULPrec= this;
118 _ULNext= this;
122 // ***************************************************************************
123 uint CTextureFar::getFreeListId(uint width, uint height)
125 nlassert(width>=height);
126 nlassert(isPowerOf2(width));
127 nlassert(isPowerOf2(height));
128 nlassert(width<=NL_MAX_FAR_PATCH_EDGE);
130 // compute the level index
131 uint sizeIndex= getPowerOf2(NL_MAX_FAR_PATCH_EDGE / width);
132 nlassert(sizeIndex < NL_NUM_FAR_PATCH_EDGE_LEVEL);
134 // Compute the aspect ratio index.
135 uint aspectRatioIndex= getPowerOf2(width/height);
136 nlassert(aspectRatioIndex < NL_NUM_FAR_RECTANGLE_RATIO );
138 return sizeIndex*NL_NUM_FAR_RECTANGLE_RATIO + aspectRatioIndex;
142 // ***************************************************************************
143 bool CTextureFar::getUpperSize(uint &width, uint &height)
145 nlassert(width>=height);
146 nlassert(isPowerOf2(width));
147 nlassert(isPowerOf2(height));
149 // if height is smaller than widht, then reduce the ratio
150 if(width>height)
152 height*= 2;
153 return true;
155 else
157 // else raise up to the next square level, if possible
158 if(width<NL_MAX_FAR_PATCH_EDGE)
160 width*= 2;
161 height*= 2;
162 return true;
164 else
165 return false;
170 // ***************************************************************************
171 sint CTextureFar::tryAllocatePatch (CPatch *pPatch, uint farIndex)
173 // get the size of the subtexture to allocate
174 uint width=(pPatch->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
175 uint height=(pPatch->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
177 // make width the biggest
178 if(width<height)
179 std::swap(width, height);
181 // get where to find a subtexture
182 uint freeListId= getFreeListId(width, height);
184 // if some place, ok!
185 if(!_FreeSpaces[freeListId].empty())
186 return 0;
187 else
189 // try to get the next size
190 while( getUpperSize(width, height) )
192 freeListId= getFreeListId(width, height);
193 // if some subtexture free
194 if(!_FreeSpaces[freeListId].empty())
196 // Ok! return the size of this texture we must split
197 return width*height;
201 // fail => no more space => -1
202 return -1;
206 // ***************************************************************************
207 void CTextureFar::recursSplitNext(uint wson, uint hson)
209 // get the upper subTexture
210 uint wup= wson, hup= hson;
211 nlverify( getUpperSize(wup, hup) );
213 // get the list id.
214 uint fatherListId= getFreeListId(wup, hup);
216 // if must split bigger patch...
217 if(_FreeSpaces[fatherListId].empty())
219 // recurs, try to get a bigger subtexture and split it.
220 recursSplitNext(wup, hup);
223 // OK, now we should have a free entry.
224 nlassert( !_FreeSpaces[fatherListId].empty() );
226 // remove from free list, because it is split now!
227 CVector2s fatherPos= _FreeSpaces[fatherListId].front();
228 _FreeSpaces[fatherListId].pop_front();
230 // Create New free rectangles for sons
231 uint sonListId= getFreeListId(wson, hson);
232 CVector2s sonPos;
234 // if my son is a rectangle son
235 if(wson>hson)
237 // Then Add 2 free Spaces!
238 sonPos.x= fatherPos.x;
239 // 1st.
240 sonPos.y= fatherPos.y;
241 _FreeSpaces[sonListId].push_back(sonPos);
242 // 2nd.
243 sonPos.y= fatherPos.y+hson;
244 _FreeSpaces[sonListId].push_back(sonPos);
246 else
248 // Then Add 4 free Spaces!
249 // 1st.
250 sonPos.x= fatherPos.x;
251 sonPos.y= fatherPos.y;
252 _FreeSpaces[sonListId].push_back(sonPos);
253 // 2nd.
254 sonPos.x= fatherPos.x+wson;
255 sonPos.y= fatherPos.y;
256 _FreeSpaces[sonListId].push_back(sonPos);
257 // 3rd.
258 sonPos.x= fatherPos.x;
259 sonPos.y= fatherPos.y+hson;
260 _FreeSpaces[sonListId].push_back(sonPos);
261 // 4th.
262 sonPos.x= fatherPos.x+wson;
263 sonPos.y= fatherPos.y+hson;
264 _FreeSpaces[sonListId].push_back(sonPos);
270 // ***************************************************************************
271 void CTextureFar::allocatePatch (CPatch *pPatch, uint farIndex, float& farUScale, float& farVScale, float& farUBias, float& farVBias, bool& bRot)
273 // get the size of the subtexture to allocate
274 uint width=(pPatch->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
275 uint height=(pPatch->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
277 // make width the biggest
278 if(width<height)
279 std::swap(width, height);
281 // get where to find a subtexture
282 uint freeListId= getFreeListId(width, height);
284 // if free list is empty, must split bigger patch...
285 if(_FreeSpaces[freeListId].empty())
287 // try to get a bigger subtexture and split it.
288 recursSplitNext(width, height);
291 // now the list should have som free space.
292 nlassert( !_FreeSpaces[freeListId].empty() );
293 CVector2s pos= _FreeSpaces[freeListId].front();
295 // Allocate. Add this entry to the maps
296 CPatchIdent pid;
297 pid.Patch= pPatch;
298 pid.FarIndex= farIndex;
299 // verify not already here.
300 nlassert( _PatchToPosMap.find(pid) == _PatchToPosMap.end() );
301 _PatchToPosMap[pid]= pos;
302 _PosToPatchMap[pos]= pid;
304 // remove from free list.
305 _FreeSpaces[freeListId].pop_front();
307 // Invalidate the rectangle
308 CRect rect (pos.x, pos.y, width, height);
309 ITexture::touchRect (rect);
311 // ** Return some values
313 // Rotation flag
314 bRot = ( pPatch->getOrderS() < pPatch->getOrderT() );
316 // Scale is the same for all
317 farUScale=(float)(width-1)/(float)NL_FAR_TEXTURE_EDGE_SIZE;
318 farVScale=(float)(height-1)/(float)NL_FAR_TEXTURE_EDGE_SIZE;
320 // UBias is the same for all
321 farUBias=((float)pos.x+0.5f)/(float)NL_FAR_TEXTURE_EDGE_SIZE;
323 // UBias is the same for all
324 farVBias=((float)pos.y+0.5f)/(float)NL_FAR_TEXTURE_EDGE_SIZE;
328 // ***************************************************************************
329 // Remove a patch in the CTexture Patch
330 void CTextureFar::removePatch (CPatch *pPatch, uint farIndex)
332 // must be found
333 CPatchIdent pid;
334 pid.Patch= pPatch;
335 pid.FarIndex= farIndex;
336 TPatchToPosMap::iterator it= _PatchToPosMap.find(pid);
337 nlassert( it != _PatchToPosMap.end() );
339 // get the pos where this patch texture is stored
340 CVector2s pos= it->second;
342 // If I erase the patch wihch must next UL, then update UL
343 if( it == _ItULPatch )
344 _ItULPatch++;
346 // erase from the 1st map
347 _PatchToPosMap.erase(it);
349 // erase from the second map
350 _PosToPatchMap.erase(pos);
352 // Append to the free list.
353 uint width=(pPatch->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
354 uint height=(pPatch->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
355 if(width<height)
356 std::swap(width, height);
357 uint freeListId= getFreeListId(width, height);
358 _FreeSpaces[freeListId].push_back(pos);
362 // ***************************************************************************
363 uint CTextureFar::touchPatchULAndNext()
365 // if there is still a patch here
366 if( _ItULPatch!=_PatchToPosMap.end() )
368 // Position of the invalide rectangle
369 int x = _ItULPatch->second.x;
370 int y = _ItULPatch->second.y;
371 uint farIndex= _ItULPatch->first.FarIndex;
372 CPatch *pPatch= _ItULPatch->first.Patch;
374 // recompute the correct size.
375 uint width=(pPatch->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
376 uint height=(pPatch->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(farIndex-1);
377 if(width<height)
378 std::swap(width, height);
380 // Invalidate the associated rectangle
381 CRect rect (x, y, width, height);
382 ITexture::touchRect (rect);
384 // Go next.
385 _ItULPatch++;
387 // return number of pixels touched
388 return width * height;
390 else
392 // no touch
393 return 0;
398 // ***************************************************************************
399 void CTextureFar::startPatchULTouch()
401 _ItULPatch= _PatchToPosMap.begin();
405 // ***************************************************************************
406 bool CTextureFar::endPatchULTouch() const
408 return _ItULPatch == _PatchToPosMap.end();
413 // ***************************************************************************
414 // Generate the texture. See ITexture::doGenerate().
415 void CTextureFar::doGenerate (bool async)
417 // Resize. But don't need to fill with 0!!
418 CBitmap::resize (NL_FAR_TEXTURE_EDGE_SIZE, NL_FAR_TEXTURE_EDGE_SIZE, RGBA, false);
420 // Rectangle invalidate ?
421 if (_ListInvalidRect.begin()!=_ListInvalidRect.end())
423 // Yes, rebuild only those rectangles.
425 // For each rectangle to compute
426 std::list<NLMISC::CRect>::iterator ite=_ListInvalidRect.begin();
427 while (ite!=_ListInvalidRect.end())
429 // Get the PatchIdent.
430 CVector2s pos((uint16)ite->left(), (uint16)ite->top());
431 TPosToPatchMap::iterator itPosToPid= _PosToPatchMap.find( pos );
432 // If the patch is still here...
433 if( itPosToPid!=_PosToPatchMap.end() )
435 // ReBuild the rectangle.
436 rebuildPatch (pos, itPosToPid->second);
439 // Next rectangle
440 ite++;
443 else
445 // Parse all existing Patchs.
446 TPosToPatchMap::iterator itPosToPid= _PosToPatchMap.begin();
447 while( itPosToPid!= _PosToPatchMap.end() )
449 // ReBuild the rectangle.
450 rebuildPatch (itPosToPid->first, itPosToPid->second);
452 itPosToPid++;
458 // ***************************************************************************
459 // Rebuild the rectangle passed with coordinate passed in parameter
460 void CTextureFar::rebuildPatch (const CVector2s texturePos, const CPatchIdent &pid)
462 uint x= texturePos.x;
463 uint y= texturePos.y;
465 // Patch pointer
466 CPatch* patch= pid.Patch;
468 // Check it exists
469 nlassert (patch);
471 // get the order
472 uint nS=patch->getOrderS();
473 uint nT=patch->getOrderT();
475 // get the size of the texture to compute
476 uint subTextWidth=(nS*NL_NUM_PIXELS_ON_FAR_TILE_EDGE)>>(pid.FarIndex-1);
478 // Check it is a 16 bits texture
479 nlassert (getPixelFormat()==RGBA);
481 // Check pixels exist
482 nlassert (getPixels().size()!=0);
484 // Base offset of the first pixel of the patch's texture
485 uint nBaseOffset;
487 // Delta to add to the destination offset when walk for a pixel to the right in the source tile
488 sint dstDeltaX;
490 // Delta to add to the destination offset when walk for a pixel to the bottom in the source tile
491 sint dstDeltaY;
493 // larger than higher (regular)
494 if (nS>=nT)
496 // Regular offset, top left
497 nBaseOffset= x + y*_Width;
499 // Regular deltaX, to the right
500 dstDeltaX=1;
502 // Regular deltaY, to the bottom
503 dstDeltaY=_Width;
505 // higher than larger (goofy), the patch is stored with a rotation of 1 (to the left of course)
506 else
508 // Goofy offset, bottom left
509 nBaseOffset= x + y*_Width;
510 nBaseOffset+=(subTextWidth-1)*_Width;
512 // Goofy deltaX, to the top
513 dstDeltaX=-(sint)_Width;
515 // Goofy deltaY, to the right
516 dstDeltaY=1;
519 // Compute the order of the patch
520 CTileFarBank::TFarOrder orderX=CTileFarBank::order0;
521 uint tileSize=0;
522 switch (pid.FarIndex)
524 case 3:
525 // Ratio 1:4
526 orderX=CTileFarBank::order2;
527 tileSize=NL_NUM_PIXELS_ON_FAR_TILE_EDGE>>2;
528 break;
529 case 2:
530 // Ratio 1:2
531 orderX=CTileFarBank::order1;
532 tileSize=NL_NUM_PIXELS_ON_FAR_TILE_EDGE>>1;
533 break;
534 case 1:
535 // Ratio 1:1
536 orderX=CTileFarBank::order0;
537 tileSize=NL_NUM_PIXELS_ON_FAR_TILE_EDGE;
538 break;
539 default:
540 // no!: must be one of the previous values
541 nlassert (0);
544 // Must have a far tile bank pointer set in the CFarTexture
545 nlassert (_Bank);
547 // For all the tiles in the textures
548 sint nTileInPatch=0;
550 // ** Fill the struct for the tile fill method for each layers
551 NL3D_CComputeTileFar TileFar;
552 TileFar.SrcDiffusePixels = NULL;
553 TileFar.SrcAdditivePixels = NULL;
554 TileFar.SrcDeltaX = 0;
555 TileFar.SrcDeltaY = 0;
556 TileFar.AsmMMX= false;
557 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
558 TileFar.AsmMMX= NLMISC::CSystemInfo::hasMMX();
559 #endif
561 // Destination pointer
563 // Destination delta
564 TileFar.DstDeltaX=dstDeltaX;
565 TileFar.DstDeltaY=dstDeltaY;
567 // ** Build expand lightmap..
568 NL3D_CExpandLightmap lightMap;
570 // Fill the structure
571 lightMap.MulFactor=tileSize;
572 lightMap.ColorTile=&patch->TileColors[0];
573 lightMap.Width=nS+1;
574 lightMap.Height=nT+1;
575 lightMap.StaticLightColor=patch->getZone()->getLandscape()->getStaticLight();
576 lightMap.DstPixels=_LightmapExpanded;
577 // Compute current TLI colors.
578 patch->computeCurrentTLILightmapDiv2(_TileTLIColors);
579 lightMap.TLIColor= _TileTLIColors;
581 // Expand the shadowmap
582 patch->unpackShadowMap (_LumelExpanded);
583 lightMap.LumelTile=_LumelExpanded;
585 // Expand the patch lightmap now
586 NL3D_expandLightmap (&lightMap);
588 // DeltaY for lightmap
589 TileFar.SrcLightingDeltaY=nS*tileSize;
591 // Base Dst pointer on the tile line
592 uint nBaseDstTileLine=nBaseOffset;
593 for (uint t=0; t<nT; t++)
595 // Base Dst pointer on the tile
596 uint nBaseDstTilePixels=nBaseDstTileLine;
598 // For each tile of the line
599 for (uint s=0; s<nS; s++)
601 // Base pointer of the destination texture
602 TileFar.DstPixels=(CRGBA*)&(getPixels()[0])+nBaseDstTilePixels;
604 // Lightmap pointer
605 TileFar.SrcLightingPixels=_LightmapExpanded+(s*tileSize)+(t*nS*tileSize*tileSize);
607 // For each layer of the tile
608 for (sint l=0; l<3; l++)
610 // Use of additive in this layer ?
611 bool bAdditive=false;
613 // Size of the edge far tile
614 TileFar.Size=tileSize;
616 // Get a tile element reference for this tile.
617 const CTileElement &tileElm=patch->Tiles[nTileInPatch];
619 // Check for 256 tiles...
620 bool is256x256;
621 uint8 uvOff;
622 tileElm.getTile256Info(is256x256, uvOff);
624 // Get the tile number
625 sint tile=tileElm.Tile[l];
627 // Is the last layer ?
628 bool lastLayer = ( (l == 2) || (tileElm.Tile[l+1] == NL_TILE_ELM_LAYER_EMPTY) );
630 // Is an non-empty layer ?
631 if (tile!=NL_TILE_ELM_LAYER_EMPTY)
633 // Get the read only pointer on the far tile
634 const CTileFarBank::CTileFar* pTile=_Bank->getTile (tile);
636 // This pointer must not be null, else the farBank is not valid!
637 if (pTile==NULL)
638 nlwarning ("FarBank is not valid!");
640 // If the tile exist
641 if (pTile)
643 // Tile exist ?
644 if (pTile->isFill (CTileFarBank::diffuse))
646 // Get rotation of the tile in this layer
647 sint nRot=tileElm.getTileOrient(l);
649 // Source pointer
650 const CRGBA* pSrcDiffusePixels=pTile->getPixels (CTileFarBank::diffuse, orderX);
651 const CRGBA* pSrcAdditivePixels=NULL;
653 // Additive ?
654 if (pTile->isFill (CTileFarBank::additive))
656 // Use it
657 bAdditive=true;
659 // Get additive pointer
660 pSrcAdditivePixels=pTile->getPixels (CTileFarBank::additive, orderX);
663 // Source size
664 sint sourceSize;
666 // Source offset (for 256)
667 uint sourceOffset=0;
669 // 256 ?
670 if (is256x256)
672 // On the left ?
673 if (uvOff&0x02)
674 sourceOffset+=tileSize;
676 // On the bottom ?
677 if ((uvOff==1)||(uvOff==2))
678 sourceOffset+=2*tileSize*tileSize;
680 // Yes, 256
681 sourceSize=tileSize<<1;
683 else
685 // No, 128
686 sourceSize=tileSize;
689 // Compute offset and deltas
690 switch (nRot)
692 case 0:
693 // Source pointers
694 TileFar.SrcDiffusePixels=pSrcDiffusePixels+sourceOffset;
695 TileFar.SrcAdditivePixels=pSrcAdditivePixels+sourceOffset;
697 // Source delta
698 TileFar.SrcDeltaX=1;
699 TileFar.SrcDeltaY=sourceSize;
700 break;
701 case 1:
703 // Source pointers
704 uint newOffset=sourceOffset+(tileSize-1);
705 TileFar.SrcDiffusePixels=pSrcDiffusePixels+newOffset;
706 TileFar.SrcAdditivePixels=pSrcAdditivePixels+newOffset;
708 // Source delta
709 TileFar.SrcDeltaX=sourceSize;
710 TileFar.SrcDeltaY=-1;
712 break;
713 case 2:
715 // Destination pointer
716 uint newOffset=sourceOffset+(tileSize-1)*sourceSize+tileSize-1;
717 TileFar.SrcDiffusePixels=pSrcDiffusePixels+newOffset;
718 TileFar.SrcAdditivePixels=pSrcAdditivePixels+newOffset;
720 // Source delta
721 TileFar.SrcDeltaX=-1;
722 TileFar.SrcDeltaY=-sourceSize;
724 break;
725 case 3:
727 // Destination pointer
728 uint newOffset=sourceOffset+(tileSize-1)*sourceSize;
729 TileFar.SrcDiffusePixels=pSrcDiffusePixels+newOffset;
730 TileFar.SrcAdditivePixels=pSrcAdditivePixels+newOffset;
732 // Source delta
733 TileFar.SrcDeltaX=-sourceSize;
734 TileFar.SrcDeltaY=1;
736 break;
739 // *** Draw the layer
741 // Alpha layer ?
742 if (l>0)
744 // Additive layer ?
745 if (bAdditive && lastLayer)
746 NL3D_drawFarTileInFarTextureAdditiveAlpha (&TileFar);
747 else // No additive layer
748 NL3D_drawFarTileInFarTextureAlpha (&TileFar);
750 else // no alpha
752 // Additive layer ?
753 if (bAdditive && lastLayer)
754 NL3D_drawFarTileInFarTextureAdditive (&TileFar);
755 else // No additive layer
756 NL3D_drawFarTileInFarTexture (&TileFar);
761 else
762 // Stop, no more layer
763 break;
766 // Next tile
767 nTileInPatch++;
769 // Next tile on the line
770 nBaseDstTilePixels+=dstDeltaX*tileSize;
773 // Next line of tiles
774 nBaseDstTileLine+=dstDeltaY*tileSize;
779 } // NL3D
782 // ***************************************************************************
783 // ***************************************************************************
784 // NL3D_ExpandLightmap. C and Asm Part
785 // ***************************************************************************
786 // ***************************************************************************
788 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
791 // EMMS called not in __asm block.
792 # pragma warning (disable : 4799)
795 // ***************************************************************************
// Execute the EMMS instruction. Compiled only when NL_OS_WINDOWS is defined and NL_NO_ASM is not.
// NB: meant to be called once after the MMX helpers of this file, which do not execute EMMS themselves.
796 inline void NL3D_asmEndMMX()
798 __asm
800 // close MMX computation
801 emms
806 // ***************************************************************************
/** Expand a line of 565 colors to CRGBA with MMX, with linear interpolation between source colors.
 * NB: writing starts at pixel 1.
 */
// Resample a line of 16 bits 565 colors into 32 bits pixels, with linear interpolation.
// src: source colors, read as 16 bits words. For each pixel, words (u>>8) and (u>>8)+1 are both read.
// dst: destination line. Pixel 0 is not written: the first store is at dst + 4 bytes.
// du: step of the source coordinate u per pixel. u>>8 is the source index, u&0xFF the blend factor. u starts at du.
// len: number of pixels written. Nothing is done if 0.
// NB: EMMS is not executed here.
810 inline void NL3D_asmExpandLineColor565(const uint16 *src, CRGBA *dst, uint du, uint len)
// all-zero quad, loaded in mm7 for the unpack instructions
812 static uint64 blank = 0;
// Masks applied to the 2 colors at once (one color per dword).
// cF800/c07E0/c001F select the R/G/B field; cE000/c0600/c001C select the top bits of each field,
// which are copied into the low bits of the expanded 8 bits component.
813 static uint64 cF800 = INT64_CONSTANT (0x0000F8000000F800);
814 static uint64 cE000 = INT64_CONSTANT (0x0000E0000000E000);
815 static uint64 c07E0 = INT64_CONSTANT (0x000007E0000007E0);
816 static uint64 c0600 = INT64_CONSTANT (0x0000060000000600);
817 static uint64 c001F = INT64_CONSTANT (0x0000001F0000001F);
818 static uint64 c001C = INT64_CONSTANT (0x0000001C0000001C);
// the loop below is a do/while: it must not be entered with a null count
819 if(len==0)
820 return;
823 // Loop for pix.
824 __asm
826 movq mm7, blank
828 // start at pixel 1 => increment dst, and start u= du
829 mov esi, src
830 mov edi, dst
831 add edi, 4
832 mov ecx, len
833 mov edx, du
835 // Loop
836 myLoop:
839 // Read 565 colors
840 //----------
841 // index u.
842 mov ebx, edx
843 shr ebx, 8
845 // pack the 2 colors in eax: // Heax= color0, Leax= color1
846 xor eax, eax // avoid partial stall.
847 mov ax, [esi + ebx*2]
848 shl eax, 16
849 mov ax, [esi + ebx*2 +2]
851 // store and unpack in mm2: Hmm2= color0, Lmm2= color1
852 movd mm2, eax
853 punpcklwd mm2, mm7
855 // reset accumulator mm3 to black
856 movq mm3, mm7
858 // Expand 565 to 888: color0 and color1 in parallel
859 // R: 5 bits field to bits 3..7 of byte 0, top 3 bits of the field to bits 0..2
860 movq mm0, mm2
861 movq mm1, mm2
862 pand mm0, cF800
863 pand mm1, cE000
864 psrld mm0, 8
865 psrld mm1, 13
866 por mm3, mm0
867 por mm3, mm1
868 // G: 6 bits field to bits 2..7 of byte 1, top 2 bits of the field to bits 0..1
869 movq mm0, mm2
870 movq mm1, mm2
871 pand mm0, c07E0
872 pand mm1, c0600
873 pslld mm0, 5
874 psrld mm1, 1
875 por mm3, mm0
876 por mm3, mm1
877 // B: 5 bits field to bits 3..7 of byte 2, top 3 bits of the field to bits 0..2
878 movq mm0, mm2
879 movq mm1, mm2
880 pand mm0, c001F
881 pand mm1, c001C
882 pslld mm0, 19
883 pslld mm1, 14
884 por mm3, mm0
885 por mm3, mm1
887 // unpack mm3 quad to mm0=color0 and mm1=color1.
888 movq mm0, mm3
889 movq mm1, mm3
890 psrlq mm0, 32
893 // Blend.
894 //----------
895 // blend factors
896 mov ebx, edx
897 mov eax, 256
899 and ebx, 0xFF
900 sub eax, ebx
902 movd mm2, ebx // mm2= factor
903 movd mm3, eax // mm3= 1-factor
904 // replicate to the 4 words.
905 punpckldq mm2, mm2 // mm2= 0000 00AA 0000 00AA
906 punpckldq mm3, mm3 // mm3= 0000 00AA 0000 00AA
907 packssdw mm2, mm2 // mm2= 00AA 00AA 00AA 00AA
908 packssdw mm3, mm3 // mm3= 00AA 00AA 00AA 00AA
910 // mul
911 punpcklbw mm0, mm7
912 punpcklbw mm1, mm7
913 pmullw mm0, mm3 // color0*(1-factor)
914 pmullw mm1, mm2 // color1*factor
915 // add, and unpack: (color0*(256-factor) + color1*factor) >> 8
916 paddusw mm0, mm1
917 psrlw mm0, 8
918 packuswb mm0, mm0
920 // store
921 movd [edi], mm0
924 // next pix
925 add edx, du
926 add edi, 4
927 dec ecx
928 jnz myLoop
933 // ***************************************************************************
/** Expand a line of 8888 (CRGBA) colors with MMX, with linear interpolation between source colors.
 * NB: writing starts at pixel 1.
 */
// Resample a line of 32 bits colors into 32 bits pixels, with linear interpolation.
// src: source colors, read as dwords. For each pixel, dwords (u>>8) and (u>>8)+1 are both read.
// dst: destination line. Pixel 0 is not written: the first store is at dst + 4 bytes.
// du: step of the source coordinate u per pixel. u>>8 is the source index, u&0xFF the blend factor. u starts at du.
// len: number of pixels written. Nothing is done if 0.
// NB: EMMS is not executed here.
937 inline void NL3D_asmExpandLineColor8888(const CRGBA *src, CRGBA *dst, uint du, uint len)
// all-zero quad, loaded in mm7 for the unpack instructions
939 static uint64 blank = 0;
// the loop below is a do/while: it must not be entered with a null count
940 if(len==0)
941 return;
944 // Loop for pix.
945 __asm
947 movq mm7, blank
949 // start at pixel 1 => increment dst, and start u= du
950 mov esi, src
951 mov edi, dst
952 add edi, 4
953 mov ecx, len
954 mov edx, du
956 // Loop
957 myLoop:
960 // Read 8888 colors
961 //----------
962 // index u.
963 mov ebx, edx
964 shr ebx, 8
966 // read the 2 colors: mm0= color0, mm1= color1
967 movd mm0 , [esi + ebx*4]
968 movd mm1 , [esi + ebx*4 + 4]
971 // Blend.
972 //----------
973 // blend factors
974 mov ebx, edx
975 mov eax, 256
977 and ebx, 0xFF
978 sub eax, ebx
980 movd mm2, ebx // mm2= factor
981 movd mm3, eax // mm3= 1-factor
982 // replicate to the 4 words.
983 punpckldq mm2, mm2 // mm2= 0000 00AA 0000 00AA
984 punpckldq mm3, mm3 // mm3= 0000 00AA 0000 00AA
985 packssdw mm2, mm2 // mm2= 00AA 00AA 00AA 00AA
986 packssdw mm3, mm3 // mm3= 00AA 00AA 00AA 00AA
988 // mul
989 punpcklbw mm0, mm7
990 punpcklbw mm1, mm7
991 pmullw mm0, mm3 // color0*(1-factor)
992 pmullw mm1, mm2 // color1*factor
993 // add, and unpack: (color0*(256-factor) + color1*factor) >> 8
994 paddusw mm0, mm1
995 psrlw mm0, 8
996 packuswb mm0, mm0
998 // store
999 movd [edi], mm0
1002 // next pix
1003 add edx, du
1004 add edi, 4
1005 dec ecx
1006 jnz myLoop
1011 // ***************************************************************************
/** Blend 2 lines of color into one line, with a single blend factor for the whole line.
 * NB: starts at pixel 0 here.
 */
// Blend 2 lines of 32 bits pixels: dst[i]= (src0[i]*(256-factor) + src1[i]*factor) >> 8, per component.
// dst: destination line, written from pixel 0.
// src0, src1: the 2 source lines.
// index: only its low 8 bits are used, as the blend factor.
// len: number of pixels. Nothing is done if 0.
// NB: EMMS is not executed here.
1015 inline void NL3D_asmBlendLines(CRGBA *dst, const CRGBA *src0, const CRGBA *src1, uint index, uint len)
// all-zero quad, loaded in mm7 for the unpack instructions
1017 static uint64 blank = 0;
// the loop below is a do/while: it must not be entered with a null count
1018 if(len==0)
1019 return;
1022 // Loop for pix.
1023 __asm
1025 movq mm7, blank
1027 // read the factor and expand it to 4 words.
1028 mov ebx, index
1029 mov eax, 256
1030 and ebx, 0xFF
1031 sub eax, ebx
1032 movd mm2, ebx // mm2= factor
1033 movd mm3, eax // mm3= 1-factor
1034 punpckldq mm2, mm2 // mm2= 0000 00AA 0000 00AA
1035 punpckldq mm3, mm3 // mm3= 0000 00AA 0000 00AA
1036 packssdw mm2, mm2 // mm2= 00AA 00AA 00AA 00AA
1037 packssdw mm3, mm3 // mm3= 00AA 00AA 00AA 00AA
1039 // setup ptrs
1040 mov esi, src0
1041 mov edx, src1
1042 sub edx, esi // difference between 2 src: src1 is then addressed as [esi+edx]
1043 mov edi, dst
1044 mov ecx, len
1046 // Loop
1047 myLoop:
1049 // Read
1050 movd mm0, [esi]
1051 movd mm1, [esi+edx]
1053 // mul
1054 punpcklbw mm0, mm7
1055 punpcklbw mm1, mm7
1056 pmullw mm0, mm3 // color0*(1-factor)
1057 pmullw mm1, mm2 // color1*factor
1058 // add, and unpack
1059 paddusw mm0, mm1
1060 psrlw mm0, 8
1061 packuswb mm0, mm0
1063 // store
1064 movd [edi], mm0
1067 // next pix
1068 add esi, 4
1069 add edi, 4
1070 dec ecx
1071 jnz myLoop
1076 // ***************************************************************************
/** Lightmap combining for Far level 2 (farthest).
 * Averages 16 lumels (4x4) per texel, and deals with UserColor and TLI.
 */
// Compute a line of texels. For each texel: sum a block of 4x4 lumels, divide by 16, convert the result
// to a color through colorMap, add the TLI color (saturated), multiply by the USC color, shift right by 7
// (saturated), and store.
// lumels: first line of lumels. 4 lumels are consumed per texel on each of 4 consecutive lines.
// colorMap: table of 4 bytes entries, indexed by the averaged lumel.
// srcTLIs, srcUSCs: one 4 bytes entry per texel, addressed with the same index as dst.
// dst: destination texels.
// lineWidth: offset in bytes between 2 lines of lumels.
// nbTexel: number of texels. Nothing is done if 0.
// NB: EMMS is not executed here.
1080 static void NL3D_asmAssembleShading1x1(const uint8 *lumels, const CRGBA *colorMap,
1081 const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint lineWidth, uint nbTexel)
1083 static uint64 blank = 0;
1084 if(nbTexel==0)
1085 return;
1087 // local var: TLIs and USCs are addressed relatively to the dst pointer (edi) in the loop
// NOTE(review): the pointer to uint casts assume uint is as wide as a pointer (32 bits build) - confirm
1088 uint offsetTLIs= ((uint)srcTLIs-(uint)dst);
1089 uint offsetUSCs= ((uint)srcUSCs-(uint)dst);
1091 // Loop for pix.
1092 __asm
1094 movq mm7, blank
1096 // setup ptrs
1097 mov esi, lumels
1098 mov edi, dst
1099 mov ecx, nbTexel
1101 // Loop
1102 myLoop:
1104 // Average shade part
1105 //------------
1106 mov ebx, colorMap
1107 mov edx, lineWidth
1109 // read and accumulate shade: 16 bits sum in ax, each carry out of al is added to ah
1110 xor eax,eax // avoid partial stall
1111 // add with line 0
1112 mov al, [esi + 0]
1113 add al, [esi + 1]
1114 adc ah, 0
1115 add al, [esi + 2]
1116 adc ah, 0
1117 add al, [esi + 3]
1118 adc ah, 0
1119 // add with line 1
1120 add al, [esi + edx + 0]
1121 adc ah, 0
1122 add al, [esi + edx + 1]
1123 adc ah, 0
1124 add al, [esi + edx + 2]
1125 adc ah, 0
1126 add al, [esi + edx + 3]
1127 adc ah, 0
1128 // add with line 2
1129 add al, [esi + edx*2 + 0]
1130 adc ah, 0
1131 add al, [esi + edx*2 + 1]
1132 adc ah, 0
1133 add al, [esi + edx*2 + 2]
1134 adc ah, 0
1135 add al, [esi + edx*2 + 3]
1136 adc ah, 0
1137 // add with line 3 (edx= 3*lineWidth)
1138 lea edx, [edx + edx*2]
1139 add al, [esi + edx + 0]
1140 adc ah, 0
1141 add al, [esi + edx + 1]
1142 adc ah, 0
1143 add al, [esi + edx + 2]
1144 adc ah, 0
1145 add al, [esi + edx + 3]
1146 adc ah, 0
1147 // average
1148 shr eax, 4
1150 // convert to RGBA from the color Map
1151 movd mm0, [ebx + eax*4]
1153 // Assemble part
1154 //------------
1155 mov edx, offsetTLIs
1156 mov ebx, offsetUSCs
1158 // Add with TLI, and clamp.
// NOTE(review): paddusb with a memory operand reads 8 bytes, ie 4 bytes past the current TLI entry - confirm the buffer allows it
1159 paddusb mm0, [edi + edx]
1161 // mul with USC
1162 movd mm1, [edi + ebx]
1163 punpcklbw mm0, mm7
1164 punpcklbw mm1, mm7
1165 pmullw mm0, mm1
1166 // unpack
1167 psrlw mm0, 7
1168 packuswb mm0, mm0
1170 // store
1171 movd [edi], mm0
1174 // next pix
1175 add esi, 4 // skip 4 lumels
1176 add edi, 4 // next texel
1177 dec ecx
1178 jnz myLoop
1183 // ***************************************************************************
/** Lightmap combining for Far level 1 (middle).
 * Averages 4 lumels (2x2) per texel, and deals with UserColor and TLI.
 */
// Compute a line of texels. For each texel: sum a block of 2x2 lumels, divide by 4, convert the result
// to a color through colorMap, add the TLI color (saturated), multiply by the USC color, shift right by 7
// (saturated), and store.
// lumels: first line of lumels. 2 lumels are consumed per texel on each of 2 consecutive lines.
// colorMap: table of 4 bytes entries, indexed by the averaged lumel.
// srcTLIs, srcUSCs: one 4 bytes entry per texel, addressed with the same index as dst.
// dst: destination texels.
// lineWidth: offset in bytes between 2 lines of lumels.
// nbTexel: number of texels. Nothing is done if 0.
// NB: EMMS is not executed here.
1187 static void NL3D_asmAssembleShading2x2(const uint8 *lumels, const CRGBA *colorMap,
1188 const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint lineWidth, uint nbTexel)
1190 static uint64 blank = 0;
1191 if(nbTexel==0)
1192 return;
1194 // local var: TLIs and USCs are addressed relatively to the dst pointer (edi) in the loop
// NOTE(review): the pointer to uint casts assume uint is as wide as a pointer (32 bits build) - confirm
1195 uint offsetTLIs= ((uint)srcTLIs-(uint)dst);
1196 uint offsetUSCs= ((uint)srcUSCs-(uint)dst);
1198 // Loop for pix.
1199 __asm
1201 movq mm7, blank
1203 // setup ptrs
1204 mov esi, lumels
1205 mov edi, dst
1206 mov ecx, nbTexel
1208 // Loop
1209 myLoop:
1211 // Average shade part
1212 //------------
1213 mov ebx, colorMap
1214 mov edx, lineWidth
1216 // read and accumulate shade: 16 bits sum in ax, each carry out of al is added to ah
1217 xor eax,eax // avoid partial stall
1218 mov al, [esi] // read lumel
1219 // add with nbors
1220 add al, [esi + 1]
1221 adc ah, 0
1222 add al, [esi + edx]
1223 adc ah, 0
1224 add al, [esi + edx + 1]
1225 adc ah, 0
1226 // average
1227 shr eax, 2
1229 // convert to RGBA from the color Map
1230 movd mm0, [ebx + eax*4]
1232 // Assemble part
1233 //------------
1234 mov edx, offsetTLIs
1235 mov ebx, offsetUSCs
1237 // Add with TLI, and clamp.
// NOTE(review): paddusb with a memory operand reads 8 bytes, ie 4 bytes past the current TLI entry - confirm the buffer allows it
1238 paddusb mm0, [edi + edx]
1240 // mul with USC
1241 movd mm1, [edi + ebx]
1242 punpcklbw mm0, mm7
1243 punpcklbw mm1, mm7
1244 pmullw mm0, mm1
1245 // unpack
1246 psrlw mm0, 7
1247 packuswb mm0, mm0
1249 // store
1250 movd [edi], mm0
1253 // next pix
1254 add esi, 2 // skip 2 lumels
1255 add edi, 4 // next texel
1256 dec ecx
1257 jnz myLoop
1262 // ***************************************************************************
1263 # pragma warning (disable : 4731) // frame pointer register 'ebp' modified by inline assembly code
/** Lightmap combining for Far level 0 (nearest).
 * Reads 1 lumel per texel, and deals with UserColor and TLI.
 */
// Compute a line of texels. For each texel: read 1 lumel, convert it to a color through colorMap,
// add the TLI color (saturated), multiply by the USC color, shift right by 7 (saturated), and store.
// lumels: one lumel consumed per texel.
// colorMap: table of 4 bytes entries, indexed by the lumel.
// srcTLIs, srcUSCs: one 4 bytes entry per texel, addressed with the same index as dst.
// dst: destination texels.
// nbTexel: number of texels. Nothing is done if 0.
// NB: ebp is saved, used as the colorMap base during the loop, and restored at the end.
// NB: EMMS is not executed here.
1267 static void NL3D_asmAssembleShading4x4(const uint8 *lumels, const CRGBA *colorMap,
1268 const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint nbTexel)
1270 static uint64 blank = 0;
1271 if(nbTexel==0)
1272 return;
1274 // Loop for pix.
1275 __asm
1277 // Use ebp as a register for faster access...
1278 push ebp
1280 movq mm7, blank
1282 // setup ptrs
1283 mov esi, lumels
1284 mov edi, dst
1285 mov edx, srcTLIs
1286 sub edx, edi // difference src and dest
1287 mov ebx, srcUSCs
1288 sub ebx, edi // difference src and dest
1289 mov ecx, nbTexel
1291 // set ebp after reading locals... (no parameter is read between this point and "pop ebp")
1292 mov ebp, colorMap
1294 // Loop
1295 myLoop:
1297 // read shade RGBA into the color Map
1298 xor eax,eax // avoid partial stall
1299 mov al,[esi] // read lumel
1300 movd mm0, [ebp + eax*4]
1302 // Add with TLI, and clamp.
// NOTE(review): paddusb with a memory operand reads 8 bytes, ie 4 bytes past the current TLI entry - confirm the buffer allows it
1303 paddusb mm0, [edi + edx]
1305 // mul with USC
1306 movd mm1, [edi + ebx]
1307 punpcklbw mm0, mm7
1308 punpcklbw mm1, mm7
1309 pmullw mm0, mm1
1310 // unpack
1311 psrlw mm0, 7
1312 packuswb mm0, mm0
1314 // store
1315 movd [edi], mm0
1318 // next pix
1319 add esi, 1 // next lumel
1320 add edi, 4 // next texel
1321 dec ecx
1322 jnz myLoop
1324 // restore
1325 pop ebp
1330 # pragma warning (default : 4731) // frame pointer register 'ebp' modified by inline assembly code
1333 #else // NL_OS_WINDOWS
1335 // Dummy for non-windows platforms
1336 inline void NL3D_asmEndMMX() {}
1337 inline void NL3D_asmExpandLineColor565(const uint16 *src, CRGBA *dst, uint du, uint len) {}
1338 inline void NL3D_asmExpandLineColor8888(const CRGBA *src, CRGBA *dst, uint du, uint len) {}
1339 inline void NL3D_asmBlendLines(CRGBA *dst, const CRGBA *src0, const CRGBA *src1, uint index, uint len) {}
1340 static void NL3D_asmAssembleShading1x1(const uint8 *lumels, const CRGBA *colorMap,
1341 const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint lineWidth, uint nbTexel)
1344 static void NL3D_asmAssembleShading2x2(const uint8 *lumels, const CRGBA *colorMap,
1345 const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint lineWidth, uint nbTexel)
1348 static void NL3D_asmAssembleShading4x4(const uint8 *lumels, const CRGBA *colorMap,
1349 const CRGBA *srcTLIs, const CRGBA *srcUSCs, CRGBA *dst, uint nbTexel)
1353 #endif // NL_OS_WINDOWS
1356 // ***************************************************************************
1357 extern "C" void NL3D_expandLightmap (const NL3D_CExpandLightmap* pLightmap)
1359 bool asmMMX= false;
1360 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
1361 asmMMX= CSystemInfo::hasMMX();
1362 // A CTileColor must be a 565 only.
1363 nlassert(sizeof(CTileColor)==2);
1364 #endif
1366 // Expanded width
1367 uint dstWidth=(pLightmap->Width-1)*pLightmap->MulFactor;
1368 uint dstHeight=(pLightmap->Height-1)*pLightmap->MulFactor;
1370 // *** First expand user color and TLI colors
1371 // First pass, expand on U
1372 static CRGBA expandedUserColorLine[ (NL_MAX_TILES_BY_PATCH_EDGE+1)*
1373 (NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE ];
1374 static CRGBA expandedTLIColorLine[ (NL_MAX_TILES_BY_PATCH_EDGE+1)*
1375 (NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE ];
1376 // Second pass, expand on V.
1377 static CRGBA expandedUserColor[ (NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE *
1378 (NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE ];
1379 static CRGBA expandedTLIColor[ (NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE *
1380 (NL_MAX_TILES_BY_PATCH_EDGE+1)*NL_LUMEL_BY_TILE ];
1383 // ** Expand on U
1384 //=========
1385 uint u, v;
1387 // Expansion factor
1388 uint expandFactor=((pLightmap->Width-1)<<8)/(dstWidth-1);
1390 // Destination pointer
1391 CRGBA *expandedUserColorLinePtr= expandedUserColorLine;
1392 CRGBA *expandedTLIColorLinePtr= expandedTLIColorLine;
1394 // Source pointer
1395 const NL3D::CTileColor *colorTilePtr=pLightmap->ColorTile;
1396 const NLMISC::CRGBA *colorTLIPtr= pLightmap->TLIColor;
1398 // Go for U
1399 for (v=0; v<pLightmap->Height; v++)
1401 // First pixel
1402 expandedUserColorLinePtr[0].set565 (colorTilePtr[0].Color565);
1403 expandedTLIColorLinePtr[0]= colorTLIPtr[0];
1405 // MMX implementation.
1406 //-------------
1407 if(asmMMX)
1409 NL3D_asmExpandLineColor565(&colorTilePtr->Color565, expandedUserColorLinePtr, expandFactor, dstWidth-2);
1410 NL3D_asmExpandLineColor8888(colorTLIPtr, expandedTLIColorLinePtr, expandFactor, dstWidth-2);
1412 // C implementation
1413 //-------------
1414 else
1416 // Index next pixel
1417 uint srcIndexPixel=expandFactor;
1419 for (u=1; u<dstWidth-1; u++)
1421 // Check
1422 nlassert ( (u+v*dstWidth) < (sizeof(expandedUserColorLine)/sizeof(CRGBA)) );
1424 // Color index
1425 uint srcIndex=srcIndexPixel>>8;
1426 //nlassert (srcIndex>=0); // uint => always >= 0
1427 nlassert (srcIndex<pLightmap->Width-1);
1429 // Compute current color
1430 CRGBA color0;
1431 CRGBA color1;
1432 color0.A = 255;
1433 color0.set565 (colorTilePtr[srcIndex].Color565);
1434 color1.A = 255;
1435 color1.set565 (colorTilePtr[srcIndex+1].Color565);
1436 expandedUserColorLinePtr[u].blendFromui (color0, color1, srcIndexPixel&0xff);
1437 // Compute current TLI color
1438 color0= colorTLIPtr[srcIndex];
1439 color1= colorTLIPtr[srcIndex+1];
1440 expandedTLIColorLinePtr[u].blendFromui (color0, color1, srcIndexPixel&0xff);
1442 // Next index
1443 srcIndexPixel+=expandFactor;
1447 // Last pixel
1448 expandedUserColorLinePtr[dstWidth-1].set565 (colorTilePtr[pLightmap->Width-1].Color565);
1449 expandedTLIColorLinePtr[dstWidth-1]= colorTLIPtr[pLightmap->Width-1];
1451 // Next line
1452 expandedUserColorLinePtr+= dstWidth;
1453 expandedTLIColorLinePtr+= dstWidth;
1454 colorTilePtr+=pLightmap->Width;
1455 colorTLIPtr+=pLightmap->Width;
1458 // stop MMX if used
1459 if(asmMMX)
1460 NL3D_asmEndMMX();
1462 // ** Expand on V
1463 //=========
1465 // Expansion factor
1466 expandFactor=((pLightmap->Height-1)<<8)/(dstHeight-1);
1468 // Destination pointer
1469 CRGBA *expandedUserColorPtr= expandedUserColor;
1470 CRGBA *expandedTLIColorPtr= expandedTLIColor;
1472 // Src pointer
1473 expandedUserColorLinePtr= expandedUserColorLine;
1474 expandedTLIColorLinePtr= expandedTLIColorLine;
1476 // Copy first row
1477 memcpy(expandedUserColorPtr, expandedUserColorLinePtr, dstWidth*sizeof(CRGBA));
1478 memcpy(expandedTLIColorPtr, expandedTLIColorLinePtr, dstWidth*sizeof(CRGBA));
1480 // Next line
1481 expandedUserColorPtr+=dstWidth;
1482 expandedTLIColorPtr+=dstWidth;
1484 // Index next pixel
1485 uint indexPixel=expandFactor;
1487 // Go for V
1488 for (v=1; v<dstHeight-1; v++)
1490 // Color index
1491 uint index=indexPixel>>8;
1493 // Source pointer
1494 CRGBA *colorTilePtr0= expandedUserColorLine + index*dstWidth;
1495 CRGBA *colorTilePtr1= expandedUserColorLine + (index+1)*dstWidth;
1496 CRGBA *colorTLIPtr0= expandedTLIColorLine + index*dstWidth;
1497 CRGBA *colorTLIPtr1= expandedTLIColorLine + (index+1)*dstWidth;
1499 // MMX implementation.
1500 //-------------
1501 if(asmMMX)
1503 NL3D_asmBlendLines(expandedUserColorPtr, colorTilePtr0, colorTilePtr1, indexPixel, dstWidth);
1504 NL3D_asmBlendLines(expandedTLIColorPtr, colorTLIPtr0, colorTLIPtr1, indexPixel, dstWidth);
1506 // C implementation
1507 //-------------
1508 else
1510 // Copy the row
1511 for (u=0; u<dstWidth; u++)
1513 expandedUserColorPtr[u].blendFromui (colorTilePtr0[u], colorTilePtr1[u], indexPixel&0xff);
1514 expandedTLIColorPtr[u].blendFromui (colorTLIPtr0[u], colorTLIPtr1[u], indexPixel&0xff);
1518 // Next index
1519 indexPixel+=expandFactor;
1521 // Next line
1522 expandedUserColorPtr+=dstWidth;
1523 expandedTLIColorPtr+=dstWidth;
1526 // stop MMX if used
1527 if(asmMMX)
1528 NL3D_asmEndMMX();
1530 // Last row
1531 // Destination pointer
1532 expandedUserColorPtr= expandedUserColor + dstWidth*(dstHeight-1);
1533 expandedTLIColorPtr= expandedTLIColor + dstWidth*(dstHeight-1);
1534 // Src pointer
1535 expandedUserColorLinePtr= expandedUserColorLine + dstWidth*(pLightmap->Height-1);
1536 expandedTLIColorLinePtr= expandedTLIColorLine + dstWidth*(pLightmap->Height-1);
1538 // Copy last row
1539 memcpy(expandedUserColorPtr, expandedUserColorLinePtr, dstWidth*sizeof(CRGBA));
1540 memcpy(expandedTLIColorPtr, expandedTLIColorLinePtr, dstWidth*sizeof(CRGBA));
1542 // *** Now combine with shading
1543 //=========
1545 // Switch to the optimal method for each expansion value
1546 switch (pLightmap->MulFactor)
1548 case 1:
1550 // Make 4x4 -> 1x1 blend
1551 CRGBA *lineUSCPtr= expandedUserColor;
1552 CRGBA *lineTLIPtr= expandedTLIColor;
1553 CRGBA *lineDestPtr=pLightmap->DstPixels;
1554 const uint8 *lineLumelPtr=pLightmap->LumelTile;
1555 uint lineWidth=dstWidth<<2;
1556 uint lineWidthx2=lineWidth<<1;
1557 uint lineWidthx3=lineWidthx2+lineWidth;
1558 uint lineWidthx4=lineWidth<<2;
1560 // For each line
1561 for (v=0; v<dstHeight; v++)
1563 // MMX implementation.
1564 //-------------
1565 if(asmMMX)
1567 NL3D_asmAssembleShading1x1(lineLumelPtr, pLightmap->StaticLightColor, lineTLIPtr, lineUSCPtr, lineDestPtr,
1568 lineWidth, dstWidth);
1570 // C implementation
1571 //-------------
1572 else
1574 // For each lumel block
1575 for (u=0; u<dstWidth; u++)
1577 // index
1578 uint lumelIndex=u<<2;
1580 // Shading is filtred
1581 uint shading=
1582 ((uint)lineLumelPtr[lumelIndex]+(uint)lineLumelPtr[lumelIndex+1]+(uint)lineLumelPtr[lumelIndex+2]+(uint)lineLumelPtr[lumelIndex+3]
1583 +(uint)lineLumelPtr[lumelIndex+lineWidth]+(uint)lineLumelPtr[lumelIndex+1+lineWidth]+(uint)lineLumelPtr[lumelIndex+2+lineWidth]+(uint)lineLumelPtr[lumelIndex+3+lineWidth]
1584 +(uint)lineLumelPtr[lumelIndex+lineWidthx2]+(uint)lineLumelPtr[lumelIndex+1+lineWidthx2]+(uint)lineLumelPtr[lumelIndex+2+lineWidthx2]+(uint)lineLumelPtr[lumelIndex+3+lineWidthx2]
1585 +(uint)lineLumelPtr[lumelIndex+lineWidthx3]+(uint)lineLumelPtr[lumelIndex+1+lineWidthx3]+(uint)lineLumelPtr[lumelIndex+2+lineWidthx3]+(uint)lineLumelPtr[lumelIndex+3+lineWidthx3]
1586 )>>4;
1588 // Add shading with TLI color.
1589 CRGBA col;
1590 col.addRGBOnly(pLightmap->StaticLightColor[shading], lineTLIPtr[u]);
1592 // Mul by the userColor
1593 lineDestPtr[u].modulateFromColorRGBOnly(col, lineUSCPtr[u]);
1595 lineDestPtr[u].R = min(((uint)lineDestPtr[u].R)*2, 255U);
1596 lineDestPtr[u].G = min(((uint)lineDestPtr[u].G)*2, 255U);
1597 lineDestPtr[u].B = min(((uint)lineDestPtr[u].B)*2, 255U);
1601 // Next line
1602 lineUSCPtr+=dstWidth;
1603 lineTLIPtr+=dstWidth;
1604 lineDestPtr+=dstWidth;
1605 lineLumelPtr+=lineWidthx4;
1607 break;
1609 case 2:
1611 // Make 2x2 -> 1x1 blend
1612 CRGBA *lineUSCPtr= expandedUserColor;
1613 CRGBA *lineTLIPtr= expandedTLIColor;
1614 CRGBA *lineDestPtr=pLightmap->DstPixels;
1615 const uint8 *lineLumelPtr=pLightmap->LumelTile;
1616 uint lineWidth=dstWidth*2;
1617 uint lineWidthx2=lineWidth<<1;
1619 // For each line
1620 for (v=0; v<dstHeight; v++)
1622 // MMX implementation.
1623 //-------------
1624 if(asmMMX)
1626 NL3D_asmAssembleShading2x2(lineLumelPtr, pLightmap->StaticLightColor, lineTLIPtr, lineUSCPtr, lineDestPtr,
1627 lineWidth, dstWidth);
1629 // C implementation
1630 //-------------
1631 else
1633 // For each lumel block
1634 for (u=0; u<dstWidth; u++)
1636 // index
1637 uint lumelIndex=u<<1;
1639 // Shading is filtred
1640 uint shading=
1641 ((uint)lineLumelPtr[lumelIndex]+(uint)lineLumelPtr[lumelIndex+1]+(uint)lineLumelPtr[lumelIndex+lineWidth]+(uint)lineLumelPtr[lumelIndex+1+lineWidth])>>2;
1643 // Add shading with TLI color.
1644 CRGBA col;
1645 col.addRGBOnly(pLightmap->StaticLightColor[shading], lineTLIPtr[u]);
1647 // Mul by the userColor
1648 lineDestPtr[u].modulateFromColorRGBOnly(col, lineUSCPtr[u]);
1650 lineDestPtr[u].R = min(((uint)lineDestPtr[u].R)*2, 255U);
1651 lineDestPtr[u].G = min(((uint)lineDestPtr[u].G)*2, 255U);
1652 lineDestPtr[u].B = min(((uint)lineDestPtr[u].B)*2, 255U);
1656 // Next line
1657 lineUSCPtr+=dstWidth;
1658 lineTLIPtr+=dstWidth;
1659 lineDestPtr+=dstWidth;
1660 lineLumelPtr+=lineWidthx2;
1662 break;
1665 case 4:
1666 // Make copy
1667 CRGBA *lineUSCPtr= expandedUserColor;
1668 CRGBA *lineTLIPtr= expandedTLIColor;
1669 CRGBA *lineDestPtr=pLightmap->DstPixels;
1670 const uint8 *lineLumelPtr=pLightmap->LumelTile;
1671 uint nbTexel=dstWidth*dstHeight;
1673 // MMX implementation.
1674 //-------------
1675 if(asmMMX)
1677 NL3D_asmAssembleShading4x4(lineLumelPtr, pLightmap->StaticLightColor, lineTLIPtr, lineUSCPtr, lineDestPtr,
1678 nbTexel);
1680 // C implementation
1681 //-------------
1682 else
1684 // For each pixel
1685 for (u=0; u<nbTexel; u++)
1687 // Shading is filtred
1688 uint shading=lineLumelPtr[u];
1690 // Add shading with TLI color.
1691 CRGBA col;
1692 col.addRGBOnly(pLightmap->StaticLightColor[shading], lineTLIPtr[u]);
1694 // Mul by the userColor
1695 lineDestPtr[u].modulateFromColorRGBOnly(col, lineUSCPtr[u]);
1697 lineDestPtr[u].R = min(((uint)lineDestPtr[u].R)*2, 255U);
1698 lineDestPtr[u].G = min(((uint)lineDestPtr[u].G)*2, 255U);
1699 lineDestPtr[u].B = min(((uint)lineDestPtr[u].B)*2, 255U);
1702 break;
1705 // stop MMX if used
1706 if(asmMMX)
1707 NL3D_asmEndMMX();
1712 // ***************************************************************************
1713 // ***************************************************************************
1714 // NL3D_drawFarTileInFar*. C and Asm Part
1715 // ***************************************************************************
1716 // ***************************************************************************
1719 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
1722 // ***************************************************************************
1723 inline void NL3D_asmModulateLineColors(CRGBA *dst, const CRGBA *src0, const CRGBA *src1,
1724 uint len, uint src0DeltaX, uint dstDeltaX)
1726 static uint64 blank= 0;
1727 if(len==0)
1728 return;
1730 __asm
1732 movq mm7, blank
1734 mov esi, src0 // esi point to src Pixels
1735 mov edx, src1 // edx point to src lighting pixels
1736 mov edi, dst
1737 mov ecx, len
1738 // compute increments for esi and edi
1739 mov eax, src0DeltaX
1740 mov ebx, dstDeltaX
1741 sal eax, 2
1742 sal ebx, 2
1744 myLoop:
1745 // read colors
1746 movd mm0, [esi]
1747 movd mm1, [edx]
1749 // mul mm0 and mm1
1750 punpcklbw mm0, mm7
1751 punpcklbw mm1, mm7
1752 pmullw mm0, mm1
1753 psrlw mm0, 8
1754 // pack
1755 packuswb mm0, mm0
1757 // out
1758 movd [edi], mm0
1760 // increment
1761 add esi, eax
1762 add edi, ebx
1763 add edx, 4
1764 dec ecx
1765 jnz myLoop
1770 // ***************************************************************************
1771 inline void NL3D_asmModulateAndBlendLineColors(CRGBA *dst, const CRGBA *src0, const CRGBA *src1,
1772 uint len, uint src0DeltaX, uint dstDeltaX)
1774 static uint64 blank= 0;
1775 static uint64 one= INT64_CONSTANT (0x0100010001000100);
1776 if(len==0)
1777 return;
1779 __asm
1781 movq mm7, blank
1782 movq mm6, one
1784 mov esi, src0 // esi point to src Pixels
1785 mov edx, src1 // edx point to src lighting pixels
1786 mov edi, dst
1787 mov ecx, len
1788 // compute increments for esi and edi
1789 mov eax, src0DeltaX
1790 mov ebx, dstDeltaX
1791 sal eax, 2
1792 sal ebx, 2
1794 myLoop:
1795 // read colors
1796 movd mm0, [esi]
1797 movd mm1, [edx]
1799 // save and unpack Alpha. NB: ABGR
1800 movq mm2, mm0
1801 psrld mm2, 24 // mm2= 0000 0000 0000 00AA
1802 punpckldq mm2, mm2 // mm2= 0000 00AA 0000 00AA
1803 packssdw mm2, mm2 // mm2= 00AA 00AA 00AA 00AA
1804 // negate with 256.
1805 movq mm3, mm6
1806 psubusw mm3, mm2
1808 // mul mm0 and mm1
1809 punpcklbw mm0, mm7
1810 punpcklbw mm1, mm7
1811 pmullw mm0, mm1
1812 psrlw mm0, 8
1814 // Alpha Blend with mm3 and mm2
1815 movd mm1, [edi] // read dest
1816 punpcklbw mm1, mm7
1817 pmullw mm0, mm2 // mm0= srcColor*A
1818 pmullw mm1, mm3 // mm1= dstColor*(1-A)
1820 // add and pack
1821 paddusw mm0, mm1
1822 psrlw mm0, 8
1823 packuswb mm0, mm0
1825 // out
1826 movd [edi], mm0
1828 // increment
1829 add esi, eax
1830 add edi, ebx
1831 add edx, 4
1832 dec ecx
1833 jnz myLoop
1838 #else // NL_OS_WINDOWS
1840 // Dummy for non-windows platforms
1841 inline void NL3D_asmModulateLineColors(CRGBA *dst, const CRGBA *src0, const CRGBA *src1,
1842 uint len, uint src0DeltaX, uint dstDeltaX)
1845 inline void NL3D_asmModulateAndBlendLineColors(CRGBA *dst, const CRGBA *src0, const CRGBA *src1,
1846 uint len, uint src0DeltaX, uint dstDeltaX)
1850 #endif
1852 // ***************************************************************************
1853 void NL3D_drawFarTileInFarTexture (const NL3D_CComputeTileFar* pTileFar)
1855 // Pointer of the Src diffuse pixels
1856 const CRGBA* pSrcPixels=pTileFar->SrcDiffusePixels;
1858 // Pointer of the Dst pixels
1859 const CRGBA* pSrcLightPixels=pTileFar->SrcLightingPixels;
1861 // Pointer of the Dst pixels
1862 CRGBA* pDstPixels=pTileFar->DstPixels;
1864 // For each pixels
1865 int x, y;
1866 for (y=0; y<pTileFar->Size; y++)
1868 // MMX implementation
1869 //---------
1870 if(pTileFar->AsmMMX)
1872 NL3D_asmModulateLineColors(pDstPixels, pSrcPixels, pSrcLightPixels,
1873 pTileFar->Size, pTileFar->SrcDeltaX, pTileFar->DstDeltaX);
1875 // C Implementation.
1876 //---------
1877 else
1879 // Pointer of the source line
1880 const CRGBA* pSrcLine=pSrcPixels;
1882 // Pointer of the source lighting line
1883 const CRGBA* pSrcLightingLine=pSrcLightPixels;
1885 // Pointer of the destination line
1886 CRGBA* pDstLine=pDstPixels;
1888 // For each pixels on the line
1889 for (x=0; x<pTileFar->Size; x++)
1891 // Read and write a pixel
1892 pDstLine->R=(uint8)(((uint)pSrcLine->R*(uint)pSrcLightingLine->R)>>8);
1893 pDstLine->G=(uint8)(((uint)pSrcLine->G*(uint)pSrcLightingLine->G)>>8);
1894 pDstLine->B=(uint8)(((uint)pSrcLine->B*(uint)pSrcLightingLine->B)>>8);
1896 // Next pixel
1897 pSrcLine+=pTileFar->SrcDeltaX;
1898 pSrcLightingLine++;
1899 pDstLine+=pTileFar->DstDeltaX;
1903 // Next line
1904 pSrcPixels+=pTileFar->SrcDeltaY;
1905 pSrcLightPixels+=pTileFar->SrcLightingDeltaY;
1906 pDstPixels+=pTileFar->DstDeltaY;
1909 // stop MMX if used
1910 if(pTileFar->AsmMMX)
1911 NL3D_asmEndMMX();
1915 // ***************************************************************************
1916 void NL3D_drawFarTileInFarTextureAlpha (const NL3D_CComputeTileFar* pTileFar)
1918 // Pointer of the Src pixels
1919 const CRGBA* pSrcPixels=pTileFar->SrcDiffusePixels;
1921 // Pointer of the Dst pixels
1922 const CRGBA* pSrcLightPixels=pTileFar->SrcLightingPixels;
1924 // Pointer of the Dst pixels
1925 CRGBA* pDstPixels=pTileFar->DstPixels;
1927 // Fill the buffer with layer 0
1928 int x, y;
1929 for (y=0; y<pTileFar->Size; y++)
1931 // MMX implementation
1932 //---------
1933 if(pTileFar->AsmMMX)
1935 NL3D_asmModulateAndBlendLineColors(pDstPixels, pSrcPixels, pSrcLightPixels,
1936 pTileFar->Size, pTileFar->SrcDeltaX, pTileFar->DstDeltaX);
1938 // C Implementation.
1939 //---------
1940 else
1942 // Pointer of the source line
1943 const CRGBA* pSrcLine=pSrcPixels;
1945 // Pointer of the source lighting line
1946 const CRGBA* pSrcLightingLine=pSrcLightPixels;
1948 // Pointer of the Dst pixels
1949 CRGBA* pDstLine=pDstPixels;
1951 // For each pixels on the line
1952 for (x=0; x<pTileFar->Size; x++)
1954 // Read and write a pixel
1955 register uint alpha=pSrcLine->A;
1956 register uint oneLessAlpha=255-pSrcLine->A;
1957 pDstLine->R=(uint8)(((((uint)pSrcLine->R*(uint)pSrcLightingLine->R)>>8)*alpha+(uint)pDstLine->R*oneLessAlpha)>>8);
1958 pDstLine->G=(uint8)(((((uint)pSrcLine->G*(uint)pSrcLightingLine->G)>>8)*alpha+(uint)pDstLine->G*oneLessAlpha)>>8);
1959 pDstLine->B=(uint8)(((((uint)pSrcLine->B*(uint)pSrcLightingLine->B)>>8)*alpha+(uint)pDstLine->B*oneLessAlpha)>>8);
1961 // Next pixel
1962 pSrcLine+=pTileFar->SrcDeltaX;
1963 pSrcLightingLine++;
1964 pDstLine+=pTileFar->DstDeltaX;
1968 // Next line
1969 pSrcPixels+=pTileFar->SrcDeltaY;
1970 pSrcLightPixels+=pTileFar->SrcLightingDeltaY;
1971 pDstPixels+=pTileFar->DstDeltaY;
1974 // stop MMX if used
1975 if(pTileFar->AsmMMX)
1976 NL3D_asmEndMMX();
1980 // ***************************************************************************
1981 // TODO: asm implementation of this function \\//
1982 //#ifdef NL_NO_ASM
1983 void NL3D_drawFarTileInFarTextureAdditive (const NL3D_CComputeTileFar* pTileFar)
1985 // Pointer of the Src diffuse pixels
1986 const CRGBA* pSrcPixels=pTileFar->SrcDiffusePixels;
1988 // Pointer of the Src additive pixels
1989 const CRGBA* pSrcAddPixels=pTileFar->SrcAdditivePixels;
1991 // Pointer of the Dst pixels
1992 const CRGBA* pSrcLightPixels=pTileFar->SrcLightingPixels;
1994 // Pointer of the Dst pixels
1995 CRGBA* pDstPixels=pTileFar->DstPixels;
1997 // For each pixels
1998 int x, y;
1999 for (y=0; y<pTileFar->Size; y++)
2001 // Pointer of the source line
2002 const CRGBA* pSrcLine=pSrcPixels;
2004 // Pointer of the source line
2005 const CRGBA* pSrcAddLine=pSrcAddPixels;
2007 // Pointer of the source lighting line
2008 const CRGBA* pSrcLightingLine=pSrcLightPixels;
2010 // Pointer of the destination line
2011 CRGBA* pDstLine=pDstPixels;
2013 // For each pixels on the line
2014 for (x=0; x<pTileFar->Size; x++)
2016 // Read and write a pixel
2017 uint nTmp=(((uint)pSrcLine->R*(uint)pSrcLightingLine->R)>>8)+(uint)pSrcAddLine->R;
2018 if (nTmp>255)
2019 nTmp=255;
2020 pDstLine->R=(uint8)nTmp;
2021 nTmp=(((uint)pSrcLine->G*(uint)pSrcLightingLine->G)>>8)+(uint)pSrcAddLine->G;
2022 if (nTmp>255)
2023 nTmp=255;
2024 pDstLine->G=(uint8)nTmp;
2025 nTmp=(((uint)pSrcLine->B*(uint)pSrcLightingLine->B)>>8)+(uint)pSrcAddLine->B;
2026 if (nTmp>255)
2027 nTmp=255;
2028 pDstLine->B=(uint8)nTmp;
2030 // Next pixel
2031 pSrcLine+=pTileFar->SrcDeltaX;
2032 pSrcAddLine+=pTileFar->SrcDeltaX;
2033 pSrcLightingLine++;
2034 pDstLine+=pTileFar->DstDeltaX;
2037 // Next line
2038 pSrcPixels+=pTileFar->SrcDeltaY;
2039 pSrcAddPixels+=pTileFar->SrcDeltaY;
2040 pSrcLightPixels+=pTileFar->SrcLightingDeltaY;
2041 pDstPixels+=pTileFar->DstDeltaY;
2044 //#endif // NL_NO_ASM
2047 // ***************************************************************************
2048 // TODO: asm implementation of this function \\//
2049 //#ifdef NL_NO_ASM
2050 void NL3D_drawFarTileInFarTextureAdditiveAlpha (const NL3D_CComputeTileFar* pTileFar)
2052 // Pointer of the Src pixels
2053 const CRGBA* pSrcPixels=pTileFar->SrcDiffusePixels;
2055 // Pointer of the Src pixels
2056 const CRGBA* pSrcAddPixels=pTileFar->SrcAdditivePixels;
2058 // Pointer of the Src pixels
2059 const CRGBA* pSrcLightPixels=pTileFar->SrcLightingPixels;
2061 // Pointer of the Dst pixels
2062 CRGBA* pDstPixels=pTileFar->DstPixels;
2064 // Fill the buffer with layer 0
2065 int x, y;
2066 for (y=0; y<pTileFar->Size; y++)
2068 // Pointer of the source line
2069 const CRGBA* pSrcLine=pSrcPixels;
2071 // Pointer of the source line
2072 const CRGBA* pSrcAddLine=pSrcAddPixels;
2074 // Pointer of the source lighting line
2075 const CRGBA* pSrcLightingLine=pSrcLightPixels;
2077 // Pointer of the Dst pixels
2078 CRGBA* pDstLine=pDstPixels;
2080 // For each pixels on the line
2081 for (x=0; x<pTileFar->Size; x++)
2083 // Read and write a pixel
2084 register uint alpha=pSrcLine->A;
2085 register uint oneLessAlpha=255-pSrcLine->A;
2087 // Read and write a pixel
2088 uint nTmp=(((uint)pSrcLine->R*(uint)pSrcLightingLine->R)>>8)+(uint)pSrcAddLine->R;
2089 if (nTmp>255)
2090 nTmp=255;
2091 pDstLine->R=(uint8)((nTmp*alpha+pDstLine->R*oneLessAlpha)>>8);
2092 nTmp=(((uint)pSrcLine->G*(uint)pSrcLightingLine->G)>>8)+(uint)pSrcAddLine->G;
2093 if (nTmp>255)
2094 nTmp=255;
2095 pDstLine->G=(uint8)((nTmp*alpha+pDstLine->G*oneLessAlpha)>>8);
2096 nTmp=(((uint)pSrcLine->B*(uint)pSrcLightingLine->B)>>8)+(uint)pSrcAddLine->B;
2097 if (nTmp>255)
2098 nTmp=255;
2099 pDstLine->B=(uint8)((nTmp*alpha+pDstLine->B*oneLessAlpha)>>8);
2101 // Next pixel
2102 pSrcLine+=pTileFar->SrcDeltaX;
2103 pSrcAddLine+=pTileFar->SrcDeltaX;
2104 pSrcLightingLine++;
2105 pDstLine+=pTileFar->DstDeltaX;
2108 // Next line
2109 pSrcPixels+=pTileFar->SrcDeltaY;
2110 pSrcAddPixels+=pTileFar->SrcDeltaY;
2111 pSrcLightPixels+=pTileFar->SrcLightingDeltaY;
2112 pDstPixels+=pTileFar->DstDeltaY;
2115 //#endif // NL_NO_ASM