1 // NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
2 // Copyright (C) 2010 Winch Gate Property Limited
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Affero General Public License as
6 // published by the Free Software Foundation, either version 3 of the
7 // License, or (at your option) any later version.
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Affero General Public License for more details.
14 // You should have received a copy of the GNU Affero General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
19 #include "nel/3d/texture_far.h"
20 #include "nel/3d/tile_far_bank.h"
21 #include "nel/3d/patch.h"
22 #include "nel/3d/tile_color.h"
23 #include "nel/3d/zone.h"
24 #include "nel/3d/landscape.h"
25 #include "nel/misc/system_info.h"
28 using namespace NLMISC
;
38 CRGBA
CTextureFar::_LightmapExpanded
[NL_NUM_PIXELS_ON_FAR_TILE_EDGE
*NL_MAX_TILES_BY_PATCH_EDGE
*NL_NUM_PIXELS_ON_FAR_TILE_EDGE
*NL_MAX_TILES_BY_PATCH_EDGE
];
39 uint8
CTextureFar::_LumelExpanded
[(NL_MAX_TILES_BY_PATCH_EDGE
*NL_LUMEL_BY_TILE
+1)*(NL_MAX_TILES_BY_PATCH_EDGE
*NL_LUMEL_BY_TILE
+1)];
40 CRGBA
CTextureFar::_TileTLIColors
[(NL_MAX_TILES_BY_PATCH_EDGE
+1)*(NL_MAX_TILES_BY_PATCH_EDGE
+1)];
42 // ***************************************************************************
// Constructor. Visible statements: an nlctassert that NL_MAX_TILES_BY_PATCH_EDGE is 16,
// RGB565 upload format, Linear / LinearMipMapOff filtering, then a
// NL_NUM_FAR_BIGGEST_PATCH_PER_EDGE x NL_NUM_FAR_BIGGEST_PATCH_PER_EDGE grid of positions
// (spaced NL_MAX_FAR_PATCH_EDGE apart) is pushed on the free list of the biggest square size,
// and _ItULPatch is set to the end of _PatchToPosMap.
// NOTE(review): this listing is truncated -- braces, the declaration of `pos`, and the
// statements announced by the "releasable" and "init update Lighting" comments are not
// visible here; confirm against the complete file.
43 CTextureFar::CTextureFar()
45 /* NB: define Values work only if NL_MAX_TILES_BY_PATCH_EDGE is 16.
46 Else must change NL_MAX_FAR_EDGE and NL_NUM_RECTANGLE_RATIO
// NOTE(review): the terminator of the block comment opened above is not visible in this listing.
48 nlctassert(NL_MAX_TILES_BY_PATCH_EDGE
==16);
50 // This texture is releasable: it does not stay in standard memory after being uploaded into video memory.
53 // Init upload format 16 bits
54 setUploadFormat(RGB565
);
56 // Set filter mode. No mipmap!
57 setFilterMode (Linear
, LinearMipMapOff
);
63 // init update Lighting
67 // Start with all patches of max far size free.
// The free-list slot is computed once: every cell pushed below has the same (max, max) size.
68 uint freeListId
= getFreeListId(NL_MAX_FAR_PATCH_EDGE
, NL_MAX_FAR_PATCH_EDGE
);
69 for(uint i
=0;i
<NL_NUM_FAR_BIGGEST_PATCH_PER_EDGE
;i
++)
71 for(uint j
=0;j
<NL_NUM_FAR_BIGGEST_PATCH_PER_EDGE
;j
++)
74 pos
.x
= i
*NL_MAX_FAR_PATCH_EDGE
;
75 pos
.y
= j
*NL_MAX_FAR_PATCH_EDGE
;
77 // add this place to the free list.
78 _FreeSpaces
[freeListId
].push_back(pos
);
// No patch is pending for touchPatchULAndNext() until startPatchULTouch() is called.
83 _ItULPatch
= _PatchToPosMap
.end();
86 // ***************************************************************************
87 CTextureFar::~CTextureFar()
89 // verify the textureFar is correctly unlinked from any ciruclar list.
90 nlassert(_ULPrec
==this && _ULNext
==this);
94 // ***************************************************************************
// Moves this texture inside the circular list formed by _ULPrec / _ULNext, relative to textNext.
// Visible steps: the current neighbours are linked to each other, _ULPrec is taken from
// textNext->_ULPrec, then both neighbours are pointed back at this.
// textNext is dereferenced unconditionally in the visible code, so it must not be NULL.
// NOTE(review): no assignment of _ULNext is visible between the unlink and the relink
// (the embedded line numbers jump from 101 to 104); presumably `_ULNext = textNext`
// belongs there -- confirm against the complete file.
95 void CTextureFar::linkBeforeUL(CTextureFar
*textNext
)
99 // first, unlink others from me. NB: works even if _ULPrec==_ULNext==this.
100 _ULNext
->_ULPrec
= _ULPrec
;
101 _ULPrec
->_ULNext
= _ULNext
;
// take textNext's current predecessor as mine.
104 _ULPrec
= textNext
->_ULPrec
;
105 // link others to me.
106 _ULNext
->_ULPrec
= this;
107 _ULPrec
->_ULNext
= this;
110 // ***************************************************************************
// Removes this texture from the circular list formed by _ULPrec / _ULNext by linking its
// two neighbours to each other.
// NOTE(review): the destructor asserts _ULPrec==this && _ULNext==this, but no reset of this
// object's own links is visible here (embedded line numbers 116-120 are absent) -- presumably
// they are reset to `this` after the unlink; confirm against the complete file.
111 void CTextureFar::unlinkUL()
113 // first, unlink others from me. NB: works even if _ULPrec==_ULNext==this.
114 _ULNext
->_ULPrec
= _ULPrec
;
115 _ULPrec
->_ULNext
= _ULNext
;
122 // ***************************************************************************
123 uint
CTextureFar::getFreeListId(uint width
, uint height
)
125 nlassert(width
>=height
);
126 nlassert(isPowerOf2(width
));
127 nlassert(isPowerOf2(height
));
128 nlassert(width
<=NL_MAX_FAR_PATCH_EDGE
);
130 // compute the level index
131 uint sizeIndex
= getPowerOf2(NL_MAX_FAR_PATCH_EDGE
/ width
);
132 nlassert(sizeIndex
< NL_NUM_FAR_PATCH_EDGE_LEVEL
);
134 // Compute the aspect ratio index.
135 uint aspectRatioIndex
= getPowerOf2(width
/height
);
136 nlassert(aspectRatioIndex
< NL_NUM_FAR_RECTANGLE_RATIO
);
138 return sizeIndex
*NL_NUM_FAR_RECTANGLE_RATIO
+ aspectRatioIndex
;
142 // ***************************************************************************
// Takes a sub-texture size by reference (width >= height, both powers of 2); callers in this
// file use the boolean result as "a bigger size exists" and read the updated width / height.
// NOTE(review): only the asserts and one condition are visible in this listing; the
// statements that modify width / height and the return statements are missing, so the exact
// update rule cannot be documented from here -- confirm against the complete file.
143 bool CTextureFar::getUpperSize(uint
&width
, uint
&height
)
145 nlassert(width
>=height
);
146 nlassert(isPowerOf2(width
));
147 nlassert(isPowerOf2(height
));
149 // if height is smaller than width, then reduce the ratio
157 // else raise up to the next square level, if possible
158 if(width
<NL_MAX_FAR_PATCH_EDGE
)
170 // ***************************************************************************
// Looks for room for the far texture of pPatch at level farIndex, without allocating anything:
// first in the free list of the exact size, then in the free lists of the successive sizes
// returned by getUpperSize().
// NOTE(review): the return statements and the condition guarding the swap are not visible in
// this listing; per the remaining comments the result is a size to split on success and -1
// on failure -- confirm against the complete file.
171 sint
CTextureFar::tryAllocatePatch (CPatch
*pPatch
, uint farIndex
)
173 // get the size of the subtexture to allocate
// farIndex is unsigned and shifted by (farIndex-1): assumes farIndex >= 1 -- TODO confirm with callers.
174 uint width
=(pPatch
->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE
)>>(farIndex
-1);
175 uint height
=(pPatch
->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE
)>>(farIndex
-1);
177 // make width the biggest
179 std::swap(width
, height
);
181 // get where to find a subtexture
182 uint freeListId
= getFreeListId(width
, height
);
184 // if some place, ok!
185 if(!_FreeSpaces
[freeListId
].empty())
189 // try to get the next size
190 while( getUpperSize(width
, height
) )
192 freeListId
= getFreeListId(width
, height
);
193 // if some subtexture free
194 if(!_FreeSpaces
[freeListId
].empty())
196 // Ok! return the size of this texture we must split
201 // fail => no more space => -1
206 // ***************************************************************************
// Produces free entries of size (wson, hson) by splitting one free entry of the next upper
// size (as given by getUpperSize). If the upper free list is empty, recurses first so that a
// still bigger entry gets split. The father entry is removed from its free list and either
// 2 or 4 son positions are pushed on the son free list.
// NOTE(review): the declaration of `sonPos` and the condition choosing between the 2-son and
// the 4-son branch are not visible in this listing -- confirm against the complete file.
207 void CTextureFar::recursSplitNext(uint wson
, uint hson
)
209 // get the upper subTexture
210 uint wup
= wson
, hup
= hson
;
// nlverify: an upper size must exist, i.e. callers never ask to split the biggest size.
211 nlverify( getUpperSize(wup
, hup
) );
214 uint fatherListId
= getFreeListId(wup
, hup
);
216 // if must split bigger patch...
217 if(_FreeSpaces
[fatherListId
].empty())
219 // recurs, try to get a bigger subtexture and split it.
220 recursSplitNext(wup
, hup
);
223 // OK, now we should have a free entry.
224 nlassert( !_FreeSpaces
[fatherListId
].empty() );
226 // remove from free list, because it is split now!
227 CVector2s fatherPos
= _FreeSpaces
[fatherListId
].front();
228 _FreeSpaces
[fatherListId
].pop_front();
230 // Create New free rectangles for sons
231 uint sonListId
= getFreeListId(wson
, hson
);
234 // if my son is a rectangle son
237 // Then Add 2 free Spaces!
// 1st son: same origin as the father.
238 sonPos
.x
= fatherPos
.x
;
240 sonPos
.y
= fatherPos
.y
;
241 _FreeSpaces
[sonListId
].push_back(sonPos
);
// 2nd son: same x, shifted down by the son height.
243 sonPos
.y
= fatherPos
.y
+hson
;
244 _FreeSpaces
[sonListId
].push_back(sonPos
);
248 // Then Add 4 free Spaces!
// son at (x, y)
250 sonPos
.x
= fatherPos
.x
;
251 sonPos
.y
= fatherPos
.y
;
252 _FreeSpaces
[sonListId
].push_back(sonPos
);
// son at (x + wson, y)
254 sonPos
.x
= fatherPos
.x
+wson
;
255 sonPos
.y
= fatherPos
.y
;
256 _FreeSpaces
[sonListId
].push_back(sonPos
);
// son at (x, y + hson)
258 sonPos
.x
= fatherPos
.x
;
259 sonPos
.y
= fatherPos
.y
+hson
;
260 _FreeSpaces
[sonListId
].push_back(sonPos
);
// son at (x + wson, y + hson)
262 sonPos
.x
= fatherPos
.x
+wson
;
263 sonPos
.y
= fatherPos
.y
+hson
;
264 _FreeSpaces
[sonListId
].push_back(sonPos
);
270 // ***************************************************************************
// Reserves a sub-texture for pPatch at level farIndex and returns, through the reference
// parameters, the UV scale / bias locating it inside the far texture plus the rotation flag.
// Steps visible here: compute the size, split a bigger free entry if the exact free list is
// empty, take the front free position, register it in both maps, invalidate its rectangle.
// Asserts that free space exists and that (patch, farIndex) is not already registered.
// NOTE(review): the declaration of `pid`, the assignment of its patch member and the condition
// guarding the swap are not visible in this listing -- confirm against the complete file.
271 void CTextureFar::allocatePatch (CPatch
*pPatch
, uint farIndex
, float& farUScale
, float& farVScale
, float& farUBias
, float& farVBias
, bool& bRot
)
273 // get the size of the subtexture to allocate
// farIndex is unsigned and shifted by (farIndex-1): assumes farIndex >= 1 -- TODO confirm with callers.
274 uint width
=(pPatch
->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE
)>>(farIndex
-1);
275 uint height
=(pPatch
->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE
)>>(farIndex
-1);
277 // make width the biggest
279 std::swap(width
, height
);
281 // get where to find a subtexture
282 uint freeListId
= getFreeListId(width
, height
);
284 // if free list is empty, must split bigger patch...
285 if(_FreeSpaces
[freeListId
].empty())
287 // try to get a bigger subtexture and split it.
288 recursSplitNext(width
, height
);
291 // now the list should have some free space.
292 nlassert( !_FreeSpaces
[freeListId
].empty() );
293 CVector2s pos
= _FreeSpaces
[freeListId
].front();
295 // Allocate. Add this entry to the maps
298 pid
.FarIndex
= farIndex
;
299 // verify not already here.
300 nlassert( _PatchToPosMap
.find(pid
) == _PatchToPosMap
.end() );
301 _PatchToPosMap
[pid
]= pos
;
302 _PosToPatchMap
[pos
]= pid
;
304 // remove from free list.
305 _FreeSpaces
[freeListId
].pop_front();
307 // Invalidate the rectangle
308 CRect
rect (pos
.x
, pos
.y
, width
, height
);
309 ITexture::touchRect (rect
);
311 // ** Return some values
// Rotation flag: set when the patch has fewer tiles along S than along T.
314 bRot
= ( pPatch
->getOrderS() < pPatch
->getOrderT() );
316 // Scale is the same for all
// (size-1) for the scale and +0.5 for the bias: the mapped range goes from the centre of the
// first texel to the centre of the last one.
317 farUScale
=(float)(width
-1)/(float)NL_FAR_TEXTURE_EDGE_SIZE
;
318 farVScale
=(float)(height
-1)/(float)NL_FAR_TEXTURE_EDGE_SIZE
;
320 // UBias is the same for all
321 farUBias
=((float)pos
.x
+0.5f
)/(float)NL_FAR_TEXTURE_EDGE_SIZE
;
323 // VBias is the same for all
324 farVBias
=((float)pos
.y
+0.5f
)/(float)NL_FAR_TEXTURE_EDGE_SIZE
;
328 // ***************************************************************************
329 // Remove a patch in the CTexture Patch
// Releases the sub-texture registered for (pPatch, farIndex): erases it from both maps and
// appends its position to the free list matching its size. Asserts that the entry exists.
// NOTE(review): the declaration of `pid`, the assignment of its patch member, the body of the
// `it == _ItULPatch` test and the condition guarding the swap are not visible in this listing.
// The iterator is erased right after that test, so _ItULPatch presumably has to be moved off
// it first -- confirm against the complete file.
330 void CTextureFar::removePatch (CPatch
*pPatch
, uint farIndex
)
335 pid
.FarIndex
= farIndex
;
336 TPatchToPosMap::iterator it
= _PatchToPosMap
.find(pid
);
337 nlassert( it
!= _PatchToPosMap
.end() );
339 // get the pos where this patch texture is stored
// copied by value because `it` is erased below.
340 CVector2s pos
= it
->second
;
342 // If I erase the patch which must be touched next (UL), then update UL
343 if( it
== _ItULPatch
)
346 // erase from the 1st map
347 _PatchToPosMap
.erase(it
);
349 // erase from the second map
350 _PosToPatchMap
.erase(pos
);
352 // Append to the free list.
// same size computation as in allocatePatch, so the position returns to the list it came from.
353 uint width
=(pPatch
->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE
)>>(farIndex
-1);
354 uint height
=(pPatch
->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE
)>>(farIndex
-1);
356 std::swap(width
, height
);
357 uint freeListId
= getFreeListId(width
, height
);
358 _FreeSpaces
[freeListId
].push_back(pos
);
362 // ***************************************************************************
// If _ItULPatch designates a patch, invalidates the rectangle that patch occupies in the far
// texture and returns the number of pixels touched (width * height).
// NOTE(review): the condition guarding the swap, the advance of _ItULPatch suggested by the
// function name, and the return for the "no patch left" case are not visible in this listing
// -- confirm against the complete file.
363 uint
CTextureFar::touchPatchULAndNext()
365 // if there is still a patch here
366 if( _ItULPatch
!=_PatchToPosMap
.end() )
368 // Position of the invalid rectangle
369 int x
= _ItULPatch
->second
.x
;
370 int y
= _ItULPatch
->second
.y
;
371 uint farIndex
= _ItULPatch
->first
.FarIndex
;
372 CPatch
*pPatch
= _ItULPatch
->first
.Patch
;
374 // recompute the correct size.
375 uint width
=(pPatch
->getOrderS ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE
)>>(farIndex
-1);
376 uint height
=(pPatch
->getOrderT ()*NL_NUM_PIXELS_ON_FAR_TILE_EDGE
)>>(farIndex
-1);
378 std::swap(width
, height
);
380 // Invalidate the associated rectangle
381 CRect
rect (x
, y
, width
, height
);
382 ITexture::touchRect (rect
);
387 // return number of pixels touched
388 return width
* height
;
398 // ***************************************************************************
399 void CTextureFar::startPatchULTouch()
401 _ItULPatch
= _PatchToPosMap
.begin();
405 // ***************************************************************************
406 bool CTextureFar::endPatchULTouch() const
408 return _ItULPatch
== _PatchToPosMap
.end();
413 // ***************************************************************************
414 // Generate the texture. See ITexture::doGenerate().
// Generate the texture. See ITexture::doGenerate().
// Resizes the bitmap to NL_FAR_TEXTURE_EDGE_SIZE x NL_FAR_TEXTURE_EDGE_SIZE in RGBA, then
// either rebuilds only the patches whose position matches an invalidated rectangle, or
// rebuilds every patch registered in _PosToPatchMap.
// The `async` parameter is not used by any statement visible here.
// NOTE(review): neither loop shows its iterator increment, and the `else` separating the two
// paths is not visible in this listing -- confirm against the complete file.
415 void CTextureFar::doGenerate (bool async
)
417 // Resize. But don't need to fill with 0!!
418 CBitmap::resize (NL_FAR_TEXTURE_EDGE_SIZE
, NL_FAR_TEXTURE_EDGE_SIZE
, RGBA
, false);
420 // Rectangle invalidate ?
421 if (_ListInvalidRect
.begin()!=_ListInvalidRect
.end())
423 // Yes, rebuild only those rectangles.
425 // For each rectangle to compute
426 std::list
<NLMISC::CRect
>::iterator ite
=_ListInvalidRect
.begin();
427 while (ite
!=_ListInvalidRect
.end())
429 // Get the PatchIdent.
// the top-left corner of the rectangle is the key of _PosToPatchMap.
430 CVector2s
pos((uint16
)ite
->left(), (uint16
)ite
->top());
431 TPosToPatchMap::iterator itPosToPid
= _PosToPatchMap
.find( pos
);
432 // If the patch is still here...
// (the patch may have been removed after its rectangle was invalidated)
433 if( itPosToPid
!=_PosToPatchMap
.end() )
435 // ReBuild the rectangle.
436 rebuildPatch (pos
, itPosToPid
->second
);
445 // Parse all existing Patchs.
446 TPosToPatchMap::iterator itPosToPid
= _PosToPatchMap
.begin();
447 while( itPosToPid
!= _PosToPatchMap
.end() )
449 // ReBuild the rectangle.
450 rebuildPatch (itPosToPid
->first
, itPosToPid
->second
);
458 // ***************************************************************************
459 // Rebuild the rectangle passed with coordinate passed in parameter
460 void CTextureFar::rebuildPatch (const CVector2s texturePos
, const CPatchIdent
&pid
)
462 uint x
= texturePos
.x
;
463 uint y
= texturePos
.y
;
466 CPatch
* patch
= pid
.Patch
;
472 uint nS
=patch
->getOrderS();
473 uint nT
=patch
->getOrderT();
475 // get the size of the texture to compute
476 uint subTextWidth
=(nS
*NL_NUM_PIXELS_ON_FAR_TILE_EDGE
)>>(pid
.FarIndex
-1);
478 // Check it is an RGBA texture
479 nlassert (getPixelFormat()==RGBA
);
481 // Check pixels exist
482 nlassert (getPixels().size()!=0);
484 // Base offset of the first pixel of the patch's texture
487 // Delta to add to the destination offset when walk for a pixel to the right in the source tile
490 // Delta to add to the destination offset when walk for a pixel to the bottom in the source tile
493 // larger than higher (regular)
496 // Regular offset, top left
497 nBaseOffset
= x
+ y
*_Width
;
499 // Regular deltaX, to the right
502 // Regular deltaY, to the bottom
505 // higher than larger (goofy), the patch is stored with a rotation of 1 (to the left of course)
508 // Goofy offset, bottom left
509 nBaseOffset
= x
+ y
*_Width
;
510 nBaseOffset
+=(subTextWidth
-1)*_Width
;
512 // Goofy deltaX, to the top
513 dstDeltaX
=-(sint
)_Width
;
515 // Goofy deltaY, to the right
519 // Compute the order of the patch
520 CTileFarBank::TFarOrder orderX
=CTileFarBank::order0
;
522 switch (pid
.FarIndex
)
526 orderX
=CTileFarBank::order2
;
527 tileSize
=NL_NUM_PIXELS_ON_FAR_TILE_EDGE
>>2;
531 orderX
=CTileFarBank::order1
;
532 tileSize
=NL_NUM_PIXELS_ON_FAR_TILE_EDGE
>>1;
536 orderX
=CTileFarBank::order0
;
537 tileSize
=NL_NUM_PIXELS_ON_FAR_TILE_EDGE
;
540 // no!: must be one of the previous values
544 // Must have a far tile bank pointer set in the CFarTexture
547 // For all the tiles in the textures
550 // ** Fill the struct for the tile fill method for each layers
551 NL3D_CComputeTileFar TileFar
;
552 TileFar
.SrcDiffusePixels
= NULL
;
553 TileFar
.SrcAdditivePixels
= NULL
;
554 TileFar
.SrcDeltaX
= 0;
555 TileFar
.SrcDeltaY
= 0;
556 TileFar
.AsmMMX
= false;
557 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
558 TileFar
.AsmMMX
= NLMISC::CSystemInfo::hasMMX();
561 // Destination pointer
564 TileFar
.DstDeltaX
=dstDeltaX
;
565 TileFar
.DstDeltaY
=dstDeltaY
;
567 // ** Build expand lightmap..
568 NL3D_CExpandLightmap lightMap
;
570 // Fill the structure
571 lightMap
.MulFactor
=tileSize
;
572 lightMap
.ColorTile
=&patch
->TileColors
[0];
574 lightMap
.Height
=nT
+1;
575 lightMap
.StaticLightColor
=patch
->getZone()->getLandscape()->getStaticLight();
576 lightMap
.DstPixels
=_LightmapExpanded
;
577 // Compute current TLI colors.
578 patch
->computeCurrentTLILightmapDiv2(_TileTLIColors
);
579 lightMap
.TLIColor
= _TileTLIColors
;
581 // Expand the shadowmap
582 patch
->unpackShadowMap (_LumelExpanded
);
583 lightMap
.LumelTile
=_LumelExpanded
;
585 // Expand the patch lightmap now
586 NL3D_expandLightmap (&lightMap
);
588 // DeltaY for lightmap
589 TileFar
.SrcLightingDeltaY
=nS
*tileSize
;
591 // Base Dst pointer on the tile line
592 uint nBaseDstTileLine
=nBaseOffset
;
593 for (uint t
=0; t
<nT
; t
++)
595 // Base Dst pointer on the tile
596 uint nBaseDstTilePixels
=nBaseDstTileLine
;
598 // For each tile of the line
599 for (uint s
=0; s
<nS
; s
++)
601 // Base pointer of the destination texture
602 TileFar
.DstPixels
=(CRGBA
*)&(getPixels()[0])+nBaseDstTilePixels
;
605 TileFar
.SrcLightingPixels
=_LightmapExpanded
+(s
*tileSize
)+(t
*nS
*tileSize
*tileSize
);
607 // For each layer of the tile
608 for (sint l
=0; l
<3; l
++)
610 // Use of additive in this layer ?
611 bool bAdditive
=false;
613 // Size of the edge far tile
614 TileFar
.Size
=tileSize
;
616 // Get a tile element reference for this tile.
617 const CTileElement
&tileElm
=patch
->Tiles
[nTileInPatch
];
619 // Check for 256 tiles...
622 tileElm
.getTile256Info(is256x256
, uvOff
);
624 // Get the tile number
625 sint tile
=tileElm
.Tile
[l
];
627 // Is the last layer ?
628 bool lastLayer
= ( (l
== 2) || (tileElm
.Tile
[l
+1] == NL_TILE_ELM_LAYER_EMPTY
) );
630 // Is an non-empty layer ?
631 if (tile
!=NL_TILE_ELM_LAYER_EMPTY
)
633 // Get the read only pointer on the far tile
634 const CTileFarBank::CTileFar
* pTile
=_Bank
->getTile (tile
);
636 // This pointer must not be null, else the farBank is not valid!
638 nlwarning ("FarBank is not valid!");
644 if (pTile
->isFill (CTileFarBank::diffuse
))
646 // Get rotation of the tile in this layer
647 sint nRot
=tileElm
.getTileOrient(l
);
650 const CRGBA
* pSrcDiffusePixels
=pTile
->getPixels (CTileFarBank::diffuse
, orderX
);
651 const CRGBA
* pSrcAdditivePixels
=NULL
;
654 if (pTile
->isFill (CTileFarBank::additive
))
659 // Get additive pointer
660 pSrcAdditivePixels
=pTile
->getPixels (CTileFarBank::additive
, orderX
);
666 // Source offset (for 256)
674 sourceOffset
+=tileSize
;
677 if ((uvOff
==1)||(uvOff
==2))
678 sourceOffset
+=2*tileSize
*tileSize
;
681 sourceSize
=tileSize
<<1;
689 // Compute offset and deltas
694 TileFar
.SrcDiffusePixels
=pSrcDiffusePixels
+sourceOffset
;
695 TileFar
.SrcAdditivePixels
=pSrcAdditivePixels
+sourceOffset
;
699 TileFar
.SrcDeltaY
=sourceSize
;
704 uint newOffset
=sourceOffset
+(tileSize
-1);
705 TileFar
.SrcDiffusePixels
=pSrcDiffusePixels
+newOffset
;
706 TileFar
.SrcAdditivePixels
=pSrcAdditivePixels
+newOffset
;
709 TileFar
.SrcDeltaX
=sourceSize
;
710 TileFar
.SrcDeltaY
=-1;
715 // Destination pointer
716 uint newOffset
=sourceOffset
+(tileSize
-1)*sourceSize
+tileSize
-1;
717 TileFar
.SrcDiffusePixels
=pSrcDiffusePixels
+newOffset
;
718 TileFar
.SrcAdditivePixels
=pSrcAdditivePixels
+newOffset
;
721 TileFar
.SrcDeltaX
=-1;
722 TileFar
.SrcDeltaY
=-sourceSize
;
727 // Destination pointer
728 uint newOffset
=sourceOffset
+(tileSize
-1)*sourceSize
;
729 TileFar
.SrcDiffusePixels
=pSrcDiffusePixels
+newOffset
;
730 TileFar
.SrcAdditivePixels
=pSrcAdditivePixels
+newOffset
;
733 TileFar
.SrcDeltaX
=-sourceSize
;
739 // *** Draw the layer
745 if (bAdditive
&& lastLayer
)
746 NL3D_drawFarTileInFarTextureAdditiveAlpha (&TileFar
);
747 else // No additive layer
748 NL3D_drawFarTileInFarTextureAlpha (&TileFar
);
753 if (bAdditive
&& lastLayer
)
754 NL3D_drawFarTileInFarTextureAdditive (&TileFar
);
755 else // No additive layer
756 NL3D_drawFarTileInFarTexture (&TileFar
);
762 // Stop, no more layer
769 // Next tile on the line
770 nBaseDstTilePixels
+=dstDeltaX
*tileSize
;
773 // Next line of tiles
774 nBaseDstTileLine
+=dstDeltaY
*tileSize
;
782 // ***************************************************************************
783 // ***************************************************************************
784 // NL3D_ExpandLightmap. C and Asm Part
785 // ***************************************************************************
786 // ***************************************************************************
788 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
791 // EMMS called not in __asm block.
792 # pragma warning (disable : 4799)
795 // ***************************************************************************
796 inline void NL3D_asmEndMMX()
800 // close MMX computation
806 // ***************************************************************************
807 /** Expand a line of color with MMX.
808 * NB: start to write at pixel 1.
810 inline void NL3D_asmExpandLineColor565(const uint16
*src
, CRGBA
*dst
, uint du
, uint len
)
812 static uint64 blank
= 0;
813 static uint64 cF800
= INT64_CONSTANT (0x0000F8000000F800);
814 static uint64 cE000
= INT64_CONSTANT (0x0000E0000000E000);
815 static uint64 c07E0
= INT64_CONSTANT (0x000007E0000007E0);
816 static uint64 c0600
= INT64_CONSTANT (0x0000060000000600);
817 static uint64 c001F
= INT64_CONSTANT (0x0000001F0000001F);
818 static uint64 c001C
= INT64_CONSTANT (0x0000001C0000001C);
828 // start at pixel 1 => increment dst, and start u= du
845 // pack the 2 colors in eax: // Hedx= color0, Ledx= color1
846 xor eax
, eax
// avoid partial stall.
847 mov ax
, [esi
+ ebx
*2]
849 mov ax
, [esi
+ ebx
*2 +2]
851 // store and unpack in mm2: Hmm2= color0, Lmm2= color1
855 // reset accumulator mm3 to black
858 // Expand 565 to 888: color0 and color1 in parallel
887 // unpack mm3 quad to mm0=color0 and mm1=color1.
902 movd mm2
, ebx
// mm2= factor
903 movd mm3
, eax
// mm3= 1-factor
904 // replicate to the 4 words.
905 punpckldq mm2
, mm2
// mm2= 0000 00AA 0000 00AA
906 punpckldq mm3
, mm3
// mm3= 0000 00AA 0000 00AA
907 packssdw mm2
, mm2
// mm2= 00AA 00AA 00AA 00AA
908 packssdw mm3
, mm3
// mm3= 00AA 00AA 00AA 00AA
913 pmullw mm0
, mm3
// color0*(1-factor)
914 pmullw mm1
, mm2
// color1*factor
933 // ***************************************************************************
934 /** Expand a line of color with MMX.
935 * NB: start to write at pixel 1.
937 inline void NL3D_asmExpandLineColor8888(const CRGBA
*src
, CRGBA
*dst
, uint du
, uint len
)
939 static uint64 blank
= 0;
949 // start at pixel 1 => increment dst, and start u= du
966 // read the 2 colors: mm0= color0, mm1= color1
967 movd mm0
, [esi
+ ebx
*4]
968 movd mm1
, [esi
+ ebx
*4 + 4]
980 movd mm2
, ebx
// mm2= factor
981 movd mm3
, eax
// mm3= 1-factor
982 // replicate to the 4 words.
983 punpckldq mm2
, mm2
// mm2= 0000 00AA 0000 00AA
984 punpckldq mm3
, mm3
// mm3= 0000 00AA 0000 00AA
985 packssdw mm2
, mm2
// mm2= 00AA 00AA 00AA 00AA
986 packssdw mm3
, mm3
// mm3= 00AA 00AA 00AA 00AA
991 pmullw mm0
, mm3
// color0*(1-factor)
992 pmullw mm1
, mm2
// color1*factor
1011 // ***************************************************************************
1012 /** Blend 2 lines of color into one line.
1013 * NB: start at pix 0 here
1015 inline void NL3D_asmBlendLines(CRGBA
*dst
, const CRGBA
*src0
, const CRGBA
*src1
, uint index
, uint len
)
1017 static uint64 blank
= 0;
1027 // read the factor and expand it to 4 words.
1032 movd mm2
, ebx
// mm2= factor
1033 movd mm3
, eax
// mm3= 1-factor
1034 punpckldq mm2
, mm2
// mm2= 0000 00AA 0000 00AA
1035 punpckldq mm3
, mm3
// mm3= 0000 00AA 0000 00AA
1036 packssdw mm2
, mm2
// mm2= 00AA 00AA 00AA 00AA
1037 packssdw mm3
, mm3
// mm3= 00AA 00AA 00AA 00AA
1042 sub edx
, esi
// difference between 2 src
1056 pmullw mm0
, mm3
// color0*(1-factor)
1057 pmullw mm1
, mm2
// color1*factor
1076 // ***************************************************************************
1077 /** Lightmap Combining for Far level 2 (farthest)
1078 * Average 16 lumels, and deals with UserColor and TLI
1080 static void NL3D_asmAssembleShading1x1(const uint8
*lumels
, const CRGBA
*colorMap
,
1081 const CRGBA
*srcTLIs
, const CRGBA
*srcUSCs
, CRGBA
*dst
, uint lineWidth
, uint nbTexel
)
1083 static uint64 blank
= 0;
1088 uint offsetTLIs
= ((uint
)srcTLIs
-(uint
)dst
);
1089 uint offsetUSCs
= ((uint
)srcUSCs
-(uint
)dst
);
1104 // Average shade part
1109 // read and accumulate shade
1110 xor eax
,eax
// avoid partial stall
1120 add al
, [esi
+ edx
+ 0]
1122 add al
, [esi
+ edx
+ 1]
1124 add al
, [esi
+ edx
+ 2]
1126 add al
, [esi
+ edx
+ 3]
1129 add al
, [esi
+ edx
*2 + 0]
1131 add al
, [esi
+ edx
*2 + 1]
1133 add al
, [esi
+ edx
*2 + 2]
1135 add al
, [esi
+ edx
*2 + 3]
1138 lea edx
, [edx
+ edx
*2]
1139 add al
, [esi
+ edx
+ 0]
1141 add al
, [esi
+ edx
+ 1]
1143 add al
, [esi
+ edx
+ 2]
1145 add al
, [esi
+ edx
+ 3]
1150 // convert to RGBA from the color Map
1151 movd mm0
, [ebx
+ eax
*4]
1158 // Add with TLI, and clamp.
1159 paddusb mm0
, [edi
+ edx
]
1162 movd mm1
, [edi
+ ebx
]
1175 add esi
, 4 // skip 4 lumels
1176 add edi
, 4 // next texel
1183 // ***************************************************************************
1184 /** Lightmap Combining for Far level 1 (middle)
1185 * Average 4 lumels, and deals with UserColor and TLI
1187 static void NL3D_asmAssembleShading2x2(const uint8
*lumels
, const CRGBA
*colorMap
,
1188 const CRGBA
*srcTLIs
, const CRGBA
*srcUSCs
, CRGBA
*dst
, uint lineWidth
, uint nbTexel
)
1190 static uint64 blank
= 0;
1195 uint offsetTLIs
= ((uint
)srcTLIs
-(uint
)dst
);
1196 uint offsetUSCs
= ((uint
)srcUSCs
-(uint
)dst
);
1211 // Average shade part
1216 // read and accumulate shade
1217 xor eax
,eax
// avoid partial stall
1218 mov al
, [esi
] // read lumel
1224 add al
, [esi
+ edx
+ 1]
1229 // convert to RGBA from the color Map
1230 movd mm0
, [ebx
+ eax
*4]
1237 // Add with TLI, and clamp.
1238 paddusb mm0
, [edi
+ edx
]
1241 movd mm1
, [edi
+ ebx
]
1254 add esi
, 2 // skip 2 lumels
1255 add edi
, 4 // next texel
1262 // ***************************************************************************
1263 # pragma warning (disable : 4731) // frame pointer register 'ebp' modified by inline assembly code
1264 /** Lightmap Combining for Far level 0 (nearest)
1265 * read 1 lumel, and deals with UserColor and TLI
1267 static void NL3D_asmAssembleShading4x4(const uint8
*lumels
, const CRGBA
*colorMap
,
1268 const CRGBA
*srcTLIs
, const CRGBA
*srcUSCs
, CRGBA
*dst
, uint nbTexel
)
1270 static uint64 blank
= 0;
1277 // Use ebp as a register for faster access...
1286 sub edx
, edi
// difference src and dest
1288 sub ebx
, edi
// difference src and dest
1291 // set ebp after reading locals...
1297 // read shade RGBA into the color Map
1298 xor eax
,eax
// avoid partial stall
1299 mov al
,[esi
] // read lumel
1300 movd mm0
, [ebp
+ eax
*4]
1302 // Add with TLI, and clamp.
1303 paddusb mm0
, [edi
+ edx
]
1306 movd mm1
, [edi
+ ebx
]
1319 add esi
, 1 // next lumel
1320 add edi
, 4 // next texel
1330 # pragma warning (default : 4731) // frame pointer register 'ebp' modified by inline assembly code
1333 #else // NL_OS_WINDOWS
1335 // Dummy for non-windows platforms
// No-op stand-in used when the Windows inline-asm path is compiled out.
inline void NL3D_asmEndMMX()
{
}
1337 inline void NL3D_asmExpandLineColor565(const uint16
*src
, CRGBA
*dst
, uint du
, uint len
) {}
1338 inline void NL3D_asmExpandLineColor8888(const CRGBA
*src
, CRGBA
*dst
, uint du
, uint len
) {}
1339 inline void NL3D_asmBlendLines(CRGBA
*dst
, const CRGBA
*src0
, const CRGBA
*src1
, uint index
, uint len
) {}
1340 static void NL3D_asmAssembleShading1x1(const uint8
*lumels
, const CRGBA
*colorMap
,
1341 const CRGBA
*srcTLIs
, const CRGBA
*srcUSCs
, CRGBA
*dst
, uint lineWidth
, uint nbTexel
)
1344 static void NL3D_asmAssembleShading2x2(const uint8
*lumels
, const CRGBA
*colorMap
,
1345 const CRGBA
*srcTLIs
, const CRGBA
*srcUSCs
, CRGBA
*dst
, uint lineWidth
, uint nbTexel
)
1348 static void NL3D_asmAssembleShading4x4(const uint8
*lumels
, const CRGBA
*colorMap
,
1349 const CRGBA
*srcTLIs
, const CRGBA
*srcUSCs
, CRGBA
*dst
, uint nbTexel
)
1353 #endif // NL_OS_WINDOWS
1356 // ***************************************************************************
1357 extern "C" void NL3D_expandLightmap (const NL3D_CExpandLightmap
* pLightmap
)
1360 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
1361 asmMMX
= CSystemInfo::hasMMX();
1362 // A CTileColor must be a 565 only.
1363 nlassert(sizeof(CTileColor
)==2);
1367 uint dstWidth
=(pLightmap
->Width
-1)*pLightmap
->MulFactor
;
1368 uint dstHeight
=(pLightmap
->Height
-1)*pLightmap
->MulFactor
;
1370 // *** First expand user color and TLI colors
1371 // First pass, expand on U
1372 static CRGBA expandedUserColorLine
[ (NL_MAX_TILES_BY_PATCH_EDGE
+1)*
1373 (NL_MAX_TILES_BY_PATCH_EDGE
+1)*NL_LUMEL_BY_TILE
];
1374 static CRGBA expandedTLIColorLine
[ (NL_MAX_TILES_BY_PATCH_EDGE
+1)*
1375 (NL_MAX_TILES_BY_PATCH_EDGE
+1)*NL_LUMEL_BY_TILE
];
1376 // Second pass, expand on V.
1377 static CRGBA expandedUserColor
[ (NL_MAX_TILES_BY_PATCH_EDGE
+1)*NL_LUMEL_BY_TILE
*
1378 (NL_MAX_TILES_BY_PATCH_EDGE
+1)*NL_LUMEL_BY_TILE
];
1379 static CRGBA expandedTLIColor
[ (NL_MAX_TILES_BY_PATCH_EDGE
+1)*NL_LUMEL_BY_TILE
*
1380 (NL_MAX_TILES_BY_PATCH_EDGE
+1)*NL_LUMEL_BY_TILE
];
1388 uint expandFactor
=((pLightmap
->Width
-1)<<8)/(dstWidth
-1);
1390 // Destination pointer
1391 CRGBA
*expandedUserColorLinePtr
= expandedUserColorLine
;
1392 CRGBA
*expandedTLIColorLinePtr
= expandedTLIColorLine
;
1395 const NL3D::CTileColor
*colorTilePtr
=pLightmap
->ColorTile
;
1396 const NLMISC::CRGBA
*colorTLIPtr
= pLightmap
->TLIColor
;
1399 for (v
=0; v
<pLightmap
->Height
; v
++)
1402 expandedUserColorLinePtr
[0].set565 (colorTilePtr
[0].Color565
);
1403 expandedTLIColorLinePtr
[0]= colorTLIPtr
[0];
1405 // MMX implementation.
1409 NL3D_asmExpandLineColor565(&colorTilePtr
->Color565
, expandedUserColorLinePtr
, expandFactor
, dstWidth
-2);
1410 NL3D_asmExpandLineColor8888(colorTLIPtr
, expandedTLIColorLinePtr
, expandFactor
, dstWidth
-2);
1417 uint srcIndexPixel
=expandFactor
;
1419 for (u
=1; u
<dstWidth
-1; u
++)
1422 nlassert ( (u
+v
*dstWidth
) < (sizeof(expandedUserColorLine
)/sizeof(CRGBA
)) );
1425 uint srcIndex
=srcIndexPixel
>>8;
1426 //nlassert (srcIndex>=0); // uint => always >= 0
1427 nlassert (srcIndex
<pLightmap
->Width
-1);
1429 // Compute current color
1433 color0
.set565 (colorTilePtr
[srcIndex
].Color565
);
1435 color1
.set565 (colorTilePtr
[srcIndex
+1].Color565
);
1436 expandedUserColorLinePtr
[u
].blendFromui (color0
, color1
, srcIndexPixel
&0xff);
1437 // Compute current TLI color
1438 color0
= colorTLIPtr
[srcIndex
];
1439 color1
= colorTLIPtr
[srcIndex
+1];
1440 expandedTLIColorLinePtr
[u
].blendFromui (color0
, color1
, srcIndexPixel
&0xff);
1443 srcIndexPixel
+=expandFactor
;
1448 expandedUserColorLinePtr
[dstWidth
-1].set565 (colorTilePtr
[pLightmap
->Width
-1].Color565
);
1449 expandedTLIColorLinePtr
[dstWidth
-1]= colorTLIPtr
[pLightmap
->Width
-1];
1452 expandedUserColorLinePtr
+= dstWidth
;
1453 expandedTLIColorLinePtr
+= dstWidth
;
1454 colorTilePtr
+=pLightmap
->Width
;
1455 colorTLIPtr
+=pLightmap
->Width
;
1466 expandFactor
=((pLightmap
->Height
-1)<<8)/(dstHeight
-1);
1468 // Destination pointer
1469 CRGBA
*expandedUserColorPtr
= expandedUserColor
;
1470 CRGBA
*expandedTLIColorPtr
= expandedTLIColor
;
1473 expandedUserColorLinePtr
= expandedUserColorLine
;
1474 expandedTLIColorLinePtr
= expandedTLIColorLine
;
1477 memcpy(expandedUserColorPtr
, expandedUserColorLinePtr
, dstWidth
*sizeof(CRGBA
));
1478 memcpy(expandedTLIColorPtr
, expandedTLIColorLinePtr
, dstWidth
*sizeof(CRGBA
));
1481 expandedUserColorPtr
+=dstWidth
;
1482 expandedTLIColorPtr
+=dstWidth
;
1485 uint indexPixel
=expandFactor
;
1488 for (v
=1; v
<dstHeight
-1; v
++)
1491 uint index
=indexPixel
>>8;
1494 CRGBA
*colorTilePtr0
= expandedUserColorLine
+ index
*dstWidth
;
1495 CRGBA
*colorTilePtr1
= expandedUserColorLine
+ (index
+1)*dstWidth
;
1496 CRGBA
*colorTLIPtr0
= expandedTLIColorLine
+ index
*dstWidth
;
1497 CRGBA
*colorTLIPtr1
= expandedTLIColorLine
+ (index
+1)*dstWidth
;
1499 // MMX implementation.
1503 NL3D_asmBlendLines(expandedUserColorPtr
, colorTilePtr0
, colorTilePtr1
, indexPixel
, dstWidth
);
1504 NL3D_asmBlendLines(expandedTLIColorPtr
, colorTLIPtr0
, colorTLIPtr1
, indexPixel
, dstWidth
);
1511 for (u
=0; u
<dstWidth
; u
++)
1513 expandedUserColorPtr
[u
].blendFromui (colorTilePtr0
[u
], colorTilePtr1
[u
], indexPixel
&0xff);
1514 expandedTLIColorPtr
[u
].blendFromui (colorTLIPtr0
[u
], colorTLIPtr1
[u
], indexPixel
&0xff);
1519 indexPixel
+=expandFactor
;
1522 expandedUserColorPtr
+=dstWidth
;
1523 expandedTLIColorPtr
+=dstWidth
;
1531 // Destination pointer
1532 expandedUserColorPtr
= expandedUserColor
+ dstWidth
*(dstHeight
-1);
1533 expandedTLIColorPtr
= expandedTLIColor
+ dstWidth
*(dstHeight
-1);
1535 expandedUserColorLinePtr
= expandedUserColorLine
+ dstWidth
*(pLightmap
->Height
-1);
1536 expandedTLIColorLinePtr
= expandedTLIColorLine
+ dstWidth
*(pLightmap
->Height
-1);
1539 memcpy(expandedUserColorPtr
, expandedUserColorLinePtr
, dstWidth
*sizeof(CRGBA
));
1540 memcpy(expandedTLIColorPtr
, expandedTLIColorLinePtr
, dstWidth
*sizeof(CRGBA
));
1542 // *** Now combine with shading
1545 // Switch to the optimal method for each expansion value
1546 switch (pLightmap
->MulFactor
)
1550 // Make 4x4 -> 1x1 blend
1551 CRGBA
*lineUSCPtr
= expandedUserColor
;
1552 CRGBA
*lineTLIPtr
= expandedTLIColor
;
1553 CRGBA
*lineDestPtr
=pLightmap
->DstPixels
;
1554 const uint8
*lineLumelPtr
=pLightmap
->LumelTile
;
1555 uint lineWidth
=dstWidth
<<2;
1556 uint lineWidthx2
=lineWidth
<<1;
1557 uint lineWidthx3
=lineWidthx2
+lineWidth
;
1558 uint lineWidthx4
=lineWidth
<<2;
1561 for (v
=0; v
<dstHeight
; v
++)
1563 // MMX implementation.
1567 NL3D_asmAssembleShading1x1(lineLumelPtr
, pLightmap
->StaticLightColor
, lineTLIPtr
, lineUSCPtr
, lineDestPtr
,
1568 lineWidth
, dstWidth
);
1574 // For each lumel block
1575 for (u
=0; u
<dstWidth
; u
++)
1578 uint lumelIndex
=u
<<2;
1580 // Shading is filtred
1582 ((uint
)lineLumelPtr
[lumelIndex
]+(uint
)lineLumelPtr
[lumelIndex
+1]+(uint
)lineLumelPtr
[lumelIndex
+2]+(uint
)lineLumelPtr
[lumelIndex
+3]
1583 +(uint
)lineLumelPtr
[lumelIndex
+lineWidth
]+(uint
)lineLumelPtr
[lumelIndex
+1+lineWidth
]+(uint
)lineLumelPtr
[lumelIndex
+2+lineWidth
]+(uint
)lineLumelPtr
[lumelIndex
+3+lineWidth
]
1584 +(uint
)lineLumelPtr
[lumelIndex
+lineWidthx2
]+(uint
)lineLumelPtr
[lumelIndex
+1+lineWidthx2
]+(uint
)lineLumelPtr
[lumelIndex
+2+lineWidthx2
]+(uint
)lineLumelPtr
[lumelIndex
+3+lineWidthx2
]
1585 +(uint
)lineLumelPtr
[lumelIndex
+lineWidthx3
]+(uint
)lineLumelPtr
[lumelIndex
+1+lineWidthx3
]+(uint
)lineLumelPtr
[lumelIndex
+2+lineWidthx3
]+(uint
)lineLumelPtr
[lumelIndex
+3+lineWidthx3
]
1588 // Add shading with TLI color.
1590 col
.addRGBOnly(pLightmap
->StaticLightColor
[shading
], lineTLIPtr
[u
]);
1592 // Mul by the userColor
1593 lineDestPtr
[u
].modulateFromColorRGBOnly(col
, lineUSCPtr
[u
]);
1595 lineDestPtr
[u
].R
= min(((uint
)lineDestPtr
[u
].R
)*2, 255U);
1596 lineDestPtr
[u
].G
= min(((uint
)lineDestPtr
[u
].G
)*2, 255U);
1597 lineDestPtr
[u
].B
= min(((uint
)lineDestPtr
[u
].B
)*2, 255U);
1602 lineUSCPtr
+=dstWidth
;
1603 lineTLIPtr
+=dstWidth
;
1604 lineDestPtr
+=dstWidth
;
1605 lineLumelPtr
+=lineWidthx4
;
1611 // Make 2x2 -> 1x1 blend
1612 CRGBA
*lineUSCPtr
= expandedUserColor
;
1613 CRGBA
*lineTLIPtr
= expandedTLIColor
;
1614 CRGBA
*lineDestPtr
=pLightmap
->DstPixels
;
1615 const uint8
*lineLumelPtr
=pLightmap
->LumelTile
;
1616 uint lineWidth
=dstWidth
*2;
1617 uint lineWidthx2
=lineWidth
<<1;
1620 for (v
=0; v
<dstHeight
; v
++)
1622 // MMX implementation.
1626 NL3D_asmAssembleShading2x2(lineLumelPtr
, pLightmap
->StaticLightColor
, lineTLIPtr
, lineUSCPtr
, lineDestPtr
,
1627 lineWidth
, dstWidth
);
1633 // For each lumel block
1634 for (u
=0; u
<dstWidth
; u
++)
1637 uint lumelIndex
=u
<<1;
1639 // Shading is filtred
1641 ((uint
)lineLumelPtr
[lumelIndex
]+(uint
)lineLumelPtr
[lumelIndex
+1]+(uint
)lineLumelPtr
[lumelIndex
+lineWidth
]+(uint
)lineLumelPtr
[lumelIndex
+1+lineWidth
])>>2;
1643 // Add shading with TLI color.
1645 col
.addRGBOnly(pLightmap
->StaticLightColor
[shading
], lineTLIPtr
[u
]);
1647 // Mul by the userColor
1648 lineDestPtr
[u
].modulateFromColorRGBOnly(col
, lineUSCPtr
[u
]);
1650 lineDestPtr
[u
].R
= min(((uint
)lineDestPtr
[u
].R
)*2, 255U);
1651 lineDestPtr
[u
].G
= min(((uint
)lineDestPtr
[u
].G
)*2, 255U);
1652 lineDestPtr
[u
].B
= min(((uint
)lineDestPtr
[u
].B
)*2, 255U);
1657 lineUSCPtr
+=dstWidth
;
1658 lineTLIPtr
+=dstWidth
;
1659 lineDestPtr
+=dstWidth
;
1660 lineLumelPtr
+=lineWidthx2
;
1667 CRGBA
*lineUSCPtr
= expandedUserColor
;
1668 CRGBA
*lineTLIPtr
= expandedTLIColor
;
1669 CRGBA
*lineDestPtr
=pLightmap
->DstPixels
;
1670 const uint8
*lineLumelPtr
=pLightmap
->LumelTile
;
1671 uint nbTexel
=dstWidth
*dstHeight
;
1673 // MMX implementation.
1677 NL3D_asmAssembleShading4x4(lineLumelPtr
, pLightmap
->StaticLightColor
, lineTLIPtr
, lineUSCPtr
, lineDestPtr
,
1685 for (u
=0; u
<nbTexel
; u
++)
1687 // Shading is filtred
1688 uint shading
=lineLumelPtr
[u
];
1690 // Add shading with TLI color.
1692 col
.addRGBOnly(pLightmap
->StaticLightColor
[shading
], lineTLIPtr
[u
]);
1694 // Mul by the userColor
1695 lineDestPtr
[u
].modulateFromColorRGBOnly(col
, lineUSCPtr
[u
]);
1697 lineDestPtr
[u
].R
= min(((uint
)lineDestPtr
[u
].R
)*2, 255U);
1698 lineDestPtr
[u
].G
= min(((uint
)lineDestPtr
[u
].G
)*2, 255U);
1699 lineDestPtr
[u
].B
= min(((uint
)lineDestPtr
[u
].B
)*2, 255U);
1712 // ***************************************************************************
1713 // ***************************************************************************
1714 // NL3D_drawFarTileInFar*. C and Asm Part
1715 // ***************************************************************************
1716 // ***************************************************************************
1719 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
1722 // ***************************************************************************
// Inline-assembly line helper, compiled only when NL_OS_WINDOWS is defined and NL_NO_ASM is not.
// NL3D_drawFarTileInFarTexture calls it once per row when pTileFar->AsmMMX is set, passing:
//   dst        - destination pixels of the row
//   src0       - source (diffuse) pixels of the row
//   src1       - source lighting pixels of the row
//   len        - number of pixels to process (the caller passes pTileFar->Size)
//   src0DeltaX - per-pixel step for src0 (the caller passes pTileFar->SrcDeltaX)
//   dstDeltaX  - per-pixel step for dst (the caller passes pTileFar->DstDeltaX)
// NOTE(review): presumably computes the same (src0 * src1) >> 8 per channel as the C path of the caller - confirm.
1723 inline void NL3D_asmModulateLineColors(CRGBA
*dst
, const CRGBA
*src0
, const CRGBA
*src1
,
1724 uint len
, uint src0DeltaX
, uint dstDeltaX
)
// 64-bit zero constant.
// NOTE(review): presumably the zero operand used to unpack bytes to words - confirm.
1726 static uint64 blank
= 0;
1734 mov esi
, src0
// esi points to the source pixels
1735 mov edx
, src1
// edx points to the source lighting pixels
1738 // compute the increments for esi and edi
1770 // ***************************************************************************
// Inline-assembly (MMX registers mm0..mm3) line helper, compiled only when NL_OS_WINDOWS is defined
// and NL_NO_ASM is not. NL3D_drawFarTileInFarTextureAlpha calls it once per row when pTileFar->AsmMMX is set.
//   dst        - destination pixels of the row (read back and blended, see "read dest" below)
//   src0       - source (diffuse) pixels of the row
//   src1       - source lighting pixels of the row
//   len        - number of pixels to process (the caller passes pTileFar->Size)
//   src0DeltaX - per-pixel step for src0 (the caller passes pTileFar->SrcDeltaX)
//   dstDeltaX  - per-pixel step for dst (the caller passes pTileFar->DstDeltaX)
1771 inline void NL3D_asmModulateAndBlendLineColors(CRGBA
*dst
, const CRGBA
*src0
, const CRGBA
*src1
,
1772 uint len
, uint src0DeltaX
, uint dstDeltaX
)
// 64-bit zero constant.
1774 static uint64 blank
= 0;
// Four 16-bit lanes, each holding 0x0100 (256).
// NOTE(review): presumably the "1" from which the alpha is subtracted to obtain the (1-A) weight - confirm.
1775 static uint64 one
= INT64_CONSTANT (0x0100010001000100);
1784 mov esi
, src0
// esi points to the source pixels
1785 mov edx
, src1
// edx points to the source lighting pixels
1788 // compute the increments for esi and edi
1799 // save and unpack Alpha. NB: ABGR
// The three instructions below replicate the 8-bit alpha into all four 16-bit lanes of mm2.
1801 psrld mm2
, 24 // mm2= 0000 0000 0000 00AA
1802 punpckldq mm2
, mm2
// mm2= 0000 00AA 0000 00AA
1803 packssdw mm2
, mm2
// mm2= 00AA 00AA 00AA 00AA
1814 // Alpha Blend with mm3 and mm2
1815 movd mm1
, [edi
] // read dest
1817 pmullw mm0
, mm2
// mm0= srcColor*A
1818 pmullw mm1
, mm3
// mm1= dstColor*(1-A)
1838 #else // NL_OS_WINDOWS
1840 // Dummy for non-windows platforms
// Fallback declaration used when the inline-assembly version is not compiled (the #else branch of
// "defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)"). It keeps the same signature so that
// NL3D_drawFarTileInFarTexture still compiles.
// NOTE(review): described above as a dummy; callers should therefore only reach it with AsmMMX unset - confirm.
1841 inline void NL3D_asmModulateLineColors(CRGBA
*dst
, const CRGBA
*src0
, const CRGBA
*src1
,
1842 uint len
, uint src0DeltaX
, uint dstDeltaX
)
// Fallback declaration used when the inline-assembly version is not compiled (the #else branch of
// "defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)"). It keeps the same signature so that
// NL3D_drawFarTileInFarTextureAlpha still compiles.
// NOTE(review): presumably a dummy like the stub just above; callers should only reach it with AsmMMX unset - confirm.
1845 inline void NL3D_asmModulateAndBlendLineColors(CRGBA
*dst
, const CRGBA
*src0
, const CRGBA
*src1
,
1846 uint len
, uint src0DeltaX
, uint dstDeltaX
)
1852 // ***************************************************************************
// Draws one far tile into the far texture: destination = diffuse modulated by lighting.
// pTileFar supplies the three pixel pointers (SrcDiffusePixels, SrcLightingPixels, DstPixels), the tile
// Size (used for both the row and the column loop) and the pointer steps (SrcDeltaX/Y, SrcLightingDeltaY,
// DstDeltaX/Y).
// When pTileFar->AsmMMX is set the row is delegated to NL3D_asmModulateLineColors; the C path writes
// R, G and B of each destination pixel as (diffuse * lighting) >> 8.
1853 void NL3D_drawFarTileInFarTexture (const NL3D_CComputeTileFar
* pTileFar
)
1855 // Pointer of the Src diffuse pixels
1856 const CRGBA
* pSrcPixels
=pTileFar
->SrcDiffusePixels
;
1858 // Pointer of the Src lighting pixels
1859 const CRGBA
* pSrcLightPixels
=pTileFar
->SrcLightingPixels
;
1861 // Pointer of the Dst pixels
1862 CRGBA
* pDstPixels
=pTileFar
->DstPixels
;
// For each row of the tile
1866 for (y
=0; y
<pTileFar
->Size
; y
++)
1868 // MMX implementation
1870 if(pTileFar
->AsmMMX
)
// Whole row handled by the asm helper: Size pixels, stepping by SrcDeltaX / DstDeltaX.
1872 NL3D_asmModulateLineColors(pDstPixels
, pSrcPixels
, pSrcLightPixels
,
1873 pTileFar
->Size
, pTileFar
->SrcDeltaX
, pTileFar
->DstDeltaX
);
1875 // C Implementation.
1879 // Pointer of the source line
1880 const CRGBA
* pSrcLine
=pSrcPixels
;
1882 // Pointer of the source lighting line
1883 const CRGBA
* pSrcLightingLine
=pSrcLightPixels
;
1885 // Pointer of the destination line
1886 CRGBA
* pDstLine
=pDstPixels
;
1888 // For each pixel on the line
1889 for (x
=0; x
<pTileFar
->Size
; x
++)
1891 // Read and write a pixel: 8-bit * 8-bit product scaled back with >>8 (only R, G and B are assigned)
1892 pDstLine
->R
=(uint8
)(((uint
)pSrcLine
->R
*(uint
)pSrcLightingLine
->R
)>>8);
1893 pDstLine
->G
=(uint8
)(((uint
)pSrcLine
->G
*(uint
)pSrcLightingLine
->G
)>>8);
1894 pDstLine
->B
=(uint8
)(((uint
)pSrcLine
->B
*(uint
)pSrcLightingLine
->B
)>>8);
// Next pixel
1897 pSrcLine
+=pTileFar
->SrcDeltaX
;
1899 pDstLine
+=pTileFar
->DstDeltaX
;
// Next row: each buffer advances by its own vertical step
1904 pSrcPixels
+=pTileFar
->SrcDeltaY
;
1905 pSrcLightPixels
+=pTileFar
->SrcLightingDeltaY
;
1906 pDstPixels
+=pTileFar
->DstDeltaY
;
// Second AsmMMX test, placed after the per-row pointer advances.
// NOTE(review): presumably guards the MMX state reset (emms) - confirm.
1910 if(pTileFar
->AsmMMX
)
1915 // ***************************************************************************
// Draws one far tile over the existing content of the far texture, using the diffuse pixel's alpha.
// pTileFar supplies the pixel pointers (SrcDiffusePixels, SrcLightingPixels, DstPixels), the tile Size
// and the pointer steps (SrcDeltaX/Y, SrcLightingDeltaY, DstDeltaX/Y).
// When pTileFar->AsmMMX is set the row is delegated to NL3D_asmModulateAndBlendLineColors; the C path computes,
// for R, G and B:  dst = ( ((diffuse * lighting) >> 8) * A  +  dst * (255 - A) ) >> 8.
1916 void NL3D_drawFarTileInFarTextureAlpha (const NL3D_CComputeTileFar
* pTileFar
)
1918 // Pointer of the Src diffuse pixels
1919 const CRGBA
* pSrcPixels
=pTileFar
->SrcDiffusePixels
;
1921 // Pointer of the Src lighting pixels
1922 const CRGBA
* pSrcLightPixels
=pTileFar
->SrcLightingPixels
;
1924 // Pointer of the Dst pixels
1925 CRGBA
* pDstPixels
=pTileFar
->DstPixels
;
1927 // Fill the buffer with layer 0
1929 for (y
=0; y
<pTileFar
->Size
; y
++)
1931 // MMX implementation
1933 if(pTileFar
->AsmMMX
)
// Whole row handled by the asm helper: Size pixels, stepping by SrcDeltaX / DstDeltaX.
1935 NL3D_asmModulateAndBlendLineColors(pDstPixels
, pSrcPixels
, pSrcLightPixels
,
1936 pTileFar
->Size
, pTileFar
->SrcDeltaX
, pTileFar
->DstDeltaX
);
1938 // C Implementation.
1942 // Pointer of the source line
1943 const CRGBA
* pSrcLine
=pSrcPixels
;
1945 // Pointer of the source lighting line
1946 const CRGBA
* pSrcLightingLine
=pSrcLightPixels
;
1948 // Pointer of the Dst pixels
1949 CRGBA
* pDstLine
=pDstPixels
;
1951 // For each pixel on the line
1952 for (x
=0; x
<pTileFar
->Size
; x
++)
1954 // Read and write a pixel
// Blend weights taken from the diffuse pixel's alpha channel.
// NOTE(review): the 'register' storage-class specifier was removed in C++17; it must be dropped to build as C++17 or later.
// NOTE(review): with weights A and 255-A and a >>8, a fully opaque pixel (A=255) yields (c*255)>>8 rather than c.
1955 register uint alpha
=pSrcLine
->A
;
1956 register uint oneLessAlpha
=255-pSrcLine
->A
;
1957 pDstLine
->R
=(uint8
)(((((uint
)pSrcLine
->R
*(uint
)pSrcLightingLine
->R
)>>8)*alpha
+(uint
)pDstLine
->R
*oneLessAlpha
)>>8);
1958 pDstLine
->G
=(uint8
)(((((uint
)pSrcLine
->G
*(uint
)pSrcLightingLine
->G
)>>8)*alpha
+(uint
)pDstLine
->G
*oneLessAlpha
)>>8);
1959 pDstLine
->B
=(uint8
)(((((uint
)pSrcLine
->B
*(uint
)pSrcLightingLine
->B
)>>8)*alpha
+(uint
)pDstLine
->B
*oneLessAlpha
)>>8);
// Next pixel
1962 pSrcLine
+=pTileFar
->SrcDeltaX
;
1964 pDstLine
+=pTileFar
->DstDeltaX
;
// Next row: each buffer advances by its own vertical step
1969 pSrcPixels
+=pTileFar
->SrcDeltaY
;
1970 pSrcLightPixels
+=pTileFar
->SrcLightingDeltaY
;
1971 pDstPixels
+=pTileFar
->DstDeltaY
;
// Second AsmMMX test, placed after the per-row pointer advances.
// NOTE(review): presumably guards the MMX state reset (emms) - confirm.
1975 if(pTileFar
->AsmMMX
)
1980 // ***************************************************************************
1981 // TODO: asm implementation of this function \\//
// Draws one far tile with an additive layer into the far texture (C implementation only).
// pTileFar supplies the pixel pointers (SrcDiffusePixels, SrcAdditivePixels, SrcLightingPixels, DstPixels),
// the tile Size and the pointer steps. The additive buffer is walked with the same SrcDeltaX / SrcDeltaY
// steps as the diffuse buffer.
// For R, G and B:  dst = ((diffuse * lighting) >> 8) + additive.
1983 void NL3D_drawFarTileInFarTextureAdditive (const NL3D_CComputeTileFar
* pTileFar
)
1985 // Pointer of the Src diffuse pixels
1986 const CRGBA
* pSrcPixels
=pTileFar
->SrcDiffusePixels
;
1988 // Pointer of the Src additive pixels
1989 const CRGBA
* pSrcAddPixels
=pTileFar
->SrcAdditivePixels
;
1991 // Pointer of the Src lighting pixels
1992 const CRGBA
* pSrcLightPixels
=pTileFar
->SrcLightingPixels
;
1994 // Pointer of the Dst pixels
1995 CRGBA
* pDstPixels
=pTileFar
->DstPixels
;
// For each row of the tile
1999 for (y
=0; y
<pTileFar
->Size
; y
++)
2001 // Pointer of the source (diffuse) line
2002 const CRGBA
* pSrcLine
=pSrcPixels
;
2004 // Pointer of the source additive line
2005 const CRGBA
* pSrcAddLine
=pSrcAddPixels
;
2007 // Pointer of the source lighting line
2008 const CRGBA
* pSrcLightingLine
=pSrcLightPixels
;
2010 // Pointer of the destination line
2011 CRGBA
* pDstLine
=pDstPixels
;
2013 // For each pixel on the line
2014 for (x
=0; x
<pTileFar
->Size
; x
++)
2016 // Read and write a pixel
// NOTE(review): modulated diffuse plus additive can exceed 255; confirm the sum is clamped before each uint8 cast.
2017 uint nTmp
=(((uint
)pSrcLine
->R
*(uint
)pSrcLightingLine
->R
)>>8)+(uint
)pSrcAddLine
->R
;
2020 pDstLine
->R
=(uint8
)nTmp
;
2021 nTmp
=(((uint
)pSrcLine
->G
*(uint
)pSrcLightingLine
->G
)>>8)+(uint
)pSrcAddLine
->G
;
2024 pDstLine
->G
=(uint8
)nTmp
;
2025 nTmp
=(((uint
)pSrcLine
->B
*(uint
)pSrcLightingLine
->B
)>>8)+(uint
)pSrcAddLine
->B
;
2028 pDstLine
->B
=(uint8
)nTmp
;
// Next pixel: diffuse and additive share the same horizontal step
2031 pSrcLine
+=pTileFar
->SrcDeltaX
;
2032 pSrcAddLine
+=pTileFar
->SrcDeltaX
;
2034 pDstLine
+=pTileFar
->DstDeltaX
;
// Next row: diffuse and additive share SrcDeltaY; lighting and destination use their own steps
2038 pSrcPixels
+=pTileFar
->SrcDeltaY
;
2039 pSrcAddPixels
+=pTileFar
->SrcDeltaY
;
2040 pSrcLightPixels
+=pTileFar
->SrcLightingDeltaY
;
2041 pDstPixels
+=pTileFar
->DstDeltaY
;
2044 //#endif // NL_NO_ASM
2047 // ***************************************************************************
2048 // TODO: asm implementation of this function \\//
// Draws one far tile with an additive layer over the existing content of the far texture, using the
// diffuse pixel's alpha (C implementation only).
// pTileFar supplies the pixel pointers (SrcDiffusePixels, SrcAdditivePixels, SrcLightingPixels, DstPixels),
// the tile Size and the pointer steps. The additive buffer is walked with the same SrcDeltaX / SrcDeltaY
// steps as the diffuse buffer.
// For R, G and B:  tmp = ((diffuse * lighting) >> 8) + additive;  dst = (tmp * A + dst * (255 - A)) >> 8.
2050 void NL3D_drawFarTileInFarTextureAdditiveAlpha (const NL3D_CComputeTileFar
* pTileFar
)
2052 // Pointer of the Src diffuse pixels
2053 const CRGBA
* pSrcPixels
=pTileFar
->SrcDiffusePixels
;
2055 // Pointer of the Src additive pixels
2056 const CRGBA
* pSrcAddPixels
=pTileFar
->SrcAdditivePixels
;
2058 // Pointer of the Src lighting pixels
2059 const CRGBA
* pSrcLightPixels
=pTileFar
->SrcLightingPixels
;
2061 // Pointer of the Dst pixels
2062 CRGBA
* pDstPixels
=pTileFar
->DstPixels
;
2064 // Fill the buffer with layer 0
2066 for (y
=0; y
<pTileFar
->Size
; y
++)
2068 // Pointer of the source (diffuse) line
2069 const CRGBA
* pSrcLine
=pSrcPixels
;
2071 // Pointer of the source additive line
2072 const CRGBA
* pSrcAddLine
=pSrcAddPixels
;
2074 // Pointer of the source lighting line
2075 const CRGBA
* pSrcLightingLine
=pSrcLightPixels
;
2077 // Pointer of the Dst pixels
2078 CRGBA
* pDstLine
=pDstPixels
;
2080 // For each pixel on the line
2081 for (x
=0; x
<pTileFar
->Size
; x
++)
2083 // Read the blend weights from the diffuse pixel's alpha channel
// NOTE(review): the 'register' storage-class specifier was removed in C++17; it must be dropped to build as C++17 or later.
2084 register uint alpha
=pSrcLine
->A
;
2085 register uint oneLessAlpha
=255-pSrcLine
->A
;
2087 // Read and write a pixel
// NOTE(review): modulated diffuse plus additive can exceed 255; confirm the sum is clamped before it is blended.
2088 uint nTmp
=(((uint
)pSrcLine
->R
*(uint
)pSrcLightingLine
->R
)>>8)+(uint
)pSrcAddLine
->R
;
2091 pDstLine
->R
=(uint8
)((nTmp
*alpha
+pDstLine
->R
*oneLessAlpha
)>>8);
2092 nTmp
=(((uint
)pSrcLine
->G
*(uint
)pSrcLightingLine
->G
)>>8)+(uint
)pSrcAddLine
->G
;
2095 pDstLine
->G
=(uint8
)((nTmp
*alpha
+pDstLine
->G
*oneLessAlpha
)>>8);
2096 nTmp
=(((uint
)pSrcLine
->B
*(uint
)pSrcLightingLine
->B
)>>8)+(uint
)pSrcAddLine
->B
;
2099 pDstLine
->B
=(uint8
)((nTmp
*alpha
+pDstLine
->B
*oneLessAlpha
)>>8);
// Next pixel: diffuse and additive share the same horizontal step
2102 pSrcLine
+=pTileFar
->SrcDeltaX
;
2103 pSrcAddLine
+=pTileFar
->SrcDeltaX
;
2105 pDstLine
+=pTileFar
->DstDeltaX
;
// Next row: diffuse and additive share SrcDeltaY; lighting and destination use their own steps
2109 pSrcPixels
+=pTileFar
->SrcDeltaY
;
2110 pSrcAddPixels
+=pTileFar
->SrcDeltaY
;
2111 pSrcLightPixels
+=pTileFar
->SrcLightingDeltaY
;
2112 pDstPixels
+=pTileFar
->DstDeltaY
;
2115 //#endif // NL_NO_ASM