Linux multi-monitor fullscreen support
[ryzomcore.git] / nel / src / 3d / fasthls_modifier.cpp
blob cae414f2c8cd1cf72a03ca6b3a4835b43e3731ed
1 // NeL - MMORPG Framework <http://dev.ryzom.com/projects/nel/>
2 // Copyright (C) 2010 Winch Gate Property Limited
3 //
4 // This program is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU Affero General Public License as
6 // published by the Free Software Foundation, either version 3 of the
7 // License, or (at your option) any later version.
8 //
9 // This program is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU Affero General Public License for more details.
14 // You should have received a copy of the GNU Affero General Public License
15 // along with this program. If not, see <http://www.gnu.org/licenses/>.
18 #include "std3d.h"
19 #include "nel/3d/fasthls_modifier.h"
20 #include "nel/misc/fast_floor.h"
21 #include "nel/misc/bitmap.h"
22 #include "nel/misc/system_info.h"
23 #include "nel/misc/algo.h"
26 using namespace std;
27 using namespace NLMISC;
29 #ifdef DEBUG_NEW
30 #define new DEBUG_NEW
31 #endif
33 namespace NL3D
36 // ***************************************************************************
37 CFastHLSModifier *CFastHLSModifier::_Instance= NULL;
39 // ***************************************************************************
40 void CFastHLSModifier::releaseInstance()
42 if( _Instance )
44 delete _Instance;
45 _Instance = NULL;
49 // ***************************************************************************
50 CFastHLSModifier::CFastHLSModifier()
52 uint i;
53 // build the HueTable.
54 for(i=0;i<HueTableSize;i++)
56 _HueTable[i].buildFromHLS(360.0f*i/HueTableSize, 0.5f, 1);
58 // build conversion from uint16 to HLS.
59 for(i=0;i<65536;i++)
61 CRGBA col;
62 col.set565(i);
63 float h,l,s;
64 col.convertToHLS(h,l,s);
65 h= (float)floor(255*(h/360.f)+0.5f);
66 l= (float)floor(255*l+0.5f);
67 s= (float)floor(255*s+0.5f);
68 clamp(h,0,255);
69 clamp(l,0,255);
70 clamp(s,0,255);
71 _Color16ToHLS[i].H= (uint8)h;
72 _Color16ToHLS[i].L= (uint8)l;
73 _Color16ToHLS[i].S= (uint8)s;
74 _Color16ToHLS[i].A= 255;
78 // ***************************************************************************
79 CFastHLSModifier::~CFastHLSModifier()
83 // ***************************************************************************
84 CFastHLSModifier &CFastHLSModifier::getInstance()
86 if(!_Instance)
87 _Instance= new CFastHLSModifier;
88 return *_Instance;
92 // ***************************************************************************
93 CRGBA CFastHLSModifier::convert(uint H, uint L, uint S)
95 static CRGBA gray(128,128,128);
96 L+= L>>7;
97 S+= S>>7;
98 // H.
99 CRGBA col= _HueTable[H];
100 // S.
101 col.blendFromuiRGBOnly(gray, col, S);
102 // L.
103 if(L<=128)
105 col.modulateFromuiRGBOnly(col, L*2);
107 else
109 col.blendFromuiRGBOnly(col, CRGBA::White, (L-128)*2 );
112 return col;
115 #if defined(NL_COMP_VC) && (NL_COMP_VC_VERSION >= 71)
116 # pragma warning( push )
117 # pragma warning( disable : 4799 )
118 #endif
120 #ifdef NL_OS_WINDOWS
121 #pragma managed(push, off)
122 #endif
124 // ***************************************************************************
125 uint16 CFastHLSModifier::applyHLSMod(uint16 colorIn, uint8 dHue, uint dLum, uint dSat)
127 static uint64 mmBlank = 0;
128 static uint64 mmOne = INT64_CONSTANT(0x00FF00FF00FF00FF);
129 static uint64 mmGray = INT64_CONSTANT(0x0080008000800080);
132 dLum is actually 0xFFFFFF00 + realDLum
133 dSat is actually 0xFFFFFF00 + realDSat
136 uint16 retVal;
138 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
139 if(CSystemInfo::hasMMX())
141 static uint64 mmInterpBufer[4]= {0,0,0,INT64_CONSTANT(0x00FF00FF00FF00FF)};
143 __asm
145 mov edi, offset mmInterpBufer
146 mov ecx, this
148 // get HLS in edx.
149 mov eax, 0
150 mov ebx, 0
151 lea esi, [ecx]this._Color16ToHLS
152 mov ax, colorIn
153 mov edx, [esi+ eax*4]
155 // apply dh to H (ie dl!). Auto-wrap.
156 add dl, dHue
157 // get the color into mm0
158 mov bl, dl
159 lea esi, [ecx]this._HueTable
160 movd mm0, [esi+ ebx*4]
161 punpcklbw mm0, mmBlank
163 // get L into eax and S into ebx
164 mov eax, edx
165 mov ebx, edx
166 shr eax, 8
167 shr ebx, 16
168 and eax, 255
169 and ebx, 255
170 // add dLum/dSat and clamp to 1.
171 add eax, dLum
172 sbb ecx, ecx // ecx= FFFFFFFF if carry.
173 add ebx, dSat
174 sbb edx, edx
175 or eax, ecx // eax= FFFFFFFF if carry was set
176 or ebx, edx
177 // add Magic delta, and clamp to 0.
178 add eax, 256
179 sbb ecx, ecx // ecx= 0 if carry not set => result below 0.
180 add ebx, 256
181 sbb edx, edx
182 and eax, ecx // eax= 0 if result was below 0
183 and ebx, edx
185 // Load Sat/(1-Sat) into MMX
186 movd mm2, ebx
187 movq mm3, mmOne
188 punpckldq mm2, mm2 // mm2= 0000 00AA 0000 00AA
189 packssdw mm2, mm2 // mm2= 00AA 00AA 00AA 00AA
190 movq mm1, mmGray
191 psubusw mm3, mm2 // mm3= 1-sat.
192 // combine Color and Sat
193 pmullw mm0, mm2 // mm0= color*sat
194 pmullw mm1, mm3 // mm1= gray*(1-sat)
195 paddusw mm0, mm1 // mm0= color saturated
196 // shift and store into the buffer for Luminance interpolation
197 psrlw mm0, 8
198 movq [edi+ 8], mm0
199 movq [edi+ 16], mm0
201 // use edx as index for luminance: 0: L=0 to 127. 1: L=128 to 255.
202 mov edx, eax
203 shl eax, 1
204 shr edx, 7
205 and eax, 255 // 0-127 and 128-255 transform auto to 0-254
206 // expand 0-254 to 0-255
207 mov ecx, eax
208 shl edx, 4
209 shr ecx, 7
210 add eax, ecx
212 // Combine color and Luminance into MMX. interpolate 0->col or col->white according to edx.
213 // Load Lum/(1-Lum) into MMX
214 movd mm2, eax
215 movq mm3, mmOne
216 punpckldq mm2, mm2 // mm2= 0000 00AA 0000 00AA
217 packssdw mm2, mm2 // mm2= 00AA 00AA 00AA 00AA
218 psubusw mm3, mm2 // mm3= 1-lum.
219 // Combine color and Sat into MMX
220 movq mm0, [edi+ edx]
221 movq mm1, [edi+ edx + 8]
222 pmullw mm0, mm3 // mm0= color0*(1-lum)
223 pmullw mm1, mm2 // mm1= color1*lum
224 paddusw mm0, mm1 // mm0= final color
226 // shift and unpack
227 psrlw mm0, 8
228 packuswb mm0, mm0
229 movd eax, mm0
231 // pack to 16bits.
232 mov ebx, eax
233 mov ecx, eax
234 shl eax, 8 // Red
235 shr ebx, 5 // Green
236 shr ecx, 19 // Blue
237 and eax, 0xF800
238 and ebx, 0x07E0
239 and ecx, 0x001F
240 or eax, ebx
241 or eax, ecx
243 mov retVal, ax
246 else
247 #endif // NL_OS_WINDOWS
249 CHLSA hls= _Color16ToHLS[colorIn];
250 // apply (C version) Dhue, dLum and dSat
251 hls.H= uint8((hls.H + dHue) & 0xFF);
252 sint v= (sint)hls.L + (sint)(dLum-0xFFFFFF00);
253 fastClamp8(v);
254 hls.L= v;
255 v= (sint)hls.S + (sint)(dSat-0xFFFFFF00);
256 fastClamp8(v);
257 hls.S= v;
259 CRGBA ret= convert(hls.H, hls.L, hls.S);
260 retVal= ret.get565();
263 return retVal;
265 #ifdef NL_OS_WINDOWS
266 #pragma managed(pop)
267 #endif
269 #if defined(NL_COMP_VC) && (NL_COMP_VC_VERSION >= 71)
270 # pragma warning( pop )
271 #endif
273 // ***************************************************************************
274 #ifdef NL_OS_WINDOWS
275 #pragma managed(push, off)
276 #endif
277 void CFastHLSModifier::convertDDSBitmapDXTC1Or1A(CBitmap &dst, const CBitmap &src, uint8 dh, uint dLum, uint dSat)
279 uint W= src.getWidth();
280 uint H= src.getHeight();
282 const uint8 *srcPix= &(src.getPixels()[0]);
283 uint8 *dstPix= &(dst.getPixels()[0]);
284 uint numBlock= (W*H)/16;
287 need to swap color and bits for DXTC1 or DXTC1A.
290 static uint32 bitLUT[8]= {
291 1,0,3,2, // reverse std order
292 1,0,2,3, // reverse order for "special 0/black packing"
295 // Do not use alpha mask for now.
296 for(;numBlock>0;numBlock--)
298 uint16 srcCol0= ((uint16*)srcPix)[0];
299 uint16 srcCol1= ((uint16*)srcPix)[1];
300 bool srcSign= srcCol0>srcCol1;
301 // apply modifiers for 2 colors.
302 uint16 dstCol0= applyHLSMod(srcCol0, dh,dLum,dSat);
303 uint16 dstCol1= applyHLSMod(srcCol1, dh,dLum,dSat);
304 bool dstSign= dstCol0>dstCol1;
305 if((uint)dstSign!=(uint)srcSign)
307 swap(dstCol0,dstCol1);
308 // must change bits too!
309 uint32 srcBits= ((uint32*)srcPix)[1];
310 uint32 dstBits= 0;
311 // take correct lut according to original sign
312 uint32 *lut;
313 if(srcCol0>srcCol1)
314 lut= bitLUT;
315 else
316 lut= bitLUT+4;
318 // for all bits, transpose with lut.
319 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
320 __asm
322 mov eax, srcBits
323 mov esi, lut
324 mov edx, 0
325 mov ecx, 16
326 // prepare 1st.
327 rol eax, 2
328 mov ebx, eax
329 and ebx, 2
330 // do it 16 times.
331 myLoop:
332 or edx, [esi+ebx*4]
333 rol eax, 2
334 rol edx, 2
335 mov ebx, eax
336 and ebx, 2
337 dec ecx
338 jnz myLoop
340 ror edx, 2
341 mov dstBits, edx
343 #else
344 for(uint n=16;n>0;n--)
346 // transform the id.
347 uint id= srcBits&3;
348 id= lut[id];
349 // write.
350 dstBits|= id<<30;
351 // don't decal last
352 if(n>1)
353 dstBits>>=2;
355 #endif
357 // store
358 ((uint32*)dstPix)[1]= dstBits;
360 else
361 // just copy bits
362 ((uint32*)dstPix)[1]= ((uint32*)srcPix)[3];
363 ((uint16*)dstPix)[0]= dstCol0;
364 ((uint16*)dstPix)[1]= dstCol1;
365 // skip.
366 srcPix+= 8;
367 dstPix+= 8;
370 // Must end MMX, for applyHLSMod()
371 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
372 if(CSystemInfo::hasMMX())
373 _asm emms;
374 #endif
376 #ifdef NL_OS_WINDOWS
377 #pragma managed(pop)
378 #endif
380 // ***************************************************************************
381 #ifdef NL_OS_WINDOWS
382 #pragma managed(push, off)
383 #endif
384 void CFastHLSModifier::convertDDSBitmapDXTC3Or5(CBitmap &dst, const CBitmap &src, uint8 dh, uint dLum, uint dSat)
386 uint W= src.getWidth();
387 uint H= src.getHeight();
389 const uint8 *srcPix= &(src.getPixels()[0]);
390 uint8 *dstPix= &(dst.getPixels()[0]);
391 uint numBlock= (W*H)/16;
394 NB: don't need to swap color and bits for DXTC3 or DXTC5.
397 // Do not use alpha mask for now.
398 for(;numBlock>0;numBlock--)
400 uint16 srcCol0= ((uint16*)srcPix)[4];
401 uint16 srcCol1= ((uint16*)srcPix)[5];
402 // apply modifiers for 2 colors.
403 ((uint16*)dstPix)[4]= applyHLSMod(srcCol0, dh,dLum,dSat);
404 ((uint16*)dstPix)[5]= applyHLSMod(srcCol1, dh,dLum,dSat);
405 // just copy bits
406 ((uint32*)dstPix)[3]= ((uint32*)srcPix)[3];
407 // copy alpha part.
408 ((uint32*)dstPix)[0]= ((uint32*)srcPix)[0];
409 ((uint32*)dstPix)[1]= ((uint32*)srcPix)[1];
410 // skip bits and alpha part.
411 srcPix+= 16;
412 dstPix+= 16;
415 // Must end MMX, for applyHLSMod()
416 #if defined(NL_OS_WINDOWS) && !defined(NL_NO_ASM)
417 if(CSystemInfo::hasMMX())
418 _asm emms;
419 #endif
421 #ifdef NL_OS_WINDOWS
422 #pragma managed(pop)
423 #endif
425 // ***************************************************************************
426 void CFastHLSModifier::convertDDSBitmap(CBitmap &dst, const CBitmap &src, uint8 dh, sint dl, sint ds)
428 nlassert(src.getPixelFormat()==dst.getPixelFormat());
429 nlassert(src.getWidth()==dst.getWidth() && src.getHeight()==dst.getHeight());
431 // Magic add clamp.
432 uint dLum= 0xFFFFFF00 + dl;
433 uint dSat= 0xFFFFFF00 + ds;
435 if(src.getPixelFormat()==CBitmap::DXTC1 || src.getPixelFormat()==CBitmap::DXTC1Alpha)
436 convertDDSBitmapDXTC1Or1A(dst, src, dh, dLum, dSat);
437 else if(src.getPixelFormat()==CBitmap::DXTC3 || src.getPixelFormat()==CBitmap::DXTC5)
438 convertDDSBitmapDXTC3Or5(dst, src, dh, dLum, dSat);
439 else
441 nlstop;
446 // ***************************************************************************
447 void CFastHLSModifier::convertRGBABitmap(CBitmap &dst, const CBitmap &src, uint8 dh, sint dl, sint ds)
449 nlassert(src.getPixelFormat()==dst.getPixelFormat());
450 nlassert(src.getPixelFormat()==CBitmap::RGBA);
452 uint W= src.getWidth();
453 uint H= src.getHeight();
455 const CRGBA *srcPix= (const CRGBA*)&(src.getPixels()[0]);
456 CRGBA *dstPix= (CRGBA*)&(dst.getPixels()[0]);
457 uint numPix= W*H;
459 // Do not use alpha mask for now.
460 for(;numPix>0;numPix--)
462 float H,L,S;
463 srcPix->convertToHLS(H,L,S);
464 H*= 256.f/360.f;
465 L*= 255.f;
466 S*= 255.f;
467 H+= dh+0.5f;
468 L+= dl+0.5f;
469 S+= ds+0.5f;
470 clamp(H, 0, 255);
471 clamp(L, 0, 255);
472 clamp(S, 0, 255);
473 uint8 H8= (uint8)NLMISC::OptFastFloor(H);
474 uint8 L8= (uint8)NLMISC::OptFastFloor(L);
475 uint8 S8= (uint8)NLMISC::OptFastFloor(S);
476 *dstPix= convert(H8, L8, S8);
477 srcPix++;
478 dstPix++;
483 } // NL3D