fixed: gcc8 compile issues
[opensg.git] / Source / System / Image / Squish / OSGSquishClusterfit.cpp
bloba02430cfccf61286574075e5133a7ae5f71c5565
1 /* -----------------------------------------------------------------------------
3 Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
4 Copyright (c) 2007 Ignacio Castano icastano@nvidia.com
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice shall be included
15 in all copies or substantial portions of the Software.
17 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
21 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
22 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
23 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 -------------------------------------------------------------------------- */
27 #include "clusterfit.h"
28 #include "colourset.h"
29 #include "colourblock.h"
30 #include <cfloat>
32 namespace osgsquish {
34 ClusterFit::ClusterFit( ColourSet const* colours, int flags )
35 : ColourFit( colours, flags ),
36 m_iterationCount(0),
37 m_principle(),
38 m_xsum_wsum(),
39 m_metric(),
40 m_besterror()
42 // set the iteration count
43 m_iterationCount = ( m_flags & kColourIterativeClusterFit ) ? kMaxIterations : 1;
45 // initialise the best error
46 m_besterror = VEC4_CONST( FLT_MAX );
48 // initialise the metric
49 bool perceptual = ( ( m_flags & kColourMetricPerceptual ) != 0 );
50 if( perceptual )
51 m_metric = Vec4( 0.2126f, 0.7152f, 0.0722f, 0.0f );
52 else
53 m_metric = VEC4_CONST( 1.0f );
55 // cache some values
56 int const count = m_colours->GetCount();
57 Vec3 const* values = m_colours->GetPoints();
59 // get the covariance matrix
60 Sym3x3 covariance = ComputeWeightedCovariance( count, values, m_colours->GetWeights() );
62 // compute the principle component
63 m_principle = ComputePrincipleComponent( covariance );
66 bool ClusterFit::ConstructOrdering( Vec3 const& axis, int iteration )
68 // cache some values
69 int const count = m_colours->GetCount();
70 Vec3 const* values = m_colours->GetPoints();
72 // build the list of dot products
73 float dps[16];
74 u8* order = static_cast< u8*>(m_order) + 16*iteration;
75 for( int i = 0; i < count; ++i )
77 dps[i] = Dot( values[i], axis );
78 order[i] = u8(i);
81 // stable sort using them
82 for( int i = 0; i < count; ++i )
84 for( int j = i; j > 0 && dps[j] < dps[j - 1]; --j )
86 std::swap( dps[j], dps[j - 1] );
87 std::swap( order[j], order[j - 1] );
91 // check this ordering is unique
92 for( int it = 0; it < iteration; ++it )
94 u8 const* prev = static_cast< u8*>(m_order) + 16*it;
95 bool same = true;
96 for( int i = 0; i < count; ++i )
98 if( order[i] != prev[i] )
100 same = false;
101 break;
104 if( same )
105 return false;
108 // copy the ordering and weight all the points
109 Vec3 const* unweighted = m_colours->GetPoints();
110 float const* weights = m_colours->GetWeights();
111 m_xsum_wsum = VEC4_CONST( 0.0f );
112 for( int i = 0; i < count; ++i )
114 int j = order[i];
115 Vec4 p( unweighted[j].X(), unweighted[j].Y(), unweighted[j].Z(), 1.0f );
116 Vec4 w( weights[j] );
117 Vec4 x = p*w;
118 m_points_weights[i] = x;
119 m_xsum_wsum += x;
121 return true;
124 void ClusterFit::Compress3( void* block )
126 // declare variables
127 int const count = m_colours->GetCount();
128 Vec4 const two = VEC4_CONST( 2.0 );
129 Vec4 const one = VEC4_CONST( 1.0f );
130 Vec4 const half_half2( 0.5f, 0.5f, 0.5f, 0.25f );
131 Vec4 const zero = VEC4_CONST( 0.0f );
132 Vec4 const half = VEC4_CONST( 0.5f );
133 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
134 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
136 // prepare an ordering using the principle axis
137 ConstructOrdering( m_principle, 0 );
139 // check all possible clusters and iterate on the total order
140 Vec4 beststart = VEC4_CONST( 0.0f );
141 Vec4 bestend = VEC4_CONST( 0.0f );
142 Vec4 besterror = m_besterror;
143 u8 bestindices[16];
144 int bestiteration = 0;
145 int besti = 0, bestj = 0;
147 // loop over iterations (we avoid the case that all points in first or last cluster)
148 for( int iterationIndex = 0;; )
150 // first cluster [0,i) is at the start
151 Vec4 part0 = VEC4_CONST( 0.0f );
152 for( int i = 0; i < count; ++i )
154 // second cluster [i,j) is half along
155 Vec4 part1 = ( i == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
156 int jmin = ( i == 0 ) ? 1 : i;
157 for( int j = jmin;; )
159 // last cluster [j,count) is at the end
160 Vec4 part2 = m_xsum_wsum - part1 - part0;
162 // compute least squares terms directly
163 Vec4 alphax_sum = MultiplyAdd( part1, half_half2, part0 );
164 Vec4 alpha2_sum = alphax_sum.SplatW();
166 Vec4 betax_sum = MultiplyAdd( part1, half_half2, part2 );
167 Vec4 beta2_sum = betax_sum.SplatW();
169 Vec4 alphabeta_sum = ( part1*half_half2 ).SplatW();
171 // compute the least-squares optimal points
172 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
173 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
174 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
176 // clamp to the grid
177 a = Min( one, Max( zero, a ) );
178 b = Min( one, Max( zero, b ) );
179 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
180 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
182 // compute the error (we skip the constant xxsum)
183 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
184 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
185 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
186 Vec4 e4 = MultiplyAdd( two, e3, e1 );
188 // apply the metric to the error term
189 Vec4 e5 = e4*m_metric;
190 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
192 // keep the solution if it wins
193 if( CompareAnyLessThan( error, besterror ) )
195 beststart = a;
196 bestend = b;
197 besti = i;
198 bestj = j;
199 besterror = error;
200 bestiteration = iterationIndex;
203 // advance
204 if( j == count )
205 break;
206 part1 += m_points_weights[j];
207 ++j;
210 // advance
211 part0 += m_points_weights[i];
214 // stop if we didn't improve in this iteration
215 if( bestiteration != iterationIndex )
216 break;
218 // advance if possible
219 ++iterationIndex;
220 if( iterationIndex == m_iterationCount )
221 break;
223 // stop if a new iteration is an ordering that has already been tried
224 Vec3 axis = ( bestend - beststart ).GetVec3();
225 if( !ConstructOrdering( axis, iterationIndex ) )
226 break;
229 // save the block if necessary
230 if( CompareAnyLessThan( besterror, m_besterror ) )
232 // remap the indices
233 u8 const* order = static_cast<u8*>(m_order) + 16*bestiteration;
235 u8 unordered[16];
236 for( int m = 0; m < besti; ++m )
237 unordered[order[m]] = 0;
238 for( int m = besti; m < bestj; ++m )
239 unordered[order[m]] = 2;
240 for( int m = bestj; m < count; ++m )
241 unordered[order[m]] = 1;
243 m_colours->RemapIndices( unordered, bestindices );
245 // save the block
246 WriteColourBlock3( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
248 // save the error
249 m_besterror = besterror;
253 void ClusterFit::Compress4( void* block )
255 // declare variables
256 int const count = m_colours->GetCount();
257 Vec4 const two = VEC4_CONST( 2.0f );
258 Vec4 const one = VEC4_CONST( 1.0f );
259 Vec4 const onethird_onethird2( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
260 Vec4 const twothirds_twothirds2( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
261 Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
262 Vec4 const zero = VEC4_CONST( 0.0f );
263 Vec4 const half = VEC4_CONST( 0.5f );
264 Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
265 Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
267 // prepare an ordering using the principle axis
268 ConstructOrdering( m_principle, 0 );
270 // check all possible clusters and iterate on the total order
271 Vec4 beststart = VEC4_CONST( 0.0f );
272 Vec4 bestend = VEC4_CONST( 0.0f );
273 Vec4 besterror = m_besterror;
274 u8 bestindices[16];
275 int bestiteration = 0;
276 int besti = 0, bestj = 0, bestk = 0;
278 // loop over iterations (we avoid the case that all points in first or last cluster)
279 for( int iterationIndex = 0;; )
281 // first cluster [0,i) is at the start
282 Vec4 part0 = VEC4_CONST( 0.0f );
283 for( int i = 0; i < count; ++i )
285 // second cluster [i,j) is one third along
286 Vec4 part1 = VEC4_CONST( 0.0f );
287 for( int j = i;; )
289 // third cluster [j,k) is two thirds along
290 Vec4 part2 = ( j == 0 ) ? m_points_weights[0] : VEC4_CONST( 0.0f );
291 int kmin = ( j == 0 ) ? 1 : j;
292 for( int k = kmin;; )
294 // last cluster [k,count) is at the end
295 Vec4 part3 = m_xsum_wsum - part2 - part1 - part0;
297 // compute least squares terms directly
298 Vec4 const alphax_sum = MultiplyAdd( part2, onethird_onethird2, MultiplyAdd( part1, twothirds_twothirds2, part0 ) );
299 Vec4 const alpha2_sum = alphax_sum.SplatW();
301 Vec4 const betax_sum = MultiplyAdd( part1, onethird_onethird2, MultiplyAdd( part2, twothirds_twothirds2, part3 ) );
302 Vec4 const beta2_sum = betax_sum.SplatW();
304 Vec4 const alphabeta_sum = twonineths*( part1 + part2 ).SplatW();
306 // compute the least-squares optimal points
307 Vec4 factor = Reciprocal( NegativeMultiplySubtract( alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum ) );
308 Vec4 a = NegativeMultiplySubtract( betax_sum, alphabeta_sum, alphax_sum*beta2_sum )*factor;
309 Vec4 b = NegativeMultiplySubtract( alphax_sum, alphabeta_sum, betax_sum*alpha2_sum )*factor;
311 // clamp to the grid
312 a = Min( one, Max( zero, a ) );
313 b = Min( one, Max( zero, b ) );
314 a = Truncate( MultiplyAdd( grid, a, half ) )*gridrcp;
315 b = Truncate( MultiplyAdd( grid, b, half ) )*gridrcp;
317 // compute the error (we skip the constant xxsum)
318 Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
319 Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
320 Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
321 Vec4 e4 = MultiplyAdd( two, e3, e1 );
323 // apply the metric to the error term
324 Vec4 e5 = e4*m_metric;
325 Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
327 // keep the solution if it wins
328 if( CompareAnyLessThan( error, besterror ) )
330 beststart = a;
331 bestend = b;
332 besterror = error;
333 besti = i;
334 bestj = j;
335 bestk = k;
336 bestiteration = iterationIndex;
339 // advance
340 if( k == count )
341 break;
342 part2 += m_points_weights[k];
343 ++k;
346 // advance
347 if( j == count )
348 break;
349 part1 += m_points_weights[j];
350 ++j;
353 // advance
354 part0 += m_points_weights[i];
357 // stop if we didn't improve in this iteration
358 if( bestiteration != iterationIndex )
359 break;
361 // advance if possible
362 ++iterationIndex;
363 if( iterationIndex == m_iterationCount )
364 break;
366 // stop if a new iteration is an ordering that has already been tried
367 Vec3 axis = ( bestend - beststart ).GetVec3();
368 if( !ConstructOrdering( axis, iterationIndex ) )
369 break;
372 // save the block if necessary
373 if( CompareAnyLessThan( besterror, m_besterror ) )
375 // remap the indices
376 u8 const* order = static_cast<u8*>(m_order) + 16*bestiteration;
378 u8 unordered[16];
379 for( int m = 0; m < besti; ++m )
380 unordered[order[m]] = 0;
381 for( int m = besti; m < bestj; ++m )
382 unordered[order[m]] = 2;
383 for( int m = bestj; m < bestk; ++m )
384 unordered[order[m]] = 3;
385 for( int m = bestk; m < count; ++m )
386 unordered[order[m]] = 1;
388 m_colours->RemapIndices( unordered, bestindices );
390 // save the block
391 WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
393 // save the error
394 m_besterror = besterror;
398 } // namespace squish