chart2/source/tools/PolynomialRegressionCurveCalculator.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <PolynomialRegressionCurveCalculator.hxx>
  21 #include <RegressionCalculationHelper.hxx>
  22
  23 #include <cmath>
  24 #include <limits>
  25 #include <rtl/math.hxx>
  26 #include <rtl/ustrbuf.hxx>
  27
  28 #include <SpecialCharacters.hxx>
  29
  30 using namespace com::sun::star;
  31
  32 namespace chart
  33 {
  34
  35 static double lcl_GetDotProduct(std::vector<double>& aVec1, std::vector<double>& aVec2)
  36 {
  37     double fResult = 0.0;
  38     assert(aVec1.size() == aVec2.size());
  39     for (size_t i = 0; i < aVec1.size(); ++i)
  40         fResult += aVec1[i] * aVec2[i];
  41     return fResult;
  42 }
  43
  44 PolynomialRegressionCurveCalculator::PolynomialRegressionCurveCalculator()
  45 {}
  46
  47 PolynomialRegressionCurveCalculator::~PolynomialRegressionCurveCalculator()
  48 {}
  49
  50 void PolynomialRegressionCurveCalculator::computeCorrelationCoefficient(
  51     RegressionCalculationHelper::tDoubleVectorPair& rValues,
  52     const sal_Int32 aNoValues,
  53     double yAverage )
  54 {
  55     double aSumError = 0.0;
  56     double aSumTotal = 0.0;
  57     double aSumYpred2 = 0.0;
  58
  59     for( sal_Int32 i = 0; i < aNoValues; i++ )
  60     {
  61         double xValue = rValues.first[i];
  62         double yActual = rValues.second[i];
  63         double yPredicted = getCurveValue( xValue );
  64         aSumTotal += (yActual - yAverage) * (yActual - yAverage);
  65         aSumError += (yActual - yPredicted) * (yActual - yPredicted);
  66         if(mForceIntercept)
  67             aSumYpred2 += (yPredicted - mInterceptValue) * (yPredicted - mInterceptValue);
  68     }
  69
  70     double aRSquared = 0.0;
  71     if(mForceIntercept)
  72     {
  73         if (auto const div = aSumError + aSumYpred2)
  74         {
  75             aRSquared = aSumYpred2 / div;
  76         }
  77     }
  78     else if (aSumTotal != 0.0)
  79     {
  80         aRSquared = 1.0 - (aSumError / aSumTotal);
  81     }
  82
  83     if (aRSquared > 0.0)
  84         m_fCorrelationCoefficient = std::sqrt(aRSquared);
  85     else
  86         m_fCorrelationCoefficient = 0.0;
  87 }
  88
  89 // ____ XRegressionCurveCalculator ____
  90 void SAL_CALL PolynomialRegressionCurveCalculator::recalculateRegression(
  91     const uno::Sequence< double >& aXValues,
  92     const uno::Sequence< double >& aYValues )
  93 {
  94     m_fCorrelationCoefficient = std::numeric_limits<double>::quiet_NaN();
  95
  96     RegressionCalculationHelper::tDoubleVectorPair aValues(
  97         RegressionCalculationHelper::cleanup( aXValues, aYValues, RegressionCalculationHelper::isValid()));
  98
  99     const sal_Int32 aNoValues = aValues.first.size();
 100
 101     const sal_Int32 aNoPowers = mForceIntercept ? mDegree : mDegree + 1;
 102
 103     mCoefficients.clear();
 104     mCoefficients.resize(aNoPowers, 0.0);
 105
 106     double yAverage = 0.0;
 107
 108     std::vector<double> yVector;
 109     yVector.resize(aNoValues, 0.0);
 110
 111     for(sal_Int32 i = 0; i < aNoValues; i++)
 112     {
 113         double yValue = aValues.second[i];
 114         if (mForceIntercept)
 115             yValue -= mInterceptValue;
 116         yVector[i] = yValue;
 117         yAverage += yValue;
 118     }
 119     if (aNoValues != 0)
 120     {
 121         yAverage /= aNoValues;
 122     }
 123
 124     // Special case for single variable regression like in LINEST
 125     // implementation in Calc.
 126     if (mDegree == 1)
 127     {
 128         std::vector<double> xVector;
 129         xVector.resize(aNoValues, 0.0);
 130         double xAverage = 0.0;
 131
 132         for(sal_Int32 i = 0; i < aNoValues; ++i)
 133         {
 134             double xValue = aValues.first[i];
 135             xVector[i] = xValue;
 136             xAverage += xValue;
 137         }
 138         if (aNoValues != 0)
 139         {
 140             xAverage /= aNoValues;
 141         }
 142
 143         if (!mForceIntercept)
 144         {
 145             for (sal_Int32 i = 0; i < aNoValues; ++i)
 146             {
 147                 xVector[i] -= xAverage;
 148                 yVector[i] -= yAverage;
 149             }
 150         }
 151         double fSumXY = lcl_GetDotProduct(xVector, yVector);
 152         double fSumX2 = lcl_GetDotProduct(xVector, xVector);
 153
 154         double fSlope = fSumXY / fSumX2;
 155
 156         if (!mForceIntercept)
 157         {
 158             mInterceptValue = ::rtl::math::approxSub(yAverage, fSlope * xAverage);
 159             mCoefficients[0] = mInterceptValue;
 160             mCoefficients[1] = fSlope;
 161         }
 162         else
 163         {
 164             mCoefficients[0] = fSlope;
 165             mCoefficients.insert(mCoefficients.begin(), mInterceptValue);
 166         }
 167
 168         computeCorrelationCoefficient(aValues, aNoValues, yAverage);
 169         return;
 170     }
 171
 172     std::vector<double> aQRTransposed;
 173     aQRTransposed.resize(aNoValues * aNoPowers, 0.0);
 174
 175     for(sal_Int32 j = 0; j < aNoPowers; j++)
 176     {
 177         sal_Int32 aPower = mForceIntercept ? j+1 : j;
 178         sal_Int32 aColumnIndex = j * aNoValues;
 179         for(sal_Int32 i = 0; i < aNoValues; i++)
 180         {
 181             double xValue = aValues.first[i];
 182             aQRTransposed[i + aColumnIndex] = std::pow(xValue, static_cast<int>(aPower));
 183         }
 184     }
 185
 186     // QR decomposition - based on org.apache.commons.math.linear.QRDecomposition from apache commons math (ASF)
 187     sal_Int32 aMinorSize = std::min(aNoValues, aNoPowers);
 188
 189     std::vector<double> aDiagonal;
 190     aDiagonal.resize(aMinorSize, 0.0);
 191
 192     // Calculate Householder reflectors
 193     for (sal_Int32 aMinor = 0; aMinor < aMinorSize; aMinor++)
 194     {
 195         double aNormSqr = 0.0;
 196         for (sal_Int32 x = aMinor; x < aNoValues; x++)
 197         {
 198             double c = aQRTransposed[x + aMinor * aNoValues];
 199             aNormSqr += c * c;
 200         }
 201
 202         double a;
 203
 204         if (aQRTransposed[aMinor + aMinor * aNoValues] > 0.0)
 205             a = -std::sqrt(aNormSqr);
 206         else
 207             a = std::sqrt(aNormSqr);
 208
 209         aDiagonal[aMinor] = a;
 210
 211         if (a != 0.0)
 212         {
 213             aQRTransposed[aMinor + aMinor * aNoValues] -= a;
 214
 215             for (sal_Int32 aColumn = aMinor + 1; aColumn < aNoPowers; aColumn++)
 216             {
 217                 double alpha = 0.0;
 218                 for (sal_Int32 aRow = aMinor; aRow < aNoValues; aRow++)
 219                 {
 220                     alpha -= aQRTransposed[aRow + aColumn * aNoValues] * aQRTransposed[aRow + aMinor * aNoValues];
 221                 }
 222                 alpha /= a * aQRTransposed[aMinor + aMinor * aNoValues];
 223
 224                 for (sal_Int32 aRow = aMinor; aRow < aNoValues; aRow++)
 225                 {
 226                     aQRTransposed[aRow + aColumn * aNoValues] -= alpha * aQRTransposed[aRow + aMinor * aNoValues];
 227                 }
 228             }
 229         }
 230     }
 231
 232     // Solve the linear equation
 233     for (sal_Int32 aMinor = 0; aMinor < aMinorSize; aMinor++)
 234     {
 235         double aDotProduct = 0;
 236
 237         for (sal_Int32 aRow = aMinor; aRow < aNoValues; aRow++)
 238         {
 239             aDotProduct += yVector[aRow] * aQRTransposed[aRow + aMinor * aNoValues];
 240         }
 241         aDotProduct /= aDiagonal[aMinor] * aQRTransposed[aMinor + aMinor * aNoValues];
 242
 243         for (sal_Int32 aRow = aMinor; aRow < aNoValues; aRow++)
 244         {
 245             yVector[aRow] += aDotProduct * aQRTransposed[aRow + aMinor * aNoValues];
 246         }
 247
 248     }
 249
 250     for (sal_Int32 aRow = aDiagonal.size() - 1; aRow >= 0; aRow--)
 251     {
 252         yVector[aRow] /= aDiagonal[aRow];
 253         double yRow = yVector[aRow];
 254         mCoefficients[aRow] = yRow;
 255
 256         for (sal_Int32 i = 0; i < aRow; i++)
 257         {
 258             yVector[i] -= yRow * aQRTransposed[i + aRow * aNoValues];
 259         }
 260     }
 261
 262     if(mForceIntercept)
 263     {
 264         mCoefficients.insert(mCoefficients.begin(), mInterceptValue);
 265     }
 266
 267     // Calculate correlation coefficient
 268     computeCorrelationCoefficient(aValues, aNoValues, yAverage);
 269 }
 270
 271 double SAL_CALL PolynomialRegressionCurveCalculator::getCurveValue( double x )
 272 {
 273     if (mCoefficients.empty())
 274         return std::numeric_limits<double>::quiet_NaN();
 275
 276     sal_Int32 aNoCoefficients = static_cast<sal_Int32>(mCoefficients.size());
 277
 278     // Horner's method
 279     double fResult = 0.0;
 280     for (sal_Int32 i = aNoCoefficients - 1; i >= 0; i--)
 281     {
 282         fResult = mCoefficients[i] + (x * fResult);
 283     }
 284     return fResult;
 285 }
 286
 287 OUString PolynomialRegressionCurveCalculator::ImplGetRepresentation(
 288     const uno::Reference< util::XNumberFormatter >& xNumFormatter,
 289     sal_Int32 nNumberFormatKey, sal_Int32* pFormulaMaxWidth /* = nullptr */ ) const
 290 {
 291     OUStringBuffer aBuf( mYName + " = " );
 292
 293     sal_Int32 nValueLength=0;
 294     sal_Int32 aLastIndex = mCoefficients.size() - 1;
 295
 296     if ( pFormulaMaxWidth && *pFormulaMaxWidth > 0 )
 297     {
 298         sal_Int32 nCharMin = aBuf.getLength(); // count characters different from coefficients
 299         double nCoefficients = aLastIndex + 1.0; // number of coefficients
 300         for (sal_Int32 i = aLastIndex; i >= 0; i--)
 301         {
 302             double aValue = mCoefficients[i];
 303             if ( aValue == 0.0 )
 304             { // do not count coefficient if it is 0
 305                 nCoefficients --;
 306                 continue;
 307             }
 308             if ( rtl::math::approxEqual( fabs( aValue ) , 1.0 ) )
 309             { // do not count coefficient if it is 1
 310                 nCoefficients --;
 311                 if ( i == 0 ) // intercept = 1
 312                     nCharMin ++;
 313             }
 314             if ( i != aLastIndex )
 315                 nCharMin += 3; // " + "
 316             if ( i > 0 )
 317             {
 318                 nCharMin += mXName.getLength() + 1; // " x"
 319                 if ( i > 1 )
 320                     nCharMin +=1; // "^i"
 321                 if ( i >= 10 )
 322                     nCharMin ++; // 2 digits for i
 323             }
 324         }
 325         nValueLength = ( *pFormulaMaxWidth - nCharMin ) / nCoefficients;
 326         if ( nValueLength <= 0 )
 327             nValueLength = 1;
 328     }
 329
 330     bool bFindValue = false;
 331     sal_Int32 nLineLength = aBuf.getLength();
 332     for (sal_Int32 i = aLastIndex; i >= 0; i--)
 333     {
 334         double aValue = mCoefficients[i];
 335         OUStringBuffer aTmpBuf(""); // temporary buffer
 336         if (aValue == 0.0)
 337         {
 338             continue;
 339         }
 340         else if (aValue < 0.0)
 341         {
 342             if ( bFindValue ) // if it is not the first aValue
 343                 aTmpBuf.append( " " );
 344             aTmpBuf.append( OUStringChar(aMinusSign) + " ");
 345             aValue = - aValue;
 346         }
 347         else
 348         {
 349             if ( bFindValue ) // if it is not the first aValue
 350                 aTmpBuf.append( " + " );
 351         }
 352         bFindValue = true;
 353
 354         // if nValueLength not calculated then nullptr
 355         sal_Int32* pValueLength = nValueLength ? &nValueLength : nullptr;
 356         OUString aValueString = getFormattedString( xNumFormatter, nNumberFormatKey, aValue, pValueLength );
 357         if ( i == 0 || aValueString != "1" )  // aValueString may be rounded to 1 if nValueLength is small
 358         {
 359             aTmpBuf.append( aValueString );
 360             if ( i > 0 ) // insert blank between coefficient and x
 361                 aTmpBuf.append( " " );
 362         }
 363
 364         if(i > 0)
 365         {
 366             aTmpBuf.append( mXName );
 367             if (i > 1)
 368             {
 369                 if (i < 10) // simple case if only one digit
 370                     aTmpBuf.append( aSuperscriptFigures[ i ] );
 371                 else
 372                 {
 373                     OUString aValueOfi = OUString::number( i );
 374                     for ( sal_Int32 n = 0; n < aValueOfi.getLength() ; n++ )
 375                     {
 376                         sal_Int32 nIndex = aValueOfi[n] - u'0';
 377                         aTmpBuf.append( aSuperscriptFigures[ nIndex ] );
 378                     }
 379                 }
 380             }
 381         }
 382         addStringToEquation( aBuf, nLineLength, aTmpBuf, pFormulaMaxWidth );
 383     }
 384     if ( std::u16string_view(aBuf) == Concat2View( mYName + " = ") )
 385         aBuf.append( "0" );
 386
 387     return aBuf.makeStringAndClear();
 388 }
 389
 390 } //  namespace chart
 391
 392 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */