/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
10 #include "HistogramCalculator.hxx"
17 HistogramCalculator::HistogramCalculator() = default;
19 void HistogramCalculator::computeBinFrequencyHistogram(const std::vector
<double>& rDataPoints
)
21 if (rDataPoints
.empty())
27 maBinFrequencies
.clear();
29 // Calculate statistics
31 double fSquareSum
= 0.0;
32 double fMinValue
= rDataPoints
[0];
33 double fMaxValue
= rDataPoints
[0];
34 sal_Int32 nValidCount
= 0;
36 // Compute min and max values, ignoring non-finite values
37 for (const auto& rValue
: rDataPoints
)
39 if (std::isfinite(rValue
))
42 fSquareSum
+= rValue
* rValue
;
43 fMinValue
= std::min(fMinValue
, rValue
);
44 fMaxValue
= std::max(fMaxValue
, rValue
);
49 if (nValidCount
< 2 || fMinValue
== fMaxValue
) // Need at least two points for variance
53 maBinRanges
= { { std::floor(fMinValue
), std::ceil(fMinValue
+ 1.0) } };
54 maBinFrequencies
= { nValidCount
};
58 double fMean
= fSum
/ nValidCount
;
59 double fVariance
= (fSquareSum
- fSum
* fMean
) / (nValidCount
- 1);
60 double fStdDev
= std::sqrt(fVariance
);
62 // Apply Scott's rule for bin width
63 mfBinWidth
= (3.5 * fStdDev
) / std::cbrt(nValidCount
);
65 // Calculate number of bins
66 mnBins
= static_cast<sal_Int32
>(std::ceil((fMaxValue
- fMinValue
) / mfBinWidth
));
67 mnBins
= std::max
<sal_Int32
>(mnBins
, 1); // Ensure at least one bin
70 maBinRanges
.reserve(mnBins
);
71 double fBinStart
= fMinValue
;
73 for (sal_Int32 i
= 0; i
< mnBins
; ++i
)
75 double fBinEnd
= fBinStart
+ mfBinWidth
;
77 // Correct rounding to avoid discrepancies
78 fBinStart
= std::round(fBinStart
* 100.0) / 100.0;
79 fBinEnd
= std::round(fBinEnd
* 100.0) / 100.0;
83 // First bin includes the minimum value, so use closed interval [fMinValue, fBinEnd]
84 maBinRanges
.emplace_back(fMinValue
, fBinEnd
);
88 // Subsequent bins use half-open interval (fBinStart, fBinEnd]
89 maBinRanges
.emplace_back(fBinStart
, fBinEnd
);
94 // Adjust the last bin end to be inclusive
95 maBinRanges
.back().second
= std::max(maBinRanges
.back().second
, fMaxValue
);
97 // Calculate frequencies
98 maBinFrequencies
.assign(mnBins
, 0);
99 for (double fValue
: rDataPoints
)
101 if (std::isfinite(fValue
))
103 for (size_t i
= 0; i
< maBinRanges
.size(); ++i
)
105 if (i
== 0 && fValue
>= maBinRanges
[i
].first
&& fValue
<= maBinRanges
[i
].second
)
107 maBinFrequencies
[i
]++;
110 else if (i
> 0 && fValue
> maBinRanges
[i
].first
&& fValue
<= maBinRanges
[i
].second
)
112 maBinFrequencies
[i
]++;
/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */