boost/accumulators/statistics/weighted_extended_p_square.hpp

   1 ///////////////////////////////////////////////////////////////////////////////
   2 // weighted_extended_p_square.hpp
   3 //
   4 //  Copyright 2005 Daniel Egloff. Distributed under the Boost
   5 //  Software License, Version 1.0. (See accompanying file
   6 //  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
   7
   8 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_EXTENDED_P_SQUARE_HPP_DE_01_01_2006
   9 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_EXTENDED_P_SQUARE_HPP_DE_01_01_2006
  10
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <functional>
#include <iterator>
#include <utility>
#include <vector>
#include <boost/range/begin.hpp>
#include <boost/range/end.hpp>
#include <boost/range/iterator_range.hpp>
#include <boost/iterator/transform_iterator.hpp>
#include <boost/iterator/counting_iterator.hpp>
#include <boost/iterator/permutation_iterator.hpp>
#include <boost/parameter/keyword.hpp>
#include <boost/mpl/placeholders.hpp>
#include <boost/accumulators/framework/accumulator_base.hpp>
#include <boost/accumulators/framework/extractor.hpp>
#include <boost/accumulators/numeric/functional.hpp>
#include <boost/accumulators/framework/parameters/sample.hpp>
#include <boost/accumulators/framework/depends_on.hpp>
#include <boost/accumulators/statistics_fwd.hpp>
#include <boost/accumulators/statistics/count.hpp>
#include <boost/accumulators/statistics/sum.hpp>
#include <boost/accumulators/statistics/times2_iterator.hpp>
#include <boost/accumulators/statistics/extended_p_square.hpp>
  31
  32 namespace boost { namespace accumulators
  33 {
  34
  35 namespace impl
  36 {
  37     ///////////////////////////////////////////////////////////////////////////////
  38     // weighted_extended_p_square_impl
  39     //  multiple quantile estimation with weighted samples
  40     /**
  41         @brief Multiple quantile estimation with the extended \f$P^2\f$ algorithm for weighted samples
  42
  43         This version of the extended \f$P^2\f$ algorithm extends the extended \f$P^2\f$ algorithm to
  44         support weighted samples. The extended \f$P^2\f$ algorithm dynamically estimates several
  45         quantiles without storing samples. Assume that \f$m\f$ quantiles
  46         \f$\xi_{p_1}, \ldots, \xi_{p_m}\f$ are to be estimated. Instead of storing the whole sample
  47         cumulative distribution, the algorithm maintains only \f$m+2\f$ principal markers and
  48         \f$m+1\f$ middle markers, whose positions are updated with each sample and whose heights
  49         are adjusted (if necessary) using a piecewise-parablic formula. The heights of the principal
  50         markers are the current estimates of the quantiles and are returned as an iterator range.
  51
  52         For further details, see
  53
  54         K. E. E. Raatikainen, Simultaneous estimation of several quantiles, Simulation, Volume 49,
  55         Number 4 (October), 1986, p. 159-164.
  56
  57         The extended \f$ P^2 \f$ algorithm generalizess the \f$ P^2 \f$ algorithm of
  58
  59         R. Jain and I. Chlamtac, The P^2 algorithmus for dynamic calculation of quantiles and
  60         histograms without storing observations, Communications of the ACM,
  61         Volume 28 (October), Number 10, 1985, p. 1076-1085.
  62
  63         @param extended_p_square_probabilities A vector of quantile probabilities.
  64     */
  65     template<typename Sample, typename Weight>
  66     struct weighted_extended_p_square_impl
  67       : accumulator_base
  68     {
  69         typedef typename numeric::functional::multiplies<Sample, Weight>::result_type weighted_sample;
  70         typedef typename numeric::functional::average<weighted_sample, std::size_t>::result_type float_type;
  71         typedef std::vector<float_type> array_type;
  72         // for boost::result_of
  73         typedef iterator_range<
  74             detail::lvalue_index_iterator<
  75                 permutation_iterator<
  76                     typename array_type::const_iterator
  77                   , detail::times2_iterator
  78                 >
  79             >
  80         > result_type;
  81
  82         template<typename Args>
  83         weighted_extended_p_square_impl(Args const &args)
  84           : probabilities(
  85                 boost::begin(args[extended_p_square_probabilities])
  86               , boost::end(args[extended_p_square_probabilities])
  87             )
  88           , heights(2 * probabilities.size() + 3)
  89           , actual_positions(heights.size())
  90           , desired_positions(heights.size())
  91         {
  92         }
  93
  94         template<typename Args>
  95         void operator ()(Args const &args)
  96         {
  97             std::size_t cnt = count(args);
  98             std::size_t sample_cell = 1; // k
  99             std::size_t num_quantiles = this->probabilities.size();
 100
 101             // m+2 principal markers and m+1 middle markers
 102             std::size_t num_markers = 2 * num_quantiles + 3;
 103
 104             // first accumulate num_markers samples
 105             if(cnt <= num_markers)
 106             {
 107                 this->heights[cnt - 1] = args[sample];
 108                 this->actual_positions[cnt - 1] = args[weight];
 109
 110                 // complete the initialization of heights (and actual_positions) by sorting
 111                 if(cnt == num_markers)
 112                 {
 113                     // TODO: we need to sort the initial samples (in heights) in ascending order and
 114                     // sort their weights (in actual_positions) the same way. The following lines do
 115                     // it, but there must be a better and more efficient way of doing this.
 116                     typename array_type::iterator it_begin, it_end, it_min;
 117
 118                     it_begin = this->heights.begin();
 119                     it_end   = this->heights.end();
 120
 121                     std::size_t pos = 0;
 122
 123                     while (it_begin != it_end)
 124                     {
 125                         it_min = std::min_element(it_begin, it_end);
 126                         std::size_t d = std::distance(it_begin, it_min);
 127                         std::swap(*it_begin, *it_min);
 128                         std::swap(this->actual_positions[pos], this->actual_positions[pos + d]);
 129                         ++it_begin;
 130                         ++pos;
 131                     }
 132
 133                     // calculate correct initial actual positions
 134                     for (std::size_t i = 1; i < num_markers; ++i)
 135                     {
 136                         actual_positions[i] += actual_positions[i - 1];
 137                     }
 138                 }
 139             }
 140             else
 141             {
 142                 if(args[sample] < this->heights[0])
 143                 {
 144                     this->heights[0] = args[sample];
 145                     this->actual_positions[0] = args[weight];
 146                     sample_cell = 1;
 147                 }
 148                 else if(args[sample] >= this->heights[num_markers - 1])
 149                 {
 150                     this->heights[num_markers - 1] = args[sample];
 151                     sample_cell = num_markers - 1;
 152                 }
 153                 else
 154                 {
 155                     // find cell k = sample_cell such that heights[k-1] <= sample < heights[k]
 156
 157                     typedef typename array_type::iterator iterator;
 158                     iterator it = std::upper_bound(
 159                         this->heights.begin()
 160                       , this->heights.end()
 161                       , args[sample]
 162                     );
 163
 164                     sample_cell = std::distance(this->heights.begin(), it);
 165                 }
 166
 167                 // update actual position of all markers above sample_cell
 168                 for(std::size_t i = sample_cell; i < num_markers; ++i)
 169                 {
 170                     this->actual_positions[i] += args[weight];
 171                 }
 172
 173                 // compute desired positions
 174                 {
 175                     this->desired_positions[0] = this->actual_positions[0];
 176                     this->desired_positions[num_markers - 1] = sum_of_weights(args);
 177                     this->desired_positions[1] = (sum_of_weights(args) - this->actual_positions[0]) * probabilities[0]
 178                                               / 2. + this->actual_positions[0];
 179                     this->desired_positions[num_markers - 2] = (sum_of_weights(args) - this->actual_positions[0])
 180                                                             * (probabilities[num_quantiles - 1] + 1.)
 181                                                             / 2. + this->actual_positions[0];
 182
 183                     for (std::size_t i = 0; i < num_quantiles; ++i)
 184                     {
 185                         this->desired_positions[2 * i + 2] = (sum_of_weights(args) - this->actual_positions[0])
 186                                                           * probabilities[i] + this->actual_positions[0];
 187                     }
 188
 189                     for (std::size_t i = 1; i < num_quantiles; ++i)
 190                     {
 191                         this->desired_positions[2 * i + 1] = (sum_of_weights(args) - this->actual_positions[0])
 192                                                       * (probabilities[i - 1] + probabilities[i])
 193                                                       / 2. + this->actual_positions[0];
 194                     }
 195                 }
 196
 197                 // adjust heights and actual_positions of markers 1 to num_markers - 2 if necessary
 198                 for (std::size_t i = 1; i <= num_markers - 2; ++i)
 199                 {
 200                     // offset to desired position
 201                     float_type d = this->desired_positions[i] - this->actual_positions[i];
 202
 203                     // offset to next position
 204                     float_type dp = this->actual_positions[i + 1] - this->actual_positions[i];
 205
 206                     // offset to previous position
 207                     float_type dm = this->actual_positions[i - 1] - this->actual_positions[i];
 208
 209                     // height ds
 210                     float_type hp = (this->heights[i + 1] - this->heights[i]) / dp;
 211                     float_type hm = (this->heights[i - 1] - this->heights[i]) / dm;
 212
 213                     if((d >= 1 && dp > 1) || (d <= -1 && dm < -1))
 214                     {
 215                         short sign_d = static_cast<short>(d / std::abs(d));
 216
 217                         float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm)*hp + (dp - sign_d) * hm);
 218
 219                         // try adjusting heights[i] using p-squared formula
 220                         if(this->heights[i - 1] < h && h < this->heights[i + 1])
 221                         {
 222                             this->heights[i] = h;
 223                         }
 224                         else
 225                         {
 226                             // use linear formula
 227                             if(d > 0)
 228                             {
 229                                 this->heights[i] += hp;
 230                             }
 231                             if(d < 0)
 232                             {
 233                                 this->heights[i] -= hm;
 234                             }
 235                         }
 236                         this->actual_positions[i] += sign_d;
 237                     }
 238                 }
 239             }
 240         }
 241
 242         result_type result(dont_care) const
 243         {
 244             // for i in [1,probabilities.size()], return heights[i * 2]
 245             detail::times2_iterator idx_begin = detail::make_times2_iterator(1);
 246             detail::times2_iterator idx_end = detail::make_times2_iterator(this->probabilities.size() + 1);
 247
 248             return result_type(
 249                 make_permutation_iterator(this->heights.begin(), idx_begin)
 250               , make_permutation_iterator(this->heights.begin(), idx_end)
 251             );
 252         }
 253
 254     private:
 255         array_type probabilities;         // the quantile probabilities
 256         array_type heights;               // q_i
 257         array_type actual_positions;      // n_i
 258         array_type desired_positions;     // d_i
 259     };
 260
 261 } // namespace impl
 262
 263 ///////////////////////////////////////////////////////////////////////////////
 264 // tag::weighted_extended_p_square
 265 //
 266 namespace tag
 267 {
 268     struct weighted_extended_p_square
 269       : depends_on<count, sum_of_weights>
 270       , extended_p_square_probabilities
 271     {
 272         typedef accumulators::impl::weighted_extended_p_square_impl<mpl::_1, mpl::_2> impl;
 273     };
 274 }
 275
 276 ///////////////////////////////////////////////////////////////////////////////
 277 // extract::weighted_extended_p_square
 278 //
 279 namespace extract
 280 {
 281     extractor<tag::weighted_extended_p_square> const weighted_extended_p_square = {};
 282 }
 283
 284 using extract::weighted_extended_p_square;
 285
 286 }} // namespace boost::accumulators
 287
 288 #endif