1 ///////////////////////////////////////////////////////////////////////////////
2 // weighted_p_square_cumulative_distribution.hpp
4 // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
5 // Software License, Version 1.0. (See accompanying file
6 // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
8 #ifndef BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_CUMULATIVE_DISTRIBUTION_HPP_DE_01_01_2006
9 #define BOOST_ACCUMULATORS_STATISTICS_WEIGHTED_P_SQUARE_CUMULATIVE_DISTRIBUTION_HPP_DE_01_01_2006
13 #include <boost/parameter/keyword.hpp>
14 #include <boost/mpl/placeholders.hpp>
15 #include <boost/range.hpp>
16 #include <boost/accumulators/framework/accumulator_base.hpp>
17 #include <boost/accumulators/framework/extractor.hpp>
18 #include <boost/accumulators/numeric/functional.hpp>
19 #include <boost/accumulators/framework/parameters/sample.hpp>
20 #include <boost/accumulators/statistics_fwd.hpp>
21 #include <boost/accumulators/statistics/count.hpp>
22 #include <boost/accumulators/statistics/sum.hpp>
23 #include <boost/accumulators/statistics/p_square_cumulative_distribution.hpp> // for named parameter p_square_cumulative_distribution_num_cells
25 namespace boost
{ namespace accumulators
30 ///////////////////////////////////////////////////////////////////////////////
31 // weighted_p_square_cumulative_distribution_impl
32 // cumulative distribution calculation (as histogram)
34 @brief Histogram calculation of the cumulative distribution with the \f$P^2\f$ algorithm for weighted samples
36 A histogram of the sample cumulative distribution is computed dynamically without storing samples
37 based on the \f$ P^2 \f$ algorithm for weighted samples. The returned histogram has a specifiable
38 amount (num_cells) equiprobable (and not equal-sized) cells.
40 Note that applying importance sampling results in regions to be more and other regions to be less
41 accurately estimated than without importance sampling, i.e., with unweighted samples.
43 For further details, see
45 R. Jain and I. Chlamtac, The P^2 algorithmus for dynamic calculation of quantiles and
46 histograms without storing observations, Communications of the ACM,
47 Volume 28 (October), Number 10, 1985, p. 1076-1085.
49 @param p_square_cumulative_distribution_num_cells
51 template<typename Sample
, typename Weight
>
52 struct weighted_p_square_cumulative_distribution_impl
55 typedef typename
numeric::functional::multiplies
<Sample
, Weight
>::result_type weighted_sample
;
56 typedef typename
numeric::functional::average
<weighted_sample
, std::size_t>::result_type float_type
;
57 typedef std::vector
<std::pair
<float_type
, float_type
> > histogram_type
;
58 typedef std::vector
<float_type
> array_type
;
59 // for boost::result_of
60 typedef iterator_range
<typename
histogram_type::iterator
> result_type
;
62 template<typename Args
>
63 weighted_p_square_cumulative_distribution_impl(Args
const &args
)
64 : num_cells(args
[p_square_cumulative_distribution_num_cells
])
65 , heights(num_cells
+ 1)
66 , actual_positions(num_cells
+ 1)
67 , desired_positions(num_cells
+ 1)
68 , histogram(num_cells
+ 1)
73 template<typename Args
>
74 void operator ()(Args
const &args
)
76 this->is_dirty
= true;
78 std::size_t cnt
= count(args
);
79 std::size_t sample_cell
= 1; // k
80 std::size_t b
= this->num_cells
;
82 // accumulate num_cells + 1 first samples
85 this->heights
[cnt
- 1] = args
[sample
];
86 this->actual_positions
[cnt
- 1] = args
[weight
];
88 // complete the initialization of heights by sorting
91 //std::sort(this->heights.begin(), this->heights.end());
93 // TODO: we need to sort the initial samples (in heights) in ascending order and
94 // sort their weights (in actual_positions) the same way. The following lines do
95 // it, but there must be a better and more efficient way of doing this.
96 typename
array_type::iterator it_begin
, it_end
, it_min
;
98 it_begin
= this->heights
.begin();
99 it_end
= this->heights
.end();
103 while (it_begin
!= it_end
)
105 it_min
= std::min_element(it_begin
, it_end
);
106 std::size_t d
= std::distance(it_begin
, it_min
);
107 std::swap(*it_begin
, *it_min
);
108 std::swap(this->actual_positions
[pos
], this->actual_positions
[pos
+ d
]);
113 // calculate correct initial actual positions
114 for (std::size_t i
= 1; i
< b
; ++i
)
116 this->actual_positions
[i
] += this->actual_positions
[i
- 1];
122 // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values
123 if (args
[sample
] < this->heights
[0])
125 this->heights
[0] = args
[sample
];
126 this->actual_positions
[0] = args
[weight
];
129 else if (this->heights
[b
] <= args
[sample
])
131 this->heights
[b
] = args
[sample
];
136 typename
array_type::iterator it
;
137 it
= std::upper_bound(
138 this->heights
.begin()
139 , this->heights
.end()
143 sample_cell
= std::distance(this->heights
.begin(), it
);
146 // increment positions of markers above sample_cell
147 for (std::size_t i
= sample_cell
; i
< b
+ 1; ++i
)
149 this->actual_positions
[i
] += args
[weight
];
152 // determine desired marker positions
153 for (std::size_t i
= 1; i
< b
+ 1; ++i
)
155 this->desired_positions
[i
] = this->actual_positions
[0]
156 + numeric::average((i
-1) * (sum_of_weights(args
) - this->actual_positions
[0]), b
);
159 // adjust heights of markers 2 to num_cells if necessary
160 for (std::size_t i
= 1; i
< b
; ++i
)
162 // offset to desire position
163 float_type d
= this->desired_positions
[i
] - this->actual_positions
[i
];
165 // offset to next position
166 float_type dp
= this->actual_positions
[i
+ 1] - this->actual_positions
[i
];
168 // offset to previous position
169 float_type dm
= this->actual_positions
[i
- 1] - this->actual_positions
[i
];
172 float_type hp
= (this->heights
[i
+ 1] - this->heights
[i
]) / dp
;
173 float_type hm
= (this->heights
[i
- 1] - this->heights
[i
]) / dm
;
175 if ( ( d
>= 1. && dp
> 1. ) || ( d
<= -1. && dm
< -1. ) )
177 short sign_d
= static_cast<short>(d
/ std::abs(d
));
179 // try adjusting heights[i] using p-squared formula
180 float_type h
= this->heights
[i
] + sign_d
/ (dp
- dm
) * ( (sign_d
- dm
) * hp
+ (dp
- sign_d
) * hm
);
182 if ( this->heights
[i
- 1] < h
&& h
< this->heights
[i
+ 1] )
184 this->heights
[i
] = h
;
188 // use linear formula
191 this->heights
[i
] += hp
;
195 this->heights
[i
] -= hm
;
198 this->actual_positions
[i
] += sign_d
;
204 template<typename Args
>
205 result_type
result(Args
const &args
) const
209 this->is_dirty
= false;
211 // creates a vector of std::pair where each pair i holds
212 // the values heights[i] (x-axis of histogram) and
213 // actual_positions[i] / sum_of_weights (y-axis of histogram)
215 for (std::size_t i
= 0; i
< this->histogram
.size(); ++i
)
217 this->histogram
[i
] = std::make_pair(this->heights
[i
], numeric::average(this->actual_positions
[i
], sum_of_weights(args
)));
221 return make_iterator_range(this->histogram
);
225 std::size_t num_cells
; // number of cells b
226 array_type heights
; // q_i
227 array_type actual_positions
; // n_i
228 array_type desired_positions
; // n'_i
229 mutable histogram_type histogram
; // histogram
230 mutable bool is_dirty
;
233 } // namespace detail
235 ///////////////////////////////////////////////////////////////////////////////
236 // tag::weighted_p_square_cumulative_distribution
240 struct weighted_p_square_cumulative_distribution
241 : depends_on
<count
, sum_of_weights
>
242 , p_square_cumulative_distribution_num_cells
244 typedef accumulators::impl::weighted_p_square_cumulative_distribution_impl
<mpl::_1
, mpl::_2
> impl
;
248 ///////////////////////////////////////////////////////////////////////////////
249 // extract::weighted_p_square_cumulative_distribution
253 extractor
<tag::weighted_p_square_cumulative_distribution
> const weighted_p_square_cumulative_distribution
= {};
256 using extract::weighted_p_square_cumulative_distribution
;
258 }} // namespace boost::accumulators