Merge Chromium + Blink git repositories
[chromium-blink-merge.git] / base / metrics / histogram.h
blob1f6e2a1ceeb7ac9f37aba10e2da37004d2d019dd
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 // Histogram is an object that aggregates statistics, and can summarize them in
6 // various forms, including ASCII graphical, HTML, and numerically (as a
7 // vector of numbers corresponding to each of the aggregating buckets).
9 // It supports calls to accumulate either time intervals (which are processed
10 // as integral number of milliseconds), or arbitrary integral units.
12 // For Histogram(exponential histogram), LinearHistogram and CustomHistogram,
13 // the minimum for a declared range is 1 (instead of 0), while the maximum is
14 // (HistogramBase::kSampleType_MAX - 1). Currently you can declare histograms
15 // with ranges exceeding those limits (e.g. 0 as minimal or
16 // HistogramBase::kSampleType_MAX as maximal), but those excesses will be
17 // silently clamped to those limits (for backwards compatibility with existing
18 // code). Best practice is to not exceed the limits.
20 // Each use of a histogram with the same name will reference the same underlying
21 // data, so it is safe to record to the same histogram from multiple locations
22 // in the code. It is a runtime error if all uses of the same histogram do not
23 // agree exactly in type, bucket size and range.
25 // For Histogram and LinearHistogram, the maximum for a declared range should
26 // always be strictly larger than (not equal to) the minimum. Zero and
27 // HistogramBase::kSampleType_MAX are implicitly added as first and last ranges,
28 // so the smallest legal bucket_count is 3. However CustomHistogram can have
29 // bucket count as 2 (when you give a custom ranges vector containing only 1
30 // range).
31 // For these 3 kinds of histograms, the max bucket count is always
32 // (Histogram::kBucketCount_MAX - 1).
34 // The buckets layout of class Histogram is exponential. For example, buckets
35 // might contain (sequentially) the count of values in the following intervals:
36 // [0,1), [1,2), [2,4), [4,8), [8,16), [16,32), [32,64), [64,infinity)
37 // That bucket allocation would actually result from construction of a histogram
38 // for values between 1 and 64, with 8 buckets, such as:
39 // Histogram count("some name", 1, 64, 8);
40 // Note that the underflow bucket [0,1) and the overflow bucket [64,infinity)
41 // are also counted by the constructor in the user supplied "bucket_count"
42 // argument.
43 // The above example has an exponential ratio of 2 (doubling the bucket width
44 // in each consecutive bucket). The Histogram class automatically calculates
45 // the smallest ratio that it can use to construct the number of buckets
46 // selected in the constructor. As another example, if you had 50 buckets,
47 // and millisecond time values from 1 to 10000, then the ratio between
48 // consecutive bucket widths will be approximately somewhere around the 50th
49 // root of 10000. This approach provides very fine grain (narrow) buckets
50 // at the low end of the histogram scale, but allows the histogram to cover a
51 // gigantic range with the addition of very few buckets.
53 // Usually we use macros to define and use a histogram, which are defined in
54 // base/metrics/histogram_macros.h. Note: Callers should include that header
55 // directly if they only access the histogram APIs through macros.
57 // Macros use a pattern involving a function static variable, that is a pointer
58 // to a histogram. This static is explicitly initialized on any thread
59 // that detects an uninitialized (NULL) pointer. The potentially racy
60 // initialization is not a problem as it is always set to point to the same
61 // value (i.e., the FactoryGet always returns the same value). FactoryGet
62 // is also completely thread safe, which results in a completely thread safe,
63 // and relatively fast, set of counters. To avoid races at shutdown, the static
64 // pointer is NOT deleted, and we leak the histograms at process termination.
66 #ifndef BASE_METRICS_HISTOGRAM_H_
67 #define BASE_METRICS_HISTOGRAM_H_
69 #include <map>
70 #include <string>
71 #include <vector>
73 #include "base/base_export.h"
74 #include "base/basictypes.h"
75 #include "base/compiler_specific.h"
76 #include "base/gtest_prod_util.h"
77 #include "base/logging.h"
78 #include "base/memory/scoped_ptr.h"
79 #include "base/metrics/bucket_ranges.h"
80 #include "base/metrics/histogram_base.h"
81 // TODO(asvitkine): Migrate callers to include this directly and remove this.
82 #include "base/metrics/histogram_macros.h"
83 #include "base/metrics/histogram_samples.h"
84 #include "base/time/time.h"
86 namespace base {
88 class BooleanHistogram;  // Defined later in this header.
89 class CustomHistogram;  // Defined later in this header.
90 class Histogram;  // Defined later in this header.
91 class LinearHistogram;  // Defined later in this header.
92 class Pickle;  // Forward declaration only; used by pointer below.
93 class PickleIterator;  // Forward declaration only; used by pointer below.
94 class SampleVector;  // Forward declaration only; used via scoped_ptr/const ref.
95 // Histogram with an exponential bucket layout; see the file comment above.
96 class BASE_EXPORT Histogram : public HistogramBase {
97 public:
98 // Maximum number of buckets in a histogram (16,384); defined out of line.
99 static const size_t kBucketCount_MAX;
101 typedef std::vector<Count> Counts;
103 //----------------------------------------------------------------------------
104 // For a valid histogram, input should follow these restrictions:
105 // minimum > 0 (if a minimum below 1 is specified, it will implicitly be
106 // normalized up to 1)
107 // maximum > minimum
108 // buckets > 2 [minimum buckets needed: underflow, overflow and the range]
109 // Additionally,
110 // buckets <= (maximum - minimum + 2) - this is to ensure that we don't have
111 // more buckets than the range of numbers; having more buckets than 1 per
112 // value in the range would be nonsensical.
113 static HistogramBase* FactoryGet(const std::string& name,
114 Sample minimum,
115 Sample maximum,
116 size_t bucket_count,
117 int32 flags);
118 static HistogramBase* FactoryTimeGet(const std::string& name,
119 base::TimeDelta minimum,
120 base::TimeDelta maximum,
121 size_t bucket_count,
122 int32 flags);
124 // Overloads of the above two functions that take a const char* |name| param,
125 // to avoid code bloat from the std::string constructor being inlined into
126 // call sites.
127 static HistogramBase* FactoryGet(const char* name,
128 Sample minimum,
129 Sample maximum,
130 size_t bucket_count,
131 int32 flags);
132 static HistogramBase* FactoryTimeGet(const char* name,
133 base::TimeDelta minimum,
134 base::TimeDelta maximum,
135 size_t bucket_count,
136 int32 flags);
138 static void InitializeBucketRanges(Sample minimum,
139 Sample maximum,
140 BucketRanges* ranges);
142 // This constant is for FindCorruption. Since snapshots of histograms are
143 // taken asynchronously relative to sampling, and our counting code currently
144 // does not prevent race conditions, it is pretty likely that we'll catch a
145 // redundant count that doesn't match the sample count. We allow for a
146 // certain amount of slop before flagging this as an inconsistency. Even with
147 // an inconsistency, we'll snapshot it again (for UMA in about a half hour),
148 // so we'll eventually get the data, if it was not the result of a corruption.
149 static const int kCommonRaceBasedCountMismatch;
151 // Check to see if bucket ranges, counts and tallies in the snapshot are
152 // consistent with the bucket ranges and checksums in our histogram. This can
153 // produce a false-alarm if a race occurred in the reading of the data during
154 // a SnapShot process, but should otherwise be false at all times (unless we
155 // have memory over-writes, or DRAM failures).
156 int FindCorruption(const HistogramSamples& samples) const override;
158 //----------------------------------------------------------------------------
159 // Accessors for factory construction, serialization and testing.
160 //----------------------------------------------------------------------------
161 Sample declared_min() const { return declared_min_; }
162 Sample declared_max() const { return declared_max_; }
163 virtual Sample ranges(size_t i) const;
164 virtual size_t bucket_count() const;
165 const BucketRanges* bucket_ranges() const { return bucket_ranges_; }
167 // This function validates histogram construction arguments. It returns false
168 // if some of the arguments are totally bad.
169 // Note: Currently it allows some bad input, e.g. 0 as minimum, but silently
170 // converts it to good input: 1.
171 // TODO(kaiwang): Be more restrictive and return false for any bad input, and
172 // make this a readonly validating function.
173 static bool InspectConstructionArguments(const std::string& name,
174 Sample* minimum,
175 Sample* maximum,
176 size_t* bucket_count);
178 // HistogramBase implementation:
179 HistogramType GetHistogramType() const override;
180 bool HasConstructionArguments(Sample expected_minimum,
181 Sample expected_maximum,
182 size_t expected_bucket_count) const override;
183 void Add(Sample value) override;
184 void AddCount(Sample value, int count) override;
185 scoped_ptr<HistogramSamples> SnapshotSamples() const override;
186 void AddSamples(const HistogramSamples& samples) override;
187 bool AddSamplesFromPickle(base::PickleIterator* iter) override;
188 void WriteHTMLGraph(std::string* output) const override;
189 void WriteAscii(std::string* output) const override;
191 protected:
192 // |ranges| should contain the underflow and overflow buckets. See top
193 // comments for example.
194 Histogram(const std::string& name,
195 Sample minimum,
196 Sample maximum,
197 const BucketRanges* ranges);
199 ~Histogram() override;
201 // HistogramBase implementation:
202 bool SerializeInfoImpl(base::Pickle* pickle) const override;
204 // Method to override to skip the display of the i'th bucket if it's empty.
205 virtual bool PrintEmptyBucket(size_t index) const;
207 // Get normalized size, relative to the ranges(i).
208 virtual double GetBucketSize(Count current, size_t i) const;
210 // Return a string description of what goes in a given bucket.
211 // Most commonly this is the numeric value, but in derived classes it may
212 // be a name (or string description) given to the bucket.
213 virtual const std::string GetAsciiBucketRange(size_t it) const;
215 private:
216 // Allow tests to corrupt our innards for testing purposes.
217 FRIEND_TEST_ALL_PREFIXES(HistogramTest, BoundsTest);
218 FRIEND_TEST_ALL_PREFIXES(HistogramTest, BucketPlacementTest);
219 FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptBucketBounds);
220 FRIEND_TEST_ALL_PREFIXES(HistogramTest, CorruptSampleCounts);
221 FRIEND_TEST_ALL_PREFIXES(HistogramTest, NameMatchTest);
222 FRIEND_TEST_ALL_PREFIXES(HistogramTest, AddCountTest);
224 friend class StatisticsRecorder; // To allow it to delete duplicates.
225 friend class StatisticsRecorderTest;
227 friend BASE_EXPORT_PRIVATE HistogramBase* DeserializeHistogramInfo(
228 base::PickleIterator* iter);
229 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
231 // Implementation of SnapshotSamples function.
232 scoped_ptr<SampleVector> SnapshotSampleVector() const;
234 //----------------------------------------------------------------------------
235 // Helpers for emitting Ascii graphic. Each method appends data to output.
237 void WriteAsciiImpl(bool graph_it,
238 const std::string& newline,
239 std::string* output) const;
241 // Find out how large (graphically) the largest bucket will appear to be.
242 double GetPeakBucketSize(const SampleVector& samples) const;
244 // Write a common header message describing this histogram.
245 void WriteAsciiHeader(const SampleVector& samples,
246 Count sample_count,
247 std::string* output) const;
249 // Write information about previous, current, and next buckets.
250 // Information such as cumulative percentage, etc.
251 void WriteAsciiBucketContext(const int64 past, const Count current,
252 const int64 remaining, const size_t i,
253 std::string* output) const;
255 // WriteJSON calls these.
256 void GetParameters(DictionaryValue* params) const override;
258 void GetCountAndBucketData(Count* count,
259 int64* sum,
260 ListValue* buckets) const override;
262 // Does not own this object. Should get from StatisticsRecorder.
263 const BucketRanges* bucket_ranges_;
265 Sample declared_min_; // Less than this goes into the first bucket.
266 Sample declared_max_; // Over this goes into the last bucket.
268 // Finally, provide the state that changes with the addition of each new
269 // sample.
270 scoped_ptr<SampleVector> samples_;
272 DISALLOW_COPY_AND_ASSIGN(Histogram);
273 };
275 //------------------------------------------------------------------------------
277 // LinearHistogram is a more traditional histogram, with evenly spaced
278 // buckets.
279 class BASE_EXPORT LinearHistogram : public Histogram {
280 public:
281 ~LinearHistogram() override;
283 /* |minimum| should start from 1; 0 as a minimum is invalid because 0 is an
284 implicit default underflow bucket. */
285 static HistogramBase* FactoryGet(const std::string& name,
286 Sample minimum,
287 Sample maximum,
288 size_t bucket_count,
289 int32 flags);
290 static HistogramBase* FactoryTimeGet(const std::string& name,
291 TimeDelta minimum,
292 TimeDelta maximum,
293 size_t bucket_count,
294 int32 flags);
296 // Overloads of the above two functions that take a const char* |name| param,
297 // to avoid code bloat from the std::string constructor being inlined into
298 // call sites.
299 static HistogramBase* FactoryGet(const char* name,
300 Sample minimum,
301 Sample maximum,
302 size_t bucket_count,
303 int32 flags);
304 static HistogramBase* FactoryTimeGet(const char* name,
305 TimeDelta minimum,
306 TimeDelta maximum,
307 size_t bucket_count,
308 int32 flags);
310 struct DescriptionPair {
311 Sample sample;
312 const char* description; // Null means end of a list of pairs.
313 };
315 // Create a LinearHistogram and store a list of number/text values for use in
316 // writing the histogram graph.
317 // |descriptions| can be NULL, which means no special descriptions to set. If
318 // it's not NULL, the last element in the array must have a NULL in its
319 // "description" field.
320 static HistogramBase* FactoryGetWithRangeDescription(
321 const std::string& name,
322 Sample minimum,
323 Sample maximum,
324 size_t bucket_count,
325 int32 flags,
326 const DescriptionPair descriptions[]);
328 static void InitializeBucketRanges(Sample minimum,
329 Sample maximum,
330 BucketRanges* ranges);
332 // Overridden from Histogram:
333 HistogramType GetHistogramType() const override;
335 protected:
336 LinearHistogram(const std::string& name,
337 Sample minimum,
338 Sample maximum,
339 const BucketRanges* ranges);
341 double GetBucketSize(Count current, size_t i) const override;
343 // If we have a description for a bucket, then return that. Otherwise
344 // let parent class provide a (numeric) description.
345 const std::string GetAsciiBucketRange(size_t i) const override;
347 // Skip printing of name for numeric range if we have a name (and if this is
348 // an empty bucket).
349 bool PrintEmptyBucket(size_t index) const override;
351 private:
352 friend BASE_EXPORT_PRIVATE HistogramBase* DeserializeHistogramInfo(
353 base::PickleIterator* iter);
354 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
356 // For some ranges, we store a printable description of a bucket range.
357 // If there is no description, then GetAsciiBucketRange() uses parent class
358 // to provide a description.
359 typedef std::map<Sample, std::string> BucketDescriptionMap;
360 BucketDescriptionMap bucket_description_;
362 DISALLOW_COPY_AND_ASSIGN(LinearHistogram);
363 };
365 //------------------------------------------------------------------------------
367 // BooleanHistogram is a LinearHistogram specialized for recording booleans.
368 class BASE_EXPORT BooleanHistogram : public LinearHistogram {
369 public:
370 static HistogramBase* FactoryGet(const std::string& name, int32 flags);
372 // Overload of the above function that takes a const char* |name| param,
373 // to avoid code bloat from the std::string constructor being inlined into
374 // call sites.
375 static HistogramBase* FactoryGet(const char* name, int32 flags);
377 HistogramType GetHistogramType() const override;
379 private:
380 BooleanHistogram(const std::string& name, const BucketRanges* ranges);
382 friend BASE_EXPORT_PRIVATE HistogramBase* DeserializeHistogramInfo(
383 base::PickleIterator* iter);
384 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
386 DISALLOW_COPY_AND_ASSIGN(BooleanHistogram);
387 };
389 //------------------------------------------------------------------------------
391 // CustomHistogram is a histogram for a set of custom integers.
392 class BASE_EXPORT CustomHistogram : public Histogram {
393 public:
394 // |custom_ranges| contains a vector of limits on ranges. Each limit should be
395 // > 0 and < kSampleType_MAX. (Currently 0 is still accepted for backward
396 // compatibility). The limits can be unordered or contain duplicates, but
397 // clients should not depend on this.
398 static HistogramBase* FactoryGet(const std::string& name,
399 const std::vector<Sample>& custom_ranges,
400 int32 flags);
402 // Overload of the above function that takes a const char* |name| param,
403 // to avoid code bloat from the std::string constructor being inlined into
404 // call sites.
405 static HistogramBase* FactoryGet(const char* name,
406 const std::vector<Sample>& custom_ranges,
407 int32 flags);
409 // Overridden from Histogram:
410 HistogramType GetHistogramType() const override;
412 // Helper method for transforming an array of valid enumeration values
413 // to the std::vector<int> expected by UMA_HISTOGRAM_CUSTOM_ENUMERATION.
414 // This function ensures that a guard bucket exists right after any
415 // valid sample value (unless the next higher sample is also a valid value),
416 // so that invalid samples never fall into the same bucket as valid samples.
417 // TODO(kaiwang): Change name to ArrayToCustomEnumRanges.
418 static std::vector<Sample> ArrayToCustomRanges(const Sample* values,
419 size_t num_values);
420 protected:
421 CustomHistogram(const std::string& name,
422 const BucketRanges* ranges);
424 // HistogramBase implementation:
425 bool SerializeInfoImpl(base::Pickle* pickle) const override;
427 double GetBucketSize(Count current, size_t i) const override;
429 private:
430 friend BASE_EXPORT_PRIVATE HistogramBase* DeserializeHistogramInfo(
431 base::PickleIterator* iter);
432 static HistogramBase* DeserializeInfoImpl(base::PickleIterator* iter);
434 static bool ValidateCustomRanges(const std::vector<Sample>& custom_ranges);
435 static BucketRanges* CreateBucketRangesFromCustomRanges(
436 const std::vector<Sample>& custom_ranges);
438 DISALLOW_COPY_AND_ASSIGN(CustomHistogram);
439 };
441 } // namespace base
443 #endif // BASE_METRICS_HISTOGRAM_H_