1 // Copyright (c) 2008, Google Inc.
2 // All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 // All Rights Reserved.
33 // Author: Daniel Ford
35 #ifndef TCMALLOC_SAMPLER_H_
36 #define TCMALLOC_SAMPLER_H_
39 #include <stddef.h> // for size_t
41 #include <stdint.h> // for uint64_t, uint32_t, int32_t
43 #include <string.h> // for memcpy
44 #include "base/basictypes.h" // for ASSERT
45 #include "internal_logging.h" // for ASSERT
49 //-------------------------------------------------------------------
50 // Sampler to decide when to create a sample trace for an allocation
// Not thread safe: Each thread should have its own sampler object.
// Caller must use external synchronization if used
// from multiple threads.
55 // With 512K average sample step (the default):
56 // the probability of sampling a 4K allocation is about 0.00778
57 // the probability of sampling a 1MB allocation is about 0.865
58 // the probability of sampling a 1GB allocation is about 1.00000
// In general, the probability of sampling an allocation of size X
// given a flag value of Y (default 512K) is:
//   1 - e^(-X/Y)
63 // With 128K average sample step:
64 // the probability of sampling a 1MB allocation is about 0.99966
65 // the probability of sampling a 1GB allocation is about 1.0
66 // (about 1 - 2**(-26))
67 // With 1M average sample step:
68 // the probability of sampling a 4K allocation is about 0.00390
69 // the probability of sampling a 1MB allocation is about 0.632
70 // the probability of sampling a 1GB allocation is about 1.0
72 // The sampler works by representing memory as a long stream from
73 // which allocations are taken. Some of the bytes in this stream are
74 // marked and if an allocation includes a marked byte then it is
75 // sampled. Bytes are marked according to a Poisson point process
76 // with each byte being marked independently with probability
// p = 1/tcmalloc_sample_parameter.  This makes the probability
// of sampling an allocation of X bytes equal to the CDF of
// a geometric with mean tcmalloc_sample_parameter. (i.e. the
// probability that at least one byte in the range is marked). This
// is accurately given by the CDF of the corresponding exponential
// distribution : 1 - e^(-X/tcmalloc_sample_parameter)
83 // Independence of the byte marking ensures independence of
84 // the sampling of each allocation.
86 // This scheme is implemented by noting that, starting from any
87 // fixed place, the number of bytes until the next marked byte
88 // is geometrically distributed. This number is recorded as
// bytes_until_sample_.  Every allocation subtracts its size from this
// number until an allocation is larger than what remains.  When this
// happens the current allocation is sampled.
// When an allocation is sampled, bytes_until_sample_ is reset to
// a new independently sampled geometric number of bytes.  The
95 // memoryless property of the point process means that this may
96 // be taken as the number of bytes after the end of the current
97 // allocation until the next marked byte. This ensures that
98 // very large allocations which would intersect many marked bytes
99 // only result in a single call to PickNextSamplingPoint.
100 //-------------------------------------------------------------------
102 class PERFTOOLS_DLL_DECL Sampler
{
104 // Initialize this sampler.
105 // Passing a seed of 0 gives a non-deterministic
106 // seed value given by casting the object ("this")
107 void Init(uint32_t seed
);
110 // Record allocation of "k" bytes. Return true iff allocation
112 bool SampleAllocation(size_t k
);
114 // Generate a geometric with mean 512K (or FLAG_tcmalloc_sample_parameter)
115 size_t PickNextSamplingPoint();
117 // Initialize the statics for the Sampler class
118 static void InitStatics();
120 // Returns the current sample period
121 int GetSamplePeriod();
123 // The following are public for the purposes of testing
124 static uint64_t NextRandom(uint64_t rnd_
); // Returns the next prng value
125 static double FastLog2(const double & d
); // Computes Log2(x) quickly
126 static void PopulateFastLog2Table(); // Populate the lookup table
129 size_t bytes_until_sample_
; // Bytes until we sample next
130 uint64_t rnd_
; // Cheap random number generator
132 // Statics for the fast log
133 // Note that this code may not depend on anything in //util
134 // hence the duplication of functionality here
135 static const int kFastlogNumBits
= 10;
136 static const int kFastlogMask
= (1 << kFastlogNumBits
) - 1;
137 static double log_table_
[1<<kFastlogNumBits
]; // Constant
140 inline bool Sampler::SampleAllocation(size_t k
) {
141 if (bytes_until_sample_
< k
) {
142 bytes_until_sample_
= PickNextSamplingPoint();
145 bytes_until_sample_
-= k
;
150 // Inline functions which are public for testing purposes
152 // Returns the next prng value.
153 // pRNG is: aX+b mod c with a = 0x5DEECE66D, b = 0xB, c = 1<<48
154 // This is the lrand64 generator.
155 inline uint64_t Sampler::NextRandom(uint64_t rnd
) {
156 const uint64_t prng_mult
= 0x5DEECE66DLL
;
157 const uint64_t prng_add
= 0xB;
158 const uint64_t prng_mod_power
= 48;
159 const uint64_t prng_mod_mask
=
160 ~((~static_cast<uint64_t>(0)) << prng_mod_power
);
161 return (prng_mult
* rnd
+ prng_add
) & prng_mod_mask
;
164 // Adapted from //util/math/fastmath.[h|cc] by Noam Shazeer
165 // This mimics the VeryFastLog2 code in those files
166 inline double Sampler::FastLog2(const double & d
) {
168 COMPILE_ASSERT(sizeof(d
) == sizeof(uint64_t), DoubleMustBe64Bits
);
170 memcpy(&x
, &d
, sizeof(x
)); // we depend on the compiler inlining this
171 const uint32_t x_high
= x
>> 32;
172 const uint32_t y
= x_high
>> (20 - kFastlogNumBits
) & kFastlogMask
;
173 const int32_t exponent
= ((x_high
>> 20) & 0x7FF) - 1023;
174 return exponent
+ log_table_
[y
];
177 } // namespace tcmalloc
179 #endif // TCMALLOC_SAMPLER_H_