1 // Copyright (c) 2005, Google Inc.
2 // All rights reserved.
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 // Author: Sanjay Ghemawat
33 // A fast map from addresses to values. Assumes that addresses are
34 // clustered. The main use is intended to be for heap-profiling.
35 // May be too memory-hungry for other uses.
37 // We use a user-defined allocator/de-allocator so that we can use
38 // this data structure during heap-profiling.
40 // IMPLEMENTATION DETAIL:
42 // Some default definitions/parameters:
43 // * Block -- aligned 128-byte region of the address space
44 // * Cluster -- aligned 1-MB region of the address space
45 // * Block-ID -- block-number within a cluster
46 // * Cluster-ID -- Starting address of cluster divided by cluster size
48 // We use a three-level map to represent the state:
49 // 1. A hash-table maps from a cluster-ID to the data for that cluster.
50 // 2. For each non-empty cluster we keep an array indexed by
51 // block-ID tht points to the first entry in the linked-list
53 // 3. At the bottom, we keep a singly-linked list of all
54 // entries in a block (for non-empty blocks).
58 // | id->cluster |---> ...
60 // | id->cluster |---> Cluster
61 // +-------------+ +-------+ Data for one block
62 // | nil | +------------------------------------+
63 // | ----+---|->[addr/value]-->[addr/value]-->... |
64 // | nil | +------------------------------------+
70 // Note that we require zero-bytes of overhead for completely empty
71 // clusters. The minimum space requirement for a cluster is the size
72 // of the hash-table entry plus a pointer value for each block in
73 // the cluster. Empty blocks impose no extra space requirement.
75 // The cost of a lookup is:
76 // a. A hash-table lookup to find the cluster
77 // b. An array access in the cluster structure
78 // c. A traversal over the linked-list for a block
80 #ifndef BASE_ADDRESSMAP_INL_H_
81 #define BASE_ADDRESSMAP_INL_H_
86 #if defined HAVE_STDINT_H
87 #include <stdint.h> // to get uint16_t (ISO naming madness)
88 #elif defined HAVE_INTTYPES_H
89 #include <inttypes.h> // another place uint16_t might be defined
91 #include <sys/types.h> // our last best hope
94 // This class is thread-unsafe -- that is, instances of this class can
95 // not be accessed concurrently by multiple threads -- because the
96 // callback function for Iterate() may mutate contained values. If the
97 // callback functions you pass do not mutate their Value* argument,
98 // AddressMap can be treated as thread-compatible -- that is, it's
99 // safe for multiple threads to call "const" methods on this class,
100 // but not safe for one thread to call const methods on this class
101 // while another thread is calling non-const methods on the class.
102 template <class Value
>
105 typedef void* (*Allocator
)(size_t size
);
106 typedef void (*DeAllocator
)(void* ptr
);
107 typedef const void* Key
;
109 // Create an AddressMap that uses the specified allocator/deallocator.
110 // The allocator/deallocator should behave like malloc/free.
111 // For instance, the allocator does not need to return initialized memory.
112 AddressMap(Allocator alloc
, DeAllocator dealloc
);
115 // If the map contains an entry for "key", return it. Else return NULL.
116 inline const Value
* Find(Key key
) const;
117 inline Value
* FindMutable(Key key
);
119 // Insert <key,value> into the map. Any old value associated
120 // with key is forgotten.
121 void Insert(Key key
, Value value
);
123 // Remove any entry for key in the map. If an entry was found
124 // and removed, stores the associated value in "*removed_value"
125 // and returns true. Else returns false.
126 bool FindAndRemove(Key key
, Value
* removed_value
);
128 // Similar to Find but we assume that keys are addresses of non-overlapping
129 // memory ranges whose sizes are given by size_func.
130 // If the map contains a range into which "key" points
131 // (at its start or inside of it, but not at the end),
132 // return the address of the associated value
133 // and store its key in "*res_key".
135 // max_size specifies largest range size possibly in existence now.
136 typedef size_t (*ValueSizeFunc
)(const Value
& v
);
137 const Value
* FindInside(ValueSizeFunc size_func
, size_t max_size
,
138 Key key
, Key
* res_key
);
140 // Iterate over the address map calling 'callback'
141 // for all stored key-value pairs and passing 'arg' to it.
142 // We don't use full Closure/Callback machinery not to add
143 // unnecessary dependencies to this class with low-level uses.
145 inline void Iterate(void (*callback
)(Key
, Value
*, Type
), Type arg
) const;
148 typedef uintptr_t Number
;
150 // The implementation assumes that addresses inserted into the map
151 // will be clustered. We take advantage of this fact by splitting
152 // up the address-space into blocks and using a linked-list entry
155 // Size of each block. There is one linked-list for each block, so
156 // do not make the block-size too big. Oterwise, a lot of time
157 // will be spent traversing linked lists.
158 static const int kBlockBits
= 7;
159 static const int kBlockSize
= 1 << kBlockBits
;
161 // Entry kept in per-block linked-list
168 // We further group a sequence of consecutive blocks into a cluster.
169 // The data for a cluster is represented as a dense array of
170 // linked-lists, one list per contained block.
171 static const int kClusterBits
= 13;
172 static const Number kClusterSize
= 1 << (kBlockBits
+ kClusterBits
);
173 static const int kClusterBlocks
= 1 << kClusterBits
;
175 // We use a simple chaining hash-table to represent the clusters.
177 Cluster
* next
; // Next cluster in hash table chain
178 Number id
; // Cluster ID
179 Entry
* blocks
[kClusterBlocks
]; // Per-block linked-lists
182 // Number of hash-table entries. With the block-size/cluster-size
183 // defined above, each cluster covers 1 MB, so an 4K entry
184 // hash-table will give an average hash-chain length of 1 for 4GB of
186 static const int kHashBits
= 12;
187 static const int kHashSize
= 1 << 12;
189 // Number of entry objects allocated at a time
190 static const int ALLOC_COUNT
= 64;
192 Cluster
** hashtable_
; // The hash-table
193 Entry
* free_
; // Free list of unused Entry objects
195 // Multiplicative hash function:
196 // The value "kHashMultiplier" is the bottom 32 bits of
197 // int((sqrt(5)-1)/2 * 2^32)
198 // This is a good multiplier as suggested in CLR, Knuth. The hash
199 // value is taken to be the top "k" bits of the bottom 32 bits
200 // of the muliplied value.
201 static const uint32_t kHashMultiplier
= 2654435769u;
202 static int HashInt(Number x
) {
203 // Multiply by a constant and take the top bits of the result.
204 const uint32_t m
= static_cast<uint32_t>(x
) * kHashMultiplier
;
205 return static_cast<int>(m
>> (32 - kHashBits
));
208 // Find cluster object for specified address. If not found
209 // and "create" is true, create the object. If not found
210 // and "create" is false, return NULL.
212 // This method is bitwise-const if create is false.
213 Cluster
* FindCluster(Number address
, bool create
) {
215 const Number cluster_id
= address
>> (kBlockBits
+ kClusterBits
);
216 const int h
= HashInt(cluster_id
);
217 for (Cluster
* c
= hashtable_
[h
]; c
!= NULL
; c
= c
->next
) {
218 if (c
->id
== cluster_id
) {
223 // Create cluster if necessary
225 Cluster
* c
= New
<Cluster
>(1);
227 c
->next
= hashtable_
[h
];
234 // Return the block ID for an address within its cluster
235 static int BlockID(Number address
) {
236 return (address
>> kBlockBits
) & (kClusterBlocks
- 1);
239 //--------------------------------------------------------------
240 // Memory management -- we keep all objects we allocate linked
241 // together in a singly linked list so we can get rid of them
242 // when we are all done. Furthermore, we allow the client to
243 // pass in custom memory allocator/deallocator routines.
244 //--------------------------------------------------------------
247 // The real data starts here
250 Allocator alloc_
; // The allocator
251 DeAllocator dealloc_
; // The deallocator
252 Object
* allocated_
; // List of allocated objects
254 // Allocates a zeroed array of T with length "num". Also inserts
255 // the allocated block into a linked list so it can be deallocated
256 // when we are all done.
257 template <class T
> T
* New(int num
) {
258 void* ptr
= (*alloc_
)(sizeof(Object
) + num
*sizeof(T
));
259 memset(ptr
, 0, sizeof(Object
) + num
*sizeof(T
));
260 Object
* obj
= reinterpret_cast<Object
*>(ptr
);
261 obj
->next
= allocated_
;
263 return reinterpret_cast<T
*>(reinterpret_cast<Object
*>(ptr
) + 1);
267 // More implementation details follow:
269 template <class Value
>
270 AddressMap
<Value
>::AddressMap(Allocator alloc
, DeAllocator dealloc
)
275 hashtable_
= New
<Cluster
*>(kHashSize
);
278 template <class Value
>
279 AddressMap
<Value
>::~AddressMap() {
280 // De-allocate all of the objects we allocated
281 for (Object
* obj
= allocated_
; obj
!= NULL
; /**/) {
282 Object
* next
= obj
->next
;
288 template <class Value
>
289 inline const Value
* AddressMap
<Value
>::Find(Key key
) const {
290 return const_cast<AddressMap
*>(this)->FindMutable(key
);
293 template <class Value
>
294 inline Value
* AddressMap
<Value
>::FindMutable(Key key
) {
295 const Number num
= reinterpret_cast<Number
>(key
);
296 const Cluster
* const c
= FindCluster(num
, false/*do not create*/);
298 for (Entry
* e
= c
->blocks
[BlockID(num
)]; e
!= NULL
; e
= e
->next
) {
307 template <class Value
>
308 void AddressMap
<Value
>::Insert(Key key
, Value value
) {
309 const Number num
= reinterpret_cast<Number
>(key
);
310 Cluster
* const c
= FindCluster(num
, true/*create*/);
312 // Look in linked-list for this block
313 const int block
= BlockID(num
);
314 for (Entry
* e
= c
->blocks
[block
]; e
!= NULL
; e
= e
->next
) {
323 // Allocate a new batch of entries and add to free-list
324 Entry
* array
= New
<Entry
>(ALLOC_COUNT
);
325 for (int i
= 0; i
< ALLOC_COUNT
-1; i
++) {
326 array
[i
].next
= &array
[i
+1];
328 array
[ALLOC_COUNT
-1].next
= free_
;
335 e
->next
= c
->blocks
[block
];
336 c
->blocks
[block
] = e
;
339 template <class Value
>
340 bool AddressMap
<Value
>::FindAndRemove(Key key
, Value
* removed_value
) {
341 const Number num
= reinterpret_cast<Number
>(key
);
342 Cluster
* const c
= FindCluster(num
, false/*do not create*/);
344 for (Entry
** p
= &c
->blocks
[BlockID(num
)]; *p
!= NULL
; p
= &(*p
)->next
) {
347 *removed_value
= e
->value
;
348 *p
= e
->next
; // Remove e from linked-list
349 e
->next
= free_
; // Add e to free-list
358 template <class Value
>
359 const Value
* AddressMap
<Value
>::FindInside(ValueSizeFunc size_func
,
363 const Number key_num
= reinterpret_cast<Number
>(key
);
364 Number num
= key_num
; // we'll move this to move back through the clusters
366 const Cluster
* c
= FindCluster(num
, false/*do not create*/);
369 const int block
= BlockID(num
);
370 bool had_smaller_key
= false;
371 for (const Entry
* e
= c
->blocks
[block
]; e
!= NULL
; e
= e
->next
) {
372 const Number e_num
= reinterpret_cast<Number
>(e
->key
);
373 if (e_num
<= key_num
) {
374 if (e_num
== key_num
|| // to handle 0-sized ranges
375 key_num
< e_num
+ (*size_func
)(e
->value
)) {
379 had_smaller_key
= true;
382 if (had_smaller_key
) return NULL
; // got a range before 'key'
383 // and it did not contain 'key'
384 if (block
== 0) break;
385 // try address-wise previous block
386 num
|= kBlockSize
- 1; // start at the last addr of prev block
388 if (key_num
- num
> max_size
) return NULL
;
391 if (num
< kClusterSize
) return NULL
; // first cluster
392 // go to address-wise previous cluster to try
393 num
|= kClusterSize
- 1; // start at the last block of previous cluster
395 if (key_num
- num
> max_size
) return NULL
;
396 // Having max_size to limit the search is crucial: else
397 // we have to traverse a lot of empty clusters (or blocks).
398 // We can avoid needing max_size if we put clusters into
399 // a search tree, but performance suffers considerably
400 // if we use this approach by using stl::set.
404 template <class Value
>
405 template <class Type
>
406 inline void AddressMap
<Value
>::Iterate(void (*callback
)(Key
, Value
*, Type
),
408 // We could optimize this by traversing only non-empty clusters and/or blocks
409 // but it does not speed up heap-checker noticeably.
410 for (int h
= 0; h
< kHashSize
; ++h
) {
411 for (const Cluster
* c
= hashtable_
[h
]; c
!= NULL
; c
= c
->next
) {
412 for (int b
= 0; b
< kClusterBlocks
; ++b
) {
413 for (Entry
* e
= c
->blocks
[b
]; e
!= NULL
; e
= e
->next
) {
414 callback(e
->key
, &e
->value
, arg
);
421 #endif // BASE_ADDRESSMAP_INL_H_