initial
[prop.git] / include / AD / hash / bhash.h
blobfaf4c02c56eefb788c8601d7478c02f40ba3a918
1 //////////////////////////////////////////////////////////////////////////////
2 // NOTICE:
3 //
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
9 // your programs.
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
16 // code.
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
21 // Allen Leung
22 // 1994
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef hash_table_with_Brents_hashing_h
26 #define hash_table_with_Brents_hashing_h
28 ////////////////////////////////////////////////////////////////////////////
29 // Class BHashTable implements a hash table using Brent's reordering
30 // scheme\cite{Algorithms}.
31 ////////////////////////////////////////////////////////////////////////////
33 #include <string.h>
34 #include <AD/generic/ordering.h>
36 ////////////////////////////////////////////////////////////////////////////
37 // Class |BHashTable| is parameterized with the class of the
38 // key and the class of the value. Furthermore, the functions
39 // unsigned int hash(const K&); and
40 // Bool equal(const K&, const K&);
41 // must be defined by the client that uses this template.
42 ////////////////////////////////////////////////////////////////////////////
43 template <class K, class V>
44 class BHashTable {
46 K * keys; // the array of keys
47 V * values; // the array of values
48 char * status; // status of cell
49 int table_size; // size of the array
50 int elem_count; // number of elements
51 double max_load_ratio; // maximum load ratio (> 0 && < 1)
52 double growth_ratio; // amount to expand when resizing
53 int max_load; // maximum elements before resizing
55 public:
56 ////////////////////////////////////////////////////////////////////
57 // Constructor and destructor
58 ////////////////////////////////////////////////////////////////////
59 BHashTable(int initial_size = 32,
60 double max_load_ratio = 0.0,
61 double growth_ratio = 2.0);
62 ~BHashTable();
64 ////////////////////////////////////////////////////////////////////
65 // Assignment
66 ////////////////////////////////////////////////////////////////////
67 void operator = (const BHashTable&);
69 ////////////////////////////////////////////////////////////////////
70 // Selectors
71 ////////////////////////////////////////////////////////////////////
72 inline int capacity() const { return table_size; } // current capacity
73 inline int size() const { return elem_count; } // number of elements
74 inline Bool is_empty() const { return elem_count == 0; }
75 inline Bool is_full() const { return elem_count == table_size; }
76 inline Bool contains(const K& k) const { return lookup(k) != 0; }
77 inline const V& operator [] (const K& k) const { return value(lookup(k)); }
78 inline V& operator [] (const K& k) { return value(lookup(k)); }
80 ////////////////////////////////////////////////////////////////////
81 // Insertion and deletion.
82 ////////////////////////////////////////////////////////////////////
83 void clear(); // clears out the hash table
84 Ix lookup(const K&) const; // lookup entry by key
85 Ix insert(const K&, const V&); // insert a new entry
86 Bool remove(const K&); // remove an old entry
88 ////////////////////////////////////////////////////////////////////
89 // Iteration:
90 // first() start the iteration
91 // next() get index to the next element; or 0 if none
92 // key() get the key on index
93 // value() get the value on index
94 // Implementation note: Ix's are represented internally as 1-based
95 // array index.
96 ////////////////////////////////////////////////////////////////////
97 inline Ix first() const { return find_next(0); }
98 inline Ix next(Ix i) const { return find_next((int)i); }
99 inline const K& key(Ix i) const { return keys[(int)i-1]; }
100 inline const V& value(Ix i) const { return values[(int)i-1]; }
101 inline V& value(Ix i) { return values[(int)i-1]; }
103 ////////////////////////////////////////////////////////////////////
104 // Resize the hash table
105 ////////////////////////////////////////////////////////////////////
106 void resize(int new_size = 0);
108 private:
109 ////////////////////////////////////////////////////////////////////
110 // Addition implementation methods
111 ////////////////////////////////////////////////////////////////////
112 inline Ix find_next(int i) const; // locate the next used entry
115 //////////////////////////////////////////////////////////////////////////
116 // Implementation of the template methods
117 //////////////////////////////////////////////////////////////////////////
119 //////////////////////////////////////////////////////////////////////////
120 // Locate the next used cell; called by the iterator functions
121 //////////////////////////////////////////////////////////////////////////
122 template <class K, class V>
123 inline Ix BHashTable<K,V>::find_next(register int i) const
124 { while (i < table_size) if (status[i++] == Cell_used) return (Ix)i;
125 return (Ix)0;
128 //////////////////////////////////////////////////////////////////////////
129 // Create a new table.
130 // Implementation note: each end of each chain of the buckets are
131 // linked to the next. This makes it possible to find the next entry
132 // during iteration quickly.
133 //////////////////////////////////////////////////////////////////////////
134 template <class K, class V>
135 BHashTable<K,V>::BHashTable
136 (int size, double maximum_load_ratio, double growth)
137 : keys(new K [size]), values(new V [size]),
138 status(new char [size]),
139 table_size(size)
140 { clear();
141 if (maximum_load_ratio >= 0.9 || maximum_load_ratio <= 0.1)
142 max_load_ratio = .7;
143 else
144 max_load_ratio = maximum_load_ratio;
145 if (growth <= 1.2 || growth >= 5.0) growth_ratio = 2.0;
146 else growth_ratio = growth;
147 max_load = (int)(max_load_ratio * size);
148 if (max_load >= size) max_load = size - 1;
151 //////////////////////////////////////////////////////////////////////////
152 // Destroy a table
153 //////////////////////////////////////////////////////////////////////////
154 template <class K, class V>
155 BHashTable<K,V>::~BHashTable()
156 { delete [] keys; delete [] values; delete [] status; }
158 //////////////////////////////////////////////////////////////////////////
159 // Assignment
160 //////////////////////////////////////////////////////////////////////////
161 template <class K, class V>
162 void BHashTable<K,V>::operator = (const BHashTable<K,V>& t)
163 { if (this != &t) {
164 delete [] keys; delete [] values; delete [] status;
165 elem_count = t.elem_count;
166 table_size = t.table_size;
167 keys = new K [table_size];
168 values = new V [table_size];
169 status = new char [table_size];
170 for (int i = 0; i < table_size; i++) {
171 if ((status[i] = t.status[i]) == Cell_used) {
172 keys[i] = t.keys[i]; values[i] = t.values[i];
178 //////////////////////////////////////////////////////////////////////////
179 // Clear a table.
180 // We'll traverse thru all the buckets and delete each one iteratively.
181 //////////////////////////////////////////////////////////////////////////
182 template <class K, class V>
183 void BHashTable<K,V>::clear()
184 { memset(status,0,table_size); elem_count = 0; }
186 //////////////////////////////////////////////////////////////////////////
187 // Lookup an entry by key; if the entry is not found, return (Ix)0.
188 //////////////////////////////////////////////////////////////////////////
189 template <class K, class V>
190 Ix BHashTable<K,V>::lookup(const K& key) const
191 { unsigned int h = hash(key);
192 unsigned int i = h % table_size;
193 unsigned int inc = 0; // increment
194 unsigned int last;
196 ////////////////////////////////////////////////////////////////////
197 // Since the size of the hash table is not necessary a prime,
198 // care must be taken so that each probing sequence covers the
199 // entire table. The simple strategy of computing new location as
200 // i = (i + inc) % table_size
201 // cannot be used.
202 ////////////////////////////////////////////////////////////////////
203 for (;;) {
204 switch (status[i]) {
205 case Cell_unused: return (Ix)0;
206 case Cell_used: if (equal(key,keys[i])) return (Ix)(i+1);
208 ////////////////////////////////////////////////////////////////////
209 // Compute increment only if the initial probe fails.
210 ////////////////////////////////////////////////////////////////////
211 if (inc == 0) {
212 // recycle those higher order bits of hash value
213 inc = ( h / table_size ) % table_size;
214 if (inc == 0) inc = 1; // use linear probing if all else fails
215 last = i;
217 i += inc;
218 if (i >= table_size) i -= table_size;
219 if (i == last) { last = ++i; }
223 //////////////////////////////////////////////////////////////////////////
224 // Insert a new entry; there are two different cases of behavior:
225 // (1) If the key doesn't already exists, new key/value pair will be
226 // inserted into the table.
227 // (2) If the key already exists, then the old value will be overwritten
228 // by the new value.
229 // Also, if the number of elements have exceeded the maximum load,
230 // the table will be automatically resized.
231 //////////////////////////////////////////////////////////////////////////
232 template <class K, class V>
233 Ix BHashTable<K,V>::insert(const K& key, const V& value)
235 /////////////////////////////////////////////////////////////////////
236 // Make sure we have at least one unused cell.
237 /////////////////////////////////////////////////////////////////////
238 if (elem_count >= max_load) resize();
239 unsigned int h = hash(key);
240 unsigned int i = h % table_size;
241 unsigned int inc = 0;
242 unsigned int last;
243 int deleted;
245 /////////////////////////////////////////////////////////////////////
246 // Loop until one of the following:
247 // (1) The key is found; in which case the value is updated.
248 // (2) An unused cell is found; then we'll use the first
249 // deleted cell found along the way. If there is none,
250 // we'll use the unused cell. This is done to minimize
251 // the effect of contamination.
252 /////////////////////////////////////////////////////////////////////
253 for (deleted = -1;;) {
254 switch (status[i]) {
255 case Cell_deleted: if (deleted < 0) deleted = i; break;
256 case Cell_unused: goto found;
257 case Cell_used: if (equal(key,keys[i]))
258 { values[i] = value; return (Ix)(i+1); }
260 if (inc == 0) {
261 // recycle those higher order bits of hash value
262 inc = ( h / table_size ) % table_size;
263 if (inc == 0) inc = 1; // use linear probing if all else fails
264 last = i;
266 i += inc;
267 if (i >= table_size) i -= table_size;
268 if (i == last) { last = ++i; }
270 found:
271 if (deleted >= 0) i = deleted; elem_count++;
272 keys[i] = key; values[i] = value; status[i] = Cell_used;
273 return (Ix)(i+1);
276 //////////////////////////////////////////////////////////////////////////
277 // Resizing the hash table. All entries are completed rehashed.
278 //////////////////////////////////////////////////////////////////////////
279 template <class K, class V>
280 void BHashTable<K,V>::resize(int new_size)
281 { if (new_size <= elem_count) new_size = (int)(table_size * growth_ratio);
283 char * new_status = new char [ new_size ];
284 K * new_keys = new K [ new_size ];
285 V * new_values = new V [ new_size ];
286 memset(new_status,0,new_size);
288 //////////////////////////////////////////////////////////////////
289 // Rehash all used cells one by one. Notice that since all keys
290 // are unique, we don't have to do any comparison.
291 //////////////////////////////////////////////////////////////////
292 for (int i = 0; i < table_size; i++) {
293 if (status[i] == Cell_used) {
294 unsigned int h = hash(keys[i]);
295 int j = h % new_size;
296 unsigned int inc = 0, last;
297 for (;;) {
298 if (new_status[j] != Cell_used) {
299 new_keys[j] = keys[i]; new_values[j] = values[i];
300 new_status[j] = Cell_used; break;
302 if (inc == 0) {
303 inc = ( h / new_size ) % new_size;
304 if (inc == 0) inc = 1; last = j;
306 j += inc;
307 if (j >= new_size) j -= new_size;
308 if (j == last) { last = ++j; }
312 delete [] keys; delete [] values; delete [] status;
313 keys = new_keys; values = new_values; status = new_status;
314 table_size = new_size;
315 max_load = (int)(max_load_ratio * table_size);
318 //////////////////////////////////////////////////////////////////////////
319 // Remove an entry from the table; there are two different cases:
320 // (1) If the key exists within the table, the key/value pair will be
321 // removed; otherwise
322 // (2) The table will be unaltered.
323 // If the removal operation successfully deletes the entry,
324 // we'll also return true to the client.
325 //////////////////////////////////////////////////////////////////////////
326 template <class K, class V>
327 Bool BHashTable<K,V>::remove(const K& key)
328 { Ix i;
329 ///////////////////////////////////////////////////////////////////
330 // We'll just call lookup() to do the dirty work of locating the
331 // appropriate entry.
332 ///////////////////////////////////////////////////////////////////
333 if ((i = lookup(key))) {
334 elem_count--; status[(int)i-1] = Cell_deleted; return true;
335 } else return false;
338 #endif