1 //////////////////////////////////////////////////////////////////////////////
4 // ADLib, Prop and their related set of tools and documentation are in the
5 // public domain. The author(s) of this software reserve no copyrights on
6 // the source code and any code generated using the tools. You are encouraged
7 // to use ADLib and Prop to develop software, in both academic and commercial
8 // settings, and are free to incorporate any part of ADLib and Prop into
11 // Although you are under no obligation to do so, we strongly recommend that
12 // you give away all software developed using our tools.
14 // We also ask that credit be given to us when ADLib and/or Prop are used in
15 // your programs, and that this notice be preserved intact in all the source
18 // This software is still under development and we welcome any suggestions
19 // and help from the users.
23 //////////////////////////////////////////////////////////////////////////////
25 #ifndef hash_table_with_double_hashing2_h
26 #define hash_table_with_double_hashing2_h
28 ////////////////////////////////////////////////////////////////////////////
29 // Class DHashTable2 implements a hash table using a double hashing
30 // scheme\cite{Algorithms}.
31 ////////////////////////////////////////////////////////////////////////////
34 #include <AD/generic/ordering.h>
36 ////////////////////////////////////////////////////////////////////////////
37 // Class |DHashTable2| is parameterized with the class of the
38 // key and the class of the value. Furthermore, the functions
39 // unsigned int hash(const K&); and
40 // Bool equal(const K&, const K&);
41 // must be defined by the client that uses this template.
42 ////////////////////////////////////////////////////////////////////////////
43 template <class K
, class V
, class C
>
46 K
* keys
; // the array of keys
47 V
* values
; // the array of values
48 char * status
; // status of cell
49 int table_size
; // size of the array
50 int elem_count
; // number of elements
51 double max_load_ratio
; // maximum load ratio (> 0 && < 1)
52 double growth_ratio
; // amount to expand when resizing
53 int max_load
; // maximum elements before resizing
56 ////////////////////////////////////////////////////////////////////
57 // Constructor and destructor
58 ////////////////////////////////////////////////////////////////////
59 DHashTable2(int initial_size
= 32,
60 double max_load_ratio
= 0.0,
61 double growth_ratio
= 2.0);
64 ////////////////////////////////////////////////////////////////////
66 ////////////////////////////////////////////////////////////////////
67 void operator = (const DHashTable2
&);
69 ////////////////////////////////////////////////////////////////////
71 ////////////////////////////////////////////////////////////////////
72 inline int capacity() const { return table_size
; } // current capacity
73 inline int size() const { return elem_count
; } // number of elements
74 inline Bool
is_empty() const { return elem_count
== 0; }
75 inline Bool
is_full() const { return elem_count
== table_size
; }
76 inline Bool
contains(const K
& k
) const { return lookup(k
) != 0; }
77 inline const V
& operator [] (const K
& k
) const { return value(lookup(k
)); }
78 inline V
& operator [] (const K
& k
) { return value(lookup(k
)); }
80 ////////////////////////////////////////////////////////////////////
81 // Insertion and deletion.
82 ////////////////////////////////////////////////////////////////////
83 void clear(); // clears out the hash table
84 Ix
lookup(const K
&) const; // lookup entry by key
85 Ix
insert(const K
&, const V
&); // insert a new entry
86 Bool
remove(const K
&); // remove an old entry
88 ////////////////////////////////////////////////////////////////////
90 // first() start the iteration
91 // next() get index to the next element; or 0 if none
92 // key() get the key on index
93 // value() get the value on index
94 // Implementation note: Ix's are represented internally as 1-based
96 ////////////////////////////////////////////////////////////////////
97 inline Ix
first() const { return find_next(0); }
98 inline Ix
next(Ix i
) const { return find_next((int)i
); }
99 inline const K
& key(Ix i
) const { return keys
[(int)i
-1]; }
100 inline const V
& value(Ix i
) const { return values
[(int)i
-1]; }
101 inline V
& value(Ix i
) { return values
[(int)i
-1]; }
103 ////////////////////////////////////////////////////////////////////
104 // Resize the hash table
105 ////////////////////////////////////////////////////////////////////
106 void resize(int new_size
= 0);
109 ////////////////////////////////////////////////////////////////////
110 // Addition implementation methods
111 ////////////////////////////////////////////////////////////////////
112 inline Ix
find_next(int i
) const
113 { while (i
< table_size
) if (status
[i
++] == Cell_used
) return (Ix
)i
;
118 //////////////////////////////////////////////////////////////////////////
119 // Implementation of the template methods
120 //////////////////////////////////////////////////////////////////////////
122 //////////////////////////////////////////////////////////////////////////
123 // Create a new table.
124 // Implementation note: each end of each chain of the buckets are
125 // linked to the next. This makes it possible to find the next entry
126 // during iteration quickly.
127 //////////////////////////////////////////////////////////////////////////
128 template <class K
, class V
, class C
>
129 DHashTable2
<K
,V
,C
>::DHashTable2
130 (int size
, double maximum_load_ratio
, double growth
)
131 : table_size(size
), keys(new K
[size
]), values(new V
[size
]),
132 status(new char [size
])
134 if (maximum_load_ratio
>= 0.9 || maximum_load_ratio
<= 0.1)
137 max_load_ratio
= maximum_load_ratio
;
138 if (growth
<= 1.2 || growth
>= 5.0) growth_ratio
= 2.0;
139 else growth_ratio
= growth
;
140 max_load
= (int)(max_load_ratio
* size
);
141 if (max_load
>= size
) max_load
= size
- 1;
144 //////////////////////////////////////////////////////////////////////////
146 //////////////////////////////////////////////////////////////////////////
147 template <class K
, class V
, class C
>
148 DHashTable2
<K
,V
,C
>::~DHashTable2()
149 { delete [] keys
; delete [] values
; delete [] status
; }
151 //////////////////////////////////////////////////////////////////////////
153 //////////////////////////////////////////////////////////////////////////
154 template <class K
, class V
, class C
>
155 void DHashTable2
<K
,V
,C
>::operator = (const DHashTable2
<K
,V
,C
>& t
)
157 delete [] keys
; delete [] values
; delete [] status
;
158 elem_count
= t
.elem_count
;
159 table_size
= t
.table_size
;
160 keys
= new K
[table_size
];
161 values
= new V
[table_size
];
162 status
= new char [table_size
];
163 for (int i
= 0; i
< table_size
; i
++) {
164 if ((status
[i
] = t
.status
[i
]) == Cell_used
) {
165 keys
[i
] = t
.keys
[i
]; values
[i
] = t
.values
[i
];
171 //////////////////////////////////////////////////////////////////////////
173 // We'll traverse thru all the buckets and delete each one iteratively.
174 //////////////////////////////////////////////////////////////////////////
175 template <class K
, class V
, class C
>
176 void DHashTable2
<K
,V
,C
>::clear()
177 { memset(status
,0,table_size
); elem_count
= 0; }
179 //////////////////////////////////////////////////////////////////////////
180 // Lookup an entry by key; if the entry is not found, return (Ix)0.
181 //////////////////////////////////////////////////////////////////////////
182 template <class K
, class V
, class C
>
183 Ix DHashTable2
<K
,V
,C
>::lookup(const K
& key
) const
184 { unsigned int h
= C::hash(key
);
185 unsigned int i
= h
% table_size
;
186 unsigned int inc
= 0; // increment
189 ////////////////////////////////////////////////////////////////////
190 // Since the size of the hash table is not necessary a prime,
191 // care must be taken so that each probing sequence covers the
192 // entire table. The simple strategy of computing new location as
193 // i = (i + inc) % table_size
195 ////////////////////////////////////////////////////////////////////
198 case Cell_unused
: return (Ix
)0;
199 case Cell_used
: if (C::equal(key
,keys
[i
])) return (Ix
)(i
+1);
201 ////////////////////////////////////////////////////////////////////
202 // Compute increment only if the initial probe fails.
203 ////////////////////////////////////////////////////////////////////
205 // recycle those higher order bits of hash value
206 inc
= ( h
/ table_size
) % table_size
;
207 if (inc
== 0) inc
= 1; // use linear probing if all else fails
211 if (i
>= table_size
) i
-= table_size
;
212 if (i
== last
) { last
= ++i
; }
216 //////////////////////////////////////////////////////////////////////////
217 // Insert a new entry; there are two different cases of behavior:
218 // (1) If the key doesn't already exists, new key/value pair will be
219 // inserted into the table.
220 // (2) If the key already exists, then the old value will be overwritten
222 // Also, if the number of elements have exceeded the maximum load,
223 // the table will be automatically resized.
224 //////////////////////////////////////////////////////////////////////////
225 template <class K
, class V
, class C
>
226 Ix DHashTable2
<K
,V
,C
>::insert(const K
& key
, const V
& value
)
228 /////////////////////////////////////////////////////////////////////
229 // Make sure we have at least one unused cell.
230 /////////////////////////////////////////////////////////////////////
231 if (elem_count
>= max_load
) resize();
232 unsigned int h
= C::hash(key
);
233 unsigned int i
= h
% table_size
;
234 unsigned int inc
= 0;
238 /////////////////////////////////////////////////////////////////////
239 // Loop until one of the following:
240 // (1) The key is found; in which case the value is updated.
241 // (2) An unused cell is found; then we'll use the first
242 // deleted cell found along the way. If there is none,
243 // we'll use the unused cell. This is done to minimize
244 // the effect of contamination.
245 /////////////////////////////////////////////////////////////////////
246 for (deleted
= -1;;) {
248 case Cell_deleted
: if (deleted
< 0) deleted
= i
; break;
249 case Cell_unused
: goto found
;
250 case Cell_used
: if (C::equal(key
,keys
[i
]))
251 { values
[i
] = value
; return (Ix
)(i
+1); }
254 // recycle those higher order bits of hash value
255 inc
= ( h
/ table_size
) % table_size
;
256 if (inc
== 0) inc
= 1; // use linear probing if all else fails
260 if (i
>= table_size
) i
-= table_size
;
261 if (i
== last
) { last
= ++i
; }
264 if (deleted
>= 0) i
= deleted
; elem_count
++;
265 keys
[i
] = key
; values
[i
] = value
; status
[i
] = Cell_used
;
269 //////////////////////////////////////////////////////////////////////////
270 // Resizing the hash table. All entries are completed rehashed.
271 //////////////////////////////////////////////////////////////////////////
272 template <class K
, class V
, class C
>
273 void DHashTable2
<K
,V
,C
>::resize(int new_size
)
274 { if (new_size
<= elem_count
) new_size
= (int)(table_size
* growth_ratio
);
276 char * new_status
= new char [ new_size
];
277 K
* new_keys
= new K
[ new_size
];
278 V
* new_values
= new V
[ new_size
];
279 memset(new_status
,0,new_size
);
281 //////////////////////////////////////////////////////////////////
282 // Rehash all used cells one by one. Notice that since all keys
283 // are unique, we don't have to do any comparison.
284 //////////////////////////////////////////////////////////////////
285 for (int i
= 0; i
< table_size
; i
++) {
286 if (status
[i
] == Cell_used
) {
287 unsigned int h
= C::hash(keys
[i
]);
288 int j
= h
% new_size
;
289 unsigned int inc
= 0, last
;
291 if (new_status
[j
] != Cell_used
) {
292 new_keys
[j
] = keys
[i
]; new_values
[j
] = values
[i
];
293 new_status
[j
] = Cell_used
; break;
296 inc
= ( h
/ new_size
) % new_size
;
297 if (inc
== 0) inc
= 1; last
= j
;
300 if (j
>= new_size
) j
-= new_size
;
301 if (j
== last
) { last
= ++j
; }
305 delete [] keys
; delete [] values
; delete [] status
;
306 keys
= new_keys
; values
= new_values
; status
= new_status
;
307 table_size
= new_size
;
308 max_load
= (int)(max_load_ratio
* table_size
);
311 //////////////////////////////////////////////////////////////////////////
312 // Remove an entry from the table; there are two different cases:
313 // (1) If the key exists within the table, the key/value pair will be
314 // removed; otherwise
315 // (2) The table will be unaltered.
316 // If the removal operation successfully deletes the entry,
317 // we'll also return true to the client.
318 //////////////////////////////////////////////////////////////////////////
319 template <class K
, class V
, class C
>
320 Bool DHashTable2
<K
,V
,C
>::remove(const K
& key
)
322 ///////////////////////////////////////////////////////////////////
323 // We'll just call lookup() to do the dirty work of locating the
324 // appropriate entry.
325 ///////////////////////////////////////////////////////////////////
326 if ((i
= lookup(key
))) {
327 elem_count
--; status
[(int)i
-1] = Cell_deleted
; return true;