3 Currently, two kinds of Open Addressed hash tables are available:
8 Please, consult each class documentation for more information
12 class HashTableError(Exception):
13 """Base class for hash table exceptions."""
16 class TableFull(HashTableError
):
17 """Raised when the hash table is full."""
19 return "<cannot insert, hash table is full>"
21 class _HashTable(dict):
22 """Base class for hash tables.
24 Defines the methods and attributes used by any hash table
27 def __init__(self
, positions
):
28 """Hash table constructor.
30 Parameter 'positions' is the number of positions in the
31 hash table, ie. its maximum size.
33 self
.positions
= positions
34 self
.nr_collisions
= 0
35 self
.gr_collision
= -1
40 """Hash function from module-init-tools' depmod.
42 Parameter 'name' is a string, whose hash coding will be
45 value
= 0x238F13AF * len(name
)
47 for i
in range(len(name
)):
48 value
+= (ord(name
[i
]) << (i
*5 % 24))
49 return (1103515243 * value
+ 12345)
52 """Return hash table's current size."""
56 """Print hash table's utilization info."""
57 # XXX: Makes this more generic
58 lfactor
= self
.size() / float(self
.positions
)
59 probes
= 1 / (1 - lfactor
)
60 print '-> Hash table statistics'
61 print ' Table size: %4d' % self
.positions
62 print ' o free: %4d' % (self
.positions
-self
.size())
63 print ' o in use: %4d' % self
.size()
64 print ' Perfect hashing: %4d' % self
.no_collision
65 print ' Collisions: %4d' % self
.nr_collisions
66 print ' o nr keys: %4d' % (self
.size()-self
.no_collision
)
67 print ' o greater: %4d' % (self
.gr_collision
+ 1)
68 print ' Load factor: %4.2f' % lfactor
69 print ' P. to probe: %4.2f' % probes
72 def load_from_file(self
, fname
):
73 """Load data from file into the hash table.
75 Parameter 'fname' is the file path.
77 The file is assumed to have one data per line, a trailing
78 new-line character is removed if present.
80 The hash character (#) is considered a comment, therefore
85 name
= line
.rstrip('\n')
91 class OpenAddressedDH(_HashTable
):
92 """Open Addressed hash table (Double Hashing).
95 o Doesn't suffer of primary clustering
98 o Table's size must be prime
99 o Bad cache performance is compared with Linear probing
101 def hash1(self
, key
):
102 return (key
% self
.positions
)
104 def hash2(self
, key
):
105 hash = (key
% (self
.positions
- 2)) + 1
108 def dhash(self
, name
):
109 key
= self
.hash(name
)
110 hash1
= self
.hash1(key
)
111 hash2
= self
.hash2(key
)
112 return (hash1
, hash2
)
114 def insert(self
, name
):
115 """Insert name into the hash table.
117 If the table is full a TableFull exception will be raised.
119 Complexity is O(1) if we approximate uniform hashing well.
121 # Note: This function does some debug info gathering, for
122 # full speed we could rename it to 'debug_insert()' and
123 # create a new insert() method w/o that additional code
125 hash1
, hash2
= self
.dhash(name
)
127 for i
in range(self
.positions
):
128 hash = (hash1
+ i
*hash2
) % self
.positions
129 if not self
.has_key(hash):
132 self
.no_collision
+= 1
133 elif i
> self
.gr_collision
:
134 self
.gr_collision
= i
136 self
.nr_collisions
+= 1
139 def lookup(self
, name
):
140 """Lookup name in the hash table.
142 If name is in the hash table its hash coding is returned,
143 otherwise this function returns -1.
145 Complexity is O(1) if we approximate uniform hashing well.
147 hash1
, hash2
= self
.dhash(name
)
149 for i
in range(self
.positions
):
150 hash = (hash1
+ i
*hash2
) % self
.positions
151 if self
.has_key(hash):
152 if self
[hash] == name
:
156 class OpenAddressedLP(_HashTable
):
157 """Open Addressed hash table (Linear Probing).
160 o Good cache performance
161 o Implementation is simple
162 o Doesn't require a prime number as the table's size
165 o Suffers of primary clustering (ie, create long sequences
168 Use this when not suffering of primary clustering, otherwise use
171 def insert(self
, name
):
172 """Insert name into the hash table.
174 If the table is full a TableFull exception will be raised.
176 Complexity is O(1) if we approximate uniform hashing well.
178 # Note: This function does some debug info gathering, for
179 # full speed we could rename it to 'debug_insert()' and
180 # create a new insert() method w/o that additional code
182 hash = self
.hash(name
) % self
.positions
184 for i
in range(self
.positions
):
185 if not self
.has_key(hash):
188 self
.no_collision
+= 1
189 elif i
> self
.gr_collision
:
190 self
.gr_collision
= i
192 self
.nr_collisions
+= 1
193 hash = (hash + 1) % self
.positions
196 def lookup(self
, name
):
197 """Lookup name in the hash table.
199 If name is in the hash table its hash coding is returned,
200 otherwise this function returns -1.
202 Complexity is O(1) if we approximate uniform hashing well.
205 hash = self
.hash(name
) % self
.positions
207 for i
in range(self
.positions
):
208 if self
.has_key(hash):
209 if self
[hash] == name
:
211 hash = (hash + 1) % self
.positions
214 class Chained(_HashTable
):
217 # 1. Review, was written in one shot
218 # 2. Write the print_status() method
220 def insert(self
, name
):
222 hash = self
.hash(name
) % self
.positions
224 if not self
.has_key(hash):
230 def lookup(self
, name
):
232 hash = self
.hash(name
) % self
.positions
234 if self
.has_key(hash):