Introduce old redir program
[lcapit-junk-code.git] / mega-sena / python / hashtable.py
blob4ec1d2b74c416b457109f1f52471721255334c9d
1 """Hash table classes.
3 Currently, two kinds of Open Addressed hash tables are available:
5 o Double Hashing
6 o Linear Probing
8 Please, consult each class documentation for more information
9 about them.
10 """
12 class HashTableError(Exception):
13 """Base class for hash table exceptions."""
14 pass
16 class TableFull(HashTableError):
17 """Raised when the hash table is full."""
18 def __str__(self):
19 return "<cannot insert, hash table is full>"
21 class _HashTable(dict):
22 """Base class for hash tables.
24 Defines the methods and attributes used by any hash table
25 implementation.
26 """
27 def __init__(self, positions):
28 """Hash table constructor.
30 Parameter 'positions' is the number of positions in the
31 hash table, ie. its maximum size.
32 """
33 self.positions = positions
34 self.nr_collisions = 0
35 self.gr_collision = -1
36 self.no_collision = 0
37 dict.__init__(self)
39 def hash(self, name):
40 """Hash function from module-init-tools' depmod.
42 Parameter 'name' is a string, whose hash coding will be
43 returned.
44 """
45 value = 0x238F13AF * len(name)
47 for i in range(len(name)):
48 value += (ord(name[i]) << (i*5 % 24))
49 return (1103515243 * value + 12345)
51 def size(self):
52 """Return hash table's current size."""
53 return len(self)
55 def show_stats(self):
56 """Print hash table's utilization info."""
57 # XXX: Makes this more generic
58 lfactor = self.size() / float(self.positions)
59 probes = 1 / (1 - lfactor)
60 print '-> Hash table statistics'
61 print ' Table size: %4d' % self.positions
62 print ' o free: %4d' % (self.positions -self.size())
63 print ' o in use: %4d' % self.size()
64 print ' Perfect hashing: %4d' % self.no_collision
65 print ' Collisions: %4d' % self.nr_collisions
66 print ' o nr keys: %4d' % (self.size()-self.no_collision)
67 print ' o greater: %4d' % (self.gr_collision + 1)
68 print ' Load factor: %4.2f' % lfactor
69 print ' P. to probe: %4.2f' % probes
70 print ''
72 def load_from_file(self, fname):
73 """Load data from file into the hash table.
75 Parameter 'fname' is the file path.
77 The file is assumed to have one data per line, a trailing
78 new-line character is removed if present.
80 The hash character (#) is considered a comment, therefore
81 it's ignored.
82 """
83 file = open(fname)
84 for line in file:
85 name = line.rstrip('\n')
86 if name[0] == '#':
87 continue
88 self.insert(name)
89 file.close()
91 class OpenAddressedDH(_HashTable):
92 """Open Addressed hash table (Double Hashing).
94 Advantages:
95 o Doesn't suffer of primary clustering
97 Disadvantages:
98 o Table's size must be prime
99 o Bad cache performance is compared with Linear probing
101 def hash1(self, key):
102 return (key % self.positions)
104 def hash2(self, key):
105 hash = (key % (self.positions - 2)) + 1
106 return hash
108 def dhash(self, name):
109 key = self.hash(name)
110 hash1 = self.hash1(key)
111 hash2 = self.hash2(key)
112 return (hash1, hash2)
114 def insert(self, name):
115 """Insert name into the hash table.
117 If the table is full a TableFull exception will be raised.
119 Complexity is O(1) if we approximate uniform hashing well.
121 # Note: This function does some debug info gathering, for
122 # full speed we could rename it to 'debug_insert()' and
123 # create a new insert() method w/o that additional code
125 hash1, hash2 = self.dhash(name)
127 for i in range(self.positions):
128 hash = (hash1 + i*hash2) % self.positions
129 if not self.has_key(hash):
130 self[hash] = name
131 if i == 0:
132 self.no_collision += 1
133 elif i > self.gr_collision:
134 self.gr_collision = i
135 return
136 self.nr_collisions += 1
137 raise TableFull
139 def lookup(self, name):
140 """Lookup name in the hash table.
142 If name is in the hash table its hash coding is returned,
143 otherwise this function returns -1.
145 Complexity is O(1) if we approximate uniform hashing well.
147 hash1, hash2 = self.dhash(name)
149 for i in range(self.positions):
150 hash = (hash1 + i*hash2) % self.positions
151 if self.has_key(hash):
152 if self[hash] == name:
153 return hash
154 return -1
156 class OpenAddressedLP(_HashTable):
157 """Open Addressed hash table (Linear Probing).
159 Advantages:
160 o Good cache performance
161 o Implementation is simple
162 o Doesn't require a prime number as the table's size
164 Disadvantages:
165 o Suffers of primary clustering (ie, create long sequences
166 of filled slots)
168 Use this when not suffering of primary clustering, otherwise use
169 Double Hashing.
171 def insert(self, name):
172 """Insert name into the hash table.
174 If the table is full a TableFull exception will be raised.
176 Complexity is O(1) if we approximate uniform hashing well.
178 # Note: This function does some debug info gathering, for
179 # full speed we could rename it to 'debug_insert()' and
180 # create a new insert() method w/o that additional code
182 hash = self.hash(name) % self.positions
184 for i in range(self.positions):
185 if not self.has_key(hash):
186 self[hash] = name
187 if i == 0:
188 self.no_collision += 1
189 elif i > self.gr_collision:
190 self.gr_collision = i
191 return
192 self.nr_collisions += 1
193 hash = (hash + 1) % self.positions
194 raise TableFull
196 def lookup(self, name):
197 """Lookup name in the hash table.
199 If name is in the hash table its hash coding is returned,
200 otherwise this function returns -1.
202 Complexity is O(1) if we approximate uniform hashing well.
205 hash = self.hash(name) % self.positions
207 for i in range(self.positions):
208 if self.has_key(hash):
209 if self[hash] == name:
210 return hash
211 hash = (hash + 1) % self.positions
212 return -1
214 class Chained(_HashTable):
216 # TODO:
217 # 1. Review, was written in one shot
218 # 2. Write the print_status() method
219 # 3. Documentation
220 def insert(self, name):
222 hash = self.hash(name) % self.positions
224 if not self.has_key(hash):
225 self[hash] = []
227 l = self.get(hash)
228 l.append(name)
230 def lookup(self, name):
232 hash = self.hash(name) % self.positions
234 if self.has_key(hash):
235 l = self.get(hash)
236 for i in l:
237 if i == name:
238 return (hash, i)
239 return -1