Get the total file size of all dict files to be downloaded; it comes to about 400 MB.
[worddb.git] / libs / openid / store / filestore.py
blobced3cee45e4c6886964388e8d3463cf1fac78431
1 """
2 This module contains an C{L{OpenIDStore}} implementation backed by
3 flat files.
4 """
6 import string
7 import os
8 import os.path
9 import time
11 from errno import EEXIST, ENOENT
# Prefer the standard library's mkstemp; fall back to a local
# implementation on interpreters whose tempfile module lacks it.
try:
    from tempfile import mkstemp
except ImportError:
    # Python < 2.3
    import warnings
    # The fallback below calls os.tempnam, which emits this
    # RuntimeWarning; silence it for this module only.
    warnings.filterwarnings("ignore",
                            "tempnam is a potential security risk",
                            RuntimeWarning,
                            "openid.store.filestore")

    def mkstemp(dir):
        """Create and open a new temporary file inside dir.

        Makes up to five attempts. Each attempt asks os.tempnam for a
        candidate name and opens it with O_CREAT | O_EXCL, so an
        attempt whose name is already taken fails with EEXIST and is
        retried. Any other OSError is re-raised immediately.

        Raises RuntimeError if all five attempts hit EEXIST.

        str -> (int, str)
        """
        for _ in range(5):
            name = os.tempnam(dir)
            try:
                fd = os.open(name, os.O_CREAT | os.O_EXCL | os.O_RDWR, 0600)
            except OSError, why:
                # EEXIST: the candidate name already exists; try
                # another one. Anything else is a real error.
                if why.errno != EEXIST:
                    raise
            else:
                return fd, name

        raise RuntimeError('Failed to get temp file after 5 attempts')
36 from openid.association import Association
37 from openid.store.interface import OpenIDStore
38 from openid.store import nonce
39 from openid import cryptutil, oidutil
# Characters that _filenameEscape passes through unchanged.
_filename_allowed = string.ascii_letters + string.digits + '.'

# Bind _isFilenameSafe to the best single-character membership test
# the running interpreter offers.
try:
    # 2.4 and later: built-in set type
    _isFilenameSafe = set(_filename_allowed).__contains__
except NameError:
    try:
        # 2.3: the sets module
        import sets
    except ImportError:
        # Python < 2.3: use dictionary keys as the set
        _allowed_lookup = {}
        for _allowed_char in _filename_allowed:
            _allowed_lookup[_allowed_char] = None
        _isFilenameSafe = _allowed_lookup.has_key
        del _allowed_lookup
    else:
        _isFilenameSafe = sets.Set(_filename_allowed).__contains__
def _safe64(s):
    """Hash s and return the digest as modified base64 text.

    The value is cryptutil.sha1(s) passed through oidutil.toBase64,
    after which every '+' is replaced by '_', every '/' by '.', and
    every '=' is dropped.

    str -> str
    """
    substitutions = (
        ('+', '_'),
        ('/', '.'),
        ('=', ''),
    )
    encoded = oidutil.toBase64(cryptutil.sha1(s))
    for unwanted, replacement in substitutions:
        encoded = encoded.replace(unwanted, replacement)
    return encoded
def _filenameEscape(s):
    """Escape every character of s that _isFilenameSafe rejects.

    Accepted characters are copied unchanged. Each rejected character
    is replaced by '_' followed by its ordinal in uppercase
    hexadecimal, zero-padded to at least two digits.

    str -> str
    """
    def escape_one(char):
        if _isFilenameSafe(char):
            return char
        return '_%02X' % ord(char)

    return ''.join([escape_one(char) for char in s])
def _removeIfPresent(filename):
    """Attempt to remove a file, returning whether the file existed at
    the time of the call.

    An OSError whose errno is ENOENT is treated as "was not present";
    any other OSError is re-raised.

    str -> bool
    """
    try:
        os.unlink(filename)
    except OSError as why:
        if why.errno == ENOENT:
            # Someone beat us to it, but it's gone, so that's OK
            return False
        raise

    # File was present
    return True
def _ensureDir(dir_name):
    """Create dir_name as a directory if it does not exist. If it
    exists, make sure that it is, in fact, a directory.

    Can raise OSError

    str -> NoneType
    """
    try:
        os.makedirs(dir_name)
    except OSError as why:
        # EEXIST is only acceptable when the existing path really is a
        # directory; every other failure is passed on to the caller.
        if why.errno != EEXIST or not os.path.isdir(dir_name):
            raise
class FileOpenIDStore(OpenIDStore):
    """
    This is a filesystem-based store for OpenID associations and
    nonces.  This store should be safe for use in concurrent systems
    on both windows and unix (excluding NFS filesystems).  There are a
    couple race conditions in the system, but those failure cases have
    been set up in such a way that the worst-case behavior is someone
    having to try to log in a second time.

    Most of the methods of this class are implementation details.
    People wishing to just use this store need only pay attention to
    the C{L{__init__}} method.

    Methods of this object can raise OSError if unexpected filesystem
    conditions, such as bad permissions or missing directories, occur.
    """

    def __init__(self, directory):
        """
        Initializes a new FileOpenIDStore.  This initializes the
        nonce and association directories, which are subdirectories of
        the directory passed in.

        @param directory: This is the directory to put the store
            directories in.

        @type directory: C{str}
        """
        # Make absolute
        directory = os.path.normpath(os.path.abspath(directory))

        self.nonce_dir = os.path.join(directory, 'nonces')

        self.association_dir = os.path.join(directory, 'associations')

        # Temp dir must be on the same filesystem as the associations
        # directory
        self.temp_dir = os.path.join(directory, 'temp')

        # Not read by any method of this class; nonce expiry below is
        # decided by nonce.SKEW.
        self.max_nonce_age = 6 * 60 * 60  # Six hours, in seconds

        self._setup()

    def _setup(self):
        """Make sure that the directories in which we store our data
        exist.

        () -> NoneType
        """
        _ensureDir(self.nonce_dir)
        _ensureDir(self.association_dir)
        _ensureDir(self.temp_dir)

    def _mktemp(self):
        """Create a temporary file on the same filesystem as
        self.association_dir.

        The temporary directory should not be cleaned if there are any
        processes using the store. If there is no active process using
        the store, it is safe to remove all of the files in the
        temporary directory.

        () -> (file, str)
        """
        fd, name = mkstemp(dir=self.temp_dir)
        try:
            file_obj = os.fdopen(fd, 'wb')
        except:
            # No file object took ownership of the descriptor, so it
            # has to be closed here or it leaks. Then remove the
            # half-created file and let the original error propagate.
            try:
                os.close(fd)
            except OSError:
                pass
            _removeIfPresent(name)
            raise
        return file_obj, name

    def getAssociationFilename(self, server_url, handle):
        """Create a unique filename for a given server url and
        handle. This implementation does not assume anything about the
        format of the handle. The filename that is returned will
        contain the domain name from the server URL for ease of human
        inspection of the data directory.

        Raises ValueError if server_url does not contain '://'.

        (str, str) -> str
        """
        if server_url.find('://') == -1:
            raise ValueError('Bad server URL: %r' % server_url)

        proto, rest = server_url.split('://', 1)
        domain = _filenameEscape(rest.split('/', 1)[0])
        url_hash = _safe64(server_url)
        if handle:
            handle_hash = _safe64(handle)
        else:
            # An empty handle yields a name that is a prefix of every
            # other filename for the same server URL.
            handle_hash = ''

        filename = '%s-%s-%s-%s' % (proto, domain, url_hash, handle_hash)

        return os.path.join(self.association_dir, filename)

    def storeAssociation(self, server_url, association):
        """Store an association in the association directory.

        The serialized association is written to a temporary file
        which is then renamed onto its final name. On any failure the
        temporary file is removed and the error is re-raised.

        (str, Association) -> NoneType
        """
        association_s = association.serialize()
        filename = self.getAssociationFilename(server_url, association.handle)
        tmp_file, tmp = self._mktemp()

        try:
            try:
                tmp_file.write(association_s)
                # Push Python's own buffer to the OS first; fsync only
                # covers data the OS has already been handed.
                tmp_file.flush()
                os.fsync(tmp_file.fileno())
            finally:
                tmp_file.close()

            try:
                os.rename(tmp, filename)
            except OSError as why:
                if why.errno != EEXIST:
                    raise

                # We expect EEXIST to happen only on Windows. It's
                # possible that we will succeed in unlinking the
                # existing file, but not in putting the temporary file
                # in place.
                try:
                    os.unlink(filename)
                except OSError as unlink_why:
                    if unlink_why.errno != ENOENT:
                        raise

                # Now the target should not exist. Try renaming again,
                # giving up if it fails.
                os.rename(tmp, filename)
        except:
            # If there was an error, don't leave the temporary file
            # around.
            _removeIfPresent(tmp)
            raise

    def getAssociation(self, server_url, handle=None):
        """Retrieve an association. If no handle is specified, return
        the most recently issued association for the server URL.

        Returns None when no usable association is found.

        (str, str or NoneType) -> Association or NoneType
        """
        if handle is None:
            handle = ''

        # The filename with the empty handle is a prefix of all other
        # associations for the given server URL.
        filename = self.getAssociationFilename(server_url, handle)

        if handle:
            return self._getAssociation(filename)

        # strip off the path to do the comparison
        prefix = os.path.basename(filename)

        # Track the newest association by its issued value alone, so
        # association objects themselves are never compared.
        newest = None
        for association_file in os.listdir(self.association_dir):
            if not association_file.startswith(prefix):
                continue
            full_name = os.path.join(self.association_dir, association_file)
            association = self._getAssociation(full_name)
            if association is None:
                continue
            if newest is None or association.issued >= newest.issued:
                newest = association

        return newest

    def _getAssociation(self, filename):
        """Load the association stored in filename.

        Returns None if the file does not exist, if its contents fail
        to deserialize (ValueError), or if the association reports
        getExpiresIn() == 0. In the last two cases the file is also
        removed.

        str -> Association or NoneType
        """
        try:
            assoc_file = open(filename, 'rb')
        except IOError as why:
            if why.errno == ENOENT:
                # No association exists for that URL and handle
                return None
            raise

        try:
            assoc_s = assoc_file.read()
        finally:
            assoc_file.close()

        try:
            association = Association.deserialize(assoc_s)
        except ValueError:
            _removeIfPresent(filename)
            return None

        # Clean up expired associations
        if association.getExpiresIn() == 0:
            _removeIfPresent(filename)
            return None

        return association

    def removeAssociation(self, server_url, handle):
        """Remove an association if it exists. Do nothing if it does not.

        Returns whether a file was actually removed.

        (str, str) -> bool
        """
        assoc = self.getAssociation(server_url, handle)
        if assoc is None:
            return False

        filename = self.getAssociationFilename(server_url, handle)
        return _removeIfPresent(filename)

    def useNonce(self, server_url, timestamp, salt):
        """Return whether this nonce is valid.

        A nonce is rejected when its timestamp differs from the
        current time by more than nonce.SKEW, or when a file for the
        same (server_url, timestamp, salt) already exists in the nonce
        directory. Otherwise that file is created and True is
        returned.

        (str, int, str) -> bool
        """
        if abs(timestamp - time.time()) > nonce.SKEW:
            return False

        if server_url:
            proto, rest = server_url.split('://', 1)
        else:
            # Create empty proto / rest values for empty server_url,
            # which is part of a consumer-generated nonce.
            proto, rest = '', ''

        domain = _filenameEscape(rest.split('/', 1)[0])
        url_hash = _safe64(server_url)
        salt_hash = _safe64(salt)

        filename = '%08x-%s-%s-%s-%s' % (timestamp, proto, domain,
                                         url_hash, salt_hash)

        filename = os.path.join(self.nonce_dir, filename)
        try:
            # O_EXCL makes creation fail with EEXIST when the file is
            # already there, i.e. when this nonce was seen before.
            fd = os.open(filename, os.O_CREAT | os.O_EXCL | os.O_WRONLY,
                         0o200)
        except OSError as why:
            if why.errno == EEXIST:
                return False
            raise

        os.close(fd)
        return True

    def _allAssocs(self):
        """Read every file in the association directory.

        Files that vanish while being opened are logged and skipped;
        files whose contents fail to deserialize are removed.

        () -> [(str, Association)]
        """
        all_associations = []

        for name in os.listdir(self.association_dir):
            association_filename = os.path.join(self.association_dir, name)
            try:
                association_file = open(association_filename, 'rb')
            except IOError as why:
                if why.errno == ENOENT:
                    oidutil.log("%s disappeared during %s._allAssocs" % (
                        association_filename, self.__class__.__name__))
                else:
                    raise
            else:
                try:
                    assoc_s = association_file.read()
                finally:
                    association_file.close()

                # Remove expired or corrupted associations
                try:
                    association = Association.deserialize(assoc_s)
                except ValueError:
                    _removeIfPresent(association_filename)
                else:
                    all_associations.append(
                        (association_filename, association))

        return all_associations

    def cleanup(self):
        """Remove expired entries from the database. This is
        potentially expensive, so only run when it is acceptable to
        take time.

        () -> NoneType
        """
        self.cleanupAssociations()
        self.cleanupNonces()

    def cleanupAssociations(self):
        """Remove every stored association whose getExpiresIn() is 0.

        Returns the number of associations found expired.

        () -> int
        """
        removed = 0
        for assoc_filename, assoc in self._allAssocs():
            if assoc.getExpiresIn() == 0:
                _removeIfPresent(assoc_filename)
                removed += 1
        return removed

    def cleanupNonces(self):
        """Remove nonce files whose timestamp is outside nonce.SKEW.

        The timestamp is the hexadecimal text before the first '-' in
        each filename, as written by useNonce. Returns the number of
        nonces found expired.

        () -> int
        """
        nonces = os.listdir(self.nonce_dir)
        now = time.time()

        removed = 0
        # Check all nonces for expiry
        for nonce_fname in nonces:
            timestamp = nonce_fname.split('-', 1)[0]
            timestamp = int(timestamp, 16)
            if abs(timestamp - now) > nonce.SKEW:
                filename = os.path.join(self.nonce_dir, nonce_fname)
                _removeIfPresent(filename)
                removed += 1
        return removed