Make getCaptchaImage return (bytes, str).
[tor-bridgedb.git] / bridgedb / Storage.py
blobcfd60bb7d46f2c0137aa8be5b9918901aa76b8d4
1 # BridgeDB by Nick Mathewson.
2 # Copyright (c) 2007-2009, The Tor Project, Inc.
3 # See LICENSE for licensing information
5 import calendar
6 import logging
7 import binascii
8 import sqlite3
9 import time
10 import hashlib
11 from functools import wraps
12 from ipaddr import IPAddress
13 import sys
15 from bridgedb.Stability import BridgeHistory
16 import threading
# Short aliases for converting between raw bytes and their hex encoding.
toHex, fromHex = binascii.b2a_hex, binascii.a2b_hex

# Required length of a bridge fingerprint string; insertBridgeAndGetRing()
# asserts it.  (Presumably 40 hex digits of a 20-byte digest -- confirm.)
HEX_ID_LEN = 40
22 def _escapeValue(v):
23 return "'%s'" % v.replace("'", "''")
def timeToStr(t):
    """Format the epoch timestamp ``t`` as a UTC ``YYYY-MM-DD HH:MM`` string.

    Seconds are dropped, so the result has minute resolution.
    """
    utc = time.gmtime(t)
    return time.strftime("%Y-%m-%d %H:%M", utc)
def strToTime(t):
    """Parse a UTC ``YYYY-MM-DD HH:MM`` string into an epoch timestamp.

    This is the inverse of :func:`timeToStr`, up to its minute resolution.
    """
    parsed = time.strptime(t, "%Y-%m-%d %H:%M")
    return calendar.timegm(parsed)
30 # The old DB system was just a key->value mapping DB, with special key
31 # prefixes to indicate which database they fell into.
33 # sp|<ID> -- given to bridgesplitter; maps bridgeID to ring name.
34 # em|<emailaddr> -- given to emailbaseddistributor; maps email address
35 # to concatenated ID.
36 # fs|<ID> -- Given to BridgeTracker, maps to time when a router was
37 # first seen (YYYY-MM-DD HH:MM)
38 # ls|<ID> -- given to bridgetracker, maps to time when a router was
39 # last seen (YYYY-MM-DD HH:MM)
41 # We no longer want to use em| at all, since we're not doing that kind
42 # of persistence any more.
44 # Here is the SQL schema.
# Version 2 of the schema: creates every table except BridgeHistory and
# records 'schema-version' = 2 in Config.  Table, column and index names are
# part of the on-disk format; do not rename them.
SCHEMA2_SCRIPT = """
 CREATE TABLE Config (
     key PRIMARY KEY NOT NULL,
     value
 );

 CREATE TABLE Bridges (
     id INTEGER PRIMARY KEY NOT NULL,
     hex_key,
     address,
     or_port,
     distributor,
     first_seen,
     last_seen
 );

 CREATE UNIQUE INDEX BridgesKeyIndex ON Bridges ( hex_key );

 CREATE TABLE EmailedBridges (
     email PRIMARY KEY NOT NULL,
     when_mailed
 );

 CREATE INDEX EmailedBridgesWhenMailed on EmailedBridges ( email );

 CREATE TABLE BlockedBridges (
     id INTEGER PRIMARY KEY NOT NULL,
     hex_key,
     blocking_country
 );

 CREATE INDEX BlockedBridgesBlockingCountry on BlockedBridges(hex_key);

 CREATE TABLE WarnedEmails (
     email PRIMARY KEY NOT NULL,
     when_warned
 );

 CREATE INDEX WarnedEmailsWasWarned on WarnedEmails ( email );

 INSERT INTO Config VALUES ( 'schema-version', 2 );
"""

# Migration from schema version 2 to 3: adds the BridgeHistory table and
# bumps 'schema-version' to 3.
SCHEMA_2TO3_SCRIPT = """
 CREATE TABLE BridgeHistory (
     fingerprint PRIMARY KEY NOT NULL,
     address,
     port INT,
     weightedUptime LONG,
     weightedTime LONG,
     weightedRunLength LONG,
     totalRunWeights DOUBLE,
     lastSeenWithDifferentAddressAndPort LONG,
     lastSeenWithThisAddressAndPort LONG,
     lastDiscountedHistoryValues LONG,
     lastUpdatedWeightedTime LONG
 );

 CREATE INDEX BridgeHistoryIndex on BridgeHistory ( fingerprint );

 INSERT OR REPLACE INTO Config VALUES ( 'schema-version', 3 );
"""

# A fresh database gets version 2 followed immediately by the 2->3 migration.
SCHEMA3_SCRIPT = SCHEMA2_SCRIPT + SCHEMA_2TO3_SCRIPT
class BridgeData(object):
    """Plain record describing one row of the Bridges table.

    Attributes:
      hex_key     - unique hex key identifying the bridge
      address     - IP address of the bridge
      or_port     - TCP port of the bridge
      distributor - distributor (or pseudo-distributor) announcing the
                    bridge; "unallocated" unless told otherwise
      first_seen  - when the bridge was first seen online
      last_seen   - when the bridge was most recently seen online
    """

    def __init__(self, hex_key, address, or_port, distributor="unallocated",
                 first_seen="", last_seen=""):
        # Where to reach the bridge and how to identify it.
        self.hex_key, self.address, self.or_port = hex_key, address, or_port
        # Who hands the bridge out.
        self.distributor = distributor
        # Observation window.
        self.first_seen, self.last_seen = first_seen, last_seen
class Database(object):
    """Query interface to the BridgeDB SQLite database.

    Wraps the connection returned by :func:`openDatabase` plus one shared
    cursor (``self._cur``) used by every method that fetches its result
    eagerly.  No method commits on its own; call :meth:`commit` or
    :meth:`rollback` explicitly.

    Email addresses are never stored in the clear: they are keyed by the
    hex SHA-1 digest of their UTF-8 encoding.
    """

    def __init__(self, sqlite_fname):
        """Open (creating or migrating if needed) the database file."""
        self._conn = openDatabase(sqlite_fname)
        self._cur = self._conn.cursor()
        self.sqlite_fname = sqlite_fname

    @staticmethod
    def _hashEmail(addr):
        """Return the hex SHA-1 digest used as the key for ``addr``."""
        return hashlib.sha1(addr.encode('utf-8')).hexdigest()

    @staticmethod
    def _rowToBridgeHistory(row):
        """Build a BridgeHistory from a ``SELECT *`` BridgeHistory row."""
        # Column 1 is the textual address; the other ten columns are passed
        # through unchanged in table order.
        return BridgeHistory(row[0], IPAddress(row[1]), *row[2:11])

    def _iterBridgeHistory(self, query, params=()):
        """Lazily yield a BridgeHistory for each row matched by ``query``.

        A dedicated cursor is used so that other queries issued through the
        shared cursor while the caller is still iterating do not reset this
        result set.
        """
        cur = self._conn.cursor()
        try:
            cur.execute(query, params)
            for row in cur:
                yield self._rowToBridgeHistory(row)
        finally:
            cur.close()

    def commit(self):
        """Commit the current transaction."""
        self._conn.commit()

    def rollback(self):
        """Roll back the current transaction."""
        self._conn.rollback()

    def close(self):
        """Close the shared cursor and the underlying connection."""
        self._cur.close()
        self._conn.close()

    def getBridgeDistributor(self, bridge, validRings):
        """If a ``bridge`` is already in the database, get its distributor.

        :param bridge: object whose ``fingerprint`` is looked up as hex_key.
        :param validRings: container of distributor names still in use.
        :rtype: None or str
        :returns: The ``bridge`` distribution method, if one was already
            assigned and is in ``validRings``; otherwise None.
        """
        cur = self._cur
        cur.execute("SELECT id, distributor FROM Bridges WHERE hex_key = ?",
                    (bridge.fingerprint,))
        result = cur.fetchone()
        if result and result[1] in validRings:
            return result[1]
        return None

    def insertBridgeAndGetRing(self, bridge, setRing, seenAt, validRings,
                               defaultPool="unallocated"):
        """Update info about ``bridge``, setting ring to ``setRing`` if none
        was set.

        Also sets the distributor to ``defaultPool`` if the bridge was found
        in the database but its distributor is no longer in ``validRings``
        (or, for a new bridge, if ``setRing`` itself is not valid).

        :param seenAt: epoch timestamp stored as last_seen (and as
            first_seen for a new bridge).
        :returns: the name of the distributor the bridge is assigned to.
        """
        cur = self._cur

        t = timeToStr(seenAt)
        h = bridge.fingerprint
        assert len(h) == HEX_ID_LEN

        cur.execute("SELECT id, distributor "
                    "FROM Bridges WHERE hex_key = ?", (h,))
        v = cur.fetchone()
        if v is not None:
            i, ring = v
            # A ring name that is no longer valid falls back to the
            # default pool.
            if ring not in validRings:
                ring = defaultPool
            # Update last_seen, address, port and (possibly) distributor.
            cur.execute("UPDATE Bridges SET address = ?, or_port = ?, "
                        "distributor = ?, last_seen = ? WHERE id = ?",
                        (str(bridge.address), bridge.orPort, ring, t, i))
            return ring

        # Unknown bridge: validate the requested ring, then insert.
        if setRing not in validRings:
            setRing = defaultPool
        cur.execute("INSERT INTO Bridges (hex_key, address, or_port, "
                    "distributor, first_seen, last_seen) "
                    "VALUES (?, ?, ?, ?, ?, ?)",
                    (h, str(bridge.address), bridge.orPort, setRing, t, t))
        return setRing

    def cleanEmailedBridges(self, expireBefore):
        """Delete EmailedBridges rows mailed before ``expireBefore``."""
        t = timeToStr(expireBefore)
        self._cur.execute("DELETE FROM EmailedBridges WHERE when_mailed < ?",
                          (t,))

    def getEmailTime(self, addr):
        """Return when ``addr`` was last mailed as an epoch timestamp.

        :returns: the stored time, or None if ``addr`` has no record.
        """
        cur = self._cur
        cur.execute("SELECT when_mailed FROM EmailedBridges WHERE email = ?",
                    (self._hashEmail(addr),))
        v = cur.fetchone()
        if v is None:
            return None
        return strToTime(v[0])

    def setEmailTime(self, addr, whenMailed):
        """Record (or overwrite) the time at which ``addr`` was mailed."""
        t = timeToStr(whenMailed)
        self._cur.execute("INSERT OR REPLACE INTO EmailedBridges "
                          "(email,when_mailed) VALUES (?,?)",
                          (self._hashEmail(addr), t))

    def getAllBridges(self):
        """Return a list of BridgeData value classes of all bridges in the
        database.
        """
        cur = self._cur
        cur.execute("SELECT hex_key, address, or_port, distributor, "
                    "first_seen, last_seen FROM Bridges")
        # The selected columns are in BridgeData's constructor order.
        return [BridgeData(*row) for row in cur.fetchall()]

    def getBridgesForDistributor(self, distributor):
        """Return a list of BridgeData value classes of all bridges in the
        database that are allocated to distributor ``distributor``.
        """
        cur = self._cur
        cur.execute("SELECT hex_key, address, or_port, distributor, "
                    "first_seen, last_seen FROM Bridges WHERE "
                    "distributor = ?", (distributor, ))
        return [BridgeData(*row) for row in cur.fetchall()]

    def updateDistributorForHexKey(self, distributor, hex_key):
        """Reassign the bridge identified by ``hex_key`` to ``distributor``."""
        self._cur.execute("UPDATE Bridges SET distributor = ? WHERE hex_key = ?",
                          (distributor, hex_key))

    def getWarnedEmail(self, addr):
        """Return True if ``addr`` has a row in WarnedEmails, else False."""
        cur = self._cur
        cur.execute("SELECT * FROM WarnedEmails WHERE email = ?",
                    (self._hashEmail(addr),))
        return cur.fetchone() is not None

    def setWarnedEmail(self, addr, warned=True, whenWarned=None):
        """Mark ``addr`` as warned, or clear its warning.

        :param warned: truthy to insert a warning row, falsy to delete it.
        :param whenWarned: epoch timestamp of the warning; defaults to the
            time of the call.  (A default of ``time.time()`` in the
            signature would be frozen at import time.)
        :raises sqlite3.IntegrityError: when warning an address that already
            has a row, because ``email`` is the table's primary key.
        """
        addr = self._hashEmail(addr)
        cur = self._cur
        if warned:
            if whenWarned is None:
                whenWarned = time.time()
            cur.execute("INSERT INTO WarnedEmails"
                        "(email,when_warned) VALUES (?,?)",
                        (addr, timeToStr(whenWarned),))
        else:
            cur.execute("DELETE FROM WarnedEmails WHERE email = ?", (addr,))

    def cleanWarnedEmails(self, expireBefore):
        """Delete WarnedEmails rows warned before ``expireBefore``."""
        t = timeToStr(expireBefore)
        self._cur.execute("DELETE FROM WarnedEmails WHERE when_warned < ?",
                          (t,))

    def updateIntoBridgeHistory(self, bh):
        """Insert or replace the BridgeHistory row for ``bh``.

        :returns: ``bh`` itself.
        """
        self._cur.execute(
            "INSERT OR REPLACE INTO BridgeHistory values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            (bh.fingerprint, str(bh.ip), bh.port,
             bh.weightedUptime, bh.weightedTime, bh.weightedRunLength,
             bh.totalRunWeights, bh.lastSeenWithDifferentAddressAndPort,
             bh.lastSeenWithThisAddressAndPort, bh.lastDiscountedHistoryValues,
             bh.lastUpdatedWeightedTime))
        return bh

    def delBridgeHistory(self, fp):
        """Delete the BridgeHistory row whose fingerprint is ``fp``."""
        self._cur.execute("DELETE FROM BridgeHistory WHERE fingerprint = ?",
                          (fp,))

    def getBridgeHistory(self, fp):
        """Return the BridgeHistory for fingerprint ``fp``, or None."""
        cur = self._cur
        cur.execute("SELECT * FROM BridgeHistory WHERE fingerprint = ?", (fp,))
        h = cur.fetchone()
        if h is None:
            return None
        return self._rowToBridgeHistory(h)

    def getAllBridgeHistory(self):
        """Return a generator over every stored BridgeHistory."""
        return self._iterBridgeHistory("SELECT * FROM BridgeHistory")

    def getBridgesLastUpdatedBefore(self, statusPublicationMillis):
        """Return a generator over the BridgeHistory rows whose
        lastUpdatedWeightedTime is below ``statusPublicationMillis``.
        """
        return self._iterBridgeHistory(
            "SELECT * FROM BridgeHistory WHERE lastUpdatedWeightedTime < ?",
            (statusPublicationMillis,))
def _ensureSchema(conn):
    """Create or migrate the tables on ``conn`` to schema version 3."""
    cur = conn.cursor()
    try:
        try:
            cur.execute("SELECT value FROM Config WHERE key = 'schema-version'")
        except sqlite3.OperationalError:
            # Only the probe is guarded, so a failing migration below is not
            # misreported as a missing Config table.
            logging.warning("No Config table found in DB; creating tables")
            cur.executescript(SCHEMA3_SCRIPT)
            conn.commit()
            return

        row = cur.fetchone()
        # Config may exist without a schema-version row; treat that as an
        # unknown version instead of failing to unpack None.
        val = row[0] if row is not None else None
        if val == 2:
            logging.info("Adding new table BridgeHistory")
            cur.executescript(SCHEMA_2TO3_SCRIPT)
        elif val != 3:
            logging.warning("Unknown schema version %s in database.", val)
    finally:
        cur.close()


def openDatabase(sqlite_file):
    """Open ``sqlite_file`` and make sure it carries the current schema.

    A database with no Config table is initialised with SCHEMA3_SCRIPT; a
    version 2 database is migrated with SCHEMA_2TO3_SCRIPT; any version
    other than 2 or 3 is only logged as a warning.

    :param sqlite_file: path of the SQLite database file.
    :returns: the open :class:`sqlite3.Connection`.
    :raises sqlite3.Error: if the schema cannot be read or created; the
        connection is closed before the error propagates.
    """
    conn = sqlite3.Connection(sqlite_file)
    try:
        _ensureSchema(conn)
    except Exception:
        # Do not leak the connection when setup fails.
        conn.close()
        raise
    return conn
class DBGeneratorContextManager(object):
    """Helper for the @contextmanager decorator.

    Drives a generator through the ``with`` protocol: ``__enter__`` advances
    it to its first ``yield`` and ``__exit__`` resumes it.  Unlike the
    standard library helper, a generator that yields again on a clean exit
    is tolerated rather than reported as an error.
    """

    def __init__(self, gen):
        self.gen = gen

    def __enter__(self):
        return next(self.gen)

    def __exit__(self, type, value, traceback):
        """Handle exiting a with statement block.

        Advance the generator, or throw the block's exception into it.

        Significantly based on contextlib.py.

        :returns: a true value if the generator swallowed the exception,
            otherwise a false value so the exception keeps propagating.
        :throws: `RuntimeError` if the generator doesn't stop after the
            exception is thrown into it.
        """
        if type is None:
            try:
                next(self.gen)
            except StopIteration:
                pass
            return

        if value is None:
            # Need to force instantiation so we can reliably
            # tell if we get the same exception back
            value = type()
        try:
            # The instance already carries its traceback; the three-argument
            # form of throw() is deprecated.
            self.gen.throw(value)
        except StopIteration as exc:
            # Suppress the exception *unless* it's the same exception that
            # was passed to throw().  This prevents a StopIteration
            # raised inside the "with" statement from being suppressed
            return exc is not value
        except RuntimeError as exc:
            if exc is value:
                return False
            # PEP 479 turns a StopIteration that escapes the generator into
            # a RuntimeError chained to it; that is still the block's own
            # exception, so let the original propagate.
            if (isinstance(value, StopIteration)
                    and getattr(exc, "__cause__", None) is value):
                return False
            raise
        except BaseException as exc:
            # only re-raise if it's *not* the exception that was
            # passed to throw(), because __exit__() must not raise
            # an exception unless __exit__() itself failed.  But throw()
            # has to raise the exception to signal propagation, so this
            # fixes the impedance mismatch between the throw() protocol
            # and the __exit__() protocol.
            if exc is not value:
                raise
            return False
        raise RuntimeError("generator didn't stop after throw()")
def contextmanager(func):
    """Decorator for :func:`Storage.getDB()`.

    Turns the generator function ``func`` into a factory whose return value
    can be used in a ``with`` statement: each call builds a fresh generator
    and wraps it in a DBGeneratorContextManager.
    """
    @wraps(func)
    def helper(*args, **kwds):
        generator = func(*args, **kwds)
        return DBGeneratorContextManager(generator)
    return helper
# Module-wide state behind getDB().

# Path handed to Database() when getDB() has to open a connection; set via
# setDBFilename().
_DB_FNAME = None
# The Database currently shared by getDB() callers, or None when closed.
_OPENED_DB = None
# Number of active getDB() blocks using _OPENED_DB.
_REFCOUNT = 0
# Lock created by initializeDBLock(); it must exist before getDB() is used.
_LOCK = None
# Number of times getDB() currently holds _LOCK; read by dbIsLocked().
_LOCKED = 0
def clearGlobalDB():
    """Start from scratch.

    Resets every piece of module-level database state to its initial
    value.  Any Database still referenced by ``_OPENED_DB`` is dropped
    without being closed.

    This is currently only used in unit tests.
    """
    global _DB_FNAME
    global _LOCK
    global _LOCKED
    global _OPENED_DB
    # Without this declaration the assignment below would only bind a local
    # and the module-level reference count would survive the reset.
    global _REFCOUNT

    _DB_FNAME = None
    _LOCK = None
    _LOCKED = 0
    _OPENED_DB = None
    _REFCOUNT = 0
def initializeDBLock():
    """Create the lock.

    Installs a reentrant lock in ``_LOCK`` unless one is already there, so
    repeated calls keep the existing lock.

    This must be called before the first database query.
    """
    global _LOCK

    _LOCK = _LOCK or threading.RLock()
    assert _LOCK
def setDBFilename(sqlite_fname):
    """Remember ``sqlite_fname`` as the file getDB() opens on demand."""
    global _DB_FNAME

    _DB_FNAME = sqlite_fname
@contextmanager
def getDB(block=True):
    """Generator: Return a usable database handler

    Always return a :class:`bridgedb.Storage.Database` that is
    usable within the current thread. If a connection already exists
    and it was created by the current thread, then return the
    associated :class:`bridgedb.Storage.Database` instance. Otherwise,
    create a new instance, blocking until the existing connection
    is closed, if applicable.

    Note: This is a blocking call (by default), be careful about
    deadlocks!

    :param bool block: if False, do not wait for the lock; ``False`` is
        yielded instead of a database when the lock is unavailable.
    :rtype: :class:`bridgedb.Storage.Database`
    :returns: An instance of :class:`bridgedb.Storage.Database` used to
        query the database
    """
    global _LOCKED
    global _OPENED_DB
    global _REFCOUNT

    assert _LOCK
    own_lock = _LOCK.acquire(block)
    if not own_lock:
        # Non-blocking attempt failed: nothing was acquired or counted, so
        # there is nothing to undo when the with block ends.
        yield False
        return

    _LOCKED += 1
    have_ref = False
    try:
        if not _OPENED_DB:
            assert _REFCOUNT == 0
            _OPENED_DB = Database(_DB_FNAME)

        _REFCOUNT += 1
        have_ref = True
        yield _OPENED_DB
    finally:
        try:
            # Only give back a reference that was actually taken; opening
            # the database may have failed before the count was raised.
            if have_ref:
                _REFCOUNT -= 1
                if _REFCOUNT == 0:
                    _OPENED_DB.close()
                    _OPENED_DB = None
        finally:
            _LOCKED -= 1
            _LOCK.release()
def dbIsLocked():
    """Return True while at least one getDB() block holds the lock."""
    if _LOCKED == 0:
        return False
    return True