Bump version number to 0.10.1.
[tor-bridgedb.git] / bridgedb / captcha.py
blobadc1c46379ccbb808ec862b0f459d409c857fc51
1 # -*- coding: utf-8 -*-
2 #_____________________________________________________________________________
4 # This file is part of BridgeDB, a Tor bridge distribution system.
6 # :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis@torproject.org>
7 # Aaron Gibson 0x2C4B239DD876C9F6 <aagbsn@torproject.org>
8 # Nick Mathewson 0x21194EBB165733EA <nickm@torproject.org>
9 # please also see AUTHORS file
10 # :copyright: (c) 2007-2017, The Tor Project, Inc.
11 # (c) 2007-2017, all entities within the AUTHORS file
12 # (c) 2014-2017, Isis Lovecruft
13 # :license: see LICENSE for licensing information
14 #_____________________________________________________________________________
16 """This module implements various methods for obtaining or creating CAPTCHAs.
18 .. inheritance-diagram:: CaptchaExpired CaptchaKeyError GimpCaptchaError Captcha ReCaptcha GimpCaptcha
19 :parts: 1
21 **Module Overview:**
25 bridgedb.captcha
26 |- CaptchaExpired - Raised if a solution is given for a stale CAPTCHA.
27 |- CaptchaKeyError - Raised if a CAPTCHA system's keys are invalid/missing.
28 |- GimpCaptchaError - Raised when a Gimp CAPTCHA can't be retrieved.
30 \_ ICaptcha - Zope Interface specification for a generic CAPTCHA.
32 Captcha - Generic base class implementation for obtaining a CAPTCHA.
33 | |- image - The CAPTCHA image.
34 | |- challenge - A unique string associated with this CAPTCHA image.
35 | |- publicKey - The public key for this CAPTCHA system.
36 | |- secretKey - The secret key for this CAPTCHA system.
37 | \_ get() - Get a new pair of CAPTCHA image and challenge strings.
39 |- ReCaptcha - Obtain reCaptcha images and challenge strings.
40 | \_ get() - Request an image and challenge from a reCaptcha API server.
42 \_ GimpCaptcha - Class for obtaining a CAPTCHA from a local cache.
43 |- hmacKey - A client-specific key for HMAC generation.
44 |- cacheDir - The path to the local CAPTCHA cache directory.
45 |- sched - A class for timing out CAPTCHAs after an interval.
46 \_ get() - Get a CAPTCHA image from the cache and create a challenge.
50 There are two types of CAPTCHAs which BridgeDB knows how to serve: those
51 obtained from a reCaptcha_ API server with
52 :class:`~bridgedb.captcha.ReCaptcha`, and those which have been generated with
53 gimp-captcha_ and then cached locally.
55 .. _reCaptcha : https://code.google.com/p/recaptcha/
56 .. _gimp-captcha: https://github.com/isislovecruft/gimp-captcha
57 """
59 from base64 import urlsafe_b64encode
60 from base64 import urlsafe_b64decode
62 import logging
63 import random
64 import os
65 import time
66 import urllib.request
68 from bs4 import BeautifulSoup
69 from zope.interface import Interface, Attribute, implementer
71 from bridgedb import crypto
72 from bridgedb import schedule
73 from bridgedb.txrecaptcha import API_SSL_SERVER
class CaptchaExpired(ValueError):
    """Signals that a solution was submitted for a CAPTCHA which is too old."""
class CaptchaKeyError(Exception):
    """Signals that the keys for a CAPTCHA system are missing or invalid."""
class GimpCaptchaError(Exception):
    """Catch-all error for failures while retrieving a Gimp CAPTCHA."""
class ICaptcha(Interface):
    """Interface specification for CAPTCHAs.

    Declares the four attributes (:attr:`image`, :attr:`challenge`,
    :attr:`publicKey`, and :attr:`secretKey`) and the single :meth:`get`
    method which a CAPTCHA implementation is expected to provide.
    """

    image = Attribute(
        "A string containing the contents of a CAPTCHA image file.")
    challenge = Attribute(
        "A unique string associated with the dispursal of this CAPTCHA.")
    publicKey = Attribute(
        "A public key used for encrypting CAPTCHA challenge strings.")
    # The two adjacent literals below are concatenated by the parser, so the
    # first one must end with a space to keep the words from running together.
    secretKey = Attribute(
        "A private key used for decrypting challenge strings during CAPTCHA "
        "solution verification.")

    # Declared without ``self``: this is an interface method specification,
    # not an implementation.
    def get():
        """Retrieve a new CAPTCHA image."""
@implementer(ICaptcha)
class Captcha(object):
    """Minimal base class which other CAPTCHA implementations extend.

    :ivar image: The CAPTCHA image; ``None`` until one has been obtained.
    :ivar challenge: A challenge string which should permit checking of the
        client's CAPTCHA solution in some manner. In stateless protocols
        such as HTTP, this should be passed along to the client together
        with the CAPTCHA image. ``None`` until one has been obtained.
    :ivar publicKey: A public key used for encrypting CAPTCHA challenge
        strings.
    :ivar secretKey: A private key used for decrypting challenge strings
        during CAPTCHA solution verification.
    """

    def __init__(self, publicKey=None, secretKey=None):
        """Store the keys and start out with no image and no challenge.

        :param publicKey: The public key for this CAPTCHA system.
        :param secretKey: The secret key for this CAPTCHA system.
        """
        self.publicKey = publicKey
        self.secretKey = secretKey
        self.image = None
        self.challenge = None

    def get(self):
        """Reset the stored CAPTCHA image and challenge string.

        This base implementation only clears
        :attr:`image <bridgedb.captcha.Captcha.image>` and
        :attr:`challenge <bridgedb.captcha.Captcha.challenge>` by setting
        both to ``None``; subclasses override it to actually obtain a new
        image/challenge pair and store them in those attributes.
        """
        self.image = self.challenge = None
class ReCaptcha(Captcha):
    """A CAPTCHA obtained from a remote reCaptcha_ API server.

    :vartype image: bytes
    :ivar image: The CAPTCHA image, exactly as read from the HTTP response.
    :vartype challenge: str
    :ivar challenge: The ``'recaptcha_challenge_response'`` HTTP form
        field to pass to the client, along with the CAPTCHA image. See
        :doc:`BridgeDB's captcha.html <templates/captcha.html>` Mako_ template
        for an example usage.
    :vartype publicKey: str
    :ivar publicKey: The public reCaptcha API key.
    :vartype secretKey: str
    :ivar secretKey: The private reCaptcha API key.

    .. _reCaptcha: https://code.google.com/p/recaptcha/
    .. _Mako: http://docs.makotemplates.org/en/latest/syntax.html#page
    """

    def __init__(self, publicKey=None, secretKey=None):
        """Create a new ReCaptcha CAPTCHA.

        :param str publicKey: The public reCaptcha API key.
        :param str secretKey: The private reCaptcha API key.
        """
        super(ReCaptcha, self).__init__(publicKey=publicKey,
                                        secretKey=secretKey)

    def get(self):
        """Retrieve a CAPTCHA from the reCaptcha API server.

        This requests the "noscript" page for :attr:`publicKey` from
        :data:`bridgedb.txrecaptcha.API_SSL_SERVER` and parses the returned
        HTML to extract the CAPTCHA image URL and the challenge string. The
        image is then downloaded and stored at ``ReCaptcha.image``, and the
        challenge string is stored at ``ReCaptcha.challenge``.

        Both HTTP responses are closed as soon as their bodies have been
        read.

        :raises CaptchaKeyError: If either the :attr:`publicKey` or
            :attr:`secretKey` are missing.
        :raises HTTPError: If the server returned any HTTP error status code.
        """
        if not self.publicKey or not self.secretKey:
            raise CaptchaKeyError('You must supply recaptcha API keys')

        urlbase = API_SSL_SERVER
        form = "/noscript?k=%s" % self.publicKey

        # Extract and store image from recaptcha
        with urllib.request.urlopen(urlbase + form) as response:
            html = response.read()
        # FIXME: The remaining lines currently cannot be reliably unit tested:
        # NOTE(review): no parser is named here, so BeautifulSoup picks
        # whichever one is installed -- consider pinning one explicitly.
        soup = BeautifulSoup(html)                           # pragma: no cover
        imgurl = urlbase + "/" + soup.find('img')['src']     # pragma: no cover
        cField = soup.find(                                  # pragma: no cover
            'input', {'name': 'recaptcha_challenge_field'})  # pragma: no cover
        self.challenge = str(cField['value'])                # pragma: no cover
        with urllib.request.urlopen(imgurl) as response:     # pragma: no cover
            self.image = response.read()                     # pragma: no cover
class GimpCaptcha(Captcha):
    """A locally cached CAPTCHA image which was created with gimp-captcha_.

    :ivar publicKey: A PKCS#1 OAEP-padded, public RSA key. This is used to
        hide the correct CAPTCHA solution within the
        ``captcha_challenge_field`` HTML form field. That form field is given
        to the client along with the :attr:`image` during the initial
        CAPTCHA request, and the client *should* give it back to us later
        during the CAPTCHA solution verification step. It must provide an
        ``encrypt()`` method which accepts bytes (see
        :meth:`createChallenge`).
    :ivar secretKey: A PKCS#1 OAEP-padded, private RSA key, used for
        verifying the client's solution to the CAPTCHA.
    :vartype hmacKey: bytes
    :ivar hmacKey: A client-specific HMAC secret key.
    :vartype cacheDir: str
    :ivar cacheDir: The local directory which pre-generated CAPTCHA images
        have been stored in. This can be set via the ``GIMP_CAPTCHA_DIR``
        setting in the config file.
    :vartype answer: str
    :ivar answer: The solution to the current :attr:`image`, taken from the
        image's filename by :meth:`get`. ``None`` until :meth:`get` is called.
    :vartype sched: :class:`bridgedb.schedule.ScheduledInterval`
    :ivar sched: A time interval. After this amount time has passed, the
        CAPTCHA is considered stale, and all solutions are considered invalid
        regardless of their correctness.

    .. _gimp-captcha: https://github.com/isislovecruft/gimp-captcha
    """

    sched = schedule.ScheduledInterval(30, 'minutes')

    def __init__(self, publicKey=None, secretKey=None, hmacKey=None,
                 cacheDir=None):
        """Create a ``GimpCaptcha`` which retrieves images from **cacheDir**.

        :param publicKey: A PKCS#1 OAEP-padded, public RSA key, used for
            creating the ``captcha_challenge_field`` string to give to a
            client.
        :param secretKey: A PKCS#1 OAEP-padded, private RSA key, used for
            verifying the client's solution to the CAPTCHA.
        :param bytes hmacKey: A client-specific HMAC secret key.
        :param str cacheDir: The local directory which pre-generated CAPTCHA
            images have been stored in. This can be set via the
            ``GIMP_CAPTCHA_DIR`` setting in the config file.
        :raises GimpCaptchaError: if :attr:`cacheDir` is not a directory.
        :raises CaptchaKeyError: if any of :attr:`secretKey`,
            :attr:`publicKey`, or :attr:`hmacKey` are invalid or missing.
        """
        if not cacheDir or not os.path.isdir(cacheDir):
            raise GimpCaptchaError("Gimp captcha cache isn't a directory: %r"
                                   % cacheDir)
        if not (publicKey and secretKey and hmacKey):
            raise CaptchaKeyError(
                "Invalid key supplied to GimpCaptcha: SK=%r PK=%r HMAC=%r"
                % (secretKey, publicKey, hmacKey))

        super(GimpCaptcha, self).__init__(publicKey=publicKey,
                                          secretKey=secretKey)
        self.hmacKey = hmacKey
        self.cacheDir = cacheDir
        self.answer = None

    @classmethod
    def check(cls, challenge, solution, secretKey, hmacKey):
        """Check a client's CAPTCHA **solution** against the **challenge**.

        The challenge is base64-decoded and split into its 20-byte HMAC and
        the encrypted blob. Only if the HMAC verifies is the blob decrypted
        with **secretKey**, its timestamp checked against :attr:`sched`, and
        its answer compared case-insensitively with **solution**.

        :param str challenge: The contents of the
            ``'captcha_challenge_field'`` HTTP form field.
        :param solution: The client's proposed solution to the CAPTCHA
            that they were presented with. ``bytes`` are decoded as UTF-8.
        :type solution: str or bytes
        :param secretKey: A PKCS#1 OAEP-padded, private RSA key, used for
            verifying the client's solution to the CAPTCHA. Its ``decrypt()``
            method is called with the encrypted blob.
        :param bytes hmacKey: A private key for generating HMACs.
        :raises CaptchaExpired: if the **solution** was for a stale CAPTCHA.
        :rtype: bool
        :returns: ``True`` if the CAPTCHA solution was correct and not
            stale. ``False`` otherwise, including when the challenge is
            malformed, fails HMAC verification, or cannot be decrypted.
        """
        # Imported locally so that this method brings its own dependency and
        # cannot clash with the ``hmac`` name used elsewhere in this class.
        from hmac import compare_digest

        if isinstance(solution, bytes):
            solution = solution.decode('utf-8')

        if not solution:
            return False

        logging.debug("Checking CAPTCHA solution %r against challenge %r",
                      solution, challenge)

        try:
            decoded = urlsafe_b64decode(challenge)
            hmacFromBlob = decoded[:20]
            encBlob = decoded[20:]
            hmacNew = crypto.getHMAC(hmacKey, encBlob)
            # Constant-time comparison, so that response timing does not
            # reveal how many leading bytes of a forged HMAC were correct.
            hmacIsValid = compare_digest(hmacNew, hmacFromBlob)
        except Exception:
            # Any malformed or undecodable challenge is simply a failure.
            return False

        if not hmacIsValid:
            return False

        try:
            answerBlob = secretKey.decrypt(encBlob)
            # The first 12 bytes are the zero-padded creation timestamp; the
            # remainder is the answer.
            timestamp = answerBlob[:12].lstrip(b'0')
            then = cls.sched.nextIntervalStarts(int(timestamp))
            now = int(time.time())
            answer = answerBlob[12:].decode('utf-8')
        except Exception as error:
            logging.warning(str(error))
            return False

        # If the beginning of the 'next' interval (the interval after the
        # one when the CAPTCHA timestamp was created) has already passed,
        # then the CAPTCHA is stale.
        if now >= then:
            exp = schedule.fromUnixSeconds(then).isoformat(sep=' ')
            raise CaptchaExpired("Solution %r was for a CAPTCHA "
                                 "which already expired at %s."
                                 % (solution, exp))

        return solution.lower() == answer.lower()

    def createChallenge(self, answer):
        """Encrypt-then-HMAC a timestamp plus the CAPTCHA **answer**.

        A challenge string consists of a URL-safe, base64-encoded string which
        contains an ``HMAC`` concatenated with an ``ENC_BLOB``, in the
        following form::

            CHALLENGE := B64( HMAC | ENC_BLOB )
            ENC_BLOB := RSA_ENC( ANSWER_BLOB )
            ANSWER_BLOB := ( TIMESTAMP | ANSWER )

        where
          * ``B64`` is a URL-safe base64-encode function,
          * ``RSA_ENC`` is the PKCS#1 RSA-OAEP encryption function,
          * and the remaining fields are specified as follows:

        +-------------+--------------------------------------------+----------+
        | Field       | Description                                | Length   |
        +=============+============================================+==========+
        | HMAC        | An HMAC of the ``ENC_BLOB``, created with  | 20 bytes |
        |             | the client-specific :attr:`hmacKey`, by    |          |
        |             | applying :func:`~crypto.getHMAC` to the    |          |
        |             | ``ENC_BLOB``.                              |          |
        +-------------+--------------------------------------------+----------+
        | ENC_BLOB    | An encrypted ``ANSWER_BLOB``, created with | varies   |
        |             | a PKCS#1 OAEP-padded RSA :attr:`publicKey`.|          |
        +-------------+--------------------------------------------+----------+
        | ANSWER_BLOB | Contains the concatenated ``TIMESTAMP``    | varies   |
        |             | and ``ANSWER``.                            |          |
        +-------------+--------------------------------------------+----------+
        | TIMESTAMP   | A Unix Epoch timestamp, in seconds,        | 12 bytes |
        |             | left-padded with "0"s.                     |          |
        +-------------+--------------------------------------------+----------+
        | ANSWER      | A string containing answer to this         | 8 bytes  |
        |             | CAPTCHA :attr:`image`.                     |          |
        +-------------+--------------------------------------------+----------+

        The steps taken to produce a ``CHALLENGE`` are then:

        1. Create a ``TIMESTAMP``, and pad it on the left with ``0``s to 12
           bytes in length.
        2. Next, take the **answer** to this CAPTCHA :data:`image` and
           concatenate the padded ``TIMESTAMP`` and the ``ANSWER``, forming
           an ``ANSWER_BLOB``.
        3. Encrypt the resulting ``ANSWER_BLOB`` to :data:`publicKey` to
           create the ``ENC_BLOB``.
        4. Use the client-specific :data:`hmacKey` to apply the
           :func:`~crypto.getHMAC` function to the ``ENC_BLOB``, obtaining
           an ``HMAC``.
        5. Create the final ``CHALLENGE`` string by concatenating the
           ``HMAC`` and ``ENC_BLOB``, then base64-encoding the result.

        :param str answer: The answer to a CAPTCHA.
        :rtype: str
        :returns: A challenge string.
        """
        timestamp = str(int(time.time())).zfill(12)
        blob = timestamp + answer
        encBlob = self.publicKey.encrypt(blob.encode('utf-8'))
        digest = crypto.getHMAC(self.hmacKey, encBlob)
        challenge = urlsafe_b64encode(digest + encBlob)
        return challenge.decode("utf-8")

    def get(self):
        """Get a random CAPTCHA from the cache directory.

        This chooses a random CAPTCHA image file from the cache directory, and
        reads the raw contents of the image into :attr:`image`. The filename,
        minus its extension, is stored as :attr:`answer`, and a challenge
        string for it is created via :meth:`createChallenge` and stored as
        :attr:`challenge`.

        :raises GimpCaptchaError: if the :attr:`cacheDir` could not be
            listed or is empty, or if the chosen CAPTCHA image file could not
            be read.
        :rtype: tuple
        :returns: A 2-tuple containing the image file contents as bytes,
            and a challenge string (used for checking the client's solution).
        """
        # Each step is guarded separately, so that a failure to list the
        # directory is reported as such rather than tripping over a filename
        # which was never chosen.
        try:
            filenames = os.listdir(self.cacheDir)
        except OSError as error:
            raise GimpCaptchaError(
                "Could not read Gimp captcha cache dir: %r"
                % self.cacheDir) from error

        try:
            imageFilename = random.SystemRandom().choice(filenames)
        except IndexError as error:
            # Choosing from an empty sequence raises IndexError.
            raise GimpCaptchaError("CAPTCHA cache dir appears empty: %r"
                                   % self.cacheDir) from error

        imagePath = os.path.join(self.cacheDir, imageFilename)
        try:
            with open(imagePath, 'rb') as imageFile:
                self.image = imageFile.read()
        except OSError as error:
            raise GimpCaptchaError(
                "Could not read Gimp captcha image file: %r"
                % imageFilename) from error

        # The solution is encoded in the filename: strip the extension.
        self.answer = imageFilename.rsplit(os.path.extsep, 1)[0]
        self.challenge = self.createChallenge(self.answer)

        return (self.image, self.challenge)