1 # -*- coding: utf-8 -*-
2 #_____________________________________________________________________________
4 # This file is part of BridgeDB, a Tor bridge distribution system.
6 # :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis@torproject.org>
7 # Aaron Gibson 0x2C4B239DD876C9F6 <aagbsn@torproject.org>
8 # Nick Mathewson 0x21194EBB165733EA <nickm@torproject.org>
9 # please also see AUTHORS file
10 # :copyright: (c) 2007-2017, The Tor Project, Inc.
11 # (c) 2007-2017, all entities within the AUTHORS file
12 # (c) 2014-2017, Isis Lovecruft
13 # :license: see LICENSE for licensing information
14 #_____________________________________________________________________________
16 """This module implements various methods for obtaining or creating CAPTCHAs.
18 .. inheritance-diagram:: CaptchaExpired CaptchaKeyError GimpCaptchaError Captcha ReCaptcha GimpCaptcha
26 |- CaptchaExpired - Raised if a solution is given for a stale CAPTCHA.
27 |- CaptchaKeyError - Raised if a CAPTCHA system's keys are invalid/missing.
28 |- GimpCaptchaError - Raised when a Gimp CAPTCHA can't be retrieved.
30 \_ ICaptcha - Zope Interface specification for a generic CAPTCHA.
32 Captcha - Generic base class implementation for obtaining a CAPTCHA.
33 | |- image - The CAPTCHA image.
34 | |- challenge - A unique string associated with this CAPTCHA image.
35 | |- publicKey - The public key for this CAPTCHA system.
36 | |- secretKey - The secret key for this CAPTCHA system.
37 | \_ get() - Get a new pair of CAPTCHA image and challenge strings.
39 |- ReCaptcha - Obtain reCaptcha images and challenge strings.
40 | \_ get() - Request an image and challenge from a reCaptcha API server.
42 \_ GimpCaptcha - Class for obtaining a CAPTCHA from a local cache.
43 |- hmacKey - A client-specific key for HMAC generation.
44 |- cacheDir - The path to the local CAPTCHA cache directory.
45 |- sched - A class for timing out CAPTCHAs after an interval.
46 \_ get() - Get a CAPTCHA image from the cache and create a challenge.
50 There are two types of CAPTCHAs which BridgeDB knows how to serve: those
51 obtained from a reCaptcha_ API server with
52 :class:`~bridgedb.captcha.ReCaptcha`, and those which have been generated with
53 gimp-captcha_ and then cached locally.
55 .. _reCaptcha : https://code.google.com/p/recaptcha/
56 .. _gimp-captcha: https://github.com/isislovecruft/gimp-captcha
59 from base64
import urlsafe_b64encode
60 from base64
import urlsafe_b64decode
68 from bs4
import BeautifulSoup
69 from zope
.interface
import Interface
, Attribute
, implementer
71 from bridgedb
import crypto
72 from bridgedb
import schedule
73 from bridgedb
.txrecaptcha
import API_SSL_SERVER
class CaptchaExpired(ValueError):
    """Raised when a client's CAPTCHA is too stale.

    Subclasses :exc:`ValueError`, so callers that already catch
    ``ValueError`` around solution checking will also catch this.
    """
class CaptchaKeyError(Exception):
    """Raised if a CAPTCHA system's keys are invalid or missing."""
class GimpCaptchaError(Exception):
    """General exception raised when a Gimp CAPTCHA cannot be retrieved."""
class ICaptcha(Interface):
    """Interface specification for CAPTCHAs."""

    # NOTE(review): the ``image = Attribute(`` opener and the ``def get():``
    # header were not present in the text under review; they are restored
    # here because their description strings were left orphaned.
    image = Attribute(
        "A string containing the contents of a CAPTCHA image file.")
    challenge = Attribute(
        "A unique string associated with the dispersal of this CAPTCHA.")
    publicKey = Attribute(
        "A public key used for encrypting CAPTCHA challenge strings.")
    # The trailing space matters: adjacent string literals are concatenated
    # verbatim, and without it the text reads "CAPTCHAsolution".
    secretKey = Attribute(
        "A private key used for decrypting challenge strings during CAPTCHA "
        "solution verification.")

    def get():
        """Retrieve a new CAPTCHA image."""
@implementer(ICaptcha)
class Captcha(object):
    """A generic CAPTCHA base class.

    :ivar image: The CAPTCHA image.
    :vartype challenge: str
    :ivar challenge: A challenge string which should permit checking of
        the client's CAPTCHA solution in some manner. In stateless protocols
        such as HTTP, this should be passed along to the client with the
        CAPTCHA image.
    :vartype publicKey: str
    :ivar publicKey: A public key used for encrypting CAPTCHA challenge
        strings.
    :vartype secretKey: str
    :ivar secretKey: A private key used for decrypting challenge strings
        during CAPTCHA solution verification.
    """

    def __init__(self, publicKey=None, secretKey=None):
        """Obtain a new CAPTCHA for a client.

        :param publicKey: Stored unchanged as :attr:`publicKey`.
        :param secretKey: Stored unchanged as :attr:`secretKey`.
        """
        # NOTE(review): the ``self.image = None`` assignment was not present
        # in the text under review; it is restored because ``image`` is a
        # documented attribute of this class.
        self.image = None
        self.challenge = None
        self.publicKey = publicKey
        self.secretKey = secretKey

    def get(self):
        """Retrieve a new CAPTCHA image and its associated challenge string.

        The image and challenge will be stored as
        :attr:`image <bridgedb.captcha.Captcha.image>` and
        :attr:`challenge <bridgedb.captcha.Captcha.challenge>`, respectively.

        This base implementation has no CAPTCHA source, so it only resets
        both attributes to ``None``; subclasses override it.
        """
        self.image = None
        self.challenge = None
class ReCaptcha(Captcha):
    """A CAPTCHA obtained from a remote reCaptcha_ API server.

    :ivar image: The CAPTCHA image.
    :vartype challenge: str
    :ivar challenge: The ``'recaptcha_challenge_response'`` HTTP form
        field to pass to the client, along with the CAPTCHA image. See
        :doc:`BridgeDB's captcha.html <templates/captcha.html>` Mako_ template
        for an example usage.
    :vartype publicKey: str
    :ivar publicKey: The public reCaptcha API key.
    :vartype secretKey: str
    :ivar secretKey: The private reCaptcha API key.

    .. _reCaptcha: https://code.google.com/p/recaptcha/
    .. _Mako: http://docs.makotemplates.org/en/latest/syntax.html#page
    """

    def __init__(self, publicKey=None, secretKey=None):
        """Create a new ReCaptcha CAPTCHA.

        :param str publicKey: The public reCaptcha API key.
        :param str secretKey: The private reCaptcha API key.
        """
        super(ReCaptcha, self).__init__(publicKey=publicKey,
                                        secretKey=secretKey)

    def get(self):
        """Retrieve a CAPTCHA from the reCaptcha API server.

        This simply requests a new CAPTCHA from
        ``bridgedb.txrecaptcha.API_SSL_SERVER`` and parses the returned
        HTML to extract the CAPTCHA image and challenge string. The image is
        stored at ``ReCaptcha.image`` and the challenge string at
        ``ReCaptcha.challenge``.

        :raises CaptchaKeyError: If either the :attr:`publicKey` or
            :attr:`secretKey` are missing.
        :raises HTTPError: If the server returned any HTTP error status code.
        """
        if not self.publicKey or not self.secretKey:
            raise CaptchaKeyError('You must supply recaptcha API keys')

        urlbase = API_SSL_SERVER
        form = "/noscript?k=%s" % self.publicKey

        # Extract and store image from recaptcha.  The ``with`` blocks close
        # each HTTP response deterministically instead of relying on garbage
        # collection.
        with urllib.request.urlopen(urlbase + form) as response:
            html = response.read()
        # FIXME: The remaining lines currently cannot be reliably unit tested:
        # Name the parser explicitly so the parse result does not depend on
        # which optional parsers happen to be installed.
        soup = BeautifulSoup(html, "html.parser")            # pragma: no cover
        imgurl = urlbase + "/" + soup.find('img')['src']     # pragma: no cover
        cField = soup.find(                                  # pragma: no cover
            'input', {'name': 'recaptcha_challenge_field'})  # pragma: no cover
        self.challenge = str(cField['value'])                # pragma: no cover
        with urllib.request.urlopen(imgurl) as response:     # pragma: no cover
            self.image = response.read()                     # pragma: no cover
class GimpCaptcha(Captcha):
    """A locally cached CAPTCHA image which was created with gimp-captcha_.

    :vartype publicKey: str
    :ivar publicKey: A PKCS#1 OAEP-padded, public RSA key. This is used to
        hide the correct CAPTCHA solution within the
        ``captcha_challenge_field`` HTML form field. That form field is given
        to a client along with the :attr:`image` during the initial
        CAPTCHA request, and the client *should* give it back to us later
        during the CAPTCHA solution verification step.
    :vartype secretKey: str
    :ivar secretKey: A PKCS#1 OAEP-padded, private RSA key, used for
        verifying the client's solution to the CAPTCHA.
    :vartype hmacKey: bytes
    :ivar hmacKey: A client-specific HMAC secret key.
    :vartype cacheDir: str
    :ivar cacheDir: The local directory which pre-generated CAPTCHA images
        have been stored in. This can be set via the ``GIMP_CAPTCHA_DIR``
        setting in the config file.
    :vartype sched: :class:`bridgedb.schedule.ScheduledInterval`
    :ivar sched: A time interval. After this amount time has passed, the
        CAPTCHA is considered stale, and all solutions are considered invalid
        regardless of their correctness.

    .. _gimp-captcha: https://github.com/isislovecruft/gimp-captcha
    """

    sched = schedule.ScheduledInterval(30, 'minutes')

    def __init__(self, publicKey=None, secretKey=None, hmacKey=None,
                 cacheDir=None):
        """Create a ``GimpCaptcha`` which retrieves images from **cacheDir**.

        :param str publicKey: A PKCS#1 OAEP-padded, public RSA key, used for
            creating the ``captcha_challenge_field`` string to give to a
            client.
        :param str secretKey: A PKCS#1 OAEP-padded, private RSA key, used for
            verifying the client's solution to the CAPTCHA.
        :param bytes hmacKey: A client-specific HMAC secret key.
        :param str cacheDir: The local directory which pre-generated CAPTCHA
            images have been stored in. This can be set via the
            ``GIMP_CAPTCHA_DIR`` setting in the config file.
        :raises GimpCaptchaError: if :attr:`cacheDir` is not a directory.
        :raises CaptchaKeyError: if any of :attr:`secretKey`,
            :attr:`publicKey`, or :attr:`hmacKey` are invalid or missing.
        """
        if not cacheDir or not os.path.isdir(cacheDir):
            raise GimpCaptchaError("Gimp captcha cache isn't a directory: %r"
                                   % cacheDir)
        if not (publicKey and secretKey and hmacKey):
            # NOTE(review): this message embeds the repr of every key that
            # *was* supplied (including the HMAC key); confirm that wherever
            # this exception is logged is an acceptable place for that.
            raise CaptchaKeyError(
                "Invalid key supplied to GimpCaptcha: SK=%r PK=%r HMAC=%r"
                % (secretKey, publicKey, hmacKey))

        super(GimpCaptcha, self).__init__(publicKey=publicKey,
                                          secretKey=secretKey)
        self.hmacKey = hmacKey
        self.cacheDir = cacheDir
        # Populated by get(); defined here so the attribute always exists.
        self.answer = None

    @classmethod
    def check(cls, challenge, solution, secretKey, hmacKey):
        """Check a client's CAPTCHA **solution** against the **challenge**.

        :param str challenge: The contents of the
            ``'captcha_challenge_field'`` HTTP form field.
        :param str solution: The client's proposed solution to the CAPTCHA
            that they were presented with.
        :param str secretKey: A PKCS#1 OAEP-padded, private RSA key, used for
            verifying the client's solution to the CAPTCHA.
        :param bytes hmacKey: A private key for generating HMACs.
        :raises CaptchaExpired: if the **solution** was for a stale CAPTCHA.
        :rtype: bool
        :returns: ``True`` if the CAPTCHA solution was correct and not
            stale. ``False`` otherwise.
        """
        # Function-scope import: createChallenge() historically used ``hmac``
        # as a local variable name, so the module is not pulled in at file
        # level here.
        from hmac import compare_digest

        if isinstance(solution, bytes):
            try:
                solution = solution.decode('utf-8')
            except UnicodeDecodeError:
                # Undecodable client input can never match an answer.
                return False
        if not solution:
            return False

        logging.debug("Checking CAPTCHA solution %r against challenge %r",
                      solution, challenge)

        # Step 1: authenticate the challenge before decrypting anything.
        try:
            decoded = urlsafe_b64decode(challenge)
            hmacFromBlob = decoded[:20]
            encBlob = decoded[20:]
            hmacNew = crypto.getHMAC(hmacKey, encBlob)
            # Constant-time comparison, so response timing does not reveal
            # how many leading bytes of a forged HMAC were correct.
            hmacIsValid = compare_digest(hmacNew, hmacFromBlob)
        except Exception:
            return False
        if not hmacIsValid:
            return False

        # Step 2: decrypt and split ANSWER_BLOB := ( TIMESTAMP | ANSWER ).
        try:
            answerBlob = secretKey.decrypt(encBlob)
            timestamp = answerBlob[:12].lstrip(b'0')
            then = cls.sched.nextIntervalStarts(int(timestamp))
            now = int(time.time())
            answer = answerBlob[12:].decode('utf-8')
        except Exception as error:
            logging.warning(str(error))
            return False

        # If the beginning of the 'next' interval (the interval
        # after the one when the CAPTCHA timestamp was created)
        # has already passed, then the CAPTCHA is stale.
        if now >= then:
            exp = schedule.fromUnixSeconds(then).isoformat(sep=' ')
            raise CaptchaExpired("Solution %r was for a CAPTCHA "
                                 "which already expired at %s."
                                 % (solution, exp))

        return solution.lower() == answer.lower()

    def createChallenge(self, answer):
        """Encrypt-then-HMAC a timestamp plus the CAPTCHA **answer**.

        A challenge string consists of a URL-safe, base64-encoded string which
        contains an ``HMAC`` concatenated with an ``ENC_BLOB``, in the
        following form::

            CHALLENGE := B64( HMAC | ENC_BLOB )
            ENC_BLOB := RSA_ENC( ANSWER_BLOB )
            ANSWER_BLOB := ( TIMESTAMP | ANSWER )

        where

          * ``B64`` is a URL-safe base64-encode function,
          * ``RSA_ENC`` is the PKCS#1 RSA-OAEP encryption function,
          * and the remaining fields are specified as follows:

        +-------------+--------------------------------------------+----------+
        | Field       | Description                                | Length   |
        +=============+============================================+==========+
        | HMAC        | An HMAC of the ``ENC_BLOB``, created with  | 20 bytes |
        |             | the client-specific :attr:`hmacKey`, by    |          |
        |             | applying :func:`~crypto.getHMAC` to the    |          |
        |             | ``ENC_BLOB``.                              |          |
        +-------------+--------------------------------------------+----------+
        | ENC_BLOB    | An encrypted ``ANSWER_BLOB``, created with | varies   |
        |             | a PKCS#1 OAEP-padded RSA :attr:`publicKey`.|          |
        +-------------+--------------------------------------------+----------+
        | ANSWER_BLOB | Contains the concatenated ``TIMESTAMP``    | varies   |
        |             | and ``ANSWER``.                            |          |
        +-------------+--------------------------------------------+----------+
        | TIMESTAMP   | A Unix Epoch timestamp, in seconds,        | 12 bytes |
        |             | left-padded with "0"s.                     |          |
        +-------------+--------------------------------------------+----------+
        | ANSWER      | A string containing answer to this         | 8 bytes  |
        |             | CAPTCHA :attr:`image`.                     |          |
        +-------------+--------------------------------------------+----------+

        The steps taken to produce a ``CHALLENGE`` are then:

          1. Create a ``TIMESTAMP``, and pad it on the left with ``0``s to 12
             bytes in length.
          2. Next, take the **answer** to this CAPTCHA :data:`image` and
             concatenate the padded ``TIMESTAMP`` and the ``ANSWER``, forming
             an ``ANSWER_BLOB``.
          3. Encrypt the resulting ``ANSWER_BLOB`` to :data:`publicKey` to
             create the ``ENC_BLOB``.
          4. Use the client-specific :data:`hmacKey` to apply the
             :func:`~crypto.getHMAC` function to the ``ENC_BLOB``, obtaining
             an ``HMAC``.
          5. Create the final ``CHALLENGE`` string by concatenating the
             ``HMAC`` and ``ENC_BLOB``, then base64-encoding the result.

        :param str answer: The answer to a CAPTCHA.
        :rtype: str
        :returns: A challenge string.
        """
        timestamp = str(int(time.time())).zfill(12)
        answerBlob = (timestamp + answer).encode('utf-8')
        encBlob = self.publicKey.encrypt(answerBlob)
        # Named ``digest`` rather than ``hmac`` so the stdlib module name is
        # not shadowed inside this method.
        digest = crypto.getHMAC(self.hmacKey, encBlob)
        return urlsafe_b64encode(digest + encBlob).decode("utf-8")

    def get(self):
        """Get a random CAPTCHA from the cache directory.

        This chooses a random CAPTCHA image file from the cache directory, and
        reads the contents of the image. Next, it creates a challenge string
        for the CAPTCHA, via :meth:`createChallenge`. The file name with its
        final extension removed is used as the answer.

        :raises GimpCaptchaError: if the chosen CAPTCHA image file could not
            be read, or if the :attr:`cacheDir` is empty or unreadable.
        :rtype: tuple
        :returns: A 2-tuple containing the image file contents, and a
            challenge string (used for checking the client's solution).
        """
        # The directory listing is handled separately from the file read so
        # each error message only refers to names that are already bound.
        try:
            candidates = os.listdir(self.cacheDir)
        except OSError as error:
            raise GimpCaptchaError(
                "Could not read Gimp captcha image file: %r"
                % self.cacheDir) from error
        if not candidates:
            raise GimpCaptchaError("CAPTCHA cache dir appears empty: %r"
                                   % self.cacheDir)

        imageFilename = random.SystemRandom().choice(candidates)
        imagePath = os.path.join(self.cacheDir, imageFilename)
        try:
            with open(imagePath, 'rb') as imageFile:
                self.image = imageFile.read()
        except OSError as error:
            raise GimpCaptchaError(
                "Could not read Gimp captcha image file: %r"
                % imageFilename) from error

        self.answer = imageFilename.rsplit(os.path.extsep, 1)[0]
        self.challenge = self.createChallenge(self.answer)

        return (self.image, self.challenge)