Re-flow indentation of bugfix items.
[tor-bridgedb.git] / bridgedb / txrecaptcha.py
blob72923603b8446482be835a28fd2d42e061509761
1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_txrecaptcha -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis@torproject.org>
6 # :copyright: (c) 2013-2017, Isis Lovecruft
7 # (c) 2007-2017, The Tor Project, Inc.
8 # :license: 3-Clause BSD, see LICENSE for licensing information
10 """Twisted-based reCAPTCHA client.
12 This client *always* uses TLS with strict hostname checking, unlike the
13 official Google Python recaptcha-client_, which is hardcoded_ to use plaintext
14 HTTP.
16 Small portions of this code were taken from the official Google Python
17 recaptcha-client_ module, version 1.0.6. Those portions are
18 :class:`RecaptchaResponse` and :data:`API_SERVER`. They total 5 lines of code,
19 which are copyright the authors of the recaptcha-client_ package.
21 .. _hardcoded: https://code.google.com/p/recaptcha/source/browse/trunk/recaptcha-plugins/python/recaptcha/client/captcha.py#76
22 .. _recaptcha-client: https://pypi.python.org/pypi/recaptcha-client/1.0.6
24 .. inheritance-diagram:: RecaptchaResponseError RecaptchaResponse RecaptchaResponseProtocol
25 :parts: 1
26 """
28 import logging
29 import urllib
31 from OpenSSL.crypto import FILETYPE_PEM
32 from OpenSSL.crypto import load_certificate
34 from twisted import version as _twistedversion
35 from twisted.internet import defer
36 from twisted.internet import protocol
37 from twisted.internet import reactor
38 from twisted.python import failure
39 from twisted.python.versions import Version
40 from twisted.web import client
41 from twisted.web.http_headers import Headers
42 from twisted.web.iweb import IBodyProducer
44 from zope.interface import implementer
46 from bridgedb.crypto import SSLVerifyingContextFactory
#: Base URL of the reCAPTCHA API (HTTPS).  The value was taken from
#: :data:`recaptcha.client.captcha.API_SSL_SERVER`; ``API_SERVER`` is bound to
#: the very same bytes object.
API_SSL_SERVER = API_SERVER = b"https://www.google.com/recaptcha/api"
#: The ``verify`` endpoint underneath :data:`API_SSL_SERVER`.
API_SSL_VERIFY_URL = API_SSL_SERVER + b"/verify"
#: (:class:`OpenSSL.crypto.X509`) Only trust certificates for the reCAPTCHA
#: :data:`API_SSL_SERVER` which were signed by the Google Internet Authority CA.
#:
#: The PEM text below is parsed by :func:`OpenSSL.crypto.load_certificate`
#: when this module is imported, so a malformed certificate makes the import
#: itself fail.
# NOTE(review): the validity dates encoded in this certificate appear to end
# on 2013-12-31 -- confirm that it is still the intended trust anchor.
GOOGLE_INTERNET_AUTHORITY_CA_CERT = load_certificate(FILETYPE_PEM, b"""\
-----BEGIN CERTIFICATE-----
MIICsDCCAhmgAwIBAgIDFXfhMA0GCSqGSIb3DQEBBQUAME4xCzAJBgNVBAYTAlVT
MRAwDgYDVQQKEwdFcXVpZmF4MS0wKwYDVQQLEyRFcXVpZmF4IFNlY3VyZSBDZXJ0
aWZpY2F0ZSBBdXRob3JpdHkwHhcNMTIxMjEyMTU1ODUwWhcNMTMxMjMxMTU1ODUw
WjBGMQswCQYDVQQGEwJVUzETMBEGA1UEChMKR29vZ2xlIEluYzEiMCAGA1UEAxMZ
R29vZ2xlIEludGVybmV0IEF1dGhvcml0eTCBnzANBgkqhkiG9w0BAQEFAAOBjQAw
gYkCgYEAye23pIucV+eEPkB9hPSP0XFjU5nneXQUr0SZMyCSjXvlKAy6rWxJfoNf
NFlOCnowzdDXxFdF7dWq1nMmzq0yE7jXDx07393cCDaob1FEm8rWIFJztyaHNWrb
qeXUWaUr/GcZOfqTGBhs3t0lig4zFEfC7wFQeeT9adGnwKziV28CAwEAAaOBozCB
oDAfBgNVHSMEGDAWgBRI5mj5K9KylddH2CMgEE8zmJCf1DAdBgNVHQ4EFgQUv8Aw
6/VDET5nup6R+/xq2uNrEiQwEgYDVR0TAQH/BAgwBgEB/wIBADAOBgNVHQ8BAf8E
BAMCAQYwOgYDVR0fBDMwMTAvoC2gK4YpaHR0cDovL2NybC5nZW90cnVzdC5jb20v
Y3Jscy9zZWN1cmVjYS5jcmwwDQYJKoZIhvcNAQEFBQADgYEAvprjecFG+iJsxzEF
ZUNgujFQodUovxOWZshcnDW7fZ7mTlk3zpeVJrGPZzhaDhvuJjIfKqHweFB7gwB+
ARlIjNvrPq86fpVg0NOTawALkSqOUMl3MynBQO+spR7EHcRbADQ/JemfTEh2Ycfl
vZqhEFBfurZkX0eTANq98ZvVfpg=
-----END CERTIFICATE-----""")
# `t.w.client.HTTPConnectionPool` isn't available in Twisted-12.0.0
# (see ticket #11219: https://bugs.torproject.org/11219):
_connectionPoolAvailable = _twistedversion >= Version('twisted', 12, 1, 0)
if _connectionPoolAvailable:
    logging.info("Using HTTPConnectionPool for reCaptcha API server.")
    # Module-wide pool handed to every Agent this module creates.
    _pool = client.HTTPConnectionPool(reactor, persistent=False)
    _pool.maxPersistentPerHost = 5
    _pool.cachedConnectionTimeout = 30
    _agent = client.Agent(reactor, pool=_pool)
else:
    # ``logging.warn`` is a deprecated alias which newer Python versions no
    # longer provide; ``logging.warning`` is the supported spelling.
    logging.warning("Twisted-%s is too old for HTTPConnectionPool! Disabling..."
                    % _twistedversion.short())
    _pool = None
    _agent = client.Agent(reactor)
# Twisted>=14.0.0 changed the way in which hostname verification works, so
# the policy classes below are only defined for those versions.
if _twistedversion >= Version('twisted', 14, 0, 0):
    from twisted.internet._sslverify import OpenSSLCertificateAuthorities

    class RecaptchaOpenSSLCertificateAuthorities(OpenSSLCertificateAuthorities):
        """The set of CAs which are trusted when talking to reCAPTCHA servers."""

        #: A list of `OpenSSL.crypto.X509` objects.
        caCerts = [GOOGLE_INTERNET_AUTHORITY_CA_CERT]

        def __init__(self):
            # The parent class is initialised with the fixed list above.
            super().__init__(self.caCerts)

    class RecaptchaPolicyForHTTPS(client.BrowserLikePolicyForHTTPS):
        """HTTPS policy whose trust root is the reCAPTCHA CA list."""

        # Created once, when the class body is executed, and shared by every
        # instance of this policy.
        _trustRoot = RecaptchaOpenSSLCertificateAuthorities()

        def __init__(self):
            super().__init__(trustRoot=self._trustRoot)
def _setAgent(agent):
    """Rebind the module-level :attr:`_agent` to **agent**.

    :param agent: An :api:`twisted.web.client.Agent` for issuing requests.
    """
    # Writing through the module namespace is equivalent to a ``global``
    # declaration followed by an assignment.
    globals()['_agent'] = agent
def _getAgent(reactor=reactor, url=API_SSL_VERIFY_URL, connectTimeout=30,
              **kwargs):
    """Create a :api:`twisted.web.client.Agent` which will verify the
    certificate chain and hostname for the given **url**.

    :param reactor: A provider of the
        :api:`twisted.internet.interface.IReactorTCP` interface.
    :param str url: The full URL which will be requested with the
        ``Agent``. It is only consulted on Twisted versions older than
        14.0.0, where it is handed to ``SSLVerifyingContextFactory``.
        (default: :attr:`API_SSL_VERIFY_URL`)
    :param pool: An :api:`twisted.web.client.HTTPConnectionPool`
        instance, given as a keyword argument. When it is omitted and
        connection pools are available, the module-level :attr:`_pool` is
        used. (default: :attr:`_pool`)
    :type connectTimeout: None or int
    :param connectTimeout: If not ``None``, the timeout passed to
        :api:`twisted.internet.reactor.connectTCP` or
        :api:`twisted.internet.reactor.connectSSL` for specifying the
        connection timeout. (default: ``30``)
    :param kwargs: Any further keyword arguments are passed through to the
        ``Agent`` constructor unchanged.
    :returns: The newly created ``Agent``.
    """
    # Twisted>=14.0.0 changed the way in which hostname verification works.
    if _twistedversion >= Version('twisted', 14, 0, 0):
        contextFactory = RecaptchaPolicyForHTTPS()
    else:
        contextFactory = SSLVerifyingContextFactory(url)

    if _connectionPoolAvailable:
        # Only fill in the shared pool when the caller did not supply one.
        # Passing ``pool=_pool`` unconditionally made the documented ``pool``
        # argument fail with a duplicate-keyword ``TypeError``.
        kwargs.setdefault('pool', _pool)

    return client.Agent(reactor,
                        contextFactory=contextFactory,
                        connectTimeout=connectTimeout,
                        **kwargs)
# Replace the module's initial ``_agent`` (which was built without an explicit
# context factory) with an agent produced by ``_getAgent()``.
_setAgent(_getAgent())
class RecaptchaResponseError(ValueError):
    """Raised when the reCaptcha API server's response is problematic.

    This is a plain :exc:`ValueError` subclass and adds no behaviour of its
    own.
    """
class RecaptchaResponse(object):
    """The outcome of a reCaptcha verification request.

    Taken from `recaptcha.client.captcha.RecaptchaResponse`__.

    .. __: https://code.google.com/p/recaptcha/source/browse/trunk/recaptcha-plugins/python/recaptcha/client/captcha.py#7

    :ivar is_valid: Whether the solution was accepted.
    :ivar error_code: The error reported for the request, or ``None``.
    """

    def __init__(self, is_valid, error_code=None):
        # Both values are stored exactly as given.
        self.is_valid, self.error_code = is_valid, error_code
class RecaptchaResponseProtocol(protocol.Protocol):
    """Parser which creates a :class:`RecaptchaResponse` from the body of
    the reCaptcha API server's response.

    The body is expected to consist of a first line which is ``true`` for an
    accepted solution, followed by a newline and an error/status string.
    At most 10 KiB of the body are retained.
    """

    def __init__(self, finished):
        """Create a protocol for creating
        :class:`RecaptchaResponses <bridgedb.txrecaptcha.RecaptchaResponse>`.

        :type finished: :api:`twisted.internet.defer.Deferred`
        :param finished: A deferred which will have its ``callback()`` called
            with a :class:`RecaptchaResponse`.
        """
        self.finished = finished
        #: How much more of the body we are still willing to buffer.
        self.remaining = 1024 * 10
        #: The text of the response body received so far.
        self.response = ''

    def dataReceived(self, data):
        """Called when some **data** is received from the connection.

        :param data: The next chunk of the response body. Transports deliver
            ``bytes``; text is accepted as well and appended unchanged.
        """
        if self.remaining:
            received = data[:self.remaining]
            # Charge the chunk against the budget before decoding, so that
            # the limit applies to what came off the wire.
            self.remaining -= len(received)
            if isinstance(received, bytes):
                # ``self.response`` is text, and adding ``bytes`` to it would
                # raise ``TypeError``. The tokens we parse are ASCII, so
                # undecodable bytes are replaced rather than raised on.
                received = received.decode('utf-8', 'replace')
            self.response += received

    def connectionLost(self, reason):
        """Called when the connection was closed.

        Parses the buffered body and fires :attr:`finished` with the
        resulting :class:`RecaptchaResponse`.

        :type reason: :api:`twisted.python.failure.Failure`
        :param reason: A string explaining why the connection was closed,
            wrapped in a ``Failure`` instance.
        """
        valid = False
        error = reason.getErrorMessage()
        try:
            (valid, error) = self.response.strip().split('\n', 1)
        except ValueError:
            # There was no newline in the body: nothing was unpacked, so
            # ``valid`` is still ``False`` here.
            error = "Couldn't parse response from reCaptcha API server"

        valid = bool(valid == "true")
        result = RecaptchaResponse(is_valid=valid, error_code=error)
        logging.debug(
            "ReCaptcha API server response: %s(is_valid=%s, error_code=%s)",
            result.__class__.__name__, valid, error)
        self.finished.callback(result)
@implementer(IBodyProducer)
class _BodyProducer(object):
    """I hand a fixed, in-memory payload to an open request as its body.

    :ivar body: The payload which will be written.
    :ivar length: The length of :attr:`body`.
    """

    def __init__(self, body):
        self.length = len(body)
        self.body = body

    def startProducing(self, consumer):
        """Write the whole body to **consumer** in a single call.

        :returns: An already-fired :api:`twisted.internet.defer.Deferred`
            whose result is ``None``.
        """
        payload = self.body
        consumer.write(payload)
        return defer.succeed(None)

    def pauseProducing(self):
        """Do nothing; the body is written in one go."""

    def stopProducing(self):
        """Do nothing; the body is written in one go."""

    def resumeProducing(self):
        """Do nothing; the body is written in one go."""
def _cbRequest(response):
    """Callback for a :api:`twisted.web.client.Agent.request` which feeds the
    response body into a :class:`RecaptchaResponseProtocol`.

    :param response: The response object; its ``deliverBody()`` method is
        called with the new protocol.
    :returns: A :api:`twisted.internet.defer.Deferred` which will callback
        with a ``recaptcha.RecaptchaResponse`` for the request.
    """
    done = defer.Deferred()
    parser = RecaptchaResponseProtocol(done)
    response.deliverBody(parser)
    return done
def _ebRequest(fail):
    """Errback for a :api:`twisted.web.client.Agent.request`.

    The failure is logged at debug level and turned into an invalid
    :class:`RecaptchaResponse`, rather than being propagated.

    :param fail: A :api:`twisted.python.failure.Failure` which occurred during
        the request.
    :returns: A :class:`RecaptchaResponse` with ``is_valid=False``, whose
        ``error_code`` is the failure's error message (or a placeholder when
        that message is empty).
    """
    logging.debug("txrecaptcha._ebRequest() called with %r" % fail)
    error = fail.getErrorMessage()
    if not error:
        error = "possible problem in _ebRequest()"
    return RecaptchaResponse(is_valid=False, error_code=error)
def submit(recaptcha_challenge_field, recaptcha_response_field,
           private_key, remoteip, agent=_agent):
    """Submits a reCaptcha request for verification. This function is a patched
    version of the ``recaptcha.client.captcha.submit()`` function in
    reCaptcha's Python API.

    It does two things differently:
        1. It uses Twisted for everything.
        2. It uses SSL/TLS for everything.

    This function returns a :api:`twisted.internet.defer.Deferred`. A failure
    of the request itself is not propagated as an errback; it is converted
    into a ``RecaptchaResponse`` with ``is_valid=False``.

    :param str recaptcha_challenge_field: The value of the HTTP POST
        ``recaptcha_challenge_field`` argument from the form.
    :param str recaptcha_response_field: The value of the HTTP POST
        ``recaptcha_response_field`` argument from the form.
    :param str private_key: The reCAPTCHA API private key.
    :param str remoteip: An IP address to give to the reCaptcha API server.
    :param agent: The :api:`twisted.web.client.Agent` used to issue the
        request. (default: the module-level ``_agent`` as it was when this
        function was defined)
    :rtype: :api:`twisted.internet.defer.Deferred`
    :returns: A ``Deferred`` which will callback with a
        ``recaptcha.RecaptchaResponse`` for the request.
    """
    # ``import urllib`` on its own does not guarantee that the
    # ``urllib.parse`` submodule has been loaded, so import it explicitly.
    from urllib.parse import urlencode

    if not (recaptcha_response_field and recaptcha_challenge_field):
        # Nothing to verify: answer with an invalid response without
        # contacting the API server at all.
        d = defer.Deferred()
        d.addBoth(_ebRequest)  # We want `is_valid=False`
        d.errback(failure.Failure(ValueError('incorrect-captcha-sol')))
        return d

    params = urlencode({
        'privatekey': private_key,
        'remoteip': remoteip,
        'challenge': recaptcha_challenge_field,
        'response': recaptcha_response_field,
    }).encode('utf-8')
    body = _BodyProducer(params)
    headers = Headers({"Content-type": ["application/x-www-form-urlencoded"],
                       "User-agent": ["reCAPTCHA Python"]})
    d = agent.request(b'POST', API_SSL_VERIFY_URL, headers, body)
    d.addCallbacks(_cbRequest, _ebRequest)
    return d