1 # -*- coding: utf-8 ; test-case-name: bridgedb.test.test_txrecaptcha -*-
3 # This file is part of BridgeDB, a Tor bridge distribution system.
5 # :authors: Isis Lovecruft 0xA3ADB67A2CDB8B35 <isis@torproject.org>
6 # :copyright: (c) 2013-2017, Isis Lovecruft
7 # (c) 2007-2017, The Tor Project, Inc.
8 # :license: 3-Clause BSD, see LICENSE for licensing information
10 """Twisted-based reCAPTCHA client.
This client *always* uses TLS with strict hostname checking, unlike the
official Google Python recaptcha-client_, which is hardcoded_ to use plaintext
HTTP.

Small portions of this code were taken from the official Google Python
recaptcha-client_ module, version 1.0.6. Those portions are
:class:`RecaptchaResponse` and :data:`API_SERVER`. They total 5 lines of code,
which are copyright the authors of the recaptcha-client_ package.
21 .. _hardcoded: https://code.google.com/p/recaptcha/source/browse/trunk/recaptcha-plugins/python/recaptcha/client/captcha.py#76
22 .. _recaptcha-client: https://pypi.python.org/pypi/recaptcha-client/1.0.6
24 .. inheritance-diagram:: RecaptchaResponseError RecaptchaResponse RecaptchaResponseProtocol
import logging
import urllib.parse

from OpenSSL.crypto import FILETYPE_PEM
from OpenSSL.crypto import load_certificate

from twisted import version as _twistedversion
from twisted.internet import defer
from twisted.internet import protocol
from twisted.internet import reactor
from twisted.python import failure
from twisted.python.versions import Version
from twisted.web import client
from twisted.web.http_headers import Headers
from twisted.web.iweb import IBodyProducer

from zope.interface import implementer

from bridgedb.crypto import SSLVerifyingContextFactory
#: Base URL of the reCAPTCHA API, always addressed over HTTPS. The value was
#: taken from :data:`recaptcha.client.captcha.API_SSL_SERVER`.
API_SERVER = b"https://www.google.com/recaptcha/api"
#: Alias for :data:`API_SERVER`; both names point at the same HTTPS endpoint.
API_SSL_SERVER = API_SERVER
#: The endpoint to which verification requests are POSTed.
API_SSL_VERIFY_URL = API_SSL_SERVER + b"/verify"
#: (:class:`OpenSSL.crypto.X509`) Only trust certificates for the reCAPTCHA
#: :data:`API_SSL_SERVER` which were signed by the Google Internet Authority CA.
#:
#: The PEM text below is parsed once, at import time, by
#: :func:`OpenSSL.crypto.load_certificate`.
#:
#: NOTE(review): the validity period encoded in this certificate appears to
#: end in December 2013 -- confirm that this pinned CA is still the one which
#: signs the API server's certificate.
GOOGLE_INTERNET_AUTHORITY_CA_CERT = load_certificate(FILETYPE_PEM, b"""\
-----BEGIN CERTIFICATE-----
MIICsDCCAhmgAwIBAgIDFXfhMA0GCSqGSIb3DQEBBQUAME4xCzAJBgNVBAYTAlVT
MRAwDgYDVQQKEwdFcXVpZmF4MS0wKwYDVQQLEyRFcXVpZmF4IFNlY3VyZSBDZXJ0
aWZpY2F0ZSBBdXRob3JpdHkwHhcNMTIxMjEyMTU1ODUwWhcNMTMxMjMxMTU1ODUw
WjBGMQswCQYDVQQGEwJVUzETMBEGA1UEChMKR29vZ2xlIEluYzEiMCAGA1UEAxMZ
R29vZ2xlIEludGVybmV0IEF1dGhvcml0eTCBnzANBgkqhkiG9w0BAQEFAAOBjQAw
gYkCgYEAye23pIucV+eEPkB9hPSP0XFjU5nneXQUr0SZMyCSjXvlKAy6rWxJfoNf
NFlOCnowzdDXxFdF7dWq1nMmzq0yE7jXDx07393cCDaob1FEm8rWIFJztyaHNWrb
qeXUWaUr/GcZOfqTGBhs3t0lig4zFEfC7wFQeeT9adGnwKziV28CAwEAAaOBozCB
oDAfBgNVHSMEGDAWgBRI5mj5K9KylddH2CMgEE8zmJCf1DAdBgNVHQ4EFgQUv8Aw
6/VDET5nup6R+/xq2uNrEiQwEgYDVR0TAQH/BAgwBgEB/wIBADAOBgNVHQ8BAf8E
BAMCAQYwOgYDVR0fBDMwMTAvoC2gK4YpaHR0cDovL2NybC5nZW90cnVzdC5jb20v
Y3Jscy9zZWN1cmVjYS5jcmwwDQYJKoZIhvcNAQEFBQADgYEAvprjecFG+iJsxzEF
ZUNgujFQodUovxOWZshcnDW7fZ7mTlk3zpeVJrGPZzhaDhvuJjIfKqHweFB7gwB+
ARlIjNvrPq86fpVg0NOTawALkSqOUMl3MynBQO+spR7EHcRbADQ/JemfTEh2Ycfl
vZqhEFBfurZkX0eTANq98ZvVfpg=
-----END CERTIFICATE-----""")
# `t.w.client.HTTPConnectionPool` isn't available in Twisted-12.0.0
# (see ticket #11219: https://bugs.torproject.org/11219):
_connectionPoolAvailable = _twistedversion >= Version('twisted', 12, 1, 0)

if _connectionPoolAvailable:
    logging.info("Using HTTPConnectionPool for reCaptcha API server.")
    # A non-persistent pool: connections are not kept alive between requests.
    _pool = client.HTTPConnectionPool(reactor, persistent=False)
    _pool.maxPersistentPerHost = 5
    _pool.cachedConnectionTimeout = 30
    _agent = client.Agent(reactor, pool=_pool)
else:
    # ``logging.warn`` is a deprecated alias; ``logging.warning`` is the
    # supported spelling.
    logging.warning(
        "Twisted-%s is too old for HTTPConnectionPool! Disabling...",
        _twistedversion.short())
    # Keep the name defined on both branches so that later references to
    # ``_pool`` cannot raise a NameError.
    _pool = None
    _agent = client.Agent(reactor)
# Twisted>=14.0.0 changed the way in which hostname verification works.
if _twistedversion >= Version('twisted', 14, 0, 0):
    from twisted.internet._sslverify import OpenSSLCertificateAuthorities

    class RecaptchaOpenSSLCertificateAuthorities(OpenSSLCertificateAuthorities):
        """The trusted CAs for connecting to reCAPTCHA servers."""

        #: A list of `OpenSSL.crypto.X509` objects.
        caCerts = [GOOGLE_INTERNET_AUTHORITY_CA_CERT]

        def __init__(self):
            """Initialise the parent class with our pinned :attr:`caCerts`."""
            super().__init__(self.caCerts)

    class RecaptchaPolicyForHTTPS(client.BrowserLikePolicyForHTTPS):
        """An HTTPS policy whose trust root is limited to
        :class:`RecaptchaOpenSSLCertificateAuthorities`.
        """

        # Created once, when the class body is executed, and shared by all
        # instances of this policy.
        _trustRoot = RecaptchaOpenSSLCertificateAuthorities()

        def __init__(self):
            """Initialise the parent policy with our :attr:`_trustRoot`."""
            super().__init__(trustRoot=self._trustRoot)
def _setAgent(agent):
    """Set the module-global :data:`_agent`.

    Functions which declared ``agent=_agent`` as a default argument captured
    the value that was current when they were defined; rebinding the global
    here does not change those already-bound defaults.

    :param agent: An :api:`twisted.web.client.Agent` for issuing requests.
    """
    global _agent
    _agent = agent
def _getAgent(reactor=reactor, url=API_SSL_VERIFY_URL, connectTimeout=30,
              **kwargs):
    """Create a :api:`twisted.web.client.Agent` which will verify the
    certificate chain and hostname for the given **url**.

    :param reactor: A provider of the
        :api:`twisted.internet.interface.IReactorTCP` interface.
    :param str url: The full URL which will be requested with the
        ``Agent``. It is only consulted on Twisted versions older than
        14.0.0, where it is handed to ``SSLVerifyingContextFactory``.
        (default: :attr:`API_SSL_VERIFY_URL`)
    :param pool: (keyword-only, via ``**kwargs``) An
        :api:`twisted.web.client.HTTPConnectionPool` instance. When
        connection pools are available and no pool is given, the module's
        :attr:`_pool` is used.
    :type connectTimeout: None or int
    :param connectTimeout: If not ``None``, the timeout passed to
        :api:`twisted.internet.reactor.connectTCP` or
        :api:`twisted.internet.reactor.connectSSL` for specifying the
        connection timeout. (default: ``30``)
    :param kwargs: Any further keyword arguments are passed through to the
        ``Agent`` constructor unchanged.
    :returns: A new :api:`twisted.web.client.Agent`.
    """
    # Twisted>=14.0.0 changed the way in which hostname verification works.
    if _twistedversion >= Version('twisted', 14, 0, 0):
        contextFactory = RecaptchaPolicyForHTTPS()
    else:
        contextFactory = SSLVerifyingContextFactory(url)

    # Only mention ``pool`` when this Twisted supports it. ``setdefault``
    # lets an explicitly supplied pool win instead of colliding with ours.
    if _connectionPoolAvailable:
        kwargs.setdefault('pool', _pool)

    return client.Agent(reactor,
                        contextFactory=contextFactory,
                        connectTimeout=connectTimeout,
                        **kwargs)
# Replace the plain agent created during connection-pool setup with one built
# by _getAgent(), i.e. one that carries a certificate-verifying context
# factory. This runs once, when the module is imported.
_setAgent(_getAgent())
class RecaptchaResponseError(ValueError):
    """Signals a problem with the response from the reCaptcha API server."""
class RecaptchaResponse(object):
    """The outcome of a reCaptcha verification request.

    Taken from `recaptcha.client.captcha.RecaptchaResponse`__.

    .. __: https://code.google.com/p/recaptcha/source/browse/trunk/recaptcha-plugins/python/recaptcha/client/captcha.py#7

    :ivar is_valid: The validity flag given to the constructor.
    :ivar error_code: The error code given to the constructor, or ``None``.
    """

    def __init__(self, is_valid, error_code=None):
        """Store **is_valid** and **error_code** on the new instance."""
        self.is_valid, self.error_code = is_valid, error_code
class RecaptchaResponseProtocol(protocol.Protocol):
    """Parser which creates a :class:`RecaptchaResponse` from the body of
    the reCaptcha API server's response.

    The body is expected to consist of a first line reading ``true`` (any
    other value means "not valid"), followed by a newline and the error code.
    """

    def __init__(self, finished):
        """Create a protocol for creating
        :class:`RecaptchaResponses <bridgedb.txrecaptcha.RecaptchaResponse>`.

        :type finished: :api:`twisted.internet.defer.Deferred`
        :param finished: A deferred which will have its ``callback()`` called
            with a :class:`RecaptchaResponse`.
        """
        self.finished = finished
        #: Upper bound on how much of the body we are still willing to read.
        self.remaining = 1024 * 10
        #: The text of the response body received so far.
        self.response = ''

    def dataReceived(self, data):
        """Called when some **data** is received from the connection.

        At most :attr:`remaining` units of **data** are kept; anything beyond
        that limit is silently discarded.
        """
        if not self.remaining:
            return

        received = data[:self.remaining]
        self.remaining -= len(received)
        # The transport delivers bytes on Python 3, while the parsing in
        # connectionLost() works on text. Undecodable input is replaced
        # rather than raised, so a malformed body simply fails to parse.
        if isinstance(received, bytes):
            received = received.decode('utf-8', 'replace')
        self.response += received

    def connectionLost(self, reason):
        """Called when the connection was closed.

        Parses whatever was received and fires :attr:`finished` with the
        resulting :class:`RecaptchaResponse`.

        :type reason: :api:`twisted.python.failure.Failure`
        :param reason: A string explaining why the connection was closed,
            wrapped in a ``Failure`` instance.
        """
        valid = False
        error = reason.getErrorMessage()

        try:
            (valid, error) = self.response.strip().split('\n', 1)
        except ValueError:
            # Fewer than two lines: ``valid`` keeps its initial ``False``.
            error = "Couldn't parse response from reCaptcha API server"

        valid = (valid == "true")
        result = RecaptchaResponse(is_valid=valid, error_code=error)
        logging.debug(
            "ReCaptcha API server response: %s(is_valid=%s, error_code=%s)",
            result.__class__.__name__, valid, error)
        self.finished.callback(result)
@implementer(IBodyProducer)
class _BodyProducer(object):
    """I write a string into the body of an open request."""

    def __init__(self, body):
        """Remember the **body** to send.

        :type body: bytes or str
        :param body: The request body. Text is encoded as UTF-8 so that what
            is written to the consumer is bytes and :attr:`length` is the
            number of bytes written, not the number of characters.
        """
        if isinstance(body, str):
            body = body.encode('utf-8')
        self.body = body
        self.length = len(body)

    def startProducing(self, consumer):
        """Start writing the body.

        The whole body is written in a single call.

        :param consumer: The object whose ``write()`` receives the body.
        :returns: An already-fired :api:`twisted.internet.defer.Deferred`.
        """
        consumer.write(self.body)
        return defer.succeed(None)

    def pauseProducing(self):
        """Do nothing; the body was written in one call, so there is nothing
        left to pause.
        """

    def stopProducing(self):
        """Do nothing; the body was written in one call, so there is nothing
        left to stop.
        """

    def resumeProducing(self):
        """Do nothing; the body was written in one call, so there is nothing
        left to resume.
        """
def _cbRequest(response):
    """Callback for a :api:`twisted.web.client.Agent.request` which delivers
    the result to a :class:`RecaptchaResponseProtocol`.

    :param response: The response object handed to us by the ``Agent``; its
        ``deliverBody()`` method is used to stream the body to the protocol.
    :returns: A :api:`twisted.internet.defer.Deferred` which will callback
        with a ``recaptcha.RecaptchaResponse`` for the request.
    """
    finished = defer.Deferred()
    response.deliverBody(RecaptchaResponseProtocol(finished))
    # Returning the deferred makes the callback chain wait for the parsed
    # response instead of continuing with ``None``.
    return finished
def _ebRequest(fail):
    """Errback for a :api:`twisted.web.client.Agent.request`.

    :param fail: A :api:`twisted.python.failure.Failure` which occurred during
        the request.
    :rtype: :class:`RecaptchaResponse`
    :returns: A response with ``is_valid=False`` whose ``error_code`` is the
        failure's error message, or a placeholder if that message is empty.
    """
    # Let ``logging`` do the interpolation: the message is only built when
    # DEBUG is enabled, and an argument that happens to be a tuple cannot
    # break the formatting.
    logging.debug("txrecaptcha._ebRequest() called with %r", fail)
    error = fail.getErrorMessage() or "possible problem in _ebRequest()"
    return RecaptchaResponse(is_valid=False, error_code=error)
def submit(recaptcha_challenge_field, recaptcha_response_field,
           private_key, remoteip, agent=_agent):
    """Submits a reCaptcha request for verification. This function is a patched
    version of the ``recaptcha.client.captcha.submit()`` function in
    reCaptcha's Python API.

    It does two things differently:
        1. It uses Twisted for everything.
        2. It uses SSL/TLS for everything.

    This function returns a :api:`twisted.internet.defer.Deferred` rather
    than a response object. If either the challenge or the response field is
    empty, no request is made and the deferred fires immediately with an
    invalid ``RecaptchaResponse`` whose error code is
    ``'incorrect-captcha-sol'``.

    :param str recaptcha_challenge_field: The value of the HTTP POST
        ``recaptcha_challenge_field`` argument from the form.
    :param str recaptcha_response_field: The value of the HTTP POST
        ``recaptcha_response_field`` argument from the form.
    :param str private_key: The reCAPTCHA API private key.
    :param str remoteip: An IP address to give to the reCaptcha API server.
    :param agent: The :api:`twisted.web.client.Agent` used to issue the
        request. The default is the module's ``_agent`` as it was when this
        function was defined.
    :rtype: :api:`twisted.internet.defer.Deferred`
    :returns: A ``Deferred`` which will callback with a
        ``recaptcha.RecaptchaResponse`` for the request.
    """
    if not (recaptcha_response_field and recaptcha_challenge_field):
        d = defer.Deferred()
        d.addBoth(_ebRequest)  # We want `is_valid=False`
        d.errback(failure.Failure(ValueError('incorrect-captcha-sol')))
        return d

    # The request body must be bytes, so encode the urlencoded form here.
    params = urllib.parse.urlencode({
        'privatekey': private_key,
        'remoteip': remoteip,
        'challenge': recaptcha_challenge_field,
        'response': recaptcha_response_field,
    }).encode('utf-8')
    body = _BodyProducer(params)
    headers = Headers({"Content-type": ["application/x-www-form-urlencoded"],
                       "User-agent": ["reCAPTCHA Python"]})
    d = agent.request(b'POST', API_SSL_VERIFY_URL, headers, body)
    d.addCallbacks(_cbRequest, _ebRequest)
    return d