# The contents of this file are subject to the Common Public Attribution
# License Version 1.0. (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://code.reddit.com/LICENSE. The License is based on the Mozilla Public
# License Version 1.1, but Sections 14 and 15 have been added to cover use of
# software over a computer network and provide for limited attribution for the
# Original Developer. In addition, Exhibit A has been modified to be consistent
# with Exhibit B.
#
# Software distributed under the License is distributed on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
# the specific language governing rights and limitations under the License.
#
# The Original Code is reddit.
#
# The Original Developer is the Initial Developer. The Initial Developer of
# the Original Code is reddit Inc.
#
# All portions of the code written by reddit are Copyright (c) 2006-2015 reddit
# Inc. All Rights Reserved.
###############################################################################
"""
This is a tiny Flask app used for a couple of self-serve ad tracking
mechanisms. The URLs it provides are:

/click

    Promoted links have their URL replaced with a /click URL by the JS
    (after a call to /fetch-trackers). Redirect to the actual URL after logging
    the click. This must be run in a place whose logs are stored for traffic
    analysis.

For convenience, the script can compile itself into a Zip archive suitable for
use on Amazon Elastic Beanstalk (and possibly other systems).

"""
import cStringIO
import hashlib
import hmac
import os
import time
import urllib

from urlparse import parse_qsl, urlparse, urlunparse

from ConfigParser import RawConfigParser
from wsgiref.handlers import format_date_time

from flask import Flask, request, json, make_response, abort, redirect
# The WSGI callable; Elastic Beanstalk looks for a module-level `application`.
application = Flask(__name__)

# Written to requirements.txt when this script packages itself (see the
# __main__ block at the bottom of the file). Flask is the only third-party
# import this module has.
REQUIRED_PACKAGES = [
    "flask",
]
class ApplicationConfig(object):
    """A thin wrapper around ConfigParser that remembers what we read.

    The remembered settings can then be written out to a minimal config file
    when building the Elastic Beanstalk zipfile.

    """

    def __init__(self):
        """Load the ini file named by $CONFIG (default: production.ini)."""
        self.input = RawConfigParser()
        config_filename = os.environ.get("CONFIG", "production.ini")
        with open(config_filename) as f:
            self.input.readfp(f)

        # only ever holds the settings that were requested through get()
        self.output = RawConfigParser()

    def get(self, section, key):
        """Return the value of `key` in `section` and remember it was used.

        Lookup errors from the underlying RawConfigParser (missing section or
        option) propagate to the caller unchanged.
        """
        value = self.input.get(section, key)

        # remember that we needed this configuration value
        # (the DEFAULT section always exists and cannot be added explicitly)
        if (section.upper() != "DEFAULT" and
                not self.output.has_section(section)):
            self.output.add_section(section)
        self.output.set(section, key, value)

        return value

    def to_config(self):
        """Serialize every setting read so far as ini-formatted text."""
        buf = cStringIO.StringIO()
        self.output.write(buf)
        return buf.getvalue()
# Settings are read once, at import time. Each one goes through
# ApplicationConfig.get() so that it is also recorded for the minimal config
# file that gets bundled when the script packages itself.
config = ApplicationConfig()
tracking_secret = config.get('DEFAULT', 'tracking_secret')
reddit_domain = config.get('DEFAULT', 'domain')
reddit_domain_prefix = config.get('DEFAULT', 'domain_prefix')
@application.route("/")
def healthcheck():
    """Trivial liveness endpoint: always answers with the same fixed body."""
    return "I am healthy."
@application.route('/click')
def click_redirect():
    """Verify a signed click URL and redirect to its destination.

    Query parameters:
        url  -- where the user should be sent
        id   -- identifier of the clicked item, signed together with `url`
        hash -- hex HMAC-SHA1 over url + id, keyed with `tracking_secret`

    Aborts with 403 when `hash` does not match, so the endpoint only ever
    redirects to destinations that were signed with our secret.
    """
    destination = request.args['url'].encode('utf-8')
    fullname = request.args['id'].encode('utf-8')
    observed_mac = request.args['hash']

    expected_hashable = ''.join((destination, fullname))
    expected_mac = hmac.new(
        tracking_secret, expected_hashable, hashlib.sha1).hexdigest()

    if not constant_time_compare(expected_mac, observed_mac):
        # never redirect on behalf of a request we did not sign
        abort(403)

    # fix encoding in the query string of the destination; when there is no
    # query string the signed URL is forwarded untouched
    u = urlparse(destination)
    if u.query:
        u = _fix_query_encoding(u)
        destination = u.geturl()

    return _redirect_nocache(destination)
@application.route('/event_redirect')
def event_redirect():
    """Redirect to the requested URL, forced onto the configured reddit host.

    Only the path, query and fragment of the `url` parameter survive; scheme
    and host are always replaced.
    """
    requested = request.args['url'].encode('utf-8')

    # Parse and avoid open redirects: whatever host and scheme were asked
    # for, they are overwritten with our own.
    own_host = "%s.%s" % (reddit_domain_prefix, reddit_domain)
    parsed = urlparse(requested)
    parsed = parsed._replace(netloc=own_host, scheme="https")

    return _redirect_nocache(_fix_query_encoding(parsed).geturl())
@application.route('/event_click')
def event_click():
    """Take in an evented request, append session data to payload, and redirect.

    This is only useful for situations in which we're navigating from a request
    that does not have session information - i.e. served from redditmedia.com.
    If we want to track a click and the user that did so from these pages,
    we need to identify the user before sending the payload.

    Note: If we add hmac validation, this will need to verify and re-sign
    before redirecting. We can also probably drop a redirect here once we're
    not relying on log files for event tracking and have a proper events
    endpoint.
    """
    try:
        session_str = urllib.unquote(request.cookies.get('reddit_session', ''))
        user_id = int(session_str.split(',')[0])
    except ValueError:
        # no session cookie, or one that does not start with a numeric id
        user_id = None

    args = request.args.to_dict()
    payload = args.get('data')

    if user_id and payload is not None:
        try:
            payload_json = json.loads(payload.encode('utf-8'))
        except ValueError:
            # if we fail to load the JSON, continue on to the redirect to not
            # block the user - ETL can deal with/report the malformed data.
            pass
        else:
            # only a JSON object can carry the extra key; anything else is
            # passed through unchanged for the same reason as above
            if isinstance(payload_json, dict):
                payload_json['user_id'] = user_id
                args['data'] = json.dumps(payload_json)

    # urllib.urlencode() calls str() on every key and value, which raises on
    # non-ASCII unicode; hand it UTF-8 encoded bytes instead.
    encoded_args = dict(
        (k.encode('utf-8'), v.encode('utf-8')) for k, v in args.iteritems())

    return _redirect_nocache(
        '/event_redirect?%s' % urllib.urlencode(encoded_args))
174 def _fix_query_encoding(parse_result
):
175 "Fix encoding in the query string."
176 query_params
= parse_qsl(parse_result
.query
, keep_blank_values
=True)
178 # this effectively calls urllib.quote_plus on every query value
179 return parse_result
._replace
(query
=urllib
.urlencode(query_params
))
def _redirect_nocache(destination):
    """Return a redirect response to `destination` that must not be cached.

    The tracking only works through this server's request logs, so every
    click has to reach it instead of being answered from a cache. `Expires`
    is set to the same instant as `Date`, i.e. the response is already stale.
    """
    now = format_date_time(time.time())
    response = redirect(destination)
    response.headers['Cache-control'] = 'no-cache'
    response.headers['Pragma'] = 'no-cache'
    response.headers['Date'] = now
    response.headers['Expires'] = now
    return response
# copied from r2.lib.utils
def constant_time_compare(actual, expected):
    """
    Returns True if the two strings are equal, False otherwise

    The time taken is dependent on the number of characters provided
    instead of the number of characters that match.
    """
    actual_len = len(actual)
    expected_len = len(expected)

    # a length mismatch already makes the result non-zero, but we still walk
    # all of `actual` so the running time does not reveal where they differ
    result = actual_len ^ expected_len

    # guard the modulo below: an empty `expected` has nothing to index into
    if expected_len > 0:
        for i, char in enumerate(actual):
            result |= ord(char) ^ ord(expected[i % expected_len])

    return result == 0
if __name__ == "__main__":
    # package up for elastic beanstalk
    import zipfile

    # The bundle holds this script (under the name `application.py`), a
    # config file with only the settings that were actually read, and the
    # list of packages to install.
    with zipfile.ZipFile("/tmp/tracker.zip", "w",
                         zipfile.ZIP_DEFLATED) as archive:
        archive.write(__file__, "application.py")
        archive.writestr("production.ini", config.to_config())
        archive.writestr("requirements.txt",
                         "\n".join(REQUIRED_PACKAGES) + "\n")