Work around missing seriesIndex API by using keyword=index
[python-iview.git] / iview / comm.py
blob ec7e6fb851cb2fdcb751bc00e95e010002e08b47
1 import os
2 import urllib.request
3 import sys
4 import socket
5 from . import config
6 from . import parser
7 import gzip
8 from urllib.parse import urljoin, urlsplit
9 from urllib.parse import urlencode
10 from .utils import http_get
11 from base64 import b64encode
# Module-wide state. It stays None until get_config() replaces it with a
# dict holding the request headers plus the parsed config values.
iview_config = None
def fetch_url(url, types=None, headers=()):
    """Fetch a URL and return the body of the response.

    "url" is resolved relative to config.base_url. The request is sent
    with the headers stored in iview_config['headers'], extended or
    overridden by "headers" (anything dict.update() accepts). "types" is
    handed to http_get() unchanged.

    If the response is marked with a "gzip" content encoding, the body
    is decompressed before being returned.

    An exception is raised if an error (e.g. 404) occurs. A socket
    timeout is re-raised as Error, chained to the original exception.
    """
    target = urljoin(config.base_url, url)
    request_headers = dict(iview_config['headers'])
    request_headers.update(headers)

    # Plain urlopen() is avoided on purpose: its "Connection: close"
    # header combined with a "gzip" encoded response sometimes seems to
    # make the server truncate the HTTP response
    from .utils import PersistentConnectionHandler
    with PersistentConnectionHandler(timeout=30) as handler:
        opener = urllib.request.build_opener(handler)
        try:
            with http_get(opener, target, types,
                          headers=request_headers) as response:
                encoding = response.info().get('content-encoding')
                if encoding != 'gzip':
                    return response.read()
                return gzip.GzipFile(fileobj=response).read()
        except socket.timeout as error:
            raise Error("Timeout accessing {!r}".format(target)) from error
def maybe_fetch(url, type=None, headers=()):
    """Fetch a URL, going through the cache directory when one is set.

    When config.cache is false, this is simply fetch_url(). Otherwise
    the data is read from a file in config.cache named after the part of
    "url" following its last "/"; if that file does not exist yet, the
    URL is fetched and the result is written to the file.

    In practice, this is really bad, and only useful for saving
    bandwidth when debugging. For one, it doesn't respect
    HTTP's wishes. Also, iView, by its very nature, changes daily.
    """
    if not config.cache:
        return fetch_url(url, type, headers=headers)

    # makedirs() with exist_ok also creates missing parent directories
    # and does not fail if the directory appears between a check and
    # the creation, unlike an isdir() test followed by mkdir()
    os.makedirs(config.cache, exist_ok=True)

    filename = os.path.join(config.cache, url.rsplit('/', 1)[-1])

    if os.path.isfile(filename):
        with open(filename, 'rb') as f:
            return f.read()

    data = fetch_url(url, type, headers=headers)
    with open(filename, 'wb') as f:
        f.write(data)
    return data
def get_config(headers=()):
    """Fetch the iView "config" and store it in the global iview_config.

    "headers" supplies extra request headers. config.user_agent is
    appended to any "User-Agent" value given there (separated by a
    space), and "Accept-Encoding: gzip" is always set. The resulting
    headers are kept under iview_config['headers'].

    Among other things, the config tells us an always-metered
    "fallback" RTMP server, and points us to many of iView's other
    XML files.
    """
    global iview_config

    request_headers = dict(headers)
    if 'User-Agent' in request_headers:
        agent_prefix = request_headers['User-Agent'] + ' '
    else:
        agent_prefix = ''
    request_headers['User-Agent'] = agent_prefix + config.user_agent
    request_headers['Accept-Encoding'] = 'gzip'

    # The headers must be in place before fetching, because fetch_url()
    # reads them from iview_config
    iview_config = dict(headers=request_headers)

    xml = maybe_fetch(config.config_url, ("application/xml", "text/xml"))
    iview_config.update(parser.parse_config(xml))
def get_auth():
    """Perform an authentication handshake with iView.

    The URL is taken from iview_config['auth_url']. If config.ip is set,
    it is added to the URL's query string as an "ip" parameter. The
    response is returned as parsed by parser.parse_auth().

    Among other things, the result tells us if the connection is
    unmetered, and gives us a one-time token we need to use to speak
    RTSP with ABC's servers, and tells us what the RTMP URL is.
    """
    auth_url = iview_config['auth_url']
    if config.ip:
        ip_param = urlencode((("ip", config.ip),))
        existing = urlsplit(auth_url).query
        # Keep whatever query the configured URL already carries
        query = existing + "&" + ip_param if existing else ip_param
        auth_url = urljoin(auth_url, "?" + query)
    response = fetch_url(auth_url, ("application/xml", "text/xml"))
    return parser.parse_auth(response, iview_config)
def get_categories():
    """Returns the list of categories

    The data is fetched from iview_config['categories_url'] and
    returned as parsed by parser.parse_categories().
    """
    url = iview_config['categories_url']
    category_data = maybe_fetch(url, ("application/xml", "text/xml"))
    return parser.parse_categories(category_data)
def get_index():
    """This function pulls in the index, which contains the TV series
    that are available to us. Returns a list of "dict" objects,
    one for each series.

    The index is obtained by querying the feed for the keyword "index".
    """
    return get_keyword('index')
def get_series_items(series_id, get_meta=False):
    """This function fetches the series detail page for the selected series,
    which contain the items (i.e. the actual episodes). By
    default, returns a list of "dict" objects, one for each
    episode. If "get_meta" is set, returns a tuple with the first
    element being the list of episodes, and the second element a
    "dict" object of series information.

    If no returned series has an "id" equal to "series_id", a message is
    printed to stderr and an empty list of episodes is used instead.
    """
    series = series_api('series', series_id)

    for meta in series:
        if meta['id'] == series_id:
            break
    else:
        # Bad series number used to return an empty JSON string, so ignore it.
        print('no results for series id {}, skipping'.format(series_id), file=sys.stderr)
        meta = {'items': []}

    items = meta['items']
    if get_meta:
        return (items, meta)
    return items
def get_keyword(keyword):
    """Query the feed for "keyword" and return the result of
    series_api().
    """
    return series_api(key='keyword', value=keyword)
def series_api(key, value=""):
    """Query the JSON feed with a single "key=value" parameter.

    The request is made through maybe_fetch() with an HTTP Basic
    "Authorization" header, expecting "application/json". Returns the
    response as parsed by parser.parse_json_feed().
    """
    feed_url = ('https://tviview.abc.net.au/iview/feed/panasonic/?'
                + urlencode(((key, value),)))
    token = b64encode(b"feedtest:abc123")
    auth_header = ('Authorization', b'Basic ' + token)
    feed_data = maybe_fetch(feed_url, ("application/json",),
                            headers=(auth_header,))
    return parser.parse_json_feed(feed_data)
def get_highlights():
    """Fetch the resource named by iview_config['highlights'] and
    return it as parsed by parser.parse_highlights().
    """
    # Reported as Content-Type: text/html
    highlights_url = iview_config['highlights']
    return parser.parse_highlights(maybe_fetch(highlights_url))
def get_captions(url):
    """This function takes a program name with the suffix stripped
    (e.g. _video/news_730s_Tx_1506_650000) and
    fetches the corresponding captions file. It then passes it to
    parser.parse_captions() and returns the result.
    """
    if url.startswith('_video/'):
        # Convert new URLs like the above example to "news_730s_tx_1506"
        url = url.split('/', 1)[-1].rsplit('_', 1)[0].lower()
    captions_url = urljoin('http://iview.abc.net.au/cc/', url + '.xml')

    TYPES = ("text/xml", "application/xml")
    xml = maybe_fetch(captions_url, TYPES)
    return parser.parse_captions(xml)
def configure_socks_proxy():
    """Import the modules necessary to support usage of a SOCKS proxy
    and configure it using the current settings in iview.config

    If the "socks" module cannot be imported, the import error is
    reported through sys.excepthook, a message is printed to stderr and
    the process exits with status 3.

    NOTE: It would be safe to call this function multiple times
    from, say, a GTK settings dialog
    """
    try:
        import socks
    except ImportError:
        # Only a failed import means the module is missing; anything
        # else (including KeyboardInterrupt) is left to propagate
        sys.excepthook(*sys.exc_info())
        print("The Python SOCKS client module is required for proxy support.", file=sys.stderr)
        sys.exit(3)

    import socket
    # Make every socket created from now on go through the proxy
    socket.socket = socks.socksocket

    socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, config.socks_proxy_host, config.socks_proxy_port)
class Error(EnvironmentError):
    """Error raised by this module, e.g. by fetch_url() on a timeout."""
# Runs when this module is imported: set up the SOCKS proxy if a proxy
# host has been configured.
if config.socks_proxy_host is not None:
    configure_socks_proxy()