8 from urllib
.parse
import urljoin
, urlsplit
9 from urllib
.parse
import urlencode
10 from .utils
import http_get
11 from base64
import b64encode
15 def fetch_url(url
, types
=None, headers
=()):
16 """Simple function that fetches a URL using urllib.
17 An exception is raised if an error (e.g. 404) occurs.
19 url
= urljoin(config
.base_url
, url
)
20 all_headers
= dict(iview_config
['headers'])
21 all_headers
.update(headers
)
23 # Not using plain urlopen() because the combination of
24 # urlopen()'s "Connection: close" header and
25 # a "gzip" encoded response
26 # sometimes seems to cause the server to truncate the HTTP response
27 from .utils
import PersistentConnectionHandler
28 with
PersistentConnectionHandler(timeout
=30) as connection
:
29 session
= urllib
.request
.build_opener(connection
)
31 with
http_get(session
, url
, types
, headers
=all_headers
) as http
:
33 if headers
.get('content-encoding') == 'gzip':
34 return gzip
.GzipFile(fileobj
=http
).read()
37 except socket
.timeout
as error
:
38 raise Error("Timeout accessing {!r}".format(url
)) from error
40 def maybe_fetch(url
, type=None, headers
=()):
41 """Only fetches a URL if it is not in the cache directory.
42 In practice, this is really bad, and only useful for saving
43 bandwidth when debugging. For one, it doesn't respect
44 HTTP's wishes. Also, iView, by its very nature, changes daily.
48 return fetch_url(url
, type, headers
=headers
)
50 if not os
.path
.isdir(config
.cache
):
51 os
.mkdir(config
.cache
)
53 filename
= os
.path
.join(config
.cache
, url
.rsplit('/', 1)[-1])
55 if os
.path
.isfile(filename
):
56 with
open(filename
, 'rb') as f
:
59 data
= fetch_url(url
, type, headers
=headers
)
60 with
open(filename
, 'wb') as f
:
65 def get_config(headers
=()):
66 """This function fetches the iView "config". Among other things,
67 it tells us an always-metered "fallback" RTMP server, and points
68 us to many of iView's other XML files.
72 headers
= dict(headers
)
74 headers
['User-Agent'] = headers
['User-Agent'] + ' '
76 headers
['User-Agent'] = ''
77 headers
['User-Agent'] += config
.user_agent
78 headers
['Accept-Encoding'] = 'gzip'
79 iview_config
= dict(headers
=headers
)
81 xml
= maybe_fetch(config
.config_url
, ("application/xml", "text/xml"))
82 parsed
= parser
.parse_config(xml
)
83 iview_config
.update(parsed
)
86 """This function performs an authentication handshake with iView.
87 Among other things, it tells us if the connection is unmetered,
88 and gives us a one-time token we need to use to speak RTSP with
89 ABC's servers, and tells us what the RTMP URL is.
91 auth
= iview_config
['auth_url']
93 query
= urlsplit(auth
).query
94 query
= query
and query
+ "&"
95 query
+= urlencode((("ip", config
.ip
),))
96 auth
= urljoin(auth
, "?" + query
)
97 auth
= fetch_url(auth
, ("application/xml", "text/xml"))
98 return parser
.parse_auth(auth
, iview_config
)
100 def get_categories():
101 """Returns the list of categories
103 url
= iview_config
['categories_url']
104 category_data
= maybe_fetch(url
, ("application/xml", "text/xml"))
105 categories
= parser
.parse_categories(category_data
)
109 """This function pulls in the index, which contains the TV series
110 that are available to us. Returns a list of "dict" objects,
113 return get_keyword('index')
115 def get_series_items(series_id
, get_meta
=False):
116 """This function fetches the series detail page for the selected series,
117 which contain the items (i.e. the actual episodes). By
118 default, returns a list of "dict" objects, one for each
119 episode. If "get_meta" is set, returns a tuple with the first
120 element being the list of episodes, and the second element a
121 "dict" object of series infomation.
124 series
= series_api('series', series_id
)
127 if meta
['id'] == series_id
:
130 # Bad series number used to return an empty JSON string, so ignore it.
131 print('no results for series id {}, skipping'.format(series_id
), file=sys
.stderr
)
134 items
= meta
['items']
def get_keyword(keyword):
    """Look up *keyword* through the series feed.

    Delegates to series_api() using the query parameter name
    'keyword' and returns whatever series_api() returns.
    """
    return series_api(key='keyword', value=keyword)
def series_api(key, value=""):
    """Query the JSON series feed with a single ``key=value`` parameter.

    The request URL is the fixed feed endpoint with the URL-encoded
    parameter appended as the query string.  The request asks for
    "application/json" and carries an HTTP Basic "Authorization" header
    built from the fixed credentials below.  The payload obtained through
    maybe_fetch() is handed to parser.parse_json_feed(), whose result is
    returned.
    """
    feed_url = (
        'https://tviview.abc.net.au/iview/feed/panasonic/?'
        + urlencode([(key, value)])
    )

    # Basic authentication: base64 of b"user:password"; the header value
    # is deliberately kept as bytes.
    token = b64encode(b"feedtest:abc123")
    auth_header = ('Authorization', b'Basic ' + token)

    accepted_types = ("application/json",)
    payload = maybe_fetch(feed_url, accepted_types, headers=(auth_header,))
    return parser.parse_json_feed(payload)
def get_highlights():
    """Fetch the highlights document and return it parsed.

    The location is read from iview_config['highlights'] and the raw
    data is passed to parser.parse_highlights().
    """
    # No content types are passed to maybe_fetch(): the resource is
    # reported as Content-Type: text/html
    raw_highlights = maybe_fetch(iview_config['highlights'])
    return parser.parse_highlights(raw_highlights)
def get_captions(url):
    """Fetch and parse the captions file for a program.

    *url* is a program name with the suffix stripped
    (e.g. _video/news_730s_Tx_1506_650000).  The matching ".xml" file
    under http://iview.abc.net.au/cc/ is fetched via maybe_fetch() and
    the result of parser.parse_captions() on it is returned.
    """
    accepted_types = ("text/xml", "application/xml")

    if url.startswith('_video/'):
        # Convert new URLs like the above example to "news_730s_tx_1506":
        # drop everything up to the first slash, drop the part after the
        # last underscore, then lower-case the rest.
        after_slash = url.split('/', 1)[-1]
        without_suffix = after_slash.rsplit('_', 1)[0]
        url = without_suffix.lower()

    captions_url = urljoin('http://iview.abc.net.au/cc/', url + '.xml')
    captions_xml = maybe_fetch(captions_url, accepted_types)
    return parser.parse_captions(captions_xml)
172 def configure_socks_proxy():
173 """Import the modules necessary to support usage of a SOCKS proxy
174 and configure it using the current settings in iview.config
175 NOTE: It would be safe to call this function multiple times
176 from, say, a GTK settings dialog
181 socket
.socket
= socks
.socksocket
183 sys
.excepthook(*sys
.exc_info())
184 print("The Python SOCKS client module is required for proxy support.", file=sys
.stderr
)
187 socks
.setdefaultproxy(socks
.PROXY_TYPE_SOCKS5
, config
.socks_proxy_host
, config
.socks_proxy_port
)
189 class Error(EnvironmentError):
192 if config
.socks_proxy_host
is not None:
193 configure_socks_proxy()