iview/comm.py

   1 import os
   2 import urllib.request
   3 import sys
   4 import socket
   5 from . import config
   6 from . import parser
   7 import gzip
   8 from urllib.parse import urljoin, urlsplit
   9 from urllib.parse import urlencode
  10 from .utils import http_get
  11 from base64 import b64encode
  12
# Module-wide state filled in by get_config(): a dict holding the request
# headers (under the 'headers' key) merged with the parsed iView config.
# It stays None until get_config() has been called; fetch_url() reads
# iview_config['headers'], so get_config() must run before any fetch.
iview_config = None
  14
  15 def fetch_url(url, types=None, headers=()):
  16     """Simple function that fetches a URL using urllib.
  17     An exception is raised if an error (e.g. 404) occurs.
  18     """
  19     url = urljoin(config.base_url, url)
  20     all_headers = dict(iview_config['headers'])
  21     all_headers.update(headers)
  22
  23     # Not using plain urlopen() because the combination of
  24     # urlopen()'s "Connection: close" header and
  25     # a "gzip" encoded response
  26     # sometimes seems to cause the server to truncate the HTTP response
  27     from .utils import PersistentConnectionHandler
  28     with PersistentConnectionHandler(timeout=30) as connection:
  29         session = urllib.request.build_opener(connection)
  30         try:
  31             with http_get(session, url, types, headers=all_headers) as http:
  32                 headers = http.info()
  33                 if headers.get('content-encoding') == 'gzip':
  34                     return gzip.GzipFile(fileobj=http).read()
  35                 else:
  36                     return http.read()
  37         except socket.timeout as error:
  38             raise Error("Timeout accessing {!r}".format(url)) from error
  39
  40 def maybe_fetch(url, type=None, headers=()):
  41     """Only fetches a URL if it is not in the cache directory.
  42     In practice, this is really bad, and only useful for saving
  43     bandwidth when debugging. For one, it doesn't respect
  44     HTTP's wishes. Also, iView, by its very nature, changes daily.
  45     """
  46
  47     if not config.cache:
  48         return fetch_url(url, type, headers=headers)
  49
  50     if not os.path.isdir(config.cache):
  51         os.mkdir(config.cache)
  52
  53     filename = os.path.join(config.cache, url.rsplit('/', 1)[-1])
  54
  55     if os.path.isfile(filename):
  56         with open(filename, 'rb') as f:
  57             data = f.read()
  58     else:
  59         data = fetch_url(url, type, headers=headers)
  60         with open(filename, 'wb') as f:
  61             f.write(data)
  62
  63     return data
  64
  65 def get_config(headers=()):
  66     """This function fetches the iView "config". Among other things,
  67     it tells us an always-metered "fallback" RTMP server, and points
  68     us to many of iView's other XML files.
  69     """
  70     global iview_config
  71
  72     headers = dict(headers)
  73     try:
  74         headers['User-Agent'] = headers['User-Agent'] + ' '
  75     except LookupError:
  76         headers['User-Agent'] = ''
  77     headers['User-Agent'] += config.user_agent
  78     headers['Accept-Encoding'] = 'gzip'
  79     iview_config = dict(headers=headers)
  80
  81     xml = maybe_fetch(config.config_url, ("application/xml", "text/xml"))
  82     parsed = parser.parse_config(xml)
  83     iview_config.update(parsed)
  84
  85 def get_auth():
  86     """This function performs an authentication handshake with iView.
  87     Among other things, it tells us if the connection is unmetered,
  88     and gives us a one-time token we need to use to speak RTSP with
  89     ABC's servers, and tells us what the RTMP URL is.
  90     """
  91     auth = iview_config['auth_url']
  92     if config.ip:
  93         query = urlsplit(auth).query
  94         query = query and query + "&"
  95         query += urlencode((("ip", config.ip),))
  96         auth = urljoin(auth, "?" + query)
  97     auth = fetch_url(auth, ("application/xml", "text/xml"))
  98     return parser.parse_auth(auth, iview_config)
  99
 100 def get_categories():
 101     """Returns the list of categories
 102     """
 103     url = iview_config['categories_url']
 104     category_data = maybe_fetch(url, ("application/xml", "text/xml"))
 105     categories = parser.parse_categories(category_data)
 106     return categories
 107
 108 def get_index():
 109     """This function pulls in the index, which contains the TV series
 110     that are available to us. Returns a list of "dict" objects,
 111     one for each series.
 112     """
 113     return get_keyword('index')
 114
 115 def get_series_items(series_id, get_meta=False):
 116     """This function fetches the series detail page for the selected series,
 117     which contain the items (i.e. the actual episodes). By
 118     default, returns a list of "dict" objects, one for each
 119     episode. If "get_meta" is set, returns a tuple with the first
 120     element being the list of episodes, and the second element a
 121     "dict" object of series infomation.
 122     """
 123
 124     series = series_api('series', series_id)
 125
 126     for meta in series:
 127         if meta['id'] == series_id:
 128             break
 129     else:
 130         # Bad series number used to return an empty JSON string, so ignore it.
 131         print('no results for series id {}, skipping'.format(series_id), file=sys.stderr)
 132         meta = {'items': []}
 133
 134     items = meta['items']
 135     if get_meta:
 136         return (items, meta)
 137     else:
 138         return items
 139
 140 def get_keyword(keyword):
 141     return series_api('keyword', keyword)
 142
 143 def series_api(key, value=""):
 144     query = urlencode(((key, value),))
 145     url = 'https://tviview.abc.net.au/iview/feed/panasonic/?' + query
 146     type = "application/json"
 147     credentials = b64encode(b"feedtest:abc123")
 148     authorization = ('Authorization', b'Basic ' + credentials)
 149     index_data = maybe_fetch(url, (type,), headers=(authorization,))
 150     return parser.parse_json_feed(index_data)
 151
 152 def get_highlights():
 153     # Reported as Content-Type: text/html
 154     highlightXML = maybe_fetch(iview_config['highlights'])
 155     return parser.parse_highlights(highlightXML)
 156
 157 def get_captions(url):
 158     """This function takes a program name with the suffix stripped
 159     (e.g. _video/news_730s_Tx_1506_650000) and
 160     fetches the corresponding captions file. It then passes it to
 161     parse_subtitle(), which converts it to SRT format.
 162     """
 163     if url.startswith('_video/'):
 164         # Convert new URLs like the above example to "news_730s_tx_1506"
 165         url = url.split('/', 1)[-1].rsplit('_', 1)[0].lower()
 166     captions_url = urljoin('http://iview.abc.net.au/cc/', url + '.xml')
 167
 168     TYPES = ("text/xml", "application/xml")
 169     xml = maybe_fetch(captions_url, TYPES)
 170     return parser.parse_captions(xml)
 171
 172 def configure_socks_proxy():
 173     """Import the modules necessary to support usage of a SOCKS proxy
 174     and configure it using the current settings in iview.config
 175     NOTE: It would be safe to call this function multiple times
 176     from, say, a GTK settings dialog
 177     """
 178     try:
 179         import socks
 180         import socket
 181         socket.socket = socks.socksocket
 182     except:
 183         sys.excepthook(*sys.exc_info())
 184         print("The Python SOCKS client module is required for proxy support.", file=sys.stderr)
 185         sys.exit(3)
 186
 187     socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, config.socks_proxy_host, config.socks_proxy_port)
 188
 189 class Error(EnvironmentError):
 190     pass
 191
# Import-time side effect: when a SOCKS proxy host is configured, install
# the proxy as soon as this module is loaded, before any request is made.
# Note that configure_socks_proxy() exits the process if the SOCKS client
# module is unavailable.
if config.socks_proxy_host is not None:
    configure_socks_proxy()