yt_dlp/extractor/internetvideoarchive.py

   1 import json
   2 import re
   3
   4 from .common import InfoExtractor
   5 from ..utils import parse_qs
   6
   7
   8 class InternetVideoArchiveIE(InfoExtractor):
   9     _VALID_URL = r'https?://video\.internetvideoarchive\.net/(?:player|flash/players)/.*?\?.*?publishedid.*?'
  10
  11     _TEST = {
  12         'url': 'http://video.internetvideoarchive.net/player/6/configuration.ashx?customerid=69249&publishedid=194487&reporttag=vdbetatitle&playerid=641&autolist=0&domain=www.videodetective.com&maxrate=high&minrate=low&socialplayer=false',
  13         'info_dict': {
  14             'id': '194487',
  15             'ext': 'mp4',
  16             'title': 'Kick-Ass 2',
  17             'description': 'md5:c189d5b7280400630a1d3dd17eaa8d8a',
  18         },
  19         'params': {
  20             # m3u8 download
  21             'skip_download': True,
  22         },
  23     }
  24
  25     @staticmethod
  26     def _build_json_url(query):
  27         return 'http://video.internetvideoarchive.net/player/6/configuration.ashx?' + query
  28
  29     def _real_extract(self, url):
  30         query = parse_qs(url)
  31         video_id = query['publishedid'][0]
  32         data = self._download_json(
  33             'https://video.internetvideoarchive.net/videojs7/videojs7.ivasettings.ashx',
  34             video_id, data=json.dumps({
  35                 'customerid': query['customerid'][0],
  36                 'publishedid': video_id,
  37             }).encode())
  38         title = data['Title']
  39         formats = self._extract_m3u8_formats(
  40             data['VideoUrl'], video_id, 'mp4',
  41             'm3u8_native', m3u8_id='hls', fatal=False)
  42         file_url = formats[0]['url']
  43         if '.ism/' in file_url:
  44             replace_url = lambda x: re.sub(r'\.ism/[^?]+', '.ism/' + x, file_url)
  45             formats.extend(self._extract_f4m_formats(
  46                 replace_url('.f4m'), video_id, f4m_id='hds', fatal=False))
  47             formats.extend(self._extract_mpd_formats(
  48                 replace_url('.mpd'), video_id, mpd_id='dash', fatal=False))
  49             formats.extend(self._extract_ism_formats(
  50                 replace_url('Manifest'), video_id, ism_id='mss', fatal=False))
  51
  52         return {
  53             'id': video_id,
  54             'title': title,
  55             'formats': formats,
  56             'thumbnail': data.get('PosterUrl'),
  57             'description': data.get('Description'),
  58         }