yt_dlp/extractor/viqeo.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     int_or_none,
   4     str_or_none,
   5     url_or_none,
   6 )
   7
   8
   9 class ViqeoIE(InfoExtractor):
  10     _WORKING = False
  11     _VALID_URL = r'''(?x)
  12                         (?:
  13                             viqeo:|
  14                             https?://cdn\.viqeo\.tv/embed/*\?.*?\bvid=|
  15                             https?://api\.viqeo\.tv/v\d+/data/startup?.*?\bvideo(?:%5B%5D|\[\])=
  16                         )
  17                         (?P<id>[\da-f]+)
  18                     '''
  19     _EMBED_REGEX = [r'<iframe[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cdn\.viqeo\.tv/embed/*\?.*?\bvid=[\da-f]+.*?)\1']
  20     _TESTS = [{
  21         'url': 'https://cdn.viqeo.tv/embed/?vid=cde96f09d25f39bee837',
  22         'md5': 'a169dd1a6426b350dca4296226f21e76',
  23         'info_dict': {
  24             'id': 'cde96f09d25f39bee837',
  25             'ext': 'mp4',
  26             'title': 'cde96f09d25f39bee837',
  27             'thumbnail': r're:^https?://.*\.jpg$',
  28             'duration': 76,
  29         },
  30     }, {
  31         'url': 'viqeo:cde96f09d25f39bee837',
  32         'only_matching': True,
  33     }, {
  34         'url': 'https://api.viqeo.tv/v1/data/startup?video%5B%5D=71bbec412ade45c3216c&profile=112',
  35         'only_matching': True,
  36     }]
  37
  38     def _real_extract(self, url):
  39         video_id = self._match_id(url)
  40
  41         webpage = self._download_webpage(
  42             'https://cdn.viqeo.tv/embed/?vid=%s' % video_id, video_id)
  43
  44         data = self._parse_json(
  45             self._search_regex(
  46                 r'SLOT_DATA\s*=\s*({.+?})\s*;', webpage, 'slot data'),
  47             video_id)
  48
  49         formats = []
  50         thumbnails = []
  51         for media_file in data['mediaFiles']:
  52             if not isinstance(media_file, dict):
  53                 continue
  54             media_url = url_or_none(media_file.get('url'))
  55             if not media_url or not media_url.startswith(('http', '//')):
  56                 continue
  57             media_type = str_or_none(media_file.get('type'))
  58             if not media_type:
  59                 continue
  60             media_kind = media_type.split('/')[0].lower()
  61             f = {
  62                 'url': media_url,
  63                 'width': int_or_none(media_file.get('width')),
  64                 'height': int_or_none(media_file.get('height')),
  65             }
  66             format_id = str_or_none(media_file.get('quality'))
  67             if media_kind == 'image':
  68                 f['id'] = format_id
  69                 thumbnails.append(f)
  70             elif media_kind in ('video', 'audio'):
  71                 is_audio = media_kind == 'audio'
  72                 f.update({
  73                     'format_id': 'audio' if is_audio else format_id,
  74                     'fps': int_or_none(media_file.get('fps')),
  75                     'vcodec': 'none' if is_audio else None,
  76                 })
  77                 formats.append(f)
  78
  79         duration = int_or_none(data.get('duration'))
  80
  81         return {
  82             'id': video_id,
  83             'title': video_id,
  84             'duration': duration,
  85             'thumbnails': thumbnails,
  86             'formats': formats,
  87         }