yt_dlp/extractor/tvc.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     clean_html,
   4     int_or_none,
   5 )
   6
   7
   8 class TVCIE(InfoExtractor):
   9     _VALID_URL = r'https?://(?:www\.)?tvc\.ru/video/iframe/id/(?P<id>\d+)'
  10     _EMBED_REGEX = [r'<iframe[^>]+?src=(["\'])(?P<url>(?:http:)?//(?:www\.)?tvc\.ru/video/iframe/id/[^"]+)\1']
  11     _TEST = {
  12         'url': 'http://www.tvc.ru/video/iframe/id/74622/isPlay/false/id_stat/channel/?acc_video_id=/channel/brand/id/17/show/episodes/episode_id/39702',
  13         'md5': 'bbc5ff531d1e90e856f60fc4b3afd708',
  14         'info_dict': {
  15             'id': '74622',
  16             'ext': 'mp4',
  17             'title': 'События. "События". Эфир от 22.05.2015 14:30',
  18             'thumbnail': r're:^https?://.*\.jpg$',
  19             'duration': 1122,
  20         },
  21     }
  22
  23     def _real_extract(self, url):
  24         video_id = self._match_id(url)
  25
  26         video = self._download_json(
  27             f'http://www.tvc.ru/video/json/id/{video_id}', video_id)
  28
  29         formats = []
  30         for info in video.get('path', {}).get('quality', []):
  31             video_url = info.get('url')
  32             if not video_url:
  33                 continue
  34             format_id = self._search_regex(
  35                 r'cdnvideo/([^/]+?)(?:-[^/]+?)?/', video_url,
  36                 'format id', default=None)
  37             formats.append({
  38                 'url': video_url,
  39                 'format_id': format_id,
  40                 'width': int_or_none(info.get('width')),
  41                 'height': int_or_none(info.get('height')),
  42                 'tbr': int_or_none(info.get('bitrate')),
  43             })
  44
  45         return {
  46             'id': video_id,
  47             'title': video['title'],
  48             'thumbnail': video.get('picture'),
  49             'duration': int_or_none(video.get('duration')),
  50             'formats': formats,
  51         }
  52
  53
  54 class TVCArticleIE(InfoExtractor):
  55     _VALID_URL = r'https?://(?:www\.)?tvc\.ru/(?!video/iframe/id/)(?P<id>[^?#]+)'
  56     _TESTS = [{
  57         'url': 'http://www.tvc.ru/channel/brand/id/29/show/episodes/episode_id/39702/',
  58         'info_dict': {
  59             'id': '74622',
  60             'ext': 'mp4',
  61             'title': 'События. "События". Эфир от 22.05.2015 14:30',
  62             'description': 'md5:ad7aa7db22903f983e687b8a3e98c6dd',
  63             'thumbnail': r're:^https?://.*\.jpg$',
  64             'duration': 1122,
  65         },
  66     }, {
  67         'url': 'http://www.tvc.ru/news/show/id/69944',
  68         'info_dict': {
  69             'id': '75399',
  70             'ext': 'mp4',
  71             'title': 'Эксперты: в столице встал вопрос о максимально безопасных остановках',
  72             'description': 'md5:f2098f71e21f309e89f69b525fd9846e',
  73             'thumbnail': r're:^https?://.*\.jpg$',
  74             'duration': 278,
  75         },
  76     }, {
  77         'url': 'http://www.tvc.ru/channel/brand/id/47/show/episodes#',
  78         'info_dict': {
  79             'id': '2185',
  80             'ext': 'mp4',
  81             'title': 'Ещё не поздно. Эфир от 03.08.2013',
  82             'description': 'md5:51fae9f3f8cfe67abce014e428e5b027',
  83             'thumbnail': r're:^https?://.*\.jpg$',
  84             'duration': 3316,
  85         },
  86     }]
  87
  88     def _real_extract(self, url):
  89         webpage = self._download_webpage(url, self._match_id(url))
  90         return {
  91             '_type': 'url_transparent',
  92             'ie_key': 'TVC',
  93             'url': self._og_search_video_url(webpage),
  94             'title': clean_html(self._og_search_title(webpage)),
  95             'description': clean_html(self._og_search_description(webpage)),
  96             'thumbnail': self._og_search_thumbnail(webpage),
  97         }