yt_dlp/extractor/nobelprize.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     js_to_json,
   4     mimetype2ext,
   5     determine_ext,
   6     update_url_query,
   7     get_element_by_attribute,
   8     int_or_none,
   9 )
  10
  11
  12 class NobelPrizeIE(InfoExtractor):
  13     _WORKING = False
  14     _VALID_URL = r'https?://(?:www\.)?nobelprize\.org/mediaplayer.*?\bid=(?P<id>\d+)'
  15     _TEST = {
  16         'url': 'http://www.nobelprize.org/mediaplayer/?id=2636',
  17         'md5': '04c81e5714bb36cc4e2232fee1d8157f',
  18         'info_dict': {
  19             'id': '2636',
  20             'ext': 'mp4',
  21             'title': 'Announcement of the 2016 Nobel Prize in Physics',
  22             'description': 'md5:05beba57f4f5a4bbd4cf2ef28fcff739',
  23         }
  24     }
  25
  26     def _real_extract(self, url):
  27         video_id = self._match_id(url)
  28         webpage = self._download_webpage(url, video_id)
  29         media = self._parse_json(self._search_regex(
  30             r'(?s)var\s*config\s*=\s*({.+?});', webpage,
  31             'config'), video_id, js_to_json)['media']
  32         title = media['title']
  33
  34         formats = []
  35         for source in media.get('source', []):
  36             source_src = source.get('src')
  37             if not source_src:
  38                 continue
  39             ext = mimetype2ext(source.get('type')) or determine_ext(source_src)
  40             if ext == 'm3u8':
  41                 formats.extend(self._extract_m3u8_formats(
  42                     source_src, video_id, 'mp4', 'm3u8_native',
  43                     m3u8_id='hls', fatal=False))
  44             elif ext == 'f4m':
  45                 formats.extend(self._extract_f4m_formats(
  46                     update_url_query(source_src, {'hdcore': '3.7.0'}),
  47                     video_id, f4m_id='hds', fatal=False))
  48             else:
  49                 formats.append({
  50                     'url': source_src,
  51                 })
  52
  53         return {
  54             'id': video_id,
  55             'title': title,
  56             'description': get_element_by_attribute('itemprop', 'description', webpage),
  57             'duration': int_or_none(media.get('duration')),
  58             'formats': formats,
  59         }