yt_dlp/extractor/springboardplatform.py

   1 import re
   2
   3 from .common import InfoExtractor
   4 from ..utils import (
   5     ExtractorError,
   6     int_or_none,
   7     unescapeHTML,
   8     unified_timestamp,
   9     xpath_attr,
  10     xpath_element,
  11     xpath_text,
  12 )
  13
  14
  15 class SpringboardPlatformIE(InfoExtractor):
  16     _VALID_URL = r'''(?x)
  17                     https?://
  18                         cms\.springboardplatform\.com/
  19                         (?:
  20                             (?:previews|embed_iframe)/(?P<index>\d+)/video/(?P<id>\d+)|
  21                             xml_feeds_advanced/index/(?P<index_2>\d+)/rss3/(?P<id_2>\d+)
  22                         )
  23                     '''
  24     _EMBED_REGEX = [r'<iframe\b[^>]+\bsrc=(["\'])(?P<url>(?:https?:)?//cms\.springboardplatform\.com/embed_iframe/\d+/video/\d+.*?)\1']
  25     _TESTS = [{
  26         'url': 'http://cms.springboardplatform.com/previews/159/video/981017/0/0/1',
  27         'md5': '5c3cb7b5c55740d482561099e920f192',
  28         'info_dict': {
  29             'id': '981017',
  30             'ext': 'mp4',
  31             'title': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
  32             'description': 'Redman "BUD like YOU" "Usher Good Kisser" REMIX',
  33             'thumbnail': r're:^https?://.*\.jpg$',
  34             'timestamp': 1409132328,
  35             'upload_date': '20140827',
  36             'duration': 193,
  37         },
  38     }, {
  39         'url': 'http://cms.springboardplatform.com/embed_iframe/159/video/981017/rab007/rapbasement.com/1/1',
  40         'only_matching': True,
  41     }, {
  42         'url': 'http://cms.springboardplatform.com/embed_iframe/20/video/1731611/ki055/kidzworld.com/10',
  43         'only_matching': True,
  44     }, {
  45         'url': 'http://cms.springboardplatform.com/xml_feeds_advanced/index/159/rss3/981017/0/0/1/',
  46         'only_matching': True,
  47     }]
  48
  49     def _real_extract(self, url):
  50         mobj = self._match_valid_url(url)
  51         video_id = mobj.group('id') or mobj.group('id_2')
  52         index = mobj.group('index') or mobj.group('index_2')
  53
  54         video = self._download_xml(
  55             f'http://cms.springboardplatform.com/xml_feeds_advanced/index/{index}/rss3/{video_id}', video_id)
  56
  57         item = xpath_element(video, './/item', 'item', fatal=True)
  58
  59         content = xpath_element(
  60             item, './{http://search.yahoo.com/mrss/}content', 'content',
  61             fatal=True)
  62         title = unescapeHTML(xpath_text(item, './title', 'title', fatal=True))
  63
  64         video_url = content.attrib['url']
  65
  66         if 'error_video.mp4' in video_url:
  67             raise ExtractorError(
  68                 f'Video {video_id} no longer exists', expected=True)
  69
  70         duration = int_or_none(content.get('duration'))
  71         tbr = int_or_none(content.get('bitrate'))
  72         filesize = int_or_none(content.get('fileSize'))
  73         width = int_or_none(content.get('width'))
  74         height = int_or_none(content.get('height'))
  75
  76         description = unescapeHTML(xpath_text(
  77             item, './description', 'description'))
  78         thumbnail = xpath_attr(
  79             item, './{http://search.yahoo.com/mrss/}thumbnail', 'url',
  80             'thumbnail')
  81
  82         timestamp = unified_timestamp(xpath_text(
  83             item, './{http://cms.springboardplatform.com/namespaces.html}created',
  84             'timestamp'))
  85
  86         formats = [{
  87             'url': video_url,
  88             'format_id': 'http',
  89             'tbr': tbr,
  90             'filesize': filesize,
  91             'width': width,
  92             'height': height,
  93         }]
  94
  95         m3u8_format = formats[0].copy()
  96         m3u8_format.update({
  97             'url': re.sub(r'(https?://)cdn\.', r'\1hls.', video_url) + '.m3u8',
  98             'ext': 'mp4',
  99             'format_id': 'hls',
 100             'protocol': 'm3u8_native',
 101         })
 102         formats.append(m3u8_format)
 103
 104         return {
 105             'id': video_id,
 106             'title': title,
 107             'description': description,
 108             'thumbnail': thumbnail,
 109             'timestamp': timestamp,
 110             'duration': duration,
 111             'formats': formats,
 112         }