yt_dlp/extractor/clipchamp.py

   1 from .common import InfoExtractor
   2 from ..utils import (
   3     ExtractorError,
   4     traverse_obj,
   5     unified_timestamp,
   6     url_or_none,
   7 )
   8
   9
  10 class ClipchampIE(InfoExtractor):
  11     _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
  12     _TESTS = [{
  13         'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
  14         'info_dict': {
  15             'id': 'gRXZ4ZhdDaU',
  16             'ext': 'mp4',
  17             'title': 'Untitled video',
  18             'uploader': 'Alexander Schwartz',
  19             'timestamp': 1680805580,
  20             'upload_date': '20230406',
  21             'thumbnail': r're:^https?://.+\.jpg',
  22         },
  23         'params': {'skip_download': 'm3u8'},
  24     }]
  25
  26     _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
  27     _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
  28
  29     def _real_extract(self, url):
  30         video_id = self._match_id(url)
  31         webpage = self._download_webpage(url, video_id)
  32         data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
  33
  34         storage_location = data.get('storage_location')
  35         if storage_location != 'cf_stream':
  36             raise ExtractorError(f'Unsupported clip storage location "{storage_location}"')
  37
  38         path = data['download_url']
  39         iframe = self._download_webpage(
  40             f'https://iframe.cloudflarestream.com/{path}', video_id, 'Downloading player iframe')
  41         subdomain = self._search_regex(
  42             r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', iframe,
  43             'subdomain', fatal=False) or 'customer-2ut9yn3y6fta1yxe'
  44
  45         formats = self._extract_mpd_formats(
  46             self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
  47             query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
  48         formats.extend(self._extract_m3u8_formats(
  49             self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
  50             query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
  51
  52         return {
  53             'id': video_id,
  54             'formats': formats,
  55             'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), {str}))) or None,
  56             **traverse_obj(data, {
  57                 'title': ('project', 'project_name', {str}),
  58                 'timestamp': ('created_at', {unified_timestamp}),
  59                 'thumbnail': ('thumbnail_url', {url_or_none}),
  60             }),
  61         }