yt_dlp/extractor/mangomolo.py

   1 from .common import InfoExtractor
   2 from ..compat import (
   3     compat_b64decode,
   4     compat_urllib_parse_unquote,
   5 )
   6 from ..utils import classproperty, int_or_none
   7
   8
   9 class MangomoloBaseIE(InfoExtractor):
  10     _BASE_REGEX = r'(?:https?:)?//(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
  11     _SLUG = None
  12
  13     @classproperty
  14     def _VALID_URL(cls):
  15         return f'{cls._BASE_REGEX}{cls._SLUG}'
  16
  17     @classproperty
  18     def _EMBED_REGEX(cls):
  19         return [rf'<iframe[^>]+src=(["\'])(?P<url>{cls._VALID_URL}.+?)\1']
  20
  21     def _extract_from_webpage(self, url, webpage):
  22         for res in super()._extract_from_webpage(url, webpage):
  23             yield {
  24                 **res,
  25                 '_type': 'url_transparent',
  26                 'id': self._search_regex(self._SLUG, res['url'], 'id', group='id'),
  27                 'uploader': self._search_regex(r'^(?:https?://)?([^/]*)/.*', url, 'video uploader'),
  28             }
  29
  30     def _get_real_id(self, page_id):
  31         return page_id
  32
  33     def _real_extract(self, url):
  34         page_id = self._get_real_id(self._match_id(url))
  35         webpage = self._download_webpage(
  36             'https://player.mangomolo.com/v1/%s?%s' % (self._TYPE, url.split('?')[1]), page_id)
  37         hidden_inputs = self._hidden_inputs(webpage)
  38         m3u8_entry_protocol = 'm3u8' if self._IS_LIVE else 'm3u8_native'
  39
  40         format_url = self._html_search_regex(
  41             [
  42                 r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
  43                 r'<a[^>]+href="(rtsp://[^"]+)"'
  44             ], webpage, 'format url')
  45         formats = self._extract_wowza_formats(
  46             format_url, page_id, m3u8_entry_protocol, ['smil'])
  47
  48         return {
  49             'id': page_id,
  50             'title': page_id,
  51             'uploader_id': hidden_inputs.get('userid'),
  52             'duration': int_or_none(hidden_inputs.get('duration')),
  53             'is_live': self._IS_LIVE,
  54             'formats': formats,
  55         }
  56
  57
  58 class MangomoloVideoIE(MangomoloBaseIE):
  59     _TYPE = 'video'
  60     IE_NAME = 'mangomolo:' + _TYPE
  61     _SLUG = r'video\?.*?\bid=(?P<id>\d+)'
  62
  63     _IS_LIVE = False
  64
  65
  66 class MangomoloLiveIE(MangomoloBaseIE):
  67     _TYPE = 'live'
  68     IE_NAME = 'mangomolo:' + _TYPE
  69     _SLUG = r'(?:live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'
  70     _IS_LIVE = True
  71
  72     def _get_real_id(self, page_id):
  73         return compat_b64decode(compat_urllib_parse_unquote(page_id)).decode()