[ie/soundcloud] Various fixes (#11820)
[yt-dlp.git] / yt_dlp / extractor / mangomolo.py
blob2231f71e8f72441204c172bd3ce22320a0719559
1 import base64
2 import urllib.parse
4 from .common import InfoExtractor
5 from ..utils import classproperty, int_or_none
class MangomoloBaseIE(InfoExtractor):
    """Shared logic for the Mangomolo player/embed extractors.

    Subclasses are expected to provide:
      * ``_SLUG``    - regex tail appended to ``_BASE_REGEX``; must define a
                       named group ``id``.
      * ``_TYPE``    - path component used when building the player URL.
      * ``_IS_LIVE`` - whether the extracted media is a live stream.
    """

    # Matches both the admin "customers/embed" endpoint and the public player
    # endpoint; the scheme is optional so protocol-relative URLs match too.
    _BASE_REGEX = r'(?:https?:)?//(?:admin\.mangomolo\.com/analytics/index\.php/customers/embed/|player\.mangomolo\.com/v1/)'
    _SLUG = None

    @classproperty
    def _VALID_URL(cls):
        """Full URL pattern: the common prefix followed by the subclass slug."""
        prefix, slug = cls._BASE_REGEX, cls._SLUG
        return f'{prefix}{slug}'

    @classproperty
    def _EMBED_REGEX(cls):
        """Single-element list with the pattern locating an ``<iframe>`` embed."""
        valid_url = cls._VALID_URL
        # ``\1`` closes the src attribute with the same quote that opened it.
        return [rf'<iframe[^>]+src=(["\'])(?P<url>{valid_url}.+?)\1']

    def _extract_from_webpage(self, url, webpage):
        """Yield ``url_transparent`` results for the embeds found by the parent.

        Each result produced by the parent implementation is extended with an
        ``id`` taken from the embed URL and an ``uploader`` taken from the
        host part of the embedding page's *url*.
        """
        for embed in super()._extract_from_webpage(url, webpage):
            embed_id = self._search_regex(self._SLUG, embed['url'], 'id', group='id')
            # Host of the page that embeds the player.
            embedding_host = self._search_regex(
                r'^(?:https?://)?([^/]*)/.*', url, 'video uploader')

            entry = dict(embed)
            entry['_type'] = 'url_transparent'
            entry['id'] = embed_id
            entry['uploader'] = embedding_host
            yield entry

    def _get_real_id(self, page_id):
        """Map the id matched in the URL to the id that is reported.

        The base implementation returns *page_id* unchanged.
        """
        return page_id

    def _real_extract(self, url):
        """Download the player page for *url* and build the info dict."""
        page_id = self._get_real_id(self._match_id(url))

        # Always request the public player endpoint, forwarding everything
        # after the first '?' of the incoming URL.
        player_url = 'https://player.mangomolo.com/v1/{}?{}'.format(
            self._TYPE, url.split('?')[1])
        webpage = self._download_webpage(player_url, page_id)
        hidden_inputs = self._hidden_inputs(webpage)

        if self._IS_LIVE:
            m3u8_entry_protocol = 'm3u8'
        else:
            m3u8_entry_protocol = 'm3u8_native'

        # Candidate patterns for the stream URL: a player-config
        # ``playlist.m3u8`` entry, or an ``rtsp://`` anchor.
        format_url_patterns = [
            r'(?:file|src)\s*:\s*"(https?://[^"]+?/playlist\.m3u8)',
            r'<a[^>]+href="(rtsp://[^"]+)"',
        ]
        format_url = self._html_search_regex(
            format_url_patterns, webpage, 'format url')
        formats = self._extract_wowza_formats(
            format_url, page_id, m3u8_entry_protocol, ['smil'])

        uploader_id = hidden_inputs.get('userid')
        duration = int_or_none(hidden_inputs.get('duration'))
        return {
            'id': page_id,
            'title': page_id,
            'uploader_id': uploader_id,
            'duration': duration,
            'is_live': self._IS_LIVE,
            'formats': formats,
        }
class MangomoloVideoIE(MangomoloBaseIE):
    """Mangomolo extractor for non-live ``video?...id=<digits>`` player URLs."""

    # Player path component; also reused as the IE_NAME suffix below.
    _TYPE = 'video'
    _IS_LIVE = False

    IE_NAME = 'mangomolo:' + _TYPE
    # The ``id`` query parameter is purely numeric.
    _SLUG = r'video\?.*?\bid=(?P<id>\d+)'
class MangomoloLiveIE(MangomoloBaseIE):
    """Mangomolo extractor for live ``live?``/``index?`` player URLs."""

    # Player path component; also reused as the IE_NAME suffix below.
    _TYPE = 'live'
    _IS_LIVE = True

    IE_NAME = 'mangomolo:' + _TYPE
    # ``channelid`` is made of base64 characters, where '+', '/' and '=' may
    # also appear percent-encoded (%2B, %2F, %3D).
    _SLUG = r'(?:live|index)\?.*?\bchannelid=(?P<id>(?:[A-Za-z0-9+/=]|%2B|%2F|%3D)+)'

    def _get_real_id(self, page_id):
        """Return the decoded channel id.

        *page_id* is percent-decoded, then base64-decoded, and the resulting
        bytes are decoded to ``str``.
        """
        b64_channel_id = urllib.parse.unquote(page_id)
        raw_channel_id = base64.b64decode(b64_channel_id)
        return raw_channel_id.decode()