3 from .common
import InfoExtractor
14 class MainStreamingIE(InfoExtractor
):
15 _VALID_URL
= r
'https?://(?:webtools-?)?(?P<host>[A-Za-z0-9-]*\.msvdn\.net)/(?:embed|amp_embed|content)/(?P<id>\w+)'
16 _EMBED_REGEX
= [rf
'<iframe[^>]+?src=["\']?(?P<url>{_VALID_URL})["\']?']
17 IE_DESC
= 'MainStreaming Player'
21 # Live stream offline, has alternative content id
22 'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/53EN6GxbWaJC',
25 'title': 'Diretta homepage 2021-12-31 12:00',
27 'live_status': 'was_live',
29 'thumbnail': r
're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
31 'expected_warnings': [
32 'Ignoring alternative content ID: WDAF1KOWUpH3',
33 'MainStreaming said: Live event is OFFLINE',
35 'skip': 'live stream offline',
38 'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/WDAF1KOWUpH3',
41 'title': 'Playlist homepage',
43 'playlist_mincount': 2,
46 'url': 'https://webtools-859c1818ed614cc5b0047439470927b0.msvdn.net/embed/tDoFkZD3T1Lw',
49 'title': r
're:Class CNBC Live \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
50 'live_status': 'is_live',
52 'thumbnail': r
're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
54 'skip': 'live stream',
56 'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/EUlZfGWkGpOd?autoPlay=false',
59 'title': 'La Settimana ',
60 'description': '03 Ottobre ore 02:00',
62 'live_status': 'not_live',
63 'thumbnail': r
're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
67 # video without webtools- prefix
68 'url': 'https://f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/MfuWmzL2lGkA?autoplay=false&T=1635860445',
71 'title': 'TG Mattina',
72 'description': '06 Ottobre ore 08:00',
74 'live_status': 'not_live',
75 'thumbnail': r
're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
79 # always-on livestream with DVR
80 'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/HVvPMzy',
83 'title': r
're:^Diretta LaC News24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
84 'description': 'canale all news',
85 'live_status': 'is_live',
87 'thumbnail': r
're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
90 'skip_download': True,
94 'url': 'https://webtools.msvdn.net/embed/MfuWmzL2lGkA',
95 'only_matching': True,
97 'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/amp_embed/tDoFkZD3T1Lw',
98 'only_matching': True,
100 'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/content/tDoFkZD3T1Lw#',
101 'only_matching': True,
def _playlist_entries(self, host, playlist_content):
    """Yield one result dict per item of a playlist.

    host: host name used to build each item's embed URL.
    playlist_content: iterable of mappings, each read for 'contentID',
        'title' and the nested ('duration', 'totalSeconds') value. A falsy
        value (e.g. None when the key is absent upstream) yields nothing.

    NOTE(review): the extracted source had lost the lines that open the
    result dict; the `yield {` wrapper and the '_type' / 'id' keys below are
    reconstructed and should be confirmed against the upstream file.
    """
    # The visible caller passes `content_info.get('playlistContents')`, which
    # may be None; iterate over an empty tuple in that case instead of raising.
    for entry in playlist_content or ():
        content_id = entry.get('contentID')
        yield {
            '_type': 'url',
            'ie_key': MainStreamingIE.ie_key(),
            'id': content_id,
            'duration': int_or_none(traverse_obj(entry, ('duration', 'totalSeconds'))),
            'title': entry.get('title'),
            'url': f'https://{host}/embed/{content_id}',
        }
118 def _get_webtools_host(host
):
119 if not host
.startswith('webtools'):
120 host
= 'webtools' + ('-' if not host
.startswith('.') else '') + host
def _get_webtools_base_url(self, host):
    """Return the base URL for `host`'s webtools endpoint.

    The result is the value of `self.http_scheme()` followed by '//' and the
    webtools form of `host` as produced by `self._get_webtools_host`.
    """
    webtools_host = self._get_webtools_host(host)
    return f'{self.http_scheme()}//{webtools_host}'
def _call_api(self, host: str, path: str, item_id: str, query=None, note='Downloading API JSON', fatal=False):
    """Request `path` under the '/api/v2/' prefix on `host`'s webtools endpoint.

    This is a thin wrapper: it prepends '/api/v2/' to `path` and passes
    everything else through to `_call_webtools_api`, whose result is returned.
    """
    # JSON API, does not appear to be documented
    return self._call_webtools_api(
        host, f'/api/v2/{path}', item_id, query=query, note=note, fatal=fatal)
def _call_webtools_api(self, host: str, path: str, item_id: str, query=None, note='Downloading webtools API JSON', fatal=False):
    """Download JSON from `path` on `host`'s webtools endpoint.

    The request URL is `path` joined onto the webtools base URL for `host`.
    `item_id`, `query`, `note` and `fatal` are passed on to
    `self._download_json`, whose result is returned as-is.
    """
    # webtools docs: https://webtools.msvdn.net/
    api_url = urljoin(self._get_webtools_base_url(host), path)
    return self._download_json(api_url, item_id, query=query, note=note, fatal=fatal)
135 def _real_extract(self
, url
):
136 host
, video_id
= self
._match
_valid
_url
(url
).groups()
137 content_info
= try_get(
139 host
, f
'content/{video_id}', video_id
, note
='Downloading content info API JSON'), lambda x
: x
['playerContentInfo'])
142 webpage
= self
._download
_webpage
(url
, video_id
)
143 player_config
= self
._parse
_json
(
145 r
'config\s*=\s*({.+?})\s*;', webpage
, 'mainstreaming player config',
146 default
='{}', flags
=re
.DOTALL
),
147 video_id
, transform_source
=js_to_json
, fatal
=False) or {}
148 content_info
= player_config
['contentInfo']
150 host
= content_info
.get('host') or host
151 video_id
= content_info
.get('contentID') or video_id
152 title
= content_info
.get('title')
153 description
= traverse_obj(content_info
, 'longDescription', 'shortDescription', expected_type
=str)
154 live_status
= 'not_live'
155 if content_info
.get('drmEnabled'):
156 self
.report_drm(video_id
)
158 alternative_content_id
= content_info
.get('alternativeContentID')
159 if alternative_content_id
:
160 self
.report_warning(f
'Ignoring alternative content ID: {alternative_content_id}')
162 content_type
= int_or_none(content_info
.get('contentType'))
163 format_base_url
= None
167 if content_type
== 20:
168 dvr_enabled
= traverse_obj(content_info
, ('playerSettings', 'dvrEnabled'), expected_type
=bool)
169 format_base_url
= f
"https://{host}/live/{content_info['liveSourceID']}/{video_id}/%s{'?DVR' if dvr_enabled else ''}"
170 live_status
= 'is_live'
171 heartbeat
= self
._call
_api
(host
, f
'heartbeat/{video_id}', video_id
, note
='Checking stream status') or {}
172 if heartbeat
.get('heartBeatUp') is False:
173 self
.raise_no_formats(f
'MainStreaming said: {heartbeat.get("responseMessage")}', expected
=True)
174 live_status
= 'was_live'
177 elif content_type
== 31:
178 return self
.playlist_result(
179 self
._playlist
_entries
(host
, content_info
.get('playlistContents')), video_id
, title
, description
)
180 # Normal video content?
181 elif content_type
== 10:
182 format_base_url
= f
'https://{host}/vod/{video_id}/%s'
184 # Note: in https://webtools.msvdn.net/loader/playerV2.js there is mention of original.mp3 format,
185 # however it seems to be the same as original.mp4?
186 formats
.append({'url': format_base_url
% 'original.mp4', 'format_note': 'original', 'quality': 1})
188 self
.raise_no_formats(f
'Unknown content type {content_type}')
191 m3u8_formats
, m3u8_subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
192 format_base_url
% 'playlist.m3u8', video_id
=video_id
, fatal
=False)
193 mpd_formats
, mpd_subs
= self
._extract
_mpd
_formats
_and
_subtitles
(
194 format_base_url
% 'manifest.mpd', video_id
=video_id
, fatal
=False)
196 subtitles
= self
._merge
_subtitles
(m3u8_subs
, mpd_subs
)
197 formats
.extend(m3u8_formats
+ mpd_formats
)
202 'description': description
,
204 'live_status': live_status
,
205 'duration': parse_duration(content_info
.get('duration')),
206 'tags': content_info
.get('tags'),
207 'subtitles': subtitles
,
208 'thumbnail': urljoin(self
._get
_webtools
_base
_url
(host
), f
'image/{video_id}/poster'),