[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / redge.py
blob5ae09a096bd1dfae36d9eb6d444b218f0f782c72
2 from .common import InfoExtractor
3 from ..networking import HEADRequest
4 from ..utils import (
5 float_or_none,
6 int_or_none,
7 join_nonempty,
8 parse_qs,
9 update_url_query,
11 from ..utils.traversal import traverse_obj
class RedCDNLivxIE(InfoExtractor):
    # Matches `.livx` stream URLs on *.dcs.redcdn.pl and *.atmcdn.pl hosts;
    # the `tenant` and `id` groups are reused to rebuild per-protocol URLs.
    _VALID_URL = r'https?://[^.]+\.(?:dcs\.redcdn|atmcdn)\.pl/(?:live(?:dash|hls|ss)|nvr)/o2/(?P<tenant>[^/?#]+)/(?P<id>[^?#]+)\.livx'
    IE_NAME = 'redcdnlivx'

    _TESTS = [{
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/senat/ENC02/channel.livx?indexMode=true&startTime=638272860000&stopTime=638292544000',
        'info_dict': {
            'id': 'ENC02-638272860000-638292544000',
            'ext': 'mp4',
            'title': 'ENC02',
            'duration': 19683.982,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livedash/o2/sejm/ENC18/live.livx?indexMode=true&startTime=722333096000&stopTime=722335562000',
        'info_dict': {
            'id': 'ENC18-722333096000-722335562000',
            'ext': 'mp4',
            'title': 'ENC18',
            'duration': 2463.995,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://r.dcs.redcdn.pl/livehls/o2/sportevolution/live/triathlon2018/warsaw.livx/playlist.m3u8?startTime=550305000000&stopTime=550327620000',
        'info_dict': {
            'id': 'triathlon2018-warsaw-550305000000-550327620000',
            'ext': 'mp4',
            'title': 'triathlon2018/warsaw',
            'duration': 22619.98,
            'live_status': 'was_live',
        },
    }, {
        'url': 'https://n-25-12.dcs.redcdn.pl/nvr/o2/sejm/Migacz-ENC01/1.livx?startTime=722347200000&stopTime=722367345000',
        'only_matching': True,
    }, {
        'url': 'https://redir.atmcdn.pl/nvr/o2/sejm/ENC08/1.livx?startTime=503831270000&stopTime=503840040000',
        'only_matching': True,
    }]

    '''
    Known methods (first in url path):
    - `livedash` - DASH MPD
    - `livehls` - HTTP Live Streaming
    - `livess` - IIS Smooth Streaming
    - `nvr` - CCTV mode, directly returns a file, typically flv, avc1, aac
    - `sc` - shoutcast/icecast (audio streams, like radio)
    '''

    def _real_extract(self, url):
        """Gather ISM, direct FLV, DASH and HLS formats for one `.livx` URL."""
        mobj = self._match_valid_url(url)
        tenant = mobj.group('tenant')
        path = mobj.group('id')

        query = parse_qs(url)
        start_time = traverse_obj(query, ('startTime', 0, {int_or_none}))
        stop_time = traverse_obj(query, ('stopTime', 0, {int_or_none}))

        # Extra path component appended after `.livx` for modes that need one
        mode_suffixes = {
            'livess': '/manifest',
            'livehls': '/playlist.m3u8',
        }

        def livx_mode(mode):
            """Return the r.dcs.redcdn.pl URL of this stream for delivery `mode`."""
            # Falsy (missing or zero) time bounds are left out of the query;
            # key order stays startTime, stopTime, then the mode-specific flag
            params = {
                key: value
                for key, value in (('startTime', start_time), ('stopTime', stop_time))
                if value
            }
            if mode == 'nvr':
                params['nolimit'] = 1
            elif mode != 'sc':
                params['indexMode'] = 'true'
            suffix = mode_suffixes.get(mode, '')
            return update_url_query(
                f'https://r.dcs.redcdn.pl/{mode}/o2/{tenant}/{path}.livx{suffix}', params)

        # A transmission carries no id or title of its own, so both are
        # derived from the URL path with the generic segments removed
        title = path
        for generic_part in ('/live', 'live/', '/channel', 'channel/'):
            title = title.replace(generic_part, '')
        title = title.strip('/')
        video_id = join_nonempty(title.replace('/', '-'), start_time, stop_time)

        formats = []
        ism_doc = None
        # The manifest is fetched by hand (instead of via _extract_ism_formats)
        # so its root attributes can be read for stream metadata further down
        ism_res = self._download_xml_handle(
            livx_mode('livess'), video_id,
            note='Downloading ISM manifest',
            errnote='Failed to download ISM manifest',
            fatal=False)
        if ism_res is not False:
            ism_doc, ism_urlh = ism_res
            formats, _ = self._parse_ism_formats_and_subtitles(ism_doc, ism_urlh.url, 'ss')

        # HEAD request only: any status is accepted here and checked below,
        # and the final (post-redirect) URL is what gets offered as a format
        nvr_urlh = self._request_webpage(
            HEADRequest(livx_mode('nvr')), video_id, 'Follow flv file redirect', fatal=False,
            expected_status=lambda _: True)
        if nvr_urlh and nvr_urlh.status == 200:
            formats.append({
                'url': nvr_urlh.url,
                'ext': 'flv',
                'format_id': 'direct-0',
                'preference': -1,  # might be slow
            })

        dash_formats = self._extract_mpd_formats(
            livx_mode('livedash'), video_id, mpd_id='dash', fatal=False)
        formats.extend(dash_formats)
        hls_formats = self._extract_m3u8_formats(
            livx_mode('livehls'), video_id, m3u8_id='hls', ext='mp4', fatal=False)
        formats.extend(hls_formats)

        # 10000000 is the fallback when the manifest gives no usable TimeScale
        time_scale = traverse_obj(ism_doc, ('@TimeScale', {int_or_none})) or 10000000
        duration = traverse_obj(
            ism_doc, ('@Duration', {float_or_none(scale=time_scale)})) or None

        # A manifest flagged as live wins; otherwise a known duration marks a
        # finished broadcast, and with neither the status is left unset
        flagged_live = traverse_obj(ism_doc, '@IsLive') == 'TRUE'
        live_status = 'is_live' if flagged_live else 'was_live' if duration else None

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'duration': duration,
            'live_status': live_status,
        }