[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / opencast.py
bloba4b0a1989d3b33a6a3bb7f49c019943abb6c8253
1 import re
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 determine_ext,
7 int_or_none,
8 parse_iso8601,
9 traverse_obj,
10 variadic,
class OpencastBaseIE(InfoExtractor):
    """Shared helpers for the Opencast extractors.

    ``_call_api`` formats ``self._API_BASE`` with ``(host, video_id)``, so
    subclasses must provide that attribute.
    """

    _INSTANCES_RE = r'''(?:
                            opencast\.informatik\.kit\.edu|
                            electures\.uni-muenster\.de|
                            oc-presentation\.ltcc\.tuwien\.ac\.at|
                            medien\.ph-noe\.ac\.at|
                            oc-video\.ruhr-uni-bochum\.de|
                            oc-video1\.ruhr-uni-bochum\.de|
                            opencast\.informatik\.uni-goettingen\.de|
                            heicast\.uni-heidelberg\.de|
                            opencast\.hawk\.de:8080|
                            opencast\.hs-osnabrueck\.de|
                            video[0-9]+\.virtuos\.uni-osnabrueck\.de|
                            opencast\.uni-koeln\.de|
                            media\.opencast\.hochschule-rhein-waal\.de|
                            matterhorn\.dce\.harvard\.edu|
                            hs-harz\.opencast\.uni-halle\.de|
                            videocampus\.urz\.uni-leipzig\.de|
                            media\.uct\.ac\.za|
                            vid\.igb\.illinois\.edu|
                            cursosabertos\.c3sl\.ufpr\.br|
                            mcmedia\.missioncollege\.org|
                            clases\.odon\.edu\.uy
                        )'''
    _UUID_RE = r'[\da-fA-F]{8}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{4}-[\da-fA-F]{12}'

    def _call_api(self, host, video_id, **kwargs):
        """Download the JSON document at ``_API_BASE % (host, video_id)``.

        Extra keyword arguments are forwarded to ``_download_json``.
        """
        api_url = self._API_BASE % (host, video_id)
        return self._download_json(api_url, video_id, **kwargs)

    def _parse_mediapackage(self, video):
        """Build an info dict from a mediapackage dict.

        Raises ExtractorError when the mediapackage has no 'id'.
        """
        video_id = video.get('id')
        if video_id is None:
            raise ExtractorError('Video id was not found')

        formats = []
        # A missing track entry becomes an empty list; variadic() is relied on
        # to let a lone track be iterated like a list of tracks.
        tracks = traverse_obj(video, ('media', 'track')) or []
        for track in variadic(tracks):
            track_url = track.get('url')
            if track_url is None:
                continue

            ext = determine_ext(track_url, None)
            transport = track.get('transport')

            # Manifest-based transports: delegate to the matching extractor helper
            if transport == 'DASH' or ext == 'mpd':
                formats.extend(self._extract_mpd_formats(
                    track_url, video_id, mpd_id='dash', fatal=False))
                continue
            if transport == 'HLS' or ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    track_url, video_id, m3u8_id='hls', entry_protocol='m3u8_native', fatal=False))
                continue
            if transport == 'HDS' or ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    track_url, video_id, f4m_id='hds', fatal=False))
                continue
            if transport == 'SMOOTH':
                formats.extend(self._extract_ism_formats(
                    track_url, video_id, ism_id='smooth', fatal=False))
                continue
            if ext == 'smil':
                formats.extend(self._extract_smil_formats(track_url, video_id, fatal=False))
                continue

            # Anything else is described as a single format from the track metadata
            has_video = track.get('video')
            has_audio = track.get('audio')
            fmt = {
                'url': track_url,
                'ext': ext,
                'format_note': transport,
                'resolution': traverse_obj(track, ('video', 'resolution')),
                'fps': int_or_none(traverse_obj(track, ('video', 'framerate'))),
                'vbr': int_or_none(traverse_obj(track, ('video', 'bitrate')), scale=1000),
                'vcodec': traverse_obj(track, ('video', 'encoder', 'type')) if has_video else 'none',
                'abr': int_or_none(traverse_obj(track, ('audio', 'bitrate')), scale=1000),
                'asr': int_or_none(traverse_obj(track, ('audio', 'samplingrate'))),
                'acodec': traverse_obj(track, ('audio', 'encoder', 'type')) if has_audio else 'none',
            }

            if transport == 'RTMP':
                rtmp_match = re.search(
                    r'(?:rtmp://[^/]+/(?P<app>[^/]+))/(?P<ext>.+):(?P<playpath>.+)', track_url)
                if not rtmp_match:
                    # RTMP URLs that do not fit the expected layout are skipped
                    continue
                app, rtmp_ext, playpath = rtmp_match.group('app', 'ext', 'playpath')
                fmt.update({
                    'app': app,
                    'ext': rtmp_ext,
                    'play_path': f'{rtmp_ext}:{playpath}',
                    'rtmp_live': True,
                    'preference': -2,
                })

            formats.append(fmt)

        return {
            'id': video_id,
            'formats': formats,
            'title': video.get('title'),
            'series': video.get('seriestitle'),
            'season_id': video.get('series'),
            'creator': traverse_obj(video, ('creators', 'creator')),
            'timestamp': parse_iso8601(video.get('start')),
            'thumbnail': traverse_obj(video, ('attachments', 'attachment', ..., 'url'), get_all=False),
        }
class OpencastIE(OpencastBaseIE):
    """Extractor for single videos on the Paella watch page of an Opencast host."""

    _VALID_URL = rf'''(?x)
        https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})/paella/ui/watch\.html\?
        (?:[^#]+&)?id=(?P<id>{OpencastBaseIE._UUID_RE})'''

    _API_BASE = 'https://%s/search/episode.json?id=%s'

    _TESTS = [
        {
            'url': 'https://oc-video1.ruhr-uni-bochum.de/paella/ui/watch.html?id=ed063cd5-72c8-46b5-a60a-569243edcea8',
            'md5': '554c8e99a90f7be7e874619fcf2a3bc9',
            'info_dict': {
                'id': 'ed063cd5-72c8-46b5-a60a-569243edcea8',
                'ext': 'mp4',
                'title': '11 - Kryptographie - 24.11.2015',
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': 1606208400,
                'upload_date': '20201124',
                'season_id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
                'series': 'Kryptographie - WiSe 15/16',
                'creator': 'Alexander May',
            },
        },
    ]

    def _real_extract(self, url):
        """Fetch the episode's search result and parse its mediapackage.

        Raises ExtractorError when the response carries no mediapackage,
        instead of leaking a KeyError from chained indexing.
        """
        host, video_id = self._match_valid_url(url).group('host', 'id')

        # Walk the response defensively: any of the nested keys may be absent
        mediapackage = traverse_obj(
            self._call_api(host, video_id), ('search-results', 'result', 'mediapackage'))
        if not mediapackage:
            raise ExtractorError('No mediapackage was found for this video', expected=True)

        return self._parse_mediapackage(mediapackage)
class OpencastPlaylistIE(OpencastBaseIE):
    """Extractor for series (playlists) on the engage UI or LTI tools pages of an Opencast host."""

    _VALID_URL = rf'''(?x)
        https?://(?P<host>{OpencastBaseIE._INSTANCES_RE})(?:
            /engage/ui/index\.html\?(?:[^#]+&)?epFrom=|
            /ltitools/index\.html\?(?:[^#]+&)?series=
        )(?P<id>{OpencastBaseIE._UUID_RE})'''

    _API_BASE = 'https://%s/search/episode.json?sid=%s'

    _TESTS = [
        {
            'url': 'https://oc-video1.ruhr-uni-bochum.de/engage/ui/index.html?epFrom=cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
            'info_dict': {
                'id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
                'title': 'Kryptographie - WiSe 15/16',
            },
            'playlist_mincount': 29,
        },
        {
            'url': 'https://oc-video1.ruhr-uni-bochum.de/ltitools/index.html?subtool=series&series=cf68a4a1-36b1-4a53-a6ba-61af5705a0d0&lng=de',
            'info_dict': {
                'id': 'cf68a4a1-36b1-4a53-a6ba-61af5705a0d0',
                'title': 'Kryptographie - WiSe 15/16',
            },
            'playlist_mincount': 29,
        },
        {
            'url': 'https://electures.uni-muenster.de/engage/ui/index.html?e=1&p=1&epFrom=39391d10-a711-4d23-b21d-afd2ed7d758c',
            'info_dict': {
                'id': '39391d10-a711-4d23-b21d-afd2ed7d758c',
                'title': '021670 Theologische Themen bei Hans Blumenberg WiSe 2017/18',
            },
            'playlist_mincount': 13,
        },
    ]

    def _real_extract(self, url):
        """Return a playlist built from every episode found for the series id.

        The playlist title is the 'series' value of the first entry, if any.
        A response without any 'result' yields an empty playlist rather than
        a KeyError.
        """
        host, video_id = self._match_valid_url(url).group('host', 'id')

        # 'result' may be absent; variadic() is relied on to let a single
        # episode be iterated like a list of episodes.
        results = traverse_obj(
            self._call_api(host, video_id), ('search-results', 'result')) or []

        entries = [
            self._parse_mediapackage(episode['mediapackage'])
            for episode in variadic(results)
            if episode.get('mediapackage')
        ]

        return self.playlist_result(entries, video_id, traverse_obj(entries, (0, 'series')))