[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / videocampus_sachsen.py
blob6f98c685c905d5c3cb9a61ae0aad71378f2c9401
1 import functools
2 import re
4 from .common import InfoExtractor
5 from ..networking.exceptions import HTTPError
6 from ..utils import ExtractorError, OnDemandPagedList, urlencode_postdata
9 class VideocampusSachsenIE(InfoExtractor):
10 IE_NAME = 'ViMP'
11 _INSTANCES = (
12 'bergauf.tv',
13 'campus.demo.vimp.com',
14 'corporate.demo.vimp.com',
15 'dancehalldatabase.com',
16 'drehzahl.tv',
17 'educhannel.hs-gesundheit.de',
18 'emedia.ls.haw-hamburg.de',
19 'globale-evolution.net',
20 'hohu.tv',
21 'htvideos.hightechhigh.org',
22 'k210039.vimp.mivitec.net',
23 'media.cmslegal.com',
24 'media.hs-furtwangen.de',
25 'media.hwr-berlin.de',
26 'mediathek.dkfz.de',
27 'mediathek.htw-berlin.de',
28 'mediathek.polizei-bw.de',
29 'medien.hs-merseburg.de',
30 'mportal.europa-uni.de',
31 'pacific.demo.vimp.com',
32 'slctv.com',
33 'streaming.prairiesouth.ca',
34 'tube.isbonline.cn',
35 'univideo.uni-kassel.de',
36 'ursula2.genetics.emory.edu',
37 'ursulablicklevideoarchiv.com',
38 'v.agrarumweltpaedagogik.at',
39 'video.eplay-tv.de',
40 'video.fh-dortmund.de',
41 'video.hs-offenburg.de',
42 'video.hs-pforzheim.de',
43 'video.hspv.nrw.de',
44 'video.irtshdf.fr',
45 'video.pareygo.de',
46 'video.tu-freiberg.de',
47 'videocampus.sachsen.de',
48 'videoportal.uni-freiburg.de',
49 'videoportal.vm.uni-freiburg.de',
50 'videos.duoc.cl',
51 'videos.uni-paderborn.de',
52 'vimp-bemus.udk-berlin.de',
53 'vimp.aekwl.de',
54 'vimp.hs-mittweida.de',
55 'vimp.oth-regensburg.de',
56 'vimp.ph-heidelberg.de',
57 'vimp.sma-events.com',
58 'vimp.weka-fachmedien.de',
59 'webtv.univ-montp3.fr',
60 'www.b-tu.de/media',
61 'www.bergauf.tv',
62 'www.bigcitytv.de',
63 'www.cad-videos.de',
64 'www.drehzahl.tv',
65 'www.fh-bielefeld.de/medienportal',
66 'www.hohu.tv',
67 'www.orvovideo.com',
68 'www.rwe.tv',
69 'www.salzi.tv',
70 'www.wenglor-media.com',
71 'www2.univ-sba.dz',
# Accepts three URL shapes on any known ViMP instance:
#   * /m/<tmp_id>                          -> temporary share link
#   * [/category]/video/<display_id>/<id>  -> regular video page
#   * /media/embed?...key=<embed_id>       -> embedded player
# The 32-hex-digit embed key is captured WITHOUT any trailing "&".
# Capturing the separator (as "{32}&?" inside the group did) produced ids
# such as "<key>&" for embed URLs carrying further query parameters, and
# that id is later interpolated verbatim into the media URLs.
_VALID_URL = r'''(?x)https?://(?P<host>{})/(?:
    m/(?P<tmp_id>[0-9a-f]+)|
    (?:category/)?video/(?P<display_id>[\w-]+)/(?P<id>[0-9a-f]{{32}})|
    media/embed.*(?:\?|&)key=(?P<embed_id>[0-9a-f]{{32}})
    )'''.format('|'.join(map(re.escape, _INSTANCES)))
79 _TESTS = [
81 'url': 'https://videocampus.sachsen.de/m/e0d6c8ce6e394c188f1342f1ab7c50ed6fc4490b808699801def5cb2e46d76ca7367f622a9f516c542ffb805b24d6b643bd7c81f385acaac4c59081b87a2767b',
82 'info_dict': {
83 'id': 'e6b9349905c1628631f175712250f2a1',
84 'title': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
85 'description': 'Konstruktiver Entwicklungsprozess Vorlesung 7',
86 'thumbnail': 'https://videocampus.sachsen.de/cache/1a985379ad3aecba8097a6902c7daa4e.jpg',
87 'ext': 'mp4',
91 'url': 'https://videocampus.sachsen.de/video/Was-ist-selbstgesteuertes-Lernen/fc99c527e4205b121cb7c74433469262',
92 'info_dict': {
93 'id': 'fc99c527e4205b121cb7c74433469262',
94 'title': 'Was ist selbstgesteuertes Lernen?',
95 'description': 'md5:196aa3b0509a526db62f84679522a2f5',
96 'thumbnail': 'https://videocampus.sachsen.de/cache/6f4a85096ba24cb398e6ce54446b57ae.jpg',
97 'display_id': 'Was-ist-selbstgesteuertes-Lernen',
98 'ext': 'mp4',
102 'url': 'https://videocampus.sachsen.de/category/video/Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht/09d4ed029002eb1bdda610f1103dd54c/100',
103 'info_dict': {
104 'id': '09d4ed029002eb1bdda610f1103dd54c',
105 'title': 'Tutorial zur Nutzung von Adobe Connect aus Veranstalter-Sicht',
106 'description': 'md5:3d379ca3cc17b9da6784d7f58cca4d58',
107 'thumbnail': 'https://videocampus.sachsen.de/cache/2452498fe8c2d5a7dc79a05d30f407b6.jpg',
108 'display_id': 'Tutorial-zur-Nutzung-von-Adobe-Connect-aus-Veranstalter-Sicht',
109 'ext': 'mp4',
113 'url': 'https://www2.univ-sba.dz/video/Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122/0183356e41af7bfb83d7667b20d9b6a3',
114 'info_dict': {
115 'url': 'https://www2.univ-sba.dz/getMedium/0183356e41af7bfb83d7667b20d9b6a3.mp4',
116 'id': '0183356e41af7bfb83d7667b20d9b6a3',
117 'title': 'Présentation de la Faculté de droit et des sciences politiques - Journée portes ouvertes 2021/22',
118 'description': 'md5:508958bd93e0ca002ac731d94182a54f',
119 'thumbnail': 'https://www2.univ-sba.dz/cache/4d5d4a0b4189271a8cc6cb5328e14769.jpg',
120 'display_id': 'Presentation-de-la-Faculte-de-droit-et-des-sciences-politiques-Journee-portes-ouvertes-202122',
121 'ext': 'mp4',
125 'url': 'https://vimp.weka-fachmedien.de/video/Preisverleihung-Produkte-des-Jahres-2022/c8816f1cc942c12b6cce57c835cffd7c',
126 'info_dict': {
127 'id': 'c8816f1cc942c12b6cce57c835cffd7c',
128 'title': 'Preisverleihung »Produkte des Jahres 2022«',
129 'description': 'md5:60c347568ca89aa25b772c4ea564ebd3',
130 'thumbnail': 'https://vimp.weka-fachmedien.de/cache/da9f3090e9227b25beacf67ccf94de14.png',
131 'display_id': 'Preisverleihung-Produkte-des-Jahres-2022',
132 'ext': 'mp4',
136 'url': 'https://videocampus.sachsen.de/media/embed?key=fc99c527e4205b121cb7c74433469262',
137 'info_dict': {
138 'id': 'fc99c527e4205b121cb7c74433469262',
139 'title': 'Was ist selbstgesteuertes Lernen?',
140 'ext': 'mp4',
def _real_extract(self, url):
    """Extract a single video from a ViMP instance.

    The URL may be a temporary share link (``tmp_id``), a regular video
    page (``display_id`` + ``id``) or an embed player URL (``embed_id``);
    see ``_VALID_URL`` for the exact shapes.

    Returns an info dict with ``id``, ``title``, ``description``,
    ``thumbnail``, ``display_id``, ``formats`` and ``subtitles``.

    An ``ExtractorError`` from the HLS manifest request is re-raised unless
    its cause is an ``HTTPError`` with status 404 or 500; in that case only
    the direct MP4 format is returned.
    """
    host, video_id, tmp_id, display_id, embed_id = self._match_valid_url(url).group(
        'host', 'id', 'tmp_id', 'display_id', 'embed_id')
    # Embed URLs provide neither `id` nor `tmp_id`; fall back to the embed
    # key so the download is not reported under the id "None".
    webpage = self._download_webpage(
        url, video_id or tmp_id or embed_id, fatal=False) or ''

    if not video_id:
        # Share links only reveal the real media key through the embedded
        # player's src attribute. The host is escaped because it is
        # interpolated into a regex and contains metacharacters such as ".".
        video_id = embed_id or self._html_search_regex(
            rf'src="https?://{re.escape(host)}/media/embed.*(?:\?|&)key=([0-9a-f]+)&?',
            webpage, 'video_id')

    if not (display_id or tmp_id):
        # Title, description from embedded page's meta wouldn't be correct
        title = self._html_search_regex(
            r'<video-js[^>]* data-piwik-title="([^"<]+)"', webpage, 'title', fatal=False)
        description = None
        thumbnail = None
    else:
        title = self._html_search_meta(('og:title', 'twitter:title', 'title'), webpage, fatal=False)
        description = self._html_search_meta(
            ('og:description', 'twitter:description', 'description'), webpage, fatal=False)
        thumbnail = self._html_search_meta(('og:image', 'twitter:image'), webpage, fatal=False)

    formats, subtitles = [], {}
    try:
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            f'https://{host}/media/hlsMedium/key/{video_id}/format/auto/ext/mp4/learning/0/path/m3u8',
            video_id, 'mp4', m3u8_id='hls', fatal=True)
    except ExtractorError as e:
        # A 404/500 on the manifest is tolerated: the empty defaults above
        # are kept and extraction continues with the direct download.
        if not isinstance(e.cause, HTTPError) or e.cause.status not in (404, 500):
            raise

    # The direct MP4 is always offered, with or without HLS formats.
    formats.append({'url': f'https://{host}/getMedium/{video_id}.mp4'})

    return {
        'id': video_id,
        'title': title,
        'description': description,
        'thumbnail': thumbnail,
        'display_id': display_id,
        'formats': formats,
        'subtitles': subtitles,
    }
188 class ViMPPlaylistIE(InfoExtractor):
189 IE_NAME = 'ViMP:Playlist'
# Playlist-like pages on any known ViMP instance: albums addressed by a
# numeric `album_id`, or categories/channels addressed by `mode`, a slug
# (`name`) and a numeric `id`. Unlike the single-video extractor, the
# `host` group here includes the URL scheme.
_VALID_URL = r'''(?x)(?P<host>https?://(?:{}))/(?:
    album/view/aid/(?P<album_id>[0-9]+)|
    (?P<mode>category|channel)/(?P<name>[\w-]+)/(?P<id>[0-9]+)
    )'''.format('|'.join(
        re.escape(instance) for instance in VideocampusSachsenIE._INSTANCES))
195 _TESTS = [{
196 'url': 'https://vimp.oth-regensburg.de/channel/Designtheorie-1-SoSe-2020/3',
197 'info_dict': {
198 'id': 'channel-3',
199 'title': 'Designtheorie 1 SoSe 2020 :: Channels :: ViMP OTH Regensburg',
201 'playlist_mincount': 9,
202 }, {
203 'url': 'https://www.fh-bielefeld.de/medienportal/album/view/aid/208',
204 'info_dict': {
205 'id': 'album-208',
206 'title': 'KG Praktikum ABT/MEC :: Playlists :: FH-Medienportal',
208 'playlist_mincount': 4,
209 }, {
210 'url': 'https://videocampus.sachsen.de/category/online-tutorials-onyx/91',
211 'info_dict': {
212 'id': 'category-91',
213 'title': 'Online-Seminare ONYX - BPS - Bildungseinrichtungen - VCS',
215 'playlist_mincount': 7,
217 _PAGE_SIZE = 10
def _fetch_page(self, host, url_part, playlist_id, data, page):
    """Yield url results for the videos listed on one page of a playlist.

    POSTs `data` (form-encoded) to the instance's boxList component for
    `url_part`, passing `page` as a query parameter, and yields one
    VideocampusSachsenIE url result for every quoted path in the response
    that contains ``/video/``. `host` is prepended to each path found.
    """
    listing = self._download_webpage(
        f'{host}/media/ajax/component/boxList/{url_part}', playlist_id,
        query={'page': page, 'page_only': 1}, data=urlencode_postdata(data))

    for hit in re.finditer(r'"([^"]+/video/[^"]+)"', listing):
        yield self.url_result(host + hit.group(1), VideocampusSachsenIE)
def _real_extract(self, url):
    """Build a lazily paged playlist for an album, category or channel.

    The playlist title comes from the page's ``title`` meta tag, falling
    back to the HTML title; a failed page download is tolerated. Entries
    are fetched on demand through ``_fetch_page`` in pages of
    ``_PAGE_SIZE``. The playlist id is ``<mode>-<numeric id>``, with
    ``album`` as the mode for album URLs.
    """
    mobj = self._match_valid_url(url)
    host, name = mobj.group('host', 'name')
    album_id = mobj.group('album_id')
    mode = mobj.group('mode')
    # Exactly one of the two numeric ids is present, depending on which
    # alternative of _VALID_URL matched.
    list_id = album_id or mobj.group('id')

    page_html = self._download_webpage(url, list_id, fatal=False) or ''
    title = self._html_search_meta('title', page_html, fatal=False)
    if not title:
        title = self._html_extract_title(page_html)

    # Each kind of listing has its own ajax path and context code.
    if album_id:
        url_part, context = f'aid/{album_id}', '4'
    elif mode == 'category':
        url_part, context = f'category/{name}/category_id/{list_id}', '1'
    else:
        url_part, context = f'title/{name}/channel/{list_id}', '3'

    # Album URLs carry no `mode` group.
    mode = mode or 'album'
    data = {
        'vars[mode]': mode,
        f'vars[{mode}]': list_id,
        'vars[context]': context,
        'vars[context_id]': list_id,
        'vars[layout]': 'thumb',
        'vars[per_page][thumb]': str(self._PAGE_SIZE),
    }

    fetch_page = functools.partial(self._fetch_page, host, url_part, list_id, data)
    return self.playlist_result(
        OnDemandPagedList(fetch_page, self._PAGE_SIZE),
        playlist_title=title, id=f'{mode}-{list_id}')