[ie/dplay] Fix extractors (#10471)
[yt-dlp3.git] / yt_dlp / extractor / rbgtum.py
blob5bb46553721eea18826ab8512ebfca6d19129ba8
1 import re
3 from .common import InfoExtractor
4 from ..utils import ExtractorError, parse_qs, remove_start, traverse_obj
class RbgTumIE(InfoExtractor):
    """Extractor for single lecture pages (``/w/<id>``) on live.rbg.tum.de and tum.live."""

    # Everything after ``/w/`` up to a query string or fragment is the video id,
    # so ids may contain slashes (e.g. ``cpp/22128`` or ``I2DL/12349/PRES``).
    _VALID_URL = r'https?://(?:live\.rbg\.tum\.de|tum\.live)/w/(?P<id>[^?#]+)'
    _TESTS = [{
        # Combined view
        'url': 'https://live.rbg.tum.de/w/cpp/22128',
        'md5': '53a5e7b3e07128e33bbf36687fe1c08f',
        'info_dict': {
            'id': 'cpp/22128',
            'ext': 'mp4',
            'title': 'Lecture: October 18. 2022',
            'series': 'Concepts of C++ programming (IN2377)',
        },
    }, {
        # Presentation only
        'url': 'https://live.rbg.tum.de/w/I2DL/12349/PRES',
        'md5': '36c584272179f3e56b0db5d880639cba',
        'info_dict': {
            'id': 'I2DL/12349/PRES',
            'ext': 'mp4',
            'title': 'Lecture 3: Introduction to Neural Networks',
            'series': 'Introduction to Deep Learning (IN2346)',
        },
    }, {
        # Camera only
        'url': 'https://live.rbg.tum.de/w/fvv-info/16130/CAM',
        'md5': 'e04189d92ff2f56aedf5cede65d37aad',
        'info_dict': {
            'id': 'fvv-info/16130/CAM',
            'ext': 'mp4',
            'title': 'Fachschaftsvollversammlung',
            'series': 'Fachschaftsvollversammlung Informatik',
        },
    }, {
        'url': 'https://tum.live/w/linalginfo/27102',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict (id, title, series, HLS formats) for one lecture page."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The HLS playlist is located as a double-quoted https URL containing ".m3u8" in the page.
        m3u8 = self._html_search_regex(r'"(https://[^"]+\.m3u8[^"]*)', webpage, 'm3u8')
        # The lecture title comes from the page's <h1>; the lookup is non-fatal (fatal=False).
        lecture_title = self._html_search_regex(r'<h1[^>]*>([^<]+)</h1>', webpage, 'title', fatal=False)
        # The series name is the HTML <title> with the site prefix stripped.
        lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')

        formats = self._extract_m3u8_formats(m3u8, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id='hls')

        return {
            'id': video_id,
            'title': lecture_title,
            'series': lecture_series_title,
            'formats': formats,
        }
class RbgTumCourseIE(InfoExtractor):
    """Playlist extractor for course pages (``/old/course/<year>/<term>/<slug>``)."""

    # The playlist id is the whole ``<year>/<term>/<slug>`` path; its parts and the
    # hostname are also captured separately for building API and lecture URLs.
    _VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/old/course/(?P<id>(?P<year>\d+)/(?P<term>\w+)/(?P<slug>[^/?#]+))'
    _TESTS = [{
        'url': 'https://live.rbg.tum.de/old/course/2022/S/fpv',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
        'url': 'https://live.rbg.tum.de/old/course/2022/W/set',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 6,
    }, {
        'url': 'https://tum.live/old/course/2023/S/linalginfo',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return a playlist of ``RbgTumIE`` entries for every lecture of the course.

        The JSON course API is tried first; when it yields no stream ids, the
        course webpage is downloaded and scanned for ``/w/...`` lecture links.
        """
        course_id, hostname, year, term, slug = self._match_valid_url(url).group('id', 'hostname', 'year', 'term', 'slug')
        # The API request is non-fatal; a failed or empty response is treated as an empty dict.
        meta = self._download_json(
            f'https://{hostname}/api/courses/{slug}/', course_id, fatal=False,
            query={'year': year, 'term': term}) or {}
        # Use traverse_obj (like the Streams lookup below) so that a response which
        # is not a JSON object leads to the webpage fallback instead of an AttributeError.
        lecture_series_title = traverse_obj(meta, 'Name')
        lectures = [self.url_result(f'https://{hostname}/w/{slug}/{stream_id}', RbgTumIE)
                    for stream_id in traverse_obj(meta, ('Streams', ..., 'ID'))]

        if not lectures:
            # Fallback: scrape the course page for lecture links and take the
            # series name from the HTML <title> with the site prefix stripped.
            webpage = self._download_webpage(url, course_id)
            lecture_series_title = remove_start(self._html_extract_title(webpage), 'TUM-Live | ')
            lectures = [self.url_result(f'https://{hostname}{lecture_path}', RbgTumIE)
                        for lecture_path in re.findall(r'href="(/w/[^/"]+/[^/"]+)"', webpage)]

        return self.playlist_result(lectures, course_id, lecture_series_title)
class RbgTumNewCourseIE(InfoExtractor):
    """Extractor for query-string course URLs (``/?year=...&term=...&slug=...``).

    It only rewrites the URL into the ``/old/course/<year>/<term>/<slug>`` form
    and hands it over to ``RbgTumCourseIE``.
    """

    # Matches any URL on the site root that has a query string; the required
    # parameters are validated in _real_extract.
    _VALID_URL = r'https?://(?P<hostname>(?:live\.rbg\.tum\.de|tum\.live))/\?'
    _TESTS = [{
        'url': 'https://live.rbg.tum.de/?year=2022&term=S&slug=fpv&view=3',
        'info_dict': {
            'title': 'Funktionale Programmierung und Verifikation (IN0003)',
            'id': '2022/S/fpv',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 13,
    }, {
        'url': 'https://live.rbg.tum.de/?year=2022&term=W&slug=set&view=3',
        'info_dict': {
            'title': 'SET FSMPIC',
            'id': '2022/W/set',
        },
        'params': {
            'noplaylist': False,
        },
        'playlist_count': 6,
    }, {
        'url': 'https://tum.live/?year=2023&term=S&slug=linalginfo&view=3',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Redirect a query-string course URL to the equivalent ``/old/course/`` URL.

        Raises ExtractorError when any of the ``year``, ``term`` or ``slug``
        query parameters is missing or empty.
        """
        query = parse_qs(url)
        errors = [key for key in ('year', 'term', 'slug') if not query.get(key)]
        if errors:
            # A URL lacking these parameters is a problem with the input, not with
            # the extractor, so the error is flagged as expected.
            raise ExtractorError(
                f'Input URL is missing query parameters: {", ".join(errors)}', expected=True)
        # Only the first value of each parameter is used.
        year, term, slug = query['year'][0], query['term'][0], query['slug'][0]
        hostname = self._match_valid_url(url).group('hostname')

        return self.url_result(f'https://{hostname}/old/course/{year}/{term}/{slug}', RbgTumCourseIE)