[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / frontendmasters.py
blobb5176aa9b646cb3c4ed0848aed9d8ad205ba3cd6
1 import re
2 import urllib.parse
4 from .common import InfoExtractor
5 from ..utils import (
6 ExtractorError,
7 parse_duration,
8 url_or_none,
9 urlencode_postdata,
class FrontendMastersBaseIE(InfoExtractor):
    """Constants and login flow shared by the FrontendMasters extractors."""

    _API_BASE = 'https://api.frontendmasters.com/v1/kabuki'
    _LOGIN_URL = 'https://frontendmasters.com/login/'

    _NETRC_MACHINE = 'frontendmasters'

    # Frame dimensions keyed by quality label.
    _QUALITIES = {
        'low': {'width': 480, 'height': 360},
        'mid': {'width': 1280, 'height': 720},
        'high': {'width': 1920, 'height': 1080},
    }

    def _perform_login(self, username, password):
        """Submit the login form with the given credentials.

        Returns normally when the response contains a logout marker.
        Otherwise raises ExtractorError, quoting the text of the page's
        ``MessageAlert`` element when one is found.
        """
        page = self._download_webpage(
            self._LOGIN_URL, None, 'Downloading login page')

        # The form's hidden inputs are posted back with the credentials.
        form_data = self._hidden_inputs(page)
        form_data['username'] = username
        form_data['password'] = password

        # Use the form's own action, falling back to the login URL itself.
        action = self._search_regex(
            r'<form[^>]+action=(["\'])(?P<url>.+?)\1', page,
            'post_url', default=self._LOGIN_URL, group='url')
        if not action.startswith('http'):
            action = urllib.parse.urljoin(self._LOGIN_URL, action)

        result = self._download_webpage(
            action, None, 'Logging in',
            data=urlencode_postdata(form_data),
            headers={'Content-Type': 'application/x-www-form-urlencoded'})

        # A logout link in the response means the login succeeded.
        if 'wp-login.php?action=logout' in result or '>Logout' in result:
            return

        message = self._html_search_regex(
            r'class=(["\'])(?:(?!\1).)*\bMessageAlert\b(?:(?!\1).)*\1[^>]*>(?P<error>[^<]+)<',
            result, 'error message', default=None, group='error')
        if not message:
            raise ExtractorError('Unable to log in')
        raise ExtractorError(f'Unable to login: {message}', expected=True)
class FrontendMastersPageBaseIE(FrontendMastersBaseIE):
    """Base for the extractors that work from a course's JSON description."""

    def _download_course(self, course_name, url):
        """Download the course JSON, sending *url* as the Referer."""
        return self._download_json(
            f'{self._API_BASE}/courses/{course_name}', course_name,
            'Downloading course JSON', headers={'Referer': url})

    @staticmethod
    def _extract_chapters(course):
        """Return the chapter titles found in ``course['lessonElements']``.

        Returns an empty list when the key is missing or is not a list.
        """
        lesson_elements = course.get('lessonElements')
        if not isinstance(lesson_elements, list):
            return []
        # NOTE(review): assumes chapter titles are the string entries of
        # lessonElements (e.g. 'Introduction') - confirm against the API.
        # Filtering with url_or_none() discarded every plain title, so no
        # lesson could ever be assigned a chapter.
        return [e for e in lesson_elements if isinstance(e, str)]

    @staticmethod
    def _extract_lesson(chapters, lesson_id, lesson):
        """Build a ``url_transparent`` result for a single lesson.

        chapters:  list of chapter titles (see _extract_chapters)
        lesson_id: identifier used to build the ``frontendmasters:`` URL
        lesson:    the lesson's metadata dict from the course JSON

        The result defers format extraction to FrontendMastersIE and carries
        the metadata available here (title, description, chapter, ...).
        """
        title = lesson.get('title') or lesson_id
        display_id = lesson.get('slug')
        description = lesson.get('description')
        thumbnail = lesson.get('thumbnail')

        # Both stay None unless the lesson carries usable integer indices;
        # the chapter lookup must not run with chapter_number unset.
        chapter = chapter_number = None
        index = lesson.get('index')
        element_index = lesson.get('elementIndex')
        if (isinstance(index, int) and isinstance(element_index, int)
                and index < element_index):
            chapter_number = element_index - index
            if chapter_number <= len(chapters):
                chapter = chapters[chapter_number - 1]

        # The timestamp is a 'H:M:S - H:M:S' range; duration is its length.
        duration = None
        timestamp = lesson.get('timestamp')
        if isinstance(timestamp, str):
            mobj = re.search(
                r'(?P<start>\d{1,2}:\d{1,2}:\d{1,2})\s*-(?P<end>\s*\d{1,2}:\d{1,2}:\d{1,2})',
                timestamp)
            if mobj:
                start = parse_duration(mobj.group('start'))
                end = parse_duration(mobj.group('end'))
                # Guard the subtraction in case either bound fails to parse.
                if start is not None and end is not None:
                    duration = end - start

        return {
            '_type': 'url_transparent',
            'url': f'frontendmasters:{lesson_id}',
            'ie_key': FrontendMastersIE.ie_key(),
            'id': lesson_id,
            'display_id': display_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
            'chapter': chapter,
            'chapter_number': chapter_number,
        }
class FrontendMastersIE(FrontendMastersBaseIE):
    """Extract the formats and subtitles of a single lesson video by id."""

    _VALID_URL = r'(?:frontendmasters:|https?://api\.frontendmasters\.com/v\d+/kabuki/video/)(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://api.frontendmasters.com/v1/kabuki/video/a2qogef6ba',
        'md5': '7f161159710d6b7016a4f4af6fcb05e2',
        'info_dict': {
            'id': 'a2qogef6ba',
            'ext': 'mp4',
            'title': 'a2qogef6ba',
        },
        'skip': 'Requires FrontendMasters account credentials',
    }, {
        'url': 'frontendmasters:a2qogef6ba',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the source endpoint for every container/quality pair.

        Combinations whose request fails or yields no usable URL are
        skipped rather than aborting the whole extraction.
        """
        lesson_id = self._match_id(url)

        source_url = f'{self._API_BASE}/video/{lesson_id}/source'

        formats = []
        for ext in ('webm', 'mp4'):
            for quality in ('low', 'mid', 'high'):
                resolution = self._QUALITIES[quality]
                format_id = f'{ext}-{quality}'
                source = self._download_json(
                    source_url, lesson_id,
                    f'Downloading {format_id} source JSON', query={
                        'f': ext,
                        'r': resolution['height'],
                    }, headers={
                        'Referer': url,
                    }, fatal=False)

                # The request is non-fatal, so the result may not be a dict;
                # subscripting it unconditionally would crash instead of
                # skipping this format.
                format_url = (
                    url_or_none(source.get('url'))
                    if isinstance(source, dict) else None)
                if not format_url:
                    continue

                # Build a fresh dict so the shared _QUALITIES entries are
                # never mutated.
                formats.append({
                    **resolution,
                    'url': format_url,
                    'ext': ext,
                    'format_id': format_id,
                })

        subtitles = {
            'en': [{
                'url': f'{self._API_BASE}/transcripts/{lesson_id}.vtt',
            }],
        }

        return {
            'id': lesson_id,
            'title': lesson_id,
            'formats': formats,
            'subtitles': subtitles,
        }
class FrontendMastersLessonIE(FrontendMastersPageBaseIE):
    """Extract a single lesson addressed by its course and lesson slugs."""

    _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<course_name>[^/]+)/(?P<lesson_name>[^/]+)'
    _TEST = {
        'url': 'https://frontendmasters.com/courses/web-development/tools',
        'info_dict': {
            'id': 'a2qogef6ba',
            'display_id': 'tools',
            'ext': 'mp4',
            'title': 'Tools',
            'description': 'md5:82c1ea6472e88ed5acd1829fe992e4f7',
            'thumbnail': r're:^https?://.*\.jpg$',
            'chapter': 'Introduction',
            'chapter_number': 1,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'Requires FrontendMasters account credentials',
    }

    def _real_extract(self, url):
        """Find the lesson whose slug matches the URL and describe it.

        Raises ExtractorError when the course JSON has no lesson with the
        requested slug.
        """
        mobj = self._match_valid_url(url)
        course_name, lesson_name = mobj.group('course_name', 'lesson_name')

        course = self._download_course(course_name, url)

        # Supply a default so a missing slug does not leak a bare
        # StopIteration out of the extractor.
        lesson_id, lesson = next((
            (video_id, data)
            for video_id, data in course['lessonData'].items()
            if data.get('slug') == lesson_name), (None, None))
        if lesson is None:
            raise ExtractorError(
                f'Unable to find lesson {lesson_name!r} in course {course_name!r}')

        chapters = self._extract_chapters(course)
        return self._extract_lesson(chapters, lesson_id, lesson)
class FrontendMastersCourseIE(FrontendMastersPageBaseIE):
    """Extract a whole course as a playlist of its lessons."""

    _VALID_URL = r'https?://(?:www\.)?frontendmasters\.com/courses/(?P<id>[^/]+)'
    _TEST = {
        'url': 'https://frontendmasters.com/courses/web-development/',
        'info_dict': {
            'id': 'web-development',
            'title': 'Introduction to Web Development',
            'description': 'md5:9317e6e842098bf725d62360e52d49a6',
        },
        'playlist_count': 81,
        'skip': 'Requires FrontendMasters account credentials',
    }

    @classmethod
    def suitable(cls, url):
        """Reject URLs that the lesson extractor already claims.

        _VALID_URL here is a prefix of the lesson pattern, so lesson URLs
        would otherwise match this extractor as well.
        """
        if FrontendMastersLessonIE.suitable(url):
            return False
        return super().suitable(url)

    def _real_extract(self, url):
        """Return a playlist of the course's lessons ordered by index.

        Lessons lacking a slug or an id (``hash`` / ``statsId``) are skipped.
        """
        course_name = self._match_id(url)

        course = self._download_course(course_name, url)

        chapters = self._extract_chapters(course)

        def lesson_order(lesson):
            # Lessons without a numeric index sort last, in their original
            # relative order, instead of raising KeyError for the whole
            # course (such a lesson may be skipped below anyway).
            index = lesson.get('index')
            if isinstance(index, (int, float)):
                return (0, index)
            return (1, 0)

        lessons = sorted(course['lessonData'].values(), key=lesson_order)

        entries = []
        for lesson in lessons:
            lesson_name = lesson.get('slug')
            lesson_id = lesson.get('hash') or lesson.get('statsId')
            if not lesson_id or not lesson_name:
                continue
            entries.append(self._extract_lesson(chapters, lesson_id, lesson))

        title = course.get('title')
        description = course.get('description')

        return self.playlist_result(entries, course_name, title, description)