[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / cybrary.py
blob59c8ab473d7ecfcbb5c40271c7ca9d56f4c9e409
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 smuggle_url,
5 str_or_none,
6 traverse_obj,
7 urlencode_postdata,
class CybraryBaseIE(InfoExtractor):
    """Shared login and API plumbing for the Cybrary extractors."""

    # Sent as the `key` query parameter of the sign-in request.
    _API_KEY = 'AIzaSyCX9ru6j70PX2My1Eq6Q1zoMAhuTdXlzSw'
    # URL templates keyed by endpoint name; `{}` is filled with an item ID.
    _ENDPOINTS = {
        'course': 'https://app.cybrary.it/courses/api/catalog/browse/course/{}',
        'course_enrollment': 'https://app.cybrary.it/courses/api/catalog/{}/enrollment',
        'enrollment': 'https://app.cybrary.it/courses/api/enrollment/{}',
        'launch': 'https://app.cybrary.it/courses/api/catalog/{}/launch',
        'vimeo_oembed': 'https://vimeo.com/api/oembed.json?url=https://vimeo.com/{}',
    }
    _NETRC_MACHINE = 'cybrary'
    # ID token from the sign-in response. It is written on the base class
    # (not the instance) so every subclass reads the same value.
    _TOKEN = None

    def _perform_login(self, username, password):
        """Sign in with e-mail and password and store the returned ID token."""
        sign_in_url = (
            'https://identitytoolkit.googleapis.com/v1/accounts:signInWithPassword'
            f'?key={self._API_KEY}')
        credentials = urlencode_postdata({
            'email': username,
            'password': password,
            'returnSecureToken': True,
        })
        response = self._download_json(
            sign_in_url, None, data=credentials, note='Logging in')
        CybraryBaseIE._TOKEN = response['idToken']

    def _real_initialize(self):
        """Require a login: nothing can be fetched without a token."""
        if self._TOKEN:
            return
        self.raise_login_required(method='password')

    def _call_api(self, endpoint, item_id):
        """Download the JSON of a named endpoint for `item_id`.

        `endpoint` must be a key of `_ENDPOINTS`; the stored token is sent
        as a bearer `Authorization` header.
        """
        api_url = self._ENDPOINTS[endpoint].format(item_id)
        auth_headers = {'Authorization': f'Bearer {self._TOKEN}'}
        return self._download_json(
            api_url, item_id,
            note=f'Downloading {endpoint} JSON metadata',
            headers=auth_headers)

    def _get_vimeo_id(self, activity_id):
        """Resolve the Vimeo video ID behind an activity.

        The ID is parsed from the launch response's player URL when one is
        present; otherwise the first `videoId` found under
        `vendor_data.content` is returned (None if there is none).
        """
        launch = self._call_api('launch', activity_id)
        player_url = launch.get('url')

        if not player_url:
            return traverse_obj(
                launch, ('vendor_data', 'content', ..., 'videoId'), get_all=False)

        return self._search_regex(
            r'https?://player\.vimeo\.com/video/(?P<vimeo_id>[0-9]+)',
            player_url, 'vimeo_id')
class CybraryIE(CybraryBaseIE):
    """Extractor for a single activity (lesson/video) inside an enrollment."""

    _VALID_URL = r'https?://app\.cybrary\.it/immersive/(?P<enrollment>[0-9]+)/activity/(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://app.cybrary.it/immersive/12487950/activity/63102',
        'md5': '9ae12d37e555cb2ed554223a71a701d0',
        'info_dict': {
            'id': '646609770',
            'ext': 'mp4',
            'title': 'Getting Started',
            'thumbnail': 'https://i.vimeocdn.com/video/1301817996-76a268f0c56cff18a5cecbbdc44131eb9dda0c80eb0b3a036_1280',
            'series_id': '63111',
            'uploader_url': 'https://vimeo.com/user30867300',
            'duration': 88,
            'uploader_id': 'user30867300',
            'series': 'Cybrary Orientation',
            'uploader': 'Cybrary',
            'chapter': 'Cybrary Orientation Series',
            'chapter_id': '63110',
        },
        'expected_warnings': ['No authenticators for vimeo'],
    }, {
        'url': 'https://app.cybrary.it/immersive/12747143/activity/52686',
        'md5': '62f26547dccc59c44363e2a13d4ad08d',
        'info_dict': {
            'id': '445638073',
            'ext': 'mp4',
            'title': 'Azure Virtual Network IP Addressing',
            'thumbnail': 'https://i.vimeocdn.com/video/936667051-1647ace66c627d4a2382185e0dae8deb830309bfddd53f8b2367b2f91e92ed0e-d_1280',
            'series_id': '52733',
            'uploader_url': 'https://vimeo.com/user30867300',
            'duration': 426,
            'uploader_id': 'user30867300',
            'series': 'AZ-500: Microsoft Azure Security Technologies',
            'uploader': 'Cybrary',
            'chapter': 'Implement Network Security',
            'chapter_id': '52693',
        },
        'expected_warnings': ['No authenticators for vimeo'],
    }]

    def _real_extract(self, url):
        """Build a `url_transparent` result pointing at the Vimeo player.

        Raises:
            ExtractorError: if the activity is not part of the enrollment,
                is not a video/lesson activity, or no Vimeo ID can be found.
        """
        activity_id, enrollment_id = self._match_valid_url(url).group('id', 'enrollment')
        course = self._call_api('enrollment', enrollment_id)['content']

        # _VALID_URL only admits digits here, so the conversion cannot fail.
        numeric_id = int(activity_id)

        activity = traverse_obj(
            course,
            ('learning_modules', ..., 'activities', lambda _, v: numeric_id == v['id']),
            get_all=False)
        # traverse_obj yields None when nothing matches; fail with a clear
        # message instead of an AttributeError on `None.get`.
        if not activity:
            raise ExtractorError(
                f'Unable to find activity {activity_id} in enrollment {enrollment_id}')

        if activity.get('type') not in ['Video Activity', 'Lesson Activity']:
            raise ExtractorError('The activity is not a video', expected=True)

        # Chapter metadata is optional: fall back to an empty mapping so a
        # missing module only drops `chapter`/`chapter_id`.
        module = next(
            (m for m in course.get('learning_modules') or []
             if numeric_id in traverse_obj(m, ('activities', ..., 'id'))),
            None) or {}

        vimeo_id = self._get_vimeo_id(activity_id)
        # Without this check the player URL would end in the literal "None".
        if not vimeo_id:
            raise ExtractorError('Unable to extract the Vimeo video ID')

        return {
            '_type': 'url_transparent',
            'series': traverse_obj(course, ('content_description', 'title')),
            'series_id': str_or_none(traverse_obj(course, ('content_description', 'id'))),
            'id': vimeo_id,
            'chapter': module.get('title'),
            'chapter_id': str_or_none(module.get('id')),
            'title': activity.get('title'),
            'url': smuggle_url(f'https://player.vimeo.com/video/{vimeo_id}', {'referer': 'https://api.cybrary.it'}),
        }
class CybraryCourseIE(CybraryBaseIE):
    """Extractor that turns a course page into a playlist of its activities."""

    _VALID_URL = r'https?://app\.cybrary\.it/browse/course/(?P<id>[\w-]+)/?(?:$|[#?])'
    _TESTS = [{
        'url': 'https://app.cybrary.it/browse/course/az-500-microsoft-azure-security-technologies',
        'info_dict': {
            'id': '898',
            'title': 'AZ-500: Microsoft Azure Security Technologies',
            'description': 'md5:69549d379c0fc1dec92926d4e8b6fbd4',
        },
        'playlist_count': 59,
    }, {
        'url': 'https://app.cybrary.it/browse/course/cybrary-orientation',
        'info_dict': {
            'id': '1245',
            'title': 'Cybrary Orientation',
            'description': 'md5:9e69ff66b32fe78744e0ad4babe2e88e',
        },
        'playlist_count': 4,
    }]

    def _real_extract(self, url):
        """Return a playlist with one URL entry per activity of the course."""
        slug = self._match_id(url)
        course = self._call_api('course', slug)
        enrollment_info = self._call_api('course_enrollment', course['id'])

        activities = traverse_obj(
            course, ('content_item', 'learning_modules', ..., 'activities', ...))

        entries = []
        for activity in activities:
            # The enrollment ID is looked up per entry, so it is only
            # required when the course actually has activities.
            activity_url = (
                f'https://app.cybrary.it/immersive/{enrollment_info["id"]}'
                f'/activity/{activity["id"]}')
            entries.append(self.url_result(activity_url))

        playlist_id = traverse_obj(
            course, ('content_item', 'id'), expected_type=str_or_none)
        title = course.get('title')
        description = course.get('short_description')
        return self.playlist_result(entries, playlist_id, title, description)