[ie/dropbox] Fix password-protected video extraction (#11636)
[yt-dlp3.git] / yt_dlp / extractor / getcourseru.py
blobb7581d77e2f59b934b969acda881cc1428f0ae61
1 import re
2 import time
3 import urllib.parse
5 from .common import InfoExtractor
6 from ..utils import ExtractorError, int_or_none, url_or_none, urlencode_postdata
7 from ..utils.traversal import traverse_obj
class GetCourseRuPlayerIE(InfoExtractor):
    """Extractor for the signed GetCourse player page (``/sign-player/?json=...``)."""

    # Only player02.getcourse.ru URLs that carry a ``json`` query parameter are handled.
    _VALID_URL = r'https?://player02\.getcourse\.ru/sign-player/?\?(?:[^#]+&)?json=[^#&]+'
    # Matches an <iframe> whose src is a player URL; the URL is captured in the ``url`` group.
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL}[^\'"]*)']
    _TESTS = [{
        'url': 'http://player02.getcourse.ru/sign-player/?json=eyJ2aWRlb19oYXNoIjoiMTkwYmRmOTNmMWIyOTczNTMwOTg1M2E3YTE5ZTI0YjMiLCJ1c2VyX2lkIjozNTk1MjUxODMsInN1Yl9sb2dpbl91c2VyX2lkIjpudWxsLCJsZXNzb25faWQiOm51bGwsImlwIjoiNDYuMTQyLjE4Mi4yNDciLCJnY19ob3N0IjoiYWNhZGVteW1lbC5vbmxpbmUiLCJ0aW1lIjoxNzA1NDQ5NjQyLCJwYXlsb2FkIjoidV8zNTk1MjUxODMiLCJ1aV9sYW5ndWFnZSI6InJ1IiwiaXNfaGF2ZV9jdXN0b21fc3R5bGUiOnRydWV9&s=354ad2c993d95d5ac629e3133d6cefea&vh-static-feature=zigzag',
        'info_dict': {
            'id': '513573381',
            'title': '190bdf93f1b29735309853a7a19e24b3',
            'ext': 'mp4',
            'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
            'duration': 1693,
        },
        'skip': 'JWT expired',
    }]

    def _real_extract(self, url):
        """Build the info dict from the ``window.configs`` JSON on the player page.

        The page is downloaded without a video id because the id (``gcFileId``)
        is only known once the embedded configuration has been parsed.
        """
        player_page = self._download_webpage(url, None, 'Downloading player page')
        player_config = self._search_json(
            r'window\.configs\s*=', player_page, 'config', None)

        # ``gcFileId`` and ``masterPlaylistUrl`` are required: a missing key raises KeyError.
        file_id = str(player_config['gcFileId'])
        master_playlist_url = player_config['masterPlaylistUrl']
        hls_formats, hls_subtitles = self._extract_m3u8_formats_and_subtitles(
            master_playlist_url, file_id)

        # Optional metadata; each value is passed through its type/URL filter.
        metadata = traverse_obj(player_config, {
            'title': ('videoHash', {str}),
            'thumbnail': ('previewUrl', {url_or_none}),
            'duration': ('videoDuration', {int_or_none}),
        })

        return {
            **metadata,
            'id': file_id,
            'formats': hls_formats,
            'subtitles': hls_subtitles,
        }
class GetCourseRuIE(InfoExtractor):
    """Extractor for GetCourse lesson/landing pages that embed the signed player.

    The page itself is returned as a playlist; every player iframe found on it
    is delegated to ``GetCourseRuPlayerIE``.
    """

    _NETRC_MACHINE = 'getcourseru'
    # Custom domains handled in addition to ``*.getcourse.ru`` / ``*.getcourse.io``.
    _DOMAINS = [
        'academymel.online',
        'marafon.mani-beauty.com',
        'on.psbook.ru',
    ]
    # The ``player02.`` subdomain is excluded here; GetCourseRuPlayerIE matches it.
    _BASE_URL_RE = rf'https?://(?:(?!player02\.)[^.]+\.getcourse\.(?:ru|io)|{"|".join(map(re.escape, _DOMAINS))})'
    _VALID_URL = [
        # Any page path other than pl/ or teach/ (the path is used as the id)
        rf'{_BASE_URL_RE}/(?!pl/|teach/)(?P<id>[^?#]+)',
        # Lesson view pages (the numeric ``id`` query parameter is used as the id)
        rf'{_BASE_URL_RE}/(?:pl/)?teach/control/lesson/view\?(?:[^#]+&)?id=(?P<id>\d+)',
    ]
    _TESTS = [{
        'url': 'http://academymel.online/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693,
            },
        }],
    }, {
        'url': 'https://academymel.getcourse.ru/3video_1',
        'info_dict': {
            'id': '3059742',
            'display_id': '3video_1',
            'title': 'Промоуроки Академии МЕЛ',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '513573381',
                'ext': 'mp4',
                'title': 'Промоуроки Академии МЕЛ',
                'thumbnail': 'https://preview-htz.kinescopecdn.net/preview/190bdf93f1b29735309853a7a19e24b3/preview.jpg?version=1702370546&host=vh-80',
                'duration': 1693,
            },
        }],
    }, {
        'url': 'https://academymel.getcourse.ru/pl/teach/control/lesson/view?id=319141781&editMode=0',
        'info_dict': {
            'id': '319141781',
            'title': '1. Разминка у стены',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '4919601',
                'ext': 'mp4',
                'title': '1. Разминка у стены',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/5a521788e7dc25b4f70c3dff6512d90e/preview.jpg?version=1703223532&host=vh-81',
                'duration': 704,
            },
        }],
        'skip': 'paid lesson',
    }, {
        'url': 'https://manibeauty.getcourse.ru/pl/teach/control/lesson/view?id=272499894',
        'info_dict': {
            'id': '272499894',
            'title': 'Мотивация к тренировкам',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '447479687',
                'ext': 'mp4',
                'title': 'Мотивация к тренировкам',
                'thumbnail': 'https://preview-htz.vhcdn.com/preview/70ed5b9f489dd03b4aff55bfdff71a26/preview.jpg?version=1685115787&host=vh-71',
                'duration': 30,
            },
        }],
        'skip': 'paid lesson',
    }, {
        'url': 'https://gaismasmandalas.getcourse.io/ATLAUTSEVBUT',
        'only_matching': True,
    }]

    # Path of the login page; also used to detect that a request ended up at the login page.
    _LOGIN_URL_PATH = '/cms/system/login'

    def _login(self, hostname, username, password):
        """Log in to ``hostname`` unless a ``PHPSESSID5`` cookie is already set for it.

        The login page is fetched first because the POST needs two values taken
        from it: the ``data-xdget-id`` of the ``state-login`` form and the
        hexadecimal ``window.requestSimpleSign`` token.
        """
        if self._get_cookies(f'https://{hostname}').get('PHPSESSID5'):
            return
        login_url = f'https://{hostname}{self._LOGIN_URL_PATH}'
        webpage = self._download_webpage(login_url, None)

        self._request_webpage(
            login_url, None, 'Logging in', 'Failed to log in',
            data=urlencode_postdata({
                'action': 'processXdget',
                'xdgetId': self._html_search_regex(
                    r'<form[^>]+\bclass="[^"]*\bstate-login[^"]*"[^>]+\bdata-xdget-id="([^"]+)"',
                    webpage, 'xdgetId'),
                'params[action]': 'login',
                'params[url]': login_url,
                'params[object_type]': 'cms_page',
                'params[object_id]': -1,
                'params[email]': username,
                'params[password]': password,
                'requestTime': int(time.time()),
                # The dot is escaped so only the literal ``window.requestSimpleSign``
                # assignment matches, like the other ``window\.`` patterns in this file.
                'requestSimpleSign': self._html_search_regex(
                    r'window\.requestSimpleSign\s*=\s*"([\da-f]+)"', webpage, 'simple sign'),
            }))

    def _real_extract(self, url):
        """Return a playlist of the player iframes embedded in the page at ``url``.

        Credentials are looked up per hostname, and a login is attempted only
        when a username is available.

        Raises:
            ExtractorError: (expected) if the response URL contains the login
                path, i.e. the page is only available to registered users.
        """
        hostname = urllib.parse.urlparse(url).hostname
        username, password = self._get_login_info(netrc_machine=hostname)
        if username:
            self._login(hostname, username, password)

        display_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, display_id)
        if self._LOGIN_URL_PATH in urlh.url:
            raise ExtractorError(
                f'This video is only available for registered users. {self._login_hint("any", netrc=hostname)}',
                expected=True)

        # Prefer the numeric id set by the page script; fall back to the id taken from the URL.
        playlist_id = self._search_regex(
            r'window\.(?:lessonId|gcsObjectId)\s*=\s*(\d+)', webpage, 'playlist id', default=display_id)
        title = self._og_search_title(webpage) or self._html_extract_title(webpage)

        # Entries are url_transparent and carry the page title alongside the player's metadata.
        return self.playlist_from_matches(
            re.findall(GetCourseRuPlayerIE._EMBED_REGEX[0], webpage),
            playlist_id, title, display_id=display_id, ie=GetCourseRuPlayerIE, video_kwargs={
                'url_transparent': True,
                'title': title,
            })