3 from .common
import InfoExtractor
16 class PlatziBaseIE(InfoExtractor
):
17 _LOGIN_URL
= 'https://platzi.com/login/'
18 _NETRC_MACHINE
= 'platzi'
20 def _perform_login(self
, username
, password
):
21 login_page
= self
._download
_webpage
(
22 self
._LOGIN
_URL
, None, 'Downloading login page')
24 login_form
= self
._hidden
_inputs
(login_page
)
31 urlh
= self
._request
_webpage
(
32 self
._LOGIN
_URL
, None, 'Logging in',
33 data
=urlencode_postdata(login_form
),
34 headers
={'Referer': self
._LOGIN
_URL
})
37 if 'platzi.com/login' not in urlh
.url
:
40 login_error
= self
._webpage
_read
_content
(
41 urlh
, self
._LOGIN
_URL
, None, 'Downloading login error page')
43 login
= self
._parse
_json
(
45 r
'login\s*=\s*({.+?})(?:\s*;|\s*</script)', login_error
, 'login'),
48 for kind
in ('error', 'password', 'nonFields'):
49 error
= str_or_none(login
.get(f
'{kind}Error'))
52 f
'Unable to login: {error}', expected
=True)
53 raise ExtractorError('Unable to log in')
56 class PlatziIE(PlatziBaseIE
):
60 platzi\.com/clases| # es version
61 courses\.platzi\.com/classes # en version
62 )/[^/]+/(?P<id>\d+)-[^/?\#&]+
66 'url': 'https://platzi.com/clases/1311-next-js/12074-creando-nuestra-primera-pagina/',
67 'md5': '8f56448241005b561c10f11a595b37e3',
71 'title': 'Creando nuestra primera página',
72 'description': 'md5:4c866e45034fc76412fbf6e60ae008bc',
75 'skip': 'Requires platzi account credentials',
77 'url': 'https://courses.platzi.com/classes/1367-communication-codestream/13430-background/',
81 'title': 'Background',
82 'description': 'md5:49c83c09404b15e6e71defaf87f6b305',
85 'skip': 'Requires platzi account credentials',
87 'skip_download': True,
91 def _real_extract(self
, url
):
92 lecture_id
= self
._match
_id
(url
)
94 webpage
= self
._download
_webpage
(url
, lecture_id
)
96 data
= self
._parse
_json
(
98 # client_data may contain "};" so that we have to try more
100 (r
'client_data\s*=\s*({.+?})\s*;\s*\n',
101 r
'client_data\s*=\s*({.+?})\s*;'),
102 webpage
, 'client data'),
105 material
= data
['initialState']['material']
106 desc
= material
['description']
107 title
= desc
['title']
110 for server_id
, server
in material
['videos'].items():
111 if not isinstance(server
, dict):
113 for format_id
in ('hls', 'dash'):
114 format_url
= url_or_none(server
.get(format_id
))
117 if format_id
== 'hls':
118 formats
.extend(self
._extract
_m
3u8_formats
(
119 format_url
, lecture_id
, 'mp4',
120 entry_protocol
='m3u8_native', m3u8_id
=format_id
,
121 note
=f
'Downloading {server_id} m3u8 information',
123 elif format_id
== 'dash':
124 formats
.extend(self
._extract
_mpd
_formats
(
125 format_url
, lecture_id
, mpd_id
=format_id
,
126 note
=f
'Downloading {server_id} MPD manifest',
129 content
= str_or_none(desc
.get('content'))
130 description
= (clean_html(base64
.b64decode(content
).decode('utf-8'))
131 if content
else None)
132 duration
= int_or_none(material
.get('duration'), invscale
=60)
137 'description': description
,
138 'duration': duration
,
143 class PlatziCourseIE(PlatziBaseIE
):
144 _VALID_URL
= r
'''(?x)
147 platzi\.com/clases| # es version
148 courses\.platzi\.com/classes # en version
152 'url': 'https://platzi.com/clases/next-js/',
155 'title': 'Curso de Next.js',
157 'playlist_count': 22,
159 'url': 'https://courses.platzi.com/classes/communication-codestream/',
162 'title': 'Codestream Course',
164 'playlist_count': 14,
168 def suitable(cls
, url
):
169 return False if PlatziIE
.suitable(url
) else super().suitable(url
)
171 def _real_extract(self
, url
):
172 course_name
= self
._match
_id
(url
)
174 webpage
= self
._download
_webpage
(url
, course_name
)
176 props
= self
._parse
_json
(
177 self
._search
_regex
(r
'data\s*=\s*({.+?})\s*;', webpage
, 'data'),
178 course_name
)['initialProps']
181 for chapter_num
, chapter
in enumerate(props
['concepts'], 1):
182 if not isinstance(chapter
, dict):
184 materials
= chapter
.get('materials')
185 if not materials
or not isinstance(materials
, list):
187 chapter_title
= chapter
.get('title')
188 chapter_id
= str_or_none(chapter
.get('id'))
189 for material
in materials
:
190 if not isinstance(material
, dict):
192 if material
.get('material_type') != 'video':
194 video_url
= urljoin(url
, material
.get('url'))
198 '_type': 'url_transparent',
200 'title': str_or_none(material
.get('name')),
201 'id': str_or_none(material
.get('id')),
202 'ie_key': PlatziIE
.ie_key(),
203 'chapter': chapter_title
,
204 'chapter_number': chapter_num
,
205 'chapter_id': chapter_id
,
208 course_id
= str(try_get(props
, lambda x
: x
['course']['id']))
209 course_title
= try_get(props
, lambda x
: x
['course']['name'], str)
211 return self
.playlist_result(entries
, course_id
, course_title
)