3 from .common
import InfoExtractor
class TeamTreeHouseIE(InfoExtractor):
    """Extractor for ``teamtreehouse.com/library/<slug>`` pages.

    A library page is handled in one of two ways:

    * if the page embeds HTML5 media, it is returned as a single video;
    * otherwise the page is treated as a workshop or course and a playlist
      of ``url_transparent`` entries pointing at the linked pages is returned.
    """

    _VALID_URL = r'https?://(?:www\.)?teamtreehouse\.com/library/(?P<id>[^/]+)'
    _TESTS = [{
        # Course
        'url': 'https://teamtreehouse.com/library/introduction-to-user-authentication-in-php',
        'info_dict': {
            'id': 'introduction-to-user-authentication-in-php',
            'title': 'Introduction to User Authentication in PHP',
            'description': 'md5:405d7b4287a159b27ddf30ca72b5b053',
        },
        'playlist_mincount': 24,
    }, {
        # WorkShop
        'url': 'https://teamtreehouse.com/library/deploying-a-react-app',
        'info_dict': {
            'id': 'deploying-a-react-app',
            'title': 'Deploying a React App',
            'description': 'md5:10a82e3ddff18c14ac13581c9b8e5921',
        },
        'playlist_mincount': 4,
    }, {
        # Video
        'url': 'https://teamtreehouse.com/library/application-overview-2',
        'info_dict': {
            'id': 'application-overview-2',
            # NOTE(review): container extension assumed to be mp4 - confirm
            'ext': 'mp4',
            'title': 'Application Overview',
            'description': 'md5:4b0a234385c27140a4378de5f1e15127',
        },
        'expected_warnings': ['This is just a preview'],
    }]
    _NETRC_MACHINE = 'teamtreehouse'

    def _perform_login(self, username, password):
        """Sign in with the given credentials.

        Downloads the sign-in page to collect the hidden inputs of the
        ``new_user_session`` form, adds the credentials and posts the result
        to the session endpoint.

        Raises:
            ExtractorError: if the response contains an element with the
                ``error-message`` class; its cleaned text is the message.
        """
        signin_page = self._download_webpage(
            'https://teamtreehouse.com/signin',
            None, 'Downloading signin page')
        # Start from the form's hidden inputs so they are posted back along
        # with the credentials.
        data = self._form_hidden_inputs('new_user_session', signin_page)
        data.update({
            'user_session[email]': username,
            'user_session[password]': password,
        })
        login_page = self._download_webpage(
            'https://teamtreehouse.com/person_session',
            None, 'Logging in', data=urlencode_postdata(data))
        error_message = get_element_by_class('error-message', login_page)
        if error_message:
            raise ExtractorError(clean_html(error_message), expected=True)

    def _real_extract(self, url):
        """Extract a single video or a playlist from a library page.

        Args:
            url: a URL matching ``_VALID_URL``.

        Returns:
            An info dict for a single video when the page embeds HTML5 media,
            otherwise the result of ``playlist_result`` with one
            ``url_transparent`` entry per linked page.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)
        title = self._html_search_meta(['og:title', 'twitter:title'], webpage)
        description = self._html_search_meta(
            ['description', 'og:description', 'twitter:description'], webpage)
        entries = self._parse_html5_media_entries(url, webpage, display_id)

        if entries:
            # The page embeds a player: treat it as one video.
            info = entries[0]

            # Re-derive each subtitle extension from its URL, defaulting
            # to srt.
            for subtitles in info.get('subtitles', {}).values():
                for subtitle in subtitles:
                    subtitle['ext'] = determine_ext(subtitle['url'], 'srt')

            is_preview = 'data-preview="true"' in webpage
            if is_preview:
                self.report_warning(
                    'This is just a preview. You need to be signed in with a Basic account to download the entire video.', display_id)
                # NOTE(review): preview clips appear to be capped at 30 s - confirm
                duration = 30
            else:
                # data-duration is scaled by 1/1000. default=None keeps a
                # missing attribute non-fatal so the textual fallback below
                # can actually run.
                duration = float_or_none(self._search_regex(
                    r'data-duration="(\d+)"', webpage, 'duration',
                    default=None), 1000)
                if not duration:
                    duration = parse_duration(get_element_by_id(
                        'video-duration', webpage))

            info.update({
                'id': display_id,
                'title': title,
                'description': description,
                'duration': duration,
            })
            return info

        def extract_urls(html, extract_info=None):
            """Append one url_transparent entry per ``<a href>`` in *html*.

            *extract_info*, when given, is merged into every entry.
            """
            for path in re.findall(r'<a[^>]+href="([^"]+)"', html):
                page_url = urljoin(url, path)
                entry = {
                    '_type': 'url_transparent',
                    'id': self._match_id(page_url),
                    'url': page_url,
                    # 'ie_key' is the field read when resolving the entry;
                    # the previous 'id_key' spelling was silently ignored.
                    'ie_key': self.ie_key(),
                }
                if extract_info:
                    entry.update(extract_info)
                entries.append(entry)

        workshop_videos = self._search_regex(
            r'(?s)<ul[^>]+id="workshop-videos"[^>]*>(.+?)</ul>',
            webpage, 'workshop videos', default=None)
        if workshop_videos:
            extract_urls(workshop_videos)
        else:
            # No workshop list: the syllabus is fetched from a separate page
            # whose path is given in a data-url attribute.
            stages_path = self._search_regex(
                r'(?s)<div[^>]+id="syllabus-stages"[^>]+data-url="([^"]+)"',
                webpage, 'stages path')
            if stages_path:
                stages_page = self._download_webpage(
                    urljoin(url, stages_path), display_id, 'Downloading stages page')
                # Each <h2> heading followed by a <ul> is one chapter;
                # chapters are numbered from 1.
                for chapter_number, (chapter, steps_list) in enumerate(re.findall(
                        r'(?s)<h2[^>]*>\s*(.+?)\s*</h2>.+?<ul[^>]*>(.+?)</ul>', stages_page), 1):
                    extract_urls(steps_list, {
                        'chapter': chapter,
                        'chapter_number': chapter_number,
                    })
            title = remove_end(title, ' Course')

        return self.playlist_result(
            entries, display_id, title, description)