3 from .common
import InfoExtractor
11 class StanfordOpenClassroomIE(InfoExtractor
):
12 IE_NAME
= 'stanfordoc'
13 IE_DESC
= 'Stanford Open ClassRoom'
14 _VALID_URL
= r
'https?://openclassroom\.stanford\.edu(?P<path>/?|(/MainFolder/(?:HomePage|CoursePage|VideoPage)\.php([?]course=(?P<course>[^&]+)(&video=(?P<video>[^&]+))?(&.*)?)?))$'
16 'url': 'http://openclassroom.stanford.edu/MainFolder/VideoPage.php?course=PracticalUnix&video=intro-environment&speed=100',
17 'md5': '544a9468546059d4e80d76265b0443b8',
19 'id': 'PracticalUnix_intro-environment',
21 'title': 'Intro Environment',
25 def _real_extract(self
, url
):
26 mobj
= self
._match
_valid
_url
(url
)
28 if mobj
.group('course') and mobj
.group('video'): # A specific video
29 course
= mobj
.group('course')
30 video
= mobj
.group('video')
32 'id': course
+ '_' + video
,
37 base_url
= 'http://openclassroom.stanford.edu/MainFolder/courses/' + course
+ '/videos/'
38 xml_url
= base_url
+ video
+ '.xml'
39 mdoc
= self
._download
_xml
(xml_url
, info
['id'])
41 info
['title'] = mdoc
.findall('./title')[0].text
42 info
['url'] = base_url
+ mdoc
.findall('./videoFile')[0].text
44 raise ExtractorError('Invalid metadata XML file')
46 elif mobj
.group('course'): # A course page
47 course
= mobj
.group('course')
55 coursepage
= self
._download
_webpage
(
57 note
='Downloading course info page',
58 errnote
='Unable to download course info page')
60 info
['title'] = self
._html
_search
_regex
(
61 r
'<h1>([^<]+)</h1>', coursepage
, 'title', default
=info
['id'])
63 info
['description'] = self
._html
_search
_regex
(
64 r
'(?s)<description>([^<]+)</description>',
65 coursepage
, 'description', fatal
=False)
67 links
= orderedSet(re
.findall(r
'<a href="(VideoPage\.php\?[^"]+)">', coursepage
))
68 info
['entries'] = [self
.url_result(
69 f
'http://openclassroom.stanford.edu/MainFolder/{unescapeHTML(l)}',
74 'id': 'Stanford OpenClassroom',
79 info
['title'] = info
['id']
81 root_url
= 'http://openclassroom.stanford.edu/MainFolder/HomePage.php'
82 rootpage
= self
._download
_webpage
(root_url
, info
['id'],
83 errnote
='Unable to download course info page')
85 links
= orderedSet(re
.findall(r
'<a href="(CoursePage\.php\?[^"]+)">', rootpage
))
86 info
['entries'] = [self
.url_result(
87 f
'http://openclassroom.stanford.edu/MainFolder/{unescapeHTML(l)}',