4 from .common
import InfoExtractor
5 from .youtube
import YoutubeIE
13 class TechTVMITIE(InfoExtractor
):
14 IE_NAME
= 'techtv.mit.edu'
15 _VALID_URL
= r
'https?://techtv\.mit\.edu/(?:videos|embeds)/(?P<id>\d+)'
18 'url': 'http://techtv.mit.edu/videos/25418-mit-dna-learning-center-set',
19 'md5': '00a3a27ee20d44bcaa0933ccec4a2cf7',
23 'title': 'MIT DNA and Protein Sets',
24 'description': 'md5:46f5c69ce434f0a97e7c628cc142802d',
28 def _real_extract(self
, url
):
29 video_id
= self
._match
_id
(url
)
30 raw_page
= self
._download
_webpage
(
31 f
'http://techtv.mit.edu/videos/{video_id}', video_id
)
32 clean_page
= re
.compile(r
'<!--.*?-->', re
.S
).sub('', raw_page
)
34 base_url
= self
._proto
_relative
_url
(self
._search
_regex
(
35 r
'ipadUrl: \'(.+?cloudfront
.net
/)', raw_page, 'base url
'), 'http
:')
36 formats_json = self._search_regex(
37 r'bitrates
: (\
[.+?\
])', raw_page, 'video formats
')
38 formats_mit = json.loads(formats_json)
41 'format_id
': f['label
'],
42 'url
': base_url + f['url
'].partition(':')[2],
43 'ext
': f['url
'].partition(':')[0],
51 title = get_element_by_id('edit
-title
', clean_page)
52 description = clean_html(get_element_by_id('edit
-description
', clean_page))
53 thumbnail = self._search_regex(
54 r'playlist
:.*?url
: \'(.+?
)\'',
55 raw_page, 'thumbnail
', flags=re.DOTALL)
61 'description
': description,
62 'thumbnail
': thumbnail,
66 class OCWMITIE(InfoExtractor):
67 IE_NAME = 'ocw
.mit
.edu
'
68 _VALID_URL = r'^https?
://ocw\
.mit\
.edu
/courses
/(?P
<topic
>[a
-z0
-9\
-]+)'
69 _BASE_URL = 'http
://ocw
.mit
.edu
/'
73 'url
': 'http
://ocw
.mit
.edu
/courses
/electrical
-engineering
-and-computer
-science
/6-041-probabilistic
-systems
-analysis
-and-applied
-probability
-fall
-2010/video
-lectures
/lecture
-7-multiple
-variables
-expectations
-independence
/',
77 'title
': 'Lecture
7: Multiple Discrete Random Variables
: Expectations
, Conditioning
, Independence
',
78 'description
': 'In this lecture
, the professor discussed multiple random variables
, expectations
, and binomial distribution
.',
79 'upload_date
': '20121109',
81 'uploader
': 'MIT OpenCourseWare
',
85 'url
': 'http
://ocw
.mit
.edu
/courses
/mathematics
/18-01sc
-single
-variable
-calculus
-fall
-2010/1.-differentiation
/part
-a
-definition
-and-basic
-rules
/session
-1-introduction
-to
-derivatives
/',
89 'title
': 'Session
1: Introduction to Derivatives
',
90 'upload_date
': '20090818',
92 'uploader
': 'MIT OpenCourseWare
',
93 'description
': 'This section contains lecture video excerpts
, lecture notes
, an interactive mathlet with supporting documents
, and problem solving videos
.',
98 def _real_extract(self, url):
99 mobj = self._match_valid_url(url)
100 topic = mobj.group('topic
')
102 webpage = self._download_webpage(url, topic)
103 title = self._html_search_meta('WT
.cg_s
', webpage)
104 description = self._html_search_meta('Description
', webpage)
106 # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, start, stop, captions_file)
107 embed_chapter_media = re.search(r'ocw_embed_chapter_media\
((.+?
)\
)', webpage)
108 if embed_chapter_media:
109 metadata = re.sub(r'[\'"]', '', embed_chapter_media.group(1))
110 metadata = re.split(r', ?', metadata)
113 # search for call to ocw_embed_chapter_media(container_id, media_url, provider, page_url, image_url, captions_file)
114 embed_media = re.search(r'ocw_embed_media\((.+?)\)', webpage)
116 metadata = re.sub(r'[\'"]', '', embed_media.group(1))
117 metadata = re.split(r', ?
', metadata)
120 raise ExtractorError('Unable to find embedded YouTube video
.')
121 video_id = YoutubeIE.extract_id(yt)
124 '_type
': 'url_transparent
',
127 'description
': description,