1 from .common
import InfoExtractor
2 from ..utils
import ExtractorError
, unified_strdate
5 class JoveIE(InfoExtractor
):
6 _VALID_URL
= r
'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
7 _CHAPTERS_URL
= 'http://www.jove.com/video-chapters?videoid={video_id:}'
10 'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
11 'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
15 'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
16 'description': 'md5:015dd4509649c0908bc27f049e0262c6',
17 'thumbnail': r
're:^https?://.*\.png$',
18 'upload_date': '20110523',
22 'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
23 'md5': '914aeb356f416811d911996434811beb',
27 'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
28 'description': 'md5:35ff029261900583970c4023b70f1dc9',
29 'thumbnail': r
're:^https?://.*\.png$',
30 'upload_date': '20140802',
36 def _real_extract(self
, url
):
37 mobj
= self
._match
_valid
_url
(url
)
38 video_id
= mobj
.group('id')
40 webpage
= self
._download
_webpage
(url
, video_id
)
42 chapters_id
= self
._html
_search
_regex
(
43 r
'/video-chapters\?videoid=([0-9]+)', webpage
, 'chapters id')
45 chapters_xml
= self
._download
_xml
(
46 self
._CHAPTERS
_URL
.format(video_id
=chapters_id
),
47 video_id
, note
='Downloading chapters XML',
48 errnote
='Failed to download chapters XML')
50 video_url
= chapters_xml
.attrib
.get('video')
52 raise ExtractorError('Failed to get the video URL')
54 title
= self
._html
_search
_meta
('citation_title', webpage
, 'title')
55 thumbnail
= self
._og
_search
_thumbnail
(webpage
)
56 description
= self
._html
_search
_regex
(
57 r
'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
58 webpage
, 'description', fatal
=False)
59 publish_date
= unified_strdate(self
._html
_search
_meta
(
60 'citation_publication_date', webpage
, 'publish date', fatal
=False))
# The lookup is non-fatal (fatal=False), so a page without the num_comments
# meta tag yields None instead of raising.  Passing that None straight to
# int() would raise TypeError and abort the extraction over an optional field,
# so convert only when a value was actually captured.
comment_count_text = self._html_search_regex(
    r'<meta name="num_comments" content="(\d+) Comments?"',
    webpage, 'comment count', fatal=False)
comment_count = (
    int(comment_count_text) if comment_count_text is not None else None)
69 'thumbnail': thumbnail
,
70 'description': description
,
71 'upload_date': publish_date
,
72 'comment_count': comment_count
,