[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / extractor / jove.py
blob6b37ccfdcc24f07c41b96e5d5be5b9928abe8af3
1 from .common import InfoExtractor
2 from ..utils import ExtractorError, unified_strdate
class JoveIE(InfoExtractor):
    """Information extractor for video pages on jove.com."""

    _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
    # XML endpoint queried for the media location: the root element of the
    # returned document is read for a ``video`` attribute holding the URL.
    _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
    _TESTS = [
        {
            'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
            'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
            'info_dict': {
                'id': '2744',
                'ext': 'mp4',
                'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
                'description': 'md5:015dd4509649c0908bc27f049e0262c6',
                'thumbnail': r're:^https?://.*\.png$',
                'upload_date': '20110523',
            },
        },
        {
            'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
            'md5': '914aeb356f416811d911996434811beb',
            'info_dict': {
                'id': '51796',
                'ext': 'mp4',
                'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
                'description': 'md5:35ff029261900583970c4023b70f1dc9',
                'thumbnail': r're:^https?://.*\.png$',
                'upload_date': '20140802',
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for a single jove.com video page.

        The page is downloaded and searched for the chapters id, the
        chapters XML for that id is fetched, and the media URL is read from
        the ``video`` attribute of its root element. Title, thumbnail,
        description, publish date and comment count are scraped from the
        page itself.

        Raises:
            ExtractorError: if the chapters XML carries no ``video``
                attribute (or an empty one).
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # The id used by the chapters endpoint is taken from the page rather
        # than assumed to be equal to the id in the URL.
        chapters_id = self._html_search_regex(
            r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')

        chapters_xml = self._download_xml(
            self._CHAPTERS_URL.format(video_id=chapters_id),
            video_id, note='Downloading chapters XML',
            errnote='Failed to download chapters XML')

        video_url = chapters_xml.attrib.get('video')
        if not video_url:
            raise ExtractorError('Failed to get the video URL')

        title = self._html_search_meta('citation_title', webpage, 'title')
        thumbnail = self._og_search_thumbnail(webpage)
        description = self._html_search_regex(
            r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
            webpage, 'description', fatal=False)
        publish_date = unified_strdate(self._html_search_meta(
            'citation_publication_date', webpage, 'publish date', fatal=False))

        comment_count_str = self._html_search_regex(
            r'<meta name="num_comments" content="(\d+) Comments?"',
            webpage, 'comment count', fatal=False)
        # The lookup above is non-fatal, so it can come back empty when the
        # page has no comment counter; calling int() on that would raise
        # TypeError and abort extraction over an optional field.
        comment_count = int(comment_count_str) if comment_count_str else None

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
            'description': description,
            'upload_date': publish_date,
            'comment_count': comment_count,
        }