[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / amara.py
blob ed0f0cd357d965607aa9d55871c1f3a3859f50a1
1 from .common import InfoExtractor
2 from .vimeo import VimeoIE
3 from .youtube import YoutubeIE
4 from ..utils import (
5 int_or_none,
6 parse_iso8601,
7 update_url_query,
class AmaraIE(InfoExtractor):
    # Extractor for amara.org video pages. The media itself is either a
    # direct file or hosted on YouTube/Vimeo (see the test cases below);
    # Amara contributes the metadata and the published subtitle tracks.

    # The optional path segment before "videos/" is the site locale. It has
    # to accept "-" as well as word characters, otherwise URLs under
    # hyphenated locales (e.g. "pt-br") are not recognised at all.
    _VALID_URL = r'https?://(?:www\.)?amara\.org/(?:[\w-]+/)?videos/(?P<id>\w+)'
    _TESTS = [{
        # Youtube
        'url': 'https://amara.org/en/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/?tab=video',
        'md5': 'ea10daf2b6154b8c1ecf9922aca5e8ae',
        'info_dict': {
            'id': 'h6ZuVdvYnfE',
            'ext': 'mp4',
            'title': 'Why jury trials are becoming less common',
            'description': 'md5:a61811c319943960b6ab1c23e0cbc2c1',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20160813',
            'uploader': 'PBS NewsHour',
            'uploader_id': 'PBSNewsHour',
            'timestamp': 1549639570,
        },
    }, {
        # Vimeo
        'url': 'https://amara.org/en/videos/kYkK1VUTWW5I/info/vimeo-at-ces-2011',
        'md5': '99392c75fa05d432a8f11df03612195e',
        'info_dict': {
            'id': '18622084',
            'ext': 'mov',
            'title': 'Vimeo at CES 2011!',
            'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'timestamp': 1294763658,
            'upload_date': '20110111',
            'uploader': 'Sam Morrill',
            'uploader_id': 'sammorrill',
        },
    }, {
        # Direct Link
        'url': 'https://amara.org/en/videos/s8KL7I3jLmh6/info/the-danger-of-a-single-story/',
        'md5': 'd3970f08512738ee60c5807311ff5d3f',
        'info_dict': {
            'id': 's8KL7I3jLmh6',
            'ext': 'mp4',
            'title': 'The danger of a single story',
            'description': 'md5:d769b31139c3b8bb5be9177f62ea3f23',
            'thumbnail': r're:^https?://.*\.jpg$',
            'subtitles': dict,
            'upload_date': '20091007',
            'timestamp': 1254942511,
        },
    }, {
        # Locale prefix containing a hyphen
        'url': 'https://amara.org/pt-br/videos/jVx79ZKGK1ky/info/why-jury-trials-are-becoming-less-common/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for an Amara video page.

        Fetches the video record from the Amara JSON API, collects every
        published subtitle track in json/srt/vtt form, and returns the
        metadata together with the first URL listed in ``all_urls``. When
        that URL is one YoutubeIE or VimeoIE can handle, the result is
        marked ``url_transparent`` with the matching ``ie_key``.
        """
        video_id = self._match_id(url)
        meta = self._download_json(
            f'https://amara.org/api/videos/{video_id}/',
            video_id, query={'format': 'json'})
        # Both lookups are mandatory: without a title or a media URL there
        # is nothing useful to return, so a missing key is left to raise.
        title = meta['title']
        video_url = meta['all_urls'][0]

        subtitles = {}
        for language in (meta.get('languages') or []):
            subtitles_uri = language.get('subtitles_uri')
            # Skip tracks with no download URI or not flagged as published.
            if not (subtitles_uri and language.get('published')):
                continue
            # Entries without a language code are filed under 'en'.
            subtitles.setdefault(language.get('code') or 'en', []).extend(
                {
                    'ext': ext,
                    'url': update_url_query(subtitles_uri, {'format': ext}),
                }
                for ext in ('json', 'srt', 'vtt'))

        info = {
            'url': video_url,
            'id': video_id,
            'subtitles': subtitles,
            'title': title,
            'description': meta.get('description'),
            'thumbnail': meta.get('thumbnail'),
            'duration': int_or_none(meta.get('duration')),
            'timestamp': parse_iso8601(meta.get('created')),
        }

        # First matching hoster wins; otherwise video_url is returned as-is.
        for ie in (YoutubeIE, VimeoIE):
            if ie.suitable(video_url):
                info.update({
                    '_type': 'url_transparent',
                    'ie_key': ie.ie_key(),
                })
                break

        return info