[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / arnes.py
blobf196f611ab8efa08bcfd50a52cb17dca96ba4e42
1 import urllib.parse
3 from .common import InfoExtractor
4 from ..utils import (
5 float_or_none,
6 format_field,
7 int_or_none,
8 parse_iso8601,
9 remove_start,
# Extractor for videos hosted on video.arnes.si ("Arnes Video").
#
# Metadata and media URLs are read from the site's public JSON endpoint
# (`/api/public/video/<id>`); watch, embed, asset and API URLs all carry the
# same 12-character video id and are therefore handled by one pattern.
class ArnesIE(InfoExtractor):
    IE_NAME = 'video.arnes.si'
    IE_DESC = 'Arnes Video'
    _VALID_URL = r'https?://video\.arnes\.si/(?:[a-z]{2}/)?(?:watch|embed|api/(?:asset|public/video))/(?P<id>[0-9a-zA-Z]{12})'
    _TESTS = [{
        'url': 'https://video.arnes.si/watch/a1qrWTOQfVoU?t=10',
        'md5': '4d0f4d0a03571b33e1efac25fd4a065d',
        'info_dict': {
            'id': 'a1qrWTOQfVoU',
            'ext': 'mp4',
            'title': 'Linearna neodvisnost, definicija',
            'description': 'Linearna neodvisnost, definicija',
            'license': 'PRIVATE',
            'creator': 'Polona Oblak',
            'timestamp': 1585063725,
            'upload_date': '20200324',
            'channel': 'Polona Oblak',
            'channel_id': 'q6pc04hw24cj',
            'channel_url': 'https://video.arnes.si/?channel=q6pc04hw24cj',
            'duration': 596.75,
            'view_count': int,
            'tags': ['linearna_algebra'],
            'start_time': 10,
        },
    }, {
        'url': 'https://video.arnes.si/api/asset/s1YjnV7hadlC/play.mp4',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/en/watch/s1YjnV7hadlC',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/embed/s1YjnV7hadlC?t=123&hideRelated=1',
        'only_matching': True,
    }, {
        'url': 'https://video.arnes.si/api/public/video/s1YjnV7hadlC',
        'only_matching': True,
    }]
    # Scheme + host prepended to the site-relative paths found in the API response.
    _BASE_URL = 'https://video.arnes.si'

    def _real_extract(self, url):
        """Build the info dict for a single video.arnes.si video.

        The video id is taken from ``url``, the public video API is queried
        for it, and the ``data`` object of the response is mapped onto the
        info dict fields. ``title`` is mandatory (a missing key raises
        ``KeyError``); every other field is optional and is left empty when
        the API does not provide it.
        """
        video_id = self._match_id(url)

        video = self._download_json(
            self._BASE_URL + '/api/public/video/' + video_id, video_id)['data']
        title = video['title']

        formats = []
        for media in (video.get('media') or []):
            media_url = media.get('url')
            if not media_url:
                # An entry without a URL cannot be downloaded; skip it.
                continue
            formats.append({
                'url': self._BASE_URL + media_url,
                'format_id': remove_start(media.get('format'), 'FORMAT_'),
                'format_note': media.get('formatTranslation'),
                'width': int_or_none(media.get('width')),
                'height': int_or_none(media.get('height')),
            })

        channel = video.get('channel') or {}
        channel_id = channel.get('url')
        thumbnail = video.get('thumbnailUrl')

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            # The thumbnail is optional: only build the absolute URL when the
            # API returned a path, otherwise `str + None` would raise TypeError
            # and abort an otherwise successful extraction.
            'thumbnail': self._BASE_URL + thumbnail if thumbnail else None,
            'description': video.get('description'),
            'license': video.get('license'),
            'creator': video.get('author'),
            'timestamp': parse_iso8601(video.get('creationTime')),
            'channel': channel.get('name'),
            'channel_id': channel_id,
            'channel_url': format_field(channel_id, None, f'{self._BASE_URL}/?channel=%s'),
            # The raw value is divided by 1000 (presumably the API reports
            # milliseconds -- the test above expects 596.75).
            'duration': float_or_none(video.get('duration'), 1000),
            'view_count': int_or_none(video.get('views')),
            'tags': video.get('hashtags'),
            # Honour an optional `t=<seconds>` query parameter on the input URL.
            'start_time': int_or_none(urllib.parse.parse_qs(
                urllib.parse.urlparse(url).query).get('t', [None])[0]),
        }