[cleanup] Misc (#8968)
[yt-dlp.git] / yt_dlp / extractor / duoplay.py
blob18642fea39291d6c4b664d3bc6042a5b38a0a48c
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 extract_attributes,
5 get_element_text_and_html_by_tag,
6 int_or_none,
7 join_nonempty,
8 str_or_none,
9 try_call,
10 unified_timestamp,
12 from ..utils.traversal import traverse_obj
class DuoplayIE(InfoExtractor):
    # Matches duoplay.ee telecast pages; the optional ``ep`` query parameter
    # is captured separately and becomes part of the video id.
    _VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)/[\w-]+/?(?:\?(?:[^#]+&)?ep=(?P<ep>\d+))?'
    _TESTS = [{
        'note': 'Siberi võmm S02E12',
        'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
        'md5': '1ff59d535310ac9c5cf5f287d8f91b2d',
        'info_dict': {
            'id': '4312_24',
            'ext': 'mp4',
            'title': 'Operatsioon "Öö"',
            'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
            'description': 'md5:8ef98f38569d6b8b78f3d350ccc6ade8',
            'upload_date': '20170523',
            'timestamp': 1495567800,
            'series': 'Siberi võmm',
            'series_id': '4312',
            'season': 'Season 2',
            'season_number': 2,
            'episode': 'Operatsioon "Öö"',
            'episode_number': 12,
            'episode_id': '24',
        }
    }, {
        'note': 'Empty title',
        'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
        'md5': '6aca68be71112314738dd17cced7f8bf',
        'info_dict': {
            'id': '17_14',
            'ext': 'mp4',
            'title': 'Ühikarotid',
            'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
            'description': 'md5:4719b418e058c209def41d48b601276e',
            'upload_date': '20100916',
            'timestamp': 1284661800,
            'series': 'Ühikarotid',
            'series_id': '17',
            'season': 'Season 2',
            'season_number': 2,
            'episode_id': '14',
            'release_year': 2010,
        }
    }, {
        'note': 'Movie without expiry',
        'url': 'https://duoplay.ee/5501/pilvede-all.-neljas-ode',
        'md5': '7abf63d773a49ef7c39f2c127842b8fd',
        'info_dict': {
            'id': '5501',
            'ext': 'mp4',
            'title': 'Pilvede all. Neljas õde',
            'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
            'description': 'md5:d86a70f8f31e82c369d4d4f4c79b1279',
            'cast': 'count:9',
            'upload_date': '20221214',
            'timestamp': 1671054000,
            'release_year': 2018,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for a single Duoplay page.

        The page's ``<video-player>`` element supplies the HLS manifest URL
        (``manifest-url`` attribute) and a JSON metadata blob (``:episode``
        attribute). Raises an expected ExtractorError when no usable player
        element / manifest URL is present.
        """
        telecast_id, episode = self._match_valid_url(url).group('id', 'ep')
        # join_nonempty skips the episode part when the URL has no ``ep``.
        video_id = join_nonempty(telecast_id, episode, delim='_')
        webpage = self._download_webpage(url, video_id)

        def _player_attributes():
            player_html = get_element_text_and_html_by_tag('video-player', webpage)[1]
            return extract_attributes(player_html)

        # try_call yields None if the element cannot be located/parsed.
        video_player = try_call(_player_attributes)
        manifest_url = video_player.get('manifest-url') if video_player else None
        if not manifest_url:
            raise ExtractorError('No video found', expected=True)

        # Metadata is best-effort: a missing or unparsable blob becomes {}.
        episode_json = video_player.get(':episode') or ''
        episode_attr = self._parse_json(episode_json, video_id, fatal=False) or {}

        def _airtime_to_timestamp(airtime):
            # The airtime string has a fixed +0200 offset appended before parsing.
            return unified_timestamp(airtime + ' +0200')

        def _split_cast(names):
            return names.split(', ')

        def _episode_title(number):
            if number:
                return f'Episode {number}'
            return None

        info = {
            'id': video_id,
            'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4'),
        }

        # Fields extracted for every kind of content.
        info.update(traverse_obj(episode_attr, {
            'title': 'title',
            'description': 'synopsis',
            'thumbnail': ('images', 'original'),
            'timestamp': ('airtime', {_airtime_to_timestamp}),
            'cast': ('cast', {_split_cast}),
            'release_year': ('year', {int_or_none}),
        }))

        # Anything not categorised as a movie also gets series/episode fields;
        # here the title is taken from the subtitle, falling back to
        # "Episode N", and overrides the title set above when available.
        if episode_attr.get('category') != 'movies':
            info.update(traverse_obj(episode_attr, {
                'title': (None, ('subtitle', ('episode_nr', {_episode_title}))),
                'series': 'title',
                'series_id': ('telecast_id', {str_or_none}),
                'season_number': ('season_id', {int_or_none}),
                'episode': 'subtitle',
                'episode_number': ('episode_nr', {int_or_none}),
                'episode_id': ('episode_id', {str_or_none}),
            }, get_all=False))

        return info