[ie/soundcloud] Various fixes (#11820)
[yt-dlp.git] / yt_dlp / extractor / duoplay.py
blob75650c3a641ee7140f4251ac51f024c3e5350426
from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    extract_attributes,
    get_element_text_and_html_by_tag,
    int_or_none,
    join_nonempty,
    parse_qs,
    str_or_none,
    try_call,
    unified_timestamp,
)
from ..utils.traversal import traverse_obj, value


class DuoplayIE(InfoExtractor):
    """Extractor for duoplay.ee telecast pages (movies and series episodes).

    The numeric path component of the URL is the telecast id; an optional
    ``ep`` query parameter selects an episode and is appended to the video id
    as ``<telecast_id>_<ep>``.
    """

    _VALID_URL = r'https?://duoplay\.ee/(?P<id>\d+)(?:[/?#]|$)'
    _TESTS = [{
        'note': 'Siberi võmm S02E12',
        'url': 'https://duoplay.ee/4312/siberi-vomm?ep=24',
        'md5': '1ff59d535310ac9c5cf5f287d8f91b2d',
        'info_dict': {
            'id': '4312_24',
            'ext': 'mp4',
            'title': 'Operatsioon "Öö"',
            'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
            'description': 'md5:8ef98f38569d6b8b78f3d350ccc6ade8',
            'upload_date': '20170523',
            'timestamp': 1495567800,
            'series': 'Siberi võmm',
            'series_id': '4312',
            'season': 'Season 2',
            'season_number': 2,
            'episode': 'Operatsioon "Öö"',
            'episode_number': 12,
            'episode_id': '24',
        },
        'skip': 'No video found',
    }, {
        'note': 'Empty title',
        'url': 'https://duoplay.ee/17/uhikarotid?ep=14',
        'md5': 'cba9f5dabf2582b224d80ac44fb80e47',
        'info_dict': {
            'id': '17_14',
            'ext': 'mp4',
            'title': 'Episode 14',
            'thumbnail': r're:https?://.+\.jpg',
            'description': 'md5:4719b418e058c209def41d48b601276e',
            'upload_date': '20100916',
            'timestamp': 1284661800,
            'series': 'Ühikarotid',
            'series_id': '17',
            'season': 'Season 2',
            'season_number': 2,
            'episode_id': '14',
            'release_year': 2010,
            'episode': 'Episode 14',
            'episode_number': 14,
        },
    }, {
        'note': 'Movie without expiry',
        'url': 'https://duoplay.ee/5501/pilvede-all.-neljas-ode',
        'md5': '7abf63d773a49ef7c39f2c127842b8fd',
        'info_dict': {
            'id': '5501',
            'ext': 'mp4',
            'title': 'Pilvede all. Neljas õde',
            'thumbnail': r're:https://.+\.jpg(?:\?c=\d+)?$',
            'description': 'md5:d86a70f8f31e82c369d4d4f4c79b1279',
            'cast': 'count:9',
            'upload_date': '20221214',
            'timestamp': 1671054000,
            'release_year': 2018,
        },
        'skip': 'No video found',
    }, {
        'note': 'Episode url without show name',
        'url': 'https://duoplay.ee/9644?ep=185',
        'md5': '63f324b4fe2dbd8194dca16a6d52184a',
        'info_dict': {
            'id': '9644_185',
            'ext': 'mp4',
            'title': 'Episode 185',
            'thumbnail': r're:https?://.+\.jpg',
            'description': 'md5:ed25ba4e9e5d54bc291a4a0cdd241467',
            'upload_date': '20241120',
            'timestamp': 1732077000,
            'episode': 'Episode 63',
            'episode_id': '185',
            'episode_number': 63,
            'season': 'Season 2',
            'season_number': 2,
            'series': 'Telehommik',
            'series_id': '9644',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a duoplay.ee telecast or episode URL.

        Raises ExtractorError (expected) when the page carries no
        ``video-player`` element with a ``manifest-url`` attribute.
        """
        telecast_id = self._match_id(url)
        # Value of the `ep` query parameter, round-tripped through int so that
        # non-numeric input becomes None; kept as a string for id building.
        episode = traverse_obj(parse_qs(url), ('ep', 0, {int_or_none}, {str_or_none}))
        video_id = join_nonempty(telecast_id, episode, delim='_')
        webpage = self._download_webpage(url, video_id)
        # try_call turns a missing <video-player> tag into a falsy result
        # instead of an exception, so both failure modes share one check.
        video_player = try_call(lambda: extract_attributes(
            get_element_text_and_html_by_tag('video-player', webpage)[1]))
        if not video_player or not video_player.get('manifest-url'):
            raise ExtractorError('No video found', expected=True)

        manifest_url = video_player['manifest-url']
        # The manifest is requested with a session token (`s` query parameter)
        # obtained by registering the manifest URL with the session service.
        session_token = self._download_json(
            'https://sts.postimees.ee/session/register', video_id, 'Registering session',
            'Unable to register session', headers={
                'Accept': 'application/json',
                'X-Original-URI': manifest_url,
            })['session']

        # Metadata is embedded as JSON in the player's `:episode` attribute;
        # parsing is non-fatal so formats are still returned without it.
        episode_attr = self._parse_json(video_player.get(':episode') or '', video_id, fatal=False) or {}

        return {
            'id': video_id,
            'formats': self._extract_m3u8_formats(manifest_url, video_id, 'mp4', query={'s': session_token}),
            **traverse_obj(episode_attr, {
                'title': ('title', {str}),
                'description': ('synopsis', {str}),
                'thumbnail': ('images', 'original'),
                # NOTE(review): a fixed +0200 offset is appended; presumably
                # `airtime` is Estonian local time, in which case summer-time
                # dates would be one hour off - confirm before changing, the
                # test timestamps above depend on the current behaviour.
                'timestamp': ('airtime', {lambda x: unified_timestamp(x + ' +0200')}),
                'cast': ('cast', filter, {lambda x: x.split(', ')}),
                'release_year': ('year', {int_or_none}),
            }),
            # For anything that is not a movie, `title` in the metadata is
            # used as the series name; the entry title is then taken from
            # `subtitle`, falling back to 'Episode <ep>' when that is empty.
            **(traverse_obj(episode_attr, {
                'title': (None, (('subtitle', {str}, filter), {value(f'Episode {episode}' if episode else None)})),
                'series': ('title', {str}),
                'series_id': ('telecast_id', {str_or_none}),
                'season_number': ('season_id', {int_or_none}),
                'episode': ('subtitle', {str}, filter),
                'episode_number': ('episode_nr', {int_or_none}),
                'episode_id': ('episode_id', {str_or_none}),
            }, get_all=False) if episode_attr.get('category') != 'movies' else {}),
        }