[cleanup] Misc (#8968)
[yt-dlp.git] / yt_dlp / extractor / streamcz.py
blobc4537ba8df8a058d8687e43d4fb89b33811b2fe4
1 import json
3 from .common import InfoExtractor
4 from ..utils import (
5 float_or_none,
6 int_or_none,
7 parse_codecs,
8 traverse_obj,
9 urljoin,
class StreamCZIE(InfoExtractor):
    """Extractor for episode pages on stream.cz and televizeseznam.cz.

    The numeric id at the end of the page URL is sent to the
    televizeseznam.cz GraphQL endpoint to obtain episode metadata and the
    base URL of the "spl" playlist; that playlist JSON in turn lists the
    available streams and subtitles.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:stream|televizeseznam)\.cz/[^?#]+/(?P<display_id>[^?#]+)-(?P<id>[0-9]+)'
    _TESTS = [{
        'url': 'https://www.televizeseznam.cz/video/lajna/buh-57953890',
        'md5': '40c41ade1464a390a0b447e333df4239',
        'info_dict': {
            'id': '57953890',
            'ext': 'mp4',
            'title': 'Bůh',
            'display_id': 'buh',
            'description': 'md5:8f5f09b9b7bc67df910486cdd88f7165',
            'duration': 1369.6,
            'view_count': int,
        }
    }, {
        'url': 'https://www.stream.cz/kdo-to-mluvi/kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna-64087937',
        'md5': '41fd358000086a1ccdb068c77809b158',
        'info_dict': {
            'id': '64087937',
            'ext': 'mp4',
            'title': 'Kdo to mluví? Velké odhalení přináší nový pořad už od 25. srpna',
            'display_id': 'kdo-to-mluvi-velke-odhaleni-prinasi-novy-porad-uz-od-25-srpna',
            'description': 'md5:97a811000a6460266029d6c1c2ebcd59',
            'duration': 50.2,
            'view_count': int,
        }
    }, {
        'url': 'https://www.stream.cz/tajemno/znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili-64147267',
        'md5': '3ee4d0be040e8f4a543e67e509d55e3f',
        'info_dict': {
            'id': '64147267',
            'ext': 'mp4',
            'title': 'Zničehonic jim skrz střechu prolítnul záhadný předmět. Badatelé vše objasnili',
            'display_id': 'znicehonic-jim-skrz-strechu-prolitnul-zahadny-predmet-badatele-vse-objasnili',
            'description': 'md5:4b8ada6718d34bb011c4e04ca4bc19bf',
            'duration': 442.84,
            'view_count': int,
        }
    }]

    def _extract_formats(self, spl_url, video):
        """Yield one format dict per stream listed in the playlist data.

        Two stream groups are read from ``video``: the entries under
        ``http_stream -> qualities`` (emitted with ext ``ts`` and
        ``source_preference`` -1) and the entries under ``mp4`` (emitted
        with ext ``mp4`` and ``source_preference`` 1). A missing or empty
        group is treated as having no streams.

        :param spl_url: URL the playlist was fetched from; stream URLs are
            resolved against it with ``urljoin``.
        :param video: the ``data`` object of the playlist JSON.
        """
        for ext, pref, streams in (
                ('ts', -1, traverse_obj(video, ('http_stream', 'qualities')) or {}),
                ('mp4', 1, video.get('mp4') or {})):
            for format_id, stream in streams.items():
                # A stream without a URL cannot be downloaded; skip it.
                if not stream.get('url'):
                    continue
                yield {
                    'format_id': f'{format_id}-{ext}',
                    'ext': ext,
                    'source_preference': pref,
                    'url': urljoin(spl_url, stream['url']),
                    'tbr': float_or_none(stream.get('bandwidth'), scale=1000),
                    'duration': float_or_none(stream.get('duration'), scale=1000),
                    'width': traverse_obj(stream, ('resolution', 0)),
                    # When no resolution is given, fall back to the number
                    # left after removing 'p' from the quality key.
                    'height': traverse_obj(stream, ('resolution', 1)) or int_or_none(format_id.replace('p', '')),
                    **parse_codecs(stream.get('codec')),
                }

    def _real_extract(self, url):
        """Return the info dict for the episode addressed by ``url``.

        Raises ``KeyError`` if the GraphQL response lacks ``data``/``episode``
        or ``spl``, or if the playlist JSON lacks ``data``. Missing or
        malformed subtitle information is tolerated and yields no subtitles
        for the affected entry.
        """
        display_id, video_id = self._match_valid_url(url).groups()

        data = self._download_json(
            'https://www.televizeseznam.cz/api/graphql', video_id, 'Downloading GraphQL result',
            data=json.dumps({
                'variables': {'urlName': video_id},
                'query': '''
                    query LoadEpisode($urlName : String){ episode(urlName: $urlName){ ...VideoDetailFragmentOnEpisode } }
                    fragment VideoDetailFragmentOnEpisode on Episode {
                        spl
                        urlName
                        name
                        perex
                        duration
                        views
                    }'''
            }).encode('utf-8'),
            headers={'Content-Type': 'application/json;charset=UTF-8'}
        )['data']['episode']

        spl_url = data['spl'] + 'spl2,3'
        metadata = self._download_json(spl_url, video_id, 'Downloading playlist')
        # The playlist endpoint may answer with a JSON body that only points
        # elsewhere; in that case fetch the playlist from the given location
        # and resolve relative URLs against that new address.
        if 'Location' in metadata and 'data' not in metadata:
            spl_url = metadata['Location']
            metadata = self._download_json(spl_url, video_id, 'Downloading redirected playlist')
        video = metadata['data']

        subtitles = {}
        # `or {}` (rather than a .get default) also covers keys that are
        # present but null, so absent subtitle data never aborts extraction.
        for subs in (video.get('subtitles') or {}).values():
            language = subs.get('language')
            if not language:
                continue
            for ext, sub_url in (subs.get('urls') or {}).items():
                subtitles.setdefault(language, []).append({
                    'ext': ext,
                    'url': urljoin(spl_url, sub_url),
                })

        formats = list(self._extract_formats(spl_url, video))

        return {
            'id': video_id,
            'display_id': display_id,
            'title': data.get('name'),
            'description': data.get('perex'),
            'duration': float_or_none(data.get('duration')),
            'view_count': int_or_none(data.get('views')),
            'formats': formats,
            'subtitles': subtitles,
        }