[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / kukululive.py
blob86ab5d40ecd00f7fd3456eda0cac97d4dab82760
1 import urllib.parse
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 clean_html,
7 filter_dict,
8 get_element_by_id,
9 int_or_none,
10 join_nonempty,
11 js_to_json,
12 qualities,
13 url_or_none,
14 urljoin,
16 from ..utils.traversal import traverse_obj
# Extractor for live.erinn.biz: handles both running livestreams and
# recorded ("timeshift") streams addressed by the numeric hash in the URL.
class KukuluLiveIE(InfoExtractor):
    _VALID_URL = r'https?://live\.erinn\.biz/live\.php\?h(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://live.erinn.biz/live.php?h675134569',
        'md5': 'e380fa6a47fc703d91cea913ab44ec2e',
        'info_dict': {
            'id': '675134569',
            'ext': 'mp4',
            'title': 'プロセカ',
            'description': 'テストも兼ねたプロセカ配信。',
            'timestamp': 1702689148,
            'upload_date': '20231216',
            'thumbnail': r're:^https?://.*',
        },
    }, {
        'url': 'https://live.erinn.biz/live.php?h102338092',
        'md5': 'dcf5167a934b1c60333461e13a81a6e2',
        'info_dict': {
            'id': '102338092',
            'ext': 'mp4',
            'title': 'Among Usで遊びます!!',
            'description': 'VTuberになりましたねんねこ㌨ですよろしくお願いします',
            'timestamp': 1704603118,
            'upload_date': '20240107',
            'thumbnail': r're:^https?://.*',
        },
    }, {
        'url': 'https://live.erinn.biz/live.php?h878049531',
        'only_matching': True,
    }]

    def _get_quality_meta(self, video_id, desc, code, force_h264=None):
        """Download the player metadata for one quality level.

        `desc` is only used in the progress/error messages, `code` selects
        the `get<code>liveByAjax` action, and `force_h264` is sent only when
        it survives `filter_dict`. The response body is parsed as a query
        string, so the result maps each key to a list of values.
        """
        if force_h264:
            desc += ' (force_h264)'
        response = self._download_webpage(
            'https://live.erinn.biz/live.player.fplayer.php', video_id,
            f'Downloading {desc} quality metadata', f'Unable to download {desc} quality metadata',
            query=filter_dict({
                'hash': video_id,
                'action': f'get{code}liveByAjax',
                'force_h264': force_h264,
            }))
        return urllib.parse.parse_qs(response)

    def _add_quality_formats(self, formats, quality_meta):
        """Append the formats described by `quality_meta` to `formats` in place.

        One entry is added for `hlsaddr` and one for `hlsaddr_audioonly`,
        each only if its first value passes `url_or_none`.
        """
        quality = traverse_obj(quality_meta, ('now_quality', 0, {str}))
        vcodec = traverse_obj(quality_meta, ('vcodec', 0, {str}))
        preference = qualities(('low', 'h264', 'high'))(quality)

        # (metadata key, format id, container extension, video codec)
        variants = (
            ('hlsaddr', quality, 'mp4', vcodec),
            ('hlsaddr_audioonly', join_nonempty(quality, 'audioonly'), 'm4a', 'none'),
        )
        for meta_key, format_id, ext, codec in variants:
            if not traverse_obj(quality_meta, (meta_key, 0, {url_or_none})):
                continue
            formats.append({
                'format_id': format_id,
                # The raw value is used on purpose; url_or_none only gates the entry
                'url': quality_meta[meta_key][0],
                'ext': ext,
                'vcodec': codec,
                'quality': preference,
            })

    def _real_extract(self, url):
        """Return the info dict for a livestream, a single VOD or a multi-part VOD."""
        video_id = self._match_id(url)
        html = self._download_webpage(url, video_id)

        if '>タイムシフトが見つかりませんでした。<' in html:
            raise ExtractorError('This stream has expired', expected=True)

        # Normalise upper-case SPAN tags before looking up the title element
        normalized_html = html.replace('<SPAN', '<span').replace('SPAN>', 'span>')
        title = clean_html(get_element_by_id('livetitle', normalized_html))
        description = self._html_search_meta('Description', html)
        thumbnail = self._html_search_meta(['og:image', 'twitter:image'], html)

        is_live = self._search_regex(
            r'(var\s+timeshift\s*=\s*false)', html, 'is livestream', default=False)
        if is_live:
            formats = []

            high_meta = self._get_quality_meta(video_id, 'high', 'Z')
            self._add_quality_formats(formats, high_meta)
            # When the high quality stream is HEVC, additionally request the forced H.264 variant
            if traverse_obj(high_meta, ('vcodec', 0)) == 'HEVC':
                h264_meta = self._get_quality_meta(video_id, 'high', 'Z', force_h264='1')
                self._add_quality_formats(formats, h264_meta)

            low_meta = self._get_quality_meta(video_id, 'low', 'ForceLow')
            self._add_quality_formats(formats, low_meta)

            return {
                'id': video_id,
                'title': title,
                'description': description,
                'thumbnail': thumbnail,
                'is_live': True,
                'formats': formats,
            }

        # VOD extraction
        player_html = self._download_webpage(
            'https://live.erinn.biz/live.timeshift.fplayer.php', video_id,
            'Downloading player html', 'Unable to download player html', query={'hash': video_id})

        stream_data = self._search_json(
            r'var\s+fplayer_source\s*=', player_html, 'stream data', video_id,
            contains_pattern=r'\[(?s:.+)\]', transform_source=js_to_json)
        # Keep only the items that carry a truthy 'file' value
        sources = traverse_obj(stream_data, lambda _, v: v['file'])

        def make_entry(part_number, segment, in_playlist):
            # A lone segment keeps the plain id/title; playlist parts get numbered ones
            return {
                'id': f'{video_id}_{part_number}' if in_playlist else video_id,
                'title': f'{title} (Part {part_number})' if in_playlist else title,
                'description': description,
                'timestamp': traverse_obj(segment, ('time_start', {int_or_none})),
                'thumbnail': thumbnail,
                'formats': [{
                    'url': urljoin('https://live.erinn.biz', segment['file']),
                    'ext': 'mp4',
                    'protocol': 'm3u8_native',
                }],
            }

        if len(sources) == 1:
            return make_entry(1, sources[0], False)

        # Lazily built, one entry per segment, numbered from 1
        parts = (make_entry(number, segment, True) for number, segment in enumerate(sources, 1))
        return self.playlist_result(parts, video_id, title, description, multi_video=True)