[cleanup] Misc (#8968)
[yt-dlp.git] / yt_dlp / extractor / mixch.py
blob4be6947289c4dad7f4fa1df47bf666999e1c74eb
1 from .common import InfoExtractor
2 from ..utils import UserNotLive, traverse_obj
class MixchIE(InfoExtractor):
    """Extractor for live streams reached through mixch.tv user pages (``/u/<id>``)."""

    IE_NAME = 'mixch'
    _VALID_URL = r'https?://(?:www\.)?mixch\.tv/u/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://mixch.tv/u/16236849/live',
        'skip': 'don\'t know if this live persists',
        'info_dict': {
            'id': '16236849',
            'title': '24配信シェア⭕️投票🙏💦',
            'comment_count': 13145,
            'view_count': 28348,
            'timestamp': 1636189377,
            'uploader': '🦥伊咲👶🏻#フレアワ',
            'uploader_id': '16236849',
        },
    }, {
        'url': 'https://mixch.tv/u/16137876/live',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the live stream of the user named in *url*.

        The numeric id from the URL is used both as the video id and as the
        uploader id. Raises ``UserNotLive`` when the embedded page state has
        no (or an empty) ``liveInfo`` entry.
        """
        user_id = self._match_id(url)
        # Always fetch the canonical /live page, whatever URL form was given.
        live_page = self._download_webpage(f'https://mixch.tv/u/{user_id}/live', user_id)

        # The page embeds its state as a JS object assigned on a line of its own.
        state_json = self._search_regex(
            r'(?m)^\s*window\.__INITIAL_JS_STATE__\s*=\s*(\{.+?\});\s*$', live_page, 'initial JS state')
        state = self._parse_json(state_json, user_id)
        if not state.get('liveInfo'):
            raise UserNotLive(video_id=user_id)

        def live_field(key):
            # Safe lookup of a single key below the top-level 'liveInfo' entry.
            return traverse_obj(state, ('liveInfo', key))

        # Use the playlist URL from the page state if present; otherwise fall
        # back to the CDN URL derived from the user id.
        stream_url = live_field('hls')
        if not stream_url:
            stream_url = f'https://d1hd0ww6piyb43.cloudfront.net/hls/torte_{user_id}.m3u8'

        return {
            'id': user_id,
            'title': live_field('title'),
            'comment_count': live_field('comments'),
            'view_count': live_field('visitor'),
            'timestamp': live_field('created'),
            'uploader': traverse_obj(state, ('broadcasterInfo', 'name')),
            'uploader_id': user_id,
            'formats': [{
                'format_id': 'hls',
                'url': stream_url,
                'ext': 'mp4',
                'protocol': 'm3u8',
            }],
            'is_live': True,
        }
class MixchArchiveIE(InfoExtractor):
    """Extractor for archived videos on mixch.tv (``/archive/<id>``)."""

    IE_NAME = 'mixch:archive'
    _VALID_URL = r'https?://(?:www\.)?mixch\.tv/archive/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://mixch.tv/archive/421',
        'skip': 'paid video, no DRM. expires at Jan 23',
        'info_dict': {
            'id': '421',
            'title': '96NEKO SHOW TIME',
        },
    }]

    def _real_extract(self, url):
        """Return the first HTML5 media entry of the archive page, with id and title set.

        When the page yields no media entries, login via cookies is requested
        through ``raise_login_required``.
        """
        archive_id = self._match_id(url)
        page_html = self._download_webpage(url, archive_id)

        # Every 'video-js' occurrence is rewritten to 'video' before parsing,
        # presumably so the player markup is picked up as a plain <video> tag.
        normalized_html = page_html.replace('video-js', 'video')
        media_entries = self._parse_html5_media_entries(url, normalized_html, archive_id, 'hls')
        if not media_entries:
            self.raise_login_required(method='cookies')

        info = media_entries[0]
        # Resolve the title before touching the entry, then set both keys in order.
        title = self._html_search_regex(r'class="archive-title">(.+?)</', page_html, 'title')
        info['id'] = archive_id
        info['title'] = title

        return info