[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / whowatch.py
blob9ca5c3b1a88146e7401e3123ddeb1803d178124d
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 int_or_none,
5 qualities,
6 try_call,
7 try_get,
class WhoWatchIE(InfoExtractor):
    """Extractor for live streams served from whowatch.tv viewer pages."""

    IE_NAME = 'whowatch'
    _VALID_URL = r'https?://whowatch\.tv/viewer/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://whowatch.tv/viewer/21450171',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the live stream at ``url``.

        Fetches the viewer page, the ``lives/<id>`` metadata JSON and the
        ``lives/<id>/play`` playback JSON, then collects per-stream HLS and
        RTMP formats plus the top-level HLS manifest as a fallback.

        Raises:
            ExtractorError: (expected) when the playback JSON carries no
                top-level ``hls_url``; the message is the API's
                ``error_message`` if present, else 'The user is offline.'.
        """
        video_id = self._match_id(url)
        # The page body is not used below; presumably it is requested for its
        # side effects (e.g. cookies) -- TODO confirm.
        self._download_webpage(url, video_id)
        metadata = self._download_json(f'https://api.whowatch.tv/lives/{video_id}', video_id)
        live_data = self._download_json(f'https://api.whowatch.tv/lives/{video_id}/play', video_id)

        # Prefer the share title with its first and last characters stripped,
        # falling back to the title from the metadata response.
        title = try_call(
            lambda: live_data['share_info']['live_title'][1:-1],
            lambda: metadata['live']['title'],
            expected_type=str)

        hls_url = live_data.get('hls_url')
        if not hls_url:
            raise ExtractorError(live_data.get('error_message') or 'The user is offline.', expected=True)

        quality_key = qualities(['low', 'medium', 'high', 'veryhigh'])
        formats = []

        for i, fmt in enumerate(live_data.get('streams') or []):
            name = fmt.get('quality') or fmt.get('name') or str(i)
            # Deliberately NOT named ``hls_url``: the top-level manifest URL
            # validated above must survive the loop for the fallback below.
            stream_hls_url = fmt.get('hls_url')
            rtmp_url = fmt.get('rtmp_url')
            audio_only = fmt.get('audio_only')
            quality = quality_key(fmt.get('quality'))

            if stream_hls_url:
                hls_fmts = self._extract_m3u8_formats(
                    stream_hls_url, video_id, ext='mp4', m3u8_id=f'hls-{name}', quality=quality)
                formats.extend(hls_fmts)
            else:
                hls_fmts = []

            # RTMP url for audio_only is same as high format, so skip it
            if rtmp_url and not audio_only:
                formats.append({
                    'url': rtmp_url,
                    'format_id': f'rtmp-{name}',
                    'ext': 'mp4',
                    'protocol': 'rtmp_ffmpeg',  # ffmpeg can, while rtmpdump can't
                    'vcodec': 'h264',
                    'acodec': 'aac',
                    'quality': quality,
                    'format_note': fmt.get('label'),
                    # note: HLS and RTMP have same resolution for now, so it's acceptable
                    'width': try_get(hls_fmts, lambda x: x[0]['width'], int),
                    'height': try_get(hls_fmts, lambda x: x[0]['height'], int),
                })

        # This contains the same formats as the above manifests and is used only as a fallback
        formats.extend(self._extract_m3u8_formats(
            hls_url, video_id, ext='mp4', m3u8_id='hls'))
        self._remove_duplicate_formats(formats)

        uploader_url = try_get(metadata, lambda x: x['live']['user']['user_path'], str)
        if uploader_url:
            uploader_url = f'https://whowatch.tv/profile/{uploader_url}'
        # Only stringify a real id; str(None) would leak the literal 'None'
        # into the info dict when the id is missing or not an int.
        uploader_id = try_get(metadata, lambda x: x['live']['user']['id'], int)
        if uploader_id is not None:
            uploader_id = str(uploader_id)
        uploader = try_get(metadata, lambda x: x['live']['user']['name'], str)
        thumbnail = try_get(metadata, lambda x: x['live']['latest_thumbnail_url'], str)
        # started_at is divided by 1000 here; presumably it is in
        # milliseconds -- TODO confirm against the API.
        timestamp = int_or_none(try_get(metadata, lambda x: x['live']['started_at'], int), scale=1000)
        view_count = try_get(metadata, lambda x: x['live']['total_view_count'], int)
        comment_count = try_get(metadata, lambda x: x['live']['comment_count'], int)

        return {
            'id': video_id,
            'title': title,
            'uploader_id': uploader_id,
            'uploader_url': uploader_url,
            'uploader': uploader,
            'formats': formats,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            'view_count': view_count,
            'comment_count': comment_count,
            'is_live': True,
        }