[ie/soundcloud] Various fixes (#11820)
[yt-dlp.git] / yt_dlp / extractor / ixigua.py
blob 2868c2fc7cc602f889aedbadac76021e315125d7
1 import base64
3 from .common import InfoExtractor
4 from ..utils import (
5 ExtractorError,
6 get_element_by_id,
7 int_or_none,
8 js_to_json,
9 str_or_none,
10 traverse_obj,
14 class IxiguaIE(InfoExtractor):
15 _VALID_URL = r'https?://(?:\w+\.)?ixigua\.com/(?:video/)?(?P<id>\d+).+'
16 _TESTS = [{
17 'url': 'https://www.ixigua.com/6996881461559165471',
18 'info_dict': {
19 'id': '6996881461559165471',
20 'ext': 'mp4',
21 'title': '盲目涉水风险大,亲身示范高水位行车注意事项',
22 'description': 'md5:8c82f46186299add4a1c455430740229',
23 'tags': ['video_car'],
24 'like_count': int,
25 'dislike_count': int,
26 'view_count': int,
27 'uploader': '懂车帝原创',
28 'uploader_id': '6480145787',
29 'thumbnail': r're:^https?://.+\.(avif|webp)',
30 'timestamp': 1629088414,
31 'duration': 1030,
def _get_json_data(self, webpage, video_id):
    """Return the parsed ``SSR_HYDRATED_DATA`` payload found in *webpage*.

    The payload is looked up with ``get_element_by_id('SSR_HYDRATED_DATA', ...)``,
    the ``window._SSR_HYDRATED_DATA=`` assignment text is removed, and the
    remainder is handed to ``self._parse_json`` with ``js_to_json`` as the
    source transform.

    Raises:
        ExtractorError: when no payload is found. If ``self._cookies_passed``
            is falsy the error is flagged ``expected=True`` and asks for
            cookies; otherwise a plain extraction failure is raised.
    """
    hydrated = get_element_by_id('SSR_HYDRATED_DATA', webpage)
    if hydrated:
        # Drop the JS assignment so only the object literal is left to parse.
        payload = hydrated.replace('window._SSR_HYDRATED_DATA=', '')
        return self._parse_json(payload, video_id, transform_source=js_to_json)

    # Nothing usable on the page: pick the message based on whether cookies
    # were already supplied.
    if not self._cookies_passed:
        raise ExtractorError('Cookies (not necessarily logged in) are needed', expected=True)
    raise ExtractorError('Failed to get SSR_HYDRATED_DATA')
def _media_selector(self, json_data):
    """Yield one format dict for every media entry located in *json_data*.

    Three sub-paths are visited in order (``video_list``,
    ``dynamic_video/dynamic_video_list``, ``dynamic_video/dynamic_audio_list``);
    only entries with a truthy ``main_url`` are kept. Each path carries a
    dict of overrides that is applied on top of the generated fields.
    """
    sources = (
        (('video_list', ), {}),
        (('dynamic_video', 'dynamic_video_list'), {'acodec': 'none'}),
        (('dynamic_video', 'dynamic_audio_list'), {'vcodec': 'none', 'ext': 'm4a'}),
    )
    for path, override in sources:
        entries = traverse_obj(json_data, (..., *path, lambda _, v: v['main_url']))
        for media in entries:
            fmt = {
                # main_url is base64-encoded; decode it to text for the URL.
                'url': base64.b64decode(media['main_url']).decode(),
                'width': int_or_none(media.get('vwidth')),
                'height': int_or_none(media.get('vheight')),
                'fps': int_or_none(media.get('fps')),
                'vcodec': media.get('codec_type'),
                'format_id': str_or_none(media.get('quality_type')),
                'filesize': int_or_none(media.get('size')),
                'ext': 'mp4',
            }
            # Per-path overrides win over the defaults above.
            fmt.update(override)
            yield fmt
64 def _real_extract(self, url):
65 video_id = self._match_id(url)
66 webpage = self._download_webpage(url, video_id)
67 json_data = self._get_json_data(webpage, video_id)['anyVideo']['gidInformation']['packerData']['video']
69 formats = list(self._media_selector(json_data.get('videoResource')))
70 return {
71 'id': video_id,
72 'title': json_data.get('title'),
73 'description': json_data.get('video_abstract'),
74 'formats': formats,
75 'like_count': json_data.get('video_like_count'),
76 'duration': int_or_none(json_data.get('duration')),
77 'tags': [json_data.get('tag')],
78 'uploader_id': traverse_obj(json_data, ('user_info', 'user_id')),
79 'uploader': traverse_obj(json_data, ('user_info', 'name')),
80 'view_count': json_data.get('video_watch_count'),
81 'dislike_count': json_data.get('video_unlike_count'),
82 'timestamp': int_or_none(json_data.get('video_publish_time')),