[cleanup] Make more playlist entries lazy (#11763)
[yt-dlp.git] / yt_dlp / extractor / taptap.py
blobe4c31da4e29a2816e5f6ceaf09ae361e7bac5795
import re
import uuid

from .common import InfoExtractor
from ..utils import (
    clean_html,
    int_or_none,
    join_nonempty,
    orderedSet,
    str_or_none,
    url_or_none,
)
from ..utils.traversal import traverse_obj
class TapTapBaseIE(InfoExtractor):
    # Shared implementation for the TapTap extractors. Concrete subclasses only
    # override the class attributes below; the extraction flow lives here.

    # Template for the `X-UA` query parameter sent with every API request;
    # `{uuid}` is filled with a fresh random UUID per call (see _get_api).
    _X_UA = 'V=1&PN=WebApp&LANG=zh_CN&VN_CODE=102&LOC=CN&PLT=PC&DS=Android&UID={uuid}&OS=Windows&OSV=10&DT=PC'
    # Endpoint queried with `video_ids` to resolve a single video.
    _VIDEO_API = 'https://www.taptap.cn/webapiv2/video-resource/v1/multi-get'
    # Endpoint returning the page (moment/app/post) details; set by subclasses.
    _INFO_API = None
    # Name of the query parameter that carries the page id for _INFO_API.
    _INFO_QUERY_KEY = 'id'
    # traverse_obj path applied to the API `data` payload before anything else.
    _DATA_PATH = None
    # traverse_obj path collecting the video ids contained in the page.
    _ID_PATH = None
    # traverse_obj path/dict producing the metadata shared by all entries.
    _META_PATH = None

    def _get_api(self, url, video_id, query, **kwargs):
        """Download JSON from `url` and return the value under its 'data' key.

        The given `query` is copied and extended with an `X-UA` parameter
        built from `_X_UA` and a newly generated UUID. Extra keyword
        arguments are forwarded to `_download_json`.
        """
        query = {**query, 'X-UA': self._X_UA.format(uuid=uuid.uuid4())}
        return self._download_json(url, video_id, query=query, **kwargs)['data']

    def _extract_video(self, video_id):
        """Return an info dict (id, formats, duration, thumbnail) for one video id."""
        video_data = self._get_api(self._VIDEO_API, video_id, query={'video_ids': video_id})['list'][0]

        # h265 playlist contains both h265 and h264 formats
        video_url = traverse_obj(video_data, ('play_url', ('url_h265', 'url'), {url_or_none}, any))
        # Do not hand a missing URL to the m3u8 extractor; leave `formats`
        # empty instead so the failure is reported as "no formats".
        formats = self._extract_m3u8_formats(video_url, video_id, fatal=False) if video_url else []
        for fmt in formats:
            # `or ''` also covers a `vcodec` key that is present but None
            if re.match(r'(hev|hvc|hvt)\d', fmt.get('vcodec') or ''):
                fmt['format_id'] = join_nonempty(fmt.get('format_id'), 'h265', delim='_')

        return {
            'id': str(video_id),
            'formats': formats,
            # traverse_obj may return None when neither field is present
            **(traverse_obj(video_data, {
                'duration': ('info', 'duration', {int_or_none}),
                'thumbnail': ('thumbnail', ('original_url', 'url'), {url_or_none}),
            }, get_all=False) or {}),
        }

    def _real_extract(self, url):
        """Build a playlist of every video referenced by the page at `url`."""
        video_id = self._match_id(url)
        query = {self._INFO_QUERY_KEY: video_id}

        data = traverse_obj(
            self._get_api(self._INFO_API, video_id, query=query), self._DATA_PATH)

        # Fall back to an empty dict so the `**metainfo` unpacking below
        # cannot fail when no metadata field could be extracted.
        metainfo = traverse_obj(data, self._META_PATH) or {}
        # De-duplicate while keeping the order the API returned the ids in
        # (a plain set() would shuffle the playlist entries).
        video_ids = orderedSet(traverse_obj(data, self._ID_PATH))
        # Generator: each video's API request is only made when its entry
        # is actually consumed.
        entries = ({
            **metainfo,
            **self._extract_video(id_),
        } for id_ in video_ids)

        return self.playlist_result(entries, **metainfo, id=video_id)
class TapTapMomentIE(TapTapBaseIE):
    # Extractor configuration for taptap.cn "moment" pages.
    _VALID_URL = r'https?://www\.taptap\.cn/moment/(?P<id>\d+)'
    _INFO_API = 'https://www.taptap.cn/webapiv2/moment/v3/detail'
    # Video ids are taken from both the `videos` list and `pin_video` of the topic.
    _ID_PATH = ('moment', 'topic', (('videos', ...), 'pin_video'), 'video_id')
    # Metadata is read relative to the `moment` object of the response.
    _META_PATH = ('moment', {
        'timestamp': ('created_time', {int_or_none}),
        'modified_timestamp': ('edited_time', {int_or_none}),
        'uploader': ('author', 'user', 'name', {str}),
        'uploader_id': ('author', 'user', 'id', {int}, {str_or_none}),
        'title': ('topic', 'title', {str}),
        'description': ('topic', 'summary', {str}),
    })
    _TESTS = [{
        'url': 'https://www.taptap.cn/moment/194618230982052443',
        'info_dict': {
            'id': '194618230982052443',
            'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星',
            'description': 'md5:cf66f7819d413641b8b28c8543f4ecda',
            'timestamp': 1633453402,
            'upload_date': '20211005',
            'modified_timestamp': 1633453402,
            'modified_date': '20211005',
            'uploader': '乌酱',
            'uploader_id': '532896',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '2202584',
                'ext': 'mp4',
                'title': '《崩坏3》开放世界「后崩坏书」新篇章 于淹没之地仰视辰星',
                'description': 'md5:cf66f7819d413641b8b28c8543f4ecda',
                'duration': 66,
                'timestamp': 1633453402,
                'upload_date': '20211005',
                'modified_timestamp': 1633453402,
                'modified_date': '20211005',
                'uploader': '乌酱',
                'uploader_id': '532896',
                'thumbnail': r're:^https?://.*\.(png|jpg)',
            },
        }],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.taptap.cn/moment/521630629209573493',
        'info_dict': {
            'id': '521630629209573493',
            'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」',
            'description': 'md5:2c81245da864428c904d53ae4ad2182b',
            'timestamp': 1711425600,
            'upload_date': '20240326',
            'modified_timestamp': 1711425600,
            'modified_date': '20240326',
            'uploader': '崩坏:星穹铁道',
            'uploader_id': '414732580',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '4006511',
                'ext': 'mp4',
                'title': '《崩坏:星穹铁道》黄泉角色PV——「你的颜色」',
                'description': 'md5:2c81245da864428c904d53ae4ad2182b',
                'duration': 173,
                'timestamp': 1711425600,
                'upload_date': '20240326',
                'modified_timestamp': 1711425600,
                'modified_date': '20240326',
                'uploader': '崩坏:星穹铁道',
                'uploader_id': '414732580',
                'thumbnail': r're:^https?://.*\.(png|jpg)',
            },
        }],
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.taptap.cn/moment/540493587511511299',
        'playlist_count': 2,
        'info_dict': {
            'id': '540493587511511299',
            'title': '中式民俗解谜《纸嫁衣7》、新系列《纸不语》公布!',
            'description': 'md5:d60842350e686ddb242291ddfb8e39c9',
            'timestamp': 1715920200,
            'upload_date': '20240517',
            'modified_timestamp': 1715942225,
            'modified_date': '20240517',
            'uploader': 'TapTap 编辑',
            'uploader_id': '7159244',
        },
        'params': {'skip_download': 'm3u8'},
    }]
class TapTapAppIE(TapTapBaseIE):
    # Extractor configuration for taptap.cn app pages.
    _VALID_URL = r'https?://www\.taptap\.cn/app/(?P<id>\d+)'
    _INFO_API = 'https://www.taptap.cn/webapiv2/app/v4/detail'
    # Video ids are collected from every item of `app_videos` and `videos`.
    _ID_PATH = (('app_videos', 'videos'), ..., 'video_id')
    _META_PATH = {
        'title': ('title', {str}),
        # The description text is passed through clean_html
        'description': ('description', 'text', {str}, {clean_html}),
    }
    _TESTS = [{
        'url': 'https://www.taptap.cn/app/168332',
        'info_dict': {
            'id': '168332',
            'title': '原神',
            'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
        },
        'playlist_count': 2,
        'playlist': [{
            'info_dict': {
                'id': '4058443',
                'ext': 'mp4',
                'title': '原神',
                'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
                'duration': 26,
                'thumbnail': r're:^https?://.*\.(png|jpg)',
            },
        }, {
            'info_dict': {
                'id': '4058462',
                'ext': 'mp4',
                'title': '原神',
                'description': 'md5:e345f39a5fea5de2a46923f70d5f76ab',
                'duration': 295,
                'thumbnail': r're:^https?://.*\.(png|jpg)',
            },
        }],
        'params': {'skip_download': 'm3u8'},
    }]
class TapTapIntlBase(TapTapBaseIE):
    # Base for the taptap.io (international) extractors: only the `X-UA`
    # template and the video endpoint differ from TapTapBaseIE.
    _VIDEO_API = 'https://www.taptap.io/webapiv2/video-resource/v1/multi-get'
    _X_UA = 'V=1&PN=WebAppIntl2&LANG=zh_TW&VN_CODE=115&VN=0.1.0&LOC=CN&PLT=PC&DS=Android&UID={uuid}&CURR=&DT=PC&OS=Windows&OSV=NT%208.0.0'
class TapTapAppIntlIE(TapTapIntlBase):
    # Extractor configuration for taptap.io app pages.
    _VALID_URL = r'https?://www\.taptap\.io/app/(?P<id>\d+)'
    _INFO_API = 'https://www.taptap.io/webapiv2/i/app/v5/detail'
    # The app details sit under the `app` key of the API payload.
    _DATA_PATH = 'app'
    # Video ids are collected from every item of `app_videos` and `videos`.
    _ID_PATH = (('app_videos', 'videos'), ..., 'video_id')
    _META_PATH = {
        'title': ('title', {str}),
        # The description text is passed through clean_html
        'description': ('description', 'text', {str}, {clean_html}),
    }
    _TESTS = [{
        'url': 'https://www.taptap.io/app/233287',
        'info_dict': {
            'id': '233287',
            'title': '《虹彩六號 M》',
            'description': 'md5:418285f9c15347fc3cf3e3a3c649f182',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '2149708997',
                'ext': 'mp4',
                'title': '《虹彩六號 M》',
                'description': 'md5:418285f9c15347fc3cf3e3a3c649f182',
                'duration': 78,
                'thumbnail': r're:^https?://.*\.(png|jpg)',
            },
        }],
        'params': {'skip_download': 'm3u8'},
    }]
class TapTapPostIntlIE(TapTapIntlBase):
    # Extractor configuration for taptap.io post pages.
    _VALID_URL = r'https?://www\.taptap\.io/post/(?P<id>\d+)'
    _INFO_API = 'https://www.taptap.io/webapiv2/creation/post/v1/detail'
    # This endpoint takes the post id in `id_str` rather than `id`.
    _INFO_QUERY_KEY = 'id_str'
    # The post details sit under the `post` key of the API payload.
    _DATA_PATH = 'post'
    # Video ids are taken from both the `videos` list and `pin_video`.
    _ID_PATH = ((('videos', ...), 'pin_video'), 'video_id')
    _META_PATH = {
        'timestamp': ('published_time', {int_or_none}),
        'modified_timestamp': ('edited_time', {int_or_none}),
        'uploader': ('user', 'name', {str}),
        'uploader_id': ('user', 'id', {int}, {str_or_none}),
        'title': ('title', {str}),
        'description': ('list_fields', 'summary', {str}),
    }
    _TESTS = [{
        'url': 'https://www.taptap.io/post/571785',
        'info_dict': {
            'id': '571785',
            'title': 'Arknights x Rainbow Six Siege | Event PV',
            'description': 'md5:f7717c13f6d3108e22db7303e6690bf7',
            'timestamp': 1614664951,
            'upload_date': '20210302',
            'modified_timestamp': 1614664951,
            'modified_date': '20210302',
            'uploader': 'TapTap Editor',
            'uploader_id': '80224473',
        },
        'playlist_count': 1,
        'playlist': [{
            'info_dict': {
                'id': '2149491903',
                'ext': 'mp4',
                'title': 'Arknights x Rainbow Six Siege | Event PV',
                'description': 'md5:f7717c13f6d3108e22db7303e6690bf7',
                'duration': 122,
                'timestamp': 1614664951,
                'upload_date': '20210302',
                'modified_timestamp': 1614664951,
                'modified_date': '20210302',
                'uploader': 'TapTap Editor',
                'uploader_id': '80224473',
                'thumbnail': r're:^https?://.*\.(png|jpg)',
            },
        }],
        'params': {'skip_download': 'm3u8'},
    }]