[ie/youtube] Fix `uploader_id` extraction (#11818)
[yt-dlp.git] / yt_dlp / extractor / skeb.py
blob bc5ec3da7ff437d61ceab59b78d0c8078865f601
1 from .common import InfoExtractor
2 from ..utils import ExtractorError, determine_ext, parse_qs, traverse_obj
class SkebIE(InfoExtractor):
    """Extractor for the video/audio previews attached to a skeb.jp work page.

    A work with exactly one playable preview is returned as a single result;
    a work with several is returned as a playlist whose entries all inherit
    the work-level metadata (title, description, uploader, ...).
    """

    _VALID_URL = r'https?://skeb\.jp/@[^/]+/works/(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://skeb.jp/@riiru_wm/works/10',
        'info_dict': {
            'id': '466853',
            'title': '内容はおまかせします! by 姫ノ森りぃる@一周年',
            'description': 'md5:1ec50901efc3437cfbfe3790468d532d',
            'uploader': '姫ノ森りぃる@一周年',
            'uploader_id': 'riiru_wm',
            'age_limit': 0,
            'tags': [],
            'url': r're:https://skeb.+',
            'thumbnail': r're:https://skeb.+',
            'subtitles': {
                'jpn': [{
                    'url': r're:https://skeb.+',
                    'ext': 'vtt',
                }],
            },
            'width': 720,
            'height': 405,
            'duration': 313,
            'fps': 30,
            'ext': 'mp4',
        },
    }, {
        'url': 'https://skeb.jp/@furukawa_nob/works/3',
        'info_dict': {
            'id': '489408',
            'title': 'いつもお世話になってお... by 古川ノブ@音楽とVlo...',
            'description': 'md5:5adc2e41d06d33b558bf7b1faeb7b9c2',
            'uploader': '古川ノブ@音楽とVlogのVtuber',
            'uploader_id': 'furukawa_nob',
            'age_limit': 0,
            'tags': [
                'よろしく', '大丈夫', 'お願い', 'でした',
                '是非', 'O', 'バー', '遊び', 'おはよう',
                'オーバ', 'ボイス',
            ],
            'url': r're:https://skeb.+',
            'thumbnail': r're:https://skeb.+',
            'subtitles': {
                'jpn': [{
                    'url': r're:https://skeb.+',
                    'ext': 'vtt',
                }],
            },
            'duration': 98,
            'ext': 'mp3',
            'vcodec': 'none',
            'abr': 128,
        },
    }, {
        'url': 'https://skeb.jp/@mollowmollow/works/6',
        'info_dict': {
            'id': '6',
            'title': 'ヒロ。\n\n私のキャラク... by 諸々',
            'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
            '_type': 'playlist',
            'entries': [{
                'id': '486430',
                'title': 'ヒロ。\n\n私のキャラク... by 諸々',
                'description': 'md5:aa6cbf2ba320b50bce219632de195f07',
            }, {
                'id': '486431',
                'title': 'ヒロ。\n\n私のキャラク... by 諸々',
            }],
        },
    }]

    def _preview_ext(self, vid_url):
        """Return the file extension of a preview URL, or None if it cannot be determined."""
        ext = determine_ext(vid_url, default_ext=None)
        if ext:
            return ext
        # No usable extension in the URL path: fall back to the file name
        # carried in the `response-content-disposition` query parameter.
        # That parameter may be absent, so look it up without indexing blindly.
        content_disposition = traverse_obj(
            parse_qs(vid_url), ('response-content-disposition', 0))
        if not content_disposition:
            return None
        return self._search_regex(
            r'filename="[^"]+\.([^\.]+?)"', content_disposition,
            'preview file extension', fatal=False, group=1)

    @staticmethod
    def _preview_size(width, height):
        """Scale (width, height) so that the longest side becomes 720.

        The pair is returned untouched when either value is missing, and as
        (None, None) when the reported size is degenerate (longest side <= 0),
        which would otherwise cause a division by zero.
        """
        if width is None or height is None:
            return width, height
        longest = max(width, height)
        if longest <= 0:
            return None, None
        # the longest side is at most 720px for non-client viewers
        return tuple(side * 720 // longest for side in (width, height))

    def _real_extract(self, url):
        """Extract the playable previews of a work.

        Returns a single info dict when exactly one mp4/mp3 preview exists,
        or a playlist dict when there are several.

        Raises ExtractorError (expected) when the work has no mp4/mp3 preview.
        """
        video_id = self._match_id(url)
        nuxt_data = self._search_nuxt_data(self._download_webpage(url, video_id), video_id)

        # Work-level metadata shared by every preview of this work
        parent = {
            'id': video_id,
            'title': nuxt_data.get('title'),
            'description': nuxt_data.get('description'),
            'uploader': traverse_obj(nuxt_data, ('creator', 'name')),
            'uploader_id': traverse_obj(nuxt_data, ('creator', 'screen_name')),
            'age_limit': 18 if nuxt_data.get('nsfw') else 0,
            'tags': nuxt_data.get('tag_list'),
        }

        entries = []
        for item in nuxt_data.get('previews') or []:
            vid_url = item.get('url')
            # Validate before the URL is parsed: a preview without a URL or
            # an id is skipped instead of breaking the whole extraction.
            if not vid_url or not item.get('id'):
                continue
            preview_ext = self._preview_ext(vid_url)
            if preview_ext not in ('mp4', 'mp3'):
                continue

            width, height = self._preview_size(
                traverse_obj(item, ('information', 'width')),
                traverse_obj(item, ('information', 'height')))
            is_audio = preview_ext == 'mp3'
            vtt_url = item.get('vtt_url')

            entries.append({
                **parent,
                'id': str(item['id']),
                'url': vid_url,
                'thumbnail': item.get('poster_url'),
                'subtitles': {
                    'jpn': [{
                        'url': vtt_url,
                        'ext': 'vtt',
                    }],
                } if vtt_url else None,
                'width': width,
                'height': height,
                'duration': traverse_obj(item, ('information', 'duration')),
                'fps': traverse_obj(item, ('information', 'frame_rate')),
                'ext': preview_ext,
                'vcodec': 'none' if is_audio else None,
                # you'll always get 128kbps MP3 for non-client viewers
                'abr': 128 if is_audio else None,
            })

        if not entries:
            raise ExtractorError('No video/audio attachment found in this commission.', expected=True)
        if len(entries) == 1:
            return entries[0]

        parent.update({
            '_type': 'playlist',
            'entries': entries,
        })
        return parent