6 from .common
import InfoExtractor
7 from .openload
import PhantomJSwrapper
24 class DouyuBaseIE(InfoExtractor
):
25 def _download_cryptojs_md5(self
, video_id
):
27 # XXX: Do NOT use cdn.bootcdn.net; ref: https://sansec.io/research/polyfill-supply-chain-attack
28 'https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/rollups/md5.js',
29 'https://unpkg.com/cryptojslib@3.1.2/rollups/md5.js',
31 js_code
= self
._download
_webpage
(
32 url
, video_id
, note
='Downloading signing dependency', fatal
=False)
34 self
.cache
.store('douyu', 'crypto-js-md5', js_code
)
36 raise ExtractorError('Unable to download JS dependency (crypto-js/md5)')
38 def _get_cryptojs_md5(self
, video_id
):
39 return self
.cache
.load(
40 'douyu', 'crypto-js-md5', min_ver
='2024.07.04') or self
._download
_cryptojs
_md
5(video_id
)
42 def _calc_sign(self
, sign_func
, video_id
, a
):
44 c
= round(time
.time())
45 js_script
= f
'{self._get_cryptojs_md5(video_id)};{sign_func};console.log(ub98484234("{a}","{b}","{c}"))'
46 phantom
= PhantomJSwrapper(self
)
47 result
= phantom
.execute(js_script
, video_id
,
48 note
='Executing JS signing script').strip()
49 return {i
: v
[0] for i
, v
in urllib
.parse
.parse_qs(result
).items()}
51 def _search_js_sign_func(self
, webpage
, fatal
=True):
52 # The greedy look-behind ensures last possible script tag is matched
53 return self
._search
_regex
(
54 r
'(?:<script.*)?<script[^>]*>(.*?ub98484234.*?)</script>', webpage
, 'JS sign func', fatal
=fatal
)
57 class DouyuTVIE(DouyuBaseIE
):
59 _VALID_URL
= r
'https?://(?:www\.)?douyu(?:tv)?\.com/(topic/\w+\?rid=|(?:[^/]+/))*(?P<id>[A-Za-z0-9]+)'
61 'url': 'https://www.douyu.com/pigff',
64 'display_id': 'pigff',
66 'title': 're:^【PIGFF】.* [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
67 'description': r
'≥15级牌子看鱼吧置顶帖进粉丝vx群',
71 'live_status': 'is_live',
74 'skip_download': True,
77 'url': 'http://www.douyutv.com/85982',
80 'display_id': '85982',
82 'title': 're:^小漠从零单排记!——CSOL2躲猫猫 [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
83 'description': 'md5:746a2f7a253966a06755a912f0acc0d2',
84 'thumbnail': r
're:^https?://.*\.png',
85 'uploader': 'douyu小漠',
89 'skip_download': True,
91 'skip': 'Room not found',
93 'url': 'http://www.douyutv.com/17732',
96 'display_id': '17732',
98 'title': 're:^清晨醒脑!根本停不下来! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
99 'description': r
're:.*m7show@163\.com.*',
100 'thumbnail': r
're:^https?://.*\.png',
105 'skip_download': True,
108 'url': 'https://www.douyu.com/topic/ydxc?rid=6560603',
111 'display_id': '6560603',
113 'title': 're:^阿余:新年快乐恭喜发财! [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
114 'description': 're:.*直播时间.*',
115 'thumbnail': r
're:^https?://.*\.png',
116 'uploader': '阿涛皎月Carry',
117 'live_status': 'is_live',
120 'skip_download': True,
123 'url': 'http://www.douyu.com/xiaocang',
124 'only_matching': True,
127 'url': 'http://www.douyu.com/t/lpl',
128 'only_matching': True,
131 def _get_sign_func(self
, room_id
, video_id
):
132 return self
._download
_json
(
133 f
'https://www.douyu.com/swf_api/homeH5Enc?rids={room_id}', video_id
,
134 note
='Getting signing script')['data'][f
'room{room_id}']
136 def _extract_stream_formats(self
, stream_formats
):
138 for stream_info
in traverse_obj(stream_formats
, (..., 'data')):
139 stream_url
= urljoin(
140 traverse_obj(stream_info
, 'rtmp_url'), traverse_obj(stream_info
, 'rtmp_live'))
142 rate_id
= traverse_obj(stream_info
, ('rate', {int_or_none}
))
143 rate_info
= traverse_obj(stream_info
, ('multirates', lambda _
, v
: v
['rate'] == rate_id
), get_all
=False)
144 ext
= determine_ext(stream_url
)
147 'format_id': str_or_none(rate_id
),
148 'ext': 'mp4' if ext
== 'm3u8' else ext
,
149 'protocol': 'm3u8_native' if ext
== 'm3u8' else 'https',
150 'quality': rate_id
% -10000 if rate_id
is not None else None,
151 **traverse_obj(rate_info
, {
152 'format': ('name', {str_or_none}
),
153 'tbr': ('bit', {int_or_none}
),
158 def _real_extract(self
, url
):
159 video_id
= self
._match
_id
(url
)
161 webpage
= self
._download
_webpage
(url
, video_id
)
162 room_id
= self
._search
_regex
(r
'\$ROOM\.room_id\s*=\s*(\d+)', webpage
, 'room id')
164 if self
._search
_regex
(r
'"videoLoop"\s*:\s*(\d+)', webpage
, 'loop', default
='') == '1':
165 raise UserNotLive('The channel is auto-playing VODs', video_id
=video_id
)
166 if self
._search
_regex
(r
'\$ROOM\.show_status\s*=\s*(\d+)', webpage
, 'status', default
='') == '2':
167 raise UserNotLive(video_id
=video_id
)
169 # Grab metadata from API
173 'time': int(time
.time()),
175 params
['auth'] = hashlib
.md5(
176 f
'room/{room_id}?{urllib.parse.urlencode(params)}zNzMV1y4EMxOHS6I5WKm'.encode()).hexdigest()
177 room
= traverse_obj(self
._download
_json
(
178 f
'http://www.douyutv.com/api/v1/room/{room_id}', video_id
,
179 note
='Downloading room info', query
=params
, fatal
=False), 'data')
181 # 1 = live, 2 = offline
182 if traverse_obj(room
, 'show_status') == '2':
183 raise UserNotLive(video_id
=video_id
)
185 js_sign_func
= self
._search
_js
_sign
_func
(webpage
, fatal
=False) or self
._get
_sign
_func
(room_id
, video_id
)
188 **self
._calc
_sign
(js_sign_func
, video_id
, room_id
),
190 stream_formats
= [self
._download
_json
(
191 f
'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
192 video_id
, note
='Downloading livestream format',
193 data
=urlencode_postdata(form_data
))]
195 for rate_id
in traverse_obj(stream_formats
[0], ('data', 'multirates', ..., 'rate')):
196 if rate_id
!= traverse_obj(stream_formats
[0], ('data', 'rate')):
197 form_data
['rate'] = rate_id
198 stream_formats
.append(self
._download
_json
(
199 f
'https://www.douyu.com/lapi/live/getH5Play/{room_id}',
200 video_id
, note
=f
'Downloading livestream format {rate_id}',
201 data
=urlencode_postdata(form_data
)))
205 'formats': self
._extract
_stream
_formats
(stream_formats
),
207 **traverse_obj(room
, {
208 'display_id': ('url', {str}
, {lambda i
: i
[1:]}),
209 'title': ('room_name', {unescapeHTML}
),
210 'description': ('show_details', {str}
),
211 'uploader': ('nickname', {str}
),
212 'thumbnail': ('room_src', {url_or_none}
),
217 class DouyuShowIE(DouyuBaseIE
):
218 _VALID_URL
= r
'https?://v(?:mobile)?\.douyu\.com/show/(?P<id>[0-9a-zA-Z]+)'
221 'url': 'https://v.douyu.com/show/mPyq7oVNe5Yv1gLY',
223 'id': 'mPyq7oVNe5Yv1gLY',
225 'title': '四川人小时候的味道“蒜苗回锅肉”,传统菜不能丢,要常做来吃',
228 'uploader': '美食作家王刚V',
229 'uploader_id': 'OVAO4NVx1m7Q',
230 'timestamp': 1661850002,
231 'upload_date': '20220830',
233 'tags': ['美食', '美食综合'],
236 'url': 'https://vmobile.douyu.com/show/rjNBdvnVXNzvE2yw',
237 'only_matching': True,
253 'super': '1920x1080',
258 def _real_extract(self
, url
):
259 url
= url
.replace('vmobile.', 'v.')
260 video_id
= self
._match
_id
(url
)
262 webpage
= self
._download
_webpage
(url
, video_id
)
264 video_info
= self
._search
_json
(
265 r
'<script>\s*window\.\$DATA\s*=', webpage
,
266 'video info', video_id
, transform_source
=js_to_json
)
268 js_sign_func
= self
._search
_js
_sign
_func
(webpage
)
271 **self
._calc
_sign
(js_sign_func
, video_id
, video_info
['ROOM']['point_id']),
273 url_info
= self
._download
_json
(
274 'https://v.douyu.com/api/stream/getStreamUrl', video_id
,
275 data
=urlencode_postdata(form_data
), note
='Downloading video formats')
278 for name
, url
in traverse_obj(url_info
, ('data', 'thumb_video', {dict.items
}, ...)):
279 video_url
= traverse_obj(url
, ('url', {url_or_none}
))
281 ext
= determine_ext(video_url
)
283 'format': self
._FORMATS
.get(name
),
286 'quality': self
._QUALITIES
.get(name
),
287 'ext': 'mp4' if ext
== 'm3u8' else ext
,
288 'protocol': 'm3u8_native' if ext
== 'm3u8' else 'https',
289 **parse_resolution(self
._RESOLUTIONS
.get(name
)),
293 f
'"{self._FORMATS.get(name, name)}" format may require logging in. {self._login_hint()}')
298 **traverse_obj(video_info
, ('DATA', {
299 'title': ('content', 'title', {str}
),
300 'uploader': ('content', 'author', {str}
),
301 'uploader_id': ('content', 'up_id', {str_or_none}
),
302 'duration': ('content', 'video_duration', {int_or_none}
),
303 'thumbnail': ('content', 'video_pic', {url_or_none}
),
304 'timestamp': ('content', 'create_time', {int_or_none}
),
305 'view_count': ('content', 'view_num', {int_or_none}
),
306 'tags': ('videoTag', ..., 'tagName', {str}
),