5 from .common
import InfoExtractor
6 from ..networking
.exceptions
import HTTPError
18 class MGTVIE(InfoExtractor
):
19 _VALID_URL
= r
'https?://(?:w(?:ww)?\.)?mgtv\.com/(v|b)/(?:[^/]+/)*(?P<id>\d+)\.html'
24 'url': 'http://www.mgtv.com/v/1/290525/f/3116640.html',
29 'description': '我是歌手第四季双年巅峰会',
31 'thumbnail': r
're:^https?://.*\.jpg$',
33 'params': {'skip_download': 'm3u8'},
35 'url': 'https://w.mgtv.com/b/427837/15588271.html',
39 'title': '春日迟迟再出发 沉浸版第1期:陆莹结婚半年查出肾炎被离婚 吴雅婷把一半票根退给前夫',
40 'description': 'md5:a7a05a05b1aa87bd50cae619b19bbca6',
41 'thumbnail': r
're:^https?://.+\.jpg',
44 'params': {'skip_download': 'm3u8'},
46 'url': 'https://w.mgtv.com/b/333652/7329822.html',
51 'description': 'md5:cd81be6499bafe32e4d143abd822bf9c',
52 'thumbnail': r
're:^https?://.+\.jpg',
55 'params': {'skip_download': 'm3u8'},
57 'url': 'https://w.mgtv.com/b/427837/15591647.html',
58 'only_matching': True,
60 'url': 'https://w.mgtv.com/b/388252/15634192.html?fpa=33318&fpos=4&lastp=ch_home',
61 'only_matching': True,
63 'url': 'http://www.mgtv.com/b/301817/3826653.html',
64 'only_matching': True,
66 'url': 'https://w.mgtv.com/b/301817/3826653.html',
67 'only_matching': True,
71 '标清': ('480p', '854x480'),
72 '高清': ('540p', '960x540'),
73 '超清': ('720p', '1280x720'),
74 '蓝光': ('1080p', '1920x1080'),
77 def _real_extract(self
, url
):
78 video_id
= self
._match
_id
(url
)
79 tk2
= base64
.urlsafe_b64encode(
80 f
'did={str(uuid.uuid4())}|pno=1030|ver=0.3.0301|clit={int(time.time())}'.encode())[::-1]
82 api_data
= self
._download
_json
(
83 'https://pcweb.api.mgtv.com/player/video', video_id
, query
={
87 }, headers
=self
.geo_verification_headers())['data']
88 except ExtractorError
as e
:
89 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 401:
90 error
= self
._parse
_json
(e
.cause
.response
.read().decode(), None)
91 if error
.get('code') == 40005:
92 self
.raise_geo_restricted(countries
=self
._GEO
_COUNTRIES
)
93 raise ExtractorError(error
['msg'], expected
=True)
96 stream_data
= self
._download
_json
(
97 'https://pcweb.api.mgtv.com/player/getSource', video_id
, query
={
99 'pm2': api_data
['atc']['pm2'],
100 'video_id': video_id
,
103 }, headers
=self
.geo_verification_headers())['data']
104 stream_domain
= traverse_obj(stream_data
, ('stream_domain', ..., {url_or_none}
), get_all
=False)
107 for idx
, stream
in enumerate(traverse_obj(stream_data
, ('stream', lambda _
, v
: v
['url']))):
108 stream_name
= traverse_obj(stream
, 'name', 'standardName', 'barName', expected_type
=str)
109 resolution
= traverse_obj(
110 self
._RESOLUTIONS
, (stream_name
, 1 if stream
.get('scale') == '16:9' else 0))
111 format_url
= traverse_obj(self
._download
_json
(
112 urljoin(stream_domain
, stream
['url']), video_id
, fatal
=False,
113 note
=f
'Downloading video info for format {resolution or stream_name}'),
114 ('info', {url_or_none}
))
117 tbr
= int_or_none(stream
.get('filebitrate') or self
._search
_regex
(
118 r
'_(\d+)_mp4/', format_url
, 'tbr', default
=None))
120 'format_id': str(tbr
or idx
),
124 'vcodec': stream
.get('videoFormat'),
125 'acodec': stream
.get('audioFormat'),
126 **parse_resolution(resolution
),
127 'protocol': 'm3u8_native',
131 'format_note': stream_name
,
137 **traverse_obj(api_data
, ('info', {
138 'title': ('title', {str.strip
}),
139 'description': ('desc', {str}
),
140 'duration': ('duration', {int_or_none}
),
141 'thumbnail': ('thumb', {url_or_none}
),
143 'subtitles': self
.extract_subtitles(video_id
, stream_domain
),
146 def _get_subtitles(self
, video_id
, domain
):
147 info
= self
._download
_json
(f
'https://pcweb.api.mgtv.com/video/title?videoId={video_id}',
148 video_id
, fatal
=False) or {}
150 for sub
in try_get(info
, lambda x
: x
['data']['title']) or []:
151 url_sub
= sub
.get('url')
154 locale
= sub
.get('captionSimpleName') or 'en'
155 sub
= self
._download
_json
(f
'{domain}{url_sub}', video_id
, fatal
=False,
156 note
=f
'Download subtitle for locale {sub.get("name")} ({locale})') or {}
157 sub_url
= url_or_none(sub
.get('info'))
160 subtitles
.setdefault(locale
.lower(), []).append({
162 'name': sub
.get('name'),