3 from .common
import InfoExtractor
11 class CCTVIE(InfoExtractor
):
13 _VALID_URL
= r
'https?://(?:(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)|(?:www\.)?ncpa-classic\.com)/(?:[^/]+/)*?(?P<id>[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)'
15 # fo.addVariable("videoCenterId","id")
16 'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml',
17 'md5': 'd61ec00a493e09da810bf406a078f691',
19 'id': '5ecdbeab623f4973b40ff25f18b174e8',
21 'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)',
22 'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95',
24 'uploader': 'songjunjie',
25 'timestamp': 1455279956,
26 'upload_date': '20160212',
30 'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml',
32 'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae',
34 'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)',
35 'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。',
38 'timestamp': 1454677291,
39 'upload_date': '20160205',
42 'skip_download': True,
46 'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml',
48 'id': '4bb9bb4db7a6471ba85fdeda5af0381e',
50 'title': 'NHnews008 ANNUAL POLITICAL SEASON',
51 'description': 'Four Comprehensives',
53 'uploader': 'zhangyunlei',
54 'timestamp': 1425385521,
55 'upload_date': '20150303',
58 'skip_download': True,
62 'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml',
64 'id': 'b15f009ff45c43968b9af583fc2e04b2',
66 'title': 'Путь,усыпанный космеями Серия 1',
67 'description': 'Путь, усыпанный космеями',
70 'timestamp': 1477479241,
71 'upload_date': '20161026',
74 'skip_download': True,
77 # var initMyAray = 'id'
78 'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml',
80 'id': 'a194cfa7f18c426b823d876668325946',
82 'title': '小泽征尔音乐塾 音乐梦想无国界',
84 'timestamp': 1369248264,
85 'upload_date': '20130522',
88 'skip_download': True,
92 'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
94 'id': '5c846c0518444308ba32c4159df3b3e0',
96 'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集:风物长宜放眼量',
97 'uploader': 'yangjuan',
98 'timestamp': 1708554940,
99 'upload_date': '20240221',
102 'skip_download': True,
106 'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
108 'id': 'a8606119a4884588a79d81c02abecc16',
110 'title': '来自维也纳的新年贺礼',
111 'description': 'md5:f13764ae8dd484e84dd4b39d5bcba2a7',
114 'timestamp': 1482942419,
115 'upload_date': '20161228',
118 'skip_download': True,
120 'expected_warnings': ['Failed to download m3u8 information'],
122 'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml',
123 'only_matching': True,
125 'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44',
126 'only_matching': True,
128 'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
129 'only_matching': True,
131 'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
132 'only_matching': True,
134 'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44',
135 'only_matching': True,
138 def _real_extract(self
, url
):
139 video_id
= self
._match
_id
(url
)
140 webpage
= self
._download
_webpage
(url
, video_id
)
142 video_id
= self
._search
_regex
(
143 [r
'var\s+guid\s*=\s*["\']([\da
-fA
-F
]+)',
144 r'videoCenterId(?
:["\']\s*,|:)\s*["\']([\da
-fA
-F
]+)',
145 r'changePlayer\s
*\
(\s
*["\']([\da-fA-F]+)',
146 r'load[Vv]ideo\s*\(\s*["\']([\da
-fA
-F
]+)',
147 r'var\s
+initMyAray\s
*=\s
*["\']([\da-fA-F]+)',
148 r'var\s+ids\s*=\s*\[["\']([\da
-fA
-F
]+)'],
151 data = self._download_json(
152 'http
://vdn
.apps
.cntv
.cn
/api
/getHttpVideoInfo
.do
', video_id,
161 title = data['title
']
165 video = data.get('video
')
166 if isinstance(video, dict):
167 for quality, chapters_key in enumerate(('lowChapters
', 'chapters
')):
169 video, lambda x: x[chapters_key][0]['url
'], str)
179 hls_url = try_get(data, lambda x: x['hls_url
'], str)
181 hls_url = re.sub(r'maxbr
=\d
+&?
', '', hls_url)
182 formats.extend(self._extract_m3u8_formats(
183 hls_url, video_id, 'mp4
', entry_protocol='m3u8_native
',
184 m3u8_id='hls
', fatal=False))
186 uploader = data.get('editer_name
')
187 description = self._html_search_meta(
188 'description
', webpage, default=None)
189 timestamp = unified_timestamp(data.get('f_pgmtime
'))
190 duration = float_or_none(try_get(video, lambda x: x['totalLength
']))
195 'description
': description,
196 'uploader
': uploader,
197 'timestamp
': timestamp,
198 'duration
': duration,