Release 2024.12.03
[yt-dlp3.git] / yt_dlp / extractor / cctv.py
blob18c080df1bc0efc486c76ff17db71afdcfc7bf30
1 import re
3 from .common import InfoExtractor
4 from ..utils import (
5 float_or_none,
6 try_get,
7 unified_timestamp,
class CCTVIE(InfoExtractor):
    """Extractor for video pages on cntv/cctv hosts and ncpa-classic.com.

    The page itself only carries an embedded hexadecimal video id; the media
    metadata and stream URLs are fetched from the getHttpVideoInfo endpoint
    using that id.
    """

    IE_DESC = '央视网'
    _VALID_URL = r'https?://(?:(?:[^/]+)\.(?:cntv|cctv)\.(?:com|cn)|(?:www\.)?ncpa-classic\.com)/(?:[^/]+/)*?(?P<id>[^/?#&]+?)(?:/index)?(?:\.s?html|[?#&]|$)'
    # The comment heading each test names the page-side pattern that exposes
    # the video id for that URL (see the regex list in _real_extract).
    _TESTS = [{
        # fo.addVariable("videoCenterId","id")
        'url': 'http://sports.cntv.cn/2016/02/12/ARTIaBRxv4rTT1yWf1frW2wi160212.shtml',
        'md5': 'd61ec00a493e09da810bf406a078f691',
        'info_dict': {
            'id': '5ecdbeab623f4973b40ff25f18b174e8',
            'ext': 'mp4',
            'title': '[NBA]二少联手砍下46分 雷霆主场击败鹈鹕(快讯)',
            'description': 'md5:7e14a5328dc5eb3d1cd6afbbe0574e95',
            'duration': 98,
            'uploader': 'songjunjie',
            'timestamp': 1455279956,
            'upload_date': '20160212',
        },
    }, {
        # var guid = "id"
        'url': 'http://tv.cctv.com/2016/02/05/VIDEUS7apq3lKrHG9Dncm03B160205.shtml',
        'info_dict': {
            'id': 'efc5d49e5b3b4ab2b34f3a502b73d3ae',
            'ext': 'mp4',
            'title': '[赛车]“车王”舒马赫恢复情况成谜(快讯)',
            'description': '2月4日,蒙特泽莫罗透露了关于“车王”舒马赫恢复情况,但情况是否属实遭到了质疑。',
            'duration': 37,
            'uploader': 'shujun',
            'timestamp': 1454677291,
            'upload_date': '20160205',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # changePlayer('id')
        'url': 'http://english.cntv.cn/special/four_comprehensives/index.shtml',
        'info_dict': {
            'id': '4bb9bb4db7a6471ba85fdeda5af0381e',
            'ext': 'mp4',
            'title': 'NHnews008 ANNUAL POLITICAL SEASON',
            'description': 'Four Comprehensives',
            'duration': 60,
            'uploader': 'zhangyunlei',
            'timestamp': 1425385521,
            'upload_date': '20150303',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # loadvideo('id')
        'url': 'http://cctv.cntv.cn/lm/tvseries_russian/yilugesanghua/index.shtml',
        'info_dict': {
            'id': 'b15f009ff45c43968b9af583fc2e04b2',
            'ext': 'mp4',
            'title': 'Путь,усыпанный космеями Серия 1',
            'description': 'Путь, усыпанный космеями',
            'duration': 2645,
            'uploader': 'renxue',
            'timestamp': 1477479241,
            'upload_date': '20161026',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # var initMyAray = 'id'
        'url': 'http://www.ncpa-classic.com/2013/05/22/VIDE1369219508996867.shtml',
        'info_dict': {
            'id': 'a194cfa7f18c426b823d876668325946',
            'ext': 'mp4',
            'title': '小泽征尔音乐塾 音乐梦想无国界',
            'duration': 2173,
            'timestamp': 1369248264,
            'upload_date': '20130522',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # videoCenterId: "id"
        'url': 'http://news.cctv.com/2024/02/21/ARTIcU5tKIOIF2myEGCATkLo240221.shtml',
        'info_dict': {
            'id': '5c846c0518444308ba32c4159df3b3e0',
            'ext': 'mp4',
            'title': '《平“语”近人——习近平喜欢的典故》第三季 第5集:风物长宜放眼量',
            'uploader': 'yangjuan',
            'timestamp': 1708554940,
            'upload_date': '20240221',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # var ids = ["id"]
        'url': 'http://www.ncpa-classic.com/clt/more/416/index.shtml',
        'info_dict': {
            'id': 'a8606119a4884588a79d81c02abecc16',
            'ext': 'mp3',
            'title': '来自维也纳的新年贺礼',
            'description': 'md5:f13764ae8dd484e84dd4b39d5bcba2a7',
            'duration': 1578,
            'uploader': 'djy',
            'timestamp': 1482942419,
            'upload_date': '20161228',
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Failed to download m3u8 information'],
    }, {
        'url': 'http://ent.cntv.cn/2016/01/18/ARTIjprSSJH8DryTVr5Bx8Wb160118.shtml',
        'only_matching': True,
    }, {
        'url': 'http://tv.cntv.cn/video/C39296/e0210d949f113ddfb38d31f00a4e5c44',
        'only_matching': True,
    }, {
        'url': 'http://english.cntv.cn/2016/09/03/VIDEhnkB5y9AgHyIEVphCEz1160903.shtml',
        'only_matching': True,
    }, {
        'url': 'http://tv.cctv.com/2016/09/07/VIDE5C1FnlX5bUywlrjhxXOV160907.shtml',
        'only_matching': True,
    }, {
        'url': 'http://tv.cntv.cn/video/C39296/95cfac44cabd3ddc4a9438780a4e5c44',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video embedded in the page at *url*.

        Steps:
          1. Download the page and locate the hexadecimal video id through
             one of several script patterns.
          2. Query the getHttpVideoInfo endpoint with that id.
          3. Collect direct HTTP formats (first entry of ``lowChapters`` and
             ``chapters``) and, when ``hls_url`` is present, HLS formats.

        Returns a dict with ``id``, ``title``, ``description``, ``uploader``,
        ``timestamp``, ``duration`` and ``formats``.

        ``data['title']`` is read without a fallback, so a response lacking
        it raises ``KeyError``.
        NOTE(review): ``_search_regex`` is called without a default, so it
        presumably raises when no id pattern matches - confirm against the
        base class.
        """
        # Slug taken from the URL; only used to label the page download.
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # From here on video_id is the hex id found in the page, not the slug.
        video_id = self._search_regex(
            [r'var\s+guid\s*=\s*["\']([\da-fA-F]+)',
             r'videoCenterId(?:["\']\s*,|:)\s*["\']([\da-fA-F]+)',
             r'changePlayer\s*\(\s*["\']([\da-fA-F]+)',
             r'load[Vv]ideo\s*\(\s*["\']([\da-fA-F]+)',
             r'var\s+initMyAray\s*=\s*["\']([\da-fA-F]+)',
             r'var\s+ids\s*=\s*\[["\']([\da-fA-F]+)'],
            webpage, 'video id')

        data = self._download_json(
            'http://vdn.apps.cntv.cn/api/getHttpVideoInfo.do', video_id,
            query={
                'pid': video_id,
                'url': url,
                'idl': 32,
                'idlr': 32,
                'modifyed': 'false',
            })

        title = data['title']

        formats = []

        video = data.get('video')
        if isinstance(video, dict):
            # The tuple position doubles as the quality rank:
            # lowChapters -> 0, chapters -> 1.
            for quality, chapters_key in enumerate(('lowChapters', 'chapters')):
                # Only the first chapter's URL is used.
                video_url = try_get(
                    video, lambda x: x[chapters_key][0]['url'], str)
                if video_url:
                    formats.append({
                        'url': video_url,
                        'format_id': 'http',
                        'quality': quality,
                        # Sample clip
                        'preference': -10,
                    })

        hls_url = try_get(data, lambda x: x['hls_url'], str)
        if hls_url:
            # Drop the maxbr query parameter before requesting the playlist
            # (presumably a bitrate cap - TODO confirm).
            hls_url = re.sub(r'maxbr=\d+&?', '', hls_url)
            formats.extend(self._extract_m3u8_formats(
                hls_url, video_id, 'mp4', entry_protocol='m3u8_native',
                m3u8_id='hls', fatal=False))

        uploader = data.get('editer_name')
        description = self._html_search_meta(
            'description', webpage, default=None)
        timestamp = unified_timestamp(data.get('f_pgmtime'))
        # video may not be a dict here; the lookup is wrapped in try_get.
        duration = float_or_none(try_get(video, lambda x: x['totalLength']))

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'uploader': uploader,
            'timestamp': timestamp,
            'duration': duration,
            'formats': formats,
        }