1 from .common
import InfoExtractor
class CGTNIE(InfoExtractor):
    """Extractor for video articles published on news.cgtn.com.

    ``_VALID_URL`` matches dated article pages of the form
    ``/news/YYYY-MM-DD/<slug>-<id>/index.html``; the final hyphen-separated
    component of the slug is captured as the video ``id``.
    """

    _VALID_URL = r'https?://news\.cgtn\.com/news/[0-9]{4}-[0-9]{2}-[0-9]{2}/[a-zA-Z0-9-]+-(?P<id>[a-zA-Z0-9-]+)/index\.html'

    # NOTE(review): the list/dict nesting below ('info_dict' for expected
    # fields, 'params' for 'skip_download') was reconstructed from a damaged
    # copy of this file; only the entries that were still visible are kept.
    # Confirm against the upstream test cases.
    _TESTS = [
        {
            'url': 'https://news.cgtn.com/news/2021-03-09/Up-and-Out-of-Poverty-Ep-1-A-solemn-promise-YuOUaOzGQU/index.html',
            'info_dict': {
                'title': 'Up and Out of Poverty Ep. 1: A solemn promise',
                'thumbnail': r're:^https?://.*\.jpg$',
                'timestamp': 1615295940,
                'upload_date': '20210309',
                'categories': ['Video'],
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'https://news.cgtn.com/news/2021-06-06/China-Indonesia-vow-to-further-deepen-maritime-cooperation-10REvJCewCY/index.html',
            'info_dict': {
                'title': 'China, Indonesia vow to further deepen maritime cooperation',
                'thumbnail': r're:^https?://.*\.png$',
                'description': 'China and Indonesia vowed to upgrade their cooperation into the maritime sector and also for political security, economy, and cultural and people-to-people exchanges.',
                'categories': ['China'],
                'timestamp': 1622950200,
                'upload_date': '20210606',
            },
            'params': {
                'skip_download': False,
            },
        },
    ]

    def _real_extract(self, url):
        """Build the info dict for a single CGTN article page.

        Downloads the page at ``url``, locates the HLS manifest referenced by
        its ``data-video`` attribute and scrapes the surrounding metadata.

        :param url: article URL; its id is obtained via ``self._match_id``.
        :return: dict with ``title``, ``description``, ``thumbnail``,
            ``formats``, ``categories``, ``creators`` and ``timestamp``
            (plus ``id``, see the review note in the body).
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The manifest URL is the only lookup made without fatal=False or a
        # default, i.e. the only one treated as required.
        download_url = self._html_search_regex(
            r'data-video ="(?P<url>.+m3u8)"', webpage, 'download_url')

        # The remaining lookups are optional (fatal=False / default=None).
        datetime_str = self._html_search_regex(
            r'<span class="date">\s*(.+?)\s*</span>', webpage, 'datetime_str', fatal=False)
        category = self._html_search_regex(
            r'<span class="section">\s*(.+?)\s*</span>', webpage, 'category', fatal=False)
        author = self._search_regex(
            r'<div class="news-author-name">\s*(.+?)\s*</div>', webpage, 'author', default=None)

        return {
            # NOTE(review): the 'id' entry was not visible in the damaged copy
            # of this file; it is restored here because video_id is the
            # identifier used for every request above. Confirm against upstream.
            'id': video_id,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage, default=None),
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': self._extract_m3u8_formats(
                download_url, video_id, 'mp4', 'm3u8_native', m3u8_id='hls'),
            # Single-element lists, or None when the page did not provide a value.
            'categories': [category] if category else None,
            'creators': [author] if author else None,
            # Shifts the parsed timestamp back by 8 hours; presumably the page
            # shows China Standard Time (UTC+8) -- TODO confirm. try_get is
            # defined outside this view; presumably it yields None instead of
            # raising when no timestamp could be parsed -- TODO confirm.
            'timestamp': try_get(unified_timestamp(datetime_str), lambda x: x - 8 * 3600),
        }