3 from .common
import InfoExtractor
4 from ..utils
import ExtractorError
7 class NewsPicksIE(InfoExtractor
):
8 _VALID_URL
= r
'https?://newspicks\.com/movie-series/(?P<channel_id>\d+)\?movieId=(?P<id>\d+)'
11 'url': 'https://newspicks.com/movie-series/11?movieId=1813',
14 'title': '日本の課題を破壊せよ【ゲスト:成田悠輔】',
15 'description': 'md5:09397aad46d6ded6487ff13f138acadf',
16 'channel': 'HORIE ONE',
18 'release_date': '20220117',
19 'thumbnail': r
're:https://.+jpg',
24 def _real_extract(self
, url
):
25 video_id
, channel_id
= self
._match
_valid
_url
(url
).group('id', 'channel_id')
26 webpage
= self
._download
_webpage
(url
, video_id
)
27 entries
= self
._parse
_html
5_media
_entries
(
28 url
, webpage
.replace('movie-for-pc', 'movie'), video_id
, 'hls')
30 raise ExtractorError('No HTML5 media elements found')
33 title
= self
._html
_search
_meta
('og:title', webpage
, fatal
=False)
34 description
= self
._html
_search
_meta
(
35 ('og:description', 'twitter:title'), webpage
, fatal
=False)
36 channel
= self
._html
_search
_regex
(
37 r
'value="11".+?<div\s+class="title">(.+?)</div', webpage
, 'channel name', fatal
=False)
38 if not title
or not channel
:
39 title
, channel
= re
.split(r
'\s*|\s*', self
._html
_extract
_title
(webpage
))
41 release_date
= self
._search
_regex
(
42 r
'<span\s+class="on-air-date">\s*(\d+)年(\d+)月(\d+)日\s*</span>',
43 webpage
, 'release date', fatal
=False, group
=(1, 2, 3))
48 'description': description
,
50 'channel_id': channel_id
,
51 'release_date': ('%04d%02d%02d' % tuple(map(int, release_date
))) if release_date
else None,