5 from .common
import InfoExtractor
class JStreamIE(InfoExtractor):
    """Extractor for internal ``jstream:<host>:<publisher>:<mid>`` URLs.

    Such URLs are produced by :meth:`_extract_embed_urls` from webpages that
    embed the player script hosted on ``ssl-cache.stream.ne.jp``.
    """

    # group "id" only exists for compliance, not directly used in requests
    # also all components are mandatory
    _VALID_URL = r'jstream:(?P<host>www\d+):(?P<id>(?P<publisher>[a-z0-9]+):(?P<mid>\d+))'

    # NOTE(review): the expected 'duration' for this test is not known here;
    # add it once confirmed against the live metadata.
    _TESTS = [{
        'url': 'jstream:www50:eqd638pvwx:752',
        'info_dict': {
            'id': 'eqd638pvwx:752',
            'ext': 'mp4',
            'title': '阪神淡路大震災 激震の記録2020年版 解説動画',
            'thumbnail': r're:https?://eqd638pvwx\.eq\.webcdn\.stream\.ne\.jp/.+\.jpg',
        },
    }]

    def _parse_jsonp(self, callback, string, video_id):
        """Return the JSON object passed to ``callback(`` inside *string*.

        :param callback: name of the JSONP callback function to look for
        :param string: raw JSONP response text
        :param video_id: identifier forwarded to ``_search_json`` for reporting
        """
        # The callback name is escaped so it is matched literally
        return self._search_json(rf'\s*{re.escape(callback)}\s*\(', string, callback, video_id)

    def _find_formats(self, video_id, movie_list_hls, host, publisher, subtitles):
        """Yield the formats of every ``auto*`` entry of *movie_list_hls*.

        Subtitles found along the way are merged in place into *subtitles*.

        :param video_id: identifier used when requesting the manifests
        :param movie_list_hls: iterable of dicts with ``text`` and ``url``
            keys, or ``None`` when the metadata carries no such list
        :param host: host component of the ``jstream:`` URL (``wwwNN``)
        :param publisher: publisher component of the ``jstream:`` URL
        :param subtitles: dict that receives the merged subtitles
        """
        # The list comes from a ``.get()`` and may be absent altogether
        for value in movie_list_hls or []:
            text = value.get('text') or ''
            # Only entries labelled "auto" / "auto_<suffix>" are handled
            if not text.startswith('auto'):
                continue
            # "auto_xyz" -> "xyz"; a bare "auto" yields no format id at all
            m3u8_id = remove_start(remove_start(text, 'auto'), '_') or None
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                f'https://{publisher}.eq.webcdn.stream.ne.jp/{host}/{publisher}/jmc_pub/{value.get("url")}',
                video_id, 'mp4', m3u8_id=m3u8_id)
            self._merge_subtitles(subs, target=subtitles)
            yield from fmts

    def _real_extract(self, url):
        """Download the JSONP metadata for *url* and build the info dict."""
        host, publisher, mid, video_id = self._match_valid_url(url).group(
            'host', 'publisher', 'mid', 'id')
        video_info_jsonp = self._download_webpage(
            f'https://{publisher}.eq.webcdn.stream.ne.jp/{host}/{publisher}/jmc_pub/eq_meta/v1/{mid}.jsonp',
            video_id, 'Requesting video info')
        video_info = self._parse_jsonp('metaDataResult', video_info_jsonp, video_id)['movie']

        # Filled in place by _find_formats while the generator is consumed
        subtitles = {}
        formats = list(self._find_formats(
            video_id, video_info.get('movie_list_hls'), host, publisher, subtitles))
        self._remove_duplicate_formats(formats)

        return {
            'id': video_id,
            'title': video_info.get('title'),
            'duration': float_or_none(video_info.get('duration')),
            'thumbnail': video_info.get('thumbnail_url'),
            'formats': formats,
            'subtitles': subtitles,
        }

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield a ``jstream:`` URL for each player created in *webpage*.

        Nothing is yielded unless the page loads the player script, since the
        host and publisher are taken from that script's URL.
        """
        # check for eligibility of webpage
        # https://support.eq.stream.co.jp/hc/ja/articles/115008388147-%E3%83%97%E3%83%AC%E3%82%A4%E3%83%A4%E3%83%BCAPI%E3%81%AE%E3%82%B5%E3%83%B3%E3%83%97%E3%83%AB%E3%82%B3%E3%83%BC%E3%83%89
        script_tag = re.search(
            r'<script\s*[^>]+?src="https://ssl-cache\.stream\.ne\.jp/(?P<host>www\d+)/(?P<publisher>[a-z0-9]+)/[^"]+?/if\.js"',
            webpage)
        if not script_tag:
            return
        host, publisher = script_tag.groups()
        for m in re.finditer(r'(?s)PlayerFactoryIF\.create\(\s*({[^\}]+?})\s*\)\s*;', webpage):
            # TODO: using json.loads here as InfoExtractor._parse_json is not classmethod
            info = json.loads(js_to_json(m.group(1)))
            encoded_mid = info.get('m')
            # A player config without "m" cannot be mapped to a media id
            if not encoded_mid:
                continue
            mid = base64.b64decode(encoded_mid).decode()
            yield f'jstream:{host}:{publisher}:{mid}'