1 from .common
import InfoExtractor
2 from ..utils
import int_or_none
, urljoin
class StarTrekIE(InfoExtractor):
    """Extractor for ``/videos/<slug>`` pages on intl.startrek.com and www.startrek.com."""

    # 'base' captures scheme + host so that asset paths found in the page can
    # be joined against the same host; 'id' is the slug after /videos/.
    _VALID_URL = r'(?P<base>https?://(?:intl|www)\.startrek\.com)/videos/(?P<id>[^/]+)'
    _TESTS = [{
        'url': 'https://intl.startrek.com/videos/watch-welcoming-jess-bush-to-the-ready-room',
        'md5': '491df5035c9d4dc7f63c79caaf9c839e',
        'info_dict': {
            'id': 'watch-welcoming-jess-bush-to-the-ready-room',
            'title': 'WATCH: Welcoming Jess Bush to The Ready Room',
            'timestamp': 1655388000,
            'upload_date': '20220616',
            'description': 'md5:1ffee884e3920afbdd6dd04e926a1221',
            'thumbnail': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14794_rr_thumb_107_yt_16x9\.jpg(?:\?.+)?',
            'subtitles': {'en-US': [{
                'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_107_v4\.vtt',
            }, {
                'url': 'https://media.startrek.com/2022/06/16/2043801155561/1069981_hls/trr_snw_107_v4-c4bfc25d/stream_vtt.m3u8',
            }]},
        },
    }, {
        'url': 'https://www.startrek.com/videos/watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
        'md5': 'f5ad74fbb86e91e0882fc0a333178d1d',
        'info_dict': {
            'id': 'watch-ethan-peck-and-gia-sandhu-beam-down-to-the-ready-room',
            'title': 'WATCH: Ethan Peck and Gia Sandhu Beam Down to The Ready Room',
            'timestamp': 1654221600,
            'upload_date': '20220603',
            'description': 'md5:b3aa0edacfe119386567362dec8ed51b',
            'thumbnail': r're:https://www\.startrek\.com/sites/default/files/styles/video_1920x1080/public/images/2022-06/pp_14792_rr_thumb_105_yt_16x9_1\.jpg(?:\?.+)?',
            'subtitles': {'en-US': [{
                'url': r're:https://(?:intl|www)\.startrek\.com/sites/default/files/video/captions/2022-06/TRR_SNW_105_v5\.vtt',
            }]},
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for a single startrek.com video page.

        The stream URL, captions URL, title, duration and poster are read from
        ``data-*`` attributes of the page's ``cvp-player-*`` <div>; the
        description comes from the ``header-body`` <div>, and the timestamp
        (plus a fallback title) from the page's JSON-LD.

        The player element and its ``data-hls`` attribute are looked up
        without ``fatal=False`` or a default, so they are mandatory; every
        other field is looked up non-fatally.
        """
        urlbase, video_id = self._match_valid_url(url).group('base', 'id')
        webpage = self._download_webpage(url, video_id)

        # The pattern allows no '<' between the opening tag and </div>, i.e.
        # it captures a player <div> that has no nested elements.
        player = self._search_regex(
            r'(<\s*div\s+id\s*=\s*"cvp-player-[^<]+<\s*/div\s*>)', webpage, 'player')

        hls = self._html_search_regex(r'\bdata-hls\s*=\s*"([^"]+)"', player, 'HLS URL')
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(hls, video_id, 'mp4')

        captions = self._html_search_regex(
            r'\bdata-captions-url\s*=\s*"([^"]+)"', player, 'captions URL', fatal=False)
        # The lookup above is non-fatal, so only add a track when a captions
        # URL was actually found; otherwise an entry without a usable URL
        # would be placed at the head of the en-US list.
        if captions:
            # Slice-assign at [:0] so the page's caption file is listed before
            # any en-US tracks that came from the HLS manifest.
            subtitles.setdefault('en-US', [])[:0] = [{'url': urljoin(urlbase, captions)}]

        # NB: Most of the data in the json_ld is undesirable
        json_ld = self._search_json_ld(webpage, video_id, fatal=False)

        return {
            'id': video_id,
            'title': self._html_search_regex(
                r'\bdata-title\s*=\s*"([^"]+)"', player, 'title',
                default=json_ld.get('title')),
            'description': self._html_search_regex(
                r'(?s)<\s*div\s+class\s*=\s*"header-body"\s*>(.+?)<\s*/div\s*>',
                webpage, 'description', fatal=False),
            'duration': int_or_none(self._html_search_regex(
                r'\bdata-duration\s*=\s*"(\d+)"', player, 'duration', fatal=False)),
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': urljoin(urlbase, self._html_search_regex(
                r'\bdata-poster-url\s*=\s*"([^"]+)"', player, 'thumbnail', fatal=False)),
            'timestamp': json_ld.get('timestamp'),
        }