3 from .common
import InfoExtractor
19 class IcareusIE(InfoExtractor
):
20 _DOMAINS
= '|'.join(map(re
.escape
, (
27 'videos.minifiddlers.org',
29 _VALID_URL
= rf
'(?P<base_url>https?://(?:www\.)?(?:{_DOMAINS}))/[^?#]+/player/[^?#]+\?(?:[^#]+&)?(?:assetId|eventId)=(?P<id>\d+)'
31 'url': 'https://www.helsinkikanava.fi/fi_FI/web/helsinkikanava/player/vod?assetId=68021894',
32 'md5': 'ca0b62ffc814a5411dfa6349cf5adb8a',
36 'title': 'Perheiden parhaaksi',
37 'description': 'md5:295785ea408e5ac00708766465cc1325',
38 'thumbnail': 'https://www.helsinkikanava.fi/image/image_gallery?img_id=68022501',
39 'upload_date': '20200924',
40 'timestamp': 1600938300,
42 }, { # Recorded livestream
43 'url': 'https://www.helsinkikanava.fi/fi/web/helsinkikanava/player/event/view?eventId=76241489',
44 'md5': '014327e69dfa7b949fcc861f6d162d6d',
48 'title': 'Helsingin kaupungin ja HUSin tiedotustilaisuus koronaepidemiatilanteesta 24.11.2020',
49 'description': 'md5:3129d041c6fbbcdc7fe68d9a938fef1c',
50 'thumbnail': 'https://icareus-suite.secure2.footprint.net/image/image_gallery?img_id=76288630',
51 'upload_date': '20201124',
52 'timestamp': 1606206600,
54 }, { # Non-m3u8 stream
55 'url': 'https://suite.icareus.com/fi/web/westend-indians/player/vod?assetId=47567389',
56 'md5': '72fc04ee971bbedc44405cdf16c990b6',
60 'title': 'Omatoiminen harjoittelu - Laukominen',
62 'thumbnail': 'https://suite.icareus.com/image/image_gallery?img_id=47568162',
63 'upload_date': '20200319',
64 'timestamp': 1584658080,
67 'url': 'https://asahitv.fi/fi/web/asahi/player/vod?assetId=89415818',
68 'only_matching': True,
70 'url': 'https://hyvinvointitv.fi/fi/web/hyvinvointitv/player/vod?assetId=89149730',
71 'only_matching': True,
73 'url': 'https://inez.fi/fi/web/inez-media/player/vod?assetId=71328822',
74 'only_matching': True,
76 'url': 'https://www.permanto.fi/fi/web/alfatv/player/vod?assetId=135497515',
77 'only_matching': True,
79 'url': 'https://videos.minifiddlers.org/web/international-minifiddlers/player/vod?assetId=1982759',
80 'only_matching': True,
83 def _real_extract(self
, url
):
84 base_url
, temp_id
= self
._match
_valid
_url
(url
).groups()
85 webpage
= self
._download
_webpage
(url
, temp_id
)
87 video_id
= self
._search
_regex
(r
"_icareus\['itemId'\]\s*=\s*'(\d+)'", webpage
, 'video_id')
88 organization_id
= self
._search
_regex
(r
"_icareus\['organizationId'\]\s*=\s*'(\d+)'", webpage
, 'organization_id')
90 assets
= self
._download
_json
(
91 self
._search
_regex
(r
'var\s+publishingServiceURL\s*=\s*"(http[^"]+)";', webpage
, 'api_base'),
92 video_id
, data
=urlencode_postdata({
94 'action': 'getAssetPlaybackUrls',
95 'organizationId': organization_id
,
97 'token': self
._search
_regex
(r
"_icareus\['token'\]\s*=\s*'([a-f0-9]+)'", webpage
, 'icareus_token'),
101 remove_end(sdesc
.split(' ')[0], ':'): [{'url': url_or_none(surl
)}]
102 for _
, sdesc
, surl
in assets
.get('subtitles') or []
106 'format': item
.get('name'),
107 'format_id': 'audio',
109 'url': url_or_none(item
['url']),
110 'tbr': int_or_none(self
._search
_regex
(
111 r
'\((\d+)\s*k\)', item
.get('name') or '', 'audio bitrate', default
=None)),
112 } for item
in assets
.get('audio_urls') or [] if url_or_none(item
.get('url'))]
114 for item
in assets
.get('urls') or []:
115 video_url
= url_or_none(item
.get('url'))
116 if video_url
is None:
118 ext
= determine_ext(video_url
)
120 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
121 video_url
, video_id
, 'mp4', m3u8_id
='hls', fatal
=False)
123 self
._merge
_subtitles
(subs
, target
=subtitles
)
125 fmt
= item
.get('name')
129 'tbr': parse_bitrate(fmt
),
130 'format_id': str_or_none(item
.get('id')),
131 **parse_resolution(fmt
),
134 info
, token
, live_title
= self
._search
_json
_ld
(webpage
, video_id
, default
={}), None, None
136 token
= self
._search
_regex
(
137 r
'data\s*:\s*{action:"getAsset".*?token:\'([a
-f0
-9]+)\'}', webpage, 'token
', default=None)
139 live_title = get_element_by_class('unpublished
-info
-item future
-event
-title
', webpage)
142 metadata = self._download_json(
143 f'{base_url}
/icareus
-suite
-api
-portlet
/publishing
',
144 video_id, fatal=False, data=urlencode_postdata({
146 'action
': 'getAsset
',
147 'organizationId
': organization_id,
149 'languageId
': 'en_US
',
154 'title
': metadata.get('name
'),
155 'description
': metadata.get('description
'),
156 'timestamp
': int_or_none(metadata.get('date
'), scale=1000),
157 'duration
': int_or_none(metadata.get('duration
')),
158 'thumbnail
': url_or_none(metadata.get('thumbnailMedium
')),
160 elif live_title: # Recorded livestream
163 'description
': get_element_by_class('unpublished
-info
-item future
-event
-description
', webpage),
164 'timestamp
': int_or_none(self._search_regex(
165 r'var startEvent\s
*=\s
*(\d
+);', webpage, 'uploadDate
', fatal=False), scale=1000),
168 thumbnails = info.get('thumbnails
') or [{
169 'url
': url_or_none(info.get('thumbnail
') or assets.get('thumbnail
')),
176 'subtitles
': subtitles,
177 'description
': clean_html(info.get('description
')),
178 'thumbnails
': thumbnails if thumbnails[0]['url
'] else None,