8 from .common
import InfoExtractor
9 from .slideslive
import SlidesLiveIE
21 class VideoKenBaseIE(InfoExtractor
):
23 'videos.icts.res.in': 'icts',
24 'videos.cncf.io': 'cncf',
25 'videos.neurips.cc': 'neurips',
27 _BASE_URL_RE
= rf
'https?://(?P<host>{"|".join(map(re.escape, _ORGANIZATIONS))})/'
def _get_org_id_and_api_key(self, org, video_id):
    """Fetch the VideoKen organization ID and API key for *org*.

    Requests the ``videolake/{org}/details`` endpoint of the VideoKen
    analytics API and returns the ``id`` and ``apikey`` fields of the JSON
    response as the tuple ``(org_id, api_key)``.

    *video_id* is only handed to ``_download_json`` as the identifier of
    the item the request is made for.
    """
    # NOTE(review): the line that closes this call was absent from the
    # reviewed excerpt; it is assumed to close the headers dict and the call
    # without further arguments -- confirm against the original file.
    details = self._download_json(
        f'https://analytics.videoken.com/api/videolake/{org}/details', video_id,
        note='Downloading organization ID and API key',
        headers={'Accept': 'application/json'})
    return details['id'], details['apikey']
def _create_slideslive_url(self, video_url, video_id, referer):
    """Return a SlidesLive embed URL, annotated with the referer if valid.

    When *video_url* is empty or points at an ``embed/sign-in`` page, the
    URL is rebuilt from *video_id* with any ``slideslive-`` prefix removed.
    When *referer* passes ``url_or_none``, it is attached to the result as
    the ``embed_parent_url`` and ``embed_container_origin`` query
    parameters.
    """
    if not video_url and not video_id:
        # NOTE(review): this branch body was absent from the reviewed
        # excerpt; returning None (nothing to build a URL from) is assumed.
        return None
    if not video_url or 'embed/sign-in' in video_url:
        video_url = f'https://slideslive.com/embed/{remove_start(video_id, "slideslive-")}'

    if not url_or_none(referer):
        # NOTE(review): the fall-through return was absent from the reviewed
        # excerpt; returning the un-annotated URL is assumed -- confirm.
        return video_url

    return update_url_query(video_url, {
        'embed_parent_url': referer,
        'embed_container_origin': f'https://{urllib.parse.urlparse(referer).hostname}',
    })
51 def _extract_videos(self
, videos
, url
):
52 for video
in traverse_obj(videos
, (('videos', 'results'), ...)):
53 video_id
= traverse_obj(video
, 'youtube_id', 'videoid')
57 if traverse_obj(video
, 'type', 'source') == 'youtube':
61 video_url
= traverse_obj(video
, 'embed_url', 'embeddableurl', expected_type
=url_or_none
)
64 elif urllib
.parse
.urlparse(video_url
).hostname
== 'slideslive.com':
66 video_url
= self
._create
_slideslive
_url
(video_url
, video_id
, url
)
67 yield self
.url_result(video_url
, ie_key
, video_id
)
70 class VideoKenIE(VideoKenBaseIE
):
71 _VALID_URL
= VideoKenBaseIE
._BASE
_URL
_RE
+ r
'(?:(?:topic|category)/[^/#?]+/)?video/(?P<id>[\w-]+)'
73 # neurips -> videoken -> slideslive
74 'url': 'https://videos.neurips.cc/video/slideslive-38922815',
78 'title': 'Efficient Processing of Deep Neural Network: from Algorithms to Hardware Architectures',
79 'timestamp': 1630939331,
80 'upload_date': '20210906',
81 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
82 'thumbnails': 'count:330',
83 'chapters': 'count:329',
86 'skip_download': 'm3u8',
88 'expected_warnings': ['Failed to download VideoKen API JSON'],
90 # neurips -> videoken -> slideslive -> youtube
91 'url': 'https://videos.neurips.cc/topic/machine%20learning/video/slideslive-38923348',
95 'display_id': '38923348',
96 'title': 'Machine Education',
97 'description': 'Watch full version of this video at https://slideslive.com/38923348.',
98 'channel': 'SlidesLive Videos - G2',
99 'channel_id': 'UCOExahQQ588Da8Nft_Ltb9w',
100 'channel_url': 'https://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
101 'uploader': 'SlidesLive Videos - G2',
102 'uploader_id': 'UCOExahQQ588Da8Nft_Ltb9w',
103 'uploader_url': 'http://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
105 'timestamp': 1618922125,
106 'upload_date': '20200131',
108 'channel_follower_count': int,
110 'availability': 'unlisted',
111 'live_status': 'not_live',
112 'playable_in_embed': True,
113 'categories': ['People & Blogs'],
115 'thumbnail': r
're:^https?://.*\.(?:jpg|webp)',
116 'thumbnails': 'count:78',
117 'chapters': 'count:77',
120 'skip_download': 'm3u8',
122 'expected_warnings': ['Failed to download VideoKen API JSON'],
124 # icts -> videoken -> youtube
125 'url': 'https://videos.icts.res.in/topic/random%20variable/video/zysIsojYdvc',
129 'title': 'Small-worlds, complex networks and random graphs (Lecture 3) by Remco van der Hofstad',
130 'description': 'md5:87433069d79719eeadc1962cc2ace00b',
131 'channel': 'International Centre for Theoretical Sciences',
132 'channel_id': 'UCO3xnVTHzB7l-nc8mABUJIQ',
133 'channel_url': 'https://www.youtube.com/channel/UCO3xnVTHzB7l-nc8mABUJIQ',
134 'uploader': 'International Centre for Theoretical Sciences',
135 'uploader_id': 'ICTStalks',
136 'uploader_url': 'http://www.youtube.com/user/ICTStalks',
138 'upload_date': '20191004',
140 'live_status': 'not_live',
141 'availability': 'public',
142 'playable_in_embed': True,
143 'channel_follower_count': int,
146 'categories': ['Science & Technology'],
148 'thumbnail': r
're:^https?://.*\.(?:jpg|webp)',
149 'thumbnails': 'count:42',
150 'chapters': 'count:20',
153 'skip_download': 'm3u8',
156 'url': 'https://videos.cncf.io/category/478/video/IL4nxbmUIX8',
157 'only_matching': True,
159 'url': 'https://videos.cncf.io/topic/kubernetes/video/YAM2d7yTrrI',
160 'only_matching': True,
162 'url': 'https://videos.icts.res.in/video/d7HuP_abpKU',
163 'only_matching': True,
166 def _real_extract(self
, url
):
167 hostname
, video_id
= self
._match
_valid
_url
(url
).group('host', 'id')
168 org_id
, _
= self
._get
_org
_id
_and
_api
_key
(self
._ORGANIZATIONS
[hostname
], video_id
)
169 details
= self
._download
_json
(
170 'https://analytics.videoken.com/api/videoinfo_private', video_id
, query
={
173 }, headers
={'Accept': 'application/json'}, note
='Downloading VideoKen API JSON',
174 errnote
='Failed to download VideoKen API JSON', fatal
=False)
176 return next(self
._extract
_videos
({'videos': [details
]}, url
))
177 # fallback for API error 400 response
178 elif video_id
.startswith('slideslive-'):
179 return self
.url_result(
180 self
._create
_slideslive
_url
(None, video_id
, url
), SlidesLiveIE
, video_id
)
181 elif re
.match(r
'^[\w-]{11}$', video_id
):
182 return self
.url_result(video_id
, 'Youtube', video_id
)
184 raise ExtractorError('Unable to extract without VideoKen API response')
187 class VideoKenPlayerIE(VideoKenBaseIE
):
188 _VALID_URL
= r
'https?://player\.videoken\.com/embed/slideslive-(?P<id>\d+)'
190 'url': 'https://player.videoken.com/embed/slideslive-38968434',
194 'title': 'Deep Learning with Label Differential Privacy',
195 'timestamp': 1643377020,
196 'upload_date': '20220128',
197 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
198 'thumbnails': 'count:30',
199 'chapters': 'count:29',
202 'skip_download': 'm3u8',
def _real_extract(self, url):
    """Hand a VideoKen player embed URL over to the SlidesLive extractor.

    The ID matched from *url* is turned into a SlidesLive embed URL, with
    *url* passed along as the referer, and returned as a URL result for
    ``SlidesLiveIE``.
    """
    video_id = self._match_id(url)
    # No embed URL is known at this point, so None is passed and the
    # helper derives the URL from the ID alone.
    slideslive_url = self._create_slideslive_url(None, video_id, url)
    return self.url_result(slideslive_url, SlidesLiveIE, video_id)
212 class VideoKenPlaylistIE(VideoKenBaseIE
):
213 _VALID_URL
= VideoKenBaseIE
._BASE
_URL
_RE
+ r
'(?:category/\d+/)?playlist/(?P<id>\d+)'
215 'url': 'https://videos.icts.res.in/category/1822/playlist/381',
216 'playlist_mincount': 117,
219 'title': 'Cosmology - The Next Decade',
def _real_extract(self, url):
    """Extract a VideoKen playlist as a playlist result.

    Resolves the organization ID for the matched host, downloads the
    playlist items JSON for the matched playlist ID and returns the
    entries produced by ``_extract_videos`` together with the ``title``
    field of the response (if present).
    """
    hostname, playlist_id = self._match_valid_url(url).group('host', 'id')
    # Only the organization ID is needed here; the API key is discarded.
    org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], playlist_id)

    videos = self._download_json(
        f'https://analytics.videoken.com/api/{org_id}/playlistitems/{playlist_id}/',
        playlist_id, headers={'Accept': 'application/json'}, note='Downloading API JSON')

    entries = self._extract_videos(videos, url)
    return self.playlist_result(entries, playlist_id, videos.get('title'))
232 class VideoKenCategoryIE(VideoKenBaseIE
):
233 _VALID_URL
= VideoKenBaseIE
._BASE
_URL
_RE
+ r
'category/(?P<id>\d+)/?(?:$|[?#])'
235 'url': 'https://videos.icts.res.in/category/1822/',
236 'playlist_mincount': 500,
242 'url': 'https://videos.neurips.cc/category/350/',
243 'playlist_mincount': 34,
246 'title': 'NeurIPS 2018',
249 'url': 'https://videos.cncf.io/category/479/',
250 'playlist_mincount': 328,
253 'title': 'KubeCon + CloudNativeCon Europe\'19',
257 def _get_category_page(self
, category_id
, org_id
, page
=1, note
=None):
258 return self
._download
_json
(
259 f
'https://analytics.videoken.com/api/videolake/{org_id}/category_videos', category_id
,
260 fatal
=False, note
=note
if note
else f
'Downloading category page {page}',
262 'category_id': category_id
,
264 'length': self
._PAGE
_SIZE
,
265 }, headers
={'Accept': 'application/json'}) or {}
def _entries(self, category_id, org_id, url, page):
    """Yield the URL results found on one page of a category.

    Downloads the category page numbered ``page + 1`` and yields every
    entry that ``_extract_videos`` produces from it, using *url* as the
    referer.
    """
    # NOTE(review): the +1 presumably converts a zero-based page index
    # supplied by the paged list into the one-based numbering suggested by
    # _get_category_page's default of page=1 -- confirm.
    page_data = self._get_category_page(category_id, org_id, page + 1)
    yield from self._extract_videos(page_data, url)
def _real_extract(self, url):
    """Extract a VideoKen category as a lazily paged playlist.

    Resolves the organization ID for the matched host, downloads the first
    category page to learn the total record count and the category name,
    and returns a playlist whose pages are fetched on demand through
    ``_entries``.

    Raises ExtractorError when the record count cannot be determined.
    """
    hostname, category_id = self._match_valid_url(url).group('host', 'id')
    # Only the organization ID is needed here; the API key is discarded.
    org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], category_id)

    category_info = self._get_category_page(category_id, org_id, note='Downloading category info')

    # _get_category_page downloads non-fatally and falls back to {}, so the
    # fields may be absent; fail with a clear message instead of a KeyError.
    total_records = int_or_none(category_info.get('recordsTotal'))
    if total_records is None:
        raise ExtractorError('Unable to extract category info')
    total_pages = math.ceil(total_records / self._PAGE_SIZE)

    page_func = functools.partial(self._entries, category_id, org_id, url)
    return self.playlist_result(
        InAdvancePagedList(page_func, total_pages, self._PAGE_SIZE),
        category_id, category_info.get('category_name'))
282 class VideoKenTopicIE(VideoKenBaseIE
):
283 _VALID_URL
= VideoKenBaseIE
._BASE
_URL
_RE
+ r
'topic/(?P<id>[^/#?]+)/?(?:$|[?#])'
285 'url': 'https://videos.neurips.cc/topic/machine%20learning/',
286 'playlist_mincount': 500,
288 'id': 'machine_learning',
289 'title': 'machine learning',
292 'url': 'https://videos.icts.res.in/topic/gravitational%20waves/',
293 'playlist_mincount': 77,
295 'id': 'gravitational_waves',
296 'title': 'gravitational waves',
299 'url': 'https://videos.cncf.io/topic/prometheus/',
300 'playlist_mincount': 134,
303 'title': 'prometheus',
307 def _get_topic_page(self
, topic
, org_id
, search_id
, api_key
, page
=1, note
=None):
308 return self
._download
_json
(
309 'https://es.videoken.com/api/v1.0/get_results', topic
, fatal
=False, query
={
311 'size': self
._PAGE
_SIZE
,
314 'sort': 'upload_desc',
319 'searchid': search_id
,
320 }, headers
={'Accept': 'application/json'},
321 note
=note
if note
else f
'Downloading topic page {page}') or {}
def _entries(self, topic, org_id, search_id, api_key, url, page):
    """Yield the URL results found on one page of topic search results.

    Downloads the topic page numbered ``page + 1`` and yields every entry
    that ``_extract_videos`` produces from it, using *url* as the referer.
    """
    # NOTE(review): the +1 presumably converts a zero-based page index
    # supplied by the paged list into the one-based numbering suggested by
    # _get_topic_page's default of page=1 -- confirm.
    page_data = self._get_topic_page(topic, org_id, search_id, api_key, page + 1)
    yield from self._extract_videos(page_data, url)
def _real_extract(self, url):
    """Extract a VideoKen topic as a lazily paged playlist.

    The percent-decoded topic from *url* is used as the playlist title and
    as the search term; the playlist ID is the same text with spaces
    replaced by underscores. The first results page is downloaded to learn
    the total number of pages, and the remaining pages are fetched on
    demand through ``_entries``.

    Raises ExtractorError when the page count cannot be determined.
    """
    hostname, topic_id = self._match_valid_url(url).group('host', 'id')
    topic = urllib.parse.unquote(topic_id)
    topic_id = topic.replace(' ', '_')
    org_id, api_key = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], topic)

    # The search ID is the base64 form of ":<topic>:<unix time>:transient".
    search_id = base64.b64encode(f':{topic}:{int(time.time())}:transient'.encode()).decode()

    topic_info = self._get_topic_page(
        topic, org_id, search_id, api_key, note='Downloading topic info')

    # _get_topic_page downloads non-fatally and falls back to {}, so the
    # field may be absent; fail with a clear message instead of a KeyError
    # or of handing None to the paged list as its page count.
    total_pages = int_or_none(topic_info.get('total_no_of_pages'))
    if total_pages is None:
        raise ExtractorError('Unable to extract topic info')

    page_func = functools.partial(self._entries, topic, org_id, search_id, api_key, url)
    return self.playlist_result(
        InAdvancePagedList(page_func, total_pages, self._PAGE_SIZE), topic_id, topic)