8 from .common
import InfoExtractor
16 srt_subtitles_timecode
,
21 class PanoptoBaseIE(InfoExtractor
):
# Matches the scheme, a host ending in ".panopto.com" or ".panopto.eu", and the
# "/Panopto" path prefix; the whole match is captured as the named group "base_url".
# Both dots around "panopto" are escaped so that only a literal "." is accepted
# before the TLD (an unescaped "." would also accept hosts like "panoptoXcom").
BASE_URL_RE = r'(?P<base_url>https?://[\w.-]+\.panopto\.(?:com|eu)/Panopto)'
50 def _call_api(self
, base_url
, path
, video_id
, data
=None, fatal
=True, **kwargs
):
51 response
= self
._download
_json
(
52 base_url
+ path
, video_id
, data
=json
.dumps(data
).encode('utf8') if data
else None,
53 fatal
=fatal
, headers
={'accept': 'application/json', 'content-type': 'application/json'}, **kwargs
)
56 error_code
= traverse_obj(response
, 'ErrorCode')
58 self
.raise_login_required(method
='cookies')
59 elif error_code
is not None:
60 msg
= f
'Panopto said: {response.get("ErrorMessage")}'
62 raise ExtractorError(msg
, video_id
=video_id
, expected
=True)
64 self
.report_warning(msg
, video_id
=video_id
)
68 def _parse_fragment(url
):
69 return {k
: json
.loads(v
[0]) for k
, v
in urllib
.parse
.parse_qs(urllib
.parse
.urlparse(url
).fragment
).items()}
72 class PanoptoIE(PanoptoBaseIE
):
73 _VALID_URL
= PanoptoBaseIE
.BASE_URL_RE
+ r
'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)id=(?P<id>[a-f0-9-]+)'
74 _EMBED_REGEX
= [rf
'<iframe[^>]+src=["\'](?P<url>{PanoptoBaseIE.BASE_URL_RE}/Pages/(Viewer|Embed|Sessions/List)\.aspx[^"\']+)']
77 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
79 'id': '26b3ae9e-4a48-4dcc-96ba-0befba08a0fb',
80 'title': 'Panopto for Business - Use Cases',
81 'timestamp': 1459184200,
82 'thumbnail': r
're:https://demo\.hosted\.panopto\.com/.+',
83 'upload_date': '20160328',
87 'duration': 88.17099999999999,
88 'average_rating': int,
89 'uploader_id': '2db6b718-47a0-4b0b-9e17-ab0b00f42b1e',
90 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
91 'channel': 'Showcase Videos',
95 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
97 'id': 'ed01b077-c9e5-4c7b-b8ff-15fa306d7a59',
98 'title': 'Overcoming Top 4 Challenges of Enterprise Video',
99 'uploader': 'Panopto Support',
100 'timestamp': 1449409251,
101 'thumbnail': r
're:https://demo\.hosted\.panopto\.com/.+',
102 'upload_date': '20151206',
104 'chapters': 'count:12',
105 'cast': ['Panopto Support'],
106 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
107 'average_rating': int,
108 'description': 'md5:4391837802b3fc856dadf630c4b375d1',
109 'duration': 1088.2659999999998,
110 'channel_id': '9f3c1921-43bb-4bda-8b3a-b8d2f05a8546',
111 'channel': 'Webcasts',
115 # Extra params in URL
116 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?randomparam=thisisnotreal&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
118 'id': '5fa74e93-3d87-4694-b60e-aaa4012214ed',
121 'cast': ['Kathryn Kelly'],
122 'uploader_id': '316a0a58-7fa2-4cd9-be1c-64270d284a56',
123 'timestamp': 1569845768,
124 'tags': ['Viewer', 'Enterprise'],
126 'upload_date': '20190930',
127 'thumbnail': r
're:https://howtovideos\.hosted\.panopto\.com/.+',
128 'description': 'md5:2d844aaa1b1a14ad0e2601a0993b431f',
129 'title': 'Getting Started: View a Video',
130 'average_rating': int,
131 'uploader': 'Kathryn Kelly',
132 'channel_id': 'fb93bc3c-6750-4b80-a05b-a921013735d3',
133 'channel': 'Getting Started',
137 # Does not allow normal Viewer.aspx. AUDIO livestream has no url, so should be skipped and only give one stream.
138 'url': 'https://unisa.au.panopto.com/Panopto/Pages/Embed.aspx?id=9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
140 'id': '9d9a0fa3-e99a-4ebd-a281-aac2017f4da4',
142 'cast': ['LTS CLI Script'],
145 'description': 'md5:ee5cf653919f55b72bce2dbcf829c9fa',
146 'channel_id': 'b23e673f-c287-4cb1-8344-aae9005a69f8',
147 'average_rating': int,
148 'uploader_id': '38377323-6a23-41e2-9ff6-a8e8004bf6f7',
149 'uploader': 'LTS CLI Script',
150 'timestamp': 1572458134,
151 'title': 'WW2 Vets Interview 3 Ronald Stanley George',
152 'thumbnail': r
're:https://unisa\.au\.panopto\.com/.+',
153 'channel': 'World War II Veteran Interviews',
154 'upload_date': '20191030',
159 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=a7f12f1d-3872-4310-84b0-f8d8ab15326b',
161 'id': 'a7f12f1d-3872-4310-84b0-f8d8ab15326b',
163 'timestamp': 1448798857,
164 'duration': 4712.681,
165 'title': 'Cache Memory - CompSci 15-213, Lecture 12',
166 'channel_id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
167 'uploader_id': 'a96d1a31-b4de-489b-9eee-b4a5b414372c',
168 'upload_date': '20151129',
170 'uploader': 'Panopto Support',
171 'channel': 'Showcase Videos',
172 'description': 'md5:55e51d54233ddb0e6c2ed388ca73822c',
173 'cast': ['ISR Videographer', 'Panopto Support'],
174 'chapters': 'count:28',
175 'thumbnail': r
're:https://demo\.hosted\.panopto\.com/.+',
177 'params': {'format': 'mhtml', 'skip_download': True},
180 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=8285224a-9a2b-4957-84f2-acb0000c4ea9',
182 'id': '8285224a-9a2b-4957-84f2-acb0000c4ea9',
185 'title': 'Company Policy',
187 'timestamp': 1615058901,
188 'channel': 'Human Resources',
189 'tags': ['HumanResources'],
190 'duration': 1604.243,
191 'thumbnail': r
're:https://na-training-1\.hosted\.panopto\.com/.+',
192 'uploader_id': '8e8ba0a3-424f-40df-a4f1-ab3a01375103',
193 'uploader': 'Cait M.',
194 'upload_date': '20210306',
196 'subtitles': {'en-US': [{'ext': 'srt', 'data': 'md5:a3f4d25963fdeace838f327097c13265'}],
197 'es-ES': [{'ext': 'srt', 'data': 'md5:57e9dad365fd0fbaf0468eac4949f189'}]},
199 'params': {'writesubtitles': True, 'skip_download': True},
201 # On Panopto there are two subs: "Default" and en-US. en-US is blank and should be skipped.
202 'url': 'https://na-training-1.hosted.panopto.com/Panopto/Pages/Viewer.aspx?id=940cbd41-f616-4a45-b13e-aaf1000c915b',
204 'id': '940cbd41-f616-4a45-b13e-aaf1000c915b',
206 'subtitles': 'count:1',
207 'title': 'HR Benefits Review Meeting*',
208 'cast': ['Panopto Support'],
210 'timestamp': 1575024251,
211 'thumbnail': r
're:https://na-training-1\.hosted\.panopto\.com/.+',
213 'description': 'md5:04f90a9c2c68b7828144abfb170f0106',
214 'uploader': 'Panopto Support',
216 'duration': 409.34499999999997,
217 'uploader_id': 'b6ac04ad-38b8-4724-a004-a851004ea3df',
218 'upload_date': '20191129',
221 'params': {'writesubtitles': True, 'skip_download': True},
224 'url': 'https://ucc.cloud.panopto.eu/Panopto/Pages/Viewer.aspx?id=0e8484a4-4ceb-4d98-a63f-ac0200b455cb',
225 'only_matching': True,
228 'url': 'https://brown.hosted.panopto.com/Panopto/Pages/Embed.aspx?id=0b3ff73b-36a0-46c5-8455-aadf010a3638',
229 'only_matching': True,
def suitable(cls, url):
    """Report whether this extractor should handle *url*.

    Any URL that ``PanoptoPlaylistIE.suitable`` accepts is declined here;
    every other URL is judged by the parent class's ``suitable``.
    """
    if PanoptoPlaylistIE.suitable(url):
        # Leave this URL to the playlist extractor
        return False
    return super().suitable(url)
237 def _mark_watched(self
, base_url
, video_id
, delivery_info
):
238 duration
= traverse_obj(delivery_info
, ('Delivery', 'Duration'), expected_type
=float)
239 invocation_id
= delivery_info
.get('InvocationId')
240 stream_id
= traverse_obj(delivery_info
, ('Delivery', 'Streams', ..., 'PublicID'), get_all
=False, expected_type
=str)
241 if invocation_id
and stream_id
and duration
:
242 timestamp_str
= f
'/Date({calendar.timegm(dt.datetime.now(dt.timezone.utc).timetuple())}000)/'
246 'ClientTimeStamp': timestamp_str
,
248 'InvocationID': invocation_id
,
250 'SecondsListened': duration
- 1,
251 'SecondsRejected': 0,
255 'StreamID': stream_id
,
256 'TimeStamp': timestamp_str
,
257 'UpdatesRejected': 0,
261 self
._download
_webpage
(
262 base_url
+ '/Services/Analytics.svc/AddStreamRequests', video_id
,
263 fatal
=False, data
=json
.dumps(data
).encode('utf8'), headers
={'content-type': 'application/json'},
264 note
='Marking watched', errnote
='Unable to mark watched')
267 def _extract_chapters(timestamps
):
269 for timestamp
in timestamps
or []:
270 caption
= timestamp
.get('Caption')
271 start
, duration
= int_or_none(timestamp
.get('Time')), int_or_none(timestamp
.get('Duration'))
272 if not caption
or start
is None or duration
is None:
276 'end_time': start
+ duration
,
282 def _extract_mhtml_formats(base_url
, timestamps
):
284 for timestamp
in timestamps
or []:
285 duration
= timestamp
.get('Duration')
286 obj_id
, obj_sn
= timestamp
.get('ObjectIdentifier'), timestamp
.get('ObjectSequenceNumber')
287 if timestamp
.get('EventTargetType') == 'PowerPoint' and obj_id
is not None and obj_sn
is not None:
288 image_frags
.setdefault('slides', []).append({
289 'url': base_url
+ f
'/Pages/Viewer/Image.aspx?id={obj_id}&number={obj_sn}',
290 'duration': duration
,
293 obj_pid
, session_id
, abs_time
= timestamp
.get('ObjectPublicIdentifier'), timestamp
.get('SessionID'), timestamp
.get('AbsoluteTime')
294 if None not in (obj_pid
, session_id
, abs_time
):
295 image_frags
.setdefault('chapter', []).append({
296 'url': base_url
+ f
'/Pages/Viewer/Thumb.aspx?eventTargetPID={obj_pid}&sessionPID={session_id}&number={obj_sn}&isPrimary=false&absoluteTime={abs_time}',
297 'duration': duration
,
299 for name
, fragments
in image_frags
.items():
306 'url': 'about:invalid',
307 'fragments': fragments
,
311 def _json2srt(data
, delivery
):
313 for i
, line
in enumerate(data
):
314 start_time
= line
['Time']
315 duration
= line
.get('Duration')
317 end_time
= start_time
+ duration
319 end_time
= traverse_obj(data
, (i
+ 1, 'Time')) or delivery
['Duration']
320 yield f
'{i + 1}\n{srt_subtitles_timecode(start_time)} --> {srt_subtitles_timecode(end_time)}\n{line["Caption"]}'
321 return '\n\n'.join(_gen_lines())
323 def _get_subtitles(self
, base_url
, video_id
, delivery
):
325 for lang
in delivery
.get('AvailableLanguages') or []:
326 response
= self
._call
_api
(
327 base_url
, '/Pages/Viewer/DeliveryInfo.aspx', video_id
, fatal
=False,
328 note
='Downloading captions JSON metadata', query
={
329 'deliveryId': video_id
,
331 'language': str(lang
),
332 'responseType': 'json',
335 if not isinstance(response
, list):
337 subtitles
.setdefault(self
._SUB
_LANG
_MAPPING
.get(lang
) or 'default', []).append({
339 'data': self
._json
2srt
(response
, delivery
),
343 def _extract_streams_formats_and_subtitles(self
, video_id
, streams
, **fmt_kwargs
):
346 for stream
in streams
or []:
348 http_stream_url
= stream
.get('StreamHttpUrl')
349 stream_url
= stream
.get('StreamUrl')
352 stream_formats
.append({'url': http_stream_url
})
355 media_type
= stream
.get('ViewerMediaFileTypeName')
356 if media_type
in ('hls', ):
357 m3u8_formats
, stream_subtitles
= self
._extract
_m
3u8_formats
_and
_subtitles
(stream_url
, video_id
)
358 stream_formats
.extend(m3u8_formats
)
359 subtitles
= self
._merge
_subtitles
(subtitles
, stream_subtitles
)
361 stream_formats
.append({
364 for fmt
in stream_formats
:
366 'format_note': stream
.get('Tag'),
369 formats
.extend(stream_formats
)
371 return formats
, subtitles
373 def _real_extract(self
, url
):
374 base_url
, video_id
= self
._match
_valid
_url
(url
).group('base_url', 'id')
375 delivery_info
= self
._call
_api
(
376 base_url
, '/Pages/Viewer/DeliveryInfo.aspx', video_id
,
378 'deliveryId': video_id
,
380 'isLiveNotes': 'false',
381 'refreshAuthCookie': 'true',
382 'isActiveBroadcast': 'false',
383 'isEditing': 'false',
384 'isKollectiveAgentInstalled': 'false',
386 'responseType': 'json',
390 delivery
= delivery_info
['Delivery']
391 session_start_time
= int_or_none(delivery
.get('SessionStartTime'))
392 timestamps
= delivery
.get('Timestamps')
394 # Podcast stream is usually the combined streams. We will prefer that by default.
395 podcast_formats
, podcast_subtitles
= self
._extract
_streams
_formats
_and
_subtitles
(
396 video_id
, delivery
.get('PodcastStreams'), format_note
='PODCAST')
398 streams_formats
, streams_subtitles
= self
._extract
_streams
_formats
_and
_subtitles
(
399 video_id
, delivery
.get('Streams'), preference
=-10)
401 formats
= podcast_formats
+ streams_formats
402 formats
.extend(self
._extract
_mhtml
_formats
(base_url
, timestamps
))
403 subtitles
= self
._merge
_subtitles
(
404 podcast_subtitles
, streams_subtitles
, self
.extract_subtitles(base_url
, video_id
, delivery
))
406 self
.mark_watched(base_url
, video_id
, delivery_info
)
410 'title': delivery
.get('SessionName'),
411 'cast': traverse_obj(delivery
, ('Contributors', ..., 'DisplayName'), expected_type
=lambda x
: x
or None),
412 'timestamp': session_start_time
- 11640000000 if session_start_time
else None,
413 'duration': delivery
.get('Duration'),
414 'thumbnail': base_url
+ f
'/Services/FrameGrabber.svc/FrameRedirect?objectId={video_id}&mode=Delivery&random={random.random()}',
415 'average_rating': delivery
.get('AverageRating'),
416 'chapters': self
._extract
_chapters
(timestamps
),
417 'uploader': delivery
.get('OwnerDisplayName') or None,
418 'uploader_id': delivery
.get('OwnerId'),
419 'description': delivery
.get('SessionAbstract'),
420 'tags': traverse_obj(delivery
, ('Tags', ..., 'Content')),
421 'channel_id': delivery
.get('SessionGroupPublicID'),
422 'channel': traverse_obj(delivery
, 'SessionGroupLongName', 'SessionGroupShortName', get_all
=False),
424 'subtitles': subtitles
,
428 class PanoptoPlaylistIE(PanoptoBaseIE
):
429 _VALID_URL
= PanoptoBaseIE
.BASE_URL_RE
+ r
'/Pages/(Viewer|Embed)\.aspx.*(?:\?|&)pid=(?P<id>[a-f0-9-]+)'
432 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=f3b39fcf-882f-4849-93d6-a9f401236d36&id=5fa74e93-3d87-4694-b60e-aaa4012214ed&advance=true',
434 'title': 'Featured Video Tutorials',
435 'id': 'f3b39fcf-882f-4849-93d6-a9f401236d36',
438 'playlist_mincount': 36,
441 'url': 'https://utsa.hosted.panopto.com/Panopto/Pages/Viewer.aspx?pid=e2900555-3ad4-4bdb-854d-ad2401686190',
443 'title': 'Library Website Introduction Playlist',
444 'id': 'e2900555-3ad4-4bdb-854d-ad2401686190',
445 'description': 'md5:f958bca50a1cbda15fdc1e20d32b3ecb',
447 'playlist_mincount': 4,
452 def _entries(self
, base_url
, playlist_id
, session_list_id
):
453 session_list_info
= self
._call
_api
(
454 base_url
, f
'/Api/SessionLists/{session_list_id}?collections[0].maxCount=500&collections[0].name=items', playlist_id
)
456 items
= session_list_info
['Items']
458 if item
.get('TypeName') != 'Session':
459 self
.report_warning('Got an item in the playlist that is not a Session' + bug_reports_message(), only_once
=True)
463 'id': item
.get('Id'),
464 'url': item
.get('ViewerUri'),
465 'title': item
.get('Name'),
466 'description': item
.get('Description'),
467 'duration': item
.get('Duration'),
468 'channel': traverse_obj(item
, ('Parent', 'Name')),
469 'channel_id': traverse_obj(item
, ('Parent', 'Id')),
472 def _real_extract(self
, url
):
473 base_url
, playlist_id
= self
._match
_valid
_url
(url
).group('base_url', 'id')
475 video_id
= get_first(parse_qs(url
), 'id')
477 if self
.get_param('noplaylist'):
478 self
.to_screen(f
'Downloading just video {video_id} because of --no-playlist')
479 return self
.url_result(base_url
+ f
'/Pages/Viewer.aspx?id={video_id}', ie_key
=PanoptoIE
.ie_key(), video_id
=video_id
)
481 self
.to_screen(f
'Downloading playlist {playlist_id}; add --no-playlist to just download video {video_id}')
483 playlist_info
= self
._call
_api
(base_url
, f
'/Api/Playlists/{playlist_id}', playlist_id
)
484 return self
.playlist_result(
485 self
._entries
(base_url
, playlist_id
, playlist_info
['SessionListId']),
486 playlist_id
=playlist_id
, playlist_title
=playlist_info
.get('Name'),
487 playlist_description
=playlist_info
.get('Description'))
490 class PanoptoListIE(PanoptoBaseIE
):
491 _VALID_URL
= PanoptoBaseIE
.BASE_URL_RE
+ r
'/Pages/Sessions/List\.aspx'
495 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#folderID=%22e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a%22',
497 'id': 'e4c6a2fc-1214-4ca0-8fb7-aef2e29ff63a',
498 'title': 'Showcase Videos',
500 'playlist_mincount': 140,
504 'url': 'https://demo.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx#view=2&maxResults=250',
506 'id': 'panopto_list',
507 'title': 'panopto_list',
509 'playlist_mincount': 300,
512 # Folder that contains 8 folders and a playlist
513 'url': 'https://howtovideos.hosted.panopto.com/Panopto/Pages/Sessions/List.aspx?noredirect=true#folderID=%224b9de7ae-0080-4158-8496-a9ba01692c2e%22',
515 'id': '4b9de7ae-0080-4158-8496-a9ba01692c2e',
516 'title': 'Video Tutorials',
518 'playlist_mincount': 9,
523 def _fetch_page(self
, base_url
, query_params
, display_id
, page
):
527 'getFolderData': True,
528 'includePlaylists': True,
531 'maxResults': self
._PAGE
_SIZE
,
534 response
= self
._call
_api
(
535 base_url
, '/Services/Data.svc/GetSessions', f
'{display_id} page {page + 1}',
536 data
={'queryParameters': params
}, fatal
=False)
538 for result
in get_first(response
, 'Results', default
=[]):
539 # This could be a video, playlist (or maybe something else)
540 item_id
= result
.get('DeliveryID')
544 'title': result
.get('SessionName'),
545 'url': traverse_obj(result
, 'ViewerUrl', 'EmbedUrl', get_all
=False) or (base_url
+ f
'/Pages/Viewer.aspx?id={item_id}'),
546 'duration': result
.get('Duration'),
547 'channel': result
.get('FolderName'),
548 'channel_id': result
.get('FolderID'),
551 for folder
in get_first(response
, 'Subfolders', default
=[]):
552 folder_id
= folder
.get('ID')
553 yield self
.url_result(
554 base_url
+ f
'/Pages/Sessions/List.aspx#folderID="{folder_id}"',
555 ie_key
=PanoptoListIE
.ie_key(), video_id
=folder_id
, title
=folder
.get('Name'))
def _extract_folder_metadata(self, base_url, folder_id):
    """Look up a folder via the ``GetFolderInfo`` service and return its metadata.

    The request is non-fatal (``fatal=False``), so *folder_info* may hold no
    usable response; ``get_first`` is then asked for ``'Name'`` on whatever
    came back.

    Returns a dict with the single key ``'title'``.
    """
    folder_info = self._call_api(
        base_url, '/Services/Data.svc/GetFolderInfo', folder_id,
        data={'folderID': folder_id}, fatal=False)
    folder_title = get_first(folder_info, 'Name')
    return {
        'title': folder_title,
    }
565 def _real_extract(self
, url
):
566 mobj
= self
._match
_valid
_url
(url
)
567 base_url
= mobj
.group('base_url')
569 query_params
= self
._parse
_fragment
(url
)
570 folder_id
, display_id
= query_params
.get('folderID'), 'panopto_list'
572 if query_params
.get('isSubscriptionsPage'):
573 display_id
= 'subscriptions'
574 if not query_params
.get('subscribableTypes'):
575 query_params
['subscribableTypes'] = [0, 1, 2]
576 elif query_params
.get('isSharedWithMe'):
577 display_id
= 'sharedwithme'
579 display_id
= folder_id
581 query
= query_params
.get('query')
583 display_id
+= f
': query "{query}"'
591 info
.update(self
._extract
_folder
_metadata
(base_url
, folder_id
))
593 info
['entries'] = OnDemandPagedList(
594 functools
.partial(self
._fetch
_page
, base_url
, query_params
, display_id
), self
._PAGE
_SIZE
)