4 from .common
import InfoExtractor
5 from ..utils
import determine_ext
, int_or_none
, url_or_none
6 from ..utils
.traversal
import traverse_obj
class SharePointIE(InfoExtractor):
    """Extractor for videos hosted on ``*.sharepoint.com`` sites.

    Two URL shapes are matched:

    * ``:v:`` share links whose last path segment is a 46-character token
    * ``stream.aspx?id=<path>`` player pages (any path not starting with ``:v:``)
    """

    _BASE_URL_RE = r'https?://[\w-]+\.sharepoint\.com/'
    _VALID_URL = [
        rf'{_BASE_URL_RE}:v:/[a-z]/(?:[^/?#]+/)*(?P<id>[^/?#]{{46}})/?(?:$|[?#])',
        rf'{_BASE_URL_RE}(?!:v:)(?:[^/?#]+/)*stream\.aspx\?(?:[^#]+&)?id=(?P<id>[^&#]+)',
    ]
    # NOTE(review): expected 'duration' values are not pinned in the info
    # dicts below; add them once confirmed against a live extraction.
    _TESTS = [{
        'url': 'https://lut-my.sharepoint.com/:v:/g/personal/juha_eerola_student_lab_fi/EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw?e=ZpQOOw',
        'md5': '2950821d0d4937a0a76373782093b435',
        'info_dict': {
            'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
            'display_id': 'EUrAmrktb4ZMhUcY9J2PqMEBD_9x_l0DyYWVgAvp-TTOMw',
            'ext': 'mp4',
            'title': 'CmvpJST',
            'thumbnail': r're:https://.+/thumbnail',
            'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
        },
    }, {
        'url': 'https://greaternyace.sharepoint.com/:v:/s/acementornydrive/ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg?e=PQUfVb',
        'md5': 'c496a01644223273bff12e93e501afd1',
        'info_dict': {
            'id': '01QI4AVTZ3ESFZPAD42VCKB5CZKAGLFVYB',
            'display_id': 'ETski5eAfNVEoPRZUAyy1wEBpLgVFYWso5bjbZjfBLlPUg',
            'ext': 'mp4',
            'title': '930103681233985536',
            'thumbnail': r're:https://.+/thumbnail',
        },
    }, {
        'url': 'https://lut-my.sharepoint.com/personal/juha_eerola_student_lab_fi/_layouts/15/stream.aspx?id=%2Fpersonal%2Fjuha_eerola_student_lab_fi%2FDocuments%2FM-DL%2FCmvpJST.mp4&ga=1&referrer=StreamWebApp.Web&referrerScenario=AddressBarCopied.view',
        'info_dict': {
            'id': '01EQRS7EKKYCNLSLLPQZGIKRYY6SOY7KGB',
            'display_id': '/personal/juha_eerola_student_lab_fi/Documents/M-DL/CmvpJST.mp4',
            'ext': 'mp4',
            'title': 'CmvpJST',
            'thumbnail': r're:https://.+/thumbnail',
            'uploader_id': '8dcec565-a956-4b91-95e5-bacfb8bc015f',
        },
        'skip': 'Session cookies needed',
    }, {
        'url': 'https://izoobasisschool.sharepoint.com/:v:/g/Eaqleq8COVBIvIPvod0U27oBypC6aWOkk8ptuDpmJ6arHw',
        'only_matching': True,
    }, {
        'url': 'https://uskudaredutr-my.sharepoint.com/:v:/g/personal/songul_turkaydin_uskudar_edu_tr/EbTf-VRUIbtGuIN73tx1MuwBCHBOmNcWNqSLw61Fd2_o0g?e=n5Vkof',
        'only_matching': True,
    }, {
        'url': 'https://epam-my.sharepoint.com/:v:/p/dzmitry_tamashevich/Ec4ZOs-rATZHjFYZWVxjczEB649FCoYFKDV_x3RxZiWAGA?e=4hswgA',
        'only_matching': True,
    }, {
        'url': 'https://microsoft.sharepoint.com/:v:/t/MicrosoftSPARKRecordings-MSFTInternal/EWCyeqByVWBAt8wDvNZdV-UB0BvU5YVbKm0UHgdrUlI6dg?e=QbPck6',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract formats and metadata for a single SharePoint video.

        The page is downloaded and the JSON object assigned to ``g_fileInfo``
        is parsed. DASH and HLS manifests are requested from a
        ``videomanifest`` URL derived from the ``.transformUrl`` field, and
        the direct ``downloadUrl`` (when present and valid) is added as the
        ``source`` format.

        Raises a login-required error when the request ends up on
        ``login.microsoftonline.com``; a ``KeyError`` propagates if
        ``VroomItemId``, ``.transformUrl`` or ``.ctag`` is absent.
        """
        # The stream.aspx variant carries a percent-encoded path as its id
        display_id = urllib.parse.unquote(self._match_id(url))
        webpage, urlh = self._download_webpage_handle(url, display_id)
        if urllib.parse.urlparse(urlh.url).hostname == 'login.microsoftonline.com':
            self.raise_login_required(
                'Session cookies are required for this URL and can be passed '
                'with the --cookies option. The --cookies-from-browser option will not work', method=None)

        video_data = self._search_json(r'g_fileInfo\s*=', webpage, 'player config', display_id)
        video_id = video_data['VroomItemId']

        parsed_url = urllib.parse.urlparse(video_data['.transformUrl'])
        # Swap the last path segment of the transform URL for 'videomanifest'
        manifest_path = urllib.parse.urljoin(f'{parsed_url.path}/', '../videomanifest')
        # NOTE(review): the 'action' and 'part' values are assumed; confirm
        # them against the requests made by the web player.
        manifest_query = {
            **urllib.parse.parse_qs(parsed_url.query),
            'cTag': video_data['.ctag'],
            'action': 'Access',
            'part': 'index',
        }
        base_media_url = urllib.parse.urlunparse(parsed_url._replace(
            path=manifest_path,
            # parse_qs() yields list values, so doseq=True is required to
            # serialize them as repeated parameters instead of list reprs
            query=urllib.parse.urlencode(manifest_query, doseq=True)))

        # Web player adds more params to the format URLs but we still get all formats without them
        formats = self._extract_mpd_formats(
            base_media_url, video_id, mpd_id='dash', query={'format': 'dash'}, fatal=False)
        for hls_type in ('hls', 'hls-vnext'):
            formats.extend(self._extract_m3u8_formats(
                base_media_url, video_id, 'mp4', m3u8_id=hls_type,
                query={'format': hls_type}, fatal=False, quality=-2))

        if video_url := traverse_obj(video_data, ('downloadUrl', {url_or_none})):
            formats.append({
                'url': video_url,
                'ext': determine_ext(video_data.get('extension') or video_data.get('name')),
                # NOTE(review): the 'quality' value is assumed; confirm the
                # intended preference relative to the HLS formats (-2).
                'quality': 1,
                'format_id': 'source',
                'filesize': int_or_none(video_data.get('size')),
                'vcodec': 'none' if video_data.get('isAudio') is True else None,
            })

        return {
            'id': video_id,
            'formats': formats,
            'title': video_data.get('title') or video_data.get('displayName'),
            'display_id': display_id,
            'uploader_id': video_data.get('authorId'),
            # 'MediaServiceFastMetadata' is a JSON-encoded string; the raw
            # duration is divided by 10,000,000 (presumably 100 ns ticks
            # converted to seconds; TODO confirm)
            'duration': traverse_obj(video_data, (
                'MediaServiceFastMetadata', {json.loads}, 'media', 'duration', {lambda x: x / 10000000})),
            'thumbnail': url_or_none(video_data.get('thumbnailUrl')),
        }