Release 2024.12.23
[yt-dlp.git] / yt_dlp / extractor / dropbox.py
blob 2bfeebc7cbba417ef28400dab6835f6ba5ac7293
1 import base64
2 import os.path
3 import re
4 import urllib.parse
6 from .common import InfoExtractor
7 from ..utils import (
8 ExtractorError,
9 update_url,
10 update_url_query,
11 url_basename,
12 urlencode_postdata,
class DropboxIE(InfoExtractor):
    """Extractor for Dropbox share links under /s/, /sh/, /scl/fi/ and /e/scl/fi/."""

    _VALID_URL = r'https?://(?:www\.)?dropbox\.com/(?:(?:e/)?scl/fi|sh?)/(?P<id>\w+)'
    _TESTS = [
        {
            'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh/youtube-dl%20test%20video%20%27%C3%A4%22BaW_jenozKc.mp4?dl=0',
            'info_dict': {
                'id': 'nelirfsxnmcfbfh',
                'ext': 'mp4',
                # %C3%A4 in the URL is the UTF-8 percent-encoding of "ä"
                'title': 'youtube-dl test video \'ä"BaW_jenozKc',
            },
        }, {
            'url': 'https://www.dropbox.com/s/nelirfsxnmcfbfh',
            'only_matching': True,
        }, {
            'url': 'https://www.dropbox.com/sh/2mgpiuq7kv8nqdf/AABy-fW4dkydT4GmWi2mdOUDa?dl=0&preview=Drone+Shot.mp4',
            'only_matching': True,
        }, {
            'url': 'https://www.dropbox.com/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
            'only_matching': True,
        }, {
            'url': 'https://www.dropbox.com/e/scl/fi/r2kd2skcy5ylbbta5y1pz/DJI_0003.MP4?dl=0&rlkey=wcdgqangn7t3lnmmv6li9mu9h',
            'only_matching': True,
        },
    ]

    def _yield_decoded_parts(self, webpage):
        """Yield the decoded payload of each registerStreamedPrefetch(...) call.

        The second string argument of every call found in *webpage* is
        base64-decoded and then decoded as UTF-8 (undecodable bytes are
        dropped). Payloads are yielded in reverse order of appearance.
        """
        for encoded in reversed(re.findall(r'registerStreamedPrefetch\s*\(\s*"[\w/+=]+"\s*,\s*"([\w/+=]+)"', webpage)):
            yield base64.b64decode(encoded).decode('utf-8', 'ignore')

    def _real_extract(self, url):
        """Extract formats, subtitles and thumbnail for a Dropbox share link.

        Raises ExtractorError when the page asks for a password and none was
        supplied via --video-password, or when the supplied password is
        rejected by the auth endpoint.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)
        # The title is the percent-decoded last path component minus its extension
        fn = urllib.parse.unquote(url_basename(url))
        title = os.path.splitext(fn)[0]
        content_id = None

        # A payload mentioning '/sm/password' marks the link as password protected
        for part in self._yield_decoded_parts(webpage):
            if '/sm/password' in part:
                content_id = self._search_regex(r'content_id=([\w.+=/-]+)', part, 'content ID')
                break

        if content_id:
            password = self.get_param('videopassword')
            if not password:
                raise ExtractorError('Password protected video, use --video-password <password>', expected=True)

            response = self._download_json(
                'https://www.dropbox.com/sm/auth', video_id, 'POSTing video password',
                data=urlencode_postdata({
                    'is_xhr': 'true',
                    # NOTE(review): raises KeyError if no 't' cookie was set for dropbox.com
                    't': self._get_cookies('https://www.dropbox.com')['t'].value,
                    'content_id': content_id,
                    'password': password,
                    # Only the path/query portion of the share URL is sent
                    'url': update_url(url, scheme='', netloc=''),
                }))
            if response.get('status') != 'authed':
                raise ExtractorError('Invalid password', expected=True)

            # Fetch the page again now that the password has been accepted
            webpage = self._download_webpage(url, video_id)

        formats, subtitles = [], {}
        has_anonymous_download = False
        thumbnail = None
        for part in self._yield_decoded_parts(webpage):
            if not has_anonymous_download:
                has_anonymous_download = self._search_regex(
                    r'(anonymous:\tanonymous)', part, 'anonymous', default=False)
            transcode_url = self._search_regex(
                r'\n.(https://[^\x03\x08\x12\n]+\.m3u8)', part, 'transcode url', default=None)
            if not transcode_url:
                continue
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(transcode_url, video_id, 'mp4')
            thumbnail = self._search_regex(
                r'(https://www\.dropbox\.com/temp_thumb_from_token/[\w/?&=]+)', part, 'thumbnail', default=None)
            # Stop at the first payload that carries an m3u8 URL
            break

        # When downloads are enabled, the original file can be fetched with dl=1
        if has_anonymous_download:
            formats.append({
                'url': update_url_query(url, {'dl': '1'}),
                'format_id': 'original',
                'format_note': 'Original',
                'quality': 1,
            })

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnail': thumbnail,
        }