[build] Bump PyInstaller version pin to `>=6.11.1` (#11507)
[yt-dlp3.git] / yt_dlp / cookies.py
blobe6734982444f5077a1bf20ba9297778d7afbf463
1 import base64
2 import collections
3 import contextlib
4 import datetime as dt
5 import functools
6 import glob
7 import hashlib
8 import http.cookiejar
9 import http.cookies
10 import io
11 import json
12 import os
13 import re
14 import shutil
15 import struct
16 import subprocess
17 import sys
18 import tempfile
19 import time
20 import urllib.request
21 from enum import Enum, auto
23 from .aes import (
24 aes_cbc_decrypt_bytes,
25 aes_gcm_decrypt_and_verify_bytes,
26 unpad_pkcs7,
28 from .compat import compat_os_name
29 from .dependencies import (
30 _SECRETSTORAGE_UNAVAILABLE_REASON,
31 secretstorage,
32 sqlite3,
34 from .minicurses import MultilinePrinter, QuietMultilinePrinter
35 from .utils import (
36 DownloadError,
37 YoutubeDLError,
38 Popen,
39 error_to_str,
40 expand_path,
41 is_path_like,
42 sanitize_url,
43 str_or_none,
44 try_call,
45 write_string,
47 from .utils._utils import _YDLLogger
48 from .utils.networking import normalize_url
# Browser names that are handled by the Chromium cookie extractor
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
# Every browser name accepted by `extract_cookies_from_browser`
SUPPORTED_BROWSERS = {*CHROMIUM_BASED_BROWSERS, 'firefox', 'safari'}
class YDLLogger(_YDLLogger):
    """Logger used while extracting cookies; can additionally provide a terminal progress bar."""

    def warning(self, message, only_once=False):  # compat
        # Callers in this module pass `only_once`; the parent method takes it as `once`
        return super().warning(message, once=only_once)

    class ProgressBar(MultilinePrinter):
        """Progress printer that drops updates arriving too soon after the previous one."""

        # Minimum number of seconds between two printed updates
        _DELAY = 0.1
        # `time.time()` of the most recently printed update
        _timer = 0

        def print(self, message):
            """Print `message` at line 0, unless the last update is at most `_DELAY` seconds old."""
            if time.time() - self._timer <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        # Do not print to files/pipes, loggers, or when --no-progress is used
        ydl = self._ydl
        if not ydl:
            return None
        params = ydl.params
        if params.get('noprogress') or params.get('logger'):
            return None

        stream = ydl._out_files.error
        try:
            is_terminal = stream.isatty()
        except BaseException:
            # A stream that cannot even be queried is treated like a non-terminal
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(stream, preserve_output=False)
def _create_progress_bar(logger):
    """
    Return a progress printer for `logger`.

    The printer returned by `logger.progress_bar()` is used when the logger has such a method
    and it returns something truthy. Otherwise a `QuietMultilinePrinter` whose `print` does
    nothing is returned.
    """
    bar = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if bar:
        return bar

    silent = QuietMultilinePrinter()
    silent.print = lambda _: None
    return silent
class CookieLoadError(YoutubeDLError):
    """Raised by `load_cookies` when loading cookies fails for any reason."""
def load_cookies(cookie_file, browser_specification, ydl):
    """
    Build one cookie jar from a browser and/or a cookie file.

    @param cookie_file              Path-like or file-like cookie source, or None
    @param browser_specification    Arguments for `_parse_browser_specification`, or None
    @param ydl                      Object handed to `YDLLogger` for logging
    @returns                        The result of merging all jars that were loaded
    @raises CookieLoadError         If anything goes wrong; the original exception
                                    stays attached as the implicit exception context
    """
    try:
        jars = []

        if browser_specification is not None:
            name, profile, keyring, container = _parse_browser_specification(*browser_specification)
            browser_jar = extract_cookies_from_browser(
                name, profile, YDLLogger(ydl), keyring=keyring, container=container)
            jars.append(browser_jar)

        if cookie_file is not None:
            from_path = is_path_like(cookie_file)
            if from_path:
                cookie_file = expand_path(cookie_file)

            file_jar = YoutubeDLCookieJar(cookie_file)
            # File-like objects are always loaded; paths only when they are readable
            should_load = os.access(cookie_file, os.R_OK) if from_path else True
            if should_load:
                file_jar.load()
            jars.append(file_jar)

        return _merge_cookie_jars(jars)
    except Exception:
        raise CookieLoadError('failed to load cookies')
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """
    Extract cookies from the named browser using the matching extractor.

    `container` is only passed on for firefox and `keyring` only for Chromium-based browsers.

    @raises ValueError  If `browser_name` is not a known browser
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
def _extract_firefox_cookies(profile, container, logger):
    """
    Read cookies from the newest firefox `cookies.sqlite` that can be found.

    @param profile      None (search the default firefox directories), a path, or a name
                        that is joined onto each default firefox directory
    @param container    None (all cookies), 'none' (only cookies outside any container),
                        or a container name looked up in `containers.json`
    @param logger       Logger providing info/debug/warning
    @returns            A YoutubeDLCookieJar; empty if sqlite3 is unavailable
    @raises FileNotFoundError   If no cookie database or no readable containers.json is found
    @raises ValueError          If the requested container cannot be resolved to an integer ID
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_roots = list(_firefox_browser_dirs())
    elif _is_path(profile):
        search_roots = [profile]
    else:
        search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
    search_root = ', '.join(map(repr, search_roots))

    cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        with open(containers_path, encoding='utf8') as containers:
            identities = json.load(containers).get('identities', [])

        def _container_names(context):
            # Names under which an identity from containers.json can be requested
            yield context.get('name')
            label = try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', context['l10nID']))
            if label:
                # `group(1)` is the readable part of the l10nID (e.g. "Personal").
                # `Match.group()` without arguments is the whole "userContextX.label" string,
                # which is still accepted so that previously working values keep working
                yield label.group(1)
                yield label.group(0)

        container_id = next((
            context.get('userContextId') for context in identities
            if container in _container_names(context)), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # The queries run against a copy of the database placed in `tmpdir`
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # SQL string literals take single quotes; a double-quoted one is parsed as an
                # identifier by SQLite builds that disable the double-quoted-string quirk
                cursor.execute(
                    "SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,'userContextId=')")
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
def _firefox_browser_dirs():
    """Yield the directories that are searched for firefox profiles on the current platform."""
    platform = sys.platform
    if platform in ('cygwin', 'win32'):
        yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
        return

    if platform == 'darwin':
        yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
        return

    # Any other platform: three possible locations below the home directory
    for location in (
        '~/.mozilla/firefox',
        '~/snap/firefox/common/.mozilla/firefox',
        '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
    ):
        yield os.path.expanduser(location)
def _firefox_cookie_dbs(roots):
    """
    Yield every `cookies.sqlite` found for the given root directories.

    For each root (made absolute first) the file is looked for directly inside the root,
    inside any direct subdirectory, and inside any subdirectory of `Profiles/`.
    """
    layouts = ('', '*/', 'Profiles/*/')
    for base in roots:
        base = os.path.abspath(base)
        for layout in layouts:
            for database in glob.iglob(os.path.join(base, layout, 'cookies.sqlite')):
                yield database
def _get_chromium_based_browser_settings(browser_name):
    """
    Return the platform specific settings for a Chromium-based browser.

    @returns    dict with the keys 'browser_dir' (user data directory), 'keyring_name'
                and 'supports_profiles'
    @raises KeyError    If `browser_name` is not one of the Chromium-based browsers
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    on_mac = platform == 'darwin'

    # Each entry is (base directory, path of the user data directory below it)
    if platform in ('cygwin', 'win32'):
        local_root = os.path.expandvars('%LOCALAPPDATA%')
        roaming_root = os.path.expandvars('%APPDATA%')
        data_dirs = {
            'brave': (local_root, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local_root, R'Google\Chrome\User Data'),
            'chromium': (local_root, R'Chromium\User Data'),
            'edge': (local_root, R'Microsoft\Edge\User Data'),
            'opera': (roaming_root, R'Opera Software\Opera Stable'),
            'vivaldi': (local_root, R'Vivaldi\User Data'),
            'whale': (local_root, R'Naver\Naver Whale\User Data'),
        }
    elif on_mac:
        support_root = os.path.expanduser('~/Library/Application Support')
        data_dirs = {
            'brave': (support_root, 'BraveSoftware/Brave-Browser'),
            'chrome': (support_root, 'Google/Chrome'),
            'chromium': (support_root, 'Chromium'),
            'edge': (support_root, 'Microsoft Edge'),
            'opera': (support_root, 'com.operasoftware.Opera'),
            'vivaldi': (support_root, 'Vivaldi'),
            'whale': (support_root, 'Naver/Whale'),
        }
    else:
        config_root = _config_home()
        data_dirs = {
            'brave': (config_root, 'BraveSoftware/Brave-Browser'),
            'chrome': (config_root, 'google-chrome'),
            'chromium': (config_root, 'chromium'),
            'edge': (config_root, 'microsoft-edge'),
            'opera': (config_root, 'opera'),
            'vivaldi': (config_root, 'vivaldi'),
            'whale': (config_root, 'naver-whale'),
        }
    browser_dir = os.path.join(*data_dirs[browser_name])

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_names = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'whale': 'Whale',
    }
    if on_mac:
        keyring_names.update(edge='Microsoft Edge', opera='Opera', vivaldi='Vivaldi')
    else:
        keyring_names.update(edge='Chromium', opera='Chromium', vivaldi='Chrome')

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_names[browser_name],
        # opera is the only browser listed here without profile support
        'supports_profiles': browser_name != 'opera',
    }
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """
    Read and decrypt the cookies of a Chromium-based browser.

    @param browser_name     Key understood by `_get_chromium_based_browser_settings`
    @param profile          None (search the browser directory), a path, or a profile name
    @param keyring          Passed through to `get_cookie_decryptor`
    @param logger           Logger providing info/debug/warning/error
    @returns                A YoutubeDLCookieJar; empty if sqlite3 is unavailable
    @raises FileNotFoundError   If no `Cookies` database is found
    @raises DownloadError       On Windows, if a PermissionError with errno 13 occurs
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    config = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = config['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # The decryptor is given the directory above the profile, or the path itself
        # for browsers without profile support
        config['browser_dir'] = os.path.dirname(profile) if config['supports_profiles'] else profile
    else:
        if config['supports_profiles']:
            search_root = os.path.join(config['browser_dir'], profile)
        else:
            logger.error(f'{browser_name} does not support profiles')
            search_root = config['browser_dir']

    cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)

            # meta_version is necessary to determine if we need to trim the hash prefix from the cookies
            # Ref: https://chromium.googlesource.com/chromium/src/+/b02dcebd7cafab92770734dc2bc317bd07f1d891/net/extras/sqlite/sqlite_persistent_cookie_store.cc#223
            # NB: 'version' must be single-quoted. A double-quoted "version" is an identifier in
            # SQL and fails with "no such column" on SQLite builds that disable the
            # double-quoted-string quirk
            meta_version = int(cursor.execute("SELECT value FROM meta WHERE key = 'version'").fetchone()[0])
            decryptor = get_cookie_decryptor(
                config['browser_dir'], config['keyring_name'], logger,
                keyring=keyring, meta_version=meta_version)

            # From here on all text columns are returned as bytes
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')
            jar = YoutubeDLCookieJar()
            failed_cookies = 0
            unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, line in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *line)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    elif not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)
            if failed_cookies > 0:
                failed_message = f' ({failed_cookies} could not be decrypted)'
            else:
                failed_message = ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        except PermissionError as error:
            if compat_os_name == 'nt' and error.errno == 13:
                message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
                logger.error(message)
                raise DownloadError(message)  # force exit
            raise
        finally:
            if cursor is not None:
                cursor.connection.close()
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """
    Turn one row of the chrome `cookies` table into a cookie object.

    `host_key`, `name`, `value` and `path` are expected as bytes and are decoded.
    The encrypted value is only decrypted when the plain `value` column is empty.

    @returns    (is_encrypted, cookie) where `is_encrypted` is only meaningful as a truth
                value and `cookie` is None if decryption failed
    """
    domain, cookie_name, cookie_value, cookie_path = (
        column.decode() for column in (host_key, name, value, path))

    is_encrypted = False if cookie_value else encrypted_value
    if is_encrypted:
        cookie_value = decryptor.decrypt(encrypted_value)
        if cookie_value is None:
            return is_encrypted, None

    # In chrome, session cookies have expires_utc set to 0
    # In our cookie-store, cookies that do not expire should have expires set to None
    expires = expires_utc or None

    cookie = http.cookiejar.Cookie(
        version=0, name=cookie_name, value=cookie_value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=cookie_path, path_specified=bool(cookie_path), secure=is_secure, expires=expires, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
class ChromeCookieDecryptor:
    """
    Base class of the platform specific decryptors for chrome cookie values.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
                - also attempts empty password if decryption fails
            - v11: AES-CBC encrypted with an OS protected key (keyring)
                - also attempts empty password if decryption fails
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Number of cookies seen per version prefix; the subclasses set their own dict per instance
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt a single encrypted cookie value. Every subclass has to provide this."""
        raise NotImplementedError('Must be implemented by sub classes')
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None, meta_version=None):
    """
    Create the cookie decryptor for the platform this is running on.

    `browser_root` is only used on Windows, `browser_keyring_name` on macOS and Linux,
    and `keyring` only on Linux.
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger, meta_version=meta_version)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger, meta_version=meta_version)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring, meta_version=meta_version)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the `v10` and `v11` cookie values written by chrome on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None, meta_version=None):
        self._logger = logger
        # v10 uses a key derived from a fixed password
        self._v10_key = self.derive_key(b'peanuts')
        # Key derived from the empty password; always tried as a second candidate
        self._empty_key = self.derive_key(b'')
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring
        self._meta_version = meta_version or 0

    @functools.cached_property
    def _v11_key(self):
        # Computed on first access only, so the keyring is not queried unless a v11 cookie shows up
        keyring_password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if keyring_password is None:
            return None
        return self.derive_key(keyring_password)

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """
        Decrypt a cookie value; returns None if the version is unknown or no v11 key is available.

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            candidate_keys = (self._v10_key, self._empty_key)
        elif version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            candidate_keys = (self._v11_key, self._empty_key)
        else:
            self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
            self._cookie_counts['other'] += 1
            return None

        return _decrypt_aes_cbc_multi(
            ciphertext, candidate_keys, self._logger,
            hash_prefix=self._meta_version >= 24)
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookie values written by chrome on macOS."""

    def __init__(self, browser_keyring_name, logger, meta_version=None):
        self._logger = logger
        keyring_password = _get_mac_keyring_password(browser_keyring_name, logger)
        # Without a keyring password there is no key and v10 cookies cannot be decrypted
        self._v10_key = self.derive_key(keyring_password) if keyring_password is not None else None
        self._cookie_counts = {'v10': 0, 'other': 0}
        self._meta_version = meta_version or 0

    @staticmethod
    def derive_key(password):
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a `v10` value (None if there is no key); any other value is returned unchanged."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc_multi(
            ciphertext, (self._v10_key,), self._logger, hash_prefix=self._meta_version >= 24)
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for cookie values written by chrome on Windows."""

    def __init__(self, browser_root, logger, meta_version=None):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}
        self._meta_version = meta_version or 0

    def decrypt(self, encrypted_value):
        """Decrypt a `v10` value with AES-GCM (None if there is no key), anything else with DPAPI."""
        version, payload = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
        #   kNonceLength
        nonce_length = 96 // 8
        # boringssl
        #   EVP_AEAD_AES_GCM_TAG_LEN
        tag_length = 16

        # The payload is laid out as: nonce | ciphertext | authentication tag
        nonce = payload[:nonce_length]
        authentication_tag = payload[-tag_length:]
        ciphertext = payload[nonce_length:-tag_length]

        return _decrypt_aes_gcm(
            ciphertext, self._v10_key, nonce, authentication_tag, self._logger,
            hash_prefix=self._meta_version >= 24)
def _extract_safari_cookies(profile, logger):
    """
    Read the safari cookie database and parse it into a cookie jar.

    @param profile  Path of a custom `Cookies.binarycookies` file; if falsy, the two
                    default locations are tried in order
    @raises ValueError          If not running on macOS
    @raises FileNotFoundError   If no cookie database exists at the path(s) tried
    """
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    if profile:
        cookies_path = os.path.expanduser(profile)
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('custom safari cookies database not found')
    else:
        primary = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
        if os.path.isfile(primary):
            cookies_path = primary
        else:
            logger.debug('Trying secondary cookie location')
            cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
            if not os.path.isfile(cookies_path):
                raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as database:
        raw_cookies = database.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
class ParserError(Exception):
    """Raised by `DataParser` on invalid reads/skips, truncated input or unexpected bytes."""
class DataParser:
    """Sequential reader over a bytes object; `cursor` is the offset of the next unread byte."""

    def __init__(self, data, logger):
        self._data = data
        self.cursor = 0
        self._logger = logger

    def read_bytes(self, num_bytes):
        """Return the next `num_bytes` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume `len(expected_value)` bytes and raise ParserError if they differ from it."""
        value = self.read_bytes(len(expected_value))
        if value == expected_value:
            return
        raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read an unsigned 32-bit integer (little-endian unless `big_endian`)."""
        return int.from_bytes(self.read_bytes(4), 'big' if big_endian else 'little')

    def read_double(self, big_endian=False):
        """Read a 64-bit float (little-endian unless `big_endian`)."""
        byte_order = '>' if big_endian else '<'
        value, = struct.unpack(f'{byte_order}d', self.read_bytes(8))
        return value

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the decoded text before it."""
        # Keeps pulling single bytes until the NUL terminator has been consumed
        characters = iter(lambda: self.read_bytes(1), b'\x00')
        return b''.join(characters).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by `num_bytes` bytes, logging the skipped bytes at debug level."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes == 0:
            return
        skipped = self.read_bytes(num_bytes)
        self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')

    def skip_to(self, offset, description='unknown'):
        """Advance the cursor to the absolute position `offset`."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Advance the cursor to the end of the data."""
        self.skip_to(len(self._data), description)
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc)
    moment = mac_epoch + dt.timedelta(seconds=timestamp)
    return int(moment.timestamp())
def _parse_safari_cookies_header(data, logger):
    """
    Parse the header of a safari cookies database.

    @returns    (list with the size of every page, offset at which the header ends)
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of a safari cookies database and add the cookies it contains to `jar`."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record is parsed from its own slice; afterwards this parser is moved past it
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
def _parse_safari_cookies_record(data, jar, logger):
    """
    Parse the cookie record at the start of `data` and add the cookie to `jar`.

    A record whose strings cannot be decoded is not added; a warning is logged instead.

    @returns    The record size read from the record, in both cases
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = (flags & 0x0001) != 0
    parser.skip(4, 'unknown record field 2')
    # Offsets of the four strings, relative to the start of the record
    domain_offset, name_offset, path_offset, value_offset = [parser.read_uint() for _ in range(4)]
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is read and converted, but not used any further
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a safari cookies database into `jar` (a new jar if none is given).

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure are the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()

    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
class _LinuxDesktopEnvironment(Enum):
    """
    The desktop environments that are told apart when detecting the Linux desktop.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """

    OTHER = auto()
    CINNAMON = auto()
    DEEPIN = auto()
    GNOME = auto()
    KDE3 = auto()
    KDE4 = auto()
    KDE5 = auto()
    KDE6 = auto()
    PANTHEON = auto()
    UKUI = auto()
    UNITY = auto()
    XFCE = auto()
    LXQT = auto()
class _LinuxKeyring(Enum):
    """
    The Linux keyring backends that can be selected.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
    SelectedLinuxBackend
    """

    KWALLET = auto()  # KDE4
    KWALLET5 = auto()
    KWALLET6 = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()


# Names of all keyring backends, in declaration order
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
def _get_linux_desktop_environment(env, logger):
    """
    Detect the desktop environment from a mapping of environment variables.

    The three stages (XDG_CURRENT_DESKTOP, then DESKTOP_SESSION, then older variables)
    are tried in order. A stage that is unset or holds an unrecognised value falls through
    to the next one instead of ending the detection, as in the chromium function below.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment

    @param env      Mapping of environment variables (e.g. os.environ)
    @param logger   Logger providing info
    @returns        A _LinuxDesktopEnvironment member; OTHER if nothing is recognised
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    # Stage 1: XDG_CURRENT_DESKTOP can hold several colon-separated values in priority order
    if xdg_current_desktop is not None:
        for part in map(str.strip, xdg_current_desktop.split(':')):
            if part == 'Unity':
                if desktop_session is not None and 'gnome-fallback' in desktop_session:
                    return _LinuxDesktopEnvironment.GNOME
                else:
                    return _LinuxDesktopEnvironment.UNITY
            elif part == 'Deepin':
                return _LinuxDesktopEnvironment.DEEPIN
            elif part == 'GNOME':
                return _LinuxDesktopEnvironment.GNOME
            elif part == 'X-Cinnamon':
                return _LinuxDesktopEnvironment.CINNAMON
            elif part == 'KDE':
                kde_version = env.get('KDE_SESSION_VERSION', None)
                if kde_version == '5':
                    return _LinuxDesktopEnvironment.KDE5
                elif kde_version == '6':
                    return _LinuxDesktopEnvironment.KDE6
                elif kde_version == '4':
                    return _LinuxDesktopEnvironment.KDE4
                else:
                    logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
                    return _LinuxDesktopEnvironment.KDE4
            elif part == 'Pantheon':
                return _LinuxDesktopEnvironment.PANTHEON
            elif part == 'XFCE':
                return _LinuxDesktopEnvironment.XFCE
            elif part == 'UKUI':
                return _LinuxDesktopEnvironment.UKUI
            elif part == 'LXQt':
                return _LinuxDesktopEnvironment.LXQT
        logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')

    # Stage 2: DESKTOP_SESSION, also consulted when stage 1 did not recognise anything
    if desktop_session is not None:
        if desktop_session == 'deepin':
            return _LinuxDesktopEnvironment.DEEPIN
        elif desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif desktop_session in ('kde4', 'kde-plasma'):
            return _LinuxDesktopEnvironment.KDE4
        elif desktop_session == 'kde':
            if 'KDE_SESSION_VERSION' in env:
                return _LinuxDesktopEnvironment.KDE4
            else:
                return _LinuxDesktopEnvironment.KDE3
        elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return _LinuxDesktopEnvironment.XFCE
        elif desktop_session == 'ukui':
            return _LinuxDesktopEnvironment.UKUI
        else:
            logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')

    # Stage 3: fall back on some older environment variables
    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    if 'KDE_FULL_SESSION' in env:
        if 'KDE_SESSION_VERSION' in env:
            return _LinuxDesktopEnvironment.KDE4
        else:
            return _LinuxDesktopEnvironment.KDE3
    return _LinuxDesktopEnvironment.OTHER
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend that matches the desktop environment detected from `os.environ`.

    SelectBackend in [1]

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')

    keyring_by_desktop = {
        _LinuxDesktopEnvironment.KDE4: _LinuxKeyring.KWALLET,
        _LinuxDesktopEnvironment.KDE5: _LinuxKeyring.KWALLET5,
        _LinuxDesktopEnvironment.KDE6: _LinuxKeyring.KWALLET6,
        _LinuxDesktopEnvironment.KDE3: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.LXQT: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.OTHER: _LinuxKeyring.BASICTEXT,
    }
    # Every desktop environment not listed above uses the gnome keyring
    return keyring_by_desktop.get(desktop_environment, _LinuxKeyring.GNOMEKEYRING)
def _get_kwallet_network_wallet(keyring, logger):
    """ The name of the wallet used to store network passwords.

    The name is queried with `dbus-send`; 'kdewallet' is returned if that query fails,
    raises, or if `keyring` is not one of the KWallet backends.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    try:
        # The dbus service and object path only differ in the trailing version digit
        daemon_suffixes = (
            (_LinuxKeyring.KWALLET, ''),
            (_LinuxKeyring.KWALLET5, '5'),
            (_LinuxKeyring.KWALLET6, '6'),
        )
        suffix = next((digit for backend, digit in daemon_suffixes if keyring == backend), None)
        if suffix is None:
            raise ValueError(keyring)
        service_name = f'org.kde.kwalletd{suffix}'
        wallet_path = f'/modules/kwalletd{suffix}'

        stdout, _, returncode = Popen.run([
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet',
        ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        wallet_name = stdout.strip()
        logger.debug(f'NetworkWallet = "{wallet_name}"')
        return wallet_name
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
900 def _get_kwallet_password(browser_keyring_name, keyring, logger):
901 logger.debug(f'using kwallet-query to obtain password from {keyring.name}')
903 if shutil.which('kwallet-query') is None:
904 logger.error('kwallet-query command not found. KWallet and kwallet-query '
905 'must be installed to read from KWallet. kwallet-query should be'
906 'included in the kwallet package for your distribution')
907 return b''
909 network_wallet = _get_kwallet_network_wallet(keyring, logger)
911 try:
912 stdout, _, returncode = Popen.run([
913 'kwallet-query',
914 '--read-password', f'{browser_keyring_name} Safe Storage',
915 '--folder', f'{browser_keyring_name} Keys',
916 network_wallet,
917 ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
919 if returncode:
920 logger.error(f'kwallet-query failed with return code {returncode}. '
921 'Please consult the kwallet-query man page for details')
922 return b''
923 else:
924 if stdout.lower().startswith(b'failed to read'):
925 logger.debug('failed to read password from kwallet. Using empty string instead')
926 # this sometimes occurs in KDE because chrome does not check hasEntry and instead
927 # just tries to read the value (which kwallet returns "") whereas kwallet-query
928 # checks hasEntry. To verify this:
929 # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
930 # while starting chrome.
931 # this was identified as a bug later and fixed in
932 # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
933 # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
934 return b''
935 else:
936 logger.debug('password found')
937 return stdout.rstrip(b'\n')
938 except Exception as e:
939 logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
940 return b''
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Look up the browser's "Safe Storage" secret in the default secretstorage collection.

    Returns the secret of the first item whose label matches, or b'' when
    secretstorage is unavailable or no item matches.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    wanted_label = f'{browser_keyring_name} Safe Storage'
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() != wanted_label:
                continue
            return entry.get_secret()
        logger.error('failed to read from keyring')
        return b''
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Obtain the cookie encryption password from the selected Linux keyring.

    @param browser_keyring_name  Browser name as used in the keyring entry names
    @param keyring               Name of a `_LinuxKeyring` member, or a falsy value
                                 to auto-detect via `_choose_linux_keyring`
    @param logger                Logger providing debug/warning/error
    @returns                     Password bytes, or None when the BASICTEXT keyring
                                 is in use (no password needed)
    @raises AssertionError       If the resolved keyring is not handled here
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # Raised explicitly instead of `assert False`: assert statements are removed
    # under `python -O`, which would turn this into a silent `return None`
    # (indistinguishable from the BASICTEXT result above).
    raise AssertionError(f'Unknown keyring {keyring}')
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Query the `security` command line tool for the browser's "Safe Storage" password.

    Returns the password bytes (trailing newlines removed), or None when the
    command exits non-zero or raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    command = [
        'security', 'find-generic-password',
        '-w',  # write password to stdout
        '-a', browser_keyring_name,  # match 'account'
        '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
    ]
    try:
        stdout, _, returncode = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not returncode:
            return stdout.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
def _get_windows_v10_key(browser_root, logger):
    """
    Load and decrypt the cookie encryption key stored in the newest "Local State" file.

    Returns the result of `_decrypt_windows_dpapi` on the stored key, or None
    when the file, the key entry, or the expected key prefix is missing.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    local_state_path = _newest(_find_files(browser_root, 'Local State', logger))
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    wrapped_key = base64.b64decode(encoded_key)
    # kDPAPIKeyPrefix in [1]
    dpapi_marker = b'DPAPI'
    if wrapped_key[:len(dpapi_marker)] != dpapi_marker:
        logger.error('invalid key')
        return None
    return _decrypt_windows_dpapi(wrapped_key[len(dpapi_marker):], logger)
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a `key_length`-byte key with PBKDF2 using HMAC-SHA1."""
    derived_key = hashlib.pbkdf2_hmac(
        'sha1', password, salt, iterations, dklen=key_length)
    return derived_key
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16, hash_prefix=False):
    """
    Try each key in turn and return the first AES-CBC plaintext that decodes as UTF-8.

    When `hash_prefix` is set, the first 32 bytes of the plaintext are dropped
    before decoding. Returns None (after a one-time warning) if no key yields
    decodable text.
    """
    skipped_bytes = 32 if hash_prefix else 0
    for candidate_key in keys:
        decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, candidate_key, initialization_vector))
        try:
            return decrypted[skipped_bytes:].decode()
        except UnicodeDecodeError:
            # Most likely the wrong key; move on to the next candidate
            continue
    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger, hash_prefix=False):
    """
    Decrypt and verify an AES-GCM ciphertext and return it as text.

    When `hash_prefix` is set, the first 32 bytes of the plaintext are dropped
    before decoding. Returns None (after a one-time warning) if verification
    raises ValueError or the plaintext is not valid UTF-8.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    payload = decrypted[32:] if hash_prefix else decrypted
    try:
        return payload.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` with the Windows `CryptUnprotectData` API.

    Logs an error and raises DownloadError if the API call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob),  # pDataOut
    )
    if not succeeded:
        message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info'
        logger.error(message)
        raise DownloadError(message)  # force exit

    # Copy the output out before handing the buffer back via LocalFree
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
1093 def _config_home():
1094 return os.environ.get('XDG_CONFIG_HOME', os.path.expanduser('~/.config'))
1097 def _open_database_copy(database_path, tmpdir):
1098 # cannot open sqlite databases if they are already in use (e.g. by the browser)
1099 database_copy_path = os.path.join(tmpdir, 'temporary.sqlite')
1100 shutil.copy(database_path, database_copy_path)
1101 conn = sqlite3.connect(database_copy_path)
1102 return conn.cursor()
1105 def _get_column_names(cursor, table_name):
1106 table_info = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
1107 return [row[1].decode() for row in table_info]
1110 def _newest(files):
1111 return max(files, key=lambda path: os.lstat(path).st_mtime, default=None)
def _find_files(root, filename, logger):
    """
    Walk `root` and yield the full path of every file named exactly `filename`.

    Progress (number of files examined so far) is reported through the
    progress bar created for `logger`.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry != filename:
                    continue
                yield os.path.join(directory, entry)
def _merge_cookie_jars(jars):
    """
    Combine several cookie jars into a new YoutubeDLCookieJar.

    Cookies are set in iteration order; the result's filename is that of the
    last jar in `jars` whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        if source_jar.filename is None:
            continue
        merged.filename = source_jar.filename
    return merged
1136 def _is_path(value):
1137 return any(sep in value for sep in (os.path.sep, os.path.altsep) if sep)
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and normalise its profile.

    Raises ValueError for an unsupported browser or keyring. A profile whose
    expanded form looks like a path is replaced by that expanded form; any
    other profile is passed through unchanged.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        if _is_path(expanded_profile):
            profile = expanded_profile
    return browser_name, profile, keyring, container
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    _RESERVED = {
        'expires',
        'path',
        'comment',
        'domain',
        'max-age',
        'secure',
        'httponly',
        'version',
        'samesite',
    }

    _FLAGS = {'secure', 'httponly'}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r'''
        \s* # Optional whitespace at start of cookie
        (?P<key> # Start of group 'key'
        [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
        ) # End of group 'key'
        ( # Optional group: there may not be a value.
        \s*=\s* # Equal Sign
        ( # Start of potential value
        (?P<val> # Start of group 'val'
        "(?:[^\\"]|\\.)*" # Any doublequoted string
        | # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        | # or
        [''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
        ) # End of group 'val'
        | # or
        (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
        ) # End of potential value
        )? # End of optional value group
        \s* # Any number of spaces.
        (\s+|;|$) # Ending either at space, semicolon, or EOS.
        ''', re.ASCII | re.VERBOSE)

    def load(self, data):
        """
        Parse a cookie string, skipping invalid parts instead of raising.

        Non-string input is handed to the parent implementation unchanged.
        Attributes following a cookie that was rejected are skipped as well.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # The cookie that subsequent attributes apply to (None: no valid cookie yet)
        current = None
        for found in self._COOKIE_PATTERN.finditer(data):
            if found.group('bad'):
                current = None
                continue

            name = found.group('key')
            raw_value = found.group('val')

            dollar_prefixed = name.startswith('$')
            if dollar_prefixed:
                name = name[1:]
            folded_name = name.lower()

            if folded_name in self._RESERVED:
                # Attribute of the current cookie (if there is one)
                if current is None:
                    continue
                if raw_value is not None:
                    decoded, _ = self.value_decode(raw_value)
                    current[name] = decoded
                elif folded_name in self._FLAGS:
                    current[name] = True
                else:
                    # Valueless attribute that is not a flag: drop the cookie context
                    current = None
                continue

            if dollar_prefixed or raw_value is None:
                current = None
                continue

            current = self.get(name, http.cookies.Morsel())
            real_value, coded_value = self.value_decode(raw_value)
            current.set(name, real_value, coded_value)
            self[name] = current
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    MozillaCookieJar variant that reads/writes Netscape cookie files from either
    a path or an already open text stream.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """Create the jar; `filename` may be a path-like object or a text stream."""
        super().__init__(None, *args, **kwargs)
        if is_path_like(filename):
            filename = os.fspath(filename)
        self.filename = filename

    @staticmethod
    def _true_or_false(cndn):
        """Return the cookie-file spelling ('TRUE'/'FALSE') of a condition."""
        return 'TRUE' if cndn else 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """
        Context manager yielding a text stream for `file`.

        Path-like values are opened (and closed on exit) as UTF-8 text.
        Anything else is assumed to be an open stream and is yielded as-is;
        when writing, it is emptied first.
        """
        if is_path_like(file):
            with open(file, 'w' if write else 'r', encoding='utf-8') as f:
                yield f
        else:
            if write:
                # truncate() does not move the stream position, so rewind first;
                # otherwise a stream that was previously read or written would
                # be written to at its old offset
                file.seek(0)
                file.truncate(0)
            yield file

    def _really_save(self, f, ignore_discard, ignore_expires):
        """Write one tab-separated line per cookie to `f`, honouring the ignore flags."""
        now = time.time()
        for cookie in self:
            if (not ignore_discard and cookie.discard
                    or not ignore_expires and cookie.is_expired(now)):
                continue
            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name
            f.write('{}\n'.format('\t'.join((
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value,
            ))))

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117

        Raises ValueError if neither `filename` nor `self.filename` is set.
        """

        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Load cookies from a file.

        Malformed entries are skipped with a warning; a file that looks like
        JSON raises http.cookiejar.LoadError. Raises ValueError if neither
        `filename` nor `self.filename` is set.
        """
        if filename is None:
            if self.filename is not None:
                filename = self.filename
            else:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker, then validate the entry's shape
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            cookie_list = line.split('\t')
            if len(cookie_list) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError(f'invalid length {len(cookie_list)}')
            cookie = self._CookieFileEntry(*cookie_list)
            if cookie.expires_at and not cookie.expires_at.isdigit():
                raise http.cookiejar.LoadError(f'invalid expires at {cookie.expires_at}')
            return line

        # Sanitised copy of the input that is handed to the parent parser
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        cookie_req = urllib.request.Request(normalize_url(sanitize_url(url)))
        self.add_cookie_header(cookie_req)
        return cookie_req.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        return self._cookies_for_request(urllib.request.Request(normalize_url(sanitize_url(url))))

    def clear(self, *args, **kwargs):
        """Clear cookies like the parent class, but ignore a KeyError for missing entries."""
        with contextlib.suppress(KeyError):
            return super().clear(*args, **kwargs)