[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / cookies.py
blobfad323c9015fccd11be24cf864d48e023d260800
1 import base64
2 import collections
3 import contextlib
4 import datetime as dt
5 import functools
6 import glob
7 import hashlib
8 import http.cookiejar
9 import http.cookies
10 import io
11 import json
12 import os
13 import re
14 import shutil
15 import struct
16 import subprocess
17 import sys
18 import tempfile
19 import time
20 import urllib.request
21 from enum import Enum, auto
23 from .aes import (
24 aes_cbc_decrypt_bytes,
25 aes_gcm_decrypt_and_verify_bytes,
26 unpad_pkcs7,
28 from .dependencies import (
29 _SECRETSTORAGE_UNAVAILABLE_REASON,
30 secretstorage,
31 sqlite3,
33 from .minicurses import MultilinePrinter, QuietMultilinePrinter
34 from .utils import (
35 DownloadError,
36 YoutubeDLError,
37 Popen,
38 error_to_str,
39 expand_path,
40 is_path_like,
41 sanitize_url,
42 str_or_none,
43 try_call,
44 write_string,
46 from .utils._utils import _YDLLogger
47 from .utils.networking import normalize_url
# Browsers handled by the Chromium cookie extractor (see `extract_cookies_from_browser`)
CHROMIUM_BASED_BROWSERS = {'brave', 'chrome', 'chromium', 'edge', 'opera', 'vivaldi', 'whale'}
# Every browser name accepted for cookie extraction: the Chromium family plus Firefox and Safari
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS | {'firefox', 'safari'}
class YDLLogger(_YDLLogger):
    """Logger used by the cookie extractors, optionally providing a progress bar."""

    def warning(self, message, only_once=False):  # compat
        """Forward to the parent ``warning``, passing ``only_once`` as its ``once`` keyword."""
        return super().warning(message, once=only_once)

    class ProgressBar(MultilinePrinter):
        """Printer that rewrites line 0 and ignores messages arriving too soon after the last one."""

        # Minimum number of seconds between two printed messages / time of the last print
        _DELAY, _timer = 0.1, 0

        def print(self, message):
            """Print ``message`` prefixed with ``[Cookies]`` unless the delay has not elapsed yet."""
            if time.time() - self._timer <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl or ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None
        stream = ydl._out_files.error
        try:
            is_terminal = stream.isatty()
        except BaseException:
            # Any failure while probing the stream is treated as "not a terminal"
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(stream, preserve_output=False)
79 def _create_progress_bar(logger):
80 if hasattr(logger, 'progress_bar'):
81 printer = logger.progress_bar()
82 if printer:
83 return printer
84 printer = QuietMultilinePrinter()
85 printer.print = lambda _: None
86 return printer
class CookieLoadError(YoutubeDLError):
    """Raised by `load_cookies` when loading cookies fails for any reason."""
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser specification and/or a cookie file.

    @param cookie_file            Path-like or other object handed to YoutubeDLCookieJar, or None
    @param browser_specification  Arguments for `_parse_browser_specification`, or None
    @param ydl                    Object passed to `YDLLogger` for browser extraction
    @returns                      The result of merging all collected jars
    @raises CookieLoadError       If anything fails; the original exception remains
                                  available through Python's implicit exception chaining
    """
    try:
        jars = []
        if browser_specification is not None:
            browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
            browser_jar = extract_cookies_from_browser(
                browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
            jars.append(browser_jar)

        if cookie_file is not None:
            from_path = is_path_like(cookie_file)
            if from_path:
                cookie_file = expand_path(cookie_file)

            file_jar = YoutubeDLCookieJar(cookie_file)
            # A path that is not readable still yields an (empty) jar bound to that path
            if not from_path or os.access(cookie_file, os.R_OK):
                file_jar.load()
            jars.append(file_jar)

        return _merge_cookie_jars(jars)
    except Exception:
        raise CookieLoadError('failed to load cookies')
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the extractor matching ``browser_name``.

    ``container`` is only passed to the Firefox extractor and ``keyring`` only to
    the Chromium one.

    @raises ValueError  If ``browser_name`` is not a known browser
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
def _extract_firefox_cookies(profile, container, logger):
    """Extract cookies from the newest Firefox ``cookies.sqlite`` that can be found.

    @param profile    None (search the default browser dirs), a path, or a profile
                      name joined onto each default browser dir
    @param container  None (all cookies), 'none' (only cookies outside any container),
                      or a container identifier looked up in ``containers.json``.
                      A container matches by its ``name``, by its full ``l10nID``
                      (e.g. ``userContextPersonal.label``) or by the label embedded
                      in that ``l10nID`` (e.g. ``Personal``)
    @param logger     Logger providing info/debug/warning
    @returns          A YoutubeDLCookieJar (empty if sqlite3 is unavailable)
    @raises FileNotFoundError  If no cookie database or no readable containers.json is found
    @raises ValueError         If the requested container cannot be resolved to an integer ID
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_roots = list(_firefox_browser_dirs())
    elif _is_path(profile):
        search_roots = [profile]
    else:
        search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
    search_root = ', '.join(map(repr, search_roots))

    cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        with open(containers_path, encoding='utf8') as containers:
            identities = json.load(containers).get('identities', [])

        def container_aliases(context):
            # NOTE(review): Firefox's built-in containers appear to carry only an
            # `l10nID` such as "userContextPersonal.label" and no `name` - confirm.
            # Previously only the whole l10nID matched (`.group()` ignores the capture
            # group); the captured label is now accepted as well. The whole-ID form is
            # kept so existing invocations continue to work.
            aliases = [context.get('name')]
            l10n_id = context.get('l10nID')
            match = re.fullmatch(r'userContext([^\.]+)\.label', l10n_id) if isinstance(l10n_id, str) else None
            if match:
                aliases.extend((match.group(), match.group(1)))
            return aliases

        # The first identity (in file order) with a matching alias wins
        container_id = next((
            context.get('userContextId') for context in identities
            if container in container_aliases(context)), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # Work on a copy of the database inside the temporary directory
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                # Match the ID both at the end of originAttributes and when followed by '&'
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,"userContextId=")')
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')
            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
196 def _firefox_browser_dirs():
197 if sys.platform in ('cygwin', 'win32'):
198 yield from map(os.path.expandvars, (
199 R'%APPDATA%\Mozilla\Firefox\Profiles',
200 R'%LOCALAPPDATA%\Packages\Mozilla.Firefox_n80bbvh6b1yt2\LocalCache\Roaming\Mozilla\Firefox\Profiles',
203 elif sys.platform == 'darwin':
204 yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
206 else:
207 yield from map(os.path.expanduser, (
208 '~/.mozilla/firefox',
209 '~/snap/firefox/common/.mozilla/firefox',
210 '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
214 def _firefox_cookie_dbs(roots):
215 for root in map(os.path.abspath, roots):
216 for pattern in ('', '*/', 'Profiles/*/'):
217 yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a Chromium-based browser.

    @returns  dict with the keys 'browser_dir', 'keyring_name' and 'supports_profiles'
    @raises KeyError  If ``browser_name`` is not one of the known Chromium-based browsers
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    if sys.platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        # Opera is the only entry located under the roaming application data
        base, relative = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
            'whale': (local, R'Naver\Naver Whale\User Data'),
        }[browser_name]

    elif sys.platform == 'darwin':
        base = os.path.expanduser('~/Library/Application Support')
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
            'whale': 'Naver/Whale',
        }[browser_name]

    else:
        base = _config_home()
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
            'whale': 'naver-whale',
        }[browser_name]

    browser_dir = os.path.join(base, relative)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = sys.platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
        'whale': 'Whale',
    }[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles,
    }
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Extract (and decrypt where needed) the cookies of a Chromium-based browser.

    @param browser_name  Key understood by `_get_chromium_based_browser_settings`
    @param profile       None, a path, or a profile name inside the browser directory
    @param keyring       Passed through to `get_cookie_decryptor`
    @param logger        Logger providing info/debug/warning/error
    @returns             A YoutubeDLCookieJar (empty if sqlite3 is unavailable)
    @raises FileNotFoundError  If no ``Cookies`` database is found under the search root
    @raises DownloadError      On Windows, if copying the database fails with errno 13
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)

    if profile is None:
        search_root = settings['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # The browser directory handed to the decryptor is derived from the given path
        settings['browser_dir'] = os.path.dirname(profile) if settings['supports_profiles'] else profile
    elif settings['supports_profiles']:
        search_root = os.path.join(settings['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = settings['browser_dir']

    cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)

            # meta_version is necessary to determine if we need to trim the hash prefix from the cookies
            # Ref: https://chromium.googlesource.com/chromium/src/+/b02dcebd7cafab92770734dc2bc317bd07f1d891/net/extras/sqlite/sqlite_persistent_cookie_store.cc#223
            version_row = cursor.execute('SELECT value FROM meta WHERE key = "version"').fetchone()
            meta_version = int(version_row[0])
            decryptor = get_cookie_decryptor(
                settings['browser_dir'], settings['keyring_name'], logger,
                keyring=keyring, meta_version=meta_version)

            # From here on text columns are returned as bytes; `_process_chrome_cookie` decodes them
            cursor.connection.text_factory = bytes
            column_names = _get_column_names(cursor, 'cookies')
            secure_column = 'is_secure' if 'is_secure' in column_names else 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')

            jar = YoutubeDLCookieJar()
            failed_cookies = unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total_cookie_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')
            counts = dict(decryptor._cookie_counts, unencrypted=unencrypted_cookies)
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        except PermissionError as error:
            if os.name == 'nt' and error.errno == 13:
                message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
                logger.error(message)
                raise DownloadError(message)  # force exit
            raise
        finally:
            if cursor is not None:
                cursor.connection.close()
358 def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
359 host_key = host_key.decode()
360 name = name.decode()
361 value = value.decode()
362 path = path.decode()
363 is_encrypted = not value and encrypted_value
365 if is_encrypted:
366 value = decryptor.decrypt(encrypted_value)
367 if value is None:
368 return is_encrypted, None
370 # In chrome, session cookies have expires_utc set to 0
371 # In our cookie-store, cookies that do not expire should have expires set to None
372 if not expires_utc:
373 expires_utc = None
375 return is_encrypted, http.cookiejar.Cookie(
376 version=0, name=name, value=value, port=None, port_specified=False,
377 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
378 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
379 comment=None, comment_url=None, rest={})
class ChromeCookieDecryptor:
    """
    Base class of the platform-specific Chromium cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
                - also attempts empty password if decryption fails
            - v11: AES-CBC encrypted with an OS protected key (keyring)
                - also attempts empty password if decryption fails
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version counters; each subclass replaces this with its own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one encrypted cookie value; must be overridden by subclasses."""
        raise NotImplementedError('Must be implemented by sub classes')
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None, meta_version=None):
    """Create the cookie decryptor matching the current platform.

    ``browser_root`` is only used on Windows, ``browser_keyring_name`` only on
    macOS and Linux, and ``keyring`` only on Linux.
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger, meta_version=meta_version)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger, meta_version=meta_version)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring, meta_version=meta_version)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the v10/v11 cookie values written by Chromium on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None, meta_version=None):
        self._logger = logger
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring
        self._meta_version = meta_version or 0
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        # v10 uses a fixed password; the empty password is the fallback for both versions
        self._v10_key = self.derive_key(b'peanuts')
        self._empty_key = self.derive_key(b'')

    @functools.cached_property
    def _v11_key(self):
        """Key derived from the keyring password, or None if there is no password.

        Looked up on first access only, then cached on the instance.
        """
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if password is None:
            return None
        return self.derive_key(password)

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key for ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """
        Decrypt a cookie value whose first three bytes name its version.

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            candidate_keys = (self._v10_key, self._empty_key)
            return _decrypt_aes_cbc_multi(
                ciphertext, candidate_keys, self._logger,
                hash_prefix=self._meta_version >= 24)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            if self._v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            candidate_keys = (self._v11_key, self._empty_key)
            return _decrypt_aes_cbc_multi(
                ciphertext, candidate_keys, self._logger,
                hash_prefix=self._meta_version >= 24)

        self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
        self._cookie_counts['other'] += 1
        return None
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the cookie values written by Chromium on macOS."""

    def __init__(self, browser_keyring_name, logger, meta_version=None):
        self._logger = logger
        self._meta_version = meta_version or 0
        self._cookie_counts = {'v10': 0, 'other': 0}
        # The keyring is queried once, at construction time
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        self._v10_key = self.derive_key(password) if password is not None else None

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key for ``password``."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a v10 value; any other value is returned unchanged."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc_multi(
            ciphertext, (self._v10_key,), self._logger, hash_prefix=self._meta_version >= 24)
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the cookie values written by Chromium on Windows."""

    def __init__(self, browser_root, logger, meta_version=None):
        self._logger = logger
        self._meta_version = meta_version or 0
        self._cookie_counts = {'v10': 0, 'other': 0}
        self._v10_key = _get_windows_v10_key(browser_root, logger)

    def decrypt(self, encrypted_value):
        """Decrypt a v10 (AES-GCM) value, or treat any other value as DPAPI encrypted."""
        version, payload = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
        #   kNonceLength
        nonce_length = 96 // 8
        # boringssl
        #   EVP_AEAD_AES_GCM_TAG_LEN
        authentication_tag_length = 16

        # The payload is laid out as: nonce | ciphertext | authentication tag
        nonce = payload[:nonce_length]
        ciphertext = payload[nonce_length:-authentication_tag_length]
        authentication_tag = payload[-authentication_tag_length:]

        return _decrypt_aes_gcm(
            ciphertext, self._v10_key, nonce, authentication_tag, self._logger,
            hash_prefix=self._meta_version >= 24)
def _extract_safari_cookies(profile, logger):
    """Read and parse Safari's ``Cookies.binarycookies`` file (macOS only).

    @param profile  Optional path to a custom cookies file; the two default
                    locations are tried when it is falsy
    @raises ValueError         On any platform other than darwin
    @raises FileNotFoundError  If no cookies file exists at the expected location(s)
    """
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    if profile:
        cookies_path = os.path.expanduser(profile)
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('custom safari cookies database not found')
    else:
        cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
        if not os.path.isfile(cookies_path):
            logger.debug('Trying secondary cookie location')
            cookies_path = os.path.expanduser(
                '~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
            if not os.path.isfile(cookies_path):
                raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        raw_cookies = cookies_file.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
class ParserError(Exception):
    """Raised by `DataParser` on invalid reads/skips or unexpected data."""
class DataParser:
    """Sequential reader over a bytes object that keeps track of a cursor position."""

    def __init__(self, data, logger):
        self._data = data
        self._logger = logger
        self.cursor = 0

    def read_bytes(self, num_bytes):
        """Return the next ``num_bytes`` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and fail unless they equal ``expected_value``."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless ``big_endian``)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(f'{byte_order}I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8-byte float (little-endian unless ``big_endian``)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(f'{byte_order}d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read up to and including the next NUL byte and return the decoded text before it."""
        chunks = []
        byte = self.read_bytes(1)
        while byte != b'\x00':
            chunks.append(byte)
            byte = self.read_bytes(1)
        return b''.join(chunks).decode()

    def skip(self, num_bytes, description='unknown'):
        """Advance by ``num_bytes``, logging the skipped bytes; a negative count is an error."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {self.read_bytes(num_bytes)!r}')

    def skip_to(self, offset, description='unknown'):
        """Advance the cursor to the absolute position ``offset``."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Advance the cursor to the end of the data."""
        self.skip_to(len(self._data), description)
635 def _mac_absolute_time_to_posix(timestamp):
636 return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())
def _parse_safari_cookies_header(data, logger):
    """Parse the file header: the ``cook`` signature, the page count and one size per page.

    @returns  (list of page sizes, offset of the first byte after the header)
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    number_of_pages = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(number_of_pages):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of the Safari cookies file and add its cookies to ``jar``."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not number_of_cookies:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, record_offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(record_offset, 'space between records')
            # The record is parsed from its own slice; this parser then steps over it
            record_length = _parse_safari_cookies_record(data[record_offset:], jar, logger)
            parser.read_bytes(record_length)
    parser.skip_to_end('space in between pages')
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of ``data`` and add it to ``jar``.

    A record whose strings cannot be decoded is skipped with a warning.

    @returns  The record size read from the record's first field
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    # Offsets are stored in the order: domain, name, path, value
    string_offsets = [parser.read_uint() for _ in range(4)]
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    # The creation date is read (and converted) but not used
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for string_offset in string_offsets:
            parser.skip_to(string_offset)
            strings.append(parser.read_cstring())
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size
    domain, name, path, value = strings

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a Safari ``Cookies.binarycookies`` file into a cookie jar.

    A new YoutubeDLCookieJar is created when ``jar`` is None; the jar is returned.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    target_jar = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for page_size in page_sizes:
        page = body.read_bytes(page_size)
        _parse_safari_cookies_page(page, target_jar, logger)
    body.skip_to_end('footer')
    return target_jar
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments distinguished by `_get_linux_desktop_environment`.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """
    # Values come from auto(), so they depend on declaration order
    OTHER = auto()
    CINNAMON = auto()
    DEEPIN = auto()
    GNOME = auto()
    KDE3 = auto()
    KDE4 = auto()
    KDE5 = auto()
    KDE6 = auto()
    PANTHEON = auto()
    UKUI = auto()
    UNITY = auto()
    XFCE = auto()
    LXQT = auto()
class _LinuxKeyring(Enum):
    """
    Keyring backends that can be selected on Linux; the member names are the
    values exposed through SUPPORTED_KEYRINGS.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
    SelectedLinuxBackend
    """
    KWALLET = auto()  # KDE4
    KWALLET5 = auto()
    KWALLET6 = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()
# Names of all `_LinuxKeyring` members (a keys view of the enum's member mapping)
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
def _get_linux_desktop_environment(env, logger):
    """
    Detect the desktop environment from the environment variables in ``env``.

    Three sources are consulted in order, and a source that is missing or holds an
    unrecognised value falls through to the next one (as Chromium does):
        1. XDG_CURRENT_DESKTOP (colon-separated; the first recognised part wins)
        2. DESKTOP_SESSION
        3. the older GNOME_DESKTOP_SESSION_ID / KDE_FULL_SESSION variables

    @param env     Mapping of environment variables (e.g. ``os.environ``)
    @param logger  Logger providing ``info``
    @returns       A `_LinuxDesktopEnvironment` member; OTHER if nothing is recognised

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        for part in map(str.strip, xdg_current_desktop.split(':')):
            if part == 'Unity':
                if desktop_session is not None and 'gnome-fallback' in desktop_session:
                    return _LinuxDesktopEnvironment.GNOME
                else:
                    return _LinuxDesktopEnvironment.UNITY
            elif part == 'Deepin':
                return _LinuxDesktopEnvironment.DEEPIN
            elif part == 'GNOME':
                return _LinuxDesktopEnvironment.GNOME
            elif part == 'X-Cinnamon':
                return _LinuxDesktopEnvironment.CINNAMON
            elif part == 'KDE':
                kde_version = env.get('KDE_SESSION_VERSION', None)
                if kde_version == '5':
                    return _LinuxDesktopEnvironment.KDE5
                elif kde_version == '6':
                    return _LinuxDesktopEnvironment.KDE6
                elif kde_version == '4':
                    return _LinuxDesktopEnvironment.KDE4
                else:
                    logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
                    return _LinuxDesktopEnvironment.KDE4
            elif part == 'Pantheon':
                return _LinuxDesktopEnvironment.PANTHEON
            elif part == 'XFCE':
                return _LinuxDesktopEnvironment.XFCE
            elif part == 'UKUI':
                return _LinuxDesktopEnvironment.UKUI
            elif part == 'LXQt':
                return _LinuxDesktopEnvironment.LXQT
        logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')

    # Deliberately a separate `if` (not `elif`): an unrecognised XDG_CURRENT_DESKTOP
    # must not prevent DESKTOP_SESSION from being consulted
    if desktop_session is not None:
        if desktop_session == 'deepin':
            return _LinuxDesktopEnvironment.DEEPIN
        elif desktop_session in ('mate', 'gnome'):
            return _LinuxDesktopEnvironment.GNOME
        elif desktop_session in ('kde4', 'kde-plasma'):
            return _LinuxDesktopEnvironment.KDE4
        elif desktop_session == 'kde':
            if 'KDE_SESSION_VERSION' in env:
                return _LinuxDesktopEnvironment.KDE4
            else:
                return _LinuxDesktopEnvironment.KDE3
        elif 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return _LinuxDesktopEnvironment.XFCE
        elif desktop_session == 'ukui':
            return _LinuxDesktopEnvironment.UKUI
        else:
            logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')

    # Fall back on the older variables whenever the checks above were inconclusive
    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return _LinuxDesktopEnvironment.GNOME
    elif 'KDE_FULL_SESSION' in env:
        if 'KDE_SESSION_VERSION' in env:
            return _LinuxDesktopEnvironment.KDE4
        else:
            return _LinuxDesktopEnvironment.KDE3

    return _LinuxDesktopEnvironment.OTHER
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend from the desktop environment detected in ``os.environ``.

    SelectBackend in [1]

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')

    # Every desktop environment not listed here uses the GNOME keyring
    keyring_by_desktop = {
        _LinuxDesktopEnvironment.KDE4: _LinuxKeyring.KWALLET,
        _LinuxDesktopEnvironment.KDE5: _LinuxKeyring.KWALLET5,
        _LinuxDesktopEnvironment.KDE6: _LinuxKeyring.KWALLET6,
        _LinuxDesktopEnvironment.KDE3: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.LXQT: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.OTHER: _LinuxKeyring.BASICTEXT,
    }
    return keyring_by_desktop.get(desktop_environment, _LinuxKeyring.GNOMEKEYRING)
def _get_kwallet_network_wallet(keyring, logger):
    """ The name of the wallet used to store network passwords.

    Asks the kwalletd service matching ``keyring`` through ``dbus-send``; any
    failure is logged as a warning and 'kdewallet' is returned instead.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    try:
        dbus_targets = {
            _LinuxKeyring.KWALLET: ('org.kde.kwalletd', '/modules/kwalletd'),
            _LinuxKeyring.KWALLET5: ('org.kde.kwalletd5', '/modules/kwalletd5'),
            _LinuxKeyring.KWALLET6: ('org.kde.kwalletd6', '/modules/kwalletd6'),
        }
        if keyring not in dbus_targets:
            raise ValueError(keyring)
        service_name, wallet_path = dbus_targets[keyring]

        command = [
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet',
        ]
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        logger.debug(f'NetworkWallet = "{stdout.strip()}"')
        return stdout.strip()
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
def _get_kwallet_password(browser_keyring_name, keyring, logger):
    """
    Read the browser's "Safe Storage" password from KWallet using `kwallet-query`.

    The entry `<browser_keyring_name> Safe Storage` is looked up in the folder
    `<browser_keyring_name> Keys` of the network wallet.

    Returns the password as bytes, or b'' when kwallet-query is not installed,
    exits with a non-zero code, reports that it failed to read the entry,
    or raises an exception.
    """
    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')

    if shutil.which('kwallet-query') is None:
        # NB: each fragment must end with a space, since adjacent literals are concatenated as-is
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(keyring, logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet,
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this was identified as a bug later and fixed in
            # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
            # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
            return b''

        logger.debug('password found')
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """
    Read the browser's "Safe Storage" secret from the default secretstorage collection.

    Returns the secret of the first item labelled `<browser_keyring_name> Safe Storage`,
    or b'' when secretstorage is unavailable or no such item exists.
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() != wanted_label:
                continue
            return entry.get_secret()
        logger.error('failed to read from keyring')
        return b''
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """
    Fetch the browser's keyring password from the selected Linux keyring backend.

    `keyring` is the name of a `_LinuxKeyring` member; when falsy the backend is auto-detected.
    Returns None for BASICTEXT, otherwise whatever the backend-specific reader returns.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    if keyring:
        keyring = _LinuxKeyring[keyring]
    else:
        keyring = _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    kwallet_keyrings = (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6)
    if keyring in kwallet_keyrings:
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    assert False, f'Unknown keyring {keyring}'
def _get_mac_keyring_password(browser_keyring_name, logger):
    """
    Read the browser's "Safe Storage" password with `security find-generic-password`.

    Returns the password as bytes (trailing newlines removed),
    or None if the command fails or raises.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    command = [
        'security', 'find-generic-password',
        '-w',  # write password to stdout
        '-a', browser_keyring_name,  # match 'account'
        '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
    ]
    try:
        stdout, _, returncode = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not returncode:
            return stdout.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
def _get_windows_v10_key(browser_root, logger):
    """
    Load and DPAPI-decrypt the key stored in the newest `Local State` file under browser_root.

    Returns None (after logging an error) when no `Local State` file is found,
    it has no `os_crypt.encrypted_key` entry, or the decoded key lacks the `DPAPI` prefix.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    local_state_path = _newest(_find_files(browser_root, 'Local State', logger))
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    # kDPAPIKeyPrefix in [1]
    dpapi_prefix = b'DPAPI'
    encrypted_key = base64.b64decode(encoded_key)
    if not encrypted_key.startswith(dpapi_prefix):
        logger.error('invalid key')
        return None

    # Only the bytes following the prefix are the DPAPI-protected blob
    return _decrypt_windows_dpapi(encrypted_key[len(dpapi_prefix):], logger)
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive a key of key_length bytes from password and salt using PBKDF2-HMAC-SHA1"""
    derived_key = hashlib.pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16, hash_prefix=False):
    """
    Try each key in turn and return the first AES-CBC decryption that decodes as text.

    When hash_prefix is set, the first 32 bytes of the plaintext are dropped before decoding.
    Returns None (with a one-time warning) if no key yields decodable text.
    """
    for candidate_key in keys:
        decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, candidate_key, initialization_vector))
        payload = decrypted[32:] if hash_prefix else decrypted
        try:
            return payload.decode()
        except UnicodeDecodeError:
            # Undecodable output: move on to the next candidate key
            continue
    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger, hash_prefix=False):
    """
    Decrypt and verify an AES-GCM encrypted cookie value and decode it as text.

    When hash_prefix is set, the first 32 bytes of the plaintext are dropped before decoding.
    Returns None (with a one-time warning) if verification raises ValueError or decoding fails.
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None

    payload = decrypted[32:] if hash_prefix else decrypted
    try:
        return payload.decode()
    except UnicodeDecodeError:
        logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
        return None
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt ciphertext with the Windows CryptUnprotectData API and return the resulting bytes.

    Raises DownloadError (after logging the same message) if the API call reports failure.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    blob_in = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    blob_out = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(blob_in),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(blob_out),  # pDataOut
    )
    if not succeeded:
        message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info'
        logger.error(message)
        raise DownloadError(message)  # force exit

    # Copy the output into Python-owned bytes before freeing the returned buffer
    decrypted = ctypes.string_at(blob_out.pbData, blob_out.cbData)
    ctypes.windll.kernel32.LocalFree(blob_out.pbData)
    return decrypted
def _config_home():
    """Return $XDG_CONFIG_HOME if it is set, otherwise the expanded `~/.config`"""
    fallback = os.path.expanduser('~/.config')
    return os.environ.get('XDG_CONFIG_HOME', fallback)
def _open_database_copy(database_path, tmpdir):
    """Copy the database to `temporary.sqlite` inside tmpdir and return a cursor on the copy"""
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    connection = sqlite3.connect(copy_path)
    return connection.cursor()
def _get_column_names(cursor, table_name):
    """Return the column names of table_name, as reported by `PRAGMA table_info`"""
    rows = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    # The second field of each row is the column name; it is decoded, so it must be bytes
    # (presumably the caller's connection uses a bytes text factory - verify against caller)
    return [name.decode() for _, name, *_ in rows]
def _newest(files):
    """Return the path with the latest modification time, or None if files is empty"""
    def modification_time(path):
        # lstat: a symlink is judged by its own timestamp, not its target's
        return os.lstat(path).st_mtime

    return max(files, default=None, key=modification_time)
def _find_files(root, filename, logger):
    """
    Lazily yield the path of every file named filename found while walking root.

    The running count of examined files is reported through the progress bar.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry != filename:
                    continue
                yield os.path.join(directory, entry)
def _merge_cookie_jars(jars):
    """
    Combine the cookies of all jars into a new YoutubeDLCookieJar.

    The result takes the filename of the last jar that has one.
    """
    merged = YoutubeDLCookieJar()
    for jar in jars:
        for cookie in jar:
            merged.set_cookie(cookie)
        if jar.filename is None:
            continue
        merged.filename = jar.filename
    return merged
def _is_path(value):
    """Return whether value contains a path separator of the current platform"""
    # os.path.altsep may be None, so only non-empty separators are considered
    separators = [sep for sep in (os.path.sep, os.path.altsep) if sep]
    for separator in separators:
        if separator in value:
            return True
    return False
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """
    Validate a browser specification and return it as (browser_name, profile, keyring, container).

    Raises ValueError for an unsupported browser or keyring.
    A profile that looks like a path once expanded is returned in its expanded form.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        if _is_path(expanded_profile):
            profile = expanded_profile
    return browser_name, profile, keyring, container
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Attribute names (compared lowercased) that belong to the preceding cookie
    _RESERVED = {
        'expires',
        'path',
        'comment',
        'domain',
        'max-age',
        'secure',
        'httponly',
        'version',
        'samesite',
    }

    # Reserved attributes that are allowed to appear without a value
    _FLAGS = {'secure', 'httponly'}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r'''
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [''' + _LEGAL_VALUE_CHARS + r''']*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        ''', re.ASCII | re.VERBOSE)

    def load(self, data):
        """
        Load cookies from a cookie string, skipping entries that cannot be parsed.

        Non-string data is passed on to SimpleCookie.load unchanged.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # Morsel that following reserved attributes attach to; None means "no valid cookie"
        current = None
        for found in self._COOKIE_PATTERN.finditer(data):
            if found.group('bad'):
                current = None
                continue

            key, value = found.group('key', 'val')

            is_attribute = key.startswith('$')
            if is_attribute:
                key = key[1:]

            lowered_key = key.lower()
            if lowered_key in self._RESERVED:
                if current is None:
                    continue

                if value is not None:
                    value, _ = self.value_decode(value)
                elif lowered_key in self._FLAGS:
                    value = True
                else:
                    # A reserved attribute without a value is only valid for flags
                    current = None
                    continue

                current[key] = value

            elif is_attribute or value is None:
                current = None

            else:
                current = self.get(key, http.cookies.Morsel())
                real_value, coded_value = self.value_decode(value)
                current.set(key, real_value, coded_value)
                self[key] = current
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        # The parent is initialised without a filename; ours is stored after path-likes are converted
        super().__init__(None, *args, **kwargs)
        self.filename = os.fspath(filename) if is_path_like(filename) else filename

    @staticmethod
    def _true_or_false(cndn):
        """Return 'TRUE' for a truthy value and 'FALSE' otherwise"""
        if cndn:
            return 'TRUE'
        return 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """Yield a text file for a path-like `file`, or `file` itself (truncated when writing)"""
        if not is_path_like(file):
            if write:
                file.truncate(0)
            yield file
            return

        mode = 'w' if write else 'r'
        with open(file, mode, encoding='utf-8') as f:
            yield f

    def _really_save(self, f, ignore_discard, ignore_expires):
        """Write one tab-separated line per cookie to f, honouring the ignore_* switches"""
        now = time.time()
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue

            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name

            fields = (
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value,
            )
            f.write('{}\n'.format('\t'.join(fields)))

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            if self.filename is None:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """Load cookies from a file."""
        if filename is None:
            if self.filename is None:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)
            filename = self.filename

        def prepare_line(line):
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            fields = line.split('\t')
            if len(fields) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError(f'invalid length {len(fields)}')
            entry = self._CookieFileEntry(*fields)
            if entry.expires_at and not entry.expires_at.isdigit():
                raise http.cookiejar.LoadError(f'invalid expires at {entry.expires_at}')
            return line

        # Validated lines are collected in memory and then handed to the parent parser
        sanitized = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    sanitized.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        sanitized.seek(0)
        self._really_load(sanitized, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires == 0:
                cookie.expires = None
                cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        request = urllib.request.Request(normalize_url(sanitize_url(url)))
        self.add_cookie_header(request)
        return request.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        request = urllib.request.Request(normalize_url(sanitize_url(url)))
        return self._cookies_for_request(request)

    def clear(self, *args, **kwargs):
        """Clear cookies like the parent class, but ignore a KeyError it raises"""
        with contextlib.suppress(KeyError):
            return super().clear(*args, **kwargs)