[ie/cwtv] Fix extraction (#11230)
[yt-dlp3.git] / yt_dlp / cookies.py
blob4a69c576beb08a947c374b7ed48a4ec76fae6545
1 import base64
2 import collections
3 import contextlib
4 import datetime as dt
5 import functools
6 import glob
7 import hashlib
8 import http.cookiejar
9 import http.cookies
10 import io
11 import json
12 import os
13 import re
14 import shutil
15 import struct
16 import subprocess
17 import sys
18 import tempfile
19 import time
20 import urllib.request
21 from enum import Enum, auto
23 from .aes import (
24 aes_cbc_decrypt_bytes,
25 aes_gcm_decrypt_and_verify_bytes,
26 unpad_pkcs7,
28 from .compat import compat_os_name
29 from .dependencies import (
30 _SECRETSTORAGE_UNAVAILABLE_REASON,
31 secretstorage,
32 sqlite3,
34 from .minicurses import MultilinePrinter, QuietMultilinePrinter
35 from .utils import (
36 DownloadError,
37 YoutubeDLError,
38 Popen,
39 error_to_str,
40 expand_path,
41 is_path_like,
42 sanitize_url,
43 str_or_none,
44 try_call,
45 write_string,
47 from .utils._utils import _YDLLogger
48 from .utils.networking import normalize_url
# Browser names that are routed to the Chromium cookie extractor
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
    'whale',
}
# Every browser name that cookie extraction accepts
SUPPORTED_BROWSERS = CHROMIUM_BASED_BROWSERS.union({'firefox', 'safari'})
class YDLLogger(_YDLLogger):
    """Logger handed to the cookie extractors; adds a compat warning() and an optional progress bar."""

    def warning(self, message, only_once=False):  # compat
        """Forward to the parent warning(), passing `only_once` as its `once` keyword."""
        return super().warning(message, once=only_once)

    class ProgressBar(MultilinePrinter):
        """Printer that shows a message only if more than `_DELAY` seconds passed since the last shown one."""

        _DELAY = 0.1
        _timer = 0

        def print(self, message):
            if time.time() - self._timer <= self._DELAY:
                # too soon after the previous update; drop this message
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)

        None is returned whenever no progress bar should be displayed.
        """
        ydl = self._ydl
        # Do not print to files/pipes, loggers, or when --no-progress is used
        if not ydl:
            return None
        if ydl.params.get('noprogress') or ydl.params.get('logger'):
            return None

        stream = ydl._out_files.error
        try:
            is_terminal = stream.isatty()
        except BaseException:
            return None
        if not is_terminal:
            return None
        return self.ProgressBar(stream, preserve_output=False)
def _create_progress_bar(logger):
    """Return the logger's progress bar when it provides one, else a quiet printer whose print() does nothing."""
    bar = logger.progress_bar() if hasattr(logger, 'progress_bar') else None
    if bar:
        return bar

    silent = QuietMultilinePrinter()
    silent.print = lambda _: None
    return silent
class CookieLoadError(YoutubeDLError):
    """Raised by load_cookies() when loading cookies fails for any reason."""
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser and/or a cookie file.

    Either source may be None to skip it. Any exception raised while loading
    is replaced by a CookieLoadError.
    """
    try:
        jars = []

        if browser_specification is not None:
            browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
            browser_jar = extract_cookies_from_browser(
                browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
            jars.append(browser_jar)

        if cookie_file is not None:
            is_filename = is_path_like(cookie_file)
            if is_filename:
                cookie_file = expand_path(cookie_file)

            file_jar = YoutubeDLCookieJar(cookie_file)
            # A path is only loaded when it is readable; non-path inputs are always loaded
            if not (is_filename and not os.access(cookie_file, os.R_OK)):
                file_jar.load()
            jars.append(file_jar)

        return _merge_cookie_jars(jars)
    except Exception:
        raise CookieLoadError('failed to load cookies')
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the extractor matching `browser_name`.

    `container` is only passed to the firefox extractor and `keyring` only to the
    Chromium one. Raises ValueError for a browser name that has no extractor.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
def _extract_firefox_cookies(profile, container, logger):
    """Load cookies from the newest firefox `cookies.sqlite` that can be found.

    profile:   None to search the default firefox directories, a path to search
               directly, or a profile name looked up under each default directory.
    container: None for all cookies, 'none' for cookies outside any container,
               otherwise a container label. A label is matched against each identity's
               'name', its full 'l10nID' (`userContext<Label>.label`) and, failing those,
               the `<Label>` part of the l10nID.
    logger:    object providing info(), debug() and warning().

    Returns a YoutubeDLCookieJar (empty when sqlite3 support is missing).
    Raises FileNotFoundError when no cookie database or no readable containers.json
    is found, and ValueError when the container cannot be resolved to an integer ID.
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_roots = list(_firefox_browser_dirs())
    elif _is_path(profile):
        search_roots = [profile]
    else:
        search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
    search_root = ', '.join(map(repr, search_roots))

    cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        with open(containers_path, encoding='utf8') as containers:
            identities = json.load(containers).get('identities', [])

        l10n_pattern = r'userContext([^\.]+)\.label'

        def find_container_id(labels_of):
            # userContextId of the first identity whose labels include the requested container
            return next((
                context.get('userContextId') for context in identities
                if container in labels_of(context)), None)

        # First pass: exact 'name' or full l10nID
        container_id = find_container_id(lambda context: (
            context.get('name'),
            try_call(lambda: re.fullmatch(l10n_pattern, context['l10nID']).group()),
        ))
        if not isinstance(container_id, int):
            # Second pass: the label captured from the l10nID, so that e.g. the `Personal`
            # of `userContextPersonal.label` is accepted. Run only as a fallback so that
            # every lookup that already succeeded keeps resolving to the same identity
            container_id = find_container_id(lambda context: (
                try_call(lambda: re.fullmatch(l10n_pattern, context['l10nID']).group(1)),
            ))
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # Queries run against a copy of the database placed in the temporary directory
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # The literal is single-quoted: SQL reserves double quotes for identifiers and
                # SQLite builds may be configured to reject double-quoted string literals
                cursor.execute(
                    "SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,'userContextId=')")
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')

            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
def _firefox_browser_dirs():
    """Yield the default firefox directories for the current platform."""
    platform = sys.platform
    if platform in ('cygwin', 'win32'):
        yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
        return

    if platform == 'darwin':
        yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
        return

    # every other platform: several candidate locations under the home directory
    for location in (
        '~/.mozilla/firefox',
        '~/snap/firefox/common/.mozilla/firefox',
        '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
    ):
        yield os.path.expanduser(location)
def _firefox_cookie_dbs(roots):
    """Yield each `cookies.sqlite` located in a root, in a direct subdirectory, or under `Profiles/*/`."""
    sub_patterns = ('', '*/', 'Profiles/*/')
    for root in roots:
        base = os.path.abspath(root)
        for sub_pattern in sub_patterns:
            candidate_glob = os.path.join(base, sub_pattern, 'cookies.sqlite')
            yield from glob.iglob(candidate_glob)
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support flag for a Chromium-based browser.

    The result is a dict with the keys 'browser_dir', 'keyring_name' and
    'supports_profiles'. A name missing from the tables raises KeyError.
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    if sys.platform in ('cygwin', 'win32'):
        appdata_local = os.path.expandvars('%LOCALAPPDATA%')
        appdata_roaming = os.path.expandvars('%APPDATA%')
        base_dir, relative_dir = {
            'brave': (appdata_local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (appdata_local, R'Google\Chrome\User Data'),
            'chromium': (appdata_local, R'Chromium\User Data'),
            'edge': (appdata_local, R'Microsoft\Edge\User Data'),
            'opera': (appdata_roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (appdata_local, R'Vivaldi\User Data'),
            'whale': (appdata_local, R'Naver\Naver Whale\User Data'),
        }[browser_name]

    elif sys.platform == 'darwin':
        base_dir = os.path.expanduser('~/Library/Application Support')
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
            'whale': 'Naver/Whale',
        }[browser_name]

    else:
        base_dir = _config_home()
        relative_dir = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
            'whale': 'naver-whale',
        }[browser_name]

    on_mac = sys.platform == 'darwin'
    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
        'whale': 'Whale',
    }[browser_name]

    browsers_without_profiles = {'opera'}

    return {
        'browser_dir': os.path.join(base_dir, relative_dir),
        'keyring_name': keyring_name,
        'supports_profiles': browser_name not in browsers_without_profiles,
    }
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read the cookies of a Chromium-based browser, decrypting values where needed.

    `profile` may be None (search the browser's default directory), a path, or a
    profile name. Returns a YoutubeDLCookieJar, which is empty when sqlite3
    support is missing. Raises FileNotFoundError when no `Cookies` database is found.
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    profiles_supported = settings['supports_profiles']

    if profile is None:
        search_root = settings['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # the decryptor is later given this directory as the browser root
        settings['browser_dir'] = os.path.dirname(profile) if profiles_supported else profile
    elif profiles_supported:
        search_root = os.path.join(settings['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = settings['browser_dir']

    cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(settings['browser_dir'], settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # text columns are fetched as bytes and decoded in _process_chrome_cookie
            cursor.connection.text_factory = bytes
            if 'is_secure' in _get_column_names(cursor, 'cookies'):
                secure_column = 'is_secure'
            else:
                secure_column = 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')

            jar = YoutubeDLCookieJar()
            failed_cookies = unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total_cookie_count = len(rows)
                for index, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {index: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')

            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        except PermissionError as error:
            if compat_os_name == 'nt' and error.errno == 13:
                message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
                logger.error(message)
                raise DownloadError(message)  # force exit
            raise
        finally:
            if cursor is not None:
                cursor.connection.close()
def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
    """Turn one row of the Chromium `cookies` table into a cookie object.

    The text fields arrive as bytes and are decoded here. Returns a pair
    `(is_encrypted, cookie)`; `is_encrypted` is truthy when the plain value was
    empty and an encrypted value was present, and `cookie` is None when
    decryption failed.
    """
    host_key, name, value, path = (field.decode() for field in (host_key, name, value, path))

    is_encrypted = encrypted_value if not value else False
    if is_encrypted:
        value = decryptor.decrypt(encrypted_value)
        if value is None:
            return is_encrypted, None

    # In chrome, session cookies have expires_utc set to 0
    # In our cookie-store, cookies that do not expire should have expires set to None
    expires = expires_utc or None

    cookie = http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expires, discard=False,
        comment=None, comment_url=None, rest={})
    return is_encrypted, cookie
class ChromeCookieDecryptor:
    """
    Base class of the per-platform decryptors for Chromium cookie values.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
                - also attempts empty password if decryption fails
            - v11: AES-CBC encrypted with an OS protected key (keyring)
                - also attempts empty password if decryption fails
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Number of cookies seen per version prefix; the subclasses assign their own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one encrypted cookie value; every subclass has to provide this."""
        raise NotImplementedError('Must be implemented by sub classes')
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the cookie decryptor matching `sys.platform`.

    `browser_root` is only used on Windows, `browser_keyring_name` only on Mac
    and Linux, and `keyring` only on Linux.
    """
    platform = sys.platform
    if platform == 'darwin':
        decryptor = MacChromeCookieDecryptor(browser_keyring_name, logger)
    elif platform in ('win32', 'cygwin'):
        decryptor = WindowsChromeCookieDecryptor(browser_root, logger)
    else:
        decryptor = LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
    return decryptor
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the v10 and v11 cookie values written by Chromium on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        self._v10_key = self.derive_key(b'peanuts')
        self._empty_key = self.derive_key(b'')

    @functools.cached_property
    def _v11_key(self):
        """Key derived from the keyring password, or None when no password was obtained."""
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if password is None:
            return None
        return self.derive_key(password)

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from `password`."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """
        Decrypt a value according to its 3 byte version prefix; None is returned on failure.

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            v11_key = self._v11_key
            if v11_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc_multi(ciphertext, (v11_key, self._empty_key), self._logger)

        self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
        self._cookie_counts['other'] += 1
        return None
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the cookie values written by Chromium on macOS."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        self._cookie_counts = {'v10': 0, 'other': 0}
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        # Without a keyring password there is no key, and v10 values cannot be decrypted
        self._v10_key = self.derive_key(password) if password is not None else None

    @staticmethod
    def derive_key(password):
        """Derive the 16 byte AES key from `password`."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Decrypt a v10 value (None on failure); any other value is returned unchanged."""
        if encrypted_value[:3] != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc_multi(encrypted_value[3:], (self._v10_key,), self._logger)
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the cookie values written by Chromium on Windows."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._cookie_counts = {'v10': 0, 'other': 0}
        self._v10_key = _get_windows_v10_key(browser_root, logger)

    def decrypt(self, encrypted_value):
        """Decrypt a v10 value with AES-GCM; values with any other prefix go through DPAPI."""
        if encrypted_value[:3] != b'v10':
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            return _decrypt_windows_dpapi(encrypted_value, self._logger).decode()

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
        #   kNonceLength
        nonce_length = 96 // 8
        # boringssl
        #   EVP_AEAD_AES_GCM_TAG_LEN
        authentication_tag_length = 16

        # layout after the version prefix: nonce | ciphertext | authentication tag
        payload = encrypted_value[3:]
        nonce = payload[:nonce_length]
        ciphertext = payload[nonce_length:-authentication_tag_length]
        authentication_tag = payload[-authentication_tag_length:]

        return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)
def _extract_safari_cookies(profile, logger):
    """Read and parse safari's binary cookie file.

    A truthy `profile` is used as the path of the cookie file; otherwise two
    default locations are tried. Raises ValueError off macOS and
    FileNotFoundError when no cookie file exists.
    """
    if sys.platform != 'darwin':
        raise ValueError(f'unsupported platform: {sys.platform}')

    if profile:
        cookies_path = os.path.expanduser(profile)
        if not os.path.isfile(cookies_path):
            raise FileNotFoundError('custom safari cookies database not found')
    else:
        cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
        primary_missing = not os.path.isfile(cookies_path)
        if primary_missing:
            logger.debug('Trying secondary cookie location')
            cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
        if primary_missing and not os.path.isfile(cookies_path):
            raise FileNotFoundError('could not find safari cookies database')

    with open(cookies_path, 'rb') as cookies_file:
        raw_cookies = cookies_file.read()

    jar = parse_safari_cookies(raw_cookies, logger=logger)
    logger.info(f'Extracted {len(jar)} cookies from safari')
    return jar
class ParserError(Exception):
    """Raised by DataParser for invalid reads or skips, unexpected bytes, or running out of input."""
class DataParser:
    """Sequential reader over a bytes buffer; `cursor` is the offset of the next unread byte."""

    def __init__(self, data, logger):
        self._data = data
        self._logger = logger
        self.cursor = 0

    def read_bytes(self, num_bytes):
        """Return the next `num_bytes` bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume `len(expected_value)` bytes and raise ParserError unless they equal `expected_value`."""
        value = self.read_bytes(len(expected_value))
        if value == expected_value:
            return
        raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4 byte unsigned integer (little endian unless `big_endian`)."""
        return int.from_bytes(self.read_bytes(4), 'big' if big_endian else 'little')

    def read_double(self, big_endian=False):
        """Read an 8 byte float (little endian unless `big_endian`)."""
        (value,) = struct.unpack('>d' if big_endian else '<d', self.read_bytes(8))
        return value

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the preceding bytes decoded as text."""
        # iter() with a sentinel keeps reading single bytes until the NUL terminator shows up
        return b''.join(iter(lambda: self.read_bytes(1), b'\x00')).decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume `num_bytes` bytes, logging them at debug level; a negative count is an error."""
        if num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor lands on `offset`."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip whatever remains of the buffer."""
        self.skip_to(len(self._data), description)
def _mac_absolute_time_to_posix(timestamp):
    """Convert seconds counted from 2001-01-01 00:00 UTC into an integer POSIX timestamp."""
    mac_epoch = dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc)
    moment = mac_epoch + dt.timedelta(seconds=timestamp)
    return int(moment.timestamp())
def _parse_safari_cookies_header(data, logger):
    """Parse the file header; return the list of page sizes and the offset where the pages start."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of cookie records, adding each parsed cookie to `jar`."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # the record is parsed from its own slice; the page parser then steps over it
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of `data`, add it to `jar` and return the record size.

    A record whose strings are not valid UTF-8 is not added to the jar; its
    size is still returned.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset = parser.read_uint()
    name_offset = parser.read_uint()
    path_offset = parser.read_uint()
    value_offset = parser.read_uint()
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        # the strings are read in the order domain, name, path, value
        strings = []
        for string_offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(string_offset)
            strings.append(parser.read_cstring())
        domain, name, path, value = strings
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the content of a safari binary cookie file into `jar` (a new jar when None) and return it.

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    if jar is None:
        jar = YoutubeDLCookieJar()

    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, jar, logger)
    body.skip_to_end('footer')
    return jar
class _LinuxDesktopEnvironment(Enum):
    """
    Desktop environments told apart by _get_linux_desktop_environment.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
    DesktopEnvironment
    """

    OTHER = auto()
    CINNAMON = auto()
    DEEPIN = auto()
    GNOME = auto()
    KDE3 = auto()
    KDE4 = auto()
    KDE5 = auto()
    KDE6 = auto()
    PANTHEON = auto()
    UKUI = auto()
    UNITY = auto()
    XFCE = auto()
    LXQT = auto()
class _LinuxKeyring(Enum):
    """
    Keyring backends a cookie password may be stored in on Linux.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
    SelectedLinuxBackend
    """

    KWALLET = auto()  # KDE4
    KWALLET5 = auto()
    KWALLET6 = auto()
    GNOMEKEYRING = auto()
    BASICTEXT = auto()


# Names of all keyring backends, in declaration order
SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
def _get_linux_desktop_environment(env, logger):
    """
    Work out the desktop environment from the variables in the mapping `env`.

    XDG_CURRENT_DESKTOP is consulted first, then DESKTOP_SESSION, then the
    GNOME/KDE session markers; OTHER is returned when nothing is recognised.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    desktops = _LinuxDesktopEnvironment
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        for part in (entry.strip() for entry in xdg_current_desktop.split(':')):
            if part == 'Unity':
                gnome_fallback = desktop_session is not None and 'gnome-fallback' in desktop_session
                return desktops.GNOME if gnome_fallback else desktops.UNITY
            if part == 'Deepin':
                return desktops.DEEPIN
            if part == 'GNOME':
                return desktops.GNOME
            if part == 'X-Cinnamon':
                return desktops.CINNAMON
            if part == 'KDE':
                kde_version = env.get('KDE_SESSION_VERSION', None)
                if kde_version == '5':
                    return desktops.KDE5
                if kde_version == '6':
                    return desktops.KDE6
                if kde_version != '4':
                    logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
                return desktops.KDE4
            if part == 'Pantheon':
                return desktops.PANTHEON
            if part == 'XFCE':
                return desktops.XFCE
            if part == 'UKUI':
                return desktops.UKUI
            if part == 'LXQt':
                return desktops.LXQT
        # no entry of the list was recognised
        logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
        return desktops.OTHER

    if desktop_session is not None:
        if desktop_session == 'deepin':
            return desktops.DEEPIN
        if desktop_session in ('mate', 'gnome'):
            return desktops.GNOME
        if desktop_session in ('kde4', 'kde-plasma'):
            return desktops.KDE4
        if desktop_session == 'kde':
            return desktops.KDE4 if 'KDE_SESSION_VERSION' in env else desktops.KDE3
        if 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return desktops.XFCE
        if desktop_session == 'ukui':
            return desktops.UKUI
        logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
        return desktops.OTHER

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return desktops.GNOME
    if 'KDE_FULL_SESSION' in env:
        return desktops.KDE4 if 'KDE_SESSION_VERSION' in env else desktops.KDE3
    return desktops.OTHER
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend that matches the detected desktop environment.

    SelectBackend in [1]

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    desktops = _LinuxDesktopEnvironment
    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')

    if desktop_environment == desktops.KDE4:
        return _LinuxKeyring.KWALLET
    if desktop_environment == desktops.KDE5:
        return _LinuxKeyring.KWALLET5
    if desktop_environment == desktops.KDE6:
        return _LinuxKeyring.KWALLET6
    if desktop_environment in (desktops.KDE3, desktops.LXQT, desktops.OTHER):
        return _LinuxKeyring.BASICTEXT
    # every remaining desktop environment uses the gnome keyring
    return _LinuxKeyring.GNOMEKEYRING
def _get_kwallet_network_wallet(keyring, logger):
    """ The name of the wallet used to store network passwords.

    The name is requested from the kwallet daemon with `dbus-send`; 'kdewallet'
    is returned when that fails or `keyring` is not one of the KWallet backends.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    try:
        # the service name and object path only differ in the trailing kwalletd version
        for backend, daemon in (
            (_LinuxKeyring.KWALLET, 'kwalletd'),
            (_LinuxKeyring.KWALLET5, 'kwalletd5'),
            (_LinuxKeyring.KWALLET6, 'kwalletd6'),
        ):
            if keyring == backend:
                break
        else:
            raise ValueError(keyring)
        service_name = f'org.kde.{daemon}'
        wallet_path = f'/modules/{daemon}'

        stdout, _, returncode = Popen.run([
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet',
        ], text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        network_wallet = stdout.strip()
        logger.debug(f'NetworkWallet = "{network_wallet}"')
        return network_wallet
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
def _get_kwallet_password(browser_keyring_name, keyring, logger):
    """Read the browser's "Safe Storage" password from KWallet via kwallet-query.

    @param browser_keyring_name  browser name used in the wallet entry/folder names
    @param keyring               KWallet keyring member; its `.name` is logged and
                                 it selects the wallet daemon to query
    @param logger                object providing debug(), warning() and error()
    @returns                     the password as bytes; b'' when kwallet-query is
                                 missing, fails, reports no entry, or raises
    """
    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')

    if shutil.which('kwallet-query') is None:
        # NB: each fragment must end with a space, adjacent literals are concatenated as-is
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(keyring, logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet,
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''
        else:
            if stdout.lower().startswith(b'failed to read'):
                logger.debug('failed to read password from kwallet. Using empty string instead')
                # this sometimes occurs in KDE because chrome does not check hasEntry and instead
                # just tries to read the value (which kwallet returns "") whereas kwallet-query
                # checks hasEntry. To verify this:
                # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
                # while starting chrome.
                # this was identified as a bug later and fixed in
                # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
                # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
                return b''
            else:
                logger.debug('password found')
                # only the trailing newline(s) are stripped from the output
                return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" secret from the default secretstorage collection.

    @param browser_keyring_name  browser name; the item label searched for is
                                 '<browser_keyring_name> Safe Storage'
    @param logger                object providing error()
    @returns                     the secret of the first item with a matching label;
                                 b'' when secretstorage is unavailable or no item matches
    """
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    wanted_label = f'{browser_keyring_name} Safe Storage'
    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() == wanted_label:
                return entry.get_secret()
        logger.error('failed to read from keyring')
        return b''
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Obtain the cookie encryption password from the selected Linux keyring.

    @param browser_keyring_name  browser name used to look up the keyring entry
    @param keyring               name of a _LinuxKeyring member, or a falsy value
                                 to auto-detect via _choose_linux_keyring
    @param logger                object providing debug()
    @returns                     the password as returned by the KWallet/Gnome helper,
                                 or None for BASICTEXT
    @raises KeyError             if `keyring` is not a _LinuxKeyring member name
    @raises AssertionError       if the resolved keyring has no handler here
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
    elif keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    elif keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # Raised explicitly rather than via `assert False`: assert statements are removed
    # under `python -O`, which would make this fall through and return None
    raise AssertionError(f'Unknown keyring {keyring}')
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Read the browser's "Safe Storage" password using `security find-generic-password`.

    @param browser_keyring_name  browser name, matched as the keychain 'account' and
                                 (suffixed with ' Safe Storage') as the 'service'
    @param logger                object providing debug() and warning()
    @returns                     stdout of the command as bytes without trailing
                                 newlines; None when the command fails or raises
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    command = [
        'security', 'find-generic-password',
        '-w',  # write password to stdout
        '-a', browser_keyring_name,  # match 'account'
        '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
    ]
    try:
        stdout, _, returncode = Popen.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not returncode:
            return stdout.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
def _get_windows_v10_key(browser_root, logger):
    """
    Locate the newest "Local State" file below `browser_root` and decrypt the
    key stored under os_crypt.encrypted_key with DPAPI.

    Returns None (after logging an error) when no file is found, the key entry
    is missing, or the decoded key lacks the b'DPAPI' prefix.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    local_state_path = _newest(_find_files(browser_root, 'Local State', logger))
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    encrypted_key = base64.b64decode(encoded_key)
    # kDPAPIKeyPrefix in [1]
    dpapi_prefix = b'DPAPI'
    if encrypted_key.startswith(dpapi_prefix):
        # only the bytes after the prefix are the DPAPI-protected blob
        return _decrypt_windows_dpapi(encrypted_key[len(dpapi_prefix):], logger)

    logger.error('invalid key')
    return None
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive `key_length` bytes from `password` and `salt` using PBKDF2-HMAC-SHA1."""
    return hashlib.pbkdf2_hmac(
        'sha1', password, salt, iterations, dklen=key_length)
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
    """Decrypt `ciphertext` with AES-CBC, trying each of `keys` in order.

    @returns  the first decrypted, unpadded plaintext that decodes as UTF-8;
              None (after a one-time warning) when no key yields decodable text
    """
    for candidate_key in keys:
        decrypted = unpad_pkcs7(
            aes_cbc_decrypt_bytes(ciphertext, candidate_key, initialization_vector))
        # undecodable output is taken as a sign of a wrong key; move on to the next one
        with contextlib.suppress(UnicodeDecodeError):
            return decrypted.decode()

    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """Decrypt and verify `ciphertext` with AES-GCM and decode the result as UTF-8.

    @returns  the decoded plaintext; None (after a one-time warning) when the
              decrypt-and-verify step raises ValueError or the plaintext is
              not valid UTF-8
    """
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None
    else:
        try:
            return decrypted.decode()
        except UnicodeDecodeError:
            logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
            return None
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` by calling CryptUnprotectData from crypt32.

    Logs an error and raises DownloadError when the call reports failure;
    otherwise returns the decrypted bytes.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob),  # pDataOut
    )
    if not succeeded:
        message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info'
        logger.error(message)
        raise DownloadError(message)  # force exit

    # copy the output out before handing its buffer to LocalFree
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
def _config_home():
    """Return the user's XDG configuration directory.

    Uses $XDG_CONFIG_HOME when it is set to a non-empty value and falls back to
    the expanded '~/.config' otherwise. The XDG Base Directory Specification
    requires an empty variable to be treated the same as an unset one; returning
    '' would make joined profile paths relative to the working directory.
    """
    return os.environ.get('XDG_CONFIG_HOME') or os.path.expanduser('~/.config')
def _open_database_copy(database_path, tmpdir):
    """Copy the database into `tmpdir` as 'temporary.sqlite' and return a cursor on the copy."""
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
def _get_column_names(cursor, table_name):
    """Return the column names of `table_name` as a list of str.

    The name (second field of each `PRAGMA table_info` row) is decoded, so the
    cursor is expected to yield text columns as bytes.
    """
    column_rows = cursor.execute(f'PRAGMA table_info({table_name})').fetchall()
    return [column_row[1].decode() for column_row in column_rows]
def _newest(files):
    """Return the path from `files` with the latest modification time, or None if there are none."""
    def modified_at(path):
        # lstat: a symlink is judged by its own timestamp, not its target's
        return os.lstat(path).st_mtime

    return max(files, key=modified_at, default=None)
def _find_files(root, filename, logger):
    """Walk `root` and yield the path of every file named exactly `filename`.

    Progress (number of files inspected so far) is reported through the
    progress bar created for `logger`.
    """
    # if there are multiple browser profiles, take the most recently used one
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry != filename:
                    continue
                yield os.path.join(directory, entry)
def _merge_cookie_jars(jars):
    """Combine `jars` into a new YoutubeDLCookieJar.

    Cookies are set in iteration order. The result's filename is that of the
    last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source_jar in jars:
        for cookie in source_jar:
            merged.set_cookie(cookie)
        source_filename = source_jar.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged
def _is_path(value):
    """Return True if `value` contains a path separator of the current platform."""
    # os.path.altsep is None on platforms with a single separator, hence the filter
    for separator in filter(None, (os.path.sep, os.path.altsep)):
        if separator in value:
            return True
    return False
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """Validate a browser specification and normalise its profile.

    @returns            (browser_name, profile, keyring, container); `profile` is
                        replaced by its expanded form when that form is a path
    @raises ValueError  for a browser outside SUPPORTED_BROWSERS or a keyring that
                        is neither None nor in SUPPORTED_KEYRINGS
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')

    if profile is not None:
        expanded_profile = expand_path(profile)
        # a bare profile name is passed through untouched
        if _is_path(expanded_profile):
            profile = expanded_profile
    return browser_name, profile, keyring, container
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Attribute names that describe the preceding cookie instead of starting a new one
    _RESERVED = {
        'expires',
        'path',
        'comment',
        'domain',
        'max-age',
        'secure',
        'httponly',
        'version',
        'samesite',
    }

    # Reserved attributes that may appear without a value
    _FLAGS = {'secure', 'httponly'}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r'''
        \s* # Optional whitespace at start of cookie
        (?P<key> # Start of group 'key'
        [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
        ) # End of group 'key'
        ( # Optional group: there may not be a value.
        \s*=\s* # Equal Sign
        ( # Start of potential value
        (?P<val> # Start of group 'val'
        "(?:[^\\"]|\\.)*" # Any doublequoted string
        | # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        | # or
        [''' + _LEGAL_VALUE_CHARS + r''']* # Any word or empty string
        ) # End of group 'val'
        | # or
        (?P<bad>(?:\\;|[^;])*?) # 'bad' group fallback for invalid values
        ) # End of potential value
        )? # End of optional value group
        \s* # Any number of spaces.
        (\s+|;|$) # Ending either at space, semicolon, or EOS.
        ''', re.ASCII | re.VERBOSE)

    def load(self, data):
        """Load cookies from `data`, skipping invalid entries instead of raising.

        Non-string input is delegated to SimpleCookie.load() unchanged.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        # Morsel that following reserved attributes are attached to;
        # None means such attributes are skipped until the next valid cookie
        current = None
        for found in self._COOKIE_PATTERN.finditer(data):
            if found.group('bad'):
                current = None
                continue

            name = found.group('key')
            raw_value = found.group('val')

            dollar_prefixed = name.startswith('$')
            if dollar_prefixed:
                name = name[1:]

            attribute = name.lower()
            if attribute not in self._RESERVED:
                # `$name` attributes and valueless names never start a cookie
                if dollar_prefixed or raw_value is None:
                    current = None
                    continue

                current = self.get(name, http.cookies.Morsel())
                decoded_value, encoded_value = self.value_decode(raw_value)
                current.set(name, decoded_value, encoded_value)
                self[name] = current
                continue

            if current is None:
                continue

            if raw_value is not None:
                attribute_value, _ = self.value_decode(raw_value)
            elif attribute in self._FLAGS:
                attribute_value = True
            else:
                # a reserved attribute other than a flag needs a value
                current = None
                continue

            current[name] = attribute_value
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    Netscape-format cookie jar whose file may be a path or an open text stream.

    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        # The parent never receives the filename; it is stored below
        super().__init__(None, *args, **kwargs)
        self.filename = os.fspath(filename) if is_path_like(filename) else filename

    @staticmethod
    def _true_or_false(cndn):
        """Return the cookie-file spelling ('TRUE'/'FALSE') of a condition."""
        if cndn:
            return 'TRUE'
        return 'FALSE'

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """Yield a text file object for `file`.

        Path-like values are opened (UTF-8) and closed on exit. Anything else
        is yielded as-is, after `truncate(0)` when `write` is set.
        """
        if not is_path_like(file):
            if write:
                file.truncate(0)
            yield file
            return

        mode = 'w' if write else 'r'
        with open(file, mode, encoding='utf-8') as f:
            yield f

    def _really_save(self, f, ignore_discard, ignore_expires):
        """Write one tab-separated line per cookie to `f`."""
        now = time.time()
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue

            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name

            fields = (
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value,
            )
            f.write('\t'.join(fields) + '\n')

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """Load cookies from a file."""
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker, then validate anything that is not a comment/blank
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            fields = line.split('\t')
            if len(fields) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError(f'invalid length {len(fields)}')
            expires_at = self._CookieFileEntry(*fields).expires_at
            if expires_at and not expires_at.isdigit():
                raise http.cookiejar.LoadError(f'invalid expires at {expires_at}')
            return line

        # Valid lines are collected here and handed to the parent's parser below
        cf = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    cf.write(prepare_line(line))
                except http.cookiejar.LoadError as e:
                    if f'{line.strip()} '[0] in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
        cf.seek(0)
        self._really_load(cf, filename, ignore_discard, ignore_expires)
        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires != 0:
                continue
            cookie.expires = None
            cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        request = urllib.request.Request(normalize_url(sanitize_url(url)))
        self.add_cookie_header(request)
        return request.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        request = urllib.request.Request(normalize_url(sanitize_url(url)))
        return self._cookies_for_request(request)

    def clear(self, *args, **kwargs):
        """Clear cookies like the parent class, ignoring KeyError for missing entries."""
        try:
            return super().clear(*args, **kwargs)
        except KeyError:
            return None