[cleanup] Misc (#10807)
[yt-dlp.git] / yt_dlp / cookies.py
blobcff8d74a7466a61247b428abcdec49327c24f437
1 import base64
2 import collections
3 import contextlib
4 import datetime as dt
5 import functools
6 import glob
7 import hashlib
8 import http.cookiejar
9 import http.cookies
10 import io
11 import json
12 import os
13 import re
14 import shutil
15 import struct
16 import subprocess
17 import sys
18 import tempfile
19 import time
20 import urllib.request
21 from enum import Enum, auto
23 from .aes import (
24 aes_cbc_decrypt_bytes,
25 aes_gcm_decrypt_and_verify_bytes,
26 unpad_pkcs7,
28 from .compat import compat_os_name
29 from .dependencies import (
30 _SECRETSTORAGE_UNAVAILABLE_REASON,
31 secretstorage,
32 sqlite3,
34 from .minicurses import MultilinePrinter, QuietMultilinePrinter
35 from .utils import (
36 DownloadError,
37 Popen,
38 error_to_str,
39 expand_path,
40 is_path_like,
41 sanitize_url,
42 str_or_none,
43 try_call,
44 write_string,
46 from .utils._utils import _YDLLogger
47 from .utils.networking import normalize_url
# Browsers whose cookie store follows the Chromium layout
CHROMIUM_BASED_BROWSERS = {
    'brave',
    'chrome',
    'chromium',
    'edge',
    'opera',
    'vivaldi',
    'whale',
}
# Every browser name accepted by extract_cookies_from_browser()
SUPPORTED_BROWSERS = {*CHROMIUM_BASED_BROWSERS, 'firefox', 'safari'}
class YDLLogger(_YDLLogger):
    """Logger handed to the cookie extractors; can also supply a progress printer."""

    def warning(self, message, only_once=False):  # compat
        """Forward to the parent ``warning``, passing ``only_once`` on as ``once``."""
        return super().warning(message, once=only_once)

    class ProgressBar(MultilinePrinter):
        """Printer that rewrites line 0, at most once per ``_DELAY`` seconds."""

        _DELAY = 0.1
        _timer = 0

        def print(self, message):
            """Show *message* prefixed with ``[Cookies]`` unless one was shown too recently."""
            elapsed = time.time() - self._timer
            if elapsed <= self._DELAY:
                return
            self.print_at_line(f'[Cookies] {message}', 0)
            self._timer = time.time()

    def progress_bar(self):
        """Return a context manager with a print method. (Optional)"""
        # Do not print to files/pipes, loggers, or when --no-progress is used
        ydl = self._ydl
        if not ydl:
            return None
        params = ydl.params
        if params.get('noprogress') or params.get('logger'):
            return None

        stream = ydl._out_files.error
        try:
            interactive = stream.isatty()
        except BaseException:
            # Any failure to query the stream is treated as "not a terminal"
            return None
        if not interactive:
            return None
        return self.ProgressBar(stream, preserve_output=False)
79 def _create_progress_bar(logger):
80 if hasattr(logger, 'progress_bar'):
81 printer = logger.progress_bar()
82 if printer:
83 return printer
84 printer = QuietMultilinePrinter()
85 printer.print = lambda _: None
86 return printer
def load_cookies(cookie_file, browser_specification, ydl):
    """Build one cookie jar from a browser specification and/or a cookie file.

    @param cookie_file            path-like or other object accepted by YoutubeDLCookieJar, or None
    @param browser_specification  arguments for _parse_browser_specification, or None
    @param ydl                    object passed to YDLLogger for the browser extraction
    @returns                      the result of merging all jars that were collected
    """
    jars = []

    if browser_specification is not None:
        browser_name, profile, keyring, container = _parse_browser_specification(*browser_specification)
        browser_jar = extract_cookies_from_browser(
            browser_name, profile, YDLLogger(ydl), keyring=keyring, container=container)
        jars.append(browser_jar)

    if cookie_file is not None:
        is_filename = is_path_like(cookie_file)
        if is_filename:
            cookie_file = expand_path(cookie_file)

        file_jar = YoutubeDLCookieJar(cookie_file)
        # Paths are only loaded when readable; non-path objects are always loaded
        if not is_filename or os.access(cookie_file, os.R_OK):
            file_jar.load()
        jars.append(file_jar)

    return _merge_cookie_jars(jars)
def extract_cookies_from_browser(browser_name, profile=None, logger=YDLLogger(), *, keyring=None, container=None):
    """Dispatch to the extractor matching *browser_name*.

    ``container`` is only passed to the firefox extractor and ``keyring`` only to
    the chromium-based one. Raises ValueError for any other browser name.
    """
    if browser_name == 'firefox':
        return _extract_firefox_cookies(profile, container, logger)
    if browser_name == 'safari':
        return _extract_safari_cookies(profile, logger)
    if browser_name in CHROMIUM_BASED_BROWSERS:
        return _extract_chrome_cookies(browser_name, profile, keyring, logger)
    raise ValueError(f'unknown browser: {browser_name}')
def _extract_firefox_cookies(profile, container, logger):
    """Read cookies from the newest firefox ``cookies.sqlite`` that can be found.

    @param profile    None (search the default browser dirs), a path, or a profile
                      name that is joined onto each default browser dir
    @param container  None for all cookies, 'none' for cookies outside any container,
                      otherwise a container name, its l10nID ('userContextX.label')
                      or the label captured from that l10nID ('X')
    @param logger     logger providing info/debug/warning
    @returns          YoutubeDLCookieJar (empty when sqlite3 is unavailable)
    @raises FileNotFoundError  no cookie database, or containers.json unreadable
    @raises ValueError         the requested container has no integer userContextId
    """
    logger.info('Extracting cookies from firefox')
    if not sqlite3:
        logger.warning('Cannot extract cookies from firefox without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    if profile is None:
        search_roots = list(_firefox_browser_dirs())
    elif _is_path(profile):
        search_roots = [profile]
    else:
        search_roots = [os.path.join(path, profile) for path in _firefox_browser_dirs()]
    search_root = ', '.join(map(repr, search_roots))

    cookie_database_path = _newest(_firefox_cookie_dbs(search_roots))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find firefox cookies database in {search_root}')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    container_id = None
    if container not in (None, 'none'):
        containers_path = os.path.join(os.path.dirname(cookie_database_path), 'containers.json')
        if not os.path.isfile(containers_path) or not os.access(containers_path, os.R_OK):
            raise FileNotFoundError(f'could not read containers.json in {search_root}')
        with open(containers_path, encoding='utf8') as containers:
            identities = json.load(containers).get('identities', [])

        def _accepted_labels(identity):
            # Accept the explicit name, the full l10nID, and the label captured from the
            # l10nID; previously the capture group was never used, so 'X' could not match
            match = try_call(lambda: re.fullmatch(r'userContext([^\.]+)\.label', identity['l10nID']))
            return (identity.get('name'), *(match.group(0, 1) if match else ()))

        container_id = next((
            identity.get('userContextId') for identity in identities
            if container in _accepted_labels(identity)), None)
        if not isinstance(container_id, int):
            raise ValueError(f'could not find firefox container "{container}" in containers.json')

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            # Work on a copy of the database inside the temporary directory
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            if isinstance(container_id, int):
                logger.debug(
                    f'Only loading cookies from firefox container "{container}", ID {container_id}')
                cursor.execute(
                    'SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE originAttributes LIKE ? OR originAttributes LIKE ?',
                    (f'%userContextId={container_id}', f'%userContextId={container_id}&%'))
            elif container == 'none':
                logger.debug('Only loading cookies not belonging to any container')
                # The string literal must be single-quoted: double quotes denote an
                # identifier in SQL and only work in SQLite through a legacy fallback
                cursor.execute(
                    "SELECT host, name, value, path, expiry, isSecure FROM moz_cookies WHERE NOT INSTR(originAttributes,'userContextId=')")
            else:
                cursor.execute('SELECT host, name, value, path, expiry, isSecure FROM moz_cookies')

            jar = YoutubeDLCookieJar()
            with _create_progress_bar(logger) as progress_bar:
                table = cursor.fetchall()
                total_cookie_count = len(table)
                for i, (host, name, value, path, expiry, is_secure) in enumerate(table):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    cookie = http.cookiejar.Cookie(
                        version=0, name=name, value=value, port=None, port_specified=False,
                        domain=host, domain_specified=bool(host), domain_initial_dot=host.startswith('.'),
                        path=path, path_specified=bool(path), secure=is_secure, expires=expiry, discard=False,
                        comment=None, comment_url=None, rest={})
                    jar.set_cookie(cookie)
            logger.info(f'Extracted {len(jar)} cookies from firefox')
            return jar
        finally:
            if cursor is not None:
                cursor.connection.close()
189 def _firefox_browser_dirs():
190 if sys.platform in ('cygwin', 'win32'):
191 yield os.path.expandvars(R'%APPDATA%\Mozilla\Firefox\Profiles')
193 elif sys.platform == 'darwin':
194 yield os.path.expanduser('~/Library/Application Support/Firefox/Profiles')
196 else:
197 yield from map(os.path.expanduser, (
198 '~/.mozilla/firefox',
199 '~/snap/firefox/common/.mozilla/firefox',
200 '~/.var/app/org.mozilla.firefox/.mozilla/firefox',
204 def _firefox_cookie_dbs(roots):
205 for root in map(os.path.abspath, roots):
206 for pattern in ('', '*/', 'Profiles/*/'):
207 yield from glob.iglob(os.path.join(root, pattern, 'cookies.sqlite'))
def _get_chromium_based_browser_settings(browser_name):
    """Return the data directory, keyring name and profile support of a chromium-based browser.

    @returns  dict with the keys 'browser_dir', 'keyring_name' and 'supports_profiles'
    @raises KeyError  if browser_name is not one of the known browsers
    """
    # https://chromium.googlesource.com/chromium/src/+/HEAD/docs/user_data_dir.md
    platform = sys.platform
    if platform in ('cygwin', 'win32'):
        local = os.path.expandvars('%LOCALAPPDATA%')
        roaming = os.path.expandvars('%APPDATA%')
        base, relative = {
            'brave': (local, R'BraveSoftware\Brave-Browser\User Data'),
            'chrome': (local, R'Google\Chrome\User Data'),
            'chromium': (local, R'Chromium\User Data'),
            'edge': (local, R'Microsoft\Edge\User Data'),
            'opera': (roaming, R'Opera Software\Opera Stable'),
            'vivaldi': (local, R'Vivaldi\User Data'),
            'whale': (local, R'Naver\Naver Whale\User Data'),
        }[browser_name]

    elif platform == 'darwin':
        base = os.path.expanduser('~/Library/Application Support')
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'Google/Chrome',
            'chromium': 'Chromium',
            'edge': 'Microsoft Edge',
            'opera': 'com.operasoftware.Opera',
            'vivaldi': 'Vivaldi',
            'whale': 'Naver/Whale',
        }[browser_name]

    else:
        base = _config_home()
        relative = {
            'brave': 'BraveSoftware/Brave-Browser',
            'chrome': 'google-chrome',
            'chromium': 'chromium',
            'edge': 'microsoft-edge',
            'opera': 'opera',
            'vivaldi': 'vivaldi',
            'whale': 'naver-whale',
        }[browser_name]

    browser_dir = os.path.join(base, relative)

    # Linux keyring names can be determined by snooping on dbus while opening the browser in KDE:
    # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
    on_mac = platform == 'darwin'
    keyring_name = {
        'brave': 'Brave',
        'chrome': 'Chrome',
        'chromium': 'Chromium',
        'edge': 'Microsoft Edge' if on_mac else 'Chromium',
        'opera': 'Opera' if on_mac else 'Chromium',
        'vivaldi': 'Vivaldi' if on_mac else 'Chrome',
        'whale': 'Whale',
    }[browser_name]

    browsers_without_profiles = {'opera'}
    supports_profiles = browser_name not in browsers_without_profiles

    return {
        'browser_dir': browser_dir,
        'keyring_name': keyring_name,
        'supports_profiles': supports_profiles,
    }
def _extract_chrome_cookies(browser_name, profile, keyring, logger):
    """Read and decrypt the cookies of a chromium-based browser.

    @param browser_name  key understood by _get_chromium_based_browser_settings
    @param profile       None, a path, or a profile name below the browser dir
    @param keyring       passed through to get_cookie_decryptor
    @returns             YoutubeDLCookieJar (empty when sqlite3 is unavailable)
    @raises FileNotFoundError  when no 'Cookies' database is found
    """
    logger.info(f'Extracting cookies from {browser_name}')

    if not sqlite3:
        logger.warning(f'Cannot extract cookies from {browser_name} without sqlite3 support. '
                       'Please use a Python interpreter compiled with sqlite3 support')
        return YoutubeDLCookieJar()

    settings = _get_chromium_based_browser_settings(browser_name)
    has_profiles = settings['supports_profiles']

    if profile is None:
        search_root = settings['browser_dir']
    elif _is_path(profile):
        search_root = profile
        # The browser dir handed to the decryptor is derived from the given path
        settings['browser_dir'] = os.path.dirname(profile) if has_profiles else profile
    elif has_profiles:
        search_root = os.path.join(settings['browser_dir'], profile)
    else:
        logger.error(f'{browser_name} does not support profiles')
        search_root = settings['browser_dir']

    cookie_database_path = _newest(_find_files(search_root, 'Cookies', logger))
    if cookie_database_path is None:
        raise FileNotFoundError(f'could not find {browser_name} cookies database in "{search_root}"')
    logger.debug(f'Extracting cookies from: "{cookie_database_path}"')

    decryptor = get_cookie_decryptor(settings['browser_dir'], settings['keyring_name'], logger, keyring=keyring)

    with tempfile.TemporaryDirectory(prefix='yt_dlp') as tmpdir:
        cursor = None
        try:
            cursor = _open_database_copy(cookie_database_path, tmpdir)
            # _process_chrome_cookie decodes the text columns itself
            cursor.connection.text_factory = bytes
            if 'is_secure' in _get_column_names(cursor, 'cookies'):
                secure_column = 'is_secure'
            else:
                secure_column = 'secure'
            cursor.execute(f'SELECT host_key, name, value, encrypted_value, path, expires_utc, {secure_column} FROM cookies')

            jar = YoutubeDLCookieJar()
            failed_cookies = unencrypted_cookies = 0
            with _create_progress_bar(logger) as progress_bar:
                rows = cursor.fetchall()
                total_cookie_count = len(rows)
                for i, row in enumerate(rows):
                    progress_bar.print(f'Loading cookie {i: 6d}/{total_cookie_count: 6d}')
                    is_encrypted, cookie = _process_chrome_cookie(decryptor, *row)
                    if not cookie:
                        failed_cookies += 1
                        continue
                    if not is_encrypted:
                        unencrypted_cookies += 1
                    jar.set_cookie(cookie)

            failed_message = f' ({failed_cookies} could not be decrypted)' if failed_cookies > 0 else ''
            logger.info(f'Extracted {len(jar)} cookies from {browser_name}{failed_message}')

            counts = decryptor._cookie_counts.copy()
            counts['unencrypted'] = unencrypted_cookies
            logger.debug(f'cookie version breakdown: {counts}')
            return jar
        except PermissionError as error:
            if compat_os_name == 'nt' and error.errno == 13:
                message = 'Could not copy Chrome cookie database. See https://github.com/yt-dlp/yt-dlp/issues/7271 for more info'
                logger.error(message)
                raise DownloadError(message)  # force exit
            raise
        finally:
            if cursor is not None:
                cursor.connection.close()
342 def _process_chrome_cookie(decryptor, host_key, name, value, encrypted_value, path, expires_utc, is_secure):
343 host_key = host_key.decode()
344 name = name.decode()
345 value = value.decode()
346 path = path.decode()
347 is_encrypted = not value and encrypted_value
349 if is_encrypted:
350 value = decryptor.decrypt(encrypted_value)
351 if value is None:
352 return is_encrypted, None
354 # In chrome, session cookies have expires_utc set to 0
355 # In our cookie-store, cookies that do not expire should have expires set to None
356 if not expires_utc:
357 expires_utc = None
359 return is_encrypted, http.cookiejar.Cookie(
360 version=0, name=name, value=value, port=None, port_specified=False,
361 domain=host_key, domain_specified=bool(host_key), domain_initial_dot=host_key.startswith('.'),
362 path=path, path_specified=bool(path), secure=is_secure, expires=expires_utc, discard=False,
363 comment=None, comment_url=None, rest={})
class ChromeCookieDecryptor:
    """
    Base class of the per-platform chrome cookie decryptors.

    Overview:

        Linux:
        - cookies are either v10 or v11
            - v10: AES-CBC encrypted with a fixed key
                - also attempts empty password if decryption fails
            - v11: AES-CBC encrypted with an OS protected key (keyring)
                - also attempts empty password if decryption fails
            - v11 keys can be stored in various places depending on the active desktop environment [2]

        Mac:
        - cookies are either v10 or not v10
            - v10: AES-CBC encrypted with an OS protected key (keyring) and more key derivation iterations than linux
            - not v10: 'old data' stored as plaintext

        Windows:
        - cookies are either v10 or not v10
            - v10: AES-GCM encrypted with a key which is encrypted with DPAPI
            - not v10: encrypted with DPAPI

    Sources:
    - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/
    - [2] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_linux.cc
        - KeyStorageLinux::CreateService
    """

    # Per-version counters; the subclasses assign their own dict in __init__
    _cookie_counts = {}

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value; every subclass has to provide this."""
        raise NotImplementedError('Must be implemented by sub classes')
def get_cookie_decryptor(browser_root, browser_keyring_name, logger, *, keyring=None):
    """Create the decryptor for the current platform.

    ``browser_root`` is only used on Windows, ``browser_keyring_name`` on macOS and
    Linux, and ``keyring`` only on Linux.
    """
    platform = sys.platform
    if platform == 'darwin':
        return MacChromeCookieDecryptor(browser_keyring_name, logger)
    if platform in ('win32', 'cygwin'):
        return WindowsChromeCookieDecryptor(browser_root, logger)
    return LinuxChromeCookieDecryptor(browser_keyring_name, logger, keyring=keyring)
class LinuxChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the v10/v11 cookie formats handled on Linux."""

    def __init__(self, browser_keyring_name, logger, *, keyring=None):
        self._logger = logger
        self._browser_keyring_name = browser_keyring_name
        self._keyring = keyring
        self._cookie_counts = {'v10': 0, 'v11': 0, 'other': 0}
        # v10 uses a fixed password; the empty password is the fallback for both versions
        self._v10_key = self.derive_key(b'peanuts')
        self._empty_key = self.derive_key(b'')

    @functools.cached_property
    def _v11_key(self):
        """Key derived from the keyring password, or None when no password was obtained."""
        password = _get_linux_keyring_password(self._browser_keyring_name, self._keyring, self._logger)
        if password is None:
            return None
        return self.derive_key(password)

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key for *password*."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_linux.cc
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1, key_length=16)

    def decrypt(self, encrypted_value):
        """

        following the same approach as the fix in [1]: if cookies fail to decrypt then attempt to decrypt
        with an empty password. The failure detection is not the same as what chromium uses so the
        results won't be perfect

        References:
            - [1] https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/
                - a bugfix to try an empty password as a fallback
        """
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key, self._empty_key), self._logger)

        if version == b'v11':
            self._cookie_counts['v11'] += 1
            keyring_key = self._v11_key
            if keyring_key is None:
                self._logger.warning('cannot decrypt v11 cookies: no key found', only_once=True)
                return None
            return _decrypt_aes_cbc_multi(ciphertext, (keyring_key, self._empty_key), self._logger)

        self._logger.warning(f'unknown cookie version: "{version}"', only_once=True)
        self._cookie_counts['other'] += 1
        return None
class MacChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for the v10 format handled on macOS; other values are passed through."""

    def __init__(self, browser_keyring_name, logger):
        self._logger = logger
        self._cookie_counts = {'v10': 0, 'other': 0}
        password = _get_mac_keyring_password(browser_keyring_name, logger)
        # Without a password there is no key and v10 cookies cannot be decrypted
        self._v10_key = self.derive_key(password) if password is not None else None

    @staticmethod
    def derive_key(password):
        """Derive the 16-byte AES key for *password*."""
        # values from
        # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
        return pbkdf2_sha1(password, salt=b'saltysalt', iterations=1003, key_length=16)

    def decrypt(self, encrypted_value):
        """Return the decrypted v10 value, None without a key, or the input for other prefixes."""
        version, ciphertext = encrypted_value[:3], encrypted_value[3:]

        if version != b'v10':
            self._cookie_counts['other'] += 1
            # other prefixes are considered 'old data' which were stored as plaintext
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_mac.mm
            return encrypted_value

        self._cookie_counts['v10'] += 1
        if self._v10_key is None:
            self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
            return None

        return _decrypt_aes_cbc_multi(ciphertext, (self._v10_key,), self._logger)
class WindowsChromeCookieDecryptor(ChromeCookieDecryptor):
    """Decryptor for Windows: AES-GCM for v10 values, DPAPI for everything else."""

    def __init__(self, browser_root, logger):
        self._logger = logger
        self._v10_key = _get_windows_v10_key(browser_root, logger)
        self._cookie_counts = {'v10': 0, 'other': 0}

    def decrypt(self, encrypted_value):
        """Decrypt one cookie value.

        @returns  the decrypted value, or None when it cannot be decrypted
        """
        version = encrypted_value[:3]
        ciphertext = encrypted_value[3:]

        if version == b'v10':
            self._cookie_counts['v10'] += 1
            if self._v10_key is None:
                self._logger.warning('cannot decrypt v10 cookies: no key found', only_once=True)
                return None

            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            #   kNonceLength
            nonce_length = 96 // 8
            # boringssl
            #   EVP_AEAD_AES_GCM_TAG_LEN
            authentication_tag_length = 16

            # Layout after the version prefix: nonce | ciphertext | authentication tag
            raw_ciphertext = ciphertext
            nonce = raw_ciphertext[:nonce_length]
            ciphertext = raw_ciphertext[nonce_length:-authentication_tag_length]
            authentication_tag = raw_ciphertext[-authentication_tag_length:]

            return _decrypt_aes_gcm(ciphertext, self._v10_key, nonce, authentication_tag, self._logger)

        else:
            self._cookie_counts['other'] += 1
            # any other prefix means the data is DPAPI encrypted
            # https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
            plaintext = _decrypt_windows_dpapi(encrypted_value, self._logger)
            # NOTE(review): the DPAPI helper is defined elsewhere; the other decrypt paths
            # signal failure with None, so guard against that instead of raising
            # AttributeError on `.decode()` - confirm against the helper
            if plaintext is None:
                return None
            return plaintext.decode()
528 def _extract_safari_cookies(profile, logger):
529 if sys.platform != 'darwin':
530 raise ValueError(f'unsupported platform: {sys.platform}')
532 if profile:
533 cookies_path = os.path.expanduser(profile)
534 if not os.path.isfile(cookies_path):
535 raise FileNotFoundError('custom safari cookies database not found')
537 else:
538 cookies_path = os.path.expanduser('~/Library/Cookies/Cookies.binarycookies')
540 if not os.path.isfile(cookies_path):
541 logger.debug('Trying secondary cookie location')
542 cookies_path = os.path.expanduser('~/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies')
543 if not os.path.isfile(cookies_path):
544 raise FileNotFoundError('could not find safari cookies database')
546 with open(cookies_path, 'rb') as f:
547 cookies_data = f.read()
549 jar = parse_safari_cookies(cookies_data, logger=logger)
550 logger.info(f'Extracted {len(jar)} cookies from safari')
551 return jar
class ParserError(Exception):
    """Raised by DataParser on invalid reads or unexpected data."""
class DataParser:
    """Sequential reader over a bytes buffer; ``cursor`` is the current offset."""

    def __init__(self, data, logger):
        self._data = data
        self._logger = logger
        self.cursor = 0

    def read_bytes(self, num_bytes):
        """Return the next *num_bytes* bytes and advance the cursor past them."""
        if num_bytes < 0:
            raise ParserError(f'invalid read of {num_bytes} bytes')
        start, stop = self.cursor, self.cursor + num_bytes
        if stop > len(self._data):
            raise ParserError('reached end of input')
        self.cursor = stop
        return self._data[start:stop]

    def expect_bytes(self, expected_value, message):
        """Consume ``len(expected_value)`` bytes and raise ParserError if they differ."""
        value = self.read_bytes(len(expected_value))
        if value != expected_value:
            raise ParserError(f'unexpected value: {value} != {expected_value} ({message})')

    def read_uint(self, big_endian=False):
        """Read a 4-byte unsigned integer (little-endian unless *big_endian*)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(f'{byte_order}I', self.read_bytes(4))
        return number

    def read_double(self, big_endian=False):
        """Read an 8-byte double (little-endian unless *big_endian*)."""
        byte_order = '>' if big_endian else '<'
        (number,) = struct.unpack(f'{byte_order}d', self.read_bytes(8))
        return number

    def read_cstring(self):
        """Read up to and including the next NUL byte; return the decoded text before it."""
        # iter() with a sentinel stops once the terminating NUL has been consumed
        return b''.join(iter(lambda: self.read_bytes(1), b'\x00')).decode()

    def skip(self, num_bytes, description='unknown'):
        """Consume *num_bytes* bytes, logging them at debug level; negative counts are an error."""
        if num_bytes > 0:
            skipped = self.read_bytes(num_bytes)
            self._logger.debug(f'skipping {num_bytes} bytes ({description}): {skipped!r}')
        elif num_bytes < 0:
            raise ParserError(f'invalid skip of {num_bytes} bytes')

    def skip_to(self, offset, description='unknown'):
        """Skip forward so that the cursor ends up at *offset*."""
        self.skip(offset - self.cursor, description)

    def skip_to_end(self, description='unknown'):
        """Skip everything that remains in the buffer."""
        self.skip_to(len(self._data), description)
609 def _mac_absolute_time_to_posix(timestamp):
610 return int((dt.datetime(2001, 1, 1, 0, 0, tzinfo=dt.timezone.utc) + dt.timedelta(seconds=timestamp)).timestamp())
def _parse_safari_cookies_header(data, logger):
    """Parse the file header.

    @returns  (page_sizes, offset) where offset is the position right after the header
    """
    parser = DataParser(data, logger)
    parser.expect_bytes(b'cook', 'database signature')
    page_count = parser.read_uint(big_endian=True)
    page_sizes = []
    for _ in range(page_count):
        page_sizes.append(parser.read_uint(big_endian=True))
    return page_sizes, parser.cursor
def _parse_safari_cookies_page(data, jar, logger):
    """Parse one page of cookie records, adding every cookie to *jar*."""
    parser = DataParser(data, logger)
    parser.expect_bytes(b'\x00\x00\x01\x00', 'page signature')
    number_of_cookies = parser.read_uint()
    record_offsets = [parser.read_uint() for _ in range(number_of_cookies)]
    if not record_offsets:
        logger.debug(f'a cookies page of size {len(data)} has no cookies')
        return

    parser.skip_to(record_offsets[0], 'unknown page header field')

    with _create_progress_bar(logger) as progress_bar:
        for index, offset in enumerate(record_offsets):
            progress_bar.print(f'Loading cookie {index: 6d}/{number_of_cookies: 6d}')
            parser.skip_to(offset, 'space between records')
            # The record is parsed from its own slice; the page parser then steps over it
            consumed = _parse_safari_cookies_record(data[offset:], jar, logger)
            parser.read_bytes(consumed)
    parser.skip_to_end('space in between pages')
def _parse_safari_cookies_record(data, jar, logger):
    """Parse the cookie record at the start of *data* and add it to *jar*.

    @returns  the record size read from the record header. It is also returned,
              without adding a cookie, when a string fails to decode.
    """
    parser = DataParser(data, logger)
    record_size = parser.read_uint()
    parser.skip(4, 'unknown record field 1')
    flags = parser.read_uint()
    is_secure = bool(flags & 0x0001)
    parser.skip(4, 'unknown record field 2')
    domain_offset, name_offset, path_offset, value_offset = (parser.read_uint() for _ in range(4))
    parser.skip(8, 'unknown record field 3')
    expiration_date = _mac_absolute_time_to_posix(parser.read_double())
    _creation_date = _mac_absolute_time_to_posix(parser.read_double())  # noqa: F841

    try:
        strings = []
        for offset in (domain_offset, name_offset, path_offset, value_offset):
            parser.skip_to(offset)
            strings.append(parser.read_cstring())
        domain, name, path, value = strings
    except UnicodeDecodeError:
        logger.warning('failed to parse Safari cookie because UTF-8 decoding failed', only_once=True)
        return record_size

    parser.skip_to(record_size, 'space at the end of the record')

    jar.set_cookie(http.cookiejar.Cookie(
        version=0, name=name, value=value, port=None, port_specified=False,
        domain=domain, domain_specified=bool(domain), domain_initial_dot=domain.startswith('.'),
        path=path, path_specified=bool(path), secure=is_secure, expires=expiration_date, discard=False,
        comment=None, comment_url=None, rest={}))
    return record_size
def parse_safari_cookies(data, jar=None, logger=YDLLogger()):
    """
    Parse the contents of a safari cookie file into *jar* (a new jar when None).

    References:
        - https://github.com/libyal/dtformats/blob/main/documentation/Safari%20Cookies.asciidoc
            - this data appears to be out of date but the important parts of the database structure is the same
            - there are a few bytes here and there which are skipped during parsing
    """
    target = YoutubeDLCookieJar() if jar is None else jar
    page_sizes, body_start = _parse_safari_cookies_header(data, logger)
    body = DataParser(data[body_start:], logger)
    for size in page_sizes:
        page = body.read_bytes(size)
        _parse_safari_cookies_page(page, target, logger)
    body.skip_to_end('footer')
    return target
700 class _LinuxDesktopEnvironment(Enum):
702 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.h
703 DesktopEnvironment
705 OTHER = auto()
706 CINNAMON = auto()
707 DEEPIN = auto()
708 GNOME = auto()
709 KDE3 = auto()
710 KDE4 = auto()
711 KDE5 = auto()
712 KDE6 = auto()
713 PANTHEON = auto()
714 UKUI = auto()
715 UNITY = auto()
716 XFCE = auto()
717 LXQT = auto()
720 class _LinuxKeyring(Enum):
722 https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.h
723 SelectedLinuxBackend
725 KWALLET = auto() # KDE4
726 KWALLET5 = auto()
727 KWALLET6 = auto()
728 GNOMEKEYRING = auto()
729 BASICTEXT = auto()
732 SUPPORTED_KEYRINGS = _LinuxKeyring.__members__.keys()
def _get_linux_desktop_environment(env, logger):
    """
    Work out the desktop environment from the environment mapping *env*.

    XDG_CURRENT_DESKTOP is consulted first, DESKTOP_SESSION only when it is unset,
    and the GNOME/KDE session variables only when both are unset. Anything that
    is not recognised yields OTHER.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/base/nix/xdg_util.cc
    GetDesktopEnvironment
    """
    desktops = _LinuxDesktopEnvironment
    xdg_current_desktop = env.get('XDG_CURRENT_DESKTOP', None)
    desktop_session = env.get('DESKTOP_SESSION', None)

    if xdg_current_desktop is not None:
        # Entries that map directly onto an environment; Unity and KDE need extra checks
        direct = {
            'Deepin': desktops.DEEPIN,
            'GNOME': desktops.GNOME,
            'X-Cinnamon': desktops.CINNAMON,
            'Pantheon': desktops.PANTHEON,
            'XFCE': desktops.XFCE,
            'UKUI': desktops.UKUI,
            'LXQt': desktops.LXQT,
        }
        for part in (piece.strip() for piece in xdg_current_desktop.split(':')):
            if part in direct:
                return direct[part]
            if part == 'Unity':
                is_fallback = desktop_session is not None and 'gnome-fallback' in desktop_session
                return desktops.GNOME if is_fallback else desktops.UNITY
            if part == 'KDE':
                kde_version = env.get('KDE_SESSION_VERSION', None)
                for known_version, desktop in (
                    ('5', desktops.KDE5), ('6', desktops.KDE6), ('4', desktops.KDE4),
                ):
                    if kde_version == known_version:
                        return desktop
                logger.info(f'unknown KDE version: "{kde_version}". Assuming KDE4')
                return desktops.KDE4
        logger.info(f'XDG_CURRENT_DESKTOP is set to an unknown value: "{xdg_current_desktop}"')
        return desktops.OTHER

    if desktop_session is not None:
        if desktop_session == 'deepin':
            return desktops.DEEPIN
        if desktop_session in ('mate', 'gnome'):
            return desktops.GNOME
        if desktop_session in ('kde4', 'kde-plasma'):
            return desktops.KDE4
        if desktop_session == 'kde':
            return desktops.KDE4 if 'KDE_SESSION_VERSION' in env else desktops.KDE3
        if 'xfce' in desktop_session or desktop_session == 'xubuntu':
            return desktops.XFCE
        if desktop_session == 'ukui':
            return desktops.UKUI
        logger.info(f'DESKTOP_SESSION is set to an unknown value: "{desktop_session}"')
        return desktops.OTHER

    if 'GNOME_DESKTOP_SESSION_ID' in env:
        return desktops.GNOME
    if 'KDE_FULL_SESSION' in env:
        return desktops.KDE4 if 'KDE_SESSION_VERSION' in env else desktops.KDE3
    return desktops.OTHER
def _choose_linux_keyring(logger):
    """
    Pick the keyring backend for the detected desktop environment.

    SelectBackend in [1]

    There is currently support for forcing chromium to use BASIC_TEXT by creating a file called
    `Disable Local Encryption` [1] in the user data dir. The function to write this file (`WriteBackendUse()` [1])
    does not appear to be called anywhere other than in tests, so the user would have to create this file manually
    and so would be aware enough to tell yt-dlp to use the BASIC_TEXT keyring.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/key_storage_util_linux.cc
    """
    desktop_environment = _get_linux_desktop_environment(os.environ, logger)
    logger.debug(f'detected desktop environment: {desktop_environment.name}')

    # Every environment that is not listed here uses the gnome keyring
    keyring_by_desktop = {
        _LinuxDesktopEnvironment.KDE4: _LinuxKeyring.KWALLET,
        _LinuxDesktopEnvironment.KDE5: _LinuxKeyring.KWALLET5,
        _LinuxDesktopEnvironment.KDE6: _LinuxKeyring.KWALLET6,
        _LinuxDesktopEnvironment.KDE3: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.LXQT: _LinuxKeyring.BASICTEXT,
        _LinuxDesktopEnvironment.OTHER: _LinuxKeyring.BASICTEXT,
    }
    return keyring_by_desktop.get(desktop_environment, _LinuxKeyring.GNOMEKEYRING)
def _get_kwallet_network_wallet(keyring, logger):
    """ The name of the wallet used to store network passwords.

    Falls back to 'kdewallet' when the dbus query fails or raises.

    https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/kwallet_dbus.cc
    KWalletDBus::NetworkWallet
    which does a dbus call to the following function:
    https://api.kde.org/frameworks/kwallet/html/classKWallet_1_1Wallet.html
    Wallet::NetworkWallet
    """
    default_wallet = 'kdewallet'
    try:
        # The service name and object path only differ in their version suffix
        for candidate, suffix in (
            (_LinuxKeyring.KWALLET, ''),
            (_LinuxKeyring.KWALLET5, '5'),
            (_LinuxKeyring.KWALLET6, '6'),
        ):
            if keyring == candidate:
                break
        else:
            raise ValueError(keyring)
        service_name = f'org.kde.kwalletd{suffix}'
        wallet_path = f'/modules/kwalletd{suffix}'

        command = [
            'dbus-send', '--session', '--print-reply=literal',
            f'--dest={service_name}',
            wallet_path,
            'org.kde.KWallet.networkWallet',
        ]
        stdout, _, returncode = Popen.run(
            command, text=True, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.warning('failed to read NetworkWallet')
            return default_wallet

        wallet_name = stdout.strip()
        logger.debug(f'NetworkWallet = "{wallet_name}"')
        return wallet_name
    except Exception as e:
        logger.warning(f'exception while obtaining NetworkWallet: {e}')
        return default_wallet
def _get_kwallet_password(browser_keyring_name, keyring, logger):
    """Read the browser's "Safe Storage" password from KWallet using `kwallet-query`.

    The entry `<browser_keyring_name> Safe Storage` is looked up in the folder
    `<browser_keyring_name> Keys` of the wallet reported by
    _get_kwallet_network_wallet(keyring, logger).

    Returns the password as bytes with trailing newlines removed, or b'' when
    kwallet-query is missing, exits non-zero, reports "failed to read", or raises.
    """
    logger.debug(f'using kwallet-query to obtain password from {keyring.name}')

    if shutil.which('kwallet-query') is None:
        # NB: adjacent literals are concatenated, so each piece needs its own trailing space
        logger.error('kwallet-query command not found. KWallet and kwallet-query '
                     'must be installed to read from KWallet. kwallet-query should be '
                     'included in the kwallet package for your distribution')
        return b''

    network_wallet = _get_kwallet_network_wallet(keyring, logger)

    try:
        stdout, _, returncode = Popen.run([
            'kwallet-query',
            '--read-password', f'{browser_keyring_name} Safe Storage',
            '--folder', f'{browser_keyring_name} Keys',
            network_wallet,
        ], stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)

        if returncode:
            logger.error(f'kwallet-query failed with return code {returncode}. '
                         'Please consult the kwallet-query man page for details')
            return b''

        if stdout.lower().startswith(b'failed to read'):
            logger.debug('failed to read password from kwallet. Using empty string instead')
            # this sometimes occurs in KDE because chrome does not check hasEntry and instead
            # just tries to read the value (which kwallet returns "") whereas kwallet-query
            # checks hasEntry. To verify this:
            # dbus-monitor "interface='org.kde.KWallet'" "type=method_return"
            # while starting chrome.
            # this was identified as a bug later and fixed in
            # https://chromium.googlesource.com/chromium/src/+/bbd54702284caca1f92d656fdcadf2ccca6f4165%5E%21/#F0
            # https://chromium.googlesource.com/chromium/src/+/5463af3c39d7f5b6d11db7fbd51e38cc1974d764
            return b''

        logger.debug('password found')
        return stdout.rstrip(b'\n')
    except Exception as e:
        logger.warning(f'exception running kwallet-query: {error_to_str(e)}')
        return b''
def _get_gnome_keyring_password(browser_keyring_name, logger):
    """Return the secret of the `<browser_keyring_name> Safe Storage` item in the
    default secretstorage collection, or b'' if secretstorage is unavailable or
    no item carries that label."""
    if not secretstorage:
        logger.error(f'secretstorage not available {_SECRETSTORAGE_UNAVAILABLE_REASON}')
        return b''

    # the Gnome keyring does not seem to organise keys in the same way as KWallet,
    # using `dbus-monitor` during startup, it can be observed that chromium lists all keys
    # and presumably searches for its key in the list. It appears that we must do the same.
    # https://github.com/jaraco/keyring/issues/556
    with contextlib.closing(secretstorage.dbus_init()) as connection:
        collection = secretstorage.get_default_collection(connection)
        for entry in collection.get_all_items():
            if entry.get_label() != f'{browser_keyring_name} Safe Storage':
                continue
            return entry.get_secret()
        logger.error('failed to read from keyring')
        return b''
def _get_linux_keyring_password(browser_keyring_name, keyring, logger):
    """Fetch the browser's keyring password from the selected Linux keyring backend.

    `keyring` is a member name of _LinuxKeyring; when it is falsy the backend is
    picked by _choose_linux_keyring(logger).

    Returns whatever the KWallet/Gnome helper returns, or None for BASICTEXT.
    Raises AssertionError for a keyring member that none of the branches handle.
    """
    # note: chrome/chromium can be run with the following flags to determine which keyring backend
    # it has chosen to use
    # chromium --enable-logging=stderr --v=1 2>&1 | grep key_storage_
    # Chromium supports a flag: --password-store=<basic|gnome|kwallet> so the automatic detection
    # will not be sufficient in all cases.

    keyring = _LinuxKeyring[keyring] if keyring else _choose_linux_keyring(logger)
    logger.debug(f'Chosen keyring: {keyring.name}')

    if keyring in (_LinuxKeyring.KWALLET, _LinuxKeyring.KWALLET5, _LinuxKeyring.KWALLET6):
        return _get_kwallet_password(browser_keyring_name, keyring, logger)
    if keyring == _LinuxKeyring.GNOMEKEYRING:
        return _get_gnome_keyring_password(browser_keyring_name, logger)
    if keyring == _LinuxKeyring.BASICTEXT:
        # when basic text is chosen, all cookies are stored as v10 (so no keyring password is required)
        return None
    # Raised explicitly instead of `assert False`: assert statements are removed under
    # `python -O`, which would make an unknown keyring indistinguishable from BASICTEXT
    raise AssertionError(f'Unknown keyring {keyring}')
def _get_mac_keyring_password(browser_keyring_name, logger):
    """Ask the `security` command line tool for the browser's "Safe Storage" password.

    Returns the password bytes with trailing newlines removed, or None when the
    command exits non-zero or cannot be run.
    """
    logger.debug('using find-generic-password to obtain password from OSX keychain')
    try:
        command = [
            'security', 'find-generic-password',
            '-w',  # write password to stdout
            '-a', browser_keyring_name,  # match 'account'
            '-s', f'{browser_keyring_name} Safe Storage',  # match 'service'
        ]
        output, _, status = Popen.run(command, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
        if not status:
            return output.rstrip(b'\n')
        logger.warning('find-generic-password failed')
        return None
    except Exception as e:
        logger.warning(f'exception running find-generic-password: {error_to_str(e)}')
        return None
def _get_windows_v10_key(browser_root, logger):
    """
    Locate the newest "Local State" file below browser_root and return its
    DPAPI-decrypted `os_crypt.encrypted_key`, or None if any step fails.

    References:
        - [1] https://chromium.googlesource.com/chromium/src/+/refs/heads/main/components/os_crypt/sync/os_crypt_win.cc
    """
    local_state_path = _newest(_find_files(browser_root, 'Local State', logger))
    if local_state_path is None:
        logger.error('could not find local state file')
        return None
    logger.debug(f'Found local state file at "{local_state_path}"')

    with open(local_state_path, encoding='utf8') as local_state_file:
        local_state = json.load(local_state_file)

    try:
        # kOsCryptEncryptedKeyPrefName in [1]
        encoded_key = local_state['os_crypt']['encrypted_key']
    except KeyError:
        logger.error('no encrypted key in Local State')
        return None

    wrapped_key = base64.b64decode(encoded_key)
    # kDPAPIKeyPrefix in [1]
    dpapi_prefix = b'DPAPI'
    if wrapped_key.startswith(dpapi_prefix):
        return _decrypt_windows_dpapi(wrapped_key[len(dpapi_prefix):], logger)
    logger.error('invalid key')
    return None
def pbkdf2_sha1(password, salt, iterations, key_length):
    """Derive `key_length` bytes from `password` and `salt` with PBKDF2-HMAC-SHA1."""
    derived_key = hashlib.pbkdf2_hmac('sha1', password, salt, iterations, dklen=key_length)
    return derived_key
def _decrypt_aes_cbc_multi(ciphertext, keys, logger, initialization_vector=b' ' * 16):
    """Try each key in turn and return the first plaintext that decodes as UTF-8.

    Returns None (after a one-time warning) when no key yields decodable text.
    """
    for candidate_key in keys:
        decrypted = unpad_pkcs7(aes_cbc_decrypt_bytes(ciphertext, candidate_key, initialization_vector))
        try:
            text = decrypted.decode()
        except UnicodeDecodeError:
            # Undecodable output is treated as "wrong key"; move on to the next one
            continue
        return text
    logger.warning('failed to decrypt cookie (AES-CBC) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
    return None
def _decrypt_aes_gcm(ciphertext, key, nonce, authentication_tag, logger):
    """Decrypt and verify an AES-GCM cookie value and return it as text.

    Returns None (after a one-time warning) when verification raises ValueError
    or the plaintext is not valid UTF-8.
    """
    # The two steps are guarded separately on purpose: UnicodeDecodeError is a
    # ValueError subclass, so one combined handler would report the wrong failure
    try:
        decrypted = aes_gcm_decrypt_and_verify_bytes(ciphertext, key, authentication_tag, nonce)
    except ValueError:
        logger.warning('failed to decrypt cookie (AES-GCM) because the MAC check failed. Possibly the key is wrong?', only_once=True)
        return None
    else:
        try:
            return decrypted.decode()
        except UnicodeDecodeError:
            logger.warning('failed to decrypt cookie (AES-GCM) because UTF-8 decoding failed. Possibly the key is wrong?', only_once=True)
            return None
def _decrypt_windows_dpapi(ciphertext, logger):
    """
    Decrypt `ciphertext` with the Windows CryptUnprotectData API and return the
    resulting bytes. Logs an error and raises DownloadError if the call fails.

    References:
        - https://docs.microsoft.com/en-us/windows/win32/api/dpapi/nf-dpapi-cryptunprotectdata
    """

    import ctypes
    import ctypes.wintypes

    class DATA_BLOB(ctypes.Structure):
        _fields_ = [('cbData', ctypes.wintypes.DWORD),
                    ('pbData', ctypes.POINTER(ctypes.c_char))]

    input_buffer = ctypes.create_string_buffer(ciphertext)
    encrypted_blob = DATA_BLOB(ctypes.sizeof(input_buffer), input_buffer)
    decrypted_blob = DATA_BLOB()
    succeeded = ctypes.windll.crypt32.CryptUnprotectData(
        ctypes.byref(encrypted_blob),  # pDataIn
        None,  # ppszDataDescr: human readable description of pDataIn
        None,  # pOptionalEntropy: salt?
        None,  # pvReserved: must be NULL
        None,  # pPromptStruct: information about prompts to display
        0,  # dwFlags
        ctypes.byref(decrypted_blob),  # pDataOut
    )
    if not succeeded:
        message = 'Failed to decrypt with DPAPI. See https://github.com/yt-dlp/yt-dlp/issues/10927 for more info'
        logger.error(message)
        raise DownloadError(message)  # force exit

    # Copy the output into Python-owned bytes before handing the buffer to LocalFree
    plaintext = ctypes.string_at(decrypted_blob.pbData, decrypted_blob.cbData)
    ctypes.windll.kernel32.LocalFree(decrypted_blob.pbData)
    return plaintext
def _config_home():
    """Return $XDG_CONFIG_HOME if it is set, otherwise the expanded `~/.config`."""
    fallback = os.path.expanduser('~/.config')
    return os.environ.get('XDG_CONFIG_HOME', fallback)
def _open_database_copy(database_path, tmpdir):
    """Copy the database to `<tmpdir>/temporary.sqlite` and return a cursor on the copy."""
    # cannot open sqlite databases if they are already in use (e.g. by the browser)
    copy_path = os.path.join(tmpdir, 'temporary.sqlite')
    shutil.copy(database_path, copy_path)
    return sqlite3.connect(copy_path).cursor()
def _get_column_names(cursor, table_name):
    """Return the column names of `table_name` as reported by `PRAGMA table_info`."""
    names = []
    for column in cursor.execute(f'PRAGMA table_info({table_name})').fetchall():
        # Index 1 of each row is the column name; it is decoded, so it is
        # presumably bytes (i.e. the connection uses a bytes text_factory) - verify against callers
        names.append(column[1].decode())
    return names
def _newest(files):
    """Return the path with the greatest lstat() modification time, or None if `files` is empty."""
    def modified_at(path):
        return os.lstat(path).st_mtime

    return max(files, key=modified_at, default=None)
def _find_files(root, filename, logger):
    """Walk `root` and yield the full path of every file named exactly `filename`,
    reporting the running count of inspected files on the progress bar."""
    # if there are multiple browser profiles, take the most recently used one
    searched = 0
    with _create_progress_bar(logger) as progress_bar:
        for directory, _, entries in os.walk(root):
            for entry in entries:
                searched += 1
                progress_bar.print(f'Searching for "{filename}": {searched: 6d} files searched')
                if entry != filename:
                    continue
                yield os.path.join(directory, entry)
def _merge_cookie_jars(jars):
    """Combine several cookie jars into one new YoutubeDLCookieJar.

    Cookies are set in iteration order, and the result takes the filename of
    the last jar whose filename is not None.
    """
    merged = YoutubeDLCookieJar()
    for source in jars:
        for cookie in source:
            merged.set_cookie(cookie)
        source_filename = source.filename
        if source_filename is not None:
            merged.filename = source_filename
    return merged
def _is_path(value):
    """Return True if `value` contains a path separator of the current platform."""
    for separator in (os.path.sep, os.path.altsep):
        # altsep is None on platforms that have no alternative separator
        if separator and separator in value:
            return True
    return False
def _parse_browser_specification(browser_name, profile=None, keyring=None, container=None):
    """Validate a browser specification and return it as a 4-tuple.

    Raises ValueError for a browser outside SUPPORTED_BROWSERS or a keyring that is
    neither None nor in SUPPORTED_KEYRINGS. A profile that looks like a path once
    expanded is returned in its expanded form; everything else passes through.
    """
    if browser_name not in SUPPORTED_BROWSERS:
        raise ValueError(f'unsupported browser: "{browser_name}"')
    if keyring not in (None, *SUPPORTED_KEYRINGS):
        raise ValueError(f'unsupported keyring: "{keyring}"')
    if profile is not None:
        expanded_profile = expand_path(profile)
        if _is_path(expanded_profile):
            profile = expanded_profile
    return browser_name, profile, keyring, container
class LenientSimpleCookie(http.cookies.SimpleCookie):
    """More lenient version of http.cookies.SimpleCookie"""
    # From https://github.com/python/cpython/blob/v3.10.7/Lib/http/cookies.py
    # We use Morsel's legal key chars to avoid errors on setting values
    _LEGAL_KEY_CHARS = r'\w\d' + re.escape('!#$%&\'*+-.:^_`|~')
    _LEGAL_VALUE_CHARS = _LEGAL_KEY_CHARS + re.escape('(),/<=>?@[]{}')

    # Keys that are stored as attributes of the preceding cookie instead of as cookies
    _RESERVED = {
        'expires',
        'path',
        'comment',
        'domain',
        'max-age',
        'secure',
        'httponly',
        'version',
        'samesite',
    }

    # Reserved keys that may appear without a value
    _FLAGS = {'secure', 'httponly'}

    # Added 'bad' group to catch the remaining value
    _COOKIE_PATTERN = re.compile(r'''
        \s*                            # Optional whitespace at start of cookie
        (?P<key>                       # Start of group 'key'
        [''' + _LEGAL_KEY_CHARS + r''']+?# Any word of at least one letter
        )                              # End of group 'key'
        (                              # Optional group: there may not be a value.
        \s*=\s*                          # Equal Sign
        (                                # Start of potential value
        (?P<val>                           # Start of group 'val'
        "(?:[^\\"]|\\.)*"                    # Any doublequoted string
        |                                    # or
        \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
        |                                    # or
        [''' + _LEGAL_VALUE_CHARS + r''']*     # Any word or empty string
        )                                  # End of group 'val'
        |                                  # or
        (?P<bad>(?:\\;|[^;])*?)            # 'bad' group fallback for invalid values
        )                                # End of potential value
        )?                             # End of optional value group
        \s*                            # Any number of spaces.
        (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
        ''', re.ASCII | re.VERBOSE)

    def load(self, data):
        """Load cookies from `data`, skipping malformed parts instead of raising.

        Non-string input is delegated to SimpleCookie.load(). For strings, every
        regex match is either a new cookie, an attribute of the most recent
        cookie, or something invalid that detaches the most recent cookie so that
        following attributes are ignored until the next cookie.
        """
        # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4776
        if not isinstance(data, str):
            return super().load(data)

        current = None  # morsel that attributes are currently attached to
        for found in self._COOKIE_PATTERN.finditer(data):
            if found.group('bad'):
                current = None
                continue

            name = found.group('key')
            raw_value = found.group('val')

            dollar_prefixed = name.startswith('$')
            if dollar_prefixed:
                name = name[1:]

            folded_name = name.lower()
            if folded_name in self._RESERVED:
                if current is None:
                    continue

                if raw_value is not None:
                    attribute_value, _ = self.value_decode(raw_value)
                elif folded_name in self._FLAGS:
                    attribute_value = True
                else:
                    # A valueless attribute that is not a flag invalidates the cookie context
                    current = None
                    continue

                current[name] = attribute_value

            elif dollar_prefixed or raw_value is None:
                # Unknown `$attribute`, or a bare key without a value
                current = None

            else:
                current = self.get(name, http.cookies.Morsel())
                decoded_value, encoded_value = self.value_decode(raw_value)
                current.set(name, decoded_value, encoded_value)
                self[name] = current
class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar):
    """
    See [1] for cookie file format.

    1. https://curl.haxx.se/docs/http-cookies.html
    """
    _HTTPONLY_PREFIX = '#HttpOnly_'
    _ENTRY_LEN = 7
    _HEADER = '''# Netscape HTTP Cookie File
# This file is generated by yt-dlp. Do not edit.

'''
    _CookieFileEntry = collections.namedtuple(
        'CookieFileEntry',
        ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))

    def __init__(self, filename=None, *args, **kwargs):
        """Create the jar; `filename` may be a path-like object or any other value (e.g. a stream)."""
        super().__init__(None, *args, **kwargs)
        # Path-like objects are normalised with os.fspath(); anything else is stored untouched
        self.filename = os.fspath(filename) if is_path_like(filename) else filename

    @staticmethod
    def _true_or_false(cndn):
        """Return 'TRUE' for a truthy condition and 'FALSE' otherwise."""
        return ('FALSE', 'TRUE')[bool(cndn)]

    @contextlib.contextmanager
    def open(self, file, *, write=False):
        """Yield a text stream for `file`, which is either a path or an already open stream."""
        if not is_path_like(file):
            if write:
                # NOTE(review): truncate(0) does not move the stream position - confirm
                # callers never pass a stream that has already been read from or written to
                file.truncate(0)
            yield file
            return

        mode = 'w' if write else 'r'
        with open(file, mode, encoding='utf-8') as stream:
            yield stream

    def _really_save(self, f, ignore_discard, ignore_expires):
        """Write one tab-separated line per cookie to `f`, honouring the ignore flags."""
        now = time.time()
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue

            name, value = cookie.name, cookie.value
            if value is None:
                # cookies.txt regards 'Set-Cookie: foo' as a cookie
                # with no name, whereas http.cookiejar regards it as a
                # cookie with no value.
                name, value = '', name

            fields = (
                cookie.domain,
                self._true_or_false(cookie.domain.startswith('.')),
                cookie.path,
                self._true_or_false(cookie.secure),
                str_or_none(cookie.expires, default=''),
                name, value,
            )
            f.write('\t'.join(fields) + '\n')

    def save(self, filename=None, ignore_discard=True, ignore_expires=True):
        """
        Save cookies to a file.
        Code is taken from CPython 3.6
        https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """

        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        # Store session cookies with `expires` set to 0 instead of an empty string
        for cookie in self:
            if cookie.expires is None:
                cookie.expires = 0

        with self.open(filename, write=True) as f:
            f.write(self._HEADER)
            self._really_save(f, ignore_discard, ignore_expires)

    def load(self, filename=None, ignore_discard=True, ignore_expires=True):
        """Load cookies from a file."""
        if filename is None:
            filename = self.filename
            if filename is None:
                raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT)

        def prepare_line(line):
            # Strip the HttpOnly marker so that the entry is parsed like any other
            if line.startswith(self._HTTPONLY_PREFIX):
                line = line[len(self._HTTPONLY_PREFIX):]
            # comments and empty lines are fine
            if line.startswith('#') or not line.strip():
                return line
            fields = line.split('\t')
            if len(fields) != self._ENTRY_LEN:
                raise http.cookiejar.LoadError(f'invalid length {len(fields)}')
            entry = self._CookieFileEntry(*fields)
            if entry.expires_at and not entry.expires_at.isdigit():
                raise http.cookiejar.LoadError(f'invalid expires at {entry.expires_at}')
            return line

        sanitized = io.StringIO()
        with self.open(filename) as f:
            for line in f:
                try:
                    prepared = prepare_line(line)
                except http.cookiejar.LoadError as e:
                    # First non-blank character (or a space for blank lines) hints at JSON input
                    leading_char = f'{line.strip()} '[0]
                    if leading_char in '[{"':
                        raise http.cookiejar.LoadError(
                            'Cookies file must be Netscape formatted, not JSON. See  '
                            'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp')
                    write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n')
                    continue
                sanitized.write(prepared)
        sanitized.seek(0)
        self._really_load(sanitized, filename, ignore_discard, ignore_expires)

        # Session cookies are denoted by either `expires` field set to
        # an empty string or 0. MozillaCookieJar only recognizes the former
        # (see [1]). So we need force the latter to be recognized as session
        # cookies on our own.
        # Session cookies may be important for cookies-based authentication,
        # e.g. usually, when user does not check 'Remember me' check box while
        # logging in on a site, some important cookies are stored as session
        # cookies so that not recognizing them will result in failed login.
        # 1. https://bugs.python.org/issue17164
        for cookie in self:
            # Treat `expires=0` cookies as session cookies
            if cookie.expires != 0:
                continue
            cookie.expires = None
            cookie.discard = True

    def get_cookie_header(self, url):
        """Generate a Cookie HTTP header for a given url"""
        request = urllib.request.Request(normalize_url(sanitize_url(url)))
        self.add_cookie_header(request)
        return request.get_header('Cookie')

    def get_cookies_for_url(self, url):
        """Generate a list of Cookie objects for a given url"""
        # Policy `_now` attribute must be set before calling `_cookies_for_request`
        # Ref: https://github.com/python/cpython/blob/3.7/Lib/http/cookiejar.py#L1360
        self._policy._now = self._now = int(time.time())
        request = urllib.request.Request(normalize_url(sanitize_url(url)))
        return self._cookies_for_request(request)

    def clear(self, *args, **kwargs):
        """Like CookieJar.clear(), but a missing domain/path/name is silently ignored."""
        try:
            return super().clear(*args, **kwargs)
        except KeyError:
            return None