[ie/youtube] Add age-gate workaround for some embeddable videos (#11821)
[yt-dlp.git] / yt_dlp / postprocessor / embedthumbnail.py
blobd8ba220caba7108a1a16e0dfd8cbb826e6ac6dd3
1 import base64
2 import os
3 import re
4 import subprocess
6 from .common import PostProcessor
7 from .ffmpeg import FFmpegPostProcessor, FFmpegThumbnailsConvertorPP
8 from ..compat import imghdr
9 from ..dependencies import mutagen
10 from ..utils import (
11 Popen,
12 PostProcessingError,
13 check_executable,
14 encodeArgument,
15 prepend_extension,
16 shell_quote,
19 if mutagen:
20 from mutagen.flac import FLAC, Picture
21 from mutagen.mp4 import MP4, MP4Cover
22 from mutagen.oggopus import OggOpus
23 from mutagen.oggvorbis import OggVorbis
class EmbedThumbnailPPError(PostProcessingError):
    """Post-processing error raised by the thumbnail-embedding postprocessor."""
class EmbedThumbnailPP(FFmpegPostProcessor):
    """Postprocessor that embeds an on-disk thumbnail into the media file.

    The embedding strategy is chosen from ``info['ext']``:

    * ``mp3``: ffmpeg, thumbnail mapped as a second input.
    * ``mkv``/``mka``: ffmpeg ``-attach`` with a ``mimetype`` tag.
    * ``m4a``/``mp4``/``m4v``/``mov``: mutagen, then AtomicParsley, then ffmpeg.
    * ``ogg``/``opus``/``flac``: mutagen (required).
    """

    def __init__(self, downloader=None, already_have_thumbnail=False):
        """Store the downloader and whether the thumbnail file pre-existed.

        ``already_have_thumbnail`` only influences which thumbnail files are
        handed to ``_delete_downloaded_files`` at the end of ``run``.
        """
        super().__init__(downloader)
        self._already_have_thumbnail = already_have_thumbnail

    def _get_thumbnail_resolution(self, filename, thumbnail_dict):
        """Return ``(width, height)`` for the thumbnail, or ``None`` if unknown.

        The text returned by ``run_ffmpeg`` is searched for a ``<w>x<h>`` token;
        when that fails, the ``width``/``height`` entries of ``thumbnail_dict``
        are used, provided both are truthy.
        """
        def guess():
            width, height = thumbnail_dict.get('width'), thumbnail_dict.get('height')
            if width and height:
                return width, height
            return None

        size_regex = r',\s*(?P<w>\d+)x(?P<h>\d+)\s*[,\[]'
        try:
            # No output file is given, so exit status 1 is the expected outcome.
            # NOTE(review): presumably the returned text is ffmpeg's stream description - confirm
            size_result = self.run_ffmpeg(filename, None, ['-hide_banner'], expected_retcodes=(1,))
        except PostProcessingError as err:
            self.report_warning(f'unable to find the thumbnail resolution; {err}')
            return guess()

        mobj = re.search(size_regex, size_result)
        if mobj is None:
            return guess()
        return int(mobj.group('w')), int(mobj.group('h'))

    def _report_run(self, exe, filename):
        """Print which tool (``exe``) is adding the thumbnail to ``filename``."""
        self.to_screen(f'{exe}: Adding thumbnail to "{filename}"')

    @PostProcessor._restrict_to(images=False)
    def run(self, info):
        """Embed the last on-disk thumbnail of ``info`` into ``info['filepath']``.

        Returns ``([], info)`` in every non-raising case, including when there
        is nothing to embed.

        Raises ``EmbedThumbnailPPError`` when the extension is unsupported,
        when mutagen is missing for ogg/opus/flac, or when the final ffmpeg
        fallback for MP4-family files fails.
        """
        filename = info['filepath']
        temp_filename = prepend_extension(filename, 'temp')

        if not info.get('thumbnails'):
            self.to_screen('There aren\'t any thumbnails to embed')
            return [], info

        # Negative index of the last thumbnail entry that has a 'filepath'
        idx = next((-i for i, t in enumerate(info['thumbnails'][::-1], 1) if t.get('filepath')), None)
        if idx is None:
            self.to_screen('There are no thumbnails on disk')
            return [], info
        thumbnail_filename = info['thumbnails'][idx]['filepath']
        if not os.path.exists(thumbnail_filename):
            self.report_warning('Skipping embedding the thumbnail because the file is missing.')
            return [], info

        # Correct extension for WebP file with wrong extension (see #25687, #25717)
        convertor = FFmpegThumbnailsConvertorPP(self._downloader)
        convertor.fixup_webp(info, idx)

        # Re-read the path: fixup_webp is given info/idx and may have changed the entry
        original_thumbnail = thumbnail_filename = info['thumbnails'][idx]['filepath']

        # Convert unsupported thumbnail formats (see #25687, #25717)
        # PNG is preferred since JPEG is lossy
        thumbnail_ext = os.path.splitext(thumbnail_filename)[1][1:]
        if info['ext'] not in ('mkv', 'mka') and thumbnail_ext not in ('jpg', 'jpeg', 'png'):
            thumbnail_filename = convertor.convert_thumbnail(thumbnail_filename, 'png')
            thumbnail_ext = 'png'

        # Remembered so the media file's timestamps can be restored afterwards
        mtime = os.stat(filename).st_mtime

        success = True
        if info['ext'] == 'mp3':
            options = [
                '-c', 'copy', '-map', '0:0', '-map', '1:0', '-write_id3v1', '1', '-id3v2_version', '3',
                '-metadata:s:v', 'title="Album cover"', '-metadata:s:v', 'comment=Cover (front)']

            self._report_run('ffmpeg', filename)
            self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)

        elif info['ext'] in ['mkv', 'mka']:
            options = list(self.stream_copy_opts())

            mimetype = f'image/{thumbnail_ext.replace("jpg", "jpeg")}'
            old_stream, new_stream = self.get_stream_number(
                filename, ('tags', 'mimetype'), mimetype)
            if old_stream is not None:
                # Drop the existing attachment; the new one then takes one index lower
                options.extend(['-map', f'-0:{old_stream}'])
                new_stream -= 1
            options.extend([
                '-attach', self._ffmpeg_filename_argument(thumbnail_filename),
                f'-metadata:s:{new_stream}', f'mimetype={mimetype}',
                f'-metadata:s:{new_stream}', f'filename=cover.{thumbnail_ext}'])

            self._report_run('ffmpeg', filename)
            self.run_ffmpeg(filename, temp_filename, options)

        elif info['ext'] in ['m4a', 'mp4', 'm4v', 'mov']:
            prefer_atomicparsley = 'embed-thumbnail-atomicparsley' in self.get_param('compat_opts', [])
            # Distinguishes "mutagen raised" from "mutagen unavailable/skipped" for the messages below
            mutagen_failed = False

            # Method 1: Use mutagen
            if not mutagen or prefer_atomicparsley:
                success = False
            else:
                self._report_run('mutagen', filename)
                cover_formats = {'jpeg': MP4Cover.FORMAT_JPEG, 'png': MP4Cover.FORMAT_PNG}
                try:
                    with open(thumbnail_filename, 'rb') as thumbfile:
                        thumb_data = thumbfile.read()

                    type_ = imghdr.what(h=thumb_data)
                    if not type_:
                        raise ValueError('could not determine image type')
                    elif type_ not in cover_formats:
                        raise ValueError(f'incompatible image type: {type_}')

                    meta = MP4(filename)
                    # NOTE: the 'covr' atom is a non-standard MPEG-4 atom,
                    # Apple iTunes 'M4A' files include the 'moov.udta.meta.ilst' atom.
                    meta.tags['covr'] = [MP4Cover(data=thumb_data, imageformat=cover_formats[type_])]
                    meta.save()
                    # Edited in place, so there is no temporary file to move
                    temp_filename = filename
                except Exception as err:
                    # Deliberately broad: any mutagen failure falls through to the next method
                    self.report_warning(f'unable to embed using mutagen; {err}')
                    mutagen_failed = True
                    success = False

            # Method 2: Use AtomicParsley
            if not success:
                success = True
                atomicparsley = next((
                    # libatomicparsley.so : See https://github.com/xibr/ytdlp-lazy/issues/1
                    x for x in ['AtomicParsley', 'atomicparsley', 'libatomicparsley.so']
                    if check_executable(x, ['-v'])), None)
                if atomicparsley is None:
                    if mutagen_failed:
                        self.to_screen('AtomicParsley was not found. Falling back to ffmpeg')
                    else:
                        self.to_screen('Neither mutagen nor AtomicParsley was found. Falling back to ffmpeg')
                    success = False
                else:
                    if mutagen_failed:
                        self.to_screen('Falling back to AtomicParsley')
                    elif not prefer_atomicparsley:
                        self.to_screen('mutagen was not found. Falling back to AtomicParsley')
                    cmd = [atomicparsley,
                           filename,
                           encodeArgument('--artwork'),
                           thumbnail_filename,
                           encodeArgument('-o'),
                           temp_filename]
                    cmd += [encodeArgument(o) for o in self._configuration_args('AtomicParsley')]

                    self._report_run('atomicparsley', filename)
                    self.write_debug(f'AtomicParsley command line: {shell_quote(cmd)}')
                    stdout, stderr, returncode = Popen.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    if returncode:
                        self.report_warning(f'Unable to embed thumbnails using AtomicParsley; {stderr.strip()}')
                        success = False
                    # for formats that don't support thumbnails (like 3gp) AtomicParsley
                    # won't write to the temporary file
                    elif 'No changes' in stdout:
                        self.report_warning('The file format doesn\'t support embedding a thumbnail')
                        success = False

            # Method 3: Use ffmpeg+ffprobe
            # Thumbnails attached using this method don't show up as cover in some cases
            # See https://github.com/yt-dlp/yt-dlp/issues/2125, https://github.com/yt-dlp/yt-dlp/issues/411
            if not success:
                success = True
                try:
                    options = [*self.stream_copy_opts(), '-map', '1']

                    old_stream, new_stream = self.get_stream_number(
                        filename, ('disposition', 'attached_pic'), 1)
                    if old_stream is not None:
                        options.extend(['-map', f'-0:{old_stream}'])
                        new_stream -= 1
                    options.extend([f'-disposition:{new_stream}', 'attached_pic'])

                    self._report_run('ffmpeg', filename)
                    self.run_ffmpeg_multiple_files([filename, thumbnail_filename], temp_filename, options)
                except PostProcessingError as err:
                    # Last resort failed: surface it, keeping the original error as the cause
                    raise EmbedThumbnailPPError(f'Unable to embed using ffprobe & ffmpeg; {err}') from err

        elif info['ext'] in ['ogg', 'opus', 'flac']:
            if not mutagen:
                raise EmbedThumbnailPPError('module mutagen was not found. Please install using `python3 -m pip install mutagen`')

            self._report_run('mutagen', filename)
            audio = {'opus': OggOpus, 'flac': FLAC, 'ogg': OggVorbis}[info['ext']](filename)

            pic = Picture()
            # imghdr.what() may return None; fall back to the extension (jpg/jpeg/png here)
            # instead of writing the invalid MIME type "image/None"
            image_type = imghdr.what(thumbnail_filename) or thumbnail_ext.replace('jpg', 'jpeg')
            pic.mime = f'image/{image_type}'
            with open(thumbnail_filename, 'rb') as thumbfile:
                pic.data = thumbfile.read()
            pic.type = 3  # front cover
            res = self._get_thumbnail_resolution(thumbnail_filename, info['thumbnails'][idx])
            if res is not None:
                pic.width, pic.height = res

            if info['ext'] == 'flac':
                audio.add_picture(pic)
            else:
                # https://wiki.xiph.org/VorbisComment#METADATA_BLOCK_PICTURE
                audio['METADATA_BLOCK_PICTURE'] = base64.b64encode(pic.write()).decode('ascii')
            audio.save()
            # Edited in place, so there is no temporary file to move
            temp_filename = filename

        else:
            raise EmbedThumbnailPPError('Supported filetypes for thumbnail embedding are: mp3, mkv/mka, ogg/opus/flac, m4a/mp4/m4v/mov')

        if success and temp_filename != filename:
            os.replace(temp_filename, filename)

        self.try_utime(filename, mtime, mtime)
        converted = original_thumbnail != thumbnail_filename
        # First argument: the file that was embedded; second: the pre-conversion original.
        # A thumbnail that already existed before this run is never passed unless it is
        # the converted copy created above.
        self._delete_downloaded_files(
            thumbnail_filename if converted or not self._already_have_thumbnail else None,
            original_thumbnail if converted and not self._already_have_thumbnail else None,
            info=info)
        return [], info