[ie/dropout] Fix extraction (#12102)
[yt-dlp.git] / yt_dlp / extractor / twitter.py
blobc05b5bf9cb66cc55b72b8fe61fc1d0e40d81fc7b
1 import functools
2 import json
3 import random
4 import re
5 import urllib.parse
7 from .common import InfoExtractor
8 from .periscope import PeriscopeBaseIE, PeriscopeIE
9 from ..networking.exceptions import HTTPError
10 from ..utils import (
11 ExtractorError,
12 dict_get,
13 filter_dict,
14 float_or_none,
15 format_field,
16 int_or_none,
17 join_nonempty,
18 make_archive_id,
19 remove_end,
20 str_or_none,
21 strip_or_none,
22 traverse_obj,
23 try_call,
24 try_get,
25 unified_timestamp,
26 update_url_query,
27 url_or_none,
28 xpath_text,
class TwitterBaseIE(InfoExtractor):
    """Common base for the Twitter/X extractors.

    Holds the API endpoints and bearer tokens, implements the credential
    login flow, and provides helpers for calling the API and for turning
    media variants into format dicts.
    """

    _NETRC_MACHINE = 'twitter'
    _API_BASE = 'https://api.x.com/1.1/'
    _GRAPHQL_API_BASE = 'https://x.com/i/api/graphql/'
    _BASE_REGEX = r'https?://(?:(?:www|m(?:obile)?)\.)?(?:(?:twitter|x)\.com|twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid\.onion)/'
    # Bearer tokens; which one is sent is decided in _set_base_headers()
    _AUTH = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'
    _LEGACY_AUTH = 'AAAAAAAAAAAAAAAAAAAAAIK1zgAAAAAA2tUWuhGZ2JceoId5GwYWU5GspY4%3DUq7gzFoCZs1QfwGoVdvSac3IniczZEYXIcDyumCauIXpcAPorE'
    # Updated by _call_login_api() after every successful login step
    _flow_token = None

    # Request body of the first onboarding/task.json call made by _perform_login()
    _LOGIN_INIT_DATA = json.dumps({
        'input_flow_data': {
            'flow_context': {
                'debug_overrides': {},
                'start_location': {
                    'location': 'unknown',
                },
            },
        },
        'subtask_versions': {
            'action_list': 2,
            'alert_dialog': 1,
            'app_download_cta': 1,
            'check_logged_in_account': 1,
            'choice_selection': 3,
            'contacts_live_sync_permission_prompt': 0,
            'cta': 7,
            'email_verification': 2,
            'end_flow': 1,
            'enter_date': 1,
            'enter_email': 2,
            'enter_password': 5,
            'enter_phone': 2,
            'enter_recaptcha': 1,
            'enter_text': 5,
            'enter_username': 2,
            'generic_urt': 3,
            'in_app_notification': 1,
            'interest_picker': 3,
            'js_instrumentation': 1,
            'menu_dialog': 1,
            'notifications_permission_prompt': 2,
            'open_account': 2,
            'open_home_timeline': 1,
            'open_link': 1,
            'phone_verification': 4,
            'privacy_options': 1,
            'security_key': 3,
            'select_avatar': 4,
            'select_banner': 2,
            'settings_list': 7,
            'show_code': 1,
            'sign_up': 2,
            'sign_up_review': 4,
            'tweet_selection_urt': 1,
            'update_users': 1,
            'upload_media': 1,
            'user_recommendations_list': 4,
            'user_recommendations_urt': 1,
            'wait_spinner': 3,
            'web_modal': 1,
        },
    }, separators=(',', ':')).encode()

    def _extract_variant_formats(self, variant, video_id):
        """Build formats and subtitles for a single media variant.

        `variant` is a mapping with a 'url' key and optionally 'bitrate' or
        'bit_rate'. Returns a `(formats, subtitles)` tuple; both are empty
        when the variant has no URL.
        """
        media_url = variant.get('url')
        if not media_url:
            return [], {}

        if '.m3u8' not in media_url:
            # Direct file: a single format described by the variant itself
            bitrate = int_or_none(dict_get(variant, ('bitrate', 'bit_rate')), 1000) or None
            http_format = {
                'url': media_url,
                'format_id': join_nonempty('http', bitrate),
                'tbr': bitrate,
            }
            self._search_dimensions_in_video_url(http_format, media_url)
            return [http_format], {}

        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            media_url, video_id, 'mp4', 'm3u8_native',
            m3u8_id='hls', fatal=False)
        # Audio-only HLS formats lacking a bitrate: take it from the format_id if present
        audio_without_tbr = traverse_obj(
            formats, lambda _, v: v['vcodec'] == 'none' and v.get('tbr') is None)
        for audio_format in audio_without_tbr:
            mobj = re.match(r'hls-[Aa]udio-(?P<bitrate>\d{4,})', audio_format['format_id'])
            if mobj:
                audio_format['tbr'] = int_or_none(mobj.group('bitrate'), 1000)
        return formats, subtitles

    def _extract_formats_from_vmap_url(self, vmap_url, video_id):
        """Download a VMAP document and extract formats from it.

        Every `videoVariant` element is processed, followed by the
        `MediaFile` URL if it was not already among the variant URLs.
        Returns a `(formats, subtitles)` tuple; both are empty when
        `vmap_url` is not a usable URL.
        """
        vmap_url = url_or_none(vmap_url)
        if not vmap_url:
            return [], {}

        vmap_data = self._download_xml(vmap_url, video_id)
        formats, subtitles, seen_urls = [], {}, []

        def collect(variant):
            # Accumulate the formats/subtitles of one variant into the result
            nonlocal subtitles
            fmts, subs = self._extract_variant_formats(variant, video_id)
            formats.extend(fmts)
            subtitles = self._merge_subtitles(subtitles, subs)

        for variant_node in vmap_data.findall('.//{http://twitter.com/schema/videoVMapV2.xsd}videoVariant'):
            # The url attribute is percent-encoded in the document; decode it in place
            decoded_url = urllib.parse.unquote(variant_node.attrib['url'])
            variant_node.attrib['url'] = decoded_url
            seen_urls.append(decoded_url)
            collect(variant_node.attrib)

        media_file_url = strip_or_none(xpath_text(vmap_data, './/MediaFile'))
        if media_file_url not in seen_urls:
            collect({'url': media_file_url})
        return formats, subtitles

    @staticmethod
    def _search_dimensions_in_video_url(a_format, video_url):
        """Set 'width' and 'height' on `a_format` when `video_url` has a `/<W>x<H>/` path segment."""
        dimensions = re.search(r'/(?P<width>\d+)x(?P<height>\d+)/', video_url)
        if dimensions is None:
            return
        a_format.update({
            name: int(value) for name, value in dimensions.groupdict().items()
        })

    @property
    def is_logged_in(self):
        """Whether an 'auth_token' cookie is present for the API host."""
        auth_cookie = self._get_cookies(self._API_BASE).get('auth_token')
        return bool(auth_cookie)

    @functools.cached_property
    def _selected_api(self):
        """First value of the 'api' extractor argument (ie_key 'Twitter'); defaults to 'graphql'."""
        api_choices = self._configuration_arg('api', ['graphql'], ie_key='Twitter')
        return api_choices[0]

    def _fetch_guest_token(self, display_id):
        """Request a guest token from guest/activate.json.

        Raises ExtractorError when the response carries no string 'guest_token'.
        """
        use_legacy = display_id and self._selected_api == 'legacy'
        response = self._download_json(
            f'{self._API_BASE}guest/activate.json', display_id, 'Downloading guest token',
            data=b'', headers=self._set_base_headers(legacy=use_legacy))
        guest_token = traverse_obj(response, ('guest_token', {str}))
        if guest_token:
            return guest_token
        raise ExtractorError('Could not retrieve guest token')

    def _set_base_headers(self, legacy=False):
        """Return the Authorization header plus 'x-csrf-token' taken from the 'ct0' cookie.

        The legacy bearer token is only used when `legacy` is set and no
        'auth_token' cookie is present.
        """
        if legacy and not self.is_logged_in:
            bearer_token = self._LEGACY_AUTH
        else:
            bearer_token = self._AUTH
        csrf_token = try_call(lambda: self._get_cookies(self._API_BASE)['ct0'].value)
        return filter_dict({
            'Authorization': f'Bearer {bearer_token}',
            'x-csrf-token': csrf_token,
        })

    def _call_login_api(self, note, headers, query={}, data=None):
        """Perform one onboarding/task.json request of the login flow.

        Stores the returned flow token on the instance and returns the ID of
        the next subtask. Raises ExtractorError when the API reports an
        error, the status is not 'success', or no subtask ID is returned.
        """
        response = self._download_json(
            f'{self._API_BASE}onboarding/task.json', None, note,
            headers=headers, query=query, data=data, expected_status=400)

        api_error = traverse_obj(response, ('errors', 0, 'message', {str}))
        if api_error:
            raise ExtractorError(f'Login failed, Twitter API says: {api_error}', expected=True)
        if traverse_obj(response, 'status') != 'success':
            raise ExtractorError('Login was unsuccessful')

        next_subtask_id = traverse_obj(
            response, ('subtasks', ..., 'subtask_id', {str}), get_all=False)
        if not next_subtask_id:
            raise ExtractorError('Twitter API did not return next login subtask')

        self._flow_token = response['flow_token']

        return next_subtask_id

    def _perform_login(self, username, password):
        """Log in with username and password by answering login subtasks.

        Subtasks are answered one at a time until the 'auth_token' cookie
        is present. Does nothing when that cookie already exists.
        """
        if self.is_logged_in:
            return

        guest_token = self._fetch_guest_token(None)
        headers = dict(self._set_base_headers())
        headers.update({
            'content-type': 'application/json',
            'x-guest-token': guest_token,
            'x-twitter-client-language': 'en',
            'x-twitter-active-user': 'yes',
            'Referer': 'https://x.com/',
            'Origin': 'https://x.com',
        })

        def build_login_json(*subtask_inputs):
            # The flow token is read at call time, i.e. the one from the previous step
            payload = {
                'flow_token': self._flow_token,
                'subtask_inputs': subtask_inputs,
            }
            return json.dumps(payload, separators=(',', ':')).encode()

        def input_dict(subtask_id, text):
            return {
                'subtask_id': subtask_id,
                'enter_text': {
                    'text': text,
                    'link': 'next_link',
                },
            }

        def submit(note, subtask_input):
            # Send the answer to one subtask and return the ID of the following one
            return self._call_login_api(note, headers, data=build_login_json(subtask_input))

        next_subtask = self._call_login_api(
            'Downloading flow token', headers, query={'flow_name': 'login'}, data=self._LOGIN_INIT_DATA)

        while not self.is_logged_in:
            if next_subtask == 'LoginJsInstrumentationSubtask':
                next_subtask = submit('Submitting JS instrumentation response', {
                    'subtask_id': next_subtask,
                    'js_instrumentation': {
                        'response': '{}',
                        'link': 'next_link',
                    },
                })

            elif next_subtask == 'LoginEnterUserIdentifierSSO':
                next_subtask = submit('Submitting username', {
                    'subtask_id': next_subtask,
                    'settings_list': {
                        'setting_responses': [{
                            'key': 'user_identifier',
                            'response_data': {
                                'text_data': {
                                    'result': username,
                                },
                            },
                        }],
                        'link': 'next_link',
                    },
                })

            elif next_subtask == 'LoginEnterAlternateIdentifierSubtask':
                alternate_identifier = self._get_tfa_info(
                    'one of username, phone number or email that was not used as --username')
                next_subtask = submit(
                    'Submitting alternate identifier', input_dict(next_subtask, alternate_identifier))

            elif next_subtask == 'LoginEnterPassword':
                next_subtask = submit('Submitting password', {
                    'subtask_id': next_subtask,
                    'enter_password': {
                        'password': password,
                        'link': 'next_link',
                    },
                })

            elif next_subtask == 'AccountDuplicationCheck':
                next_subtask = submit('Submitting account duplication check', {
                    'subtask_id': next_subtask,
                    'check_logged_in_account': {
                        'link': 'AccountDuplicationCheck_false',
                    },
                })

            elif next_subtask == 'LoginTwoFactorAuthChallenge':
                tfa_token = self._get_tfa_info('two-factor authentication token')
                next_subtask = submit('Submitting 2FA token', input_dict(next_subtask, tfa_token))

            elif next_subtask == 'LoginAcid':
                confirmation_code = self._get_tfa_info('confirmation code sent to your email or phone')
                next_subtask = submit(
                    'Submitting confirmation code', input_dict(next_subtask, confirmation_code))

            elif next_subtask == 'ArkoseLogin':
                self.raise_login_required('Twitter is requiring captcha for this login attempt', method='cookies')

            elif next_subtask == 'DenyLoginSubtask':
                self.raise_login_required('Twitter rejected this login attempt as suspicious', method='cookies')

            elif next_subtask == 'LoginSuccessSubtask':
                # Reaching this branch means the loop condition still saw no auth_token cookie
                raise ExtractorError('Twitter API did not grant auth token cookie')

            else:
                raise ExtractorError(f'Unrecognized subtask ID "{next_subtask}"')

        self.report_login()

    def _call_api(self, path, video_id, query={}, graphql=False):
        """Query `path` on the GraphQL or legacy API and return the decoded JSON.

        Sends session headers when logged in and a freshly fetched guest
        token otherwise. Raises when the response contains 'errors'; a
        'not authorized' message is reported as a login requirement.
        """
        headers = self._set_base_headers(legacy=not graphql and self._selected_api == 'legacy')
        if self.is_logged_in:
            headers.update({
                'x-twitter-auth-type': 'OAuth2Session',
                'x-twitter-client-language': 'en',
                'x-twitter-active-user': 'yes',
            })
        else:
            headers.update({
                'x-guest-token': self._fetch_guest_token(video_id),
            })

        if graphql:
            api_base, api_name, allowed_status = self._GRAPHQL_API_BASE, 'GraphQL', {400, 401, 403, 404}
        else:
            api_base, api_name, allowed_status = self._API_BASE, 'legacy API', {403}

        result = self._download_json(
            api_base + path,
            video_id, headers=headers, query=query, expected_status=allowed_status,
            note=f'Downloading {api_name} JSON')

        if not result.get('errors'):
            return result

        messages = traverse_obj(result, ('errors', ..., 'message', {str}))
        errors = ', '.join(set(messages))
        if errors and 'not authorized' in errors:
            self.raise_login_required(remove_end(errors, '.'))
        raise ExtractorError(f'Error(s) while querying API: {errors or "Unknown error"}')

    def _build_graphql_query(self, media_id):
        """Return the GraphQL query mapping for `media_id`; subclasses must override this."""
        raise NotImplementedError('Method must be implemented to support GraphQL')

    def _call_graphql_api(self, endpoint, media_id):
        """Call a GraphQL `endpoint` for `media_id` and return the 'data' member of the response."""
        query_parts = self._build_graphql_query(media_id)
        # Each value is sent as its own compact JSON document
        query = {
            name: json.dumps(part, separators=(',', ':'))
            for name, part in query_parts.items()
        }
        response = self._call_api(endpoint, media_id, query=query, graphql=True)
        return traverse_obj(response, 'data')
class TwitterCardIE(InfoExtractor):
    """Extractor for card and video embed URLs; delegates to TwitterIE via a status URL."""

    IE_NAME = 'twitter:card'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/(?:cards/tfw/v1|videos(?:/tweet)?)/(?P<id>\d+)'
    _TESTS = [
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/560070183650213889',
            # MD5 checksums are different in different places
            'info_dict': {
                'id': '560070131976392705',
                'ext': 'mp4',
                'title': "Twitter - You can now shoot, edit and share video on Twitter. Capture life's most moving moments from your perspective.",
                'description': 'md5:18d3e24bb4f6e5007487dd546e53bd96',
                'uploader': 'Twitter',
                'uploader_id': 'Twitter',
                'thumbnail': r're:^https?://.*\.jpg',
                'duration': 30.033,
                'timestamp': 1422366112,
                'upload_date': '20150127',
                'age_limit': 0,
                'comment_count': int,
                'tags': [],
                'repost_count': int,
                'like_count': int,
                'display_id': '560070183650213889',
                'uploader_url': 'https://twitter.com/Twitter',
            },
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/623160978427936768',
            'md5': '7137eca597f72b9abbe61e5ae0161399',
            'info_dict': {
                'id': '623160978427936768',
                'ext': 'mp4',
                'title': "NASA - Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video.",
                'description': "Fly over Pluto's icy Norgay Mountains and Sputnik Plain in this @NASANewHorizons #PlutoFlyby video. https://t.co/BJYgOjSeGA",
                'uploader': 'NASA',
                'uploader_id': 'NASA',
                'timestamp': 1437408129,
                'upload_date': '20150720',
                'uploader_url': 'https://twitter.com/NASA',
                'age_limit': 0,
                'comment_count': int,
                'like_count': int,
                'repost_count': int,
                'tags': ['PlutoFlyby'],
            },
            'params': {'format': '[protocol=https]'},
        },
        {
            'url': 'https://twitter.com/i/cards/tfw/v1/654001591733886977',
            'md5': 'b6d9683dd3f48e340ded81c0e917ad46',
            'info_dict': {
                'id': 'dq4Oj5quskI',
                'ext': 'mp4',
                'title': 'Ubuntu 11.10 Overview',
                'description': 'md5:a831e97fa384863d6e26ce48d1c43376',
                'upload_date': '20111013',
                'uploader': 'OMG! UBUNTU!',
                'uploader_id': 'omgubuntu',
                'channel_url': 'https://www.youtube.com/channel/UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_id': 'UCIiSwcm9xiFb3Y4wjzR41eQ',
                'channel_follower_count': int,
                'chapters': 'count:8',
                'uploader_url': 'http://www.youtube.com/user/omgubuntu',
                'duration': 138,
                'categories': ['Film & Animation'],
                'age_limit': 0,
                'comment_count': int,
                'availability': 'public',
                'like_count': int,
                'thumbnail': 'https://i.ytimg.com/vi/dq4Oj5quskI/maxresdefault.jpg',
                'view_count': int,
                'tags': 'count:12',
                'channel': 'OMG! UBUNTU!',
                'playable_in_embed': True,
            },
            'add_ie': ['Youtube'],
        },
        {
            'url': 'https://twitter.com/i/videos/tweet/705235433198714880',
            'md5': '884812a2adc8aaf6fe52b15ccbfa3b88',
            'info_dict': {
                'id': '705235433198714880',
                'ext': 'mp4',
                'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
                'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
                'uploader': 'Brent Yarina',
                'uploader_id': 'BTNBrentYarina',
                'timestamp': 1456976204,
                'upload_date': '20160303',
            },
            'skip': 'This content is no longer available.',
        },
        {
            'url': 'https://twitter.com/i/videos/752274308186120192',
            'only_matching': True,
        },
    ]

    def _real_extract(self, url):
        """Redirect the matched numeric ID to the equivalent statuses/ URL handled by TwitterIE."""
        status_id = self._match_id(url)
        status_url = f'https://twitter.com/statuses/{status_id}'
        return self.url_result(status_url, TwitterIE.ie_key(), status_id)
440 class TwitterIE(TwitterBaseIE):
441 IE_NAME = 'twitter'
442 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'(?:(?:i/web|[^/]+)/status|statuses)/(?P<id>\d+)(?:/(?:video|photo)/(?P<index>\d+))?'
444 _TESTS = [{
445 'url': 'https://twitter.com/freethenipple/status/643211948184596480',
446 'info_dict': {
447 'id': '643211870443208704',
448 'display_id': '643211948184596480',
449 'ext': 'mp4',
450 'title': 'FREE THE NIPPLE - FTN supporters on Hollywood Blvd today!',
451 'thumbnail': r're:^https?://.*\.jpg',
452 'description': 'FTN supporters on Hollywood Blvd today! http://t.co/c7jHH749xJ',
453 'channel_id': '549749560',
454 'uploader': 'FREE THE NIPPLE',
455 'uploader_id': 'freethenipple',
456 'duration': 12.922,
457 'timestamp': 1442188653,
458 'upload_date': '20150913',
459 'uploader_url': 'https://twitter.com/freethenipple',
460 'comment_count': int,
461 'repost_count': int,
462 'like_count': int,
463 'tags': [],
464 'age_limit': 18,
465 '_old_archive_ids': ['twitter 643211948184596480'],
467 'skip': 'Requires authentication',
468 }, {
469 'url': 'https://twitter.com/giphz/status/657991469417025536/photo/1',
470 'md5': 'f36dcd5fb92bf7057f155e7d927eeb42',
471 'info_dict': {
472 'id': '657991469417025536',
473 'ext': 'mp4',
474 'title': 'Gifs - tu vai cai tu vai cai tu nao eh capaz disso tu vai cai',
475 'description': 'Gifs on Twitter: "tu vai cai tu vai cai tu nao eh capaz disso tu vai cai https://t.co/tM46VHFlO5"',
476 'thumbnail': r're:^https?://.*\.png',
477 'uploader': 'Gifs',
478 'uploader_id': 'giphz',
480 'expected_warnings': ['height', 'width'],
481 'skip': 'Account suspended',
482 }, {
483 'url': 'https://twitter.com/starwars/status/665052190608723968',
484 'info_dict': {
485 'id': '665052190608723968',
486 'display_id': '665052190608723968',
487 'ext': 'mp4',
488 'title': r're:Star Wars.*A new beginning is coming December 18.*',
489 'description': 'A new beginning is coming December 18. Watch the official 60 second #TV spot for #StarWars: #TheForceAwakens. https://t.co/OkSqT2fjWJ',
490 'channel_id': '20106852',
491 'uploader_id': 'starwars',
492 'uploader': r're:Star Wars.*',
493 'timestamp': 1447395772,
494 'upload_date': '20151113',
495 'uploader_url': 'https://twitter.com/starwars',
496 'comment_count': int,
497 'repost_count': int,
498 'like_count': int,
499 'tags': ['TV', 'StarWars', 'TheForceAwakens'],
500 'age_limit': 0,
501 '_old_archive_ids': ['twitter 665052190608723968'],
503 }, {
504 'url': 'https://twitter.com/BTNBrentYarina/status/705235433198714880',
505 'info_dict': {
506 'id': '705235433198714880',
507 'ext': 'mp4',
508 'title': "Brent Yarina - Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight.",
509 'description': "Khalil Iverson's missed highlight dunk. And made highlight dunk. In one highlight. https://t.co/OrxcJ28Bns",
510 'uploader_id': 'BTNBrentYarina',
511 'uploader': 'Brent Yarina',
512 'timestamp': 1456976204,
513 'upload_date': '20160303',
514 'uploader_url': 'https://twitter.com/BTNBrentYarina',
515 'comment_count': int,
516 'repost_count': int,
517 'like_count': int,
518 'tags': [],
519 'age_limit': 0,
521 'params': {
522 # The same video as https://twitter.com/i/videos/tweet/705235433198714880
523 # Test case of TwitterCardIE
524 'skip_download': True,
526 'skip': 'Dead external link',
527 }, {
528 'url': 'https://twitter.com/jaydingeer/status/700207533655363584',
529 'info_dict': {
530 'id': '700207414000242688',
531 'display_id': '700207533655363584',
532 'ext': 'mp4',
533 'title': 'jaydin donte geer - BEAT PROD: @suhmeduh #Damndaniel',
534 'description': 'BEAT PROD: @suhmeduh https://t.co/HBrQ4AfpvZ #Damndaniel https://t.co/byBooq2ejZ',
535 'thumbnail': r're:^https?://.*\.jpg',
536 'channel_id': '1383165541',
537 'uploader': 'jaydin donte geer',
538 'uploader_id': 'jaydingeer',
539 'duration': 30.0,
540 'timestamp': 1455777459,
541 'upload_date': '20160218',
542 'uploader_url': 'https://twitter.com/jaydingeer',
543 'comment_count': int,
544 'repost_count': int,
545 'like_count': int,
546 'tags': ['Damndaniel'],
547 'age_limit': 0,
548 '_old_archive_ids': ['twitter 700207533655363584'],
550 }, {
551 'url': 'https://twitter.com/captainamerica/status/719944021058060289',
552 'info_dict': {
553 'id': '717462543795523584',
554 'display_id': '719944021058060289',
555 'ext': 'mp4',
556 'title': 'Captain America - @King0fNerd Are you sure you made the right choice? Find out in theaters.',
557 'description': '@King0fNerd Are you sure you made the right choice? Find out in theaters. https://t.co/GpgYi9xMJI',
558 'channel_id': '701615052',
559 'uploader_id': 'CaptainAmerica',
560 'uploader': 'Captain America',
561 'duration': 3.17,
562 'timestamp': 1460483005,
563 'upload_date': '20160412',
564 'uploader_url': 'https://twitter.com/CaptainAmerica',
565 'thumbnail': r're:^https?://.*\.jpg',
566 'comment_count': int,
567 'repost_count': int,
568 'like_count': int,
569 'tags': [],
570 'age_limit': 0,
571 '_old_archive_ids': ['twitter 719944021058060289'],
573 }, {
574 'url': 'https://twitter.com/OPP_HSD/status/779210622571536384',
575 'info_dict': {
576 'id': '1zqKVVlkqLaKB',
577 'ext': 'mp4',
578 'title': 'Sgt Kerry Schmidt - Ontario Provincial Police - Road rage, mischief, assault, rollover and fire in one occurrence',
579 'upload_date': '20160923',
580 'uploader_id': '1PmKqpJdOJQoY',
581 'uploader': 'Sgt Kerry Schmidt - Ontario Provincial Police',
582 'timestamp': 1474613214,
583 'thumbnail': r're:^https?://.*\.jpg',
585 'add_ie': ['Periscope'],
586 'skip': 'Broadcast not found',
587 }, {
588 # has mp4 formats via mobile API
589 'url': 'https://twitter.com/news_al3alm/status/852138619213144067',
590 'info_dict': {
591 'id': '852077943283097602',
592 'ext': 'mp4',
593 'title': 'عالم الأخبار - كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة',
594 'description': 'كلمة تاريخية بجلسة الجناسي التاريخية.. النائب خالد مؤنس العتيبي للمعارضين : اتقوا الله .. الظلم ظلمات يوم القيامة https://t.co/xg6OhpyKfN',
595 'channel_id': '2526757026',
596 'uploader': 'عالم الأخبار',
597 'uploader_id': 'news_al3alm',
598 'duration': 277.4,
599 'timestamp': 1492000653,
600 'upload_date': '20170412',
601 'display_id': '852138619213144067',
602 'age_limit': 0,
603 'uploader_url': 'https://twitter.com/news_al3alm',
604 'thumbnail': r're:^https?://.*\.jpg',
605 'tags': [],
606 'repost_count': int,
607 'like_count': int,
608 'comment_count': int,
609 '_old_archive_ids': ['twitter 852138619213144067'],
611 }, {
612 'url': 'https://twitter.com/i/web/status/910031516746514432',
613 'info_dict': {
614 'id': '910030238373089285',
615 'display_id': '910031516746514432',
616 'ext': 'mp4',
617 'title': 'Préfet de Guadeloupe - [Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre.',
618 'thumbnail': r're:^https?://.*\.jpg',
619 'description': '[Direct] #Maria Le centre se trouve actuellement au sud de Basse-Terre. Restez confinés. Réfugiez-vous dans la pièce la + sûre. https://t.co/mwx01Rs4lo',
620 'channel_id': '2319432498',
621 'uploader': 'Préfet de Guadeloupe',
622 'uploader_id': 'Prefet971',
623 'duration': 47.48,
624 'timestamp': 1505803395,
625 'upload_date': '20170919',
626 'uploader_url': 'https://twitter.com/Prefet971',
627 'comment_count': int,
628 'repost_count': int,
629 'like_count': int,
630 'tags': ['Maria'],
631 'age_limit': 0,
632 '_old_archive_ids': ['twitter 910031516746514432'],
634 'params': {
635 'skip_download': True, # requires ffmpeg
637 }, {
638 # card via api.twitter.com/1.1/videos/tweet/config
639 'url': 'https://twitter.com/LisPower1/status/1001551623938805763',
640 'info_dict': {
641 'id': '1001551417340022785',
642 'display_id': '1001551623938805763',
643 'ext': 'mp4',
644 'title': 're:.*?Shep is on a roll today.*?',
645 'thumbnail': r're:^https?://.*\.jpg',
646 'description': 'md5:37b9f2ff31720cef23b2bd42ee8a0f09',
647 'channel_id': '255036353',
648 'uploader': 'Lis Power',
649 'uploader_id': 'LisPower1',
650 'duration': 111.278,
651 'timestamp': 1527623489,
652 'upload_date': '20180529',
653 'uploader_url': 'https://twitter.com/LisPower1',
654 'comment_count': int,
655 'repost_count': int,
656 'like_count': int,
657 'tags': [],
658 'age_limit': 0,
659 '_old_archive_ids': ['twitter 1001551623938805763'],
661 'params': {
662 'skip_download': True, # requires ffmpeg
664 }, {
665 'url': 'https://twitter.com/foobar/status/1087791357756956680',
666 'info_dict': {
667 'id': '1087791272830607360',
668 'display_id': '1087791357756956680',
669 'ext': 'mp4',
670 'title': 'X - A new is coming. Some of you got an opt-in to try it now. Check out the emoji button, quick keyboard shortcuts, upgraded trends, advanced search, and more. Let us know your thoughts!',
671 'thumbnail': r're:^https?://.*\.jpg',
672 'description': 'md5:6dfd341a3310fb97d80d2bf7145df976',
673 'uploader': 'X',
674 'uploader_id': 'X',
675 'duration': 61.567,
676 'timestamp': 1548184644,
677 'upload_date': '20190122',
678 'uploader_url': 'https://twitter.com/X',
679 'comment_count': int,
680 'repost_count': int,
681 'like_count': int,
682 'view_count': int,
683 'tags': [],
684 'age_limit': 0,
686 'skip': 'This Tweet is unavailable',
687 }, {
688 # not available in Periscope
689 'url': 'https://twitter.com/ViviEducation/status/1136534865145286656',
690 'info_dict': {
691 'id': '1vOGwqejwoWxB',
692 'ext': 'mp4',
693 'title': 'Vivi - Vivi founder @lior_rauchy announcing our new student feedback tool live at @EduTECH_AU #EduTECH2019',
694 'uploader': 'Vivi',
695 'uploader_id': '1eVjYOLGkGrQL',
696 'thumbnail': r're:^https?://.*\.jpg',
697 'tags': ['EduTECH2019'],
698 'view_count': int,
700 'add_ie': ['TwitterBroadcast'],
701 'skip': 'Broadcast no longer exists',
702 }, {
703 # unified card
704 'url': 'https://twitter.com/BrooklynNets/status/1349794411333394432?s=20',
705 'info_dict': {
706 'id': '1349774757969989634',
707 'display_id': '1349794411333394432',
708 'ext': 'mp4',
709 'title': 'md5:d1c4941658e4caaa6cb579260d85dcba',
710 'thumbnail': r're:^https?://.*\.jpg',
711 'description': 'md5:71ead15ec44cee55071547d6447c6a3e',
712 'channel_id': '18552281',
713 'uploader': 'Brooklyn Nets',
714 'uploader_id': 'BrooklynNets',
715 'duration': 324.484,
716 'timestamp': 1610651040,
717 'upload_date': '20210114',
718 'uploader_url': 'https://twitter.com/BrooklynNets',
719 'comment_count': int,
720 'repost_count': int,
721 'like_count': int,
722 'tags': [],
723 'age_limit': 0,
724 '_old_archive_ids': ['twitter 1349794411333394432'],
726 'params': {
727 'skip_download': True,
729 }, {
730 'url': 'https://twitter.com/oshtru/status/1577855540407197696',
731 'info_dict': {
732 'id': '1577855447914409984',
733 'display_id': '1577855540407197696',
734 'ext': 'mp4',
735 'title': 'md5:466a3a8b049b5f5a13164ce915484b51',
736 'description': 'md5:b9c3699335447391d11753ab21c70a74',
737 'upload_date': '20221006',
738 'channel_id': '143077138',
739 'uploader': 'Oshtru',
740 'uploader_id': 'oshtru',
741 'uploader_url': 'https://twitter.com/oshtru',
742 'thumbnail': r're:^https?://.*\.jpg',
743 'duration': 30.03,
744 'timestamp': 1665025050,
745 'comment_count': int,
746 'repost_count': int,
747 'like_count': int,
748 'tags': [],
749 'age_limit': 0,
750 '_old_archive_ids': ['twitter 1577855540407197696'],
752 'params': {'skip_download': True},
753 }, {
754 'url': 'https://twitter.com/UltimaShadowX/status/1577719286659006464',
755 'info_dict': {
756 'id': '1577719286659006464',
757 'title': 'Ultima Reload - Test',
758 'description': 'Test https://t.co/Y3KEZD7Dad',
759 'channel_id': '168922496',
760 'uploader': 'Ultima Reload',
761 'uploader_id': 'UltimaShadowX',
762 'uploader_url': 'https://twitter.com/UltimaShadowX',
763 'upload_date': '20221005',
764 'timestamp': 1664992565,
765 'comment_count': int,
766 'repost_count': int,
767 'like_count': int,
768 'tags': [],
769 'age_limit': 0,
771 'playlist_count': 4,
772 'params': {'skip_download': True},
773 }, {
774 'url': 'https://twitter.com/MesoMax919/status/1575560063510810624',
775 'info_dict': {
776 'id': '1575559336759263233',
777 'display_id': '1575560063510810624',
778 'ext': 'mp4',
779 'title': 'md5:eec26382babd0f7c18f041db8ae1c9c9',
780 'thumbnail': r're:^https?://.*\.jpg',
781 'description': 'md5:95aea692fda36a12081b9629b02daa92',
782 'channel_id': '1094109584',
783 'uploader': 'Max Olson',
784 'uploader_id': 'MesoMax919',
785 'uploader_url': 'https://twitter.com/MesoMax919',
786 'duration': 21.321,
787 'timestamp': 1664477766,
788 'upload_date': '20220929',
789 'comment_count': int,
790 'repost_count': int,
791 'like_count': int,
792 'tags': ['HurricaneIan'],
793 'age_limit': 0,
794 '_old_archive_ids': ['twitter 1575560063510810624'],
796 }, {
797 # Adult content, fails if not logged in
798 'url': 'https://twitter.com/Rizdraws/status/1575199173472927762',
799 'info_dict': {
800 'id': '1575199163847000068',
801 'display_id': '1575199173472927762',
802 'ext': 'mp4',
803 'title': str,
804 'description': str,
805 'channel_id': '1217167793541480450',
806 'uploader': str,
807 'uploader_id': 'Rizdraws',
808 'uploader_url': 'https://twitter.com/Rizdraws',
809 'upload_date': '20220928',
810 'timestamp': 1664391723,
811 'thumbnail': r're:^https?://.+\.jpg',
812 'like_count': int,
813 'repost_count': int,
814 'comment_count': int,
815 'age_limit': 18,
816 'tags': [],
817 '_old_archive_ids': ['twitter 1575199173472927762'],
819 'params': {'skip_download': 'The media could not be played'},
820 'skip': 'Requires authentication',
821 }, {
822 # Playlist result only with graphql API
823 'url': 'https://twitter.com/Srirachachau/status/1395079556562706435',
824 'playlist_mincount': 2,
825 'info_dict': {
826 'id': '1395079556562706435',
827 'title': str,
828 'tags': [],
829 'channel_id': '21539378',
830 'uploader': str,
831 'like_count': int,
832 'upload_date': '20210519',
833 'age_limit': 0,
834 'repost_count': int,
835 'description': 'Here it is! Finished my gothic western cartoon. Pretty proud of it. It\'s got some goofs and lots of splashy over the top violence, something for everyone, hope you like it https://t.co/fOsG5glUnw',
836 'uploader_id': 'Srirachachau',
837 'comment_count': int,
838 'uploader_url': 'https://twitter.com/Srirachachau',
839 'timestamp': 1621447860,
841 }, {
842 'url': 'https://twitter.com/DavidToons_/status/1578353380363501568',
843 'playlist_mincount': 2,
844 'info_dict': {
845 'id': '1578353380363501568',
846 'title': str,
847 'channel_id': '2195866214',
848 'uploader_id': 'DavidToons_',
849 'repost_count': int,
850 'like_count': int,
851 'uploader': str,
852 'timestamp': 1665143744,
853 'uploader_url': 'https://twitter.com/DavidToons_',
854 'description': 'Chris sounds like Linda from Bob\'s Burgers, so as an animator: this had to be done. https://t.co/WgJauwIW1w',
855 'tags': [],
856 'comment_count': int,
857 'upload_date': '20221007',
858 'age_limit': 0,
860 }, {
861 'url': 'https://twitter.com/primevideouk/status/1578401165338976258',
862 'playlist_count': 2,
863 'info_dict': {
864 'id': '1578401165338976258',
865 'title': str,
866 'description': 'md5:659a6b517a034b4cee5d795381a2dc41',
867 'channel_id': '19338359',
868 'uploader': str,
869 'uploader_id': 'primevideouk',
870 'timestamp': 1665155137,
871 'upload_date': '20221007',
872 'age_limit': 0,
873 'uploader_url': 'https://twitter.com/primevideouk',
874 'comment_count': int,
875 'repost_count': int,
876 'like_count': int,
877 'tags': ['TheRingsOfPower'],
879 }, {
880 # Twitter Spaces
881 'url': 'https://twitter.com/MoniqueCamarra/status/1550101959377551360',
882 'info_dict': {
883 'id': '1lPJqmBeeNAJb',
884 'ext': 'm4a',
885 'title': 'EuroFile@6 Ukraine Up-date-Draghi Defenestration-the West',
886 'uploader': r're:Monique Camarra.+?',
887 'uploader_id': 'MoniqueCamarra',
888 'live_status': 'was_live',
889 'release_timestamp': 1658417414,
890 'description': r're:Twitter Space participated by Sergej Sumlenny.+',
891 'timestamp': 1658407771,
892 'release_date': '20220721',
893 'upload_date': '20220721',
895 'add_ie': ['TwitterSpaces'],
896 'params': {'skip_download': 'm3u8'},
897 }, {
898 # URL specifies video number but --yes-playlist
899 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/1',
900 'playlist_mincount': 2,
901 'info_dict': {
902 'id': '1600649710662213632',
903 'title': 'md5:be05989b0722e114103ed3851a0ffae2',
904 'timestamp': 1670459604.0,
905 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
906 'comment_count': int,
907 'uploader_id': 'CTVJLaidlaw',
908 'channel_id': '80082014',
909 'repost_count': int,
910 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
911 'upload_date': '20221208',
912 'age_limit': 0,
913 'uploader': 'Jocelyn Laidlaw',
914 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
915 'like_count': int,
917 }, {
918 # URL specifies video number and --no-playlist
919 'url': 'https://twitter.com/CTVJLaidlaw/status/1600649710662213632/video/2',
920 'info_dict': {
921 'id': '1600649511827013632',
922 'ext': 'mp4',
923 'title': 'md5:7662a0a27ce6faa3e5b160340f3cfab1',
924 'thumbnail': r're:^https?://.+\.jpg',
925 'timestamp': 1670459604.0,
926 'channel_id': '80082014',
927 'uploader_id': 'CTVJLaidlaw',
928 'uploader': 'Jocelyn Laidlaw',
929 'repost_count': int,
930 'comment_count': int,
931 'tags': ['colorectalcancer', 'cancerjourney', 'imnotaquitter'],
932 'duration': 102.226,
933 'uploader_url': 'https://twitter.com/CTVJLaidlaw',
934 'display_id': '1600649710662213632',
935 'like_count': int,
936 'description': 'md5:591c19ce66fadc2359725d5cd0d1052c',
937 'upload_date': '20221208',
938 'age_limit': 0,
939 '_old_archive_ids': ['twitter 1600649710662213632'],
941 'params': {'noplaylist': True},
942 }, {
943 # id pointing to TweetWithVisibilityResults type entity which wraps the actual Tweet over
944 # note the id different between extraction and url
945 'url': 'https://twitter.com/s2FAKER/status/1621117700482416640',
946 'info_dict': {
947 'id': '1621117577354424321',
948 'display_id': '1621117700482416640',
949 'ext': 'mp4',
950 'title': '뽀 - 아 최우제 이동속도 봐',
951 'description': '아 최우제 이동속도 봐 https://t.co/dxu2U5vXXB',
952 'duration': 24.598,
953 'channel_id': '1281839411068432384',
954 'uploader': '뽀',
955 'uploader_id': 's2FAKER',
956 'uploader_url': 'https://twitter.com/s2FAKER',
957 'upload_date': '20230202',
958 'timestamp': 1675339553.0,
959 'thumbnail': r're:https?://pbs\.twimg\.com/.+',
960 'age_limit': 18,
961 'tags': [],
962 'like_count': int,
963 'repost_count': int,
964 'comment_count': int,
965 '_old_archive_ids': ['twitter 1621117700482416640'],
967 'skip': 'Requires authentication',
968 }, {
969 'url': 'https://twitter.com/hlo_again/status/1599108751385972737/video/2',
970 'info_dict': {
971 'id': '1599108643743473680',
972 'display_id': '1599108751385972737',
973 'ext': 'mp4',
974 'title': '\u06ea - \U0001F48B',
975 'channel_id': '1347791436809441283',
976 'uploader_url': 'https://twitter.com/hlo_again',
977 'like_count': int,
978 'uploader_id': 'hlo_again',
979 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1599108643743473680/pu/img/UG3xjov4rgg5sbYM.jpg?name=orig',
980 'repost_count': int,
981 'duration': 9.531,
982 'comment_count': int,
983 'upload_date': '20221203',
984 'age_limit': 0,
985 'timestamp': 1670092210.0,
986 'tags': [],
987 'uploader': '\u06ea',
988 'description': '\U0001F48B https://t.co/bTj9Qz7vQP',
989 '_old_archive_ids': ['twitter 1599108751385972737'],
991 'params': {'noplaylist': True},
992 }, {
993 'url': 'https://twitter.com/MunTheShinobi/status/1600009574919962625',
994 'info_dict': {
995 'id': '1600009362759733248',
996 'display_id': '1600009574919962625',
997 'ext': 'mp4',
998 'channel_id': '211814412',
999 'uploader_url': 'https://twitter.com/MunTheShinobi',
1000 'description': 'This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525 https://t.co/cNsA0MoOml',
1001 'thumbnail': 'https://pbs.twimg.com/ext_tw_video_thumb/1600009362759733248/pu/img/XVhFQivj75H_YxxV.jpg?name=orig',
1002 'age_limit': 0,
1003 'uploader': 'Mün',
1004 'repost_count': int,
1005 'upload_date': '20221206',
1006 'title': 'Mün - This is a genius ad by Apple. \U0001f525\U0001f525\U0001f525\U0001f525\U0001f525',
1007 'comment_count': int,
1008 'like_count': int,
1009 'tags': [],
1010 'uploader_id': 'MunTheShinobi',
1011 'duration': 139.987,
1012 'timestamp': 1670306984.0,
1013 '_old_archive_ids': ['twitter 1600009574919962625'],
1015 }, {
1016 # retweeted_status (private)
1017 'url': 'https://twitter.com/liberdalau/status/1623739803874349067',
1018 'info_dict': {
1019 'id': '1623274794488659969',
1020 'display_id': '1623739803874349067',
1021 'ext': 'mp4',
1022 'title': 'Johnny Bullets - Me after going viral to over 30million people: Whoopsie-daisy',
1023 'description': 'md5:b06864cd3dc2554821cc327f5348485a',
1024 'uploader': 'Johnny Bullets',
1025 'uploader_id': 'Johnnybull3ts',
1026 'uploader_url': 'https://twitter.com/Johnnybull3ts',
1027 'age_limit': 0,
1028 'tags': [],
1029 'duration': 8.033,
1030 'timestamp': 1675853859.0,
1031 'upload_date': '20230208',
1032 'thumbnail': r're:https://pbs\.twimg\.com/ext_tw_video_thumb/.+',
1033 'like_count': int,
1034 'repost_count': int,
1036 'skip': 'Protected tweet',
1037 }, {
1038 # retweeted_status
1039 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1040 'info_dict': {
1041 'id': '1694928337846538240',
1042 'ext': 'mp4',
1043 'display_id': '1695424220702888009',
1044 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1045 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1046 'channel_id': '15212187',
1047 'uploader': 'Benny Johnson',
1048 'uploader_id': 'bennyjohnson',
1049 'uploader_url': 'https://twitter.com/bennyjohnson',
1050 'age_limit': 0,
1051 'tags': [],
1052 'duration': 45.001,
1053 'timestamp': 1692962814.0,
1054 'upload_date': '20230825',
1055 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1056 'like_count': int,
1057 'repost_count': int,
1058 'comment_count': int,
1059 '_old_archive_ids': ['twitter 1695424220702888009'],
1061 }, {
1062 # retweeted_status w/ legacy API
1063 'url': 'https://twitter.com/playstrumpcard/status/1695424220702888009',
1064 'info_dict': {
1065 'id': '1694928337846538240',
1066 'ext': 'mp4',
1067 'display_id': '1695424220702888009',
1068 'title': 'md5:e8daa9527bc2b947121395494f786d9d',
1069 'description': 'md5:004f2d37fd58737724ec75bc7e679938',
1070 'channel_id': '15212187',
1071 'uploader': 'Benny Johnson',
1072 'uploader_id': 'bennyjohnson',
1073 'uploader_url': 'https://twitter.com/bennyjohnson',
1074 'age_limit': 0,
1075 'tags': [],
1076 'duration': 45.001,
1077 'timestamp': 1692962814.0,
1078 'upload_date': '20230825',
1079 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1080 'like_count': int,
1081 'repost_count': int,
1082 '_old_archive_ids': ['twitter 1695424220702888009'],
1084 'params': {'extractor_args': {'twitter': {'api': ['legacy']}}},
1085 }, {
1086 # Broadcast embedded in tweet
1087 'url': 'https://twitter.com/JessicaDobsonWX/status/1731121063248175384',
1088 'info_dict': {
1089 'id': '1rmxPMjLzAXKN',
1090 'ext': 'mp4',
1091 'title': 'WAVE Weather Now - Saturday 12/2/23 Update',
1092 'uploader': 'Jessica Dobson',
1093 'uploader_id': 'JessicaDobsonWX',
1094 'uploader_url': 'https://twitter.com/JessicaDobsonWX',
1095 'timestamp': 1701566398,
1096 'upload_date': '20231203',
1097 'live_status': 'was_live',
1098 'thumbnail': r're:https://[^/]+pscp\.tv/.+\.jpg',
1099 'concurrent_view_count': int,
1100 'view_count': int,
1102 'add_ie': ['TwitterBroadcast'],
1103 }, {
1104 # Animated gif and quote tweet video
1105 'url': 'https://twitter.com/BAKKOOONN/status/1696256659889565950',
1106 'playlist_mincount': 2,
1107 'info_dict': {
1108 'id': '1696256659889565950',
1109 'title': 'BAKOON - https://t.co/zom968d0a0',
1110 'description': 'https://t.co/zom968d0a0',
1111 'tags': [],
1112 'channel_id': '1263540390',
1113 'uploader': 'BAKOON',
1114 'uploader_id': 'BAKKOOONN',
1115 'uploader_url': 'https://twitter.com/BAKKOOONN',
1116 'age_limit': 18,
1117 'timestamp': 1693254077.0,
1118 'upload_date': '20230828',
1119 'like_count': int,
1120 'comment_count': int,
1121 'repost_count': int,
1123 'skip': 'Requires authentication',
1124 }, {
1125 # "stale tweet" with typename "TweetWithVisibilityResults"
1126 'url': 'https://twitter.com/RobertKennedyJr/status/1724884212803834154',
1127 'md5': '511377ff8dfa7545307084dca4dce319',
1128 'info_dict': {
1129 'id': '1724883339285544960',
1130 'ext': 'mp4',
1131 'title': 'md5:cc56716f9ed0b368de2ba54c478e493c',
1132 'description': 'md5:9dc14f5b0f1311fc7caf591ae253a164',
1133 'display_id': '1724884212803834154',
1134 'channel_id': '337808606',
1135 'uploader': 'Robert F. Kennedy Jr',
1136 'uploader_id': 'RobertKennedyJr',
1137 'uploader_url': 'https://twitter.com/RobertKennedyJr',
1138 'upload_date': '20231115',
1139 'timestamp': 1700079417.0,
1140 'duration': 341.048,
1141 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1142 'tags': ['Kennedy24'],
1143 'repost_count': int,
1144 'like_count': int,
1145 'comment_count': int,
1146 'age_limit': 0,
1147 '_old_archive_ids': ['twitter 1724884212803834154'],
1149 }, {
1150 # x.com
1151 'url': 'https://x.com/historyinmemes/status/1790637656616943991',
1152 'md5': 'daca3952ba0defe2cfafb1276d4c1ea5',
1153 'info_dict': {
1154 'id': '1790637589910654976',
1155 'ext': 'mp4',
1156 'title': 'Historic Vids - One of the most intense moments in history',
1157 'description': 'One of the most intense moments in history https://t.co/Zgzhvix8ES',
1158 'display_id': '1790637656616943991',
1159 'uploader': 'Historic Vids',
1160 'uploader_id': 'historyinmemes',
1161 'uploader_url': 'https://twitter.com/historyinmemes',
1162 'channel_id': '855481986290524160',
1163 'upload_date': '20240515',
1164 'timestamp': 1715756260.0,
1165 'duration': 15.488,
1166 'tags': [],
1167 'comment_count': int,
1168 'repost_count': int,
1169 'like_count': int,
1170 'thumbnail': r're:https://pbs\.twimg\.com/amplify_video_thumb/.+',
1171 'age_limit': 0,
1172 '_old_archive_ids': ['twitter 1790637656616943991'],
1174 }, {
1175 # onion route
1176 'url': 'https://twitter3e4tixl4xyajtrzo62zg5vztmjuricljdp2c5kshju4avyoid.onion/TwitterBlue/status/1484226494708662273',
1177 'only_matching': True,
1178 }, {
1179 # Twitch Clip Embed
1180 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1181 'only_matching': True,
1182 }, {
1183 # promo_video_website card
1184 'url': 'https://twitter.com/GunB1g/status/1163218564784017422',
1185 'only_matching': True,
1186 }, {
1187 # promo_video_convo card
1188 'url': 'https://twitter.com/poco_dandy/status/1047395834013384704',
1189 'only_matching': True,
1190 }, {
1191 # appplayer card
1192 'url': 'https://twitter.com/poco_dandy/status/1150646424461176832',
1193 'only_matching': True,
1194 }, {
1195 # video_direct_message card
1196 'url': 'https://twitter.com/qarev001/status/1348948114569269251',
1197 'only_matching': True,
1198 }, {
1199 # poll2choice_video card
1200 'url': 'https://twitter.com/CAF_Online/status/1349365911120195585',
1201 'only_matching': True,
1202 }, {
1203 # poll3choice_video card
1204 'url': 'https://twitter.com/SamsungMobileSA/status/1348609186725289984',
1205 'only_matching': True,
1206 }, {
1207 # poll4choice_video card
1208 'url': 'https://twitter.com/SouthamptonFC/status/1347577658079641604',
1209 'only_matching': True,
# Captures the run of digits between "_video/" and the next "/" in a URL;
# _call_syndication_api applies it to video variant URLs to derive `id_str`
_MEDIA_ID_RE = re.compile(r'_video/(\d+)/')
@property
def _GRAPHQL_ENDPOINT(self):
    """GraphQL operation path for fetching a tweet; differs by login state."""
    return (
        'zZXycP0V6H7m-2r0mOnFcA/TweetDetail' if self.is_logged_in
        else '2ICDjqPd81tulZcYrtpTuQ/TweetResultByRestId')
def _graphql_to_legacy(self, data, twid):
    """Convert a GraphQL tweet response into a legacy-API-shaped status dict.

    @param data  GraphQL response data as returned by the selected endpoint
    @param twid  id of the requested tweet
    @returns     the tweet's `legacy` dict, augmented with `user`, `card`,
                 `quoted_status` and `retweeted_status` where available
    Raises ExtractorError for tombstoned or unavailable tweets.
    """
    if self.is_logged_in:
        # TweetDetail returns a whole conversation; pick the entry for this tweet
        result = traverse_obj(data, (
            'threaded_conversation_with_injections_v2', 'instructions', 0, 'entries',
            lambda _, v: v['entryId'] == f'tweet-{twid}', 'content', 'itemContent',
            'tweet_results', 'result', ('tweet', None), {dict},
        ), default={}, get_all=False)
    else:
        result = traverse_obj(data, ('tweetResult', 'result', {dict}), default={})

    known_typenames = ('Tweet', 'TweetWithVisibilityResults', 'TweetTombstone', 'TweetUnavailable', None)
    typename = result.get('__typename')
    if typename not in known_typenames:
        self.report_warning(f'Unknown typename: {typename}', twid, only_once=True)

    if 'tombstone' in result:
        tombstone_text = traverse_obj(result, ('tombstone', 'text', 'text', {str}))
        cause = remove_end(tombstone_text, '. Learn more')
        raise ExtractorError(f'Twitter API says: {cause or "Unknown error"}', expected=True)

    if typename == 'TweetUnavailable':
        reason = result.get('reason')
        if reason == 'NsfwLoggedOut':
            self.raise_login_required('NSFW tweet requires authentication')
        elif reason == 'Protected':
            self.raise_login_required('You are not authorized to view this protected tweet')
        raise ExtractorError(reason or 'Requested tweet is unavailable', expected=True)

    # Result for "stale tweet" needs additional transformation
    if typename == 'TweetWithVisibilityResults':
        result = traverse_obj(result, ('tweet', {dict})) or {}

    # Where each legacy-style field lives inside the GraphQL result
    legacy_field_paths = {
        'user': ('core', 'user_results', 'result', 'legacy'),
        'card': ('card', 'legacy'),
        'quoted_status': ('quoted_status_result', 'result', 'legacy'),
        'retweeted_status': ('legacy', 'retweeted_status_result', 'result', 'legacy'),
    }
    status = result.get('legacy', {})
    status.update(traverse_obj(result, legacy_field_paths, expected_type=dict, default={}))

    # extra transformations needed since result does not match legacy format
    if status.get('retweeted_status'):
        retweet_user = traverse_obj(status, (
            'retweeted_status_result', 'result', 'core', 'user_results', 'result', 'legacy', {dict}))
        status['retweeted_status']['user'] = retweet_user or {}

    # Flatten the card's list of {key, value} items into a key -> value mapping
    binding_values = {}
    for item in traverse_obj(status, ('card', 'binding_values', ..., {dict})):
        binding_values[item.get('key')] = item.get('value')
    if binding_values:
        status['card']['binding_values'] = binding_values

    return status
def _build_graphql_query(self, media_id):
    """Build the GraphQL request payload used to fetch a single tweet.

    Logged-in sessions get the `focalTweetId` variable set; guest sessions
    get the `tweetId` variable set plus an extra `fieldToggles` section.
    """
    if self.is_logged_in:
        variables = {
            'focalTweetId': media_id,
            'includePromotedContent': True,
            'with_rux_injections': False,
            'withBirdwatchNotes': True,
            'withCommunity': True,
            'withDownvotePerspective': False,
            'withQuickPromoteEligibilityTweetFields': True,
            'withReactionsMetadata': False,
            'withReactionsPerspective': False,
            'withSuperFollowsTweetFields': True,
            'withSuperFollowsUserFields': True,
            'withV2Timeline': True,
            'withVoice': True,
        }
        features = {
            'graphql_is_translatable_rweb_tweet_is_translatable_enabled': False,
            'interactive_text_enabled': True,
            'responsive_web_edit_tweet_api_enabled': True,
            'responsive_web_enhance_cards_enabled': True,
            'responsive_web_graphql_timeline_navigation_enabled': False,
            'responsive_web_text_conversations_enabled': False,
            'responsive_web_uc_gql_enabled': True,
            'standardized_nudges_misinfo': True,
            'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
            'tweetypie_unmention_optimization_enabled': True,
            'unified_cards_ad_metadata_container_dynamic_card_content_query_enabled': True,
            'verified_phone_label_enabled': False,
            'vibe_api_enabled': True,
        }
        return {'variables': variables, 'features': features}

    guest_variables = {
        'tweetId': media_id,
        'withCommunity': False,
        'includePromotedContent': False,
        'withVoice': False,
    }
    guest_features = {
        'creator_subscriptions_tweet_preview_api_enabled': True,
        'tweetypie_unmention_optimization_enabled': True,
        'responsive_web_edit_tweet_api_enabled': True,
        'graphql_is_translatable_rweb_tweet_is_translatable_enabled': True,
        'view_counts_everywhere_api_enabled': True,
        'longform_notetweets_consumption_enabled': True,
        'responsive_web_twitter_article_tweet_consumption_enabled': False,
        'tweet_awards_web_tipping_enabled': False,
        'freedom_of_speech_not_reach_fetch_enabled': True,
        'standardized_nudges_misinfo': True,
        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': True,
        'longform_notetweets_rich_text_read_enabled': True,
        'longform_notetweets_inline_media_enabled': True,
        'responsive_web_graphql_exclude_directive_enabled': True,
        'verified_phone_label_enabled': False,
        'responsive_web_media_download_video_enabled': False,
        'responsive_web_graphql_skip_user_profile_image_extensions_enabled': False,
        'responsive_web_graphql_timeline_navigation_enabled': True,
        'responsive_web_enhance_cards_enabled': False,
    }
    return {
        'variables': guest_variables,
        'features': guest_features,
        'fieldToggles': {
            'withArticleRichContentState': False,
        },
    }
def _call_syndication_api(self, twid):
    """Fetch a tweet from the syndication endpoint and reshape it like a legacy status.

    Raises ExtractorError if the endpoint returns an empty response.
    """
    self.report_warning(
        'Not all metadata or media is available via syndication endpoint', twid, only_once=True)

    query = {
        'id': twid,
        # TODO: token = ((Number(twid) / 1e15) * Math.PI).toString(36).replace(/(0+|\.)/g, '')
        'token': ''.join(random.choices('123456789abcdefghijklmnopqrstuvwxyz', k=10)),
    }
    status = self._download_json(
        'https://cdn.syndication.twimg.com/tweet-result', twid, 'Downloading syndication JSON',
        headers={'User-Agent': 'Googlebot'}, query=query)
    if not status:
        raise ExtractorError('Syndication endpoint returned empty JSON response')

    # Transform the result so its structure matches that of legacy/graphql
    media_id_path = ('video_info', 'variants', ..., 'url', {self._MEDIA_ID_RE.search}, 1)
    media = []
    for item in traverse_obj(status, ((None, 'quoted_tweet'), 'mediaDetails', ..., {dict})):
        # Take the media id from the first matching variant URL, else use the tweet id
        item['id_str'] = traverse_obj(item, media_id_path, get_all=False) or twid
        media.append(item)
    status['extended_entities'] = {'media': media}

    return status
def _extract_status(self, twid):
    """Fetch the tweet status dict through the selected API.

    A rate-limit (HTTP 429) error from the graphql/legacy call falls back to the
    syndication endpoint. If the fetched status carries a `retweeted_status` dict,
    that dict is returned instead of the outer status.
    Raises ExtractorError when the API selection is invalid.
    """
    if self._selected_api not in ('graphql', 'legacy', 'syndication'):
        raise ExtractorError(f'{self._selected_api!r} is not a valid API selection', expected=True)

    try:
        if self.is_logged_in or self._selected_api == 'graphql':
            graphql_data = self._call_graphql_api(self._GRAPHQL_ENDPOINT, twid)
            status = self._graphql_to_legacy(graphql_data, twid)
        elif self._selected_api == 'legacy':
            legacy_params = {
                'cards_platform': 'Web-12',
                'include_cards': 1,
                'include_reply_count': 1,
                'include_user_entities': 0,
                'tweet_mode': 'extended',
            }
            status = self._call_api(f'statuses/show/{twid}.json', twid, legacy_params)
    except ExtractorError as e:
        if isinstance(e.cause, HTTPError) and e.cause.status == 429:
            self.report_warning('Rate-limit exceeded; falling back to syndication endpoint')
            status = self._call_syndication_api(twid)
        else:
            raise

    if self._selected_api == 'syndication':
        status = self._call_syndication_api(twid)

    return traverse_obj(status, 'retweeted_status', None, expected_type=dict) or {}
def _real_extract(self, url):
    """Extract the video(s) attached to a tweet.

    Returns a single info dict, a playlist of entries, or a url result pointing at
    the tweet's first expanded URL when the tweet itself carries no media.
    """
    twid, selected_index = self._match_valid_url(url).group('id', 'index')
    status = self._extract_status(twid)

    # Tweet text flattened onto a single line doubles as the description
    description = traverse_obj(
        status, (('full_text', 'text'), {lambda x: x.replace('\n', ' ')}), get_all=False) or ''
    # strip 'https -_t.co_BJYgOjSeGA' junk from filenames
    title = re.sub(r'\s+(https?://[^ ]+)', '', description)
    user = status.get('user') or {}
    uploader = user.get('name')
    uploader_id = user.get('screen_name')
    if uploader:
        title = f'{uploader} - {title}'

    info = {
        'id': twid,
        'title': title,
        'description': description,
        'uploader': uploader,
        'timestamp': unified_timestamp(status.get('created_at')),
        'channel_id': str_or_none(status.get('user_id_str')) or str_or_none(user.get('id_str')),
        'uploader_id': uploader_id,
        'uploader_url': format_field(uploader_id, None, 'https://twitter.com/%s'),
        'like_count': int_or_none(status.get('favorite_count')),
        'repost_count': int_or_none(status.get('retweet_count')),
        'comment_count': int_or_none(status.get('reply_count')),
        'age_limit': 18 if status.get('possibly_sensitive') else 0,
        'tags': traverse_obj(status, ('entities', 'hashtags', ..., 'text')),
    }

    def extract_from_video_info(media):
        """Build the media-specific info fields from one `extended_entities` media dict."""
        media_id = traverse_obj(media, 'id_str', 'id', expected_type=str_or_none)
        self.write_debug(f'Extracting from video info: {media_id}')

        formats, subtitles = [], {}
        for variant in traverse_obj(media, ('video_info', 'variants', ...)):
            variant_formats, variant_subs = self._extract_variant_formats(variant, twid)
            subtitles = self._merge_subtitles(subtitles, variant_subs)
            formats.extend(variant_formats)

        thumbnails = []
        media_url = media.get('media_url_https') or media.get('media_url')
        if media_url:
            # One thumbnail per advertised size, followed by the original image
            named_sizes = [*media.get('sizes', {}).items(), ('orig', media.get('original_info') or {})]
            thumbnails = [{
                'id': name,
                'url': update_url_query(media_url, {'name': name}),
                'width': int_or_none(size.get('w') or size.get('width')),
                'height': int_or_none(size.get('h') or size.get('height')),
            } for name, size in named_sizes]

        return {
            'id': media_id,
            'formats': formats,
            'subtitles': subtitles,
            'thumbnails': thumbnails,
            'view_count': traverse_obj(media, ('mediaStats', 'viewCount', {int_or_none})),  # No longer available
            'duration': float_or_none(traverse_obj(media, ('video_info', 'duration_millis')), 1000),
            # Prioritize m3u8 formats for compat, see https://github.com/yt-dlp/yt-dlp/issues/8117
            '_format_sort_fields': ('res', 'proto:m3u8', 'br', 'size'),  # http format codec is unknown
        }

    def extract_from_card_info(card):
        """Yield entries described by the tweet's card, if it has one."""
        if not card:
            return

        self.write_debug(f'Extracting from card info: {card.get("url")}')
        binding_values = card['binding_values']

        def get_binding_value(k):
            # Each binding is {'type': T, '<t>_value': ...}; return the typed payload
            binding = binding_values.get(k) or {}
            return try_get(binding, lambda x: x[x['type'].lower() + '_value'])

        card_name = card['name'].split(':')[-1]
        if card_name == 'player':
            yield {
                '_type': 'url',
                'url': get_binding_value('player_url'),
            }
        elif card_name == 'periscope_broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('url') or get_binding_value('player_url'),
                'ie_key': PeriscopeIE.ie_key(),
            }
        elif card_name == 'broadcast':
            yield {
                '_type': 'url',
                'url': get_binding_value('broadcast_url'),
                'ie_key': TwitterBroadcastIE.ie_key(),
            }
        elif card_name == 'audiospace':
            yield {
                '_type': 'url',
                'url': f'https://twitter.com/i/spaces/{get_binding_value("id")}',
                'ie_key': TwitterSpacesIE.ie_key(),
            }
        elif card_name == 'summary':
            yield {
                '_type': 'url',
                'url': get_binding_value('card_url'),
            }
        elif card_name == 'unified_card':
            unified_card = self._parse_json(get_binding_value('unified_card'), twid)
            yield from map(extract_from_video_info, traverse_obj(
                unified_card, ('media_entities', ...), expected_type=dict))
        # amplify, promo_video_website, promo_video_convo, appplayer,
        # video_direct_message, poll2choice_video, poll3choice_video,
        # poll4choice_video, ...
        else:
            is_amplify = card_name == 'amplify'
            vmap_key = 'amplify_url_vmap' if is_amplify else 'player_stream_url'
            id_prefix = card_name if is_amplify else 'player'
            vmap_url = get_binding_value(vmap_key)
            content_id = get_binding_value(f'{id_prefix}_content_id')
            formats, subtitles = self._extract_formats_from_vmap_url(vmap_url, content_id or twid)

            thumbnails = []
            for suffix in ('_small', '', '_large', '_x_large', '_original'):
                image = get_binding_value('player_image' + suffix) or {}
                image_url = image.get('url')
                if image_url and '/player-placeholder' not in image_url:
                    thumbnails.append({
                        'id': suffix[1:] if suffix else 'medium',
                        'url': image_url,
                        'width': int_or_none(image.get('width')),
                        'height': int_or_none(image.get('height')),
                    })

            yield {
                'formats': formats,
                'subtitles': subtitles,
                'thumbnails': thumbnails,
                'duration': int_or_none(get_binding_value(
                    'content_duration_seconds')),
            }

    videos = traverse_obj(status, (
        (None, 'quoted_status'), 'extended_entities', 'media', lambda _, m: m['type'] != 'photo', {dict}))

    if not self._yes_playlist(twid, selected_index, video_label='URL-specified video number'):
        # Only the media item at the URL-specified position was requested
        desired_obj = traverse_obj(status, (
            (None, 'quoted_status'), 'extended_entities', 'media', int(selected_index) - 1, {dict}), get_all=False)
        if not desired_obj:
            raise ExtractorError(f'Video #{selected_index} is unavailable', expected=True)
        if desired_obj.get('type') != 'video':
            raise ExtractorError(f'Media #{selected_index} is not a video', expected=True)

        # Restore original archive id and video index in title
        desired_id = desired_obj.get('id')
        position = next(
            (index for index, entry in enumerate(videos, 1) if entry.get('id') == desired_id), None)
        if position is not None:
            if position == 1:
                info['_old_archive_ids'] = [make_archive_id(self, twid)]
            if len(videos) != 1:
                info['title'] += f' #{position}'

        return {**info, **extract_from_video_info(desired_obj), 'display_id': twid}

    selected_entries = (*map(extract_from_video_info, videos), *extract_from_card_info(status.get('card')))
    entries = [{**info, **data, 'display_id': twid} for data in selected_entries]

    if not entries:
        # No media in the tweet itself: hand off to the first linked URL, if any
        expanded_url = traverse_obj(status, ('entities', 'urls', 0, 'expanded_url'), expected_type=url_or_none)
        if not expanded_url or expanded_url == url:
            self.raise_no_formats('No video could be found in this tweet', expected=True)
            return info

        return self.url_result(expanded_url, display_id=twid, **info)

    entries[0]['_old_archive_ids'] = [make_archive_id(self, twid)]

    if len(entries) == 1:
        return entries[0]

    for position, entry in enumerate(entries, 1):
        entry['title'] += f' #{position}'

    return self.playlist_result(entries, **info)
class TwitterAmplifyIE(TwitterBaseIE):
    """Extractor for Twitter Amplify pages hosted on amp.twimg.com."""
    IE_NAME = 'twitter:amplify'
    _VALID_URL = r'https?://amp\.twimg\.com/v/(?P<id>[0-9a-f\-]{36})'

    _TEST = {
        'url': 'https://amp.twimg.com/v/0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
        'md5': 'fec25801d18a4557c5c9f33d2c379ffa',
        'info_dict': {
            'id': '0ba0c3c7-0af3-4c0a-bed5-7efd1ffa2951',
            'ext': 'mp4',
            'title': 'Twitter Video',
            'thumbnail': 're:^https?://.*',
        },
        'params': {'format': '[protocol=https]'},
    }

    def _real_extract(self, url):
        """Extract formats and thumbnail from the page's `twitter:*` meta tags."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The VMAP URL is mandatory: fail with a proper extractor error when the
        # meta tag is absent instead of passing None on to the VMAP parser
        vmap_url = self._html_search_meta(
            'twitter:amplify:vmap', webpage, 'vmap url', fatal=True)
        formats, _ = self._extract_formats_from_vmap_url(vmap_url, video_id)

        thumbnails = []
        thumbnail = self._html_search_meta(
            'twitter:image:src', webpage, 'thumbnail', fatal=False)

        def _find_dimension(target):
            # Read the `twitter:<target>:width` / `:height` meta values as ints (or None)
            w = int_or_none(self._html_search_meta(
                f'twitter:{target}:width', webpage, fatal=False))
            h = int_or_none(self._html_search_meta(
                f'twitter:{target}:height', webpage, fatal=False))
            return w, h

        if thumbnail:
            thumbnail_w, thumbnail_h = _find_dimension('image')
            thumbnails.append({
                'url': thumbnail,
                'width': thumbnail_w,
                'height': thumbnail_h,
            })

        # Player dimensions are applied to the first format only; skip when the
        # VMAP yielded no formats so this does not raise IndexError
        if formats:
            video_w, video_h = _find_dimension('player')
            formats[0].update({
                'width': video_w,
                'height': video_h,
            })

        return {
            'id': video_id,
            'title': 'Twitter Video',
            'formats': formats,
            'thumbnails': thumbnails,
        }
class TwitterBroadcastIE(TwitterBaseIE, PeriscopeBaseIE):
    """Extractor for `i/broadcasts/<id>` URLs."""
    IE_NAME = 'twitter:broadcast'
    _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/broadcasts/(?P<id>[0-9a-zA-Z]{13})'

    _TESTS = [{
        # untitled Periscope video
        'url': 'https://twitter.com/i/broadcasts/1yNGaQLWpejGj',
        'info_dict': {
            'id': '1yNGaQLWpejGj',
            'ext': 'mp4',
            'title': 'Andrea May Sahouri - Periscope Broadcast',
            'uploader': 'Andrea May Sahouri',
            'uploader_id': 'andreamsahouri',
            'uploader_url': 'https://twitter.com/andreamsahouri',
            'timestamp': 1590973638,
            'upload_date': '20200601',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1ZkKzeyrPbaxv',
        'info_dict': {
            'id': '1ZkKzeyrPbaxv',
            'ext': 'mp4',
            'title': 'Starship | SN10 | High-Altitude Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1614812942,
            'upload_date': '20210303',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }, {
        'url': 'https://twitter.com/i/broadcasts/1OyKAVQrgzwGb',
        'info_dict': {
            'id': '1OyKAVQrgzwGb',
            'ext': 'mp4',
            'title': 'Starship Flight Test',
            'uploader': 'SpaceX',
            'uploader_id': 'SpaceX',
            'uploader_url': 'https://twitter.com/SpaceX',
            'timestamp': 1681993964,
            'upload_date': '20230420',
            'thumbnail': r're:^https?://[^?#]+\.jpg\?token=',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Look up the broadcast, then resolve its HLS stream unless it is still upcoming."""
        broadcast_id = self._match_id(url)
        api_response = self._call_api(
            'broadcasts/show.json', broadcast_id, {'ids': broadcast_id})
        broadcast = api_response['broadcasts'][broadcast_id]
        if not broadcast:
            raise ExtractorError('Broadcast no longer exists', expected=True)

        info = self._parse_broadcast_data(broadcast, broadcast_id)
        # Values from the broadcast itself take precedence over the parsed ones
        info.update({
            'title': broadcast.get('status') or info.get('title'),
            'uploader_id': broadcast.get('twitter_username') or info.get('uploader_id'),
            'uploader_url': format_field(
                broadcast, 'twitter_username', 'https://twitter.com/%s', default=None),
        })
        if info['live_status'] == 'is_upcoming':
            return info

        media_key = broadcast['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        source = stream_status['source']
        m3u8_url = source.get('noRedirectPlaybackUrl') or source['location']
        if '/live_video_stream/geoblocked/' in m3u8_url:
            self.raise_geo_restricted()

        # The playlist URL's `type` query parameter, when present, is used as the m3u8 id
        url_query = urllib.parse.parse_qs(urllib.parse.urlparse(m3u8_url).query)
        m3u8_id = url_query.get('type', [None])[0]
        state, width, height = self._extract_common_format_info(broadcast)
        info['formats'] = self._extract_pscp_m3u8_formats(
            m3u8_url, broadcast_id, m3u8_id, state, width, height)
        return info
1700 class TwitterSpacesIE(TwitterBaseIE):
1701 IE_NAME = 'twitter:spaces'
1702 _VALID_URL = TwitterBaseIE._BASE_REGEX + r'i/spaces/(?P<id>[0-9a-zA-Z]{13})'
1704 _TESTS = [{
1705 'url': 'https://twitter.com/i/spaces/1RDxlgyvNXzJL',
1706 'info_dict': {
1707 'id': '1RDxlgyvNXzJL',
1708 'ext': 'm4a',
1709 'title': 'King Carlo e la mossa Kansas City per fare il Grande Centro',
1710 'description': 'Twitter Space participated by annarita digiorgio, Signor Ernesto, Raffaello Colosimo, Simone M. Sepe',
1711 'uploader': r're:Lucio Di Gaetano.*?',
1712 'uploader_id': 'luciodigaetano',
1713 'live_status': 'was_live',
1714 'timestamp': 1659877956,
1715 'upload_date': '20220807',
1716 'release_timestamp': 1659904215,
1717 'release_date': '20220807',
1719 'skip': 'No longer available',
1720 }, {
1721 # post_live/TimedOut but downloadable
1722 'url': 'https://twitter.com/i/spaces/1vAxRAVQWONJl',
1723 'info_dict': {
1724 'id': '1vAxRAVQWONJl',
1725 'ext': 'm4a',
1726 'title': 'Framing Up FinOps: Billing Tools',
1727 'description': 'Twitter Space participated by rupa, Alfonso Hernandez',
1728 'uploader': 'Google Cloud',
1729 'uploader_id': 'googlecloud',
1730 'live_status': 'post_live',
1731 'timestamp': 1681409554,
1732 'upload_date': '20230413',
1733 'release_timestamp': 1681839000,
1734 'release_date': '20230418',
1735 'protocol': 'm3u8', # ffmpeg is forced
1736 'container': 'm4a_dash', # audio-only format fixup is applied
1738 'params': {'skip_download': 'm3u8'},
1739 }, {
1740 # Needs ffmpeg as downloader, see: https://github.com/yt-dlp/yt-dlp/issues/7536
1741 'url': 'https://twitter.com/i/spaces/1eaKbrQbjoRKX',
1742 'info_dict': {
1743 'id': '1eaKbrQbjoRKX',
1744 'ext': 'm4a',
1745 'title': 'あ',
1746 'description': 'Twitter Space participated by nobody yet',
1747 'uploader': '息根とめる',
1748 'uploader_id': 'tomeru_ikinone',
1749 'live_status': 'was_live',
1750 'timestamp': 1685617198,
1751 'upload_date': '20230601',
1752 'protocol': 'm3u8', # ffmpeg is forced
1753 'container': 'm4a_dash', # audio-only format fixup is applied
1755 'params': {'skip_download': 'm3u8'},
1756 }, {
1757 # Video Space
1758 'url': 'https://x.com/i/spaces/1DXGydznBYWKM',
1759 'info_dict': {
1760 'id': '1DXGydznBYWKM',
1761 'ext': 'mp4',
1762 'title': 'America and Israel’s “special relationship”',
1763 'description': 'Twitter Space participated by nobody yet',
1764 'uploader': 'Candace Owens',
1765 'uploader_id': 'RealCandaceO',
1766 'live_status': 'was_live',
1767 'timestamp': 1723931351,
1768 'upload_date': '20240817',
1769 'release_timestamp': 1723932000,
1770 'release_date': '20240817',
1771 'protocol': 'm3u8_native', # not ffmpeg, detected as video space
1773 'params': {'skip_download': 'm3u8'},
1776 SPACE_STATUS = {
1777 'notstarted': 'is_upcoming',
1778 'ended': 'was_live',
1779 'running': 'is_live',
1780 'timedout': 'post_live',
def _build_graphql_query(self, space_id):
    """Build the GraphQL request payload for looking up a Space by its id."""
    variables = {
        'id': space_id,
        'isMetatagsQuery': True,
        'withDownvotePerspective': False,
        'withReactionsMetadata': False,
        'withReactionsPerspective': False,
        'withReplays': True,
        'withSuperFollowsUserFields': True,
        'withSuperFollowsTweetFields': True,
    }
    features = {
        'dont_mention_me_view_api_enabled': True,
        'interactive_text_enabled': True,
        'responsive_web_edit_tweet_api_enabled': True,
        'responsive_web_enhance_cards_enabled': True,
        'responsive_web_uc_gql_enabled': True,
        'spaces_2022_h2_clipping': True,
        'spaces_2022_h2_spaces_communities': False,
        'standardized_nudges_misinfo': True,
        'tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled': False,
        'vibe_api_enabled': True,
    }
    return {'variables': variables, 'features': features}
def _real_extract(self, url):
    """Build the info dict for a Twitter Space.

    Metadata comes from the `AudioSpaceById` GraphQL query; when the Space
    has a media key, m3u8 formats are read from the
    `live_video_stream/status` endpoint.

    Raises ExtractorError when the GraphQL response holds no Space data.
    """
    space_id = self._match_id(url)
    space_data = self._call_graphql_api(
        'HPEisOmj1epUNLCWTYhUWw/AudioSpaceById', space_id)['audioSpace']
    if not space_data:
        raise ExtractorError('Twitter Space not found', expected=True)

    meta = space_data['metadata']
    # NOTE(review): try_call is expected to give None instead of raising when
    # the state is absent or not in SPACE_STATUS - confirm in utils
    live_status = try_call(lambda: self.SPACE_STATUS[meta['state'].lower()])
    is_live = live_status == 'is_live'

    formats = []
    headers = {'Referer': 'https://twitter.com/'}
    if live_status == 'is_upcoming':
        self.raise_no_formats('Twitter Space not started yet', expected=True)
    elif not (is_live or meta.get('is_space_available_for_replay')):
        self.raise_no_formats('Twitter Space ended and replay is disabled', expected=True)
    elif meta.get('media_key'):
        media_key = meta['media_key']
        stream_status = self._call_api(f'live_video_stream/status/{media_key}', media_key)
        # First valid URL among the two candidate keys, or None
        source = traverse_obj(
            stream_status,
            ('source', ('noRedirectPlaybackUrl', 'location'), {url_or_none}), get_all=False)
        if source:
            is_audio_space = 'audio-space' in source
            if is_audio_space:
                # XXX: Some audio-only Spaces need ffmpeg as downloader
                ext, entry_protocol = 'm4a', 'm3u8'
            else:
                ext, entry_protocol = 'mp4', 'm3u8_native'
            formats = self._extract_m3u8_formats(
                source, media_key, ext, entry_protocol=entry_protocol,
                live=is_live, headers=headers, fatal=False)
            if is_audio_space:
                # Audio-only overrides; the container hint is only set for
                # Spaces that are not currently live
                audio_fields = {'vcodec': 'none', 'acodec': 'aac'}
                if not is_live:
                    audio_fields['container'] = 'm4a_dash'
                for fmt in formats:
                    fmt.update(audio_fields)

    speaker_names = traverse_obj(
        space_data, ('participants', 'speakers', ..., 'display_name'))
    participants = ', '.join(speaker_names) or 'nobody yet'

    if live_status == 'post_live' and not formats:
        self.raise_no_formats('Twitter Space ended but not downloadable yet', expected=True)

    creator_path = ('creator_results', 'result', 'legacy')
    return {
        'id': space_id,
        'title': meta.get('title'),
        'description': f'Twitter Space participated by {participants}',
        'uploader': traverse_obj(meta, (*creator_path, 'name')),
        'uploader_id': traverse_obj(meta, (*creator_path, 'screen_name')),
        'live_status': live_status,
        # NOTE(review): scale=1000 suggests the API reports milliseconds - confirm
        'release_timestamp': try_call(
            lambda: int_or_none(meta['scheduled_start'], scale=1000)),
        'timestamp': int_or_none(meta.get('created_at'), scale=1000),
        'formats': formats,
        'http_headers': headers,
    }
class TwitterShortenerIE(TwitterBaseIE):
    # Resolves t.co short links (also accepted in the `tco:<shortcode>` form)
    # and delegates the resolved URL through url_result()
    IE_NAME = 'twitter:shortener'
    _VALID_URL = r'https?://t\.co/(?P<id>[^?#]+)|tco:(?P<eid>[^?#]+)'
    _BASE_URL = 'https://t.co/'

    def _real_extract(self, url):
        """Request the short link and return a url_result for the URL it ends up at."""
        mobj = self._match_valid_url(url)
        eid, shortcode = mobj.group('eid', 'id')
        if eid:
            # `tco:<shortcode>` input: rebuild the full https://t.co/ URL
            shortcode = eid
            url = self._BASE_URL + shortcode
        # NOTE(review): the curl User-Agent presumably makes t.co answer with
        # a plain redirect - confirm
        new_url = self._request_webpage(url, shortcode, headers={'User-Agent': 'curl'}).url
        # Plain local name on purpose: a `__`-prefixed identifier inside a
        # class body gets name-mangled
        unsafe_link_prefix = 'https://twitter.com/safety/unsafe_link_warning?unsafe_link='
        if new_url.startswith(unsafe_link_prefix):
            # Strip only the leading warning prefix; str.replace() would also
            # remove any later occurrence inside the destination URL
            new_url = new_url[len(unsafe_link_prefix):]
        return self.url_result(new_url)